src/share/vm/gc_implementation/g1/concurrentMark.cpp

author:      dcubed
date:        Sun, 21 Apr 2013 20:41:04 -0700
changeset:   4967:5a9fa2ba85f0
parent:      4904:7b835924c31c
child:       5018:b06ac540229e
permissions: -rw-r--r--

8012907: anti-delta fix for 8010992
Summary: anti-delta fix for 8010992 until 8012902 can be fixed
Reviewed-by: acorn, minqi, rdurbin

     1 /*
     2  * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.
     8  *
     9  * This code is distributed in the hope that it will be useful, but WITHOUT
    10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    12  * version 2 for more details (a copy is included in the LICENSE file that
    13  * accompanied this code).
    14  *
    15  * You should have received a copy of the GNU General Public License version
    16  * 2 along with this work; if not, write to the Free Software Foundation,
    17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    18  *
    19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    20  * or visit www.oracle.com if you need additional information or have any
    21  * questions.
    22  *
    23  */
    25 #include "precompiled.hpp"
    26 #include "classfile/symbolTable.hpp"
    27 #include "gc_implementation/g1/concurrentMark.inline.hpp"
    28 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
    29 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
    30 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
    31 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
    32 #include "gc_implementation/g1/g1Log.hpp"
    33 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
    34 #include "gc_implementation/g1/g1RemSet.hpp"
    35 #include "gc_implementation/g1/heapRegion.inline.hpp"
    36 #include "gc_implementation/g1/heapRegionRemSet.hpp"
    37 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
    38 #include "gc_implementation/shared/vmGCOperations.hpp"
    39 #include "memory/genOopClosures.inline.hpp"
    40 #include "memory/referencePolicy.hpp"
    41 #include "memory/resourceArea.hpp"
    42 #include "oops/oop.inline.hpp"
    43 #include "runtime/handles.inline.hpp"
    44 #include "runtime/java.hpp"
    45 #include "services/memTracker.hpp"
    47 // Concurrent marking bit map wrapper
    49 CMBitMapRO::CMBitMapRO(int shifter) :
    50   _bm(),
    51   _shifter(shifter) {
    52   _bmStartWord = 0;
    53   _bmWordSize = 0;
    54 }
    56 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
    57                                                HeapWord* limit) const {
    58   // First we must round addr *up* to a possible object boundary.
    59   addr = (HeapWord*)align_size_up((intptr_t)addr,
    60                                   HeapWordSize << _shifter);
    61   size_t addrOffset = heapWordToOffset(addr);
    62   if (limit == NULL) {
    63     limit = _bmStartWord + _bmWordSize;
    64   }
    65   size_t limitOffset = heapWordToOffset(limit);
    66   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
    67   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
    68   assert(nextAddr >= addr, "get_next_one postcondition");
    69   assert(nextAddr == limit || isMarked(nextAddr),
    70          "get_next_one postcondition");
    71   return nextAddr;
    72 }
    74 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
    75                                                  HeapWord* limit) const {
    76   size_t addrOffset = heapWordToOffset(addr);
    77   if (limit == NULL) {
    78     limit = _bmStartWord + _bmWordSize;
    79   }
    80   size_t limitOffset = heapWordToOffset(limit);
    81   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
    82   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
    83   assert(nextAddr >= addr, "get_next_one postcondition");
    84   assert(nextAddr == limit || !isMarked(nextAddr),
    85          "get_next_one postcondition");
    86   return nextAddr;
    87 }
    89 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
    90   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
    91   return (int) (diff >> _shifter);
    92 }
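
The helpers above all go through the word-to-bit mapping declared in concurrentMark.hpp: one bitmap bit covers (1 << _shifter) heap words, which is why getNextMarkedWordAddress() first rounds addr up to such a boundary. A minimal sketch of that mapping, using the field names from this file (the real inline definitions live in the header and may differ in detail):

    // Sketch only: one bitmap bit per (1 << _shifter) heap words.
    size_t heapWordToOffset(HeapWord* addr) const {
      return pointer_delta(addr, _bmStartWord) >> _shifter;
    }
    HeapWord* offsetToHeapWord(size_t offset) const {
      return _bmStartWord + (offset << _shifter);
    }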
    94 #ifndef PRODUCT
    95 bool CMBitMapRO::covers(ReservedSpace heap_rs) const {
    96   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
    97   assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
    98          "size inconsistency");
    99   return _bmStartWord == (HeapWord*)(heap_rs.base()) &&
   100          _bmWordSize  == heap_rs.size()>>LogHeapWordSize;
   101 }
   102 #endif
   104 void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
   105   _bm.print_on_error(st, prefix);
   106 }
   108 bool CMBitMap::allocate(ReservedSpace heap_rs) {
   109   _bmStartWord = (HeapWord*)(heap_rs.base());
   110   _bmWordSize  = heap_rs.size()/HeapWordSize;    // heap_rs.size() is in bytes
   111   ReservedSpace brs(ReservedSpace::allocation_align_size_up(
   112                      (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
   113   if (!brs.is_reserved()) {
   114     warning("ConcurrentMark marking bit map allocation failure");
   115     return false;
   116   }
   117   MemTracker::record_virtual_memory_type((address)brs.base(), mtGC);
   118   // For now we'll just commit all of the bit map up front.
   119   // Later on we'll try to be more parsimonious with swap.
   120   if (!_virtual_space.initialize(brs, brs.size())) {
   121     warning("ConcurrentMark marking bit map backing store failure");
   122     return false;
   123   }
   124   assert(_virtual_space.committed_size() == brs.size(),
   125          "didn't reserve backing store for all of concurrent marking bit map?");
   126   _bm.set_map((uintptr_t*)_virtual_space.low());
   127   assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
   128          _bmWordSize, "inconsistency in bit map sizing");
   129   _bm.set_size(_bmWordSize >> _shifter);
   130   return true;
   131 }
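
The backing-store size reserved above follows directly from that mapping: the bitmap needs _bmWordSize >> _shifter bits, i.e. _bmWordSize >> (_shifter + LogBitsPerByte) bytes, with the trailing "+ 1" guarding against truncation. A worked example, assuming a 1 GB heap reservation, 8-byte HeapWords and _shifter == 0 (the usual value with default object alignment):

    heap_rs.size()   = 1 GB
    _bmWordSize      = 1 GB / 8 bytes        = 128M heap words
    bits needed      = 128M >> 0             = 128M bits
    bytes reserved   ~ (128M >> 3) + 1       = 16 MB, then aligned up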
   133 void CMBitMap::clearAll() {
   134   _bm.clear();
   135   return;
   136 }
   138 void CMBitMap::markRange(MemRegion mr) {
   139   mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
   140   assert(!mr.is_empty(), "unexpected empty region");
   141   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
   142           ((HeapWord *) mr.end())),
   143          "markRange memory region end is not card aligned");
   144   // convert address range into offset range
   145   _bm.at_put_range(heapWordToOffset(mr.start()),
   146                    heapWordToOffset(mr.end()), true);
   147 }
   149 void CMBitMap::clearRange(MemRegion mr) {
   150   mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
   151   assert(!mr.is_empty(), "unexpected empty region");
   152   // convert address range into offset range
   153   _bm.at_put_range(heapWordToOffset(mr.start()),
   154                    heapWordToOffset(mr.end()), false);
   155 }
   157 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
   158                                             HeapWord* end_addr) {
   159   HeapWord* start = getNextMarkedWordAddress(addr);
   160   start = MIN2(start, end_addr);
   161   HeapWord* end   = getNextUnmarkedWordAddress(start);
   162   end = MIN2(end, end_addr);
   163   assert(start <= end, "Consistency check");
   164   MemRegion mr(start, end);
   165   if (!mr.is_empty()) {
   166     clearRange(mr);
   167   }
   168   return mr;
   169 }
   171 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
   172   _base(NULL), _cm(cm)
   173 #ifdef ASSERT
   174   , _drain_in_progress(false)
   175   , _drain_in_progress_yields(false)
   176 #endif
   177 {}
   179 bool CMMarkStack::allocate(size_t capacity) {
   180   // allocate a stack of the requisite depth
   181   ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
   182   if (!rs.is_reserved()) {
   183     warning("ConcurrentMark MarkStack allocation failure");
   184     return false;
   185   }
   186   MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
   187   if (!_virtual_space.initialize(rs, rs.size())) {
   188     warning("ConcurrentMark MarkStack backing store failure");
   189     // Release the virtual memory reserved for the marking stack
   190     rs.release();
   191     return false;
   192   }
   193   assert(_virtual_space.committed_size() == rs.size(),
   194          "Didn't reserve backing store for all of ConcurrentMark stack?");
   195   _base = (oop*) _virtual_space.low();
   196   setEmpty();
   197   _capacity = (jint) capacity;
   198   _saved_index = -1;
   199   _should_expand = false;
   200   NOT_PRODUCT(_max_depth = 0);
   201   return true;
   202 }
   204 void CMMarkStack::expand() {
    205   // Called during remark if we've overflowed the marking stack during marking.
    206   assert(isEmpty(), "stack should have been emptied while handling overflow");
   207   assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
   208   // Clear expansion flag
   209   _should_expand = false;
   210   if (_capacity == (jint) MarkStackSizeMax) {
   211     if (PrintGCDetails && Verbose) {
   212       gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
   213     }
   214     return;
   215   }
   216   // Double capacity if possible
   217   jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
    218   // Do not give up the existing stack until we have managed to
    219   // reserve the doubled capacity that we want.
   220   ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
   221                                                            sizeof(oop)));
   222   if (rs.is_reserved()) {
   223     // Release the backing store associated with old stack
   224     _virtual_space.release();
   225     // Reinitialize virtual space for new stack
   226     if (!_virtual_space.initialize(rs, rs.size())) {
   227       fatal("Not enough swap for expanded marking stack capacity");
   228     }
   229     _base = (oop*)(_virtual_space.low());
   230     _index = 0;
   231     _capacity = new_capacity;
   232   } else {
   233     if (PrintGCDetails && Verbose) {
   234       // Failed to double capacity, continue;
   235       gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
   236                           SIZE_FORMAT"K to " SIZE_FORMAT"K",
   237                           _capacity / K, new_capacity / K);
   238     }
   239   }
   240 }
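
The reservation requested above is new_capacity * sizeof(oop) bytes, and the old backing store is released only after that reservation succeeds. Illustrative arithmetic for a 64-bit VM with 8-byte oops (the starting capacity is made up for the example):

    _capacity    = 1M entries  -> old backing store = 8 MB
    new_capacity = 2M entries  -> new reservation   = 16 MB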
   242 void CMMarkStack::set_should_expand() {
    243   // If we're resetting the marking state because of a
    244   // marking stack overflow, record that we should, if
   245   // possible, expand the stack.
   246   _should_expand = _cm->has_overflown();
   247 }
   249 CMMarkStack::~CMMarkStack() {
   250   if (_base != NULL) {
   251     _base = NULL;
   252     _virtual_space.release();
   253   }
   254 }
   256 void CMMarkStack::par_push(oop ptr) {
   257   while (true) {
   258     if (isFull()) {
   259       _overflow = true;
   260       return;
   261     }
   262     // Otherwise...
   263     jint index = _index;
   264     jint next_index = index+1;
   265     jint res = Atomic::cmpxchg(next_index, &_index, index);
   266     if (res == index) {
   267       _base[index] = ptr;
   268       // Note that we don't maintain this atomically.  We could, but it
   269       // doesn't seem necessary.
   270       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
   271       return;
   272     }
   273     // Otherwise, we need to try again.
   274   }
   275 }
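
par_push() is lock-free: a thread claims slot _index with the cmpxchg and only writes the oop once its claim has succeeded, retrying if another thread won the race; only par_push_arr()/par_pop_arr() below take ParGCRareEvent_lock. A standalone sketch of the same claim-then-publish pattern, written with std::atomic rather than HotSpot's Atomic::cmpxchg (illustrative only, not part of this file):

    #include <atomic>
    #include <cstddef>

    template <typename T, size_t N>
    struct BoundedParStack {
      std::atomic<int> _index{0};
      T                _base[N];

      // Returns false on overflow instead of setting a flag as CMMarkStack does.
      bool par_push(const T& value) {
        while (true) {
          int index = _index.load();
          if (index >= (int)N) {
            return false;                        // full
          }
          if (_index.compare_exchange_weak(index, index + 1)) {
            _base[index] = value;                // slot claimed, publish the element
            return true;
          }
          // CAS failed: another thread claimed this slot, try again.
        }
      }
    };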
   277 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
   278   while (true) {
   279     if (isFull()) {
   280       _overflow = true;
   281       return;
   282     }
   283     // Otherwise...
   284     jint index = _index;
   285     jint next_index = index + n;
   286     if (next_index > _capacity) {
   287       _overflow = true;
   288       return;
   289     }
   290     jint res = Atomic::cmpxchg(next_index, &_index, index);
   291     if (res == index) {
   292       for (int i = 0; i < n; i++) {
   293         int  ind = index + i;
   294         assert(ind < _capacity, "By overflow test above.");
   295         _base[ind] = ptr_arr[i];
   296       }
   297       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
   298       return;
   299     }
   300     // Otherwise, we need to try again.
   301   }
   302 }
   304 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
   305   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
   306   jint start = _index;
   307   jint next_index = start + n;
   308   if (next_index > _capacity) {
   309     _overflow = true;
   310     return;
   311   }
   312   // Otherwise.
   313   _index = next_index;
   314   for (int i = 0; i < n; i++) {
   315     int ind = start + i;
   316     assert(ind < _capacity, "By overflow test above.");
   317     _base[ind] = ptr_arr[i];
   318   }
   319   NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
   320 }
   322 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
   323   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
   324   jint index = _index;
   325   if (index == 0) {
   326     *n = 0;
   327     return false;
   328   } else {
   329     int k = MIN2(max, index);
   330     jint  new_ind = index - k;
   331     for (int j = 0; j < k; j++) {
   332       ptr_arr[j] = _base[new_ind + j];
   333     }
   334     _index = new_ind;
   335     *n = k;
   336     return true;
   337   }
   338 }
   340 template<class OopClosureClass>
   341 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
   342   assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
   343          || SafepointSynchronize::is_at_safepoint(),
   344          "Drain recursion must be yield-safe.");
   345   bool res = true;
   346   debug_only(_drain_in_progress = true);
   347   debug_only(_drain_in_progress_yields = yield_after);
   348   while (!isEmpty()) {
   349     oop newOop = pop();
   350     assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
   351     assert(newOop->is_oop(), "Expected an oop");
   352     assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
   353            "only grey objects on this stack");
   354     newOop->oop_iterate(cl);
   355     if (yield_after && _cm->do_yield_check()) {
   356       res = false;
   357       break;
   358     }
   359   }
   360   debug_only(_drain_in_progress = false);
   361   return res;
   362 }
   364 void CMMarkStack::note_start_of_gc() {
   365   assert(_saved_index == -1,
   366          "note_start_of_gc()/end_of_gc() bracketed incorrectly");
   367   _saved_index = _index;
   368 }
   370 void CMMarkStack::note_end_of_gc() {
   371   // This is intentionally a guarantee, instead of an assert. If we
   372   // accidentally add something to the mark stack during GC, it
    373   // will be a correctness issue so it's better if we crash. We'll
   374   // only check this once per GC anyway, so it won't be a performance
   375   // issue in any way.
   376   guarantee(_saved_index == _index,
   377             err_msg("saved index: %d index: %d", _saved_index, _index));
   378   _saved_index = -1;
   379 }
   381 void CMMarkStack::oops_do(OopClosure* f) {
   382   assert(_saved_index == _index,
   383          err_msg("saved index: %d index: %d", _saved_index, _index));
   384   for (int i = 0; i < _index; i += 1) {
   385     f->do_oop(&_base[i]);
   386   }
   387 }
   389 bool ConcurrentMark::not_yet_marked(oop obj) const {
   390   return _g1h->is_obj_ill(obj);
   391 }
   393 CMRootRegions::CMRootRegions() :
   394   _young_list(NULL), _cm(NULL), _scan_in_progress(false),
   395   _should_abort(false),  _next_survivor(NULL) { }
   397 void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
   398   _young_list = g1h->young_list();
   399   _cm = cm;
   400 }
   402 void CMRootRegions::prepare_for_scan() {
   403   assert(!scan_in_progress(), "pre-condition");
   405   // Currently, only survivors can be root regions.
   406   assert(_next_survivor == NULL, "pre-condition");
   407   _next_survivor = _young_list->first_survivor_region();
   408   _scan_in_progress = (_next_survivor != NULL);
   409   _should_abort = false;
   410 }
   412 HeapRegion* CMRootRegions::claim_next() {
   413   if (_should_abort) {
   414     // If someone has set the should_abort flag, we return NULL to
   415     // force the caller to bail out of their loop.
   416     return NULL;
   417   }
   419   // Currently, only survivors can be root regions.
   420   HeapRegion* res = _next_survivor;
   421   if (res != NULL) {
   422     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
   423     // Read it again in case it changed while we were waiting for the lock.
   424     res = _next_survivor;
   425     if (res != NULL) {
   426       if (res == _young_list->last_survivor_region()) {
   427         // We just claimed the last survivor so store NULL to indicate
   428         // that we're done.
   429         _next_survivor = NULL;
   430       } else {
   431         _next_survivor = res->get_next_young_region();
   432       }
   433     } else {
   434       // Someone else claimed the last survivor while we were trying
   435       // to take the lock so nothing else to do.
   436     }
   437   }
   438   assert(res == NULL || res->is_survivor(), "post-condition");
   440   return res;
   441 }
   443 void CMRootRegions::scan_finished() {
   444   assert(scan_in_progress(), "pre-condition");
   446   // Currently, only survivors can be root regions.
   447   if (!_should_abort) {
   448     assert(_next_survivor == NULL, "we should have claimed all survivors");
   449   }
   450   _next_survivor = NULL;
   452   {
   453     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
   454     _scan_in_progress = false;
   455     RootRegionScan_lock->notify_all();
   456   }
   457 }
   459 bool CMRootRegions::wait_until_scan_finished() {
   460   if (!scan_in_progress()) return false;
   462   {
   463     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
   464     while (scan_in_progress()) {
   465       RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
   466     }
   467   }
   468   return true;
   469 }
   471 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
   472 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
   473 #endif // _MSC_VER
   475 uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
   476   return MAX2((n_par_threads + 2) / 4, 1U);
   477 }
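
scale_parallel_threads() asks for roughly one marking thread per four parallel GC threads, never fewer than one; the division is integer division. A few illustrative values:

    n_par_threads    (n + 2) / 4    after MAX2(.., 1U)
              1            0                 1
              4            1                 1
              8            2                 2
             13            3                 3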
   479 ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, ReservedSpace heap_rs) :
   480   _g1h(g1h),
   481   _markBitMap1(MinObjAlignment - 1),
   482   _markBitMap2(MinObjAlignment - 1),
   484   _parallel_marking_threads(0),
   485   _max_parallel_marking_threads(0),
   486   _sleep_factor(0.0),
   487   _marking_task_overhead(1.0),
   488   _cleanup_sleep_factor(0.0),
   489   _cleanup_task_overhead(1.0),
   490   _cleanup_list("Cleanup List"),
   491   _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
   492   _card_bm((heap_rs.size() + CardTableModRefBS::card_size - 1) >>
   493             CardTableModRefBS::card_shift,
   494             false /* in_resource_area*/),
   496   _prevMarkBitMap(&_markBitMap1),
   497   _nextMarkBitMap(&_markBitMap2),
   499   _markStack(this),
   500   // _finger set in set_non_marking_state
   502   _max_worker_id(MAX2((uint)ParallelGCThreads, 1U)),
   503   // _active_tasks set in set_non_marking_state
   504   // _tasks set inside the constructor
   505   _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
   506   _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),
   508   _has_overflown(false),
   509   _concurrent(false),
   510   _has_aborted(false),
   511   _restart_for_overflow(false),
   512   _concurrent_marking_in_progress(false),
   514   // _verbose_level set below
   516   _init_times(),
   517   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
   518   _cleanup_times(),
   519   _total_counting_time(0.0),
   520   _total_rs_scrub_time(0.0),
   522   _parallel_workers(NULL),
   524   _count_card_bitmaps(NULL),
   525   _count_marked_bytes(NULL),
   526   _completed_initialization(false) {
   527   CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
   528   if (verbose_level < no_verbose) {
   529     verbose_level = no_verbose;
   530   }
   531   if (verbose_level > high_verbose) {
   532     verbose_level = high_verbose;
   533   }
   534   _verbose_level = verbose_level;
   536   if (verbose_low()) {
   537     gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
   538                            "heap end = "PTR_FORMAT, _heap_start, _heap_end);
   539   }
   541   if (!_markBitMap1.allocate(heap_rs)) {
   542     warning("Failed to allocate first CM bit map");
   543     return;
   544   }
   545   if (!_markBitMap2.allocate(heap_rs)) {
   546     warning("Failed to allocate second CM bit map");
   547     return;
   548   }
   550   // Create & start a ConcurrentMark thread.
   551   _cmThread = new ConcurrentMarkThread(this);
   552   assert(cmThread() != NULL, "CM Thread should have been created");
   553   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
   555   assert(CGC_lock != NULL, "Where's the CGC_lock?");
   556   assert(_markBitMap1.covers(heap_rs), "_markBitMap1 inconsistency");
   557   assert(_markBitMap2.covers(heap_rs), "_markBitMap2 inconsistency");
   559   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
   560   satb_qs.set_buffer_size(G1SATBBufferSize);
   562   _root_regions.init(_g1h, this);
   564   if (ConcGCThreads > ParallelGCThreads) {
   565     warning("Can't have more ConcGCThreads (" UINT32_FORMAT ") "
   566             "than ParallelGCThreads (" UINT32_FORMAT ").",
   567             ConcGCThreads, ParallelGCThreads);
   568     return;
   569   }
   570   if (ParallelGCThreads == 0) {
   571     // if we are not running with any parallel GC threads we will not
   572     // spawn any marking threads either
   573     _parallel_marking_threads =       0;
   574     _max_parallel_marking_threads =   0;
   575     _sleep_factor             =     0.0;
   576     _marking_task_overhead    =     1.0;
   577   } else {
   578     if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
   579       // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
   580       // if both are set
   581       _sleep_factor             = 0.0;
   582       _marking_task_overhead    = 1.0;
   583     } else if (G1MarkingOverheadPercent > 0) {
   584       // We will calculate the number of parallel marking threads based
   585       // on a target overhead with respect to the soft real-time goal
   586       double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
   587       double overall_cm_overhead =
   588         (double) MaxGCPauseMillis * marking_overhead /
   589         (double) GCPauseIntervalMillis;
   590       double cpu_ratio = 1.0 / (double) os::processor_count();
   591       double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
   592       double marking_task_overhead =
   593         overall_cm_overhead / marking_thread_num *
   594                                                 (double) os::processor_count();
   595       double sleep_factor =
   596                          (1.0 - marking_task_overhead) / marking_task_overhead;
   598       FLAG_SET_ERGO(uintx, ConcGCThreads, (uint) marking_thread_num);
   599       _sleep_factor             = sleep_factor;
   600       _marking_task_overhead    = marking_task_overhead;
   601     } else {
   602       // Calculate the number of parallel marking threads by scaling
   603       // the number of parallel GC threads.
   604       uint marking_thread_num = scale_parallel_threads((uint) ParallelGCThreads);
   605       FLAG_SET_ERGO(uintx, ConcGCThreads, marking_thread_num);
   606       _sleep_factor             = 0.0;
   607       _marking_task_overhead    = 1.0;
   608     }
   610     assert(ConcGCThreads > 0, "Should have been set");
   611     _parallel_marking_threads = (uint) ConcGCThreads;
   612     _max_parallel_marking_threads = _parallel_marking_threads;
   614     if (parallel_marking_threads() > 1) {
   615       _cleanup_task_overhead = 1.0;
   616     } else {
   617       _cleanup_task_overhead = marking_task_overhead();
   618     }
   619     _cleanup_sleep_factor =
   620                      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
   622 #if 0
   623     gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
   624     gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
   625     gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
   626     gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
   627     gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
   628 #endif
   630     guarantee(parallel_marking_threads() > 0, "peace of mind");
   631     _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
   632          _max_parallel_marking_threads, false, true);
   633     if (_parallel_workers == NULL) {
   634       vm_exit_during_initialization("Failed necessary allocation.");
   635     } else {
   636       _parallel_workers->initialize_workers();
   637     }
   638   }
   640   if (FLAG_IS_DEFAULT(MarkStackSize)) {
   641     uintx mark_stack_size =
   642       MIN2(MarkStackSizeMax,
   643           MAX2(MarkStackSize, (uintx) (parallel_marking_threads() * TASKQUEUE_SIZE)));
   644     // Verify that the calculated value for MarkStackSize is in range.
   645     // It would be nice to use the private utility routine from Arguments.
   646     if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
   647       warning("Invalid value calculated for MarkStackSize (" UINTX_FORMAT "): "
   648               "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
   649               mark_stack_size, 1, MarkStackSizeMax);
   650       return;
   651     }
   652     FLAG_SET_ERGO(uintx, MarkStackSize, mark_stack_size);
   653   } else {
   654     // Verify MarkStackSize is in range.
   655     if (FLAG_IS_CMDLINE(MarkStackSize)) {
   656       if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
   657         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
   658           warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT "): "
   659                   "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
   660                   MarkStackSize, 1, MarkStackSizeMax);
   661           return;
   662         }
   663       } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
   664         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
   665           warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT ")"
   666                   " or for MarkStackSizeMax (" UINTX_FORMAT ")",
   667                   MarkStackSize, MarkStackSizeMax);
   668           return;
   669         }
   670       }
   671     }
   672   }
   674   if (!_markStack.allocate(MarkStackSize)) {
   675     warning("Failed to allocate CM marking stack");
   676     return;
   677   }
   679   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
   680   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
   682   _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
   683   _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
   685   BitMap::idx_t card_bm_size = _card_bm.size();
   687   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
   688   _active_tasks = _max_worker_id;
   690   size_t max_regions = (size_t) _g1h->max_regions();
   691   for (uint i = 0; i < _max_worker_id; ++i) {
   692     CMTaskQueue* task_queue = new CMTaskQueue();
   693     task_queue->initialize();
   694     _task_queues->register_queue(i, task_queue);
   696     _count_card_bitmaps[i] = BitMap(card_bm_size, false);
   697     _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);
   699     _tasks[i] = new CMTask(i, this,
   700                            _count_marked_bytes[i],
   701                            &_count_card_bitmaps[i],
   702                            task_queue, _task_queues);
   704     _accum_task_vtime[i] = 0.0;
   705   }
   707   // Calculate the card number for the bottom of the heap. Used
   708   // in biasing indexes into the accounting card bitmaps.
   709   _heap_bottom_card_num =
   710     intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
   711                                 CardTableModRefBS::card_shift);
   713   // Clear all the liveness counting data
   714   clear_all_count_data();
   716   // so that the call below can read a sensible value
   717   _heap_start = (HeapWord*) heap_rs.base();
   718   set_non_marking_state();
   719   _completed_initialization = true;
   720 }
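
For the G1MarkingOverheadPercent branch in the constructor above, the following numbers are purely illustrative (the branch is only taken when that flag is raised above its default of 0); they simply trace the formulas in the source:

    assume: G1MarkingOverheadPercent = 10, MaxGCPauseMillis = 200,
            GCPauseIntervalMillis = 1000, os::processor_count() = 8

    marking_overhead      = 10 / 100.0          = 0.10
    overall_cm_overhead   = 200 * 0.10 / 1000   = 0.02
    cpu_ratio             = 1.0 / 8             = 0.125
    marking_thread_num    = ceil(0.02 / 0.125)  = 1
    marking_task_overhead = 0.02 / 1 * 8        = 0.16
    sleep_factor          = (1 - 0.16) / 0.16   = 5.25

so a single concurrent marking thread would run with a sleep factor of 5.25, i.e. after each marking step it sleeps roughly 5.25 times as long as the step took.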
   722 void ConcurrentMark::update_g1_committed(bool force) {
   723   // If concurrent marking is not in progress, then we do not need to
   724   // update _heap_end.
   725   if (!concurrent_marking_in_progress() && !force) return;
   727   MemRegion committed = _g1h->g1_committed();
   728   assert(committed.start() == _heap_start, "start shouldn't change");
   729   HeapWord* new_end = committed.end();
   730   if (new_end > _heap_end) {
   731     // The heap has been expanded.
   733     _heap_end = new_end;
   734   }
   735   // Notice that the heap can also shrink. However, this only happens
   736   // during a Full GC (at least currently) and the entire marking
   737   // phase will bail out and the task will not be restarted. So, let's
   738   // do nothing.
   739 }
   741 void ConcurrentMark::reset() {
    742   // Starting values for these two. This should be called in a STW
    743   // phase. CM will be notified of any future g1_committed expansions;
    744   // these will happen at the end of evacuation pauses, when tasks are
    745   // inactive.
   746   MemRegion committed = _g1h->g1_committed();
   747   _heap_start = committed.start();
   748   _heap_end   = committed.end();
   750   // Separated the asserts so that we know which one fires.
   751   assert(_heap_start != NULL, "heap bounds should look ok");
   752   assert(_heap_end != NULL, "heap bounds should look ok");
   753   assert(_heap_start < _heap_end, "heap bounds should look ok");
   755   // Reset all the marking data structures and any necessary flags
   756   reset_marking_state();
   758   if (verbose_low()) {
   759     gclog_or_tty->print_cr("[global] resetting");
   760   }
    762   // We do reset all of them, since different phases will use
    763   // different numbers of active threads. So, it's easiest to have all
   764   // of them ready.
   765   for (uint i = 0; i < _max_worker_id; ++i) {
   766     _tasks[i]->reset(_nextMarkBitMap);
   767   }
   769   // we need this to make sure that the flag is on during the evac
   770   // pause with initial mark piggy-backed
   771   set_concurrent_marking_in_progress();
   772 }
   775 void ConcurrentMark::reset_marking_state(bool clear_overflow) {
   776   _markStack.set_should_expand();
   777   _markStack.setEmpty();        // Also clears the _markStack overflow flag
   778   if (clear_overflow) {
   779     clear_has_overflown();
   780   } else {
   781     assert(has_overflown(), "pre-condition");
   782   }
   783   _finger = _heap_start;
   785   for (uint i = 0; i < _max_worker_id; ++i) {
   786     CMTaskQueue* queue = _task_queues->queue(i);
   787     queue->set_empty();
   788   }
   789 }
   791 void ConcurrentMark::set_concurrency(uint active_tasks) {
   792   assert(active_tasks <= _max_worker_id, "we should not have more");
   794   _active_tasks = active_tasks;
   795   // Need to update the three data structures below according to the
   796   // number of active threads for this phase.
   797   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
   798   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
   799   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
   800 }
   802 void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
   803   set_concurrency(active_tasks);
   805   _concurrent = concurrent;
   806   // We propagate this to all tasks, not just the active ones.
   807   for (uint i = 0; i < _max_worker_id; ++i)
   808     _tasks[i]->set_concurrent(concurrent);
   810   if (concurrent) {
   811     set_concurrent_marking_in_progress();
   812   } else {
   813     // We currently assume that the concurrent flag has been set to
   814     // false before we start remark. At this point we should also be
   815     // in a STW phase.
   816     assert(!concurrent_marking_in_progress(), "invariant");
   817     assert(_finger == _heap_end,
   818            err_msg("only way to get here: _finger: "PTR_FORMAT", _heap_end: "PTR_FORMAT,
   819                    _finger, _heap_end));
   820     update_g1_committed(true);
   821   }
   822 }
   824 void ConcurrentMark::set_non_marking_state() {
   825   // We set the global marking state to some default values when we're
   826   // not doing marking.
   827   reset_marking_state();
   828   _active_tasks = 0;
   829   clear_concurrent_marking_in_progress();
   830 }
   832 ConcurrentMark::~ConcurrentMark() {
   833   // The ConcurrentMark instance is never freed.
   834   ShouldNotReachHere();
   835 }
   837 void ConcurrentMark::clearNextBitmap() {
   838   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   839   G1CollectorPolicy* g1p = g1h->g1_policy();
    841   // Make sure that the concurrent mark thread appears to still be in
    842   // the current cycle.
   843   guarantee(cmThread()->during_cycle(), "invariant");
   845   // We are finishing up the current cycle by clearing the next
   846   // marking bitmap and getting it ready for the next cycle. During
   847   // this time no other cycle can start. So, let's make sure that this
   848   // is the case.
   849   guarantee(!g1h->mark_in_progress(), "invariant");
   851   // clear the mark bitmap (no grey objects to start with).
   852   // We need to do this in chunks and offer to yield in between
   853   // each chunk.
   854   HeapWord* start  = _nextMarkBitMap->startWord();
   855   HeapWord* end    = _nextMarkBitMap->endWord();
   856   HeapWord* cur    = start;
   857   size_t chunkSize = M;
   858   while (cur < end) {
   859     HeapWord* next = cur + chunkSize;
   860     if (next > end) {
   861       next = end;
   862     }
   863     MemRegion mr(cur,next);
   864     _nextMarkBitMap->clearRange(mr);
   865     cur = next;
   866     do_yield_check();
   868     // Repeat the asserts from above. We'll do them as asserts here to
   869     // minimize their overhead on the product. However, we'll have
   870     // them as guarantees at the beginning / end of the bitmap
   871     // clearing to get some checking in the product.
   872     assert(cmThread()->during_cycle(), "invariant");
   873     assert(!g1h->mark_in_progress(), "invariant");
   874   }
   876   // Clear the liveness counting data
   877   clear_all_count_data();
   879   // Repeat the asserts from above.
   880   guarantee(cmThread()->during_cycle(), "invariant");
   881   guarantee(!g1h->mark_in_progress(), "invariant");
   882 }
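
clearNextBitmap() clears the bitmap in chunkSize = M (1M HeapWord) slices with a do_yield_check() between slices, so the concurrent mark thread can yield to a pause part-way through a clear. Illustrative arithmetic for a 64-bit VM with 8-byte HeapWords:

    one chunk   = 1M heap words = 8 MB of covered heap
    1 GB heap   = 128 chunks, i.e. 128 yield opportunities per bitmap clear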
   884 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
   885 public:
   886   bool doHeapRegion(HeapRegion* r) {
   887     if (!r->continuesHumongous()) {
   888       r->note_start_of_marking();
   889     }
   890     return false;
   891   }
   892 };
   894 void ConcurrentMark::checkpointRootsInitialPre() {
   895   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
   896   G1CollectorPolicy* g1p = g1h->g1_policy();
   898   _has_aborted = false;
   900 #ifndef PRODUCT
   901   if (G1PrintReachableAtInitialMark) {
   902     print_reachable("at-cycle-start",
   903                     VerifyOption_G1UsePrevMarking, true /* all */);
   904   }
   905 #endif
   907   // Initialise marking structures. This has to be done in a STW phase.
   908   reset();
   910   // For each region note start of marking.
   911   NoteStartOfMarkHRClosure startcl;
   912   g1h->heap_region_iterate(&startcl);
   913 }
   916 void ConcurrentMark::checkpointRootsInitialPost() {
   917   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
    919   // If we force an overflow during remark, the remark operation will
    920   // actually abort and we'll restart concurrent marking. If we always
    921   // force an overflow during remark we'll never actually complete the
    922   // marking phase. So, we initialize this here, at the start of the
    923   // cycle, so that the remaining overflow number will decrease at
    924   // every remark and we'll eventually not need to cause one.
   925   force_overflow_stw()->init();
   927   // Start Concurrent Marking weak-reference discovery.
   928   ReferenceProcessor* rp = g1h->ref_processor_cm();
   929   // enable ("weak") refs discovery
   930   rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
   931   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
   933   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    934   // This is the start of the marking cycle; we expect all
    935   // threads to have SATB queues with active set to false.
   936   satb_mq_set.set_active_all_threads(true, /* new active value */
   937                                      false /* expected_active */);
   939   _root_regions.prepare_for_scan();
   941   // update_g1_committed() will be called at the end of an evac pause
   942   // when marking is on. So, it's also called at the end of the
   943   // initial-mark pause to update the heap end, if the heap expands
   944   // during it. No need to call it here.
   945 }
   947 /*
   948  * Notice that in the next two methods, we actually leave the STS
   949  * during the barrier sync and join it immediately afterwards. If we
   950  * do not do this, the following deadlock can occur: one thread could
   951  * be in the barrier sync code, waiting for the other thread to also
   952  * sync up, whereas another one could be trying to yield, while also
   953  * waiting for the other threads to sync up too.
   954  *
   955  * Note, however, that this code is also used during remark and in
   956  * this case we should not attempt to leave / enter the STS, otherwise
    957  * we'll either hit an assert (debug / fastdebug) or deadlock
   958  * (product). So we should only leave / enter the STS if we are
   959  * operating concurrently.
   960  *
    961  * Because the thread that does the sync barrier has left the STS, it
    962  * is possible for it to be suspended while a Full GC or an evacuation
    963  * pause occurs. This is actually safe, since entering the sync
    964  * barrier is one of the last things do_marking_step() does, and it
    965  * doesn't manipulate any data structures afterwards.
   966  */
   968 void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
   969   if (verbose_low()) {
   970     gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
   971   }
   973   if (concurrent()) {
   974     ConcurrentGCThread::stsLeave();
   975   }
   976   _first_overflow_barrier_sync.enter();
   977   if (concurrent()) {
   978     ConcurrentGCThread::stsJoin();
   979   }
   980   // at this point everyone should have synced up and not be doing any
   981   // more work
   983   if (verbose_low()) {
   984     gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
   985   }
   987   // If we're executing the concurrent phase of marking, reset the marking
   988   // state; otherwise the marking state is reset after reference processing,
   989   // during the remark pause.
   990   // If we reset here as a result of an overflow during the remark we will
   991   // see assertion failures from any subsequent set_concurrency_and_phase()
   992   // calls.
   993   if (concurrent()) {
    994     // let the task associated with worker 0 do this
   995     if (worker_id == 0) {
   996       // task 0 is responsible for clearing the global data structures
   997       // We should be here because of an overflow. During STW we should
   998       // not clear the overflow flag since we rely on it being true when
    999       // we exit this method to abort the pause and restart concurrent
  1000       // marking.
  1001       reset_marking_state(true /* clear_overflow */);
  1002       force_overflow()->update();
  1004       if (G1Log::fine()) {
  1005         gclog_or_tty->date_stamp(PrintGCDateStamps);
  1006         gclog_or_tty->stamp(PrintGCTimeStamps);
  1007         gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
  1008       }
  1009     }
  1010   }
  1012   // after this, each task should reset its own data structures and
  1013   // then go into the second barrier
  1014 }
  1016 void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  1017   if (verbose_low()) {
  1018     gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
  1019   }
  1021   if (concurrent()) {
  1022     ConcurrentGCThread::stsLeave();
  1023   }
  1024   _second_overflow_barrier_sync.enter();
  1025   if (concurrent()) {
  1026     ConcurrentGCThread::stsJoin();
  1027   }
  1028   // at this point everything should be re-initialized and ready to go
  1030   if (verbose_low()) {
  1031     gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
  1032   }
  1033 }
  1035 #ifndef PRODUCT
  1036 void ForceOverflowSettings::init() {
  1037   _num_remaining = G1ConcMarkForceOverflow;
  1038   _force = false;
  1039   update();
  1040 }
  1042 void ForceOverflowSettings::update() {
  1043   if (_num_remaining > 0) {
  1044     _num_remaining -= 1;
  1045     _force = true;
  1046   } else {
  1047     _force = false;
  1048   }
  1049 }
  1051 bool ForceOverflowSettings::should_force() {
  1052   if (_force) {
  1053     _force = false;
  1054     return true;
  1055   } else {
  1056     return false;
  1057   }
  1058 }
  1059 #endif // !PRODUCT
  1061 class CMConcurrentMarkingTask: public AbstractGangTask {
  1062 private:
  1063   ConcurrentMark*       _cm;
  1064   ConcurrentMarkThread* _cmt;
  1066 public:
  1067   void work(uint worker_id) {
  1068     assert(Thread::current()->is_ConcurrentGC_thread(),
  1069            "this should only be done by a conc GC thread");
  1070     ResourceMark rm;
  1072     double start_vtime = os::elapsedVTime();
  1074     ConcurrentGCThread::stsJoin();
  1076     assert(worker_id < _cm->active_tasks(), "invariant");
  1077     CMTask* the_task = _cm->task(worker_id);
  1078     the_task->record_start_time();
  1079     if (!_cm->has_aborted()) {
  1080       do {
  1081         double start_vtime_sec = os::elapsedVTime();
  1082         double start_time_sec = os::elapsedTime();
  1083         double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
  1085         the_task->do_marking_step(mark_step_duration_ms,
  1086                                   true  /* do_termination */,
  1087                                   false /* is_serial*/);
  1089         double end_time_sec = os::elapsedTime();
  1090         double end_vtime_sec = os::elapsedVTime();
  1091         double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
  1092         double elapsed_time_sec = end_time_sec - start_time_sec;
  1093         _cm->clear_has_overflown();
  1095         bool ret = _cm->do_yield_check(worker_id);
  1097         jlong sleep_time_ms;
  1098         if (!_cm->has_aborted() && the_task->has_aborted()) {
  1099           sleep_time_ms =
  1100             (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
  1101           ConcurrentGCThread::stsLeave();
  1102           os::sleep(Thread::current(), sleep_time_ms, false);
  1103           ConcurrentGCThread::stsJoin();
  1104         }
  1105         double end_time2_sec = os::elapsedTime();
  1106         double elapsed_time2_sec = end_time2_sec - start_time_sec;
  1108 #if 0
  1109           gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
  1110                                  "overhead %1.4lf",
  1111                                  elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
  1112                                  the_task->conc_overhead(os::elapsedTime()) * 8.0);
  1113           gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
  1114                                  elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
  1115 #endif
  1116       } while (!_cm->has_aborted() && the_task->has_aborted());
  1117     }
  1118     the_task->record_end_time();
  1119     guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
  1121     ConcurrentGCThread::stsLeave();
  1123     double end_vtime = os::elapsedVTime();
  1124     _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  1125   }
  1127   CMConcurrentMarkingTask(ConcurrentMark* cm,
  1128                           ConcurrentMarkThread* cmt) :
  1129       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
  1131   ~CMConcurrentMarkingTask() { }
  1132 };
  1134 // Calculates the number of active workers for a concurrent
  1135 // phase.
  1136 uint ConcurrentMark::calc_parallel_marking_threads() {
  1137   if (G1CollectedHeap::use_parallel_gc_threads()) {
  1138     uint n_conc_workers = 0;
  1139     if (!UseDynamicNumberOfGCThreads ||
  1140         (!FLAG_IS_DEFAULT(ConcGCThreads) &&
  1141          !ForceDynamicNumberOfGCThreads)) {
  1142       n_conc_workers = max_parallel_marking_threads();
  1143     } else {
  1144       n_conc_workers =
  1145         AdaptiveSizePolicy::calc_default_active_workers(
  1146                                      max_parallel_marking_threads(),
  1147                                      1, /* Minimum workers */
  1148                                      parallel_marking_threads(),
  1149                                      Threads::number_of_non_daemon_threads());
  1150       // Don't scale down "n_conc_workers" by scale_parallel_threads() because
  1151       // that scaling has already gone into "_max_parallel_marking_threads".
  1152     }
  1153     assert(n_conc_workers > 0, "Always need at least 1");
  1154     return n_conc_workers;
  1155   }
  1156   // If we are not running with any parallel GC threads we will not
  1157   // have spawned any marking threads either. Hence the number of
  1158   // concurrent workers should be 0.
  1159   return 0;
  1160 }
  1162 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
  1163   // Currently, only survivors can be root regions.
  1164   assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  1165   G1RootRegionScanClosure cl(_g1h, this, worker_id);
  1167   const uintx interval = PrefetchScanIntervalInBytes;
  1168   HeapWord* curr = hr->bottom();
  1169   const HeapWord* end = hr->top();
  1170   while (curr < end) {
  1171     Prefetch::read(curr, interval);
  1172     oop obj = oop(curr);
  1173     int size = obj->oop_iterate(&cl);
  1174     assert(size == obj->size(), "sanity");
  1175     curr += size;
  1176   }
  1177 }
  1179 class CMRootRegionScanTask : public AbstractGangTask {
  1180 private:
  1181   ConcurrentMark* _cm;
  1183 public:
  1184   CMRootRegionScanTask(ConcurrentMark* cm) :
  1185     AbstractGangTask("Root Region Scan"), _cm(cm) { }
  1187   void work(uint worker_id) {
  1188     assert(Thread::current()->is_ConcurrentGC_thread(),
  1189            "this should only be done by a conc GC thread");
  1191     CMRootRegions* root_regions = _cm->root_regions();
  1192     HeapRegion* hr = root_regions->claim_next();
  1193     while (hr != NULL) {
  1194       _cm->scanRootRegion(hr, worker_id);
  1195       hr = root_regions->claim_next();
  1196     }
  1197   }
  1198 };
  1200 void ConcurrentMark::scanRootRegions() {
  1201   // scan_in_progress() will have been set to true only if there was
  1202   // at least one root region to scan. So, if it's false, we
  1203   // should not attempt to do any further work.
  1204   if (root_regions()->scan_in_progress()) {
  1205     _parallel_marking_threads = calc_parallel_marking_threads();
  1206     assert(parallel_marking_threads() <= max_parallel_marking_threads(),
  1207            "Maximum number of marking threads exceeded");
  1208     uint active_workers = MAX2(1U, parallel_marking_threads());
  1210     CMRootRegionScanTask task(this);
  1211     if (use_parallel_marking_threads()) {
  1212       _parallel_workers->set_active_workers((int) active_workers);
  1213       _parallel_workers->run_task(&task);
  1214     } else {
  1215       task.work(0);
  1216     }
  1218     // It's possible that has_aborted() is true here without actually
  1219     // aborting the survivor scan earlier. This is OK as it's
  1220     // mainly used for sanity checking.
  1221     root_regions()->scan_finished();
  1222   }
  1223 }
  1225 void ConcurrentMark::markFromRoots() {
  1226   // we might be tempted to assert that:
  1227   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  1228   //        "inconsistent argument?");
  1229   // However that wouldn't be right, because it's possible that
  1230   // a safepoint is indeed in progress as a younger generation
  1231   // stop-the-world GC happens even as we mark in this generation.
  1233   _restart_for_overflow = false;
  1234   force_overflow_conc()->init();
  1236   // _g1h has _n_par_threads
  1237   _parallel_marking_threads = calc_parallel_marking_threads();
  1238   assert(parallel_marking_threads() <= max_parallel_marking_threads(),
  1239     "Maximum number of marking threads exceeded");
  1241   uint active_workers = MAX2(1U, parallel_marking_threads());
  1243   // Parallel task terminator is set in "set_concurrency_and_phase()"
  1244   set_concurrency_and_phase(active_workers, true /* concurrent */);
  1246   CMConcurrentMarkingTask markingTask(this, cmThread());
  1247   if (use_parallel_marking_threads()) {
  1248     _parallel_workers->set_active_workers((int)active_workers);
  1249     // Don't set _n_par_threads because it affects MT in process_strong_roots()
  1250     // and the decisions on that MT processing are made elsewhere.
  1251     assert(_parallel_workers->active_workers() > 0, "Should have been set");
  1252     _parallel_workers->run_task(&markingTask);
  1253   } else {
  1254     markingTask.work(0);
  1255   }
  1256   print_stats();
  1257 }
  1259 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  1260   // world is stopped at this checkpoint
  1261   assert(SafepointSynchronize::is_at_safepoint(),
  1262          "world should be stopped");
  1264   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  1266   // If a full collection has happened, we shouldn't do this.
  1267   if (has_aborted()) {
  1268     g1h->set_marking_complete(); // So bitmap clearing isn't confused
  1269     return;
  1270   }
  1272   SvcGCMarker sgcm(SvcGCMarker::OTHER);
  1274   if (VerifyDuringGC) {
  1275     HandleMark hm;  // handle scope
  1276     gclog_or_tty->print(" VerifyDuringGC:(before)");
  1277     Universe::heap()->prepare_for_verify();
  1278     Universe::verify(/* silent */ false,
  1279                      /* option */ VerifyOption_G1UsePrevMarking);
  1280   }
  1282   G1CollectorPolicy* g1p = g1h->g1_policy();
  1283   g1p->record_concurrent_mark_remark_start();
  1285   double start = os::elapsedTime();
  1287   checkpointRootsFinalWork();
  1289   double mark_work_end = os::elapsedTime();
  1291   weakRefsWork(clear_all_soft_refs);
  1293   if (has_overflown()) {
  1294     // Oops.  We overflowed.  Restart concurrent marking.
  1295     _restart_for_overflow = true;
  1296     if (G1TraceMarkStackOverflow) {
  1297       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
  1298     }
  1300     // Verify the heap w.r.t. the previous marking bitmap.
  1301     if (VerifyDuringGC) {
  1302       HandleMark hm;  // handle scope
  1303       gclog_or_tty->print(" VerifyDuringGC:(overflow)");
  1304       Universe::heap()->prepare_for_verify();
  1305       Universe::verify(/* silent */ false,
  1306                        /* option */ VerifyOption_G1UsePrevMarking);
  1307     }
  1309     // Clear the marking state because we will be restarting
  1310     // marking due to overflowing the global mark stack.
  1311     reset_marking_state();
  1312   } else {
  1313     // Aggregate the per-task counting data that we have accumulated
  1314     // while marking.
  1315     aggregate_count_data();
  1317     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  1318     // We're done with marking.
  1319     // This is the end of the marking cycle; we expect all
  1320     // threads to have SATB queues with active set to true.
  1321     satb_mq_set.set_active_all_threads(false, /* new active value */
  1322                                        true /* expected_active */);
  1324     if (VerifyDuringGC) {
  1325       HandleMark hm;  // handle scope
  1326       gclog_or_tty->print(" VerifyDuringGC:(after)");
  1327       Universe::heap()->prepare_for_verify();
  1328       Universe::verify(/* silent */ false,
  1329                        /* option */ VerifyOption_G1UseNextMarking);
  1330     }
  1331     assert(!restart_for_overflow(), "sanity");
  1332     // Completely reset the marking state since marking completed
  1333     set_non_marking_state();
  1334   }
  1336   // Expand the marking stack, if we have to and if we can.
  1337   if (_markStack.should_expand()) {
  1338     _markStack.expand();
  1339   }
  1341   // Statistics
  1342   double now = os::elapsedTime();
  1343   _remark_mark_times.add((mark_work_end - start) * 1000.0);
  1344   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  1345   _remark_times.add((now - start) * 1000.0);
  1347   g1p->record_concurrent_mark_remark_end();
  1348 }
  1350 // Base class of the closures that finalize and verify the
  1351 // liveness counting data.
  1352 class CMCountDataClosureBase: public HeapRegionClosure {
  1353 protected:
  1354   G1CollectedHeap* _g1h;
  1355   ConcurrentMark* _cm;
  1356   CardTableModRefBS* _ct_bs;
  1358   BitMap* _region_bm;
  1359   BitMap* _card_bm;
  1361   // Takes a region that's not empty (i.e., it has at least one
  1362   // live object in it) and sets its corresponding bit on the region
  1363   // bitmap to 1. If the region is "starts humongous" it will also set
  1364   // to 1 the bits on the region bitmap that correspond to its
  1365   // associated "continues humongous" regions.
  1366   void set_bit_for_region(HeapRegion* hr) {
  1367     assert(!hr->continuesHumongous(), "should have filtered those out");
  1369     BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
  1370     if (!hr->startsHumongous()) {
  1371       // Normal (non-humongous) case: just set the bit.
  1372       _region_bm->par_at_put(index, true);
  1373     } else {
  1374       // Starts humongous case: calculate how many regions are part of
  1375       // this humongous region and then set the bit range.
  1376       BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
  1377       _region_bm->par_at_put_range(index, end_index, true);
  1378     }
  1379   }
  1381 public:
  1382   CMCountDataClosureBase(G1CollectedHeap* g1h,
  1383                          BitMap* region_bm, BitMap* card_bm):
  1384     _g1h(g1h), _cm(g1h->concurrent_mark()),
  1385     _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
  1386     _region_bm(region_bm), _card_bm(card_bm) { }
  1387 };
  1389 // Closure that calculates the # live objects per region. Used
  1390 // for verification purposes during the cleanup pause.
  1391 class CalcLiveObjectsClosure: public CMCountDataClosureBase {
  1392   CMBitMapRO* _bm;
  1393   size_t _region_marked_bytes;
  1395 public:
  1396   CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
  1397                          BitMap* region_bm, BitMap* card_bm) :
  1398     CMCountDataClosureBase(g1h, region_bm, card_bm),
  1399     _bm(bm), _region_marked_bytes(0) { }
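         // Walks the next-mark bitmap of a region from bottom() up to NTAMS,
         // accumulating the size of each marked object into
         // _region_marked_bytes and setting, in the expected card bitmap, the
         // cards spanned by each such object; the [NTAMS, top) portion is
         // handled separately below as implicitly live.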
  1401   bool doHeapRegion(HeapRegion* hr) {
  1403     if (hr->continuesHumongous()) {
  1404       // We will ignore these here and process them when their
  1405       // associated "starts humongous" region is processed (see
  1406       // set_bit_for_region()). Note that we cannot rely on their
  1407       // associated "starts humongous" region to have its bit set to
  1408       // 1 since, due to the region chunking in the parallel region
  1409       // iteration, a "continues humongous" region might be visited
  1410       // before its associated "starts humongous".
  1411       return false;
  1414     HeapWord* ntams = hr->next_top_at_mark_start();
  1415     HeapWord* start = hr->bottom();
  1417     assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
  1418            err_msg("Preconditions not met - "
  1419                    "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT,
  1420                    start, ntams, hr->end()));
  1422     // Find the first marked object at or after "start".
  1423     start = _bm->getNextMarkedWordAddress(start, ntams);
  1425     size_t marked_bytes = 0;
  1427     while (start < ntams) {
  1428       oop obj = oop(start);
  1429       int obj_sz = obj->size();
  1430       HeapWord* obj_end = start + obj_sz;
  1432       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
  1433       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);
  1435       // Note: if we're looking at the last region in the heap, obj_end
  1436       // could actually be just beyond the end of the heap; end_idx
  1437       // will then correspond to a (non-existent) card that is also
  1438       // just beyond the heap.
  1439       if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
  1440         // end of object is not card aligned - increment to cover
  1441         // all the cards spanned by the object
  1442         end_idx += 1;
  1445       // Set the bits in the card BM for the cards spanned by this object.
  1446       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
  1448       // Add the size of this object to the number of marked bytes.
  1449       marked_bytes += (size_t)obj_sz * HeapWordSize;
  1451       // Find the next marked object after this one.
  1452       start = _bm->getNextMarkedWordAddress(obj_end, ntams);
  1455     // Mark the allocated-since-marking portion...
  1456     HeapWord* top = hr->top();
  1457     if (ntams < top) {
  1458       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
  1459       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
  1461       // Note: if we're looking at the last region in the heap, top
  1462       // could actually be just beyond the end of the heap; end_idx
  1463       // will then correspond to a (non-existent) card that is also
  1464       // just beyond the heap.
  1465       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
  1466         // top is not card aligned - increment to cover
  1467         // all the cards spanned by [ntams, top)
  1468         end_idx += 1;
  1470       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
  1472       // This definitely means the region has live objects.
  1473       set_bit_for_region(hr);
  1476     // Update the live region bitmap.
  1477     if (marked_bytes > 0) {
  1478       set_bit_for_region(hr);
  1481     // Set the marked bytes for the current region so that
  1482     // it can be queried by a calling verification routine.
  1483     _region_marked_bytes = marked_bytes;
  1485     return false;
  1488   size_t region_marked_bytes() const { return _region_marked_bytes; }
  1489 };
  1491 // Heap region closure used for verifying the counting data
  1492 // that was accumulated concurrently and aggregated during
  1493 // the remark pause. This closure is applied to the heap
  1494 // regions during the STW cleanup pause.
  1496 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
  1497   G1CollectedHeap* _g1h;
  1498   ConcurrentMark* _cm;
  1499   CalcLiveObjectsClosure _calc_cl;
  1500   BitMap* _region_bm;   // Region BM to be verified
  1501   BitMap* _card_bm;     // Card BM to be verified
  1502   bool _verbose;        // verbose output?
  1504   BitMap* _exp_region_bm; // Expected Region BM values
  1505   BitMap* _exp_card_bm;   // Expected card BM values
  1507   int _failures;
  1509 public:
  1510   VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
  1511                                 BitMap* region_bm,
  1512                                 BitMap* card_bm,
  1513                                 BitMap* exp_region_bm,
  1514                                 BitMap* exp_card_bm,
  1515                                 bool verbose) :
  1516     _g1h(g1h), _cm(g1h->concurrent_mark()),
  1517     _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
  1518     _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
  1519     _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
  1520     _failures(0) { }
  1522   int failures() const { return _failures; }
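         // Only under-counting by the aggregated ("actual") data is treated
         // as a failure: expected marked bytes greater than actual marked
         // bytes, or a bit set in an expected bitmap but clear in the actual
         // one. The reverse direction is not flagged.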
  1524   bool doHeapRegion(HeapRegion* hr) {
  1525     if (hr->continuesHumongous()) {
  1526       // We will ignore these here and process them when their
  1527       // associated "starts humongous" region is processed (see
  1528       // set_bit_for_region()). Note that we cannot rely on their
  1529       // associated "starts humongous" region to have its bit set to
  1530       // 1 since, due to the region chunking in the parallel region
  1531       // iteration, a "continues humongous" region might be visited
  1532       // before its associated "starts humongous".
  1533       return false;
  1536     int failures = 0;
  1538     // Call the CalcLiveObjectsClosure to walk the marking bitmap for
  1539     // this region and set the corresponding bits in the expected region
  1540     // and card bitmaps.
  1541     bool res = _calc_cl.doHeapRegion(hr);
  1542     assert(res == false, "should be continuing");
  1544     MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
  1545                     Mutex::_no_safepoint_check_flag);
  1547     // Verify the marked bytes for this region.
  1548     size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
  1549     size_t act_marked_bytes = hr->next_marked_bytes();
  1551     // We're not OK if expected marked bytes > actual marked bytes. It means
  1552     // we have missed accounting for some objects during the actual marking.
  1553     if (exp_marked_bytes > act_marked_bytes) {
  1554       if (_verbose) {
  1555         gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
  1556                                "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
  1557                                hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
  1559       failures += 1;
  1562     // Verify the bit, for this region, in the actual and expected
  1563     // (which was just calculated) region bit maps.
  1564     // We're not OK if the bit in the calculated expected region
  1565     // bitmap is set and the bit in the actual region bitmap is not.
  1566     BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
  1568     bool expected = _exp_region_bm->at(index);
  1569     bool actual = _region_bm->at(index);
  1570     if (expected && !actual) {
  1571       if (_verbose) {
  1572         gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
  1573                                "expected: %s, actual: %s",
  1574                                hr->hrs_index(),
  1575                                BOOL_TO_STR(expected), BOOL_TO_STR(actual));
  1577       failures += 1;
  1580     // Verify that the card bit maps for the cards spanned by the current
  1581     // region match. We have an error if we have a set bit in the expected
  1582     // bit map and the corresponding bit in the actual bitmap is not set.
  1584     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
  1585     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
  1587     for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
  1588       expected = _exp_card_bm->at(i);
  1589       actual = _card_bm->at(i);
  1591       if (expected && !actual) {
  1592         if (_verbose) {
  1593           gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
  1594                                  "expected: %s, actual: %s",
  1595                                  hr->hrs_index(), i,
  1596                                  BOOL_TO_STR(expected), BOOL_TO_STR(actual));
  1598         failures += 1;
  1602     if (failures > 0 && _verbose)  {
  1603       gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
  1604                              "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
  1605                              HR_FORMAT_PARAMS(hr), hr->next_top_at_mark_start(),
  1606                              _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
  1609     _failures += failures;
  1611     // We could stop iteration over the heap when we
  1612     // find the first violating region by returning true.
  1613     return false;
  1615 };
  1618 class G1ParVerifyFinalCountTask: public AbstractGangTask {
  1619 protected:
  1620   G1CollectedHeap* _g1h;
  1621   ConcurrentMark* _cm;
  1622   BitMap* _actual_region_bm;
  1623   BitMap* _actual_card_bm;
  1625   uint    _n_workers;
  1627   BitMap* _expected_region_bm;
  1628   BitMap* _expected_card_bm;
  1630   int  _failures;
  1631   bool _verbose;
  1633 public:
  1634   G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
  1635                             BitMap* region_bm, BitMap* card_bm,
  1636                             BitMap* expected_region_bm, BitMap* expected_card_bm)
  1637     : AbstractGangTask("G1 verify final counting"),
  1638       _g1h(g1h), _cm(_g1h->concurrent_mark()),
  1639       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
  1640       _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
  1641       _failures(0), _verbose(false),
  1642       _n_workers(0) {
  1643     assert(VerifyDuringGC, "don't call this otherwise");
  1645     // Use the value already set as the number of active threads
  1646     // in the call to run_task().
  1647     if (G1CollectedHeap::use_parallel_gc_threads()) {
  1648       assert( _g1h->workers()->active_workers() > 0,
  1649         "Should have been previously set");
  1650       _n_workers = _g1h->workers()->active_workers();
  1651     } else {
  1652       _n_workers = 1;
  1655     assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
  1656     assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
  1658     _verbose = _cm->verbose_medium();
  1661   void work(uint worker_id) {
  1662     assert(worker_id < _n_workers, "invariant");
  1664     VerifyLiveObjectDataHRClosure verify_cl(_g1h,
  1665                                             _actual_region_bm, _actual_card_bm,
  1666                                             _expected_region_bm,
  1667                                             _expected_card_bm,
  1668                                             _verbose);
  1670     if (G1CollectedHeap::use_parallel_gc_threads()) {
  1671       _g1h->heap_region_par_iterate_chunked(&verify_cl,
  1672                                             worker_id,
  1673                                             _n_workers,
  1674                                             HeapRegion::VerifyCountClaimValue);
  1675     } else {
  1676       _g1h->heap_region_iterate(&verify_cl);
  1679     Atomic::add(verify_cl.failures(), &_failures);
  1682   int failures() const { return _failures; }
  1683 };
  1685 // Closure that finalizes the liveness counting data.
  1686 // Used during the cleanup pause.
  1687 // Sets the bits corresponding to the interval [NTAMS, top)
  1688 // (which contains the implicitly live objects) in the
  1689 // card liveness bitmap. Also sets the bit for each region
  1690 // containing live data in the region liveness bitmap.
  1692 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
  1693  public:
  1694   FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
  1695                               BitMap* region_bm,
  1696                               BitMap* card_bm) :
  1697     CMCountDataClosureBase(g1h, region_bm, card_bm) { }
  1699   bool doHeapRegion(HeapRegion* hr) {
  1701     if (hr->continuesHumongous()) {
  1702       // We will ignore these here and process them when their
  1703       // associated "starts humongous" region is processed (see
  1704       // set_bit_for_region()). Note that we cannot rely on their
  1705       // associated "starts humongous" region to have its bit set to
  1706       // 1 since, due to the region chunking in the parallel region
  1707       // iteration, a "continues humongous" region might be visited
  1708       // before its associated "starts humongous".
  1709       return false;
  1712     HeapWord* ntams = hr->next_top_at_mark_start();
  1713     HeapWord* top   = hr->top();
  1715     assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
  1717     // Mark the allocated-since-marking portion...
  1718     if (ntams < top) {
  1719       // This definitely means the region has live objects.
  1720       set_bit_for_region(hr);
  1722       // Now set the bits in the card bitmap for [ntams, top)
  1723       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
  1724       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
  1726       // Note: if we're looking at the last region in the heap, top
  1727       // could actually be just beyond the end of the heap; end_idx
  1728       // will then correspond to a (non-existent) card that is also
  1729       // just beyond the heap.
  1730       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
  1731         // top is not card aligned - increment to cover
  1732         // all the cards spanned by [ntams, top)
  1733         end_idx += 1;
  1736       assert(end_idx <= _card_bm->size(),
  1737              err_msg("oob: end_idx=  "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
  1738                      end_idx, _card_bm->size()));
  1739       assert(start_idx < _card_bm->size(),
  1740              err_msg("oob: start_idx=  "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
  1741                      start_idx, _card_bm->size()));
  1743       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
  1746     // Set the bit for the region if it contains live data
  1747     if (hr->next_marked_bytes() > 0) {
  1748       set_bit_for_region(hr);
  1751     return false;
  1753 };
  1755 class G1ParFinalCountTask: public AbstractGangTask {
  1756 protected:
  1757   G1CollectedHeap* _g1h;
  1758   ConcurrentMark* _cm;
  1759   BitMap* _actual_region_bm;
  1760   BitMap* _actual_card_bm;
  1762   uint    _n_workers;
  1764 public:
  1765   G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
  1766     : AbstractGangTask("G1 final counting"),
  1767       _g1h(g1h), _cm(_g1h->concurrent_mark()),
  1768       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
  1769       _n_workers(0) {
  1770     // Use the value already set as the number of active threads
  1771     // in the call to run_task().
  1772     if (G1CollectedHeap::use_parallel_gc_threads()) {
  1773       assert( _g1h->workers()->active_workers() > 0,
  1774         "Should have been previously set");
  1775       _n_workers = _g1h->workers()->active_workers();
  1776     } else {
  1777       _n_workers = 1;
  1781   void work(uint worker_id) {
  1782     assert(worker_id < _n_workers, "invariant");
  1784     FinalCountDataUpdateClosure final_update_cl(_g1h,
  1785                                                 _actual_region_bm,
  1786                                                 _actual_card_bm);
  1788     if (G1CollectedHeap::use_parallel_gc_threads()) {
  1789       _g1h->heap_region_par_iterate_chunked(&final_update_cl,
  1790                                             worker_id,
  1791                                             _n_workers,
  1792                                             HeapRegion::FinalCountClaimValue);
  1793     } else {
  1794       _g1h->heap_region_iterate(&final_update_cl);
  1797 };
  1799 class G1ParNoteEndTask;
  1801 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  1802   G1CollectedHeap* _g1;
  1803   int _worker_num;
  1804   size_t _max_live_bytes;
  1805   uint _regions_claimed;
  1806   size_t _freed_bytes;
  1807   FreeRegionList* _local_cleanup_list;
  1808   OldRegionSet* _old_proxy_set;
  1809   HumongousRegionSet* _humongous_proxy_set;
  1810   HRRSCleanupTask* _hrrs_cleanup_task;
  1811   double _claimed_region_time;
  1812   double _max_region_time;
  1814 public:
  1815   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
  1816                              int worker_num,
  1817                              FreeRegionList* local_cleanup_list,
  1818                              OldRegionSet* old_proxy_set,
  1819                              HumongousRegionSet* humongous_proxy_set,
  1820                              HRRSCleanupTask* hrrs_cleanup_task) :
  1821     _g1(g1), _worker_num(worker_num),
  1822     _max_live_bytes(0), _regions_claimed(0),
  1823     _freed_bytes(0),
  1824     _claimed_region_time(0.0), _max_region_time(0.0),
  1825     _local_cleanup_list(local_cleanup_list),
  1826     _old_proxy_set(old_proxy_set),
  1827     _humongous_proxy_set(humongous_proxy_set),
  1828     _hrrs_cleanup_task(hrrs_cleanup_task) { }
  1830   size_t freed_bytes() { return _freed_bytes; }
  1832   bool doHeapRegion(HeapRegion *hr) {
  1833     if (hr->continuesHumongous()) {
  1834       return false;
  1836     // We use a claim value of zero here because all regions
  1837     // were claimed with value 1 in the FinalCount task.
  1838     _g1->reset_gc_time_stamps(hr);
  1839     double start = os::elapsedTime();
  1840     _regions_claimed++;
  1841     hr->note_end_of_marking();
  1842     _max_live_bytes += hr->max_live_bytes();
  1843     _g1->free_region_if_empty(hr,
  1844                               &_freed_bytes,
  1845                               _local_cleanup_list,
  1846                               _old_proxy_set,
  1847                               _humongous_proxy_set,
  1848                               _hrrs_cleanup_task,
  1849                               true /* par */);
  1850     double region_time = (os::elapsedTime() - start);
  1851     _claimed_region_time += region_time;
  1852     if (region_time > _max_region_time) {
  1853       _max_region_time = region_time;
  1855     return false;
  1858   size_t max_live_bytes() { return _max_live_bytes; }
  1859   uint regions_claimed() { return _regions_claimed; }
  1860   double claimed_region_time_sec() { return _claimed_region_time; }
  1861   double max_region_time_sec() { return _max_region_time; }
  1862 };
  1864 class G1ParNoteEndTask: public AbstractGangTask {
  1865   friend class G1NoteEndOfConcMarkClosure;
  1867 protected:
  1868   G1CollectedHeap* _g1h;
  1869   size_t _max_live_bytes;
  1870   size_t _freed_bytes;
  1871   FreeRegionList* _cleanup_list;
  1873 public:
  1874   G1ParNoteEndTask(G1CollectedHeap* g1h,
  1875                    FreeRegionList* cleanup_list) :
  1876     AbstractGangTask("G1 note end"), _g1h(g1h),
  1877     _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
  1879   void work(uint worker_id) {
  1880     double start = os::elapsedTime();
  1881     FreeRegionList local_cleanup_list("Local Cleanup List");
  1882     OldRegionSet old_proxy_set("Local Cleanup Old Proxy Set");
  1883     HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
  1884     HRRSCleanupTask hrrs_cleanup_task;
  1885     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, worker_id, &local_cleanup_list,
  1886                                            &old_proxy_set,
  1887                                            &humongous_proxy_set,
  1888                                            &hrrs_cleanup_task);
  1889     if (G1CollectedHeap::use_parallel_gc_threads()) {
  1890       _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id,
  1891                                             _g1h->workers()->active_workers(),
  1892                                             HeapRegion::NoteEndClaimValue);
  1893     } else {
  1894       _g1h->heap_region_iterate(&g1_note_end);
  1896     assert(g1_note_end.complete(), "Shouldn't have yielded!");
  1898     // Now update the lists
  1899     _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
  1900                                             NULL /* free_list */,
  1901                                             &old_proxy_set,
  1902                                             &humongous_proxy_set,
  1903                                             true /* par */);
  1905       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  1906       _max_live_bytes += g1_note_end.max_live_bytes();
  1907       _freed_bytes += g1_note_end.freed_bytes();
  1909       // If we iterate over the global cleanup list at the end of
  1910       // cleanup to do this printing we cannot guarantee to only
  1911       // generate output for the newly-reclaimed regions (the list
  1912       // might not be empty at the beginning of cleanup; we might
  1913       // still be working on its previous contents). So we do the
  1914       // printing here, before we append the new regions to the global
  1915       // cleanup list.
  1917       G1HRPrinter* hr_printer = _g1h->hr_printer();
  1918       if (hr_printer->is_active()) {
  1919         HeapRegionLinkedListIterator iter(&local_cleanup_list);
  1920         while (iter.more_available()) {
  1921           HeapRegion* hr = iter.get_next();
  1922           hr_printer->cleanup(hr);
  1926       _cleanup_list->add_as_tail(&local_cleanup_list);
  1927       assert(local_cleanup_list.is_empty(), "post-condition");
  1929       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
  1932   size_t max_live_bytes() { return _max_live_bytes; }
  1933   size_t freed_bytes() { return _freed_bytes; }
  1934 };
  1936 class G1ParScrubRemSetTask: public AbstractGangTask {
  1937 protected:
  1938   G1RemSet* _g1rs;
  1939   BitMap* _region_bm;
  1940   BitMap* _card_bm;
  1941 public:
  1942   G1ParScrubRemSetTask(G1CollectedHeap* g1h,
  1943                        BitMap* region_bm, BitMap* card_bm) :
  1944     AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
  1945     _region_bm(region_bm), _card_bm(card_bm) { }
  1947   void work(uint worker_id) {
  1948     if (G1CollectedHeap::use_parallel_gc_threads()) {
  1949       _g1rs->scrub_par(_region_bm, _card_bm, worker_id,
  1950                        HeapRegion::ScrubRemSetClaimValue);
  1951     } else {
  1952       _g1rs->scrub(_region_bm, _card_bm);
  1956 };
  1958 void ConcurrentMark::cleanup() {
  1959   // world is stopped at this checkpoint
  1960   assert(SafepointSynchronize::is_at_safepoint(),
  1961          "world should be stopped");
  1962   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  1964   // If a full collection has happened, we shouldn't do this.
  1965   if (has_aborted()) {
  1966     g1h->set_marking_complete(); // So bitmap clearing isn't confused
  1967     return;
  1970   HRSPhaseSetter x(HRSPhaseCleanup);
  1971   g1h->verify_region_sets_optional();
  1973   if (VerifyDuringGC) {
  1974     HandleMark hm;  // handle scope
  1975     gclog_or_tty->print(" VerifyDuringGC:(before)");
  1976     Universe::heap()->prepare_for_verify();
  1977     Universe::verify(/* silent */ false,
  1978                      /* option */ VerifyOption_G1UsePrevMarking);
  1981   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
  1982   g1p->record_concurrent_mark_cleanup_start();
  1984   double start = os::elapsedTime();
  1986   HeapRegionRemSet::reset_for_cleanup_tasks();
  1988   uint n_workers;
  1990   // Do counting once more with the world stopped for good measure.
  1991   G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
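         // This pass applies FinalCountDataUpdateClosure to each region: it
         // sets the card bits for [NTAMS, top) (the implicitly live objects)
         // and the region bit for any region that contains live data.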
  1993   if (G1CollectedHeap::use_parallel_gc_threads()) {
  1994    assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
  1995            "sanity check");
  1997     g1h->set_par_threads();
  1998     n_workers = g1h->n_par_threads();
  1999     assert(g1h->n_par_threads() == n_workers,
  2000            "Should not have been reset");
  2001     g1h->workers()->run_task(&g1_par_count_task);
  2002     // Done with the parallel phase so reset to 0.
  2003     g1h->set_par_threads(0);
  2005     assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue),
  2006            "sanity check");
  2007   } else {
  2008     n_workers = 1;
  2009     g1_par_count_task.work(0);
  2012   if (VerifyDuringGC) {
  2013     // Verify that the counting data accumulated during marking matches
  2014     // that calculated by walking the marking bitmap.
  2016     // Bitmaps to hold expected values
  2017     BitMap expected_region_bm(_region_bm.size(), false);
  2018     BitMap expected_card_bm(_card_bm.size(), false);
  2020     G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
  2021                                                  &_region_bm,
  2022                                                  &_card_bm,
  2023                                                  &expected_region_bm,
  2024                                                  &expected_card_bm);
  2026     if (G1CollectedHeap::use_parallel_gc_threads()) {
  2027       g1h->set_par_threads((int)n_workers);
  2028       g1h->workers()->run_task(&g1_par_verify_task);
  2029       // Done with the parallel phase so reset to 0.
  2030       g1h->set_par_threads(0);
  2032       assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
  2033              "sanity check");
  2034     } else {
  2035       g1_par_verify_task.work(0);
  2038     guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
  2041   size_t start_used_bytes = g1h->used();
  2042   g1h->set_marking_complete();
  2044   double count_end = os::elapsedTime();
  2045   double this_final_counting_time = (count_end - start);
  2046   _total_counting_time += this_final_counting_time;
  2048   if (G1PrintRegionLivenessInfo) {
  2049     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
  2050     _g1h->heap_region_iterate(&cl);
  2053   // Install newly created mark bitMap as "prev".
  2054   swapMarkBitMaps();
  2056   g1h->reset_gc_time_stamp();
  2058   // Note end of marking in all heap regions.
  2059   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
  2060   if (G1CollectedHeap::use_parallel_gc_threads()) {
  2061     g1h->set_par_threads((int)n_workers);
  2062     g1h->workers()->run_task(&g1_par_note_end_task);
  2063     g1h->set_par_threads(0);
  2065     assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
  2066            "sanity check");
  2067   } else {
  2068     g1_par_note_end_task.work(0);
  2070   g1h->check_gc_time_stamps();
  2072   if (!cleanup_list_is_empty()) {
  2073     // The cleanup list is not empty, so we'll have to process it
  2074     // concurrently. Notify anyone else that might be wanting free
  2075     // regions that there will be more free regions coming soon.
  2076     g1h->set_free_regions_coming();
  2079   // Scrub the remembered sets before the record_concurrent_mark_cleanup_end()
  2080   // call below, since scrubbing affects the metric by which we sort the heap regions.
  2081   if (G1ScrubRemSets) {
  2082     double rs_scrub_start = os::elapsedTime();
  2083     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
  2084     if (G1CollectedHeap::use_parallel_gc_threads()) {
  2085       g1h->set_par_threads((int)n_workers);
  2086       g1h->workers()->run_task(&g1_par_scrub_rs_task);
  2087       g1h->set_par_threads(0);
  2089       assert(g1h->check_heap_region_claim_values(
  2090                                             HeapRegion::ScrubRemSetClaimValue),
  2091              "sanity check");
  2092     } else {
  2093       g1_par_scrub_rs_task.work(0);
  2096     double rs_scrub_end = os::elapsedTime();
  2097     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
  2098     _total_rs_scrub_time += this_rs_scrub_time;
  2101   // The record_concurrent_mark_cleanup_end() call below will also free any
  2102   // regions totally full of garbage objects, and sort the regions.
  2103   g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
  2105   // Statistics.
  2106   double end = os::elapsedTime();
  2107   _cleanup_times.add((end - start) * 1000.0);
  2109   if (G1Log::fine()) {
  2110     g1h->print_size_transition(gclog_or_tty,
  2111                                start_used_bytes,
  2112                                g1h->used(),
  2113                                g1h->capacity());
  2116   // Clean up will have freed any regions completely full of garbage.
  2117   // Update the soft reference policy with the new heap occupancy.
  2118   Universe::update_heap_info_at_gc();
  2120   // We need to make this be a "collection" so any collection pause that
  2121   // races with it goes around and waits for completeCleanup to finish.
  2122   g1h->increment_total_collections();
  2124   // We reclaimed old regions so we should calculate the sizes to make
  2125   // sure we update the old gen/space data.
  2126   g1h->g1mm()->update_sizes();
  2128   if (VerifyDuringGC) {
  2129     HandleMark hm;  // handle scope
  2130     gclog_or_tty->print(" VerifyDuringGC:(after)");
  2131     Universe::heap()->prepare_for_verify();
  2132     Universe::verify(/* silent */ false,
  2133                      /* option */ VerifyOption_G1UsePrevMarking);
  2136   g1h->verify_region_sets_optional();
  2139 void ConcurrentMark::completeCleanup() {
  2140   if (has_aborted()) return;
  2142   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  2144   _cleanup_list.verify_optional();
  2145   FreeRegionList tmp_free_list("Tmp Free List");
  2147   if (G1ConcRegionFreeingVerbose) {
  2148     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
  2149                            "cleanup list has %u entries",
  2150                            _cleanup_list.length());
  2153   // No one else should be accessing the _cleanup_list at this point,
  2154   // so it's not necessary to take any locks.
  2155   while (!_cleanup_list.is_empty()) {
  2156     HeapRegion* hr = _cleanup_list.remove_head();
  2157     assert(hr != NULL, "the list was not empty");
  2158     hr->par_clear();
  2159     tmp_free_list.add_as_tail(hr);
  2161     // Instead of adding one region at a time to the secondary_free_list,
  2162     // we accumulate them in the local list and move them a few at a
  2163     // time. This also cuts down on the number of notify_all() calls
  2164     // we do during this process. We'll also append the local list when
  2165     // _cleanup_list is empty (which means we just removed the last
  2166     // region from the _cleanup_list).
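           // For example, assuming G1SecondaryFreeListAppendLength keeps its
           // default value of 5, regions are handed over in batches of five,
           // plus one final batch for whatever remains when the list empties.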
  2167     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
  2168         _cleanup_list.is_empty()) {
  2169       if (G1ConcRegionFreeingVerbose) {
  2170         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
  2171                                "appending %u entries to the secondary_free_list, "
  2172                                "cleanup list still has %u entries",
  2173                                tmp_free_list.length(),
  2174                                _cleanup_list.length());
  2178         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
  2179         g1h->secondary_free_list_add_as_tail(&tmp_free_list);
  2180         SecondaryFreeList_lock->notify_all();
  2183       if (G1StressConcRegionFreeing) {
  2184         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
  2185           os::sleep(Thread::current(), (jlong) 1, false);
  2190   assert(tmp_free_list.is_empty(), "post-condition");
  2193 // Supporting Object and Oop closures for reference discovery
  2194 // and processing during marking.
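       // An object is considered alive here if its address lies outside the
       // G1 reserved space, or if is_obj_ill() does not report it as dead
       // according to the current marking information.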
  2196 bool G1CMIsAliveClosure::do_object_b(oop obj) {
  2197   HeapWord* addr = (HeapWord*)obj;
  2198   return addr != NULL &&
  2199          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
  2202 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
  2203 // Uses the CMTask associated with a worker thread (for serial reference
  2204 // processing the CMTask for worker 0 is used) to preserve (mark) and
  2205 // trace referent objects.
  2206 //
  2207 // Using the CMTask and embedded local queues avoids having the worker
  2208 // threads operating on the global mark stack. This reduces the risk
  2209 // of overflowing the stack - which we would rather avoid at this late
  2210 // stage. Also, using the tasks' local queues removes the potential
  2211 // for the workers to interfere with each other, which could occur if
  2212 // they operated on the global stack.
  2214 class G1CMKeepAliveAndDrainClosure: public OopClosure {
  2215   ConcurrentMark* _cm;
  2216   CMTask*         _task;
  2217   int             _ref_counter_limit;
  2218   int             _ref_counter;
  2219   bool            _is_serial;
  2220  public:
  2221   G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
  2222     _cm(cm), _task(task), _is_serial(is_serial),
  2223     _ref_counter_limit(G1RefProcDrainInterval) {
  2224     assert(_ref_counter_limit > 0, "sanity");
  2225     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
  2226     _ref_counter = _ref_counter_limit;
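         // _ref_counter counts down from _ref_counter_limit
         // (G1RefProcDrainInterval); when it reaches zero, do_oop_work()
         // below pauses reference processing and drains the entries
         // accumulated so far via CMTask::do_marking_step().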
  2229   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  2230   virtual void do_oop(      oop* p) { do_oop_work(p); }
  2232   template <class T> void do_oop_work(T* p) {
  2233     if (!_cm->has_overflown()) {
  2234       oop obj = oopDesc::load_decode_heap_oop(p);
  2235       if (_cm->verbose_high()) {
  2236         gclog_or_tty->print_cr("\t[%u] we're looking at location "
  2237                                "*"PTR_FORMAT" = "PTR_FORMAT,
  2238                                _task->worker_id(), p, (void*) obj);
  2241       _task->deal_with_reference(obj);
  2242       _ref_counter--;
  2244       if (_ref_counter == 0) {
  2245         // We have dealt with _ref_counter_limit references, pushing them
  2246         // and objects reachable from them on to the local stack (and
  2247         // possibly the global stack). Call CMTask::do_marking_step() to
  2248         // process these entries.
  2249         //
  2250         // We call CMTask::do_marking_step() in a loop, which we'll exit if
  2251         // there's nothing more to do (i.e. we're done with the entries that
  2252         // were pushed as a result of the CMTask::deal_with_reference() calls
  2253         // above) or we overflow.
  2254         //
  2255         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
  2256         // flag while there may still be some work to do. (See the comment at
  2257         // the beginning of CMTask::do_marking_step() for those conditions -
  2258         // one of which is reaching the specified time target.) It is only
  2259         // when CMTask::do_marking_step() returns without setting the
  2260         // has_aborted() flag that the marking step has completed.
  2261         do {
  2262           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
  2263           _task->do_marking_step(mark_step_duration_ms,
  2264                                  false      /* do_termination */,
  2265                                  _is_serial);
  2266         } while (_task->has_aborted() && !_cm->has_overflown());
  2267         _ref_counter = _ref_counter_limit;
  2269     } else {
  2270       if (_cm->verbose_high()) {
  2271          gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id());
  2275 };
  2277 // 'Drain' oop closure used by both serial and parallel reference processing.
  2278 // Uses the CMTask associated with a given worker thread (for serial
  2279 // reference processing the CMTask for worker 0 is used). Calls the
  2280 // do_marking_step routine, with an unbelievably large timeout value,
  2281 // to drain the marking data structures of the remaining entries
  2282 // added by the 'keep alive' oop closure above.
  2284 class G1CMDrainMarkingStackClosure: public VoidClosure {
  2285   ConcurrentMark* _cm;
  2286   CMTask*         _task;
  2287   bool            _is_serial;
  2288  public:
  2289   G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
  2290     _cm(cm), _task(task), _is_serial(is_serial) {
  2291     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
  2294   void do_void() {
  2295     do {
  2296       if (_cm->verbose_high()) {
  2297         gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step - serial: %s",
  2298                                _task->worker_id(), BOOL_TO_STR(_is_serial));
  2301       // We call CMTask::do_marking_step() to completely drain the local
  2302       // and global marking stacks of entries pushed by the 'keep alive'
  2303       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
  2304       //
  2305       // CMTask::do_marking_step() is called in a loop, which we'll exit
  2306       // if there's nothing more to do (i.e. we've completely drained the
  2307       // entries that were pushed as a result of applying the 'keep alive'
  2308       // closure to the entries on the discovered ref lists) or we overflow
  2309       // the global marking stack.
  2310       //
  2311       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
  2312       // flag while there may still be some work to do. (See the comment at
  2313       // the beginning of CMTask::do_marking_step() for those conditions -
  2314       // one of which is reaching the specified time target.) It is only
  2315       // when CMTask::do_marking_step() returns without setting the
  2316       // has_aborted() flag that the marking step has completed.
  2318       _task->do_marking_step(1000000000.0 /* something very large */,
  2319                              true         /* do_termination */,
  2320                              _is_serial);
  2321     } while (_task->has_aborted() && !_cm->has_overflown());
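           // The loop exits either because do_marking_step() completed
           // without aborting (the marking stacks are fully drained) or
           // because the global mark stack overflowed, in which case remark
           // will be abandoned and marking restarted.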
  2323 };
  2325 // Implementation of AbstractRefProcTaskExecutor for parallel
  2326 // reference processing at the end of G1 concurrent marking
  2328 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
  2329 private:
  2330   G1CollectedHeap* _g1h;
  2331   ConcurrentMark*  _cm;
  2332   WorkGang*        _workers;
  2333   int              _active_workers;
  2335 public:
  2336   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
  2337                         ConcurrentMark* cm,
  2338                         WorkGang* workers,
  2339                         int n_workers) :
  2340     _g1h(g1h), _cm(cm),
  2341     _workers(workers), _active_workers(n_workers) { }
  2343   // Executes the given task using concurrent marking worker threads.
  2344   virtual void execute(ProcessTask& task);
  2345   virtual void execute(EnqueueTask& task);
  2346 };
  2348 class G1CMRefProcTaskProxy: public AbstractGangTask {
  2349   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
  2350   ProcessTask&     _proc_task;
  2351   G1CollectedHeap* _g1h;
  2352   ConcurrentMark*  _cm;
  2354 public:
  2355   G1CMRefProcTaskProxy(ProcessTask& proc_task,
  2356                      G1CollectedHeap* g1h,
  2357                      ConcurrentMark* cm) :
  2358     AbstractGangTask("Process reference objects in parallel"),
  2359     _proc_task(proc_task), _g1h(g1h), _cm(cm) {
  2360     ReferenceProcessor* rp = _g1h->ref_processor_cm();
  2361     assert(rp->processing_is_mt(), "shouldn't be here otherwise");
  2364   virtual void work(uint worker_id) {
  2365     CMTask* task = _cm->task(worker_id);
  2366     G1CMIsAliveClosure g1_is_alive(_g1h);
  2367     G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
  2368     G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
  2370     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
  2372 };
  2374 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
  2375   assert(_workers != NULL, "Need parallel worker threads.");
  2376   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
  2378   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
  2380   // We need to reset the concurrency level before each
  2381   // proxy task execution, so that the termination protocol
  2382   // and overflow handling in CMTask::do_marking_step() knows
  2383   // how many workers to wait for.
  2384   _cm->set_concurrency(_active_workers);
  2385   _g1h->set_par_threads(_active_workers);
  2386   _workers->run_task(&proc_task_proxy);
  2387   _g1h->set_par_threads(0);
  2390 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
  2391   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
  2392   EnqueueTask& _enq_task;
  2394 public:
  2395   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
  2396     AbstractGangTask("Enqueue reference objects in parallel"),
  2397     _enq_task(enq_task) { }
  2399   virtual void work(uint worker_id) {
  2400     _enq_task.work(worker_id);
  2402 };
  2404 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
  2405   assert(_workers != NULL, "Need parallel worker threads.");
  2406   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
  2408   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
  2410   // Not strictly necessary but...
  2411   //
  2412   // We need to reset the concurrency level before each
  2413   // proxy task execution, so that the termination protocol
  2414   // and overflow handling in CMTask::do_marking_step() knows
  2415   // how many workers to wait for.
  2416   _cm->set_concurrency(_active_workers);
  2417   _g1h->set_par_threads(_active_workers);
  2418   _workers->run_task(&enq_task_proxy);
  2419   _g1h->set_par_threads(0);
  2422 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
  2423   if (has_overflown()) {
  2424     // Skip processing the discovered references if we have
  2425     // overflown the global marking stack. Reference objects
  2426     // only get discovered once so it is OK to not
  2427     // de-populate the discovered reference lists. We could have,
  2428     // but the only benefit would be that, when marking restarts,
  2429     // fewer reference objects are discovered.
  2430     return;
  2433   ResourceMark rm;
  2434   HandleMark   hm;
  2436   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  2438   // Is alive closure.
  2439   G1CMIsAliveClosure g1_is_alive(g1h);
  2441   // Inner scope to exclude the cleaning of the string and symbol
  2442   // tables from the displayed time.
  2444     if (G1Log::finer()) {
  2445       gclog_or_tty->put(' ');
  2447     TraceTime t("GC ref-proc", G1Log::finer(), false, gclog_or_tty);
  2449     ReferenceProcessor* rp = g1h->ref_processor_cm();
  2451     // See the comment in G1CollectedHeap::ref_processing_init()
  2452     // about how reference processing currently works in G1.
  2454     // Set the soft reference policy
  2455     rp->setup_policy(clear_all_soft_refs);
  2456     assert(_markStack.isEmpty(), "mark stack should be empty");
  2458     // Instances of the 'Keep Alive' and 'Complete GC' closures used
  2459     // in serial reference processing. Note these closures are also
  2460     // used for serially processing (by the current thread) the
  2461     // JNI references during parallel reference processing.
  2462     //
  2463     // These closures do not need to synchronize with the worker
  2464     // threads involved in parallel reference processing as these
  2465     // instances are executed serially by the current thread (e.g.
  2466     // reference processing is not multi-threaded and is thus
  2467     // performed by the current thread instead of a gang worker).
  2468     //
  2469     // The gang tasks involved in parallel reference processing create
  2470     // their own instances of these closures, which do their own
  2471     // synchronization among themselves.
  2472     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
  2473     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
  2475     // We need at least one active thread. If reference processing
  2476     // is not multi-threaded we use the current (VMThread) thread,
  2477     // otherwise we use the work gang from the G1CollectedHeap and
  2478     // we utilize all the worker threads we can.
  2479     bool processing_is_mt = rp->processing_is_mt() && g1h->workers() != NULL;
  2480     uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
  2481     active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
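           // Clamp to the range [1, _max_worker_id]: never more threads than
           // there are CMTasks, and always at least one to do the processing.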
  2483     // Parallel processing task executor.
  2484     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
  2485                                               g1h->workers(), active_workers);
  2486     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
  2488     // Set the concurrency level. The phase was already set prior to
  2489     // executing the remark task.
  2490     set_concurrency(active_workers);
  2492     // Set the degree of MT processing here.  If the discovery was done MT,
  2493     // the number of threads involved during discovery could differ from
  2494     // the number of active workers.  This is OK as long as the discovered
  2495     // Reference lists are balanced (see balance_all_queues() and balance_queues()).
  2496     rp->set_active_mt_degree(active_workers);
  2498     // Process the weak references.
  2499     rp->process_discovered_references(&g1_is_alive,
  2500                                       &g1_keep_alive,
  2501                                       &g1_drain_mark_stack,
  2502                                       executor);
  2504     // The do_oop work routines of the keep_alive and drain_marking_stack
  2505     // oop closures will set the has_overflown flag if we overflow the
  2506     // global marking stack.
  2508     assert(_markStack.overflow() || _markStack.isEmpty(),
  2509             "mark stack should be empty (unless it overflowed)");
  2511     if (_markStack.overflow()) {
  2512       // This should have been done already when we tried to push an
  2513       // entry on to the global mark stack. But let's do it again.
  2514       set_has_overflown();
  2517     assert(rp->num_q() == active_workers, "why not");
  2519     rp->enqueue_discovered_references(executor);
  2521     rp->verify_no_references_recorded();
  2522     assert(!rp->discovery_enabled(), "Post condition");
  2525   // Now clean up stale oops in StringTable
  2526   StringTable::unlink(&g1_is_alive);
  2527   // Clean up unreferenced symbols in symbol table.
  2528   SymbolTable::unlink();
  2531 void ConcurrentMark::swapMarkBitMaps() {
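         // The bitmap populated during the marking cycle that just completed
         // becomes the "previous" bitmap, and the old previous bitmap becomes
         // the "next" bitmap for the following cycle.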
  2532   CMBitMapRO* temp = _prevMarkBitMap;
  2533   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
  2534   _nextMarkBitMap  = (CMBitMap*)  temp;
  2537 class CMRemarkTask: public AbstractGangTask {
  2538 private:
  2539   ConcurrentMark* _cm;
  2540   bool            _is_serial;
  2541 public:
  2542   void work(uint worker_id) {
  2543     // Since all available tasks are actually started, we should
  2544     // only proceed if we're supposed to be active.
  2545     if (worker_id < _cm->active_tasks()) {
  2546       CMTask* task = _cm->task(worker_id);
  2547       task->record_start_time();
  2548       do {
  2549         task->do_marking_step(1000000000.0 /* something very large */,
  2550                               true         /* do_termination       */,
  2551                               _is_serial);
  2552       } while (task->has_aborted() && !_cm->has_overflown());
  2553       // If we overflow, then we do not want to restart. We instead
  2554       // want to abort remark and do concurrent marking again.
  2555       task->record_end_time();
  2559   CMRemarkTask(ConcurrentMark* cm, int active_workers, bool is_serial) :
  2560     AbstractGangTask("Par Remark"), _cm(cm), _is_serial(is_serial) {
  2561     _cm->terminator()->reset_for_reuse(active_workers);
  2563 };
  2565 void ConcurrentMark::checkpointRootsFinalWork() {
  2566   ResourceMark rm;
  2567   HandleMark   hm;
  2568   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  2570   g1h->ensure_parsability(false);
  2572   if (G1CollectedHeap::use_parallel_gc_threads()) {
  2573     G1CollectedHeap::StrongRootsScope srs(g1h);
  2574     // this is remark, so we'll use up all active threads
  2575     uint active_workers = g1h->workers()->active_workers();
  2576     if (active_workers == 0) {
  2577       assert(active_workers > 0, "Should have been set earlier");
  2578       active_workers = (uint) ParallelGCThreads;
  2579       g1h->workers()->set_active_workers(active_workers);
  2581     set_concurrency_and_phase(active_workers, false /* concurrent */);
  2582     // Leave _parallel_marking_threads at its
  2583     // value originally calculated in the ConcurrentMark
  2584     // constructor and pass values of the active workers
  2585     // through the gang in the task.
  2587     CMRemarkTask remarkTask(this, active_workers, false /* is_serial */);
  2588     // We will start all available threads, even if we decide that the
  2589     // active_workers will be fewer. The extra ones will just bail out
  2590     // immediately.
  2591     g1h->set_par_threads(active_workers);
  2592     g1h->workers()->run_task(&remarkTask);
  2593     g1h->set_par_threads(0);
  2594   } else {
  2595     G1CollectedHeap::StrongRootsScope srs(g1h);
  2596     uint active_workers = 1;
  2597     set_concurrency_and_phase(active_workers, false /* concurrent */);
  2599     // Note - if there's no work gang then the VMThread will be
  2600     // the thread to execute the remark - serially. We have
  2601     // to pass true for the is_serial parameter so that
  2602     // CMTask::do_marking_step() doesn't enter the sync
  2603     // barriers in the event of an overflow. Doing so will
  2604     // cause an assert that the current thread is not a
  2605     // concurrent GC thread.
  2606     CMRemarkTask remarkTask(this, active_workers, true /* is_serial*/);
  2607     remarkTask.work(0);
  2609   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  2610   guarantee(has_overflown() ||
  2611             satb_mq_set.completed_buffers_num() == 0,
  2612             err_msg("Invariant: has_overflown = %s, num buffers = %d",
  2613                     BOOL_TO_STR(has_overflown()),
  2614                     satb_mq_set.completed_buffers_num()));
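         // Unless marking overflowed (and will therefore be redone), every
         // completed SATB buffer must have been processed by the remark work
         // above; a leftover buffer would mean part of the snapshot was
         // missed.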
  2616   print_stats();
  2619 #ifndef PRODUCT
  2621 class PrintReachableOopClosure: public OopClosure {
  2622 private:
  2623   G1CollectedHeap* _g1h;
  2624   outputStream*    _out;
  2625   VerifyOption     _vo;
  2626   bool             _all;
  2628 public:
  2629   PrintReachableOopClosure(outputStream* out,
  2630                            VerifyOption  vo,
  2631                            bool          all) :
  2632     _g1h(G1CollectedHeap::heap()),
  2633     _out(out), _vo(vo), _all(all) { }
  2635   void do_oop(narrowOop* p) { do_oop_work(p); }
  2636   void do_oop(      oop* p) { do_oop_work(p); }
  2638   template <class T> void do_oop_work(T* p) {
  2639     oop         obj = oopDesc::load_decode_heap_oop(p);
  2640     const char* str = NULL;
  2641     const char* str2 = "";
  2643     if (obj == NULL) {
  2644       str = "";
  2645     } else if (!_g1h->is_in_g1_reserved(obj)) {
  2646       str = " O";
  2647     } else {
  2648       HeapRegion* hr  = _g1h->heap_region_containing(obj);
  2649       guarantee(hr != NULL, "invariant");
  2650       bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo);
  2651       bool marked = _g1h->is_marked(obj, _vo);
  2653       if (over_tams) {
  2654         str = " >";
  2655         if (marked) {
  2656           str2 = " AND MARKED";
  2658       } else if (marked) {
  2659         str = " M";
  2660       } else {
  2661         str = " NOT";
  2665     _out->print_cr("  "PTR_FORMAT": "PTR_FORMAT"%s%s",
  2666                    p, (void*) obj, str, str2);
  2668 };
  2670 class PrintReachableObjectClosure : public ObjectClosure {
  2671 private:
  2672   G1CollectedHeap* _g1h;
  2673   outputStream*    _out;
  2674   VerifyOption     _vo;
  2675   bool             _all;
  2676   HeapRegion*      _hr;
  2678 public:
  2679   PrintReachableObjectClosure(outputStream* out,
  2680                               VerifyOption  vo,
  2681                               bool          all,
  2682                               HeapRegion*   hr) :
  2683     _g1h(G1CollectedHeap::heap()),
  2684     _out(out), _vo(vo), _all(all), _hr(hr) { }
  2686   void do_object(oop o) {
  2687     bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo);
  2688     bool marked = _g1h->is_marked(o, _vo);
  2689     bool print_it = _all || over_tams || marked;
  2691     if (print_it) {
  2692       _out->print_cr(" "PTR_FORMAT"%s",
  2693                      o, (over_tams) ? " >" : (marked) ? " M" : "");
  2694       PrintReachableOopClosure oopCl(_out, _vo, _all);
  2695       o->oop_iterate_no_header(&oopCl);
  2698 };
  2700 class PrintReachableRegionClosure : public HeapRegionClosure {
  2701 private:
  2702   G1CollectedHeap* _g1h;
  2703   outputStream*    _out;
  2704   VerifyOption     _vo;
  2705   bool             _all;
  2707 public:
  2708   bool doHeapRegion(HeapRegion* hr) {
  2709     HeapWord* b = hr->bottom();
  2710     HeapWord* e = hr->end();
  2711     HeapWord* t = hr->top();
  2712     HeapWord* p = _g1h->top_at_mark_start(hr, _vo);
  2713     _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
  2714                    "TAMS: "PTR_FORMAT, b, e, t, p);
  2715     _out->cr();
  2717     HeapWord* from = b;
  2718     HeapWord* to   = t;
  2720     if (to > from) {
  2721       _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
  2722       _out->cr();
  2723       PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
  2724       hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
  2725       _out->cr();
  2728     return false;
  2731   PrintReachableRegionClosure(outputStream* out,
  2732                               VerifyOption  vo,
  2733                               bool          all) :
  2734     _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { }
  2735 };
  2737 void ConcurrentMark::print_reachable(const char* str,
  2738                                      VerifyOption vo,
  2739                                      bool all) {
  2740   gclog_or_tty->cr();
  2741   gclog_or_tty->print_cr("== Doing heap dump... ");
  2743   if (G1PrintReachableBaseFile == NULL) {
  2744     gclog_or_tty->print_cr("  #### error: no base file defined");
  2745     return;
  2748   if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
  2749       (JVM_MAXPATHLEN - 1)) {
  2750     gclog_or_tty->print_cr("  #### error: file name too long");
  2751     return;
  2754   char file_name[JVM_MAXPATHLEN];
  2755   sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
  2756   gclog_or_tty->print_cr("  dumping to file %s", file_name);
  2758   fileStream fout(file_name);
  2759   if (!fout.is_open()) {
  2760     gclog_or_tty->print_cr("  #### error: could not open file");
  2761     return;
  2764   outputStream* out = &fout;
  2765   out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo));
  2766   out->cr();
  2768   out->print_cr("--- ITERATING OVER REGIONS");
  2769   out->cr();
  2770   PrintReachableRegionClosure rcl(out, vo, all);
  2771   _g1h->heap_region_iterate(&rcl);
  2772   out->cr();
  2774   gclog_or_tty->print_cr("  done");
  2775   gclog_or_tty->flush();
  2778 #endif // PRODUCT
  2780 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
  2781   // Note we are overriding the read-only view of the prev map here, via
  2782   // the cast.
  2783   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
  2786 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
  2787   _nextMarkBitMap->clearRange(mr);
  2790 void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) {
  2791   clearRangePrevBitmap(mr);
  2792   clearRangeNextBitmap(mr);
  2795 HeapRegion*
  2796 ConcurrentMark::claim_region(uint worker_id) {
  2797   // "checkpoint" the finger
  2798   HeapWord* finger = _finger;
  2800   // _heap_end will not change underneath our feet; it only changes at
  2801   // yield points.
  2802   while (finger < _heap_end) {
  2803     assert(_g1h->is_in_g1_reserved(finger), "invariant");
  2805     // Note on how this code handles humongous regions. In the
  2806     // normal case the finger will reach the start of a "starts
  2807     // humongous" (SH) region. Its end will either be the end of the
  2808     // last "continues humongous" (CH) region in the sequence, or the
  2809     // standard end of the SH region (if the SH is the only region in
  2810     // the sequence). That way claim_region() will skip over the CH
  2811     // regions. However, there is a subtle race between a CM thread
  2812     // executing this method and a mutator thread doing a humongous
  2813     // object allocation. The two are not mutually exclusive as the CM
  2814     // thread does not need to hold the Heap_lock when it gets
  2815     // here. So there is a chance that claim_region() will come across
  2816     // a free region that's in the process of becoming a SH or a CH
  2817     // region. In the former case, it will either
  2818     //   a) Miss the update to the region's end, in which case it will
  2819     //      visit every subsequent CH region, will find their bitmaps
  2820     //      empty, and do nothing, or
  2821     //   b) Observe the update of the region's end (in which case
  2822     //      it will skip the subsequent CH regions).
  2823     // If it comes across a region that suddenly becomes CH, the
  2824     // scenario will be similar to b). So, the race between
  2825     // claim_region() and a humongous object allocation might force us
  2826     // to do a bit of unnecessary work (due to some unnecessary bitmap
  2827     // iterations) but it should not introduce any correctness issues.
  2828     HeapRegion* curr_region   = _g1h->heap_region_containing_raw(finger);
  2829     HeapWord*   bottom        = curr_region->bottom();
  2830     HeapWord*   end           = curr_region->end();
  2831     HeapWord*   limit         = curr_region->next_top_at_mark_start();
  2833     if (verbose_low()) {
  2834       gclog_or_tty->print_cr("[%u] curr_region = "PTR_FORMAT" "
  2835                              "["PTR_FORMAT", "PTR_FORMAT"), "
  2836                              "limit = "PTR_FORMAT,
  2837                              worker_id, curr_region, bottom, end, limit);
  2840     // Is the gap between reading the finger and doing the CAS too long?
  2841     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
  2842     if (res == finger) {
  2843       // we succeeded
  2845       // notice that _finger == end cannot be guaranteed here since
  2846       // someone else might have moved the finger even further.
  2847       assert(_finger >= end, "the finger should have moved forward");
  2849       if (verbose_low()) {
  2850         gclog_or_tty->print_cr("[%u] we were successful with region = "
  2851                                PTR_FORMAT, worker_id, curr_region);
  2854       if (limit > bottom) {
  2855         if (verbose_low()) {
  2856           gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is not empty, "
  2857                                  "returning it ", worker_id, curr_region);
  2859         return curr_region;
  2860       } else {
  2861         assert(limit == bottom,
  2862                "the region limit should be at bottom");
  2863         if (verbose_low()) {
  2864           gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is empty, "
  2865                                  "returning NULL", worker_id, curr_region);
  2867         // we return NULL and the caller should try calling
  2868         // claim_region() again.
  2869         return NULL;
  2871     } else {
  2872       assert(_finger > finger, "the finger should have moved forward");
  2873       if (verbose_low()) {
  2874         gclog_or_tty->print_cr("[%u] somebody else moved the finger, "
  2875                                "global finger = "PTR_FORMAT", "
  2876                                "our finger = "PTR_FORMAT,
  2877                                worker_id, _finger, finger);
  2880       // read it again
  2881       finger = _finger;
  2885   return NULL;
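// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the original file): the lock-free
// "finger" claim used by claim_region() above, reduced to a self-contained
// analogue built on std::atomic. Each worker snapshots the shared finger,
// computes the end of the range it wants, and owns that range only if the
// CAS advances the finger from the snapshot to that end. The names below
// (FingerClaimer, try_claim) are hypothetical; the guard keeps the sketch
// out of compilation.
#if 0
#include <atomic>
#include <cstddef>

struct FingerClaimer {
  std::atomic<size_t> _finger;    // next unclaimed word index
  size_t              _heap_end;  // exclusive upper bound

  // Returns true and fills [*start, *end) if this worker claimed a range;
  // returns false if nothing is left or another worker won the race
  // (in which case the caller simply retries with the new finger value).
  bool try_claim(size_t region_words, size_t* start, size_t* end) {
    size_t snapshot = _finger.load();
    if (snapshot >= _heap_end) return false;      // nothing left to claim
    size_t limit = snapshot + region_words;
    if (limit > _heap_end) limit = _heap_end;
    if (_finger.compare_exchange_strong(snapshot, limit)) {
      *start = snapshot;
      *end   = limit;                             // we own this range
      return true;
    }
    return false;                                 // lost the race; retry
  }
};
#endif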
  2888 #ifndef PRODUCT
  2889 enum VerifyNoCSetOopsPhase {
  2890   VerifyNoCSetOopsStack,
  2891   VerifyNoCSetOopsQueues,
  2892   VerifyNoCSetOopsSATBCompleted,
  2893   VerifyNoCSetOopsSATBThread
  2894 };
  2896 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure  {
  2897 private:
  2898   G1CollectedHeap* _g1h;
  2899   VerifyNoCSetOopsPhase _phase;
  2900   int _info;
  2902   const char* phase_str() {
  2903     switch (_phase) {
  2904     case VerifyNoCSetOopsStack:         return "Stack";
  2905     case VerifyNoCSetOopsQueues:        return "Queue";
  2906     case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers";
  2907     case VerifyNoCSetOopsSATBThread:    return "Thread SATB Buffers";
  2908     default:                            ShouldNotReachHere();
  2910     return NULL;
  2913   void do_object_work(oop obj) {
  2914     guarantee(!_g1h->obj_in_cs(obj),
  2915               err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d",
  2916                       (void*) obj, phase_str(), _info));
  2919 public:
  2920   VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { }
  2922   void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) {
  2923     _phase = phase;
  2924     _info = info;
  2927   virtual void do_oop(oop* p) {
  2928     oop obj = oopDesc::load_decode_heap_oop(p);
  2929     do_object_work(obj);
  2932   virtual void do_oop(narrowOop* p) {
  2933     // We should not come across narrow oops while scanning marking
  2934     // stacks and SATB buffers.
  2935     ShouldNotReachHere();
  2938   virtual void do_object(oop obj) {
  2939     do_object_work(obj);
  2941 };
  2943 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks,
  2944                                          bool verify_enqueued_buffers,
  2945                                          bool verify_thread_buffers,
  2946                                          bool verify_fingers) {
  2947   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
  2948   if (!G1CollectedHeap::heap()->mark_in_progress()) {
  2949     return;
  2952   VerifyNoCSetOopsClosure cl;
  2954   if (verify_stacks) {
  2955     // Verify entries on the global mark stack
  2956     cl.set_phase(VerifyNoCSetOopsStack);
  2957     _markStack.oops_do(&cl);
  2959     // Verify entries on the task queues
  2960     for (uint i = 0; i < _max_worker_id; i += 1) {
  2961       cl.set_phase(VerifyNoCSetOopsQueues, i);
  2962       CMTaskQueue* queue = _task_queues->queue(i);
  2963       queue->oops_do(&cl);
  2967   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  2969   // Verify entries on the enqueued SATB buffers
  2970   if (verify_enqueued_buffers) {
  2971     cl.set_phase(VerifyNoCSetOopsSATBCompleted);
  2972     satb_qs.iterate_completed_buffers_read_only(&cl);
  2975   // Verify entries on the per-thread SATB buffers
  2976   if (verify_thread_buffers) {
  2977     cl.set_phase(VerifyNoCSetOopsSATBThread);
  2978     satb_qs.iterate_thread_buffers_read_only(&cl);
  2981   if (verify_fingers) {
  2982     // Verify the global finger
  2983     HeapWord* global_finger = finger();
  2984     if (global_finger != NULL && global_finger < _heap_end) {
  2985       // The global finger always points to a heap region boundary. We
  2986       // use heap_region_containing_raw() to get the containing region
  2987       // given that the global finger could be pointing to a free region
  2988       // which subsequently becomes continues humongous. If that
  2989       // happens, heap_region_containing() will return the bottom of the
  2990       // corresponding starts humongous region and the check below will
  2991       // not hold any more.
  2992       HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
  2993       guarantee(global_finger == global_hr->bottom(),
  2994                 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT,
  2995                         global_finger, HR_FORMAT_PARAMS(global_hr)));
  2998     // Verify the task fingers
  2999     assert(parallel_marking_threads() <= _max_worker_id, "sanity");
  3000     for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {
  3001       CMTask* task = _tasks[i];
  3002       HeapWord* task_finger = task->finger();
  3003       if (task_finger != NULL && task_finger < _heap_end) {
  3004         // See above note on the global finger verification.
  3005         HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
  3006         guarantee(task_finger == task_hr->bottom() ||
  3007                   !task_hr->in_collection_set(),
  3008                   err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT,
  3009                           task_finger, HR_FORMAT_PARAMS(task_hr)));
  3014 #endif // PRODUCT
  3016 // Aggregate the counting data that was constructed concurrently
  3017 // with marking.
  3018 class AggregateCountDataHRClosure: public HeapRegionClosure {
  3019   G1CollectedHeap* _g1h;
  3020   ConcurrentMark* _cm;
  3021   CardTableModRefBS* _ct_bs;
  3022   BitMap* _cm_card_bm;
  3023   uint _max_worker_id;
  3025  public:
  3026   AggregateCountDataHRClosure(G1CollectedHeap* g1h,
  3027                               BitMap* cm_card_bm,
  3028                               uint max_worker_id) :
  3029     _g1h(g1h), _cm(g1h->concurrent_mark()),
  3030     _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
  3031     _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
  3033   bool doHeapRegion(HeapRegion* hr) {
  3034     if (hr->continuesHumongous()) {
  3035       // We will ignore these here and process them when their
  3036       // associated "starts humongous" region is processed.
  3037       // Note that we cannot rely on their associated
  3038       // "starts humongous" region to have its bit set to 1
  3039       // since, due to the region chunking in the parallel region
  3040       // iteration, a "continues humongous" region might be visited
  3041       // before its associated "starts humongous".
  3042       return false;
  3045     HeapWord* start = hr->bottom();
  3046     HeapWord* limit = hr->next_top_at_mark_start();
  3047     HeapWord* end = hr->end();
  3049     assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
  3050            err_msg("Preconditions not met - "
  3051                    "start: "PTR_FORMAT", limit: "PTR_FORMAT", "
  3052                    "top: "PTR_FORMAT", end: "PTR_FORMAT,
  3053                    start, limit, hr->top(), hr->end()));
  3055     assert(hr->next_marked_bytes() == 0, "Precondition");
  3057     if (start == limit) {
  3058       // NTAMS of this region has not been set so nothing to do.
  3059       return false;
  3062     // 'start' should be in the heap.
  3063     assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
  3064     // 'end' *may* be just beyond the end of the heap (if hr is the last region)
  3065     assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
  3067     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
  3068     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
  3069     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
  3071     // If ntams is not card aligned then we bump card bitmap index
  3072     // for limit so that we get all the cards spanned by
  3073     // the object ending at ntams.
  3074     // Note: if this is the last region in the heap then ntams
  3075     // could actually be just beyond the end of the heap;
  3076     // limit_idx will then correspond to a (non-existent) card
  3077     // that is also outside the heap.
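    // Worked example (illustrative, assuming 512-byte cards): if this
    // region's cards are [100, 164) and ntams falls part-way into card
    // 131, then start_idx == 100 and limit_idx == 131; the bump below
    // makes limit_idx == 132, so the union also covers the card that
    // ntams only partially fills.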
  3078     if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
  3079       limit_idx += 1;
  3082     assert(limit_idx <= end_idx, "or else use atomics");
  3084     // Aggregate the "stripe" in the count data associated with hr.
  3085     uint hrs_index = hr->hrs_index();
  3086     size_t marked_bytes = 0;
  3088     for (uint i = 0; i < _max_worker_id; i += 1) {
  3089       size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
  3090       BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
  3092       // Fetch the marked_bytes in this region for task i and
  3093       // add it to the running total for this region.
  3094       marked_bytes += marked_bytes_array[hrs_index];
  3096       // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
  3097       // into the global card bitmap.
  3098       BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
  3100       while (scan_idx < limit_idx) {
  3101         assert(task_card_bm->at(scan_idx) == true, "should be");
  3102         _cm_card_bm->set_bit(scan_idx);
  3103         assert(_cm_card_bm->at(scan_idx) == true, "should be");
  3105         // BitMap::get_next_one_offset() can handle the case when
  3106         // its left_offset parameter is greater than its right_offset
  3107         // parameter. It does, however, have an early exit if
  3108         // left_offset == right_offset. So let's limit the value
  3109         // passed in for left offset here.
  3110         BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
  3111         scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
  3115     // Update the marked bytes for this region.
  3116     hr->add_to_marked_bytes(marked_bytes);
  3118     // Next heap region
  3119     return false;
  3121 };
  3123 class G1AggregateCountDataTask: public AbstractGangTask {
  3124 protected:
  3125   G1CollectedHeap* _g1h;
  3126   ConcurrentMark* _cm;
  3127   BitMap* _cm_card_bm;
  3128   uint _max_worker_id;
  3129   int _active_workers;
  3131 public:
  3132   G1AggregateCountDataTask(G1CollectedHeap* g1h,
  3133                            ConcurrentMark* cm,
  3134                            BitMap* cm_card_bm,
  3135                            uint max_worker_id,
  3136                            int n_workers) :
  3137     AbstractGangTask("Count Aggregation"),
  3138     _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
  3139     _max_worker_id(max_worker_id),
  3140     _active_workers(n_workers) { }
  3142   void work(uint worker_id) {
  3143     AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);
  3145     if (G1CollectedHeap::use_parallel_gc_threads()) {
  3146       _g1h->heap_region_par_iterate_chunked(&cl, worker_id,
  3147                                             _active_workers,
  3148                                             HeapRegion::AggregateCountClaimValue);
  3149     } else {
  3150       _g1h->heap_region_iterate(&cl);
  3153 };
  3156 void ConcurrentMark::aggregate_count_data() {
  3157   int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
  3158                         _g1h->workers()->active_workers() :
  3159                         1);
  3161   G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
  3162                                            _max_worker_id, n_workers);
  3164   if (G1CollectedHeap::use_parallel_gc_threads()) {
  3165     assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
  3166            "sanity check");
  3167     _g1h->set_par_threads(n_workers);
  3168     _g1h->workers()->run_task(&g1_par_agg_task);
  3169     _g1h->set_par_threads(0);
  3171     assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue),
  3172            "sanity check");
  3173     _g1h->reset_heap_region_claim_values();
  3174   } else {
  3175     g1_par_agg_task.work(0);
  3179 // Clear the per-worker arrays used to store the per-region counting data
  3180 void ConcurrentMark::clear_all_count_data() {
  3181   // Clear the global card bitmap - it will be filled during
  3182   // liveness count aggregation (during remark) and the
  3183   // final counting task.
  3184   _card_bm.clear();
  3186   // Clear the global region bitmap - it will be filled as part
  3187   // of the final counting task.
  3188   _region_bm.clear();
  3190   uint max_regions = _g1h->max_regions();
  3191   assert(_max_worker_id > 0, "uninitialized");
  3193   for (uint i = 0; i < _max_worker_id; i += 1) {
  3194     BitMap* task_card_bm = count_card_bitmap_for(i);
  3195     size_t* marked_bytes_array = count_marked_bytes_array_for(i);
  3197     assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
  3198     assert(marked_bytes_array != NULL, "uninitialized");
  3200     memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
  3201     task_card_bm->clear();
  3205 void ConcurrentMark::print_stats() {
  3206   if (verbose_stats()) {
  3207     gclog_or_tty->print_cr("---------------------------------------------------------------------");
  3208     for (size_t i = 0; i < _active_tasks; ++i) {
  3209       _tasks[i]->print_stats();
  3210       gclog_or_tty->print_cr("---------------------------------------------------------------------");
  3215 // abandon current marking iteration due to a Full GC
  3216 void ConcurrentMark::abort() {
  3217   // Clear all marks to force marking thread to do nothing
  3218   _nextMarkBitMap->clearAll();
  3219   // Clear the liveness counting data
  3220   clear_all_count_data();
  3221   // Empty mark stack
  3222   reset_marking_state();
  3223   for (uint i = 0; i < _max_worker_id; ++i) {
  3224     _tasks[i]->clear_region_fields();
  3226   _has_aborted = true;
  3228   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  3229   satb_mq_set.abandon_partial_marking();
  3230   // This can be called either during or outside marking; we'll read
  3231   // the expected_active value from the SATB queue set.
  3232   satb_mq_set.set_active_all_threads(
  3233                                  false, /* new active value */
  3234                                  satb_mq_set.is_active() /* expected_active */);
  3237 static void print_ms_time_info(const char* prefix, const char* name,
  3238                                NumberSeq& ns) {
  3239   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
  3240                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
  3241   if (ns.num() > 0) {
  3242     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
  3243                            prefix, ns.sd(), ns.maximum());
  3247 void ConcurrentMark::print_summary_info() {
  3248   gclog_or_tty->print_cr(" Concurrent marking:");
  3249   print_ms_time_info("  ", "init marks", _init_times);
  3250   print_ms_time_info("  ", "remarks", _remark_times);
  3252     print_ms_time_info("     ", "final marks", _remark_mark_times);
  3253     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
  3256   print_ms_time_info("  ", "cleanups", _cleanup_times);
  3257   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
  3258                          _total_counting_time,
  3259                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
  3260                           (double)_cleanup_times.num()
  3261                          : 0.0));
  3262   if (G1ScrubRemSets) {
  3263     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
  3264                            _total_rs_scrub_time,
  3265                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
  3266                             (double)_cleanup_times.num()
  3267                            : 0.0));
  3269   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
  3270                          (_init_times.sum() + _remark_times.sum() +
  3271                           _cleanup_times.sum())/1000.0);
  3272   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
  3273                 "(%8.2f s marking).",
  3274                 cmThread()->vtime_accum(),
  3275                 cmThread()->vtime_mark_accum());
  3278 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
  3279   if (use_parallel_marking_threads()) {
  3280     _parallel_workers->print_worker_threads_on(st);
  3284 void ConcurrentMark::print_on_error(outputStream* st) const {
  3285   st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
  3286       _prevMarkBitMap, _nextMarkBitMap);
  3287   _prevMarkBitMap->print_on_error(st, " Prev Bits: ");
  3288   _nextMarkBitMap->print_on_error(st, " Next Bits: ");
  3291 // We take a break if someone is trying to stop the world.
  3292 bool ConcurrentMark::do_yield_check(uint worker_id) {
  3293   if (should_yield()) {
  3294     if (worker_id == 0) {
  3295       _g1h->g1_policy()->record_concurrent_pause();
  3297     cmThread()->yield();
  3298     return true;
  3299   } else {
  3300     return false;
  3304 bool ConcurrentMark::should_yield() {
  3305   return cmThread()->should_yield();
  3308 bool ConcurrentMark::containing_card_is_marked(void* p) {
  3309   size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
  3310   return _card_bm.at(offset >> CardTableModRefBS::card_shift);
  3313 bool ConcurrentMark::containing_cards_are_marked(void* start,
  3314                                                  void* last) {
  3315   return containing_card_is_marked(start) &&
  3316          containing_card_is_marked(last);
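// Worked example (illustrative, assuming the usual 512-byte cards, i.e.
// CardTableModRefBS::card_shift == 9): a pointer 0x2468 bytes past the
// start of the reserved heap yields offset 0x2468, and 0x2468 >> 9 == 18,
// so containing_card_is_marked() tests bit 18 of _card_bm.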
  3319 #ifndef PRODUCT
  3320 // for debugging purposes
  3321 void ConcurrentMark::print_finger() {
  3322   gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
  3323                          _heap_start, _heap_end, _finger);
  3324   for (uint i = 0; i < _max_worker_id; ++i) {
  3325     gclog_or_tty->print("   %u: "PTR_FORMAT, i, _tasks[i]->finger());
  3327   gclog_or_tty->print_cr("");
  3329 #endif
  3331 void CMTask::scan_object(oop obj) {
  3332   assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
  3334   if (_cm->verbose_high()) {
  3335     gclog_or_tty->print_cr("[%u] we're scanning object "PTR_FORMAT,
  3336                            _worker_id, (void*) obj);
  3339   size_t obj_size = obj->size();
  3340   _words_scanned += obj_size;
  3342   obj->oop_iterate(_cm_oop_closure);
  3343   statsOnly( ++_objs_scanned );
  3344   check_limits();
  3347 // Closure for iteration over bitmaps
  3348 class CMBitMapClosure : public BitMapClosure {
  3349 private:
  3350   // the bitmap that is being iterated over
  3351   CMBitMap*                   _nextMarkBitMap;
  3352   ConcurrentMark*             _cm;
  3353   CMTask*                     _task;
  3355 public:
  3356   CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
  3357     _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
  3359   bool do_bit(size_t offset) {
  3360     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
  3361     assert(_nextMarkBitMap->isMarked(addr), "invariant");
  3362     assert( addr < _cm->finger(), "invariant");
  3364     statsOnly( _task->increase_objs_found_on_bitmap() );
  3365     assert(addr >= _task->finger(), "invariant");
  3367     // We move that task's local finger along.
  3368     _task->move_finger_to(addr);
  3370     _task->scan_object(oop(addr));
  3371     // we only partially drain the local queue and global stack
  3372     _task->drain_local_queue(true);
  3373     _task->drain_global_stack(true);
  3375     // if the has_aborted flag has been raised, we need to bail out of
  3376     // the iteration
  3377     return !_task->has_aborted();
  3379 };
  3381 // Closure for iterating over objects, currently only used for
  3382 // processing SATB buffers.
  3383 class CMObjectClosure : public ObjectClosure {
  3384 private:
  3385   CMTask* _task;
  3387 public:
  3388   void do_object(oop obj) {
  3389     _task->deal_with_reference(obj);
  3392   CMObjectClosure(CMTask* task) : _task(task) { }
  3393 };
  3395 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
  3396                                ConcurrentMark* cm,
  3397                                CMTask* task)
  3398   : _g1h(g1h), _cm(cm), _task(task) {
  3399   assert(_ref_processor == NULL, "should be initialized to NULL");
  3401   if (G1UseConcMarkReferenceProcessing) {
  3402     _ref_processor = g1h->ref_processor_cm();
  3403     assert(_ref_processor != NULL, "should not be NULL");
  3407 void CMTask::setup_for_region(HeapRegion* hr) {
  3408   // Separated the asserts so that we know which one fires.
  3409   assert(hr != NULL,
  3410         "claim_region() should have filtered out continues humongous regions");
  3411   assert(!hr->continuesHumongous(),
  3412         "claim_region() should have filtered out continues humongous regions");
  3414   if (_cm->verbose_low()) {
  3415     gclog_or_tty->print_cr("[%u] setting up for region "PTR_FORMAT,
  3416                            _worker_id, hr);
  3419   _curr_region  = hr;
  3420   _finger       = hr->bottom();
  3421   update_region_limit();
  3424 void CMTask::update_region_limit() {
  3425   HeapRegion* hr            = _curr_region;
  3426   HeapWord* bottom          = hr->bottom();
  3427   HeapWord* limit           = hr->next_top_at_mark_start();
  3429   if (limit == bottom) {
  3430     if (_cm->verbose_low()) {
  3431       gclog_or_tty->print_cr("[%u] found an empty region "
  3432                              "["PTR_FORMAT", "PTR_FORMAT")",
  3433                              _worker_id, bottom, limit);
  3435     // The region was collected underneath our feet.
  3436     // We set the finger to bottom to ensure that the bitmap
  3437     // iteration that will follow this will not do anything.
  3438     // (this is not a condition that holds when we set the region up,
  3439     // as the region is not supposed to be empty in the first place)
  3440     _finger = bottom;
  3441   } else if (limit >= _region_limit) {
  3442     assert(limit >= _finger, "peace of mind");
  3443   } else {
  3444     assert(limit < _region_limit, "only way to get here");
  3445     // This can happen under some pretty unusual circumstances.  An
  3446     // evacuation pause empties the region underneath our feet (NTAMS
  3447     // at bottom). We then do some allocation in the region (NTAMS
  3448     // stays at bottom), followed by the region being used as a GC
  3449     // alloc region (NTAMS will move to top() and the objects
  3450     // originally below it will be grayed). All objects now marked in
  3451     // the region are explicitly grayed, if below the global finger,
  3452     // the region are explicitly grayed and in fact we do not need to
  3453     // set _finger to be limit to ensure that the bitmap iteration
  3454     // doesn't do anything.
  3455     _finger = limit;
  3458   _region_limit = limit;
  3461 void CMTask::giveup_current_region() {
  3462   assert(_curr_region != NULL, "invariant");
  3463   if (_cm->verbose_low()) {
  3464     gclog_or_tty->print_cr("[%u] giving up region "PTR_FORMAT,
  3465                            _worker_id, _curr_region);
  3467   clear_region_fields();
  3470 void CMTask::clear_region_fields() {
  3471   // Set these three fields to values that indicate we're not
  3472   // holding on to a region.
  3473   _curr_region   = NULL;
  3474   _finger        = NULL;
  3475   _region_limit  = NULL;
  3478 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
  3479   if (cm_oop_closure == NULL) {
  3480     assert(_cm_oop_closure != NULL, "invariant");
  3481   } else {
  3482     assert(_cm_oop_closure == NULL, "invariant");
  3484   _cm_oop_closure = cm_oop_closure;
  3487 void CMTask::reset(CMBitMap* nextMarkBitMap) {
  3488   guarantee(nextMarkBitMap != NULL, "invariant");
  3490   if (_cm->verbose_low()) {
  3491     gclog_or_tty->print_cr("[%u] resetting", _worker_id);
  3494   _nextMarkBitMap                = nextMarkBitMap;
  3495   clear_region_fields();
  3497   _calls                         = 0;
  3498   _elapsed_time_ms               = 0.0;
  3499   _termination_time_ms           = 0.0;
  3500   _termination_start_time_ms     = 0.0;
  3502 #if _MARKING_STATS_
  3503   _local_pushes                  = 0;
  3504   _local_pops                    = 0;
  3505   _local_max_size                = 0;
  3506   _objs_scanned                  = 0;
  3507   _global_pushes                 = 0;
  3508   _global_pops                   = 0;
  3509   _global_max_size               = 0;
  3510   _global_transfers_to           = 0;
  3511   _global_transfers_from         = 0;
  3512   _regions_claimed               = 0;
  3513   _objs_found_on_bitmap          = 0;
  3514   _satb_buffers_processed        = 0;
  3515   _steal_attempts                = 0;
  3516   _steals                        = 0;
  3517   _aborted                       = 0;
  3518   _aborted_overflow              = 0;
  3519   _aborted_cm_aborted            = 0;
  3520   _aborted_yield                 = 0;
  3521   _aborted_timed_out             = 0;
  3522   _aborted_satb                  = 0;
  3523   _aborted_termination           = 0;
  3524 #endif // _MARKING_STATS_
  3527 bool CMTask::should_exit_termination() {
  3528   regular_clock_call();
  3529   // This is called when we are in the termination protocol. We should
  3530   // quit if, for some reason, this task wants to abort or the global
  3531   // stack is not empty (this means that we can get work from it).
  3532   return !_cm->mark_stack_empty() || has_aborted();
  3535 void CMTask::reached_limit() {
  3536   assert(_words_scanned >= _words_scanned_limit ||
  3537          _refs_reached >= _refs_reached_limit ,
  3538          "shouldn't have been called otherwise");
  3539   regular_clock_call();
  3542 void CMTask::regular_clock_call() {
  3543   if (has_aborted()) return;
  3545   // First, we need to recalculate the words scanned and refs reached
  3546   // limits for the next clock call.
  3547   recalculate_limits();
  3549   // During the regular clock call we do the following
  3551   // (1) If an overflow has been flagged, then we abort.
  3552   if (_cm->has_overflown()) {
  3553     set_has_aborted();
  3554     return;
  3557   // If we are not concurrent (i.e. we're doing remark) we don't need
  3558   // to check anything else. The other steps are only needed during
  3559   // the concurrent marking phase.
  3560   if (!concurrent()) return;
  3562   // (2) If marking has been aborted for Full GC, then we also abort.
  3563   if (_cm->has_aborted()) {
  3564     set_has_aborted();
  3565     statsOnly( ++_aborted_cm_aborted );
  3566     return;
  3569   double curr_time_ms = os::elapsedVTime() * 1000.0;
  3571   // (3) If marking stats are enabled, then we update the step history.
  3572 #if _MARKING_STATS_
  3573   if (_words_scanned >= _words_scanned_limit) {
  3574     ++_clock_due_to_scanning;
  3576   if (_refs_reached >= _refs_reached_limit) {
  3577     ++_clock_due_to_marking;
  3580   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
  3581   _interval_start_time_ms = curr_time_ms;
  3582   _all_clock_intervals_ms.add(last_interval_ms);
  3584   if (_cm->verbose_medium()) {
  3585       gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, "
  3586                         "scanned = %d%s, refs reached = %d%s",
  3587                         _worker_id, last_interval_ms,
  3588                         _words_scanned,
  3589                         (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
  3590                         _refs_reached,
  3591                         (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
  3593 #endif // _MARKING_STATS_
  3595   // (4) We check whether we should yield. If we have to, then we abort.
  3596   if (_cm->should_yield()) {
  3597     // We should yield. To do this we abort the task. The caller is
  3598     // responsible for yielding.
  3599     set_has_aborted();
  3600     statsOnly( ++_aborted_yield );
  3601     return;
  3604   // (5) We check whether we've reached our time quota. If we have,
  3605   // then we abort.
  3606   double elapsed_time_ms = curr_time_ms - _start_time_ms;
  3607   if (elapsed_time_ms > _time_target_ms) {
  3608     set_has_aborted();
  3609     _has_timed_out = true;
  3610     statsOnly( ++_aborted_timed_out );
  3611     return;
  3614   // (6) Finally, we check whether there are enough completed SATB
  3615   // buffers available for processing. If there are, we abort.
  3616   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  3617   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
  3618     if (_cm->verbose_low()) {
  3619       gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
  3620                              _worker_id);
  3622     // we do need to process SATB buffers, so we'll abort and restart
  3623     // the marking task to do so
  3624     set_has_aborted();
  3625     statsOnly( ++_aborted_satb );
  3626     return;
  3630 void CMTask::recalculate_limits() {
  3631   _real_words_scanned_limit = _words_scanned + words_scanned_period;
  3632   _words_scanned_limit      = _real_words_scanned_limit;
  3634   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
  3635   _refs_reached_limit       = _real_refs_reached_limit;
  3638 void CMTask::decrease_limits() {
  3639   // This is called when we believe that we're going to do an infrequent
  3640   // operation which will increase the per byte scanned cost (i.e. move
  3641   // entries to/from the global stack). It basically tries to decrease the
  3642   // scanning limit so that the clock is called earlier.
  3644   if (_cm->verbose_medium()) {
  3645     gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id);
  3648   _words_scanned_limit = _real_words_scanned_limit -
  3649     3 * words_scanned_period / 4;
  3650   _refs_reached_limit  = _real_refs_reached_limit -
  3651     3 * refs_reached_period / 4;
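// Worked example (illustrative, with made-up numbers): if
// words_scanned_period were 12K words, _words_scanned were 20K and
// _real_words_scanned_limit were 32K, then decrease_limits() pulls
// _words_scanned_limit back to 32K - 9K = 23K, so the next
// regular_clock_call() fires after only a quarter of the normal
// scanning period. The refs-reached limit is reduced the same way.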
  3654 void CMTask::move_entries_to_global_stack() {
  3655   // local array where we'll store the entries that will be popped
  3656   // from the local queue
  3657   oop buffer[global_stack_transfer_size];
  3659   int n = 0;
  3660   oop obj;
  3661   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
  3662     buffer[n] = obj;
  3663     ++n;
  3666   if (n > 0) {
  3667     // we popped at least one entry from the local queue
  3669     statsOnly( ++_global_transfers_to; _local_pops += n );
  3671     if (!_cm->mark_stack_push(buffer, n)) {
  3672       if (_cm->verbose_low()) {
  3673         gclog_or_tty->print_cr("[%u] aborting due to global stack overflow",
  3674                                _worker_id);
  3676       set_has_aborted();
  3677     } else {
  3678       // the transfer was successful
  3680       if (_cm->verbose_medium()) {
  3681         gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack",
  3682                                _worker_id, n);
  3684       statsOnly( int tmp_size = _cm->mark_stack_size();
  3685                  if (tmp_size > _global_max_size) {
  3686                    _global_max_size = tmp_size;
  3688                  _global_pushes += n );
  3692   // this operation was quite expensive, so decrease the limits
  3693   decrease_limits();
  3696 void CMTask::get_entries_from_global_stack() {
  3697   // local array where we'll store the entries that will be popped
  3698   // from the global stack.
  3699   oop buffer[global_stack_transfer_size];
  3700   int n;
  3701   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
  3702   assert(n <= global_stack_transfer_size,
  3703          "we should not pop more than the given limit");
  3704   if (n > 0) {
  3705     // yes, we did actually pop at least one entry
  3707     statsOnly( ++_global_transfers_from; _global_pops += n );
  3708     if (_cm->verbose_medium()) {
  3709       gclog_or_tty->print_cr("[%u] popped %d entries from the global stack",
  3710                              _worker_id, n);
  3712     for (int i = 0; i < n; ++i) {
  3713       bool success = _task_queue->push(buffer[i]);
  3714       // We only call this when the local queue is empty or under a
  3715       // given target limit. So, we do not expect this push to fail.
  3716       assert(success, "invariant");
  3719     statsOnly( int tmp_size = _task_queue->size();
  3720                if (tmp_size > _local_max_size) {
  3721                  _local_max_size = tmp_size;
  3723                _local_pushes += n );
  3726   // this operation was quite expensive, so decrease the limits
  3727   decrease_limits();
  3730 void CMTask::drain_local_queue(bool partially) {
  3731   if (has_aborted()) return;
  3733   // Decide what the target size is, depending on whether we're going to
  3734   // drain it partially (so that other tasks can steal if they run out
  3735   // of things to do) or totally (at the very end).
  3736   size_t target_size;
  3737   if (partially) {
  3738     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
  3739   } else {
  3740     target_size = 0;
  3743   if (_task_queue->size() > target_size) {
  3744     if (_cm->verbose_high()) {
  3745       gclog_or_tty->print_cr("[%u] draining local queue, target size = %d",
  3746                              _worker_id, target_size);
  3749     oop obj;
  3750     bool ret = _task_queue->pop_local(obj);
  3751     while (ret) {
  3752       statsOnly( ++_local_pops );
  3754       if (_cm->verbose_high()) {
  3755         gclog_or_tty->print_cr("[%u] popped "PTR_FORMAT, _worker_id,
  3756                                (void*) obj);
  3759       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
  3760       assert(!_g1h->is_on_master_free_list(
  3761                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
  3763       scan_object(obj);
  3765       if (_task_queue->size() <= target_size || has_aborted()) {
  3766         ret = false;
  3767       } else {
  3768         ret = _task_queue->pop_local(obj);
  3772     if (_cm->verbose_high()) {
  3773       gclog_or_tty->print_cr("[%u] drained local queue, size = %d",
  3774                              _worker_id, _task_queue->size());
  3779 void CMTask::drain_global_stack(bool partially) {
  3780   if (has_aborted()) return;
  3782   // We have a policy to drain the local queue before we attempt to
  3783   // drain the global stack.
  3784   assert(partially || _task_queue->size() == 0, "invariant");
  3786   // Decide what the target size is, depending on whether we're going to
  3787   // drain it partially (so that other tasks can steal if they run out
  3788   // of things to do) or totally (at the very end).  Notice that,
  3789   // because we move entries from the global stack in chunks or
  3790   // because another task might be doing the same, we might in fact
  3791   // drop below the target. But, this is not a problem.
  3792   size_t target_size;
  3793   if (partially) {
  3794     target_size = _cm->partial_mark_stack_size_target();
  3795   } else {
  3796     target_size = 0;
  3799   if (_cm->mark_stack_size() > target_size) {
  3800     if (_cm->verbose_low()) {
  3801       gclog_or_tty->print_cr("[%u] draining global_stack, target size %d",
  3802                              _worker_id, target_size);
  3805     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
  3806       get_entries_from_global_stack();
  3807       drain_local_queue(partially);
  3810     if (_cm->verbose_low()) {
  3811       gclog_or_tty->print_cr("[%u] drained global stack, size = %d",
  3812                              _worker_id, _cm->mark_stack_size());
  3817 // The SATB queue code makes several assumptions about whether to call
  3818 // the par or non-par versions of its methods. This is why some of the code is
  3819 // replicated. We should really get rid of the single-threaded version
  3820 // of the code to simplify things.
  3821 void CMTask::drain_satb_buffers() {
  3822   if (has_aborted()) return;
  3824   // We set this so that the regular clock knows that we're in the
  3825   // middle of draining buffers and doesn't set the abort flag when it
  3826   // notices that SATB buffers are available for draining. It'd be
  3827   // very counterproductive if it did that. :-)
  3828   _draining_satb_buffers = true;
  3830   CMObjectClosure oc(this);
  3831   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  3832   if (G1CollectedHeap::use_parallel_gc_threads()) {
  3833     satb_mq_set.set_par_closure(_worker_id, &oc);
  3834   } else {
  3835     satb_mq_set.set_closure(&oc);
  3838   // This keeps claiming and applying the closure to completed buffers
  3839   // until we run out of buffers or we need to abort.
  3840   if (G1CollectedHeap::use_parallel_gc_threads()) {
  3841     while (!has_aborted() &&
  3842            satb_mq_set.par_apply_closure_to_completed_buffer(_worker_id)) {
  3843       if (_cm->verbose_medium()) {
  3844         gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
  3846       statsOnly( ++_satb_buffers_processed );
  3847       regular_clock_call();
  3849   } else {
  3850     while (!has_aborted() &&
  3851            satb_mq_set.apply_closure_to_completed_buffer()) {
  3852       if (_cm->verbose_medium()) {
  3853         gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
  3855       statsOnly( ++_satb_buffers_processed );
  3856       regular_clock_call();
  3860   if (!concurrent() && !has_aborted()) {
  3861     // We should only do this during remark.
  3862     if (G1CollectedHeap::use_parallel_gc_threads()) {
  3863       satb_mq_set.par_iterate_closure_all_threads(_worker_id);
  3864     } else {
  3865       satb_mq_set.iterate_closure_all_threads();
  3869   _draining_satb_buffers = false;
  3871   assert(has_aborted() ||
  3872          concurrent() ||
  3873          satb_mq_set.completed_buffers_num() == 0, "invariant");
  3875   if (G1CollectedHeap::use_parallel_gc_threads()) {
  3876     satb_mq_set.set_par_closure(_worker_id, NULL);
  3877   } else {
  3878     satb_mq_set.set_closure(NULL);
  3881   // again, this was a potentially expensive operation, so decrease the
  3882   // limits to get the regular clock call early
  3883   decrease_limits();
  3886 void CMTask::print_stats() {
  3887   gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d",
  3888                          _worker_id, _calls);
  3889   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
  3890                          _elapsed_time_ms, _termination_time_ms);
  3891   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
  3892                          _step_times_ms.num(), _step_times_ms.avg(),
  3893                          _step_times_ms.sd());
  3894   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
  3895                          _step_times_ms.maximum(), _step_times_ms.sum());
  3897 #if _MARKING_STATS_
  3898   gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
  3899                          _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
  3900                          _all_clock_intervals_ms.sd());
  3901   gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
  3902                          _all_clock_intervals_ms.maximum(),
  3903                          _all_clock_intervals_ms.sum());
  3904   gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
  3905                          _clock_due_to_scanning, _clock_due_to_marking);
  3906   gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
  3907                          _objs_scanned, _objs_found_on_bitmap);
  3908   gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
  3909                          _local_pushes, _local_pops, _local_max_size);
  3910   gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
  3911                          _global_pushes, _global_pops, _global_max_size);
  3912   gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
  3913                          _global_transfers_to,_global_transfers_from);
  3914   gclog_or_tty->print_cr("  Regions: claimed = %d", _regions_claimed);
  3915   gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
  3916   gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
  3917                          _steal_attempts, _steals);
  3918   gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
  3919   gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
  3920                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
  3921   gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
  3922                          _aborted_timed_out, _aborted_satb, _aborted_termination);
  3923 #endif // _MARKING_STATS_
  3926 /*****************************************************************************
  3928     The do_marking_step(time_target_ms, ...) method is the building
  3929     block of the parallel marking framework. It can be called in parallel
  3930     with other invocations of do_marking_step() on different tasks
  3931     (but only one per task, obviously) and concurrently with the
  3932     mutator threads, or during remark, hence it eliminates the need
  3933     for two versions of the code. When called during remark, it will
  3934     pick up from where the task left off during the concurrent marking
    3935     phase. Interestingly, tasks are also claimable during evacuation
    3936     pauses, since do_marking_step() ensures that it aborts before
  3937     it needs to yield.
  3939     The data structures that it uses to do marking work are the
  3940     following:
  3942       (1) Marking Bitmap. If there are gray objects that appear only
  3943       on the bitmap (this happens either when dealing with an overflow
  3944       or when the initial marking phase has simply marked the roots
  3945       and didn't push them on the stack), then tasks claim heap
  3946       regions whose bitmap they then scan to find gray objects. A
  3947       global finger indicates where the end of the last claimed region
  3948       is. A local finger indicates how far into the region a task has
  3949       scanned. The two fingers are used to determine how to gray an
  3950       object (i.e. whether simply marking it is OK, as it will be
    3951       visited by a task in the future, or whether it also needs to be
    3952       pushed on a stack).
  3954       (2) Local Queue. The local queue of the task which is accessed
  3955       reasonably efficiently by the task. Other tasks can steal from
  3956       it when they run out of work. Throughout the marking phase, a
  3957       task attempts to keep its local queue short but not totally
  3958       empty, so that entries are available for stealing by other
    3959       tasks. Only when there is no more work will a task totally
    3960       drain its local queue.
  3962       (3) Global Mark Stack. This handles local queue overflow. During
  3963       marking only sets of entries are moved between it and the local
    3964       queues, as access to it requires a mutex and more fine-grained
    3965       interaction with it might cause contention. If it
  3966       overflows, then the marking phase should restart and iterate
  3967       over the bitmap to identify gray objects. Throughout the marking
  3968       phase, tasks attempt to keep the global mark stack at a small
  3969       length but not totally empty, so that entries are available for
    3970       popping by other tasks. Only when there is no more work will tasks
    3971       totally drain the global mark stack.
  3973       (4) SATB Buffer Queue. This is where completed SATB buffers are
  3974       made available. Buffers are regularly removed from this queue
  3975       and scanned for roots, so that the queue doesn't get too
  3976       long. During remark, all completed buffers are processed, as
    3977       well as the filled-in parts of any uncompleted buffers.
  3979     The do_marking_step() method tries to abort when the time target
  3980     has been reached. There are a few other cases when the
  3981     do_marking_step() method also aborts:
  3983       (1) When the marking phase has been aborted (after a Full GC).
  3985       (2) When a global overflow (on the global stack) has been
  3986       triggered. Before the task aborts, it will actually sync up with
  3987       the other tasks to ensure that all the marking data structures
  3988       (local queues, stacks, fingers etc.)  are re-initialized so that
  3989       when do_marking_step() completes, the marking phase can
  3990       immediately restart.
  3992       (3) When enough completed SATB buffers are available. The
  3993       do_marking_step() method only tries to drain SATB buffers right
  3994       at the beginning. So, if enough buffers are available, the
  3995       marking step aborts and the SATB buffers are processed at
  3996       the beginning of the next invocation.
    3998       (4) To yield. When we have to yield, we abort and yield
    3999       right at the end of do_marking_step(). This saves us a lot
    4000       of hassle as, by yielding, we might allow a Full GC. If this
  4001       happens then objects will be compacted underneath our feet, the
  4002       heap might shrink, etc. We save checking for this by just
  4003       aborting and doing the yield right at the end.
  4005     From the above it follows that the do_marking_step() method should
  4006     be called in a loop (or, otherwise, regularly) until it completes.
  4008     If a marking step completes without its has_aborted() flag being
  4009     true, it means it has completed the current marking phase (and
  4010     also all other marking tasks have done so and have all synced up).
    4012     A method called regular_clock_call() is invoked "regularly" (at
    4013     sub-millisecond intervals) throughout marking. It is this clock method that
  4014     checks all the abort conditions which were mentioned above and
  4015     decides when the task should abort. A work-based scheme is used to
  4016     trigger this clock method: when the number of object words the
  4017     marking phase has scanned or the number of references the marking
    4018     phase has visited reach a given limit. Additional invocations of
    4019     the clock method have been planted in a few other strategic places
  4020     too. The initial reason for the clock method was to avoid calling
  4021     vtime too regularly, as it is quite expensive. So, once it was in
  4022     place, it was natural to piggy-back all the other conditions on it
  4023     too and not constantly check them throughout the code.
  4025     If do_termination is true then do_marking_step will enter its
  4026     termination protocol.
  4028     The value of is_serial must be true when do_marking_step is being
  4029     called serially (i.e. by the VMThread) and do_marking_step should
  4030     skip any synchronization in the termination and overflow code.
  4031     Examples include the serial remark code and the serial reference
  4032     processing closures.
  4034     The value of is_serial must be false when do_marking_step is
  4035     being called by any of the worker threads in a work gang.
  4036     Examples include the concurrent marking code (CMMarkingTask),
  4037     the MT remark code, and the MT reference processing closures.
  4039  *****************************************************************************/
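// Illustrative sketch (not part of the original file) of the kind of
// driver loop described above: keep re-invoking do_marking_step() with a
// small time budget until a step completes without aborting. The function
// name drive_marking and its parameters are placeholders, not the actual
// caller's code; the guard keeps the sketch out of compilation.
#if 0
static void drive_marking(CMTask* task, ConcurrentMark* cm,
                          uint worker_id, double mark_step_ms) {
  do {
    task->do_marking_step(mark_step_ms,
                          true  /* do_termination */,
                          false /* is_serial */);
    if (task->has_aborted()) {
      // The step aborted (overflow, yield request, pending SATB buffers,
      // or time-out); give the VM a chance to safepoint, then retry.
      cm->do_yield_check(worker_id);
    }
  } while (!cm->has_aborted() && task->has_aborted());
}
#endif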
  4041 void CMTask::do_marking_step(double time_target_ms,
  4042                              bool do_termination,
  4043                              bool is_serial) {
  4044   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
  4045   assert(concurrent() == _cm->concurrent(), "they should be the same");
  4047   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
  4048   assert(_task_queues != NULL, "invariant");
  4049   assert(_task_queue != NULL, "invariant");
  4050   assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
  4052   assert(!_claimed,
  4053          "only one thread should claim this task at any one time");
  4055   // OK, this doesn't safeguard against all possible scenarios, as it is
  4056   // possible for two threads to set the _claimed flag at the same
  4057   // time. But it is only for debugging purposes anyway and it will
  4058   // catch most problems.
  4059   _claimed = true;
  4061   _start_time_ms = os::elapsedVTime() * 1000.0;
  4062   statsOnly( _interval_start_time_ms = _start_time_ms );
  4064   // If do_stealing is true then do_marking_step will attempt to
  4065   // steal work from the other CMTasks. It only makes sense to
  4066   // enable stealing when the termination protocol is enabled
  4067   // and do_marking_step() is not being called serially.
  4068   bool do_stealing = do_termination && !is_serial;
  4070   double diff_prediction_ms =
  4071     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
  4072   _time_target_ms = time_target_ms - diff_prediction_ms;
  4074   // set up the variables that are used in the work-based scheme to
  4075   // call the regular clock method
  4076   _words_scanned = 0;
  4077   _refs_reached  = 0;
  4078   recalculate_limits();
  4080   // clear all flags
  4081   clear_has_aborted();
  4082   _has_timed_out = false;
  4083   _draining_satb_buffers = false;
  4085   ++_calls;
  4087   if (_cm->verbose_low()) {
  4088     gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, "
  4089                            "target = %1.2lfms >>>>>>>>>>",
  4090                            _worker_id, _calls, _time_target_ms);
  4093   // Set up the bitmap and oop closures. Anything that uses them is
  4094   // eventually called from this method, so it is OK to allocate these
  4095   // statically.
  4096   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
  4097   G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
  4098   set_cm_oop_closure(&cm_oop_closure);
  4100   if (_cm->has_overflown()) {
  4101     // This can happen if the mark stack overflows during a GC pause
  4102     // and this task, after a yield point, restarts. We have to abort
  4103     // as we need to get into the overflow protocol which happens
  4104     // right at the end of this task.
  4105     set_has_aborted();
  4108   // First drain any available SATB buffers. After this, we will not
  4109   // look at SATB buffers before the next invocation of this method.
  4110   // If enough completed SATB buffers are queued up, the regular clock
  4111   // will abort this task so that it restarts.
  4112   drain_satb_buffers();
  4113   // ...then partially drain the local queue and the global stack
  4114   drain_local_queue(true);
  4115   drain_global_stack(true);
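         // (A 'true' argument asks for a partial drain, which stops once the
         // queue/stack is below a threshold; 'false', used towards the end
         // of this method, drains them completely.)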
  4117   do {
  4118     if (!has_aborted() && _curr_region != NULL) {
  4119       // This means that we're already holding on to a region.
  4120       assert(_finger != NULL, "if region is not NULL, then the finger "
  4121              "should not be NULL either");
  4123       // We might have restarted this task after an evacuation pause
  4124       // which might have evacuated the region we're holding on to
  4125       // underneath our feet. Let's read its limit again to make sure
  4126       // that we do not iterate over a region of the heap that
  4127       // contains garbage (update_region_limit() will also move
  4128       // _finger to the start of the region if it is found empty).
  4129       update_region_limit();
  4130       // We will start from _finger not from the start of the region,
  4131       // as we might be restarting this task after aborting half-way
  4132       // through scanning this region. In this case, _finger points to
  4133       // the address where we last found a marked object. If this is a
  4134       // fresh region, _finger points to start().
  4135       MemRegion mr = MemRegion(_finger, _region_limit);
  4137       if (_cm->verbose_low()) {
  4138         gclog_or_tty->print_cr("[%u] we're scanning part "
  4139                                "["PTR_FORMAT", "PTR_FORMAT") "
  4140                                "of region "HR_FORMAT,
  4141                                _worker_id, _finger, _region_limit,
  4142                                HR_FORMAT_PARAMS(_curr_region));
  4143       }
  4145       assert(!_curr_region->isHumongous() || mr.start() == _curr_region->bottom(),
  4146              "humongous regions should go around loop once only");
  4148       // Some special cases:
  4149       // If the memory region is empty, we can just give up the region.
  4150       // If the current region is humongous then we only need to check
  4151       // the bitmap for the bit associated with the start of the object,
  4152       // scan the object if it's live, and give up the region.
  4153       // Otherwise, let's iterate over the bitmap of the part of the region
  4154       // that is left.
  4155       // If the iteration is successful, give up the region.
  4156       if (mr.is_empty()) {
  4157         giveup_current_region();
  4158         regular_clock_call();
  4159       } else if (_curr_region->isHumongous() && mr.start() == _curr_region->bottom()) {
  4160         if (_nextMarkBitMap->isMarked(mr.start())) {
  4161           // The object is marked - apply the closure
  4162           BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
  4163           bitmap_closure.do_bit(offset);
  4164         }
  4165         // Even if this task aborted while scanning the humongous object
  4166         // we can (and should) give up the current region.
  4167         giveup_current_region();
  4168         regular_clock_call();
  4169       } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
  4170         giveup_current_region();
  4171         regular_clock_call();
  4172       } else {
  4173         assert(has_aborted(), "currently the only way to do so");
  4174         // The only way to abort the bitmap iteration is to return
  4175         // false from the do_bit() method. However, inside the
  4176         // do_bit() method we move the _finger to point to the
  4177         // object currently being looked at. So, if we bail out, we
  4178         // have definitely set _finger to something non-null.
  4179         assert(_finger != NULL, "invariant");
  4181         // Region iteration was actually aborted. So now _finger
  4182         // points to the address of the object we last scanned. If we
  4183         // leave it there, when we restart this task, we will rescan
  4184         // the object. It is easy to avoid this. We move the finger by
  4185         // enough to point to the next possible object header (the
  4186         // bitmap knows by how much we need to move it as it knows its
  4187         // granularity).
  4188         assert(_finger < _region_limit, "invariant");
  4189         HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
  4190         // Check if bitmap iteration was aborted while scanning the last object
  4191         if (new_finger >= _region_limit) {
  4192           giveup_current_region();
  4193         } else {
  4194           move_finger_to(new_finger);
  4195         }
  4196       }
  4197     }
  4198     // At this point we have either completed iterating over the
  4199     // region we were holding on to, or we have aborted.
  4201     // We then partially drain the local queue and the global stack.
  4202     // (Do we really need this?)
  4203     drain_local_queue(true);
  4204     drain_global_stack(true);
  4206     // Read the note on the claim_region() method on why it might
  4207     // return NULL with potentially more regions available for
  4208     // claiming and why we have to check out_of_regions() to determine
  4209     // whether we're done or not.
  4210     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
  4211       // We are going to try to claim a new region. We should have
  4212       // given up on the previous one.
  4213       // Separated the asserts so that we know which one fires.
  4214       assert(_curr_region  == NULL, "invariant");
  4215       assert(_finger       == NULL, "invariant");
  4216       assert(_region_limit == NULL, "invariant");
  4217       if (_cm->verbose_low()) {
  4218         gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id);
  4219       }
  4220       HeapRegion* claimed_region = _cm->claim_region(_worker_id);
  4221       if (claimed_region != NULL) {
  4222         // Yes, we managed to claim one
  4223         statsOnly( ++_regions_claimed );
  4225         if (_cm->verbose_low()) {
  4226           gclog_or_tty->print_cr("[%u] we successfully claimed "
  4227                                  "region "PTR_FORMAT,
  4228                                  _worker_id, claimed_region);
  4229         }
  4231         setup_for_region(claimed_region);
  4232         assert(_curr_region == claimed_region, "invariant");
  4233       }
  4234       // It is important to call the regular clock here. It might take
  4235       // a while to claim a region if, for example, we hit a large
  4236       // block of empty regions. So we need to call the regular clock
  4237       // method once round the loop to make sure it's called
  4238       // frequently enough.
  4239       regular_clock_call();
  4240     }
  4242     if (!has_aborted() && _curr_region == NULL) {
  4243       assert(_cm->out_of_regions(),
  4244              "at this point we should be out of regions");
  4245     }
  4246   } while ( _curr_region != NULL && !has_aborted());
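         // We drop out of the do-while loop either because we have aborted
         // (e.g. the time target expired or the global mark stack overflowed)
         // or because there are no regions left to claim and scan.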
  4248   if (!has_aborted()) {
  4249     // We cannot check whether the global stack is empty, since other
  4250     // tasks might be pushing objects to it concurrently.
  4251     assert(_cm->out_of_regions(),
  4252            "at this point we should be out of regions");
  4254     if (_cm->verbose_low()) {
  4255       gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id);
  4256     }
  4258     // Try to reduce the number of available SATB buffers so that
  4259     // remark has less work to do.
  4260     drain_satb_buffers();
  4261   }
  4263   // Since we've done everything else, we can now totally drain the
  4264   // local queue and global stack.
  4265   drain_local_queue(false);
  4266   drain_global_stack(false);
  4268   // Attempt at work stealing from other tasks' queues.
  4269   if (do_stealing && !has_aborted()) {
  4270     // We have not aborted. This means that we have finished all that
  4271     // we could. Let's try to do some stealing...
  4273     // We cannot check whether the global stack is empty, since other
  4274     // tasks might be pushing objects to it concurrently.
  4275     assert(_cm->out_of_regions() && _task_queue->size() == 0,
  4276            "only way to reach here");
  4278     if (_cm->verbose_low()) {
  4279       gclog_or_tty->print_cr("[%u] starting to steal", _worker_id);
  4280     }
  4282     while (!has_aborted()) {
  4283       oop obj;
  4284       statsOnly( ++_steal_attempts );
  4286       if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
  4287         if (_cm->verbose_medium()) {
  4288           gclog_or_tty->print_cr("[%u] stolen "PTR_FORMAT" successfully",
  4289                                  _worker_id, (void*) obj);
  4290         }
  4292         statsOnly( ++_steals );
  4294         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
  4295                "any stolen object should be marked");
  4296         scan_object(obj);
  4298         // And since we're towards the end, let's totally drain the
  4299         // local queue and global stack.
  4300         drain_local_queue(false);
  4301         drain_global_stack(false);
  4302       } else {
  4303         break;
  4304       }
  4305     }
  4306   }
  4308   // If we are about to wrap up and go into termination, check if we
  4309   // should raise the overflow flag.
  4310   if (do_termination && !has_aborted()) {
  4311     if (_cm->force_overflow()->should_force()) {
  4312       _cm->set_has_overflown();
  4313       regular_clock_call();
  4314     }
  4315   }
  4317   // We still haven't aborted. Now, let's try to get into the
  4318   // termination protocol.
  4319   if (do_termination && !has_aborted()) {
  4320     // We cannot check whether the global stack is empty, since other
  4321     // tasks might be concurrently pushing objects on it.
  4322     // Separated the asserts so that we know which one fires.
  4323     assert(_cm->out_of_regions(), "only way to reach here");
  4324     assert(_task_queue->size() == 0, "only way to reach here");
  4326     if (_cm->verbose_low()) {
  4327       gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id);
  4328     }
  4330     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
  4332     // The CMTask class also extends the TerminatorTerminator class,
  4333     // hence its should_exit_termination() method will also decide
  4334     // whether to exit the termination protocol or not.
  4335     bool finished = (is_serial ||
  4336                      _cm->terminator()->offer_termination(this));
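           // In the serial case there is no other worker to coordinate with,
           // so termination is immediate. In the parallel case
           // offer_termination() returns true only once all workers have
           // offered termination, and false if more work shows up first.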
  4337     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
  4338     _termination_time_ms +=
  4339       termination_end_time_ms - _termination_start_time_ms;
  4341     if (finished) {
  4342       // We're all done.
  4344       if (_worker_id == 0) {
  4345         // let's allow task 0 to do this
  4346         if (concurrent()) {
  4347           assert(_cm->concurrent_marking_in_progress(), "invariant");
  4348           // we need to set this to false before the next
  4349           // safepoint. This way we ensure that the marking phase
  4350           // doesn't observe any more heap expansions.
  4351           _cm->clear_concurrent_marking_in_progress();
  4352         }
  4353       }
  4355       // We can now guarantee that the global stack is empty, since
  4356       // all other tasks have finished. We separated the guarantees so
  4357       // that, if a condition is false, we can immediately find out
  4358       // which one.
  4359       guarantee(_cm->out_of_regions(), "only way to reach here");
  4360       guarantee(_cm->mark_stack_empty(), "only way to reach here");
  4361       guarantee(_task_queue->size() == 0, "only way to reach here");
  4362       guarantee(!_cm->has_overflown(), "only way to reach here");
  4363       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
  4365       if (_cm->verbose_low()) {
  4366         gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id);
  4367       }
  4368     } else {
  4369       // Apparently there's more work to do. Let's abort this task. When
  4370       // it restarts we will hopefully find more things to do.
  4372       if (_cm->verbose_low()) {
  4373         gclog_or_tty->print_cr("[%u] apparently there is more work to do",
  4374                                _worker_id);
  4375       }
  4377       set_has_aborted();
  4378       statsOnly( ++_aborted_termination );
  4379     }
  4380   }
  4382   // Mainly for debugging purposes to make sure that a pointer to the
  4383   // closure which was statically allocated in this frame doesn't
  4384   // escape it by accident.
  4385   set_cm_oop_closure(NULL);
  4386   double end_time_ms = os::elapsedVTime() * 1000.0;
  4387   double elapsed_time_ms = end_time_ms - _start_time_ms;
  4388   // Update the step history.
  4389   _step_times_ms.add(elapsed_time_ms);
  4391   if (has_aborted()) {
  4392     // The task was aborted for some reason.
  4394     statsOnly( ++_aborted );
  4396     if (_has_timed_out) {
  4397       double diff_ms = elapsed_time_ms - _time_target_ms;
  4398       // Keep statistics of how well we did with respect to hitting
  4399       // our target only if we actually timed out (if we aborted for
  4400       // other reasons, then the results might get skewed).
  4401       _marking_step_diffs_ms.add(diff_ms);
  4404     if (_cm->has_overflown()) {
  4405       // This is the interesting one. We aborted because a global
  4406       // overflow was raised. This means we have to restart the
  4407       // marking phase and start iterating over regions. However, in
  4408       // order to do this we have to make sure that all tasks stop
  4409       // what they are doing and re-initialise in a safe manner. We
  4410       // will achieve this with the use of two barrier sync points.
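             // Roughly: every task enters the first barrier, which guarantees
             // that no task is still marking; each task then resets its local
             // state and enters the second barrier, so nothing restarts until
             // the global re-initialisation is complete.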
  4412       if (_cm->verbose_low()) {
  4413         gclog_or_tty->print_cr("[%u] detected overflow", _worker_id);
  4414       }
  4416       if (!is_serial) {
  4417         // We only need to enter the sync barrier if being called
  4418         // from a parallel context
  4419         _cm->enter_first_sync_barrier(_worker_id);
  4421         // When we exit this sync barrier we know that all tasks have
  4422         // stopped doing marking work. So, it's now safe to
  4423         // re-initialise our data structures. At the end of this method,
  4424         // task 0 will clear the global data structures.
  4425       }
  4427       statsOnly( ++_aborted_overflow );
  4429       // We clear the local state of this task...
  4430       clear_region_fields();
  4432       if (!is_serial) {
  4433         // ...and enter the second barrier.
  4434         _cm->enter_second_sync_barrier(_worker_id);
  4435       }
  4436       // At this point, if we're in the concurrent phase of
  4437       // marking, everything has been re-initialized and we're
  4438       // ready to restart.
  4439     }
  4441     if (_cm->verbose_low()) {
  4442       gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, "
  4443                              "elapsed = %1.2lfms <<<<<<<<<<",
  4444                              _worker_id, _time_target_ms, elapsed_time_ms);
  4445       if (_cm->has_aborted()) {
  4446         gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========",
  4447                                _worker_id);
  4448       }
  4449     }
  4450   } else {
  4451     if (_cm->verbose_low()) {
  4452       gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, "
  4453                              "elapsed = %1.2lfms <<<<<<<<<<",
  4454                              _worker_id, _time_target_ms, elapsed_time_ms);
  4455     }
  4456   }
  4458   _claimed = false;
  4459 }
  4461 CMTask::CMTask(uint worker_id,
  4462                ConcurrentMark* cm,
  4463                size_t* marked_bytes,
  4464                BitMap* card_bm,
  4465                CMTaskQueue* task_queue,
  4466                CMTaskQueueSet* task_queues)
  4467   : _g1h(G1CollectedHeap::heap()),
  4468     _worker_id(worker_id), _cm(cm),
  4469     _claimed(false),
  4470     _nextMarkBitMap(NULL), _hash_seed(17),
  4471     _task_queue(task_queue),
  4472     _task_queues(task_queues),
  4473     _cm_oop_closure(NULL),
  4474     _marked_bytes_array(marked_bytes),
  4475     _card_bm(card_bm) {
  4476   guarantee(task_queue != NULL, "invariant");
  4477   guarantee(task_queues != NULL, "invariant");
  4479   statsOnly( _clock_due_to_scanning = 0;
  4480              _clock_due_to_marking  = 0 );
  4482   _marking_step_diffs_ms.add(0.5);
  4483 }
  4485 // These are formatting macros that are used below to ensure
  4486 // consistent formatting. The *_H_* versions are used to format the
  4487 // header for a particular value and they should be kept consistent
  4488 // with the corresponding macro. Also note that most of the macros add
  4489 // the necessary white space (as a prefix) which makes them a bit
  4490 // easier to compose.
  4492 // All the output lines are prefixed with this string to be able to
  4493 // identify them easily in a large log file.
  4494 #define G1PPRL_LINE_PREFIX            "###"
  4496 #define G1PPRL_ADDR_BASE_FORMAT    " "PTR_FORMAT"-"PTR_FORMAT
  4497 #ifdef _LP64
  4498 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
  4499 #else // _LP64
  4500 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
  4501 #endif // _LP64
  4503 // For per-region info
  4504 #define G1PPRL_TYPE_FORMAT            "   %-4s"
  4505 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
  4506 #define G1PPRL_BYTE_FORMAT            "  "SIZE_FORMAT_W(9)
  4507 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
  4508 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
  4509 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
  4511 // For summary info
  4512 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  "tag":"G1PPRL_ADDR_BASE_FORMAT
  4513 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  "tag": "SIZE_FORMAT
  4514 #define G1PPRL_SUM_MB_FORMAT(tag)      "  "tag": %1.2f MB"
  4515 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
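// For illustration only (values made up), a per-region line composed from
// the formats above comes out roughly like:
//   ### HUMS 0x00000000f0000000-0x00000000f0400000    4194304    4194304          0            0.0
// and the SUMMARY line in the destructor reports capacity, used, prev-live
// and next-live in MB (with percentages of capacity).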
  4517 G1PrintRegionLivenessInfoClosure::
  4518 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
  4519   : _out(out),
  4520     _total_used_bytes(0), _total_capacity_bytes(0),
  4521     _total_prev_live_bytes(0), _total_next_live_bytes(0),
  4522     _hum_used_bytes(0), _hum_capacity_bytes(0),
  4523     _hum_prev_live_bytes(0), _hum_next_live_bytes(0) {
  4524   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  4525   MemRegion g1_committed = g1h->g1_committed();
  4526   MemRegion g1_reserved = g1h->g1_reserved();
  4527   double now = os::elapsedTime();
  4529   // Print the header of the output.
  4530   _out->cr();
  4531   _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
  4532   _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
  4533                  G1PPRL_SUM_ADDR_FORMAT("committed")
  4534                  G1PPRL_SUM_ADDR_FORMAT("reserved")
  4535                  G1PPRL_SUM_BYTE_FORMAT("region-size"),
  4536                  g1_committed.start(), g1_committed.end(),
  4537                  g1_reserved.start(), g1_reserved.end(),
  4538                  HeapRegion::GrainBytes);
  4539   _out->print_cr(G1PPRL_LINE_PREFIX);
  4540   _out->print_cr(G1PPRL_LINE_PREFIX
  4541                  G1PPRL_TYPE_H_FORMAT
  4542                  G1PPRL_ADDR_BASE_H_FORMAT
  4543                  G1PPRL_BYTE_H_FORMAT
  4544                  G1PPRL_BYTE_H_FORMAT
  4545                  G1PPRL_BYTE_H_FORMAT
  4546                  G1PPRL_DOUBLE_H_FORMAT,
  4547                  "type", "address-range",
  4548                  "used", "prev-live", "next-live", "gc-eff");
  4549   _out->print_cr(G1PPRL_LINE_PREFIX
  4550                  G1PPRL_TYPE_H_FORMAT
  4551                  G1PPRL_ADDR_BASE_H_FORMAT
  4552                  G1PPRL_BYTE_H_FORMAT
  4553                  G1PPRL_BYTE_H_FORMAT
  4554                  G1PPRL_BYTE_H_FORMAT
  4555                  G1PPRL_DOUBLE_H_FORMAT,
  4556                  "", "",
  4557                  "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)");
  4558 }
  4560 // It takes as a parameter a pointer to one of the _hum_* fields, deduces
  4561 // the corresponding value for a region in a humongous region series
  4562 // (either the region size, or what's left if the _hum_* field is less
  4563 // than the region size), and updates the _hum_* field accordingly.
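// For example (illustrative numbers): with a 1 MB region size and a
// humongous series whose _hum_used_bytes is 2.5 MB, three successive calls
// return 1 MB, 1 MB and 0.5 MB, after which the field is back to zero.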
  4564 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
  4565   size_t bytes = 0;
  4566   // The > 0 check is to deal with the prev and next live bytes which
  4567   // could be 0.
  4568   if (*hum_bytes > 0) {
  4569     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
  4570     *hum_bytes -= bytes;
  4571   }
  4572   return bytes;
  4573 }
  4575 // It deduces the values for a region in a humongous region series
  4576 // from the _hum_* fields and updates those accordingly. It assumes
  4577 // that the _hum_* fields have already been set up from the "starts
  4578 // humongous" region and that we visit the regions in address order.
  4579 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
  4580                                                      size_t* capacity_bytes,
  4581                                                      size_t* prev_live_bytes,
  4582                                                      size_t* next_live_bytes) {
  4583   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
  4584   *used_bytes      = get_hum_bytes(&_hum_used_bytes);
  4585   *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
  4586   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
  4587   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
  4588 }
  4590 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
  4591   const char* type = "";
  4592   HeapWord* bottom       = r->bottom();
  4593   HeapWord* end          = r->end();
  4594   size_t capacity_bytes  = r->capacity();
  4595   size_t used_bytes      = r->used();
  4596   size_t prev_live_bytes = r->live_bytes();
  4597   size_t next_live_bytes = r->next_live_bytes();
  4598   double gc_eff          = r->gc_efficiency();
  4599   if (r->used() == 0) {
  4600     type = "FREE";
  4601   } else if (r->is_survivor()) {
  4602     type = "SURV";
  4603   } else if (r->is_young()) {
  4604     type = "EDEN";
  4605   } else if (r->startsHumongous()) {
  4606     type = "HUMS";
  4608     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
  4609            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
  4610            "they should have been zeroed after the last time we used them");
  4611     // Set up the _hum_* fields.
  4612     _hum_capacity_bytes  = capacity_bytes;
  4613     _hum_used_bytes      = used_bytes;
  4614     _hum_prev_live_bytes = prev_live_bytes;
  4615     _hum_next_live_bytes = next_live_bytes;
  4616     get_hum_bytes(&used_bytes, &capacity_bytes,
  4617                   &prev_live_bytes, &next_live_bytes);
  4618     end = bottom + HeapRegion::GrainWords;
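           // (For a "starts humongous" region, end() appears to extend over
           // the whole humongous series, so the printed address range is
           // clipped back to a single region here.)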
  4619   } else if (r->continuesHumongous()) {
  4620     type = "HUMC";
  4621     get_hum_bytes(&used_bytes, &capacity_bytes,
  4622                   &prev_live_bytes, &next_live_bytes);
  4623     assert(end == bottom + HeapRegion::GrainWords, "invariant");
  4624   } else {
  4625     type = "OLD";
  4626   }
  4628   _total_used_bytes      += used_bytes;
  4629   _total_capacity_bytes  += capacity_bytes;
  4630   _total_prev_live_bytes += prev_live_bytes;
  4631   _total_next_live_bytes += next_live_bytes;
  4633   // Print a line for this particular region.
  4634   _out->print_cr(G1PPRL_LINE_PREFIX
  4635                  G1PPRL_TYPE_FORMAT
  4636                  G1PPRL_ADDR_BASE_FORMAT
  4637                  G1PPRL_BYTE_FORMAT
  4638                  G1PPRL_BYTE_FORMAT
  4639                  G1PPRL_BYTE_FORMAT
  4640                  G1PPRL_DOUBLE_FORMAT,
  4641                  type, bottom, end,
  4642                  used_bytes, prev_live_bytes, next_live_bytes, gc_eff);
  4644   return false;
  4645 }
  4647 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
  4648   // Print the footer of the output.
  4649   _out->print_cr(G1PPRL_LINE_PREFIX);
  4650   _out->print_cr(G1PPRL_LINE_PREFIX
  4651                  " SUMMARY"
  4652                  G1PPRL_SUM_MB_FORMAT("capacity")
  4653                  G1PPRL_SUM_MB_PERC_FORMAT("used")
  4654                  G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
  4655                  G1PPRL_SUM_MB_PERC_FORMAT("next-live"),
  4656                  bytes_to_mb(_total_capacity_bytes),
  4657                  bytes_to_mb(_total_used_bytes),
  4658                  perc(_total_used_bytes, _total_capacity_bytes),
  4659                  bytes_to_mb(_total_prev_live_bytes),
  4660                  perc(_total_prev_live_bytes, _total_capacity_bytes),
  4661                  bytes_to_mb(_total_next_live_bytes),
  4662                  perc(_total_next_live_bytes, _total_capacity_bytes));
  4663   _out->cr();
  4664 }
