src/share/vm/gc_implementation/g1/concurrentMark.cpp

author:     mgerdin
date:       Thu, 28 Mar 2013 10:27:28 +0100
changeset:  4853:2e093b564241
parent:     4789:1179172e9ec9
child:      4904:7b835924c31c
7014552: gc/lock/jni/jnilockXXX works too slow on 1-processor machine
Summary: Keep a counter of how many times we were stalled by the GC locker, add a diagnostic flag which sets the limit.
Reviewed-by: brutisso, ehelin, johnc

     1 /*
     2  * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.
     8  *
     9  * This code is distributed in the hope that it will be useful, but WITHOUT
    10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    12  * version 2 for more details (a copy is included in the LICENSE file that
    13  * accompanied this code).
    14  *
    15  * You should have received a copy of the GNU General Public License version
    16  * 2 along with this work; if not, write to the Free Software Foundation,
    17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    18  *
    19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    20  * or visit www.oracle.com if you need additional information or have any
    21  * questions.
    22  *
    23  */
    25 #include "precompiled.hpp"
    26 #include "classfile/symbolTable.hpp"
    27 #include "gc_implementation/g1/concurrentMark.inline.hpp"
    28 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
    29 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
    30 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
    31 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
    32 #include "gc_implementation/g1/g1Log.hpp"
    33 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
    34 #include "gc_implementation/g1/g1RemSet.hpp"
    35 #include "gc_implementation/g1/heapRegion.inline.hpp"
    36 #include "gc_implementation/g1/heapRegionRemSet.hpp"
    37 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
    38 #include "gc_implementation/shared/vmGCOperations.hpp"
    39 #include "memory/genOopClosures.inline.hpp"
    40 #include "memory/referencePolicy.hpp"
    41 #include "memory/resourceArea.hpp"
    42 #include "oops/oop.inline.hpp"
    43 #include "runtime/handles.inline.hpp"
    44 #include "runtime/java.hpp"
    45 #include "services/memTracker.hpp"
    47 // Concurrent marking bit map wrapper
    49 CMBitMapRO::CMBitMapRO(int shifter) :
    50   _bm(),
    51   _shifter(shifter) {
    52   _bmStartWord = 0;
    53   _bmWordSize = 0;
    54 }
    56 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
    57                                                HeapWord* limit) const {
    58   // First we must round addr *up* to a possible object boundary.
    59   addr = (HeapWord*)align_size_up((intptr_t)addr,
    60                                   HeapWordSize << _shifter);
    61   size_t addrOffset = heapWordToOffset(addr);
    62   if (limit == NULL) {
    63     limit = _bmStartWord + _bmWordSize;
    64   }
    65   size_t limitOffset = heapWordToOffset(limit);
    66   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
    67   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
    68   assert(nextAddr >= addr, "get_next_one postcondition");
    69   assert(nextAddr == limit || isMarked(nextAddr),
    70          "get_next_one postcondition");
    71   return nextAddr;
    72 }
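        // A note on the addressing above (a sketch of the inline helpers, not a
        // definitive spec): the bitmap keeps one bit per 2^_shifter heap words
        // (see covers() and CMBitMap::allocate() below), so in effect
        //   heapWordToOffset(addr) == (addr - _bmStartWord) >> _shifter
        //   offsetToHeapWord(off)  == _bmStartWord + (off << _shifter)
        // which is why addr is first rounded up to a (HeapWordSize << _shifter)
        // byte boundary before being converted to a bit offset.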
    74 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
    75                                                  HeapWord* limit) const {
    76   size_t addrOffset = heapWordToOffset(addr);
    77   if (limit == NULL) {
    78     limit = _bmStartWord + _bmWordSize;
    79   }
    80   size_t limitOffset = heapWordToOffset(limit);
    81   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
    82   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
    83   assert(nextAddr >= addr, "get_next_one postcondition");
    84   assert(nextAddr == limit || !isMarked(nextAddr),
    85          "get_next_one postcondition");
    86   return nextAddr;
    87 }
    89 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
    90   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
    91   return (int) (diff >> _shifter);
    92 }
    94 #ifndef PRODUCT
    95 bool CMBitMapRO::covers(ReservedSpace heap_rs) const {
    96   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
    97   assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
    98          "size inconsistency");
    99   return _bmStartWord == (HeapWord*)(heap_rs.base()) &&
   100          _bmWordSize  == heap_rs.size()>>LogHeapWordSize;
   101 }
   102 #endif
   104 bool CMBitMap::allocate(ReservedSpace heap_rs) {
   105   _bmStartWord = (HeapWord*)(heap_rs.base());
   106   _bmWordSize  = heap_rs.size()/HeapWordSize;    // heap_rs.size() is in bytes
   107   ReservedSpace brs(ReservedSpace::allocation_align_size_up(
   108                      (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
   109   if (!brs.is_reserved()) {
   110     warning("ConcurrentMark marking bit map allocation failure");
   111     return false;
   112   }
   113   MemTracker::record_virtual_memory_type((address)brs.base(), mtGC);
   114   // For now we'll just commit all of the bit map up front.
   115   // Later on we'll try to be more parsimonious with swap.
   116   if (!_virtual_space.initialize(brs, brs.size())) {
   117     warning("ConcurrentMark marking bit map backing store failure");
   118     return false;
   119   }
   120   assert(_virtual_space.committed_size() == brs.size(),
   121          "didn't reserve backing store for all of concurrent marking bit map?");
   122   _bm.set_map((uintptr_t*)_virtual_space.low());
   123   assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
   124          _bmWordSize, "inconsistency in bit map sizing");
   125   _bm.set_size(_bmWordSize >> _shifter);
   126   return true;
   127 }
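        // Sizing arithmetic above, with a hypothetical worked example: the map
        // needs one bit per 2^_shifter heap words, i.e.
        // _bmWordSize >> (_shifter + LogBitsPerByte) bytes (plus one byte of
        // slack, rounded up to the allocation alignment). For a 1 GB heap with
        // 8-byte heap words and _shifter == 0 that is 2^27 bits, i.e. 16 MB of
        // backing store, all committed up front as noted above.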
   129 void CMBitMap::clearAll() {
   130   _bm.clear();
   131   return;
   132 }
   134 void CMBitMap::markRange(MemRegion mr) {
   135   mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
   136   assert(!mr.is_empty(), "unexpected empty region");
   137   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
   138           ((HeapWord *) mr.end())),
   139          "markRange memory region end is not card aligned");
   140   // convert address range into offset range
   141   _bm.at_put_range(heapWordToOffset(mr.start()),
   142                    heapWordToOffset(mr.end()), true);
   143 }
   145 void CMBitMap::clearRange(MemRegion mr) {
   146   mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
   147   assert(!mr.is_empty(), "unexpected empty region");
   148   // convert address range into offset range
   149   _bm.at_put_range(heapWordToOffset(mr.start()),
   150                    heapWordToOffset(mr.end()), false);
   151 }
   153 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
   154                                             HeapWord* end_addr) {
   155   HeapWord* start = getNextMarkedWordAddress(addr);
   156   start = MIN2(start, end_addr);
   157   HeapWord* end   = getNextUnmarkedWordAddress(start);
   158   end = MIN2(end, end_addr);
   159   assert(start <= end, "Consistency check");
   160   MemRegion mr(start, end);
   161   if (!mr.is_empty()) {
   162     clearRange(mr);
   163   }
   164   return mr;
   165 }
   167 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
   168   _base(NULL), _cm(cm)
   169 #ifdef ASSERT
   170   , _drain_in_progress(false)
   171   , _drain_in_progress_yields(false)
   172 #endif
   173 {}
   175 bool CMMarkStack::allocate(size_t capacity) {
   176   // allocate a stack of the requisite depth
   177   ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
   178   if (!rs.is_reserved()) {
   179     warning("ConcurrentMark MarkStack allocation failure");
   180     return false;
   181   }
   182   MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
   183   if (!_virtual_space.initialize(rs, rs.size())) {
   184     warning("ConcurrentMark MarkStack backing store failure");
   185     // Release the virtual memory reserved for the marking stack
   186     rs.release();
   187     return false;
   188   }
   189   assert(_virtual_space.committed_size() == rs.size(),
   190          "Didn't reserve backing store for all of ConcurrentMark stack?");
   191   _base = (oop*) _virtual_space.low();
   192   setEmpty();
   193   _capacity = (jint) capacity;
   194   _saved_index = -1;
   195   _should_expand = false;
   196   NOT_PRODUCT(_max_depth = 0);
   197   return true;
   198 }
   200 void CMMarkStack::expand() {
    201   // Called during remark if we've overflowed the marking stack during marking.
    202   assert(isEmpty(), "stack should have been emptied while handling overflow");
   203   assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
   204   // Clear expansion flag
   205   _should_expand = false;
   206   if (_capacity == (jint) MarkStackSizeMax) {
   207     if (PrintGCDetails && Verbose) {
   208       gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
   209     }
   210     return;
   211   }
   212   // Double capacity if possible
   213   jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
   214   // Do not give up existing stack until we have managed to
   215   // get the double capacity that we desired.
   216   ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
   217                                                            sizeof(oop)));
   218   if (rs.is_reserved()) {
   219     // Release the backing store associated with old stack
   220     _virtual_space.release();
   221     // Reinitialize virtual space for new stack
   222     if (!_virtual_space.initialize(rs, rs.size())) {
   223       fatal("Not enough swap for expanded marking stack capacity");
   224     }
   225     _base = (oop*)(_virtual_space.low());
   226     _index = 0;
   227     _capacity = new_capacity;
   228   } else {
   229     if (PrintGCDetails && Verbose) {
   230       // Failed to double capacity, continue;
   231       gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
   232                           SIZE_FORMAT"K to " SIZE_FORMAT"K",
   233                           _capacity / K, new_capacity / K);
   234     }
   235   }
   236 }
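        // Note on the expansion protocol above: the doubled reservation is made
        // before the old backing store is released, so both are briefly reserved
        // at the same time (e.g. 32 MB + 64 MB when growing a hypothetical
        // 4M-entry stack of 8-byte oop pointers). If the larger reservation
        // fails we simply keep the current stack; that is benign because an
        // overflow only forces marking to restart, not to fail.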
   238 void CMMarkStack::set_should_expand() {
    239   // If we're resetting the marking state because of a
   240   // marking stack overflow, record that we should, if
   241   // possible, expand the stack.
   242   _should_expand = _cm->has_overflown();
   243 }
   245 CMMarkStack::~CMMarkStack() {
   246   if (_base != NULL) {
   247     _base = NULL;
   248     _virtual_space.release();
   249   }
   250 }
   252 void CMMarkStack::par_push(oop ptr) {
   253   while (true) {
   254     if (isFull()) {
   255       _overflow = true;
   256       return;
   257     }
   258     // Otherwise...
   259     jint index = _index;
   260     jint next_index = index+1;
   261     jint res = Atomic::cmpxchg(next_index, &_index, index);
   262     if (res == index) {
   263       _base[index] = ptr;
   264       // Note that we don't maintain this atomically.  We could, but it
   265       // doesn't seem necessary.
   266       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
   267       return;
   268     }
   269     // Otherwise, we need to try again.
   270   }
   271 }
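        // Roughly, the lock-free push above works like this: read _index, try to
        // advance it with a CAS, and the winner of the race owns slot 'index' and
        // stores the oop into it afterwards. If the stack is full the entry is
        // dropped and _overflow is recorded instead; the marking cycle notices
        // the overflow later and restarts, expanding the stack if it can.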
   273 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
   274   while (true) {
   275     if (isFull()) {
   276       _overflow = true;
   277       return;
   278     }
   279     // Otherwise...
   280     jint index = _index;
   281     jint next_index = index + n;
   282     if (next_index > _capacity) {
   283       _overflow = true;
   284       return;
   285     }
   286     jint res = Atomic::cmpxchg(next_index, &_index, index);
   287     if (res == index) {
   288       for (int i = 0; i < n; i++) {
   289         int  ind = index + i;
   290         assert(ind < _capacity, "By overflow test above.");
   291         _base[ind] = ptr_arr[i];
   292       }
   293       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
   294       return;
   295     }
   296     // Otherwise, we need to try again.
   297   }
   298 }
   300 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
   301   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
   302   jint start = _index;
   303   jint next_index = start + n;
   304   if (next_index > _capacity) {
   305     _overflow = true;
   306     return;
   307   }
   308   // Otherwise.
   309   _index = next_index;
   310   for (int i = 0; i < n; i++) {
   311     int ind = start + i;
   312     assert(ind < _capacity, "By overflow test above.");
   313     _base[ind] = ptr_arr[i];
   314   }
   315   NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
   316 }
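        // Unlike par_adjoin_arr() above, which claims its slots with a CAS loop,
        // this bulk push serializes on ParGCRareEvent_lock; the lock's name
        // suggests this path is rare enough that plain locking is cheap.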
   318 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
   319   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
   320   jint index = _index;
   321   if (index == 0) {
   322     *n = 0;
   323     return false;
   324   } else {
   325     int k = MIN2(max, index);
   326     jint  new_ind = index - k;
   327     for (int j = 0; j < k; j++) {
   328       ptr_arr[j] = _base[new_ind + j];
   329     }
   330     _index = new_ind;
   331     *n = k;
   332     return true;
   333   }
   334 }
   336 template<class OopClosureClass>
   337 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
   338   assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
   339          || SafepointSynchronize::is_at_safepoint(),
   340          "Drain recursion must be yield-safe.");
   341   bool res = true;
   342   debug_only(_drain_in_progress = true);
   343   debug_only(_drain_in_progress_yields = yield_after);
   344   while (!isEmpty()) {
   345     oop newOop = pop();
   346     assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
   347     assert(newOop->is_oop(), "Expected an oop");
   348     assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
   349            "only grey objects on this stack");
   350     newOop->oop_iterate(cl);
   351     if (yield_after && _cm->do_yield_check()) {
   352       res = false;
   353       break;
   354     }
   355   }
   356   debug_only(_drain_in_progress = false);
   357   return res;
   358 }
   360 void CMMarkStack::note_start_of_gc() {
   361   assert(_saved_index == -1,
   362          "note_start_of_gc()/end_of_gc() bracketed incorrectly");
   363   _saved_index = _index;
   364 }
   366 void CMMarkStack::note_end_of_gc() {
   367   // This is intentionally a guarantee, instead of an assert. If we
   368   // accidentally add something to the mark stack during GC, it
    369   // will be a correctness issue so it's better if we crash. We'll
   370   // only check this once per GC anyway, so it won't be a performance
   371   // issue in any way.
   372   guarantee(_saved_index == _index,
   373             err_msg("saved index: %d index: %d", _saved_index, _index));
   374   _saved_index = -1;
   375 }
   377 void CMMarkStack::oops_do(OopClosure* f) {
   378   assert(_saved_index == _index,
   379          err_msg("saved index: %d index: %d", _saved_index, _index));
   380   for (int i = 0; i < _index; i += 1) {
   381     f->do_oop(&_base[i]);
   382   }
   383 }
   385 bool ConcurrentMark::not_yet_marked(oop obj) const {
   386   return _g1h->is_obj_ill(obj);
   387 }
   389 CMRootRegions::CMRootRegions() :
   390   _young_list(NULL), _cm(NULL), _scan_in_progress(false),
   391   _should_abort(false),  _next_survivor(NULL) { }
   393 void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
   394   _young_list = g1h->young_list();
   395   _cm = cm;
   396 }
   398 void CMRootRegions::prepare_for_scan() {
   399   assert(!scan_in_progress(), "pre-condition");
   401   // Currently, only survivors can be root regions.
   402   assert(_next_survivor == NULL, "pre-condition");
   403   _next_survivor = _young_list->first_survivor_region();
   404   _scan_in_progress = (_next_survivor != NULL);
   405   _should_abort = false;
   406 }
   408 HeapRegion* CMRootRegions::claim_next() {
   409   if (_should_abort) {
   410     // If someone has set the should_abort flag, we return NULL to
   411     // force the caller to bail out of their loop.
   412     return NULL;
   413   }
   415   // Currently, only survivors can be root regions.
   416   HeapRegion* res = _next_survivor;
   417   if (res != NULL) {
   418     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
   419     // Read it again in case it changed while we were waiting for the lock.
   420     res = _next_survivor;
   421     if (res != NULL) {
   422       if (res == _young_list->last_survivor_region()) {
   423         // We just claimed the last survivor so store NULL to indicate
   424         // that we're done.
   425         _next_survivor = NULL;
   426       } else {
   427         _next_survivor = res->get_next_young_region();
   428       }
   429     } else {
   430       // Someone else claimed the last survivor while we were trying
   431       // to take the lock so nothing else to do.
   432     }
   433   }
   434   assert(res == NULL || res->is_survivor(), "post-condition");
   436   return res;
   437 }
   439 void CMRootRegions::scan_finished() {
   440   assert(scan_in_progress(), "pre-condition");
   442   // Currently, only survivors can be root regions.
   443   if (!_should_abort) {
   444     assert(_next_survivor == NULL, "we should have claimed all survivors");
   445   }
   446   _next_survivor = NULL;
   448   {
   449     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
   450     _scan_in_progress = false;
   451     RootRegionScan_lock->notify_all();
   452   }
   453 }
   455 bool CMRootRegions::wait_until_scan_finished() {
   456   if (!scan_in_progress()) return false;
   458   {
   459     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
   460     while (scan_in_progress()) {
   461       RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
   462     }
   463   }
   464   return true;
   465 }
   467 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
   468 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
   469 #endif // _MSC_VER
   471 uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
   472   return MAX2((n_par_threads + 2) / 4, 1U);
   473 }
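        // In integer arithmetic (n_par_threads + 2) / 4 is roughly a quarter of
        // the parallel GC threads, rounded, and never less than one: e.g. 1 or 2
        // parallel threads map to 1 marking thread, 8 map to 2, and 13 map to 3.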
   475 ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, ReservedSpace heap_rs) :
   476   _g1h(g1h),
   477   _markBitMap1(MinObjAlignment - 1),
   478   _markBitMap2(MinObjAlignment - 1),
   480   _parallel_marking_threads(0),
   481   _max_parallel_marking_threads(0),
   482   _sleep_factor(0.0),
   483   _marking_task_overhead(1.0),
   484   _cleanup_sleep_factor(0.0),
   485   _cleanup_task_overhead(1.0),
   486   _cleanup_list("Cleanup List"),
   487   _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
   488   _card_bm((heap_rs.size() + CardTableModRefBS::card_size - 1) >>
   489             CardTableModRefBS::card_shift,
   490             false /* in_resource_area*/),
   492   _prevMarkBitMap(&_markBitMap1),
   493   _nextMarkBitMap(&_markBitMap2),
   495   _markStack(this),
   496   // _finger set in set_non_marking_state
   498   _max_worker_id(MAX2((uint)ParallelGCThreads, 1U)),
   499   // _active_tasks set in set_non_marking_state
   500   // _tasks set inside the constructor
   501   _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
   502   _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),
   504   _has_overflown(false),
   505   _concurrent(false),
   506   _has_aborted(false),
   507   _restart_for_overflow(false),
   508   _concurrent_marking_in_progress(false),
   510   // _verbose_level set below
   512   _init_times(),
   513   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
   514   _cleanup_times(),
   515   _total_counting_time(0.0),
   516   _total_rs_scrub_time(0.0),
   518   _parallel_workers(NULL),
   520   _count_card_bitmaps(NULL),
   521   _count_marked_bytes(NULL),
   522   _completed_initialization(false) {
   523   CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
   524   if (verbose_level < no_verbose) {
   525     verbose_level = no_verbose;
   526   }
   527   if (verbose_level > high_verbose) {
   528     verbose_level = high_verbose;
   529   }
   530   _verbose_level = verbose_level;
   532   if (verbose_low()) {
   533     gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
   534                            "heap end = "PTR_FORMAT, _heap_start, _heap_end);
   535   }
   537   if (!_markBitMap1.allocate(heap_rs)) {
   538     warning("Failed to allocate first CM bit map");
   539     return;
   540   }
   541   if (!_markBitMap2.allocate(heap_rs)) {
   542     warning("Failed to allocate second CM bit map");
   543     return;
   544   }
   546   // Create & start a ConcurrentMark thread.
   547   _cmThread = new ConcurrentMarkThread(this);
   548   assert(cmThread() != NULL, "CM Thread should have been created");
   549   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
   551   assert(CGC_lock != NULL, "Where's the CGC_lock?");
   552   assert(_markBitMap1.covers(heap_rs), "_markBitMap1 inconsistency");
   553   assert(_markBitMap2.covers(heap_rs), "_markBitMap2 inconsistency");
   555   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
   556   satb_qs.set_buffer_size(G1SATBBufferSize);
   558   _root_regions.init(_g1h, this);
   560   if (ConcGCThreads > ParallelGCThreads) {
   561     warning("Can't have more ConcGCThreads (" UINT32_FORMAT ") "
   562             "than ParallelGCThreads (" UINT32_FORMAT ").",
   563             ConcGCThreads, ParallelGCThreads);
   564     return;
   565   }
   566   if (ParallelGCThreads == 0) {
   567     // if we are not running with any parallel GC threads we will not
   568     // spawn any marking threads either
   569     _parallel_marking_threads =       0;
   570     _max_parallel_marking_threads =   0;
   571     _sleep_factor             =     0.0;
   572     _marking_task_overhead    =     1.0;
   573   } else {
   574     if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
   575       // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
   576       // if both are set
   577       _sleep_factor             = 0.0;
   578       _marking_task_overhead    = 1.0;
   579     } else if (G1MarkingOverheadPercent > 0) {
   580       // We will calculate the number of parallel marking threads based
   581       // on a target overhead with respect to the soft real-time goal
   582       double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
   583       double overall_cm_overhead =
   584         (double) MaxGCPauseMillis * marking_overhead /
   585         (double) GCPauseIntervalMillis;
   586       double cpu_ratio = 1.0 / (double) os::processor_count();
   587       double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
   588       double marking_task_overhead =
   589         overall_cm_overhead / marking_thread_num *
   590                                                 (double) os::processor_count();
   591       double sleep_factor =
   592                          (1.0 - marking_task_overhead) / marking_task_overhead;
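              // A worked example with hypothetical (non-default) settings:
              // G1MarkingOverheadPercent=10, MaxGCPauseMillis=200,
              // GCPauseIntervalMillis=1000 and 8 processors give
              //   overall_cm_overhead    = 200 * 0.10 / 1000   = 0.02
              //   cpu_ratio              = 1 / 8               = 0.125
              //   marking_thread_num     = ceil(0.02 / 0.125)  = 1
              //   marking_task_overhead  = 0.02 / 1 * 8        = 0.16
              //   sleep_factor           = (1 - 0.16) / 0.16   = 5.25
              // i.e. a single marking thread that sleeps roughly 5.25 ms for
              // every millisecond it spends marking.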
   594       FLAG_SET_ERGO(uintx, ConcGCThreads, (uint) marking_thread_num);
   595       _sleep_factor             = sleep_factor;
   596       _marking_task_overhead    = marking_task_overhead;
   597     } else {
   598       // Calculate the number of parallel marking threads by scaling
   599       // the number of parallel GC threads.
   600       uint marking_thread_num = scale_parallel_threads((uint) ParallelGCThreads);
   601       FLAG_SET_ERGO(uintx, ConcGCThreads, marking_thread_num);
   602       _sleep_factor             = 0.0;
   603       _marking_task_overhead    = 1.0;
   604     }
   606     assert(ConcGCThreads > 0, "Should have been set");
   607     _parallel_marking_threads = (uint) ConcGCThreads;
   608     _max_parallel_marking_threads = _parallel_marking_threads;
   610     if (parallel_marking_threads() > 1) {
   611       _cleanup_task_overhead = 1.0;
   612     } else {
   613       _cleanup_task_overhead = marking_task_overhead();
   614     }
   615     _cleanup_sleep_factor =
   616                      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
   618 #if 0
   619     gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
   620     gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
   621     gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
   622     gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
   623     gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
   624 #endif
   626     guarantee(parallel_marking_threads() > 0, "peace of mind");
   627     _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
   628          _max_parallel_marking_threads, false, true);
   629     if (_parallel_workers == NULL) {
   630       vm_exit_during_initialization("Failed necessary allocation.");
   631     } else {
   632       _parallel_workers->initialize_workers();
   633     }
   634   }
   636   if (FLAG_IS_DEFAULT(MarkStackSize)) {
   637     uintx mark_stack_size =
   638       MIN2(MarkStackSizeMax,
   639           MAX2(MarkStackSize, (uintx) (parallel_marking_threads() * TASKQUEUE_SIZE)));
   640     // Verify that the calculated value for MarkStackSize is in range.
   641     // It would be nice to use the private utility routine from Arguments.
   642     if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
   643       warning("Invalid value calculated for MarkStackSize (" UINTX_FORMAT "): "
   644               "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
   645               mark_stack_size, 1, MarkStackSizeMax);
   646       return;
   647     }
   648     FLAG_SET_ERGO(uintx, MarkStackSize, mark_stack_size);
   649   } else {
   650     // Verify MarkStackSize is in range.
   651     if (FLAG_IS_CMDLINE(MarkStackSize)) {
   652       if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
   653         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
   654           warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT "): "
   655                   "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
   656                   MarkStackSize, 1, MarkStackSizeMax);
   657           return;
   658         }
   659       } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
   660         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
   661           warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT ")"
   662                   " or for MarkStackSizeMax (" UINTX_FORMAT ")",
   663                   MarkStackSize, MarkStackSizeMax);
   664           return;
   665         }
   666       }
   667     }
   668   }
   670   if (!_markStack.allocate(MarkStackSize)) {
   671     warning("Failed to allocate CM marking stack");
   672     return;
   673   }
   675   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
   676   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
   678   _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
   679   _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
   681   BitMap::idx_t card_bm_size = _card_bm.size();
   683   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
   684   _active_tasks = _max_worker_id;
   686   size_t max_regions = (size_t) _g1h->max_regions();
   687   for (uint i = 0; i < _max_worker_id; ++i) {
   688     CMTaskQueue* task_queue = new CMTaskQueue();
   689     task_queue->initialize();
   690     _task_queues->register_queue(i, task_queue);
   692     _count_card_bitmaps[i] = BitMap(card_bm_size, false);
   693     _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);
   695     _tasks[i] = new CMTask(i, this,
   696                            _count_marked_bytes[i],
   697                            &_count_card_bitmaps[i],
   698                            task_queue, _task_queues);
   700     _accum_task_vtime[i] = 0.0;
   701   }
   703   // Calculate the card number for the bottom of the heap. Used
   704   // in biasing indexes into the accounting card bitmaps.
   705   _heap_bottom_card_num =
   706     intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
   707                                 CardTableModRefBS::card_shift);
   709   // Clear all the liveness counting data
   710   clear_all_count_data();
   712   // so that the call below can read a sensible value
   713   _heap_start = (HeapWord*) heap_rs.base();
   714   set_non_marking_state();
   715   _completed_initialization = true;
   716 }
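        // A rough sizing note for the per-worker counting data allocated above
        // (hypothetical example, not a measurement): the card bitmaps keep one
        // bit per 512-byte card, so a 1 GB heap has 2M cards and each worker's
        // _count_card_bitmaps entry is about 256 KB; _count_marked_bytes adds
        // one size_t per heap region per worker on top of that.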
   718 void ConcurrentMark::update_g1_committed(bool force) {
   719   // If concurrent marking is not in progress, then we do not need to
   720   // update _heap_end.
   721   if (!concurrent_marking_in_progress() && !force) return;
   723   MemRegion committed = _g1h->g1_committed();
   724   assert(committed.start() == _heap_start, "start shouldn't change");
   725   HeapWord* new_end = committed.end();
   726   if (new_end > _heap_end) {
   727     // The heap has been expanded.
   729     _heap_end = new_end;
   730   }
   731   // Notice that the heap can also shrink. However, this only happens
   732   // during a Full GC (at least currently) and the entire marking
   733   // phase will bail out and the task will not be restarted. So, let's
   734   // do nothing.
   735 }
   737 void ConcurrentMark::reset() {
    738   // Starting values for these two. This should be called in a STW
    739   // phase. CM will be notified of any future g1_committed expansions;
    740   // these will happen at the end of evacuation pauses, when tasks are
    741   // inactive.
   742   MemRegion committed = _g1h->g1_committed();
   743   _heap_start = committed.start();
   744   _heap_end   = committed.end();
   746   // Separated the asserts so that we know which one fires.
   747   assert(_heap_start != NULL, "heap bounds should look ok");
   748   assert(_heap_end != NULL, "heap bounds should look ok");
   749   assert(_heap_start < _heap_end, "heap bounds should look ok");
   751   // Reset all the marking data structures and any necessary flags
   752   reset_marking_state();
   754   if (verbose_low()) {
   755     gclog_or_tty->print_cr("[global] resetting");
   756   }
   758   // We do reset all of them, since different phases will use
   759   // different number of active threads. So, it's easiest to have all
   760   // of them ready.
   761   for (uint i = 0; i < _max_worker_id; ++i) {
   762     _tasks[i]->reset(_nextMarkBitMap);
   763   }
    765   // We need this to make sure that the flag is on during the evac
    766   // pause with the initial mark piggy-backed on it.
   767   set_concurrent_marking_in_progress();
   768 }
   771 void ConcurrentMark::reset_marking_state(bool clear_overflow) {
   772   _markStack.set_should_expand();
   773   _markStack.setEmpty();        // Also clears the _markStack overflow flag
   774   if (clear_overflow) {
   775     clear_has_overflown();
   776   } else {
   777     assert(has_overflown(), "pre-condition");
   778   }
   779   _finger = _heap_start;
   781   for (uint i = 0; i < _max_worker_id; ++i) {
   782     CMTaskQueue* queue = _task_queues->queue(i);
   783     queue->set_empty();
   784   }
   785 }
   787 void ConcurrentMark::set_concurrency(uint active_tasks) {
   788   assert(active_tasks <= _max_worker_id, "we should not have more");
   790   _active_tasks = active_tasks;
   791   // Need to update the three data structures below according to the
   792   // number of active threads for this phase.
   793   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
   794   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
   795   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
   796 }
   798 void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
   799   set_concurrency(active_tasks);
   801   _concurrent = concurrent;
   802   // We propagate this to all tasks, not just the active ones.
   803   for (uint i = 0; i < _max_worker_id; ++i)
   804     _tasks[i]->set_concurrent(concurrent);
   806   if (concurrent) {
   807     set_concurrent_marking_in_progress();
   808   } else {
   809     // We currently assume that the concurrent flag has been set to
   810     // false before we start remark. At this point we should also be
   811     // in a STW phase.
   812     assert(!concurrent_marking_in_progress(), "invariant");
   813     assert(_finger == _heap_end,
   814            err_msg("only way to get here: _finger: "PTR_FORMAT", _heap_end: "PTR_FORMAT,
   815                    _finger, _heap_end));
   816     update_g1_committed(true);
   817   }
   818 }
   820 void ConcurrentMark::set_non_marking_state() {
   821   // We set the global marking state to some default values when we're
   822   // not doing marking.
   823   reset_marking_state();
   824   _active_tasks = 0;
   825   clear_concurrent_marking_in_progress();
   826 }
   828 ConcurrentMark::~ConcurrentMark() {
   829   // The ConcurrentMark instance is never freed.
   830   ShouldNotReachHere();
   831 }
   833 void ConcurrentMark::clearNextBitmap() {
   834   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   835   G1CollectorPolicy* g1p = g1h->g1_policy();
   837   // Make sure that the concurrent mark thread looks to still be in
   838   // the current cycle.
   839   guarantee(cmThread()->during_cycle(), "invariant");
   841   // We are finishing up the current cycle by clearing the next
   842   // marking bitmap and getting it ready for the next cycle. During
   843   // this time no other cycle can start. So, let's make sure that this
   844   // is the case.
   845   guarantee(!g1h->mark_in_progress(), "invariant");
   847   // clear the mark bitmap (no grey objects to start with).
   848   // We need to do this in chunks and offer to yield in between
   849   // each chunk.
   850   HeapWord* start  = _nextMarkBitMap->startWord();
   851   HeapWord* end    = _nextMarkBitMap->endWord();
   852   HeapWord* cur    = start;
   853   size_t chunkSize = M;
   854   while (cur < end) {
   855     HeapWord* next = cur + chunkSize;
   856     if (next > end) {
   857       next = end;
   858     }
   859     MemRegion mr(cur,next);
   860     _nextMarkBitMap->clearRange(mr);
   861     cur = next;
   862     do_yield_check();
   864     // Repeat the asserts from above. We'll do them as asserts here to
   865     // minimize their overhead on the product. However, we'll have
   866     // them as guarantees at the beginning / end of the bitmap
   867     // clearing to get some checking in the product.
   868     assert(cmThread()->during_cycle(), "invariant");
   869     assert(!g1h->mark_in_progress(), "invariant");
   870   }
   872   // Clear the liveness counting data
   873   clear_all_count_data();
   875   // Repeat the asserts from above.
   876   guarantee(cmThread()->during_cycle(), "invariant");
   877   guarantee(!g1h->mark_in_progress(), "invariant");
   878 }
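        // The loop above clears the bitmap in chunks covering M (1M) heap words
        // of heap each, i.e. 8 MB per chunk on a 64-bit VM, with a yield check
        // between chunks; a hypothetical 1 GB heap is therefore cleared in 128
        // steps, each of which may yield to a pending safepoint.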
   880 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
   881 public:
   882   bool doHeapRegion(HeapRegion* r) {
   883     if (!r->continuesHumongous()) {
   884       r->note_start_of_marking();
   885     }
   886     return false;
   887   }
   888 };
   890 void ConcurrentMark::checkpointRootsInitialPre() {
   891   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
   892   G1CollectorPolicy* g1p = g1h->g1_policy();
   894   _has_aborted = false;
   896 #ifndef PRODUCT
   897   if (G1PrintReachableAtInitialMark) {
   898     print_reachable("at-cycle-start",
   899                     VerifyOption_G1UsePrevMarking, true /* all */);
   900   }
   901 #endif
   903   // Initialise marking structures. This has to be done in a STW phase.
   904   reset();
   906   // For each region note start of marking.
   907   NoteStartOfMarkHRClosure startcl;
   908   g1h->heap_region_iterate(&startcl);
   909 }
   912 void ConcurrentMark::checkpointRootsInitialPost() {
   913   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
    915   // If we force an overflow during remark, the remark operation will
    916   // actually abort and we'll restart concurrent marking. If we always
    917   // force an overflow during remark we'll never actually complete the
    918   // marking phase. So, we initialize this here, at the start of the
    919   // cycle, so that the remaining overflow number will decrease at
    920   // every remark and we'll eventually not need to cause one.
   921   force_overflow_stw()->init();
   923   // Start Concurrent Marking weak-reference discovery.
   924   ReferenceProcessor* rp = g1h->ref_processor_cm();
   925   // enable ("weak") refs discovery
   926   rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
   927   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
   929   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    930   // This is the start of the marking cycle, and we expect all
    931   // threads to have SATB queues with active set to false.
   932   satb_mq_set.set_active_all_threads(true, /* new active value */
   933                                      false /* expected_active */);
   935   _root_regions.prepare_for_scan();
   937   // update_g1_committed() will be called at the end of an evac pause
   938   // when marking is on. So, it's also called at the end of the
   939   // initial-mark pause to update the heap end, if the heap expands
   940   // during it. No need to call it here.
   941 }
   943 /*
   944  * Notice that in the next two methods, we actually leave the STS
   945  * during the barrier sync and join it immediately afterwards. If we
   946  * do not do this, the following deadlock can occur: one thread could
   947  * be in the barrier sync code, waiting for the other thread to also
   948  * sync up, whereas another one could be trying to yield, while also
   949  * waiting for the other threads to sync up too.
   950  *
   951  * Note, however, that this code is also used during remark and in
   952  * this case we should not attempt to leave / enter the STS, otherwise
    953  * we'll either hit an assert (debug / fastdebug) or deadlock
   954  * (product). So we should only leave / enter the STS if we are
   955  * operating concurrently.
   956  *
    957  * Because the thread that does the sync barrier has left the STS, it
    958  * is possible for it to be suspended while a Full GC or an evacuation
    959  * pause occurs. This is actually safe, since entering the sync
    960  * barrier is one of the last things do_marking_step() does, and it
    961  * doesn't manipulate any data structures afterwards.
   962  */
   964 void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
   965   if (verbose_low()) {
   966     gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
   967   }
   969   if (concurrent()) {
   970     ConcurrentGCThread::stsLeave();
   971   }
   972   _first_overflow_barrier_sync.enter();
   973   if (concurrent()) {
   974     ConcurrentGCThread::stsJoin();
   975   }
   976   // at this point everyone should have synced up and not be doing any
   977   // more work
   979   if (verbose_low()) {
   980     gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
   981   }
   983   // If we're executing the concurrent phase of marking, reset the marking
   984   // state; otherwise the marking state is reset after reference processing,
   985   // during the remark pause.
   986   // If we reset here as a result of an overflow during the remark we will
   987   // see assertion failures from any subsequent set_concurrency_and_phase()
   988   // calls.
   989   if (concurrent()) {
    990     // let the task associated with worker 0 do this
   991     if (worker_id == 0) {
   992       // task 0 is responsible for clearing the global data structures
   993       // We should be here because of an overflow. During STW we should
   994       // not clear the overflow flag since we rely on it being true when
    995       // we exit this method to abort the pause and restart concurrent
   996       // marking.
   997       reset_marking_state(true /* clear_overflow */);
   998       force_overflow()->update();
  1000       if (G1Log::fine()) {
  1001         gclog_or_tty->date_stamp(PrintGCDateStamps);
  1002         gclog_or_tty->stamp(PrintGCTimeStamps);
   1003         gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
   1004       }
   1005     }
   1006   }
   1008   // after this, each task should reset its own data structures and
   1009   // then go into the second barrier
   1010 }
  1012 void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  1013   if (verbose_low()) {
   1014     gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
   1015   }
  1017   if (concurrent()) {
   1018     ConcurrentGCThread::stsLeave();
   1019   }
  1020   _second_overflow_barrier_sync.enter();
  1021   if (concurrent()) {
   1022     ConcurrentGCThread::stsJoin();
   1023   }
  1024   // at this point everything should be re-initialized and ready to go
  1026   if (verbose_low()) {
   1027     gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
   1028   }
   1029 }
  1031 #ifndef PRODUCT
  1032 void ForceOverflowSettings::init() {
  1033   _num_remaining = G1ConcMarkForceOverflow;
  1034   _force = false;
   1035   update();
   1036 }
  1038 void ForceOverflowSettings::update() {
  1039   if (_num_remaining > 0) {
  1040     _num_remaining -= 1;
  1041     _force = true;
  1042   } else {
   1043     _force = false;
   1044   }
   1045 }
  1047 bool ForceOverflowSettings::should_force() {
  1048   if (_force) {
  1049     _force = false;
  1050     return true;
  1051   } else {
   1052     return false;
   1053   }
   1054 }
  1055 #endif // !PRODUCT
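        // ForceOverflowSettings is a non-product testing aid: when the develop
        // flag G1ConcMarkForceOverflow is set to N, the marking code will
        // artificially force an overflow N times, exercising the
        // overflow-and-restart path described above; the whole class is
        // compiled out of product builds.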
  1057 class CMConcurrentMarkingTask: public AbstractGangTask {
  1058 private:
  1059   ConcurrentMark*       _cm;
  1060   ConcurrentMarkThread* _cmt;
  1062 public:
  1063   void work(uint worker_id) {
  1064     assert(Thread::current()->is_ConcurrentGC_thread(),
  1065            "this should only be done by a conc GC thread");
  1066     ResourceMark rm;
  1068     double start_vtime = os::elapsedVTime();
  1070     ConcurrentGCThread::stsJoin();
  1072     assert(worker_id < _cm->active_tasks(), "invariant");
  1073     CMTask* the_task = _cm->task(worker_id);
  1074     the_task->record_start_time();
  1075     if (!_cm->has_aborted()) {
  1076       do {
  1077         double start_vtime_sec = os::elapsedVTime();
  1078         double start_time_sec = os::elapsedTime();
  1079         double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
  1081         the_task->do_marking_step(mark_step_duration_ms,
  1082                                   true  /* do_termination */,
  1083                                   false /* is_serial*/);
  1085         double end_time_sec = os::elapsedTime();
  1086         double end_vtime_sec = os::elapsedVTime();
  1087         double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
  1088         double elapsed_time_sec = end_time_sec - start_time_sec;
  1089         _cm->clear_has_overflown();
  1091         bool ret = _cm->do_yield_check(worker_id);
  1093         jlong sleep_time_ms;
  1094         if (!_cm->has_aborted() && the_task->has_aborted()) {
  1095           sleep_time_ms =
  1096             (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
  1097           ConcurrentGCThread::stsLeave();
  1098           os::sleep(Thread::current(), sleep_time_ms, false);
   1099           ConcurrentGCThread::stsJoin();
   1100         }
  1101         double end_time2_sec = os::elapsedTime();
  1102         double elapsed_time2_sec = end_time2_sec - start_time_sec;
  1104 #if 0
  1105           gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
  1106                                  "overhead %1.4lf",
  1107                                  elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
  1108                                  the_task->conc_overhead(os::elapsedTime()) * 8.0);
  1109           gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
  1110                                  elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
  1111 #endif
   1112       } while (!_cm->has_aborted() && the_task->has_aborted());
   1113     }
  1114     the_task->record_end_time();
  1115     guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
  1117     ConcurrentGCThread::stsLeave();
  1119     double end_vtime = os::elapsedVTime();
   1120     _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
   1121   }
  1123   CMConcurrentMarkingTask(ConcurrentMark* cm,
  1124                           ConcurrentMarkThread* cmt) :
  1125       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
  1127   ~CMConcurrentMarkingTask() { }
  1128 };
  1130 // Calculates the number of active workers for a concurrent
  1131 // phase.
  1132 uint ConcurrentMark::calc_parallel_marking_threads() {
  1133   if (G1CollectedHeap::use_parallel_gc_threads()) {
  1134     uint n_conc_workers = 0;
  1135     if (!UseDynamicNumberOfGCThreads ||
  1136         (!FLAG_IS_DEFAULT(ConcGCThreads) &&
  1137          !ForceDynamicNumberOfGCThreads)) {
  1138       n_conc_workers = max_parallel_marking_threads();
  1139     } else {
  1140       n_conc_workers =
  1141         AdaptiveSizePolicy::calc_default_active_workers(
  1142                                      max_parallel_marking_threads(),
  1143                                      1, /* Minimum workers */
  1144                                      parallel_marking_threads(),
  1145                                      Threads::number_of_non_daemon_threads());
  1146       // Don't scale down "n_conc_workers" by scale_parallel_threads() because
   1147       // that scaling has already gone into "_max_parallel_marking_threads".
   1148     }
   1149     assert(n_conc_workers > 0, "Always need at least 1");
   1150     return n_conc_workers;
   1151   }
  1152   // If we are not running with any parallel GC threads we will not
  1153   // have spawned any marking threads either. Hence the number of
  1154   // concurrent workers should be 0.
   1155   return 0;
   1156 }
  1158 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
  1159   // Currently, only survivors can be root regions.
  1160   assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  1161   G1RootRegionScanClosure cl(_g1h, this, worker_id);
  1163   const uintx interval = PrefetchScanIntervalInBytes;
  1164   HeapWord* curr = hr->bottom();
  1165   const HeapWord* end = hr->top();
  1166   while (curr < end) {
  1167     Prefetch::read(curr, interval);
  1168     oop obj = oop(curr);
  1169     int size = obj->oop_iterate(&cl);
  1170     assert(size == obj->size(), "sanity");
   1171     curr += size;
   1172   }
   1173 }
  1175 class CMRootRegionScanTask : public AbstractGangTask {
  1176 private:
  1177   ConcurrentMark* _cm;
  1179 public:
  1180   CMRootRegionScanTask(ConcurrentMark* cm) :
  1181     AbstractGangTask("Root Region Scan"), _cm(cm) { }
  1183   void work(uint worker_id) {
  1184     assert(Thread::current()->is_ConcurrentGC_thread(),
  1185            "this should only be done by a conc GC thread");
  1187     CMRootRegions* root_regions = _cm->root_regions();
  1188     HeapRegion* hr = root_regions->claim_next();
  1189     while (hr != NULL) {
  1190       _cm->scanRootRegion(hr, worker_id);
   1191       hr = root_regions->claim_next();
   1192     }
   1193   }
  1194 };
  1196 void ConcurrentMark::scanRootRegions() {
  1197   // scan_in_progress() will have been set to true only if there was
  1198   // at least one root region to scan. So, if it's false, we
  1199   // should not attempt to do any further work.
  1200   if (root_regions()->scan_in_progress()) {
  1201     _parallel_marking_threads = calc_parallel_marking_threads();
  1202     assert(parallel_marking_threads() <= max_parallel_marking_threads(),
  1203            "Maximum number of marking threads exceeded");
  1204     uint active_workers = MAX2(1U, parallel_marking_threads());
  1206     CMRootRegionScanTask task(this);
  1207     if (use_parallel_marking_threads()) {
  1208       _parallel_workers->set_active_workers((int) active_workers);
  1209       _parallel_workers->run_task(&task);
  1210     } else {
   1211       task.work(0);
   1212     }
  1214     // It's possible that has_aborted() is true here without actually
  1215     // aborting the survivor scan earlier. This is OK as it's
  1216     // mainly used for sanity checking.
   1217     root_regions()->scan_finished();
   1218   }
   1219 }
  1221 void ConcurrentMark::markFromRoots() {
  1222   // we might be tempted to assert that:
  1223   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  1224   //        "inconsistent argument?");
  1225   // However that wouldn't be right, because it's possible that
  1226   // a safepoint is indeed in progress as a younger generation
  1227   // stop-the-world GC happens even as we mark in this generation.
  1229   _restart_for_overflow = false;
  1230   force_overflow_conc()->init();
  1232   // _g1h has _n_par_threads
  1233   _parallel_marking_threads = calc_parallel_marking_threads();
  1234   assert(parallel_marking_threads() <= max_parallel_marking_threads(),
  1235     "Maximum number of marking threads exceeded");
  1237   uint active_workers = MAX2(1U, parallel_marking_threads());
  1239   // Parallel task terminator is set in "set_concurrency_and_phase()"
  1240   set_concurrency_and_phase(active_workers, true /* concurrent */);
  1242   CMConcurrentMarkingTask markingTask(this, cmThread());
  1243   if (use_parallel_marking_threads()) {
  1244     _parallel_workers->set_active_workers((int)active_workers);
   1245     // Don't set _n_par_threads because it affects MT in process_strong_roots()
   1246     // and the decisions on that MT processing are made elsewhere.
  1247     assert(_parallel_workers->active_workers() > 0, "Should have been set");
  1248     _parallel_workers->run_task(&markingTask);
  1249   } else {
   1250     markingTask.work(0);
   1251   }
   1252   print_stats();
   1253 }
  1255 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  1256   // world is stopped at this checkpoint
  1257   assert(SafepointSynchronize::is_at_safepoint(),
  1258          "world should be stopped");
  1260   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  1262   // If a full collection has happened, we shouldn't do this.
  1263   if (has_aborted()) {
  1264     g1h->set_marking_complete(); // So bitmap clearing isn't confused
   1265     return;
   1266   }
  1268   SvcGCMarker sgcm(SvcGCMarker::OTHER);
  1270   if (VerifyDuringGC) {
  1271     HandleMark hm;  // handle scope
  1272     gclog_or_tty->print(" VerifyDuringGC:(before)");
  1273     Universe::heap()->prepare_for_verify();
  1274     Universe::verify(/* silent */ false,
   1275                      /* option */ VerifyOption_G1UsePrevMarking);
   1276   }
  1278   G1CollectorPolicy* g1p = g1h->g1_policy();
  1279   g1p->record_concurrent_mark_remark_start();
  1281   double start = os::elapsedTime();
  1283   checkpointRootsFinalWork();
  1285   double mark_work_end = os::elapsedTime();
  1287   weakRefsWork(clear_all_soft_refs);
  1289   if (has_overflown()) {
  1290     // Oops.  We overflowed.  Restart concurrent marking.
  1291     _restart_for_overflow = true;
  1292     if (G1TraceMarkStackOverflow) {
   1293       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
   1294     }
  1296     // Verify the heap w.r.t. the previous marking bitmap.
  1297     if (VerifyDuringGC) {
  1298       HandleMark hm;  // handle scope
  1299       gclog_or_tty->print(" VerifyDuringGC:(overflow)");
  1300       Universe::heap()->prepare_for_verify();
  1301       Universe::verify(/* silent */ false,
   1302                        /* option */ VerifyOption_G1UsePrevMarking);
   1303     }
  1305     // Clear the marking state because we will be restarting
  1306     // marking due to overflowing the global mark stack.
  1307     reset_marking_state();
  1308   } else {
  1309     // Aggregate the per-task counting data that we have accumulated
  1310     // while marking.
  1311     aggregate_count_data();
  1313     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  1314     // We're done with marking.
   1315     // This is the end of the marking cycle, and we expect all
   1316     // threads to have SATB queues with active set to true.
  1317     satb_mq_set.set_active_all_threads(false, /* new active value */
  1318                                        true /* expected_active */);
  1320     if (VerifyDuringGC) {
  1321       HandleMark hm;  // handle scope
  1322       gclog_or_tty->print(" VerifyDuringGC:(after)");
  1323       Universe::heap()->prepare_for_verify();
  1324       Universe::verify(/* silent */ false,
   1325                        /* option */ VerifyOption_G1UseNextMarking);
   1326     }
  1327     assert(!restart_for_overflow(), "sanity");
  1328     // Completely reset the marking state since marking completed
   1329     set_non_marking_state();
   1330   }
  1332   // Expand the marking stack, if we have to and if we can.
  1333   if (_markStack.should_expand()) {
   1334     _markStack.expand();
   1335   }
  1337   // Statistics
  1338   double now = os::elapsedTime();
  1339   _remark_mark_times.add((mark_work_end - start) * 1000.0);
  1340   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  1341   _remark_times.add((now - start) * 1000.0);
   1343   g1p->record_concurrent_mark_remark_end();
   1344 }
  1346 // Base class of the closures that finalize and verify the
  1347 // liveness counting data.
  1348 class CMCountDataClosureBase: public HeapRegionClosure {
  1349 protected:
  1350   G1CollectedHeap* _g1h;
  1351   ConcurrentMark* _cm;
  1352   CardTableModRefBS* _ct_bs;
  1354   BitMap* _region_bm;
  1355   BitMap* _card_bm;
   1357   // Takes a region that's not empty (i.e., it has at least one
   1358   // live object in it) and sets its corresponding bit on the region
  1359   // bitmap to 1. If the region is "starts humongous" it will also set
  1360   // to 1 the bits on the region bitmap that correspond to its
  1361   // associated "continues humongous" regions.
  1362   void set_bit_for_region(HeapRegion* hr) {
  1363     assert(!hr->continuesHumongous(), "should have filtered those out");
  1365     BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
  1366     if (!hr->startsHumongous()) {
  1367       // Normal (non-humongous) case: just set the bit.
  1368       _region_bm->par_at_put(index, true);
  1369     } else {
  1370       // Starts humongous case: calculate how many regions are part of
  1371       // this humongous region and then set the bit range.
  1372       BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
   1373       _region_bm->par_at_put_range(index, end_index, true);
   1374     }
   1375   }
  1377 public:
  1378   CMCountDataClosureBase(G1CollectedHeap* g1h,
  1379                          BitMap* region_bm, BitMap* card_bm):
  1380     _g1h(g1h), _cm(g1h->concurrent_mark()),
  1381     _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
  1382     _region_bm(region_bm), _card_bm(card_bm) { }
  1383 };
  1385 // Closure that calculates the # live objects per region. Used
  1386 // for verification purposes during the cleanup pause.
  1387 class CalcLiveObjectsClosure: public CMCountDataClosureBase {
  1388   CMBitMapRO* _bm;
  1389   size_t _region_marked_bytes;
  1391 public:
  1392   CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
  1393                          BitMap* region_bm, BitMap* card_bm) :
  1394     CMCountDataClosureBase(g1h, region_bm, card_bm),
  1395     _bm(bm), _region_marked_bytes(0) { }
  1397   bool doHeapRegion(HeapRegion* hr) {
  1399     if (hr->continuesHumongous()) {
  1400       // We will ignore these here and process them when their
  1401       // associated "starts humongous" region is processed (see
  1402       // set_bit_for_region()). Note that we cannot rely on their
  1403       // associated "starts humongous" region to have its bit set to
  1404       // 1 since, due to the region chunking in the parallel region
  1405       // iteration, a "continues humongous" region might be visited
  1406       // before its associated "starts humongous".
  1407       return false;
  1410     HeapWord* ntams = hr->next_top_at_mark_start();
  1411     HeapWord* start = hr->bottom();
  1413     assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
  1414            err_msg("Preconditions not met - "
  1415                    "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT,
  1416                    start, ntams, hr->end()));
  1418     // Find the first marked object at or after "start".
  1419     start = _bm->getNextMarkedWordAddress(start, ntams);
  1421     size_t marked_bytes = 0;
  1423     while (start < ntams) {
  1424       oop obj = oop(start);
  1425       int obj_sz = obj->size();
  1426       HeapWord* obj_end = start + obj_sz;
  1428       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
  1429       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);
  1431       // Note: if we're looking at the last region in heap - obj_end
  1432       // could actually be just beyond the end of the heap; end_idx
  1433       // will then correspond to a (non-existent) card that is also
  1434       // just beyond the heap.
  1435       if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
  1436         // end of object is not card aligned - increment to cover
  1437         // all the cards spanned by the object
  1438         end_idx += 1;
  1441       // Set the bits in the card BM for the cards spanned by this object.
  1442       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
  1444       // Add the size of this object to the number of marked bytes.
  1445       marked_bytes += (size_t)obj_sz * HeapWordSize;
  1447       // Find the next marked object after this one.
  1448       start = _bm->getNextMarkedWordAddress(obj_end, ntams);
  1451     // Mark the allocated-since-marking portion...
  1452     HeapWord* top = hr->top();
  1453     if (ntams < top) {
  1454       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
  1455       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
  1457       // Note: if we're looking at the last region in heap - top
  1458       // could actually be just beyond the end of the heap; end_idx
  1459       // will then correspond to a (non-existent) card that is also
  1460       // just beyond the heap.
  1461       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
  1462         // end of object is not card aligned - increment to cover
  1463         // all the cards spanned by the object
  1464         end_idx += 1;
  1466       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
  1468       // This definitely means the region has live objects.
  1469       set_bit_for_region(hr);
  1472     // Update the live region bitmap.
  1473     if (marked_bytes > 0) {
  1474       set_bit_for_region(hr);
  1477     // Set the marked bytes for the current region so that
  1478     // it can be queried by a calling verification routine.
  1479     _region_marked_bytes = marked_bytes;
  1481     return false;
  1484   size_t region_marked_bytes() const { return _region_marked_bytes; }
  1485 };
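       // Worked example for the card-range logic above (illustrative
       // numbers): card_bitmap_index_for() maps a heap address to a card
       // index, and the "+= 1" bump implies that set_card_bitmap_range()
       // covers the half-open range [start_idx, end_idx). So, for a marked
       // object that starts on card 100 and whose end falls part-way into
       // card 102, end_idx starts out as 102 and is bumped to 103 so that
       // the partially covered last card is included; an object ending
       // exactly on a card boundary needs no bump.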
  1487 // Heap region closure used for verifying the counting data
  1488 // that was accumulated concurrently and aggregated during
  1489 // the remark pause. This closure is applied to the heap
  1490 // regions during the STW cleanup pause.
  1492 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
  1493   G1CollectedHeap* _g1h;
  1494   ConcurrentMark* _cm;
  1495   CalcLiveObjectsClosure _calc_cl;
  1496   BitMap* _region_bm;   // Region BM to be verified
  1497   BitMap* _card_bm;     // Card BM to be verified
  1498   bool _verbose;        // verbose output?
  1500   BitMap* _exp_region_bm; // Expected Region BM values
  1501   BitMap* _exp_card_bm;   // Expected card BM values
  1503   int _failures;
  1505 public:
  1506   VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
  1507                                 BitMap* region_bm,
  1508                                 BitMap* card_bm,
  1509                                 BitMap* exp_region_bm,
  1510                                 BitMap* exp_card_bm,
  1511                                 bool verbose) :
  1512     _g1h(g1h), _cm(g1h->concurrent_mark()),
  1513     _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
  1514     _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
  1515     _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
  1516     _failures(0) { }
  1518   int failures() const { return _failures; }
  1520   bool doHeapRegion(HeapRegion* hr) {
  1521     if (hr->continuesHumongous()) {
  1522       // We will ignore these here and process them when their
  1523       // associated "starts humongous" region is processed (see
  1524       // set_bit_for_region()). Note that we cannot rely on their
  1525       // associated "starts humongous" region to have its bit set to
  1526       // 1 since, due to the region chunking in the parallel region
  1527       // iteration, a "continues humongous" region might be visited
  1528       // before its associated "starts humongous".
  1529       return false;
  1532     int failures = 0;
  1534     // Call the CalcLiveObjectsClosure to walk the marking bitmap for
  1535     // this region and set the corresponding bits in the expected region
  1536     // and card bitmaps.
  1537     bool res = _calc_cl.doHeapRegion(hr);
  1538     assert(res == false, "should be continuing");
  1540     MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
  1541                     Mutex::_no_safepoint_check_flag);
  1543     // Verify the marked bytes for this region.
  1544     size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
  1545     size_t act_marked_bytes = hr->next_marked_bytes();
  1547     // We're not OK if expected marked bytes > actual marked bytes. It means
  1548     // we have failed to account for some objects during the actual marking.
  1549     if (exp_marked_bytes > act_marked_bytes) {
  1550       if (_verbose) {
  1551         gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
  1552                                "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
  1553                                hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
  1555       failures += 1;
  1558     // Verify the bit, for this region, in the actual and expected
  1559     // (which was just calculated) region bit maps.
  1560     // We're not OK if the bit in the calculated expected region
  1561     // bitmap is set and the bit in the actual region bitmap is not.
  1562     BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
  1564     bool expected = _exp_region_bm->at(index);
  1565     bool actual = _region_bm->at(index);
  1566     if (expected && !actual) {
  1567       if (_verbose) {
  1568         gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
  1569                                "expected: %s, actual: %s",
  1570                                hr->hrs_index(),
  1571                                BOOL_TO_STR(expected), BOOL_TO_STR(actual));
  1573       failures += 1;
  1576     // Verify that the card bit maps for the cards spanned by the current
  1577     // region match. We have an error if we have a set bit in the expected
  1578     // bit map and the corresponding bit in the actual bitmap is not set.
  1580     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
  1581     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
  1583     for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
  1584       expected = _exp_card_bm->at(i);
  1585       actual = _card_bm->at(i);
  1587       if (expected && !actual) {
  1588         if (_verbose) {
  1589           gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
  1590                                  "expected: %s, actual: %s",
  1591                                  hr->hrs_index(), i,
  1592                                  BOOL_TO_STR(expected), BOOL_TO_STR(actual));
  1594         failures += 1;
  1598     if (failures > 0 && _verbose)  {
  1599       gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
  1600                              "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
  1601                              HR_FORMAT_PARAMS(hr), hr->next_top_at_mark_start(),
  1602                              _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
  1605     _failures += failures;
  1607     // We could stop the iteration over the heap by returning true
  1608     // when we find the first violating region.
  1609     return false;
  1611 };
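       // Note that the verification above is one-sided: a bit (or byte
       // count) present in the expected data but missing from the actual
       // data counts as a failure, whereas the actual data being a
       // superset of the expected data is not flagged.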
  1614 class G1ParVerifyFinalCountTask: public AbstractGangTask {
  1615 protected:
  1616   G1CollectedHeap* _g1h;
  1617   ConcurrentMark* _cm;
  1618   BitMap* _actual_region_bm;
  1619   BitMap* _actual_card_bm;
  1621   uint    _n_workers;
  1623   BitMap* _expected_region_bm;
  1624   BitMap* _expected_card_bm;
  1626   int  _failures;
  1627   bool _verbose;
  1629 public:
  1630   G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
  1631                             BitMap* region_bm, BitMap* card_bm,
  1632                             BitMap* expected_region_bm, BitMap* expected_card_bm)
  1633     : AbstractGangTask("G1 verify final counting"),
  1634       _g1h(g1h), _cm(_g1h->concurrent_mark()),
  1635       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
  1636       _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
  1637       _failures(0), _verbose(false),
  1638       _n_workers(0) {
  1639     assert(VerifyDuringGC, "don't call this otherwise");
  1641     // Use the value already set as the number of active threads
  1642     // in the call to run_task().
  1643     if (G1CollectedHeap::use_parallel_gc_threads()) {
  1644       assert( _g1h->workers()->active_workers() > 0,
  1645         "Should have been previously set");
  1646       _n_workers = _g1h->workers()->active_workers();
  1647     } else {
  1648       _n_workers = 1;
  1651     assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
  1652     assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
  1654     _verbose = _cm->verbose_medium();
  1657   void work(uint worker_id) {
  1658     assert(worker_id < _n_workers, "invariant");
  1660     VerifyLiveObjectDataHRClosure verify_cl(_g1h,
  1661                                             _actual_region_bm, _actual_card_bm,
  1662                                             _expected_region_bm,
  1663                                             _expected_card_bm,
  1664                                             _verbose);
  1666     if (G1CollectedHeap::use_parallel_gc_threads()) {
  1667       _g1h->heap_region_par_iterate_chunked(&verify_cl,
  1668                                             worker_id,
  1669                                             _n_workers,
  1670                                             HeapRegion::VerifyCountClaimValue);
  1671     } else {
  1672       _g1h->heap_region_iterate(&verify_cl);
  1675     Atomic::add(verify_cl.failures(), &_failures);
  1678   int failures() const { return _failures; }
  1679 };
  1681 // Closure that finalizes the liveness counting data.
  1682 // Used during the cleanup pause.
  1683 // Sets the bits corresponding to the interval [NTAMS, top]
  1684 // (which contains the implicitly live objects) in the
  1685 // card liveness bitmap. Also sets the bit for each region,
  1686 // containing live data, in the region liveness bitmap.
  1688 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
  1689  public:
  1690   FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
  1691                               BitMap* region_bm,
  1692                               BitMap* card_bm) :
  1693     CMCountDataClosureBase(g1h, region_bm, card_bm) { }
  1695   bool doHeapRegion(HeapRegion* hr) {
  1697     if (hr->continuesHumongous()) {
  1698       // We will ignore these here and process them when their
  1699       // associated "starts humongous" region is processed (see
  1700       // set_bit_for_region()). Note that we cannot rely on their
  1701       // associated "starts humongous" region to have its bit set to
  1702       // 1 since, due to the region chunking in the parallel region
  1703       // iteration, a "continues humongous" region might be visited
  1704       // before its associated "starts humongous".
  1705       return false;
  1708     HeapWord* ntams = hr->next_top_at_mark_start();
  1709     HeapWord* top   = hr->top();
  1711     assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
  1713     // Mark the allocated-since-marking portion...
  1714     if (ntams < top) {
  1715       // This definitely means the region has live objects.
  1716       set_bit_for_region(hr);
  1718       // Now set the bits in the card bitmap for [ntams, top)
  1719       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
  1720       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
  1722       // Note: if we're looking at the last region in heap - top
  1723       // could actually be just beyond the end of the heap; end_idx
  1724       // will then correspond to a (non-existent) card that is also
  1725       // just beyond the heap.
  1726       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
  1727         // end of object is not card aligned - increment to cover
  1728         // all the cards spanned by the object
  1729         end_idx += 1;
  1732       assert(end_idx <= _card_bm->size(),
  1733              err_msg("oob: end_idx=  "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
  1734                      end_idx, _card_bm->size()));
  1735       assert(start_idx < _card_bm->size(),
  1736              err_msg("oob: start_idx=  "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
  1737                      start_idx, _card_bm->size()));
  1739       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
  1742     // Set the bit for the region if it contains live data
  1743     if (hr->next_marked_bytes() > 0) {
  1744       set_bit_for_region(hr);
  1747     return false;
  1749 };
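       // Note: FinalCountDataUpdateClosure only needs to fill in the
       // [ntams, top) interval; the portion of each region below ntams
       // was already accounted for by the per-task counting data gathered
       // during marking and aggregated in aggregate_count_data() during
       // the remark pause (see above).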
  1751 class G1ParFinalCountTask: public AbstractGangTask {
  1752 protected:
  1753   G1CollectedHeap* _g1h;
  1754   ConcurrentMark* _cm;
  1755   BitMap* _actual_region_bm;
  1756   BitMap* _actual_card_bm;
  1758   uint    _n_workers;
  1760 public:
  1761   G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
  1762     : AbstractGangTask("G1 final counting"),
  1763       _g1h(g1h), _cm(_g1h->concurrent_mark()),
  1764       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
  1765       _n_workers(0) {
  1766     // Use the value already set as the number of active threads
  1767     // in the call to run_task().
  1768     if (G1CollectedHeap::use_parallel_gc_threads()) {
  1769       assert( _g1h->workers()->active_workers() > 0,
  1770         "Should have been previously set");
  1771       _n_workers = _g1h->workers()->active_workers();
  1772     } else {
  1773       _n_workers = 1;
  1777   void work(uint worker_id) {
  1778     assert(worker_id < _n_workers, "invariant");
  1780     FinalCountDataUpdateClosure final_update_cl(_g1h,
  1781                                                 _actual_region_bm,
  1782                                                 _actual_card_bm);
  1784     if (G1CollectedHeap::use_parallel_gc_threads()) {
  1785       _g1h->heap_region_par_iterate_chunked(&final_update_cl,
  1786                                             worker_id,
  1787                                             _n_workers,
  1788                                             HeapRegion::FinalCountClaimValue);
  1789     } else {
  1790       _g1h->heap_region_iterate(&final_update_cl);
  1793 };
  1795 class G1ParNoteEndTask;
  1797 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  1798   G1CollectedHeap* _g1;
  1799   int _worker_num;
  1800   size_t _max_live_bytes;
  1801   uint _regions_claimed;
  1802   size_t _freed_bytes;
  1803   FreeRegionList* _local_cleanup_list;
  1804   OldRegionSet* _old_proxy_set;
  1805   HumongousRegionSet* _humongous_proxy_set;
  1806   HRRSCleanupTask* _hrrs_cleanup_task;
  1807   double _claimed_region_time;
  1808   double _max_region_time;
  1810 public:
  1811   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
  1812                              int worker_num,
  1813                              FreeRegionList* local_cleanup_list,
  1814                              OldRegionSet* old_proxy_set,
  1815                              HumongousRegionSet* humongous_proxy_set,
  1816                              HRRSCleanupTask* hrrs_cleanup_task) :
  1817     _g1(g1), _worker_num(worker_num),
  1818     _max_live_bytes(0), _regions_claimed(0),
  1819     _freed_bytes(0),
  1820     _claimed_region_time(0.0), _max_region_time(0.0),
  1821     _local_cleanup_list(local_cleanup_list),
  1822     _old_proxy_set(old_proxy_set),
  1823     _humongous_proxy_set(humongous_proxy_set),
  1824     _hrrs_cleanup_task(hrrs_cleanup_task) { }
  1826   size_t freed_bytes() { return _freed_bytes; }
  1828   bool doHeapRegion(HeapRegion *hr) {
  1829     if (hr->continuesHumongous()) {
  1830       return false;
  1832     // We use a claim value of zero here because all regions
  1833     // were claimed with value 1 in the FinalCount task.
  1834     _g1->reset_gc_time_stamps(hr);
  1835     double start = os::elapsedTime();
  1836     _regions_claimed++;
  1837     hr->note_end_of_marking();
  1838     _max_live_bytes += hr->max_live_bytes();
  1839     _g1->free_region_if_empty(hr,
  1840                               &_freed_bytes,
  1841                               _local_cleanup_list,
  1842                               _old_proxy_set,
  1843                               _humongous_proxy_set,
  1844                               _hrrs_cleanup_task,
  1845                               true /* par */);
  1846     double region_time = (os::elapsedTime() - start);
  1847     _claimed_region_time += region_time;
  1848     if (region_time > _max_region_time) {
  1849       _max_region_time = region_time;
  1851     return false;
  1854   size_t max_live_bytes() { return _max_live_bytes; }
  1855   uint regions_claimed() { return _regions_claimed; }
  1856   double claimed_region_time_sec() { return _claimed_region_time; }
  1857   double max_region_time_sec() { return _max_region_time; }
  1858 };
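       // Each worker runs its own instance of the closure above and
       // accumulates freed bytes, max live bytes and per-region timing
       // locally; G1ParNoteEndTask::work() below folds the freed-bytes
       // and max-live-bytes totals into the task-wide counters under
       // ParGCRareEvent_lock.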
  1860 class G1ParNoteEndTask: public AbstractGangTask {
  1861   friend class G1NoteEndOfConcMarkClosure;
  1863 protected:
  1864   G1CollectedHeap* _g1h;
  1865   size_t _max_live_bytes;
  1866   size_t _freed_bytes;
  1867   FreeRegionList* _cleanup_list;
  1869 public:
  1870   G1ParNoteEndTask(G1CollectedHeap* g1h,
  1871                    FreeRegionList* cleanup_list) :
  1872     AbstractGangTask("G1 note end"), _g1h(g1h),
  1873     _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
  1875   void work(uint worker_id) {
  1876     double start = os::elapsedTime();
  1877     FreeRegionList local_cleanup_list("Local Cleanup List");
  1878     OldRegionSet old_proxy_set("Local Cleanup Old Proxy Set");
  1879     HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
  1880     HRRSCleanupTask hrrs_cleanup_task;
  1881     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, worker_id, &local_cleanup_list,
  1882                                            &old_proxy_set,
  1883                                            &humongous_proxy_set,
  1884                                            &hrrs_cleanup_task);
  1885     if (G1CollectedHeap::use_parallel_gc_threads()) {
  1886       _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id,
  1887                                             _g1h->workers()->active_workers(),
  1888                                             HeapRegion::NoteEndClaimValue);
  1889     } else {
  1890       _g1h->heap_region_iterate(&g1_note_end);
  1892     assert(g1_note_end.complete(), "Shouldn't have yielded!");
  1894     // Now update the lists
  1895     _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
  1896                                             NULL /* free_list */,
  1897                                             &old_proxy_set,
  1898                                             &humongous_proxy_set,
  1899                                             true /* par */);
  1901       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  1902       _max_live_bytes += g1_note_end.max_live_bytes();
  1903       _freed_bytes += g1_note_end.freed_bytes();
  1905       // If we iterate over the global cleanup list at the end of
  1906       // cleanup to do this printing we will not guarantee to only
  1907       // generate output for the newly-reclaimed regions (the list
  1908       // might not be empty at the beginning of cleanup; we might
  1909       // still be working on its previous contents). So we do the
  1910       // printing here, before we append the new regions to the global
  1911       // cleanup list.
  1913       G1HRPrinter* hr_printer = _g1h->hr_printer();
  1914       if (hr_printer->is_active()) {
  1915         HeapRegionLinkedListIterator iter(&local_cleanup_list);
  1916         while (iter.more_available()) {
  1917           HeapRegion* hr = iter.get_next();
  1918           hr_printer->cleanup(hr);
  1922       _cleanup_list->add_as_tail(&local_cleanup_list);
  1923       assert(local_cleanup_list.is_empty(), "post-condition");
  1925       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
  1928   size_t max_live_bytes() { return _max_live_bytes; }
  1929   size_t freed_bytes() { return _freed_bytes; }
  1930 };
  1932 class G1ParScrubRemSetTask: public AbstractGangTask {
  1933 protected:
  1934   G1RemSet* _g1rs;
  1935   BitMap* _region_bm;
  1936   BitMap* _card_bm;
  1937 public:
  1938   G1ParScrubRemSetTask(G1CollectedHeap* g1h,
  1939                        BitMap* region_bm, BitMap* card_bm) :
  1940     AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
  1941     _region_bm(region_bm), _card_bm(card_bm) { }
  1943   void work(uint worker_id) {
  1944     if (G1CollectedHeap::use_parallel_gc_threads()) {
  1945       _g1rs->scrub_par(_region_bm, _card_bm, worker_id,
  1946                        HeapRegion::ScrubRemSetClaimValue);
  1947     } else {
  1948       _g1rs->scrub(_region_bm, _card_bm);
  1952 };
  1954 void ConcurrentMark::cleanup() {
  1955   // world is stopped at this checkpoint
  1956   assert(SafepointSynchronize::is_at_safepoint(),
  1957          "world should be stopped");
  1958   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  1960   // If a full collection has happened, we shouldn't do this.
  1961   if (has_aborted()) {
  1962     g1h->set_marking_complete(); // So bitmap clearing isn't confused
  1963     return;
  1966   HRSPhaseSetter x(HRSPhaseCleanup);
  1967   g1h->verify_region_sets_optional();
  1969   if (VerifyDuringGC) {
  1970     HandleMark hm;  // handle scope
  1971     gclog_or_tty->print(" VerifyDuringGC:(before)");
  1972     Universe::heap()->prepare_for_verify();
  1973     Universe::verify(/* silent */ false,
  1974                      /* option */ VerifyOption_G1UsePrevMarking);
  1977   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
  1978   g1p->record_concurrent_mark_cleanup_start();
  1980   double start = os::elapsedTime();
  1982   HeapRegionRemSet::reset_for_cleanup_tasks();
  1984   uint n_workers;
  1986   // Do counting once more with the world stopped for good measure.
  1987   G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
  1989   if (G1CollectedHeap::use_parallel_gc_threads()) {
  1990    assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
  1991            "sanity check");
  1993     g1h->set_par_threads();
  1994     n_workers = g1h->n_par_threads();
  1995     assert(g1h->n_par_threads() == n_workers,
  1996            "Should not have been reset");
  1997     g1h->workers()->run_task(&g1_par_count_task);
  1998     // Done with the parallel phase so reset to 0.
  1999     g1h->set_par_threads(0);
  2001     assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue),
  2002            "sanity check");
  2003   } else {
  2004     n_workers = 1;
  2005     g1_par_count_task.work(0);
  2008   if (VerifyDuringGC) {
  2009     // Verify that the counting data accumulated during marking matches
  2010     // that calculated by walking the marking bitmap.
  2012     // Bitmaps to hold expected values
  2013     BitMap expected_region_bm(_region_bm.size(), false);
  2014     BitMap expected_card_bm(_card_bm.size(), false);
  2016     G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
  2017                                                  &_region_bm,
  2018                                                  &_card_bm,
  2019                                                  &expected_region_bm,
  2020                                                  &expected_card_bm);
  2022     if (G1CollectedHeap::use_parallel_gc_threads()) {
  2023       g1h->set_par_threads((int)n_workers);
  2024       g1h->workers()->run_task(&g1_par_verify_task);
  2025       // Done with the parallel phase so reset to 0.
  2026       g1h->set_par_threads(0);
  2028       assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
  2029              "sanity check");
  2030     } else {
  2031       g1_par_verify_task.work(0);
  2034     guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
  2037   size_t start_used_bytes = g1h->used();
  2038   g1h->set_marking_complete();
  2040   double count_end = os::elapsedTime();
  2041   double this_final_counting_time = (count_end - start);
  2042   _total_counting_time += this_final_counting_time;
  2044   if (G1PrintRegionLivenessInfo) {
  2045     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
  2046     _g1h->heap_region_iterate(&cl);
  2049   // Install newly created mark bitMap as "prev".
  2050   swapMarkBitMaps();
  2052   g1h->reset_gc_time_stamp();
  2054   // Note end of marking in all heap regions.
  2055   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
  2056   if (G1CollectedHeap::use_parallel_gc_threads()) {
  2057     g1h->set_par_threads((int)n_workers);
  2058     g1h->workers()->run_task(&g1_par_note_end_task);
  2059     g1h->set_par_threads(0);
  2061     assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
  2062            "sanity check");
  2063   } else {
  2064     g1_par_note_end_task.work(0);
  2066   g1h->check_gc_time_stamps();
  2068   if (!cleanup_list_is_empty()) {
  2069     // The cleanup list is not empty, so we'll have to process it
  2070     // concurrently. Notify anyone else that might be wanting free
  2071     // regions that there will be more free regions coming soon.
  2072     g1h->set_free_regions_coming();
  2075   // call below, since it affects the metric by which we sort the heap
  2076   // regions.
  2077   if (G1ScrubRemSets) {
  2078     double rs_scrub_start = os::elapsedTime();
  2079     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
  2080     if (G1CollectedHeap::use_parallel_gc_threads()) {
  2081       g1h->set_par_threads((int)n_workers);
  2082       g1h->workers()->run_task(&g1_par_scrub_rs_task);
  2083       g1h->set_par_threads(0);
  2085       assert(g1h->check_heap_region_claim_values(
  2086                                             HeapRegion::ScrubRemSetClaimValue),
  2087              "sanity check");
  2088     } else {
  2089       g1_par_scrub_rs_task.work(0);
  2092     double rs_scrub_end = os::elapsedTime();
  2093     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
  2094     _total_rs_scrub_time += this_rs_scrub_time;
  2097   // this will also free any regions totally full of garbage objects,
  2098   // and sort the regions.
  2099   g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
  2101   // Statistics.
  2102   double end = os::elapsedTime();
  2103   _cleanup_times.add((end - start) * 1000.0);
  2105   if (G1Log::fine()) {
  2106     g1h->print_size_transition(gclog_or_tty,
  2107                                start_used_bytes,
  2108                                g1h->used(),
  2109                                g1h->capacity());
  2112   // Clean up will have freed any regions completely full of garbage.
  2113   // Update the soft reference policy with the new heap occupancy.
  2114   Universe::update_heap_info_at_gc();
  2116   // We need to make this be a "collection" so any collection pause that
  2117   // races with it goes around and waits for completeCleanup to finish.
  2118   g1h->increment_total_collections();
  2120   // We reclaimed old regions so we should calculate the sizes to make
  2121   // sure we update the old gen/space data.
  2122   g1h->g1mm()->update_sizes();
  2124   if (VerifyDuringGC) {
  2125     HandleMark hm;  // handle scope
  2126     gclog_or_tty->print(" VerifyDuringGC:(after)");
  2127     Universe::heap()->prepare_for_verify();
  2128     Universe::verify(/* silent */ false,
  2129                      /* option */ VerifyOption_G1UsePrevMarking);
  2132   g1h->verify_region_sets_optional();
  2135 void ConcurrentMark::completeCleanup() {
  2136   if (has_aborted()) return;
  2138   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  2140   _cleanup_list.verify_optional();
  2141   FreeRegionList tmp_free_list("Tmp Free List");
  2143   if (G1ConcRegionFreeingVerbose) {
  2144     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
  2145                            "cleanup list has %u entries",
  2146                            _cleanup_list.length());
  2149   // No one else should be accessing the _cleanup_list at this point,
  2150   // so it's not necessary to take any locks
  2151   while (!_cleanup_list.is_empty()) {
  2152     HeapRegion* hr = _cleanup_list.remove_head();
  2153     assert(hr != NULL, "the list was not empty");
  2154     hr->par_clear();
  2155     tmp_free_list.add_as_tail(hr);
  2157     // Instead of adding one region at a time to the secondary_free_list,
  2158     // we accumulate them in the local list and move them a few at a
  2159     // time. This also cuts down on the number of notify_all() calls
  2160     // we do during this process. We'll also append the local list when
  2161     // _cleanup_list is empty (which means we just removed the last
  2162     // region from the _cleanup_list).
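           // For example, if G1SecondaryFreeListAppendLength is 5, the
           // regions are handed over in batches of five, plus a final,
           // possibly shorter, batch once the cleanup list runs empty.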
  2163     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
  2164         _cleanup_list.is_empty()) {
  2165       if (G1ConcRegionFreeingVerbose) {
  2166         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
  2167                                "appending %u entries to the secondary_free_list, "
  2168                                "cleanup list still has %u entries",
  2169                                tmp_free_list.length(),
  2170                                _cleanup_list.length());
  2174         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
  2175         g1h->secondary_free_list_add_as_tail(&tmp_free_list);
  2176         SecondaryFreeList_lock->notify_all();
  2179       if (G1StressConcRegionFreeing) {
  2180         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
  2181           os::sleep(Thread::current(), (jlong) 1, false);
  2186   assert(tmp_free_list.is_empty(), "post-condition");
  2189 // Supporting Object and Oop closures for reference discovery
  2190 // and processing during marking.
  2192 bool G1CMIsAliveClosure::do_object_b(oop obj) {
  2193   HeapWord* addr = (HeapWord*)obj;
  2194   return addr != NULL &&
  2195          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
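       // In other words, a non-NULL reference is treated as live if it
       // points outside the G1 reserved heap (and so is not covered by
       // this marking cycle) or if it refers to an object that is not
       // known to be dead according to the current marking information.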
  2198 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
  2199 // Uses the CMTask associated with a worker thread (for serial reference
  2200 // processing the CMTask for worker 0 is used) to preserve (mark) and
  2201 // trace referent objects.
  2202 //
  2203 // Using the CMTask and embedded local queues avoids having the worker
  2204 // threads operating on the global mark stack. This reduces the risk
  2205 // of overflowing the stack - which we would rather avoid at this late
  2206 // stage. Also, using the tasks' local queues removes the potential
  2207 // for the workers to interfere with each other, which could happen
  2208 // if they were operating on the global stack.
  2210 class G1CMKeepAliveAndDrainClosure: public OopClosure {
  2211   ConcurrentMark* _cm;
  2212   CMTask*         _task;
  2213   int             _ref_counter_limit;
  2214   int             _ref_counter;
  2215   bool            _is_serial;
  2216  public:
  2217   G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
  2218     _cm(cm), _task(task), _is_serial(is_serial),
  2219     _ref_counter_limit(G1RefProcDrainInterval) {
  2220     assert(_ref_counter_limit > 0, "sanity");
  2221     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
  2222     _ref_counter = _ref_counter_limit;
  2225   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  2226   virtual void do_oop(      oop* p) { do_oop_work(p); }
  2228   template <class T> void do_oop_work(T* p) {
  2229     if (!_cm->has_overflown()) {
  2230       oop obj = oopDesc::load_decode_heap_oop(p);
  2231       if (_cm->verbose_high()) {
  2232         gclog_or_tty->print_cr("\t[%u] we're looking at location "
  2233                                "*"PTR_FORMAT" = "PTR_FORMAT,
  2234                                _task->worker_id(), p, (void*) obj);
  2237       _task->deal_with_reference(obj);
  2238       _ref_counter--;
  2240       if (_ref_counter == 0) {
  2241         // We have dealt with _ref_counter_limit references, pushing them
  2242         // and objects reachable from them on to the local stack (and
  2243         // possibly the global stack). Call CMTask::do_marking_step() to
  2244         // process these entries.
  2245         //
  2246         // We call CMTask::do_marking_step() in a loop, which we'll exit if
  2247         // there's nothing more to do (i.e. we're done with the entries that
  2248         // were pushed as a result of the CMTask::deal_with_reference() calls
  2249         // above) or we overflow.
  2250         //
  2251         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
  2252         // flag while there may still be some work to do. (See the comment at
  2253         // the beginning of CMTask::do_marking_step() for those conditions -
  2254         // one of which is reaching the specified time target.) It is only
  2255         // when CMTask::do_marking_step() returns without setting the
  2256         // has_aborted() flag that the marking step has completed.
  2257         do {
  2258           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
  2259           _task->do_marking_step(mark_step_duration_ms,
  2260                                  false      /* do_termination */,
  2261                                  _is_serial);
  2262         } while (_task->has_aborted() && !_cm->has_overflown());
  2263         _ref_counter = _ref_counter_limit;
  2265     } else {
  2266       if (_cm->verbose_high()) {
  2267          gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id());
  2271 };
  2273 // 'Drain' oop closure used by both serial and parallel reference processing.
  2274 // Uses the CMTask associated with a given worker thread (for serial
  2275 // reference processing the CMTask for worker 0 is used). Calls the
  2276 // do_marking_step routine, with an unbelievably large timeout value,
  2277 // to drain the marking data structures of the remaining entries
  2278 // added by the 'keep alive' oop closure above.
  2280 class G1CMDrainMarkingStackClosure: public VoidClosure {
  2281   ConcurrentMark* _cm;
  2282   CMTask*         _task;
  2283   bool            _is_serial;
  2284  public:
  2285   G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
  2286     _cm(cm), _task(task), _is_serial(is_serial) {
  2287     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
  2290   void do_void() {
  2291     do {
  2292       if (_cm->verbose_high()) {
  2293         gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step - serial: %s",
  2294                                _task->worker_id(), BOOL_TO_STR(_is_serial));
  2297       // We call CMTask::do_marking_step() to completely drain the local
  2298       // and global marking stacks of entries pushed by the 'keep alive'
  2299       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
  2300       //
  2301       // CMTask::do_marking_step() is called in a loop, which we'll exit
  2302       // if there's nothing more to do (i.e. we've completely drained the
  2303       // entries that were pushed as a result of applying the 'keep alive'
  2304       // closure to the entries on the discovered ref lists) or we overflow
  2305       // the global marking stack.
  2306       //
  2307       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
  2308       // flag while there may still be some work to do. (See the comment at
  2309       // the beginning of CMTask::do_marking_step() for those conditions -
  2310       // one of which is reaching the specified time target.) It is only
  2311       // when CMTask::do_marking_step() returns without setting the
  2312       // has_aborted() flag that the marking step has completed.
  2314       _task->do_marking_step(1000000000.0 /* something very large */,
  2315                              true         /* do_termination */,
  2316                              _is_serial);
  2317     } while (_task->has_aborted() && !_cm->has_overflown());
  2319 };
  2321 // Implementation of AbstractRefProcTaskExecutor for parallel
  2322 // reference processing at the end of G1 concurrent marking
  2324 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
  2325 private:
  2326   G1CollectedHeap* _g1h;
  2327   ConcurrentMark*  _cm;
  2328   WorkGang*        _workers;
  2329   int              _active_workers;
  2331 public:
  2332   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
  2333                         ConcurrentMark* cm,
  2334                         WorkGang* workers,
  2335                         int n_workers) :
  2336     _g1h(g1h), _cm(cm),
  2337     _workers(workers), _active_workers(n_workers) { }
  2339   // Executes the given task using concurrent marking worker threads.
  2340   virtual void execute(ProcessTask& task);
  2341   virtual void execute(EnqueueTask& task);
  2342 };
  2344 class G1CMRefProcTaskProxy: public AbstractGangTask {
  2345   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
  2346   ProcessTask&     _proc_task;
  2347   G1CollectedHeap* _g1h;
  2348   ConcurrentMark*  _cm;
  2350 public:
  2351   G1CMRefProcTaskProxy(ProcessTask& proc_task,
  2352                      G1CollectedHeap* g1h,
  2353                      ConcurrentMark* cm) :
  2354     AbstractGangTask("Process reference objects in parallel"),
  2355     _proc_task(proc_task), _g1h(g1h), _cm(cm) {
  2356     ReferenceProcessor* rp = _g1h->ref_processor_cm();
  2357     assert(rp->processing_is_mt(), "shouldn't be here otherwise");
  2360   virtual void work(uint worker_id) {
  2361     CMTask* task = _cm->task(worker_id);
  2362     G1CMIsAliveClosure g1_is_alive(_g1h);
  2363     G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
  2364     G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
  2366     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
  2368 };
  2370 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
  2371   assert(_workers != NULL, "Need parallel worker threads.");
  2372   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
  2374   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
  2376   // We need to reset the concurrency level before each
  2377   // proxy task execution, so that the termination protocol
  2378   // and overflow handling in CMTask::do_marking_step() know
  2379   // how many workers to wait for.
  2380   _cm->set_concurrency(_active_workers);
  2381   _g1h->set_par_threads(_active_workers);
  2382   _workers->run_task(&proc_task_proxy);
  2383   _g1h->set_par_threads(0);
  2386 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
  2387   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
  2388   EnqueueTask& _enq_task;
  2390 public:
  2391   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
  2392     AbstractGangTask("Enqueue reference objects in parallel"),
  2393     _enq_task(enq_task) { }
  2395   virtual void work(uint worker_id) {
  2396     _enq_task.work(worker_id);
  2398 };
  2400 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
  2401   assert(_workers != NULL, "Need parallel worker threads.");
  2402   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
  2404   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
  2406   // Not strictly necessary but...
  2407   //
  2408   // We need to reset the concurrency level before each
  2409   // proxy task execution, so that the termination protocol
  2410   // and overflow handling in CMTask::do_marking_step() know
  2411   // how many workers to wait for.
  2412   _cm->set_concurrency(_active_workers);
  2413   _g1h->set_par_threads(_active_workers);
  2414   _workers->run_task(&enq_task_proxy);
  2415   _g1h->set_par_threads(0);
  2418 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
  2419   if (has_overflown()) {
  2420     // Skip processing the discovered references if we have
  2421     // overflown the global marking stack. Reference objects
  2422     // only get discovered once so it is OK to not
  2423     // de-populate the discovered reference lists. We could have,
  2424     // but the only benefit would be that, when marking restarts,
  2425     // fewer reference objects are discovered.
  2426     return;
  2429   ResourceMark rm;
  2430   HandleMark   hm;
  2432   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  2434   // Is alive closure.
  2435   G1CMIsAliveClosure g1_is_alive(g1h);
  2437   // Inner scope to exclude the cleaning of the string and symbol
  2438   // tables from the displayed time.
  2440     if (G1Log::finer()) {
  2441       gclog_or_tty->put(' ');
  2443     TraceTime t("GC ref-proc", G1Log::finer(), false, gclog_or_tty);
  2445     ReferenceProcessor* rp = g1h->ref_processor_cm();
  2447     // See the comment in G1CollectedHeap::ref_processing_init()
  2448     // about how reference processing currently works in G1.
  2450     // Set the soft reference policy
  2451     rp->setup_policy(clear_all_soft_refs);
  2452     assert(_markStack.isEmpty(), "mark stack should be empty");
  2454     // Instances of the 'Keep Alive' and 'Complete GC' closures used
  2455     // in serial reference processing. Note these closures are also
  2456     // used for serially processing (by the current thread) the
  2457     // JNI references during parallel reference processing.
  2458     //
  2459     // These closures do not need to synchronize with the worker
  2460     // threads involved in parallel reference processing as these
  2461     // instances are executed serially by the current thread (e.g.
  2462     // reference processing is not multi-threaded and is thus
  2463     // performed by the current thread instead of a gang worker).
  2464     //
  2465     // The gang tasks involved in parallel reference processing create
  2466     // their own instances of these closures, which do their own
  2467     // synchronization among themselves.
  2468     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
  2469     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
  2471     // We need at least one active thread. If reference processing
  2472     // is not multi-threaded we use the current (VMThread) thread,
  2473     // otherwise we use the work gang from the G1CollectedHeap and
  2474     // we utilize all the worker threads we can.
  2475     bool processing_is_mt = rp->processing_is_mt() && g1h->workers() != NULL;
  2476     uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
  2477     active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
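           // That is, clamp the worker count to [1, _max_worker_id]: never
           // more threads than there are CMTask instances to hand out, and
           // at least one thread even if no work gang is available.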
  2479     // Parallel processing task executor.
  2480     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
  2481                                               g1h->workers(), active_workers);
  2482     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
  2484     // Set the concurrency level. The phase was already set prior to
  2485     // executing the remark task.
  2486     set_concurrency(active_workers);
  2488     // Set the degree of MT processing here.  If the discovery was done MT,
  2489     // the number of threads involved during discovery could differ from
  2490     // the number of active workers.  This is OK as long as the discovered
  2491     // Reference lists are balanced (see balance_all_queues() and balance_queues()).
  2492     rp->set_active_mt_degree(active_workers);
  2494     // Process the weak references.
  2495     rp->process_discovered_references(&g1_is_alive,
  2496                                       &g1_keep_alive,
  2497                                       &g1_drain_mark_stack,
  2498                                       executor);
  2500     // The do_oop work routines of the keep_alive and drain_marking_stack
  2501     // oop closures will set the has_overflown flag if we overflow the
  2502     // global marking stack.
  2504     assert(_markStack.overflow() || _markStack.isEmpty(),
  2505             "mark stack should be empty (unless it overflowed)");
  2507     if (_markStack.overflow()) {
  2508       // This should have been done already when we tried to push an
  2509       // entry on to the global mark stack. But let's do it again.
  2510       set_has_overflown();
  2513     assert(rp->num_q() == active_workers, "why not");
  2515     rp->enqueue_discovered_references(executor);
  2517     rp->verify_no_references_recorded();
  2518     assert(!rp->discovery_enabled(), "Post condition");
  2521   // Now clean up stale oops in StringTable
  2522   StringTable::unlink(&g1_is_alive);
  2523   // Clean up unreferenced symbols in symbol table.
  2524   SymbolTable::unlink();
  2527 void ConcurrentMark::swapMarkBitMaps() {
  2528   CMBitMapRO* temp = _prevMarkBitMap;
  2529   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
  2530   _nextMarkBitMap  = (CMBitMap*)  temp;
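         // After the swap, the bitmap that marking just populated becomes
         // the "prev" (consistent) bitmap, and the old "prev" bitmap
         // becomes the "next" one, to be cleared before the following
         // marking cycle uses it.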
  2533 class CMRemarkTask: public AbstractGangTask {
  2534 private:
  2535   ConcurrentMark* _cm;
  2536   bool            _is_serial;
  2537 public:
  2538   void work(uint worker_id) {
  2539     // Since all available tasks are actually started, we should
  2540     // only proceed if we're supposed to be active.
  2541     if (worker_id < _cm->active_tasks()) {
  2542       CMTask* task = _cm->task(worker_id);
  2543       task->record_start_time();
  2544       do {
  2545         task->do_marking_step(1000000000.0 /* something very large */,
  2546                               true         /* do_termination       */,
  2547                               _is_serial);
  2548       } while (task->has_aborted() && !_cm->has_overflown());
  2549       // If we overflow, then we do not want to restart. We instead
  2550       // want to abort remark and do concurrent marking again.
  2551       task->record_end_time();
  2555   CMRemarkTask(ConcurrentMark* cm, int active_workers, bool is_serial) :
  2556     AbstractGangTask("Par Remark"), _cm(cm), _is_serial(is_serial) {
  2557     _cm->terminator()->reset_for_reuse(active_workers);
  2559 };
  2561 void ConcurrentMark::checkpointRootsFinalWork() {
  2562   ResourceMark rm;
  2563   HandleMark   hm;
  2564   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  2566   g1h->ensure_parsability(false);
  2568   if (G1CollectedHeap::use_parallel_gc_threads()) {
  2569     G1CollectedHeap::StrongRootsScope srs(g1h);
  2570     // this is remark, so we'll use up all active threads
  2571     uint active_workers = g1h->workers()->active_workers();
  2572     if (active_workers == 0) {
  2573       assert(active_workers > 0, "Should have been set earlier");
  2574       active_workers = (uint) ParallelGCThreads;
  2575       g1h->workers()->set_active_workers(active_workers);
  2577     set_concurrency_and_phase(active_workers, false /* concurrent */);
  2578     // Leave _parallel_marking_threads at its
  2579     // value originally calculated in the ConcurrentMark
  2580     // constructor and pass values of the active workers
  2581     // through the gang in the task.
  2583     CMRemarkTask remarkTask(this, active_workers, false /* is_serial */);
  2584     // We will start all available threads, even if we decide that the
  2585     // active_workers will be fewer. The extra ones will just bail out
  2586     // immediately.
  2587     g1h->set_par_threads(active_workers);
  2588     g1h->workers()->run_task(&remarkTask);
  2589     g1h->set_par_threads(0);
  2590   } else {
  2591     G1CollectedHeap::StrongRootsScope srs(g1h);
  2592     uint active_workers = 1;
  2593     set_concurrency_and_phase(active_workers, false /* concurrent */);
  2595     // Note - if there's no work gang then the VMThread will be
  2596     // the thread to execute the remark - serially. We have
  2597     // to pass true for the is_serial parameter so that
  2598     // CMTask::do_marking_step() doesn't enter the sync
  2599     // barriers in the event of an overflow. Doing so will
  2600     // cause an assert that the current thread is not a
  2601     // concurrent GC thread.
  2602     CMRemarkTask remarkTask(this, active_workers, true /* is_serial*/);
  2603     remarkTask.work(0);
  2605   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  2606   guarantee(has_overflown() ||
  2607             satb_mq_set.completed_buffers_num() == 0,
  2608             err_msg("Invariant: has_overflown = %s, num buffers = %d",
  2609                     BOOL_TO_STR(has_overflown()),
  2610                     satb_mq_set.completed_buffers_num()));
  2612   print_stats();
  2615 #ifndef PRODUCT
  2617 class PrintReachableOopClosure: public OopClosure {
  2618 private:
  2619   G1CollectedHeap* _g1h;
  2620   outputStream*    _out;
  2621   VerifyOption     _vo;
  2622   bool             _all;
  2624 public:
  2625   PrintReachableOopClosure(outputStream* out,
  2626                            VerifyOption  vo,
  2627                            bool          all) :
  2628     _g1h(G1CollectedHeap::heap()),
  2629     _out(out), _vo(vo), _all(all) { }
  2631   void do_oop(narrowOop* p) { do_oop_work(p); }
  2632   void do_oop(      oop* p) { do_oop_work(p); }
  2634   template <class T> void do_oop_work(T* p) {
  2635     oop         obj = oopDesc::load_decode_heap_oop(p);
  2636     const char* str = NULL;
  2637     const char* str2 = "";
  2639     if (obj == NULL) {
  2640       str = "";
  2641     } else if (!_g1h->is_in_g1_reserved(obj)) {
  2642       str = " O";
  2643     } else {
  2644       HeapRegion* hr  = _g1h->heap_region_containing(obj);
  2645       guarantee(hr != NULL, "invariant");
  2646       bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo);
  2647       bool marked = _g1h->is_marked(obj, _vo);
  2649       if (over_tams) {
  2650         str = " >";
  2651         if (marked) {
  2652           str2 = " AND MARKED";
  2654       } else if (marked) {
  2655         str = " M";
  2656       } else {
  2657         str = " NOT";
  2661     _out->print_cr("  "PTR_FORMAT": "PTR_FORMAT"%s%s",
  2662                    p, (void*) obj, str, str2);
  2664 };
  2666 class PrintReachableObjectClosure : public ObjectClosure {
  2667 private:
  2668   G1CollectedHeap* _g1h;
  2669   outputStream*    _out;
  2670   VerifyOption     _vo;
  2671   bool             _all;
  2672   HeapRegion*      _hr;
  2674 public:
  2675   PrintReachableObjectClosure(outputStream* out,
  2676                               VerifyOption  vo,
  2677                               bool          all,
  2678                               HeapRegion*   hr) :
  2679     _g1h(G1CollectedHeap::heap()),
  2680     _out(out), _vo(vo), _all(all), _hr(hr) { }
  2682   void do_object(oop o) {
  2683     bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo);
  2684     bool marked = _g1h->is_marked(o, _vo);
  2685     bool print_it = _all || over_tams || marked;
  2687     if (print_it) {
  2688       _out->print_cr(" "PTR_FORMAT"%s",
  2689                      o, (over_tams) ? " >" : (marked) ? " M" : "");
  2690       PrintReachableOopClosure oopCl(_out, _vo, _all);
  2691       o->oop_iterate_no_header(&oopCl);
  2694 };
  2696 class PrintReachableRegionClosure : public HeapRegionClosure {
  2697 private:
  2698   G1CollectedHeap* _g1h;
  2699   outputStream*    _out;
  2700   VerifyOption     _vo;
  2701   bool             _all;
  2703 public:
  2704   bool doHeapRegion(HeapRegion* hr) {
  2705     HeapWord* b = hr->bottom();
  2706     HeapWord* e = hr->end();
  2707     HeapWord* t = hr->top();
  2708     HeapWord* p = _g1h->top_at_mark_start(hr, _vo);
  2709     _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
  2710                    "TAMS: "PTR_FORMAT, b, e, t, p);
  2711     _out->cr();
  2713     HeapWord* from = b;
  2714     HeapWord* to   = t;
  2716     if (to > from) {
  2717       _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
  2718       _out->cr();
  2719       PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
  2720       hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
  2721       _out->cr();
  2724     return false;
  2727   PrintReachableRegionClosure(outputStream* out,
  2728                               VerifyOption  vo,
  2729                               bool          all) :
  2730     _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { }
  2731 };
  2733 void ConcurrentMark::print_reachable(const char* str,
  2734                                      VerifyOption vo,
  2735                                      bool all) {
  2736   gclog_or_tty->cr();
  2737   gclog_or_tty->print_cr("== Doing heap dump... ");
  2739   if (G1PrintReachableBaseFile == NULL) {
  2740     gclog_or_tty->print_cr("  #### error: no base file defined");
  2741     return;
  2744   if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
  2745       (JVM_MAXPATHLEN - 1)) {
  2746     gclog_or_tty->print_cr("  #### error: file name too long");
  2747     return;
  2750   char file_name[JVM_MAXPATHLEN];
  2751   sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
  2752   gclog_or_tty->print_cr("  dumping to file %s", file_name);
  2754   fileStream fout(file_name);
  2755   if (!fout.is_open()) {
  2756     gclog_or_tty->print_cr("  #### error: could not open file");
  2757     return;
  2760   outputStream* out = &fout;
  2761   out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo));
  2762   out->cr();
  2764   out->print_cr("--- ITERATING OVER REGIONS");
  2765   out->cr();
  2766   PrintReachableRegionClosure rcl(out, vo, all);
  2767   _g1h->heap_region_iterate(&rcl);
  2768   out->cr();
  2770   gclog_or_tty->print_cr("  done");
  2771   gclog_or_tty->flush();
  2774 #endif // PRODUCT
  2776 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
  2777   // Note we are overriding the read-only view of the prev map here, via
  2778   // the cast.
  2779   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
  2782 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
  2783   _nextMarkBitMap->clearRange(mr);
  2786 void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) {
  2787   clearRangePrevBitmap(mr);
  2788   clearRangeNextBitmap(mr);
  2791 HeapRegion*
  2792 ConcurrentMark::claim_region(uint worker_id) {
  2793   // "checkpoint" the finger
  2794   HeapWord* finger = _finger;
  2796   // _heap_end will not change underneath our feet; it only changes at
  2797   // yield points.
  2798   while (finger < _heap_end) {
  2799     assert(_g1h->is_in_g1_reserved(finger), "invariant");
  2801     // Note on how this code handles humongous regions. In the
  2802     // normal case the finger will reach the start of a "starts
  2803     // humongous" (SH) region. Its end will either be the end of the
  2804     // last "continues humongous" (CH) region in the sequence, or the
  2805     // standard end of the SH region (if the SH is the only region in
  2806     // the sequence). That way claim_region() will skip over the CH
  2807     // regions. However, there is a subtle race between a CM thread
  2808     // executing this method and a mutator thread doing a humongous
  2809     // object allocation. The two are not mutually exclusive as the CM
  2810     // thread does not need to hold the Heap_lock when it gets
  2811     // here. So there is a chance that claim_region() will come across
   2812     // a free region that's in the process of becoming a SH or a CH
  2813     // region. In the former case, it will either
  2814     //   a) Miss the update to the region's end, in which case it will
  2815     //      visit every subsequent CH region, will find their bitmaps
  2816     //      empty, and do nothing, or
  2817     //   b) Will observe the update of the region's end (in which case
  2818     //      it will skip the subsequent CH regions).
  2819     // If it comes across a region that suddenly becomes CH, the
  2820     // scenario will be similar to b). So, the race between
  2821     // claim_region() and a humongous object allocation might force us
  2822     // to do a bit of unnecessary work (due to some unnecessary bitmap
   2823     // iterations) but it should not introduce any correctness issues.
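            // In outline, a claim works as follows: read the global finger,
            // look up the region containing it, and try to CAS the global
            // finger from the value we read to that region's end. On success
            // this task owns the range [finger, end); on failure another
            // worker moved the finger and we simply re-read it and retry.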
  2824     HeapRegion* curr_region   = _g1h->heap_region_containing_raw(finger);
  2825     HeapWord*   bottom        = curr_region->bottom();
  2826     HeapWord*   end           = curr_region->end();
  2827     HeapWord*   limit         = curr_region->next_top_at_mark_start();
  2829     if (verbose_low()) {
  2830       gclog_or_tty->print_cr("[%u] curr_region = "PTR_FORMAT" "
  2831                              "["PTR_FORMAT", "PTR_FORMAT"), "
  2832                              "limit = "PTR_FORMAT,
  2833                              worker_id, curr_region, bottom, end, limit);
  2836     // Is the gap between reading the finger and doing the CAS too long?
  2837     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
  2838     if (res == finger) {
  2839       // we succeeded
   2841       // notice that _finger == end cannot be guaranteed here since
  2842       // someone else might have moved the finger even further
  2843       assert(_finger >= end, "the finger should have moved forward");
  2845       if (verbose_low()) {
  2846         gclog_or_tty->print_cr("[%u] we were successful with region = "
  2847                                PTR_FORMAT, worker_id, curr_region);
  2850       if (limit > bottom) {
  2851         if (verbose_low()) {
  2852           gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is not empty, "
  2853                                  "returning it ", worker_id, curr_region);
  2855         return curr_region;
  2856       } else {
  2857         assert(limit == bottom,
  2858                "the region limit should be at bottom");
  2859         if (verbose_low()) {
  2860           gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is empty, "
  2861                                  "returning NULL", worker_id, curr_region);
  2863         // we return NULL and the caller should try calling
  2864         // claim_region() again.
  2865         return NULL;
  2867     } else {
  2868       assert(_finger > finger, "the finger should have moved forward");
  2869       if (verbose_low()) {
  2870         gclog_or_tty->print_cr("[%u] somebody else moved the finger, "
  2871                                "global finger = "PTR_FORMAT", "
  2872                                "our finger = "PTR_FORMAT,
  2873                                worker_id, _finger, finger);
  2876       // read it again
  2877       finger = _finger;
  2881   return NULL;
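        // For illustration only (a sketch, not part of the original source):
        // the caller, CMTask::do_marking_step(), is expected to retry roughly
        // along these lines
        //
        //   while (!_cm->out_of_regions() && !has_aborted()) {
        //     HeapRegion* hr = _cm->claim_region(_worker_id);
        //     if (hr != NULL) {
        //       setup_for_region(hr); // then scan [bottom, ntams) via the bitmap
        //     }
        //   }
        //
        // i.e. a NULL result simply means "retry" until the global finger has
        // reached the end of the heap.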
  2884 #ifndef PRODUCT
  2885 enum VerifyNoCSetOopsPhase {
  2886   VerifyNoCSetOopsStack,
  2887   VerifyNoCSetOopsQueues,
  2888   VerifyNoCSetOopsSATBCompleted,
  2889   VerifyNoCSetOopsSATBThread
  2890 };
  2892 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure  {
  2893 private:
  2894   G1CollectedHeap* _g1h;
  2895   VerifyNoCSetOopsPhase _phase;
  2896   int _info;
  2898   const char* phase_str() {
  2899     switch (_phase) {
  2900     case VerifyNoCSetOopsStack:         return "Stack";
  2901     case VerifyNoCSetOopsQueues:        return "Queue";
  2902     case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers";
  2903     case VerifyNoCSetOopsSATBThread:    return "Thread SATB Buffers";
  2904     default:                            ShouldNotReachHere();
  2906     return NULL;
  2909   void do_object_work(oop obj) {
  2910     guarantee(!_g1h->obj_in_cs(obj),
  2911               err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d",
  2912                       (void*) obj, phase_str(), _info));
  2915 public:
  2916   VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { }
  2918   void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) {
  2919     _phase = phase;
  2920     _info = info;
  2923   virtual void do_oop(oop* p) {
  2924     oop obj = oopDesc::load_decode_heap_oop(p);
  2925     do_object_work(obj);
  2928   virtual void do_oop(narrowOop* p) {
  2929     // We should not come across narrow oops while scanning marking
  2930     // stacks and SATB buffers.
  2931     ShouldNotReachHere();
  2934   virtual void do_object(oop obj) {
  2935     do_object_work(obj);
  2937 };
  2939 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks,
  2940                                          bool verify_enqueued_buffers,
  2941                                          bool verify_thread_buffers,
  2942                                          bool verify_fingers) {
  2943   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
  2944   if (!G1CollectedHeap::heap()->mark_in_progress()) {
  2945     return;
  2948   VerifyNoCSetOopsClosure cl;
  2950   if (verify_stacks) {
  2951     // Verify entries on the global mark stack
  2952     cl.set_phase(VerifyNoCSetOopsStack);
  2953     _markStack.oops_do(&cl);
  2955     // Verify entries on the task queues
  2956     for (uint i = 0; i < _max_worker_id; i += 1) {
  2957       cl.set_phase(VerifyNoCSetOopsQueues, i);
  2958       CMTaskQueue* queue = _task_queues->queue(i);
  2959       queue->oops_do(&cl);
  2963   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  2965   // Verify entries on the enqueued SATB buffers
  2966   if (verify_enqueued_buffers) {
  2967     cl.set_phase(VerifyNoCSetOopsSATBCompleted);
  2968     satb_qs.iterate_completed_buffers_read_only(&cl);
  2971   // Verify entries on the per-thread SATB buffers
  2972   if (verify_thread_buffers) {
  2973     cl.set_phase(VerifyNoCSetOopsSATBThread);
  2974     satb_qs.iterate_thread_buffers_read_only(&cl);
  2977   if (verify_fingers) {
  2978     // Verify the global finger
  2979     HeapWord* global_finger = finger();
  2980     if (global_finger != NULL && global_finger < _heap_end) {
  2981       // The global finger always points to a heap region boundary. We
  2982       // use heap_region_containing_raw() to get the containing region
  2983       // given that the global finger could be pointing to a free region
  2984       // which subsequently becomes continues humongous. If that
  2985       // happens, heap_region_containing() will return the bottom of the
  2986       // corresponding starts humongous region and the check below will
  2987       // not hold any more.
  2988       HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
  2989       guarantee(global_finger == global_hr->bottom(),
  2990                 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT,
  2991                         global_finger, HR_FORMAT_PARAMS(global_hr)));
  2994     // Verify the task fingers
  2995     assert(parallel_marking_threads() <= _max_worker_id, "sanity");
  2996     for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {
  2997       CMTask* task = _tasks[i];
  2998       HeapWord* task_finger = task->finger();
  2999       if (task_finger != NULL && task_finger < _heap_end) {
  3000         // See above note on the global finger verification.
  3001         HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
  3002         guarantee(task_finger == task_hr->bottom() ||
  3003                   !task_hr->in_collection_set(),
  3004                   err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT,
  3005                           task_finger, HR_FORMAT_PARAMS(task_hr)));
  3010 #endif // PRODUCT
  3012 // Aggregate the counting data that was constructed concurrently
  3013 // with marking.
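        // Each worker has accumulated, per region, a count of marked bytes and
        // a card bitmap of its own. The closure below sums the per-worker byte
        // counts into each region's marked-bytes field and ORs the per-worker
        // card bitmaps into the global card bitmap.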
  3014 class AggregateCountDataHRClosure: public HeapRegionClosure {
  3015   G1CollectedHeap* _g1h;
  3016   ConcurrentMark* _cm;
  3017   CardTableModRefBS* _ct_bs;
  3018   BitMap* _cm_card_bm;
  3019   uint _max_worker_id;
  3021  public:
  3022   AggregateCountDataHRClosure(G1CollectedHeap* g1h,
  3023                               BitMap* cm_card_bm,
  3024                               uint max_worker_id) :
  3025     _g1h(g1h), _cm(g1h->concurrent_mark()),
  3026     _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
  3027     _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
  3029   bool doHeapRegion(HeapRegion* hr) {
  3030     if (hr->continuesHumongous()) {
  3031       // We will ignore these here and process them when their
  3032       // associated "starts humongous" region is processed.
  3033       // Note that we cannot rely on their associated
   3034       // "starts humongous" region to have its bit set to 1
  3035       // since, due to the region chunking in the parallel region
  3036       // iteration, a "continues humongous" region might be visited
  3037       // before its associated "starts humongous".
  3038       return false;
  3041     HeapWord* start = hr->bottom();
  3042     HeapWord* limit = hr->next_top_at_mark_start();
  3043     HeapWord* end = hr->end();
  3045     assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
  3046            err_msg("Preconditions not met - "
  3047                    "start: "PTR_FORMAT", limit: "PTR_FORMAT", "
  3048                    "top: "PTR_FORMAT", end: "PTR_FORMAT,
  3049                    start, limit, hr->top(), hr->end()));
  3051     assert(hr->next_marked_bytes() == 0, "Precondition");
  3053     if (start == limit) {
  3054       // NTAMS of this region has not been set so nothing to do.
  3055       return false;
  3058     // 'start' should be in the heap.
  3059     assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
   3060     // 'end' *may* be just beyond the end of the heap (if hr is the last region)
  3061     assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
  3063     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
  3064     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
  3065     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
   3067     // If ntams is not card aligned then we bump the card bitmap index
   3068     // for limit so that we get all the cards spanned by
   3069     // the object ending at ntams.
   3070     // Note: if this is the last region in the heap then ntams
   3071     // could actually be just beyond the end of the heap;
   3072     // limit_idx will then correspond to a (non-existent) card
   3073     // that is also outside the heap.
  3074     if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
  3075       limit_idx += 1;
  3078     assert(limit_idx <= end_idx, "or else use atomics");
  3080     // Aggregate the "stripe" in the count data associated with hr.
  3081     uint hrs_index = hr->hrs_index();
  3082     size_t marked_bytes = 0;
  3084     for (uint i = 0; i < _max_worker_id; i += 1) {
  3085       size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
  3086       BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
  3088       // Fetch the marked_bytes in this region for task i and
  3089       // add it to the running total for this region.
  3090       marked_bytes += marked_bytes_array[hrs_index];
  3092       // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
  3093       // into the global card bitmap.
  3094       BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
  3096       while (scan_idx < limit_idx) {
  3097         assert(task_card_bm->at(scan_idx) == true, "should be");
  3098         _cm_card_bm->set_bit(scan_idx);
  3099         assert(_cm_card_bm->at(scan_idx) == true, "should be");
  3101         // BitMap::get_next_one_offset() can handle the case when
  3102         // its left_offset parameter is greater than its right_offset
  3103         // parameter. It does, however, have an early exit if
  3104         // left_offset == right_offset. So let's limit the value
  3105         // passed in for left offset here.
  3106         BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
  3107         scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
  3111     // Update the marked bytes for this region.
  3112     hr->add_to_marked_bytes(marked_bytes);
  3114     // Next heap region
  3115     return false;
  3117 };
  3119 class G1AggregateCountDataTask: public AbstractGangTask {
  3120 protected:
  3121   G1CollectedHeap* _g1h;
  3122   ConcurrentMark* _cm;
  3123   BitMap* _cm_card_bm;
  3124   uint _max_worker_id;
  3125   int _active_workers;
  3127 public:
  3128   G1AggregateCountDataTask(G1CollectedHeap* g1h,
  3129                            ConcurrentMark* cm,
  3130                            BitMap* cm_card_bm,
  3131                            uint max_worker_id,
  3132                            int n_workers) :
  3133     AbstractGangTask("Count Aggregation"),
  3134     _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
  3135     _max_worker_id(max_worker_id),
  3136     _active_workers(n_workers) { }
  3138   void work(uint worker_id) {
  3139     AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);
  3141     if (G1CollectedHeap::use_parallel_gc_threads()) {
  3142       _g1h->heap_region_par_iterate_chunked(&cl, worker_id,
  3143                                             _active_workers,
  3144                                             HeapRegion::AggregateCountClaimValue);
  3145     } else {
  3146       _g1h->heap_region_iterate(&cl);
  3149 };
  3152 void ConcurrentMark::aggregate_count_data() {
  3153   int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
  3154                         _g1h->workers()->active_workers() :
  3155                         1);
  3157   G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
  3158                                            _max_worker_id, n_workers);
  3160   if (G1CollectedHeap::use_parallel_gc_threads()) {
  3161     assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
  3162            "sanity check");
  3163     _g1h->set_par_threads(n_workers);
  3164     _g1h->workers()->run_task(&g1_par_agg_task);
  3165     _g1h->set_par_threads(0);
  3167     assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue),
  3168            "sanity check");
  3169     _g1h->reset_heap_region_claim_values();
  3170   } else {
  3171     g1_par_agg_task.work(0);
  3175 // Clear the per-worker arrays used to store the per-region counting data
  3176 void ConcurrentMark::clear_all_count_data() {
  3177   // Clear the global card bitmap - it will be filled during
  3178   // liveness count aggregation (during remark) and the
  3179   // final counting task.
  3180   _card_bm.clear();
  3182   // Clear the global region bitmap - it will be filled as part
  3183   // of the final counting task.
  3184   _region_bm.clear();
  3186   uint max_regions = _g1h->max_regions();
  3187   assert(_max_worker_id > 0, "uninitialized");
  3189   for (uint i = 0; i < _max_worker_id; i += 1) {
  3190     BitMap* task_card_bm = count_card_bitmap_for(i);
  3191     size_t* marked_bytes_array = count_marked_bytes_array_for(i);
  3193     assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
  3194     assert(marked_bytes_array != NULL, "uninitialized");
  3196     memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
  3197     task_card_bm->clear();
  3201 void ConcurrentMark::print_stats() {
  3202   if (verbose_stats()) {
  3203     gclog_or_tty->print_cr("---------------------------------------------------------------------");
  3204     for (size_t i = 0; i < _active_tasks; ++i) {
  3205       _tasks[i]->print_stats();
  3206       gclog_or_tty->print_cr("---------------------------------------------------------------------");
  3211 // abandon current marking iteration due to a Full GC
  3212 void ConcurrentMark::abort() {
  3213   // Clear all marks to force marking thread to do nothing
  3214   _nextMarkBitMap->clearAll();
  3215   // Clear the liveness counting data
  3216   clear_all_count_data();
  3217   // Empty mark stack
  3218   reset_marking_state();
  3219   for (uint i = 0; i < _max_worker_id; ++i) {
  3220     _tasks[i]->clear_region_fields();
  3222   _has_aborted = true;
  3224   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  3225   satb_mq_set.abandon_partial_marking();
   3226   // This can be called either during or outside marking; we'll read
  3227   // the expected_active value from the SATB queue set.
  3228   satb_mq_set.set_active_all_threads(
  3229                                  false, /* new active value */
  3230                                  satb_mq_set.is_active() /* expected_active */);
  3233 static void print_ms_time_info(const char* prefix, const char* name,
  3234                                NumberSeq& ns) {
  3235   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
  3236                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
  3237   if (ns.num() > 0) {
  3238     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
  3239                            prefix, ns.sd(), ns.maximum());
  3243 void ConcurrentMark::print_summary_info() {
  3244   gclog_or_tty->print_cr(" Concurrent marking:");
  3245   print_ms_time_info("  ", "init marks", _init_times);
  3246   print_ms_time_info("  ", "remarks", _remark_times);
  3248     print_ms_time_info("     ", "final marks", _remark_mark_times);
  3249     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
  3252   print_ms_time_info("  ", "cleanups", _cleanup_times);
  3253   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
  3254                          _total_counting_time,
  3255                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
  3256                           (double)_cleanup_times.num()
  3257                          : 0.0));
  3258   if (G1ScrubRemSets) {
  3259     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
  3260                            _total_rs_scrub_time,
  3261                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
  3262                             (double)_cleanup_times.num()
  3263                            : 0.0));
  3265   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
  3266                          (_init_times.sum() + _remark_times.sum() +
  3267                           _cleanup_times.sum())/1000.0);
  3268   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
  3269                 "(%8.2f s marking).",
  3270                 cmThread()->vtime_accum(),
  3271                 cmThread()->vtime_mark_accum());
  3274 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
  3275   if (use_parallel_marking_threads()) {
  3276     _parallel_workers->print_worker_threads_on(st);
  3280 // We take a break if someone is trying to stop the world.
  3281 bool ConcurrentMark::do_yield_check(uint worker_id) {
  3282   if (should_yield()) {
  3283     if (worker_id == 0) {
  3284       _g1h->g1_policy()->record_concurrent_pause();
  3286     cmThread()->yield();
  3287     return true;
  3288   } else {
  3289     return false;
  3293 bool ConcurrentMark::should_yield() {
  3294   return cmThread()->should_yield();
  3297 bool ConcurrentMark::containing_card_is_marked(void* p) {
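          // pointer_delta() gives the byte offset of p from the start of the
          // reserved heap; shifting it by card_shift (i.e. dividing by the
          // card size in bytes) gives the index of the card covering p in the
          // global liveness card bitmap.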
  3298   size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
  3299   return _card_bm.at(offset >> CardTableModRefBS::card_shift);
  3302 bool ConcurrentMark::containing_cards_are_marked(void* start,
  3303                                                  void* last) {
  3304   return containing_card_is_marked(start) &&
  3305          containing_card_is_marked(last);
  3308 #ifndef PRODUCT
  3309 // for debugging purposes
  3310 void ConcurrentMark::print_finger() {
  3311   gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
  3312                          _heap_start, _heap_end, _finger);
  3313   for (uint i = 0; i < _max_worker_id; ++i) {
  3314     gclog_or_tty->print("   %u: "PTR_FORMAT, i, _tasks[i]->finger());
  3316   gclog_or_tty->print_cr("");
  3318 #endif
  3320 void CMTask::scan_object(oop obj) {
  3321   assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
  3323   if (_cm->verbose_high()) {
  3324     gclog_or_tty->print_cr("[%u] we're scanning object "PTR_FORMAT,
  3325                            _worker_id, (void*) obj);
  3328   size_t obj_size = obj->size();
  3329   _words_scanned += obj_size;
  3331   obj->oop_iterate(_cm_oop_closure);
  3332   statsOnly( ++_objs_scanned );
  3333   check_limits();
  3336 // Closure for iteration over bitmaps
  3337 class CMBitMapClosure : public BitMapClosure {
  3338 private:
  3339   // the bitmap that is being iterated over
  3340   CMBitMap*                   _nextMarkBitMap;
  3341   ConcurrentMark*             _cm;
  3342   CMTask*                     _task;
  3344 public:
  3345   CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
  3346     _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
  3348   bool do_bit(size_t offset) {
  3349     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
  3350     assert(_nextMarkBitMap->isMarked(addr), "invariant");
  3351     assert( addr < _cm->finger(), "invariant");
  3353     statsOnly( _task->increase_objs_found_on_bitmap() );
  3354     assert(addr >= _task->finger(), "invariant");
  3356     // We move that task's local finger along.
  3357     _task->move_finger_to(addr);
  3359     _task->scan_object(oop(addr));
  3360     // we only partially drain the local queue and global stack
  3361     _task->drain_local_queue(true);
  3362     _task->drain_global_stack(true);
  3364     // if the has_aborted flag has been raised, we need to bail out of
  3365     // the iteration
  3366     return !_task->has_aborted();
  3368 };
  3370 // Closure for iterating over objects, currently only used for
  3371 // processing SATB buffers.
  3372 class CMObjectClosure : public ObjectClosure {
  3373 private:
  3374   CMTask* _task;
  3376 public:
  3377   void do_object(oop obj) {
  3378     _task->deal_with_reference(obj);
  3381   CMObjectClosure(CMTask* task) : _task(task) { }
  3382 };
  3384 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
  3385                                ConcurrentMark* cm,
  3386                                CMTask* task)
  3387   : _g1h(g1h), _cm(cm), _task(task) {
  3388   assert(_ref_processor == NULL, "should be initialized to NULL");
  3390   if (G1UseConcMarkReferenceProcessing) {
  3391     _ref_processor = g1h->ref_processor_cm();
  3392     assert(_ref_processor != NULL, "should not be NULL");
  3396 void CMTask::setup_for_region(HeapRegion* hr) {
  3397   // Separated the asserts so that we know which one fires.
  3398   assert(hr != NULL,
  3399         "claim_region() should have filtered out continues humongous regions");
  3400   assert(!hr->continuesHumongous(),
  3401         "claim_region() should have filtered out continues humongous regions");
  3403   if (_cm->verbose_low()) {
  3404     gclog_or_tty->print_cr("[%u] setting up for region "PTR_FORMAT,
  3405                            _worker_id, hr);
  3408   _curr_region  = hr;
  3409   _finger       = hr->bottom();
  3410   update_region_limit();
  3413 void CMTask::update_region_limit() {
  3414   HeapRegion* hr            = _curr_region;
  3415   HeapWord* bottom          = hr->bottom();
  3416   HeapWord* limit           = hr->next_top_at_mark_start();
  3418   if (limit == bottom) {
  3419     if (_cm->verbose_low()) {
  3420       gclog_or_tty->print_cr("[%u] found an empty region "
  3421                              "["PTR_FORMAT", "PTR_FORMAT")",
  3422                              _worker_id, bottom, limit);
  3424     // The region was collected underneath our feet.
  3425     // We set the finger to bottom to ensure that the bitmap
  3426     // iteration that will follow this will not do anything.
  3427     // (this is not a condition that holds when we set the region up,
  3428     // as the region is not supposed to be empty in the first place)
  3429     _finger = bottom;
  3430   } else if (limit >= _region_limit) {
  3431     assert(limit >= _finger, "peace of mind");
  3432   } else {
  3433     assert(limit < _region_limit, "only way to get here");
  3434     // This can happen under some pretty unusual circumstances.  An
  3435     // evacuation pause empties the region underneath our feet (NTAMS
  3436     // at bottom). We then do some allocation in the region (NTAMS
  3437     // stays at bottom), followed by the region being used as a GC
  3438     // alloc region (NTAMS will move to top() and the objects
  3439     // originally below it will be grayed). All objects now marked in
  3440     // the region are explicitly grayed, if below the global finger,
   3441     // and in fact we do not need to scan anything else. So, we simply
  3442     // set _finger to be limit to ensure that the bitmap iteration
  3443     // doesn't do anything.
  3444     _finger = limit;
  3447   _region_limit = limit;
  3450 void CMTask::giveup_current_region() {
  3451   assert(_curr_region != NULL, "invariant");
  3452   if (_cm->verbose_low()) {
  3453     gclog_or_tty->print_cr("[%u] giving up region "PTR_FORMAT,
  3454                            _worker_id, _curr_region);
  3456   clear_region_fields();
  3459 void CMTask::clear_region_fields() {
  3460   // Values for these three fields that indicate that we're not
  3461   // holding on to a region.
  3462   _curr_region   = NULL;
  3463   _finger        = NULL;
  3464   _region_limit  = NULL;
  3467 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
  3468   if (cm_oop_closure == NULL) {
  3469     assert(_cm_oop_closure != NULL, "invariant");
  3470   } else {
  3471     assert(_cm_oop_closure == NULL, "invariant");
  3473   _cm_oop_closure = cm_oop_closure;
  3476 void CMTask::reset(CMBitMap* nextMarkBitMap) {
  3477   guarantee(nextMarkBitMap != NULL, "invariant");
  3479   if (_cm->verbose_low()) {
  3480     gclog_or_tty->print_cr("[%u] resetting", _worker_id);
  3483   _nextMarkBitMap                = nextMarkBitMap;
  3484   clear_region_fields();
  3486   _calls                         = 0;
  3487   _elapsed_time_ms               = 0.0;
  3488   _termination_time_ms           = 0.0;
  3489   _termination_start_time_ms     = 0.0;
  3491 #if _MARKING_STATS_
  3492   _local_pushes                  = 0;
  3493   _local_pops                    = 0;
  3494   _local_max_size                = 0;
  3495   _objs_scanned                  = 0;
  3496   _global_pushes                 = 0;
  3497   _global_pops                   = 0;
  3498   _global_max_size               = 0;
  3499   _global_transfers_to           = 0;
  3500   _global_transfers_from         = 0;
  3501   _regions_claimed               = 0;
  3502   _objs_found_on_bitmap          = 0;
  3503   _satb_buffers_processed        = 0;
  3504   _steal_attempts                = 0;
  3505   _steals                        = 0;
  3506   _aborted                       = 0;
  3507   _aborted_overflow              = 0;
  3508   _aborted_cm_aborted            = 0;
  3509   _aborted_yield                 = 0;
  3510   _aborted_timed_out             = 0;
  3511   _aborted_satb                  = 0;
  3512   _aborted_termination           = 0;
  3513 #endif // _MARKING_STATS_
  3516 bool CMTask::should_exit_termination() {
  3517   regular_clock_call();
  3518   // This is called when we are in the termination protocol. We should
  3519   // quit if, for some reason, this task wants to abort or the global
  3520   // stack is not empty (this means that we can get work from it).
  3521   return !_cm->mark_stack_empty() || has_aborted();
  3524 void CMTask::reached_limit() {
  3525   assert(_words_scanned >= _words_scanned_limit ||
  3526          _refs_reached >= _refs_reached_limit ,
  3527          "shouldn't have been called otherwise");
  3528   regular_clock_call();
  3531 void CMTask::regular_clock_call() {
  3532   if (has_aborted()) return;
  3534   // First, we need to recalculate the words scanned and refs reached
  3535   // limits for the next clock call.
  3536   recalculate_limits();
  3538   // During the regular clock call we do the following
  3540   // (1) If an overflow has been flagged, then we abort.
  3541   if (_cm->has_overflown()) {
  3542     set_has_aborted();
  3543     return;
  3546   // If we are not concurrent (i.e. we're doing remark) we don't need
  3547   // to check anything else. The other steps are only needed during
  3548   // the concurrent marking phase.
  3549   if (!concurrent()) return;
  3551   // (2) If marking has been aborted for Full GC, then we also abort.
  3552   if (_cm->has_aborted()) {
  3553     set_has_aborted();
  3554     statsOnly( ++_aborted_cm_aborted );
  3555     return;
  3558   double curr_time_ms = os::elapsedVTime() * 1000.0;
  3560   // (3) If marking stats are enabled, then we update the step history.
  3561 #if _MARKING_STATS_
  3562   if (_words_scanned >= _words_scanned_limit) {
  3563     ++_clock_due_to_scanning;
  3565   if (_refs_reached >= _refs_reached_limit) {
  3566     ++_clock_due_to_marking;
  3569   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
  3570   _interval_start_time_ms = curr_time_ms;
  3571   _all_clock_intervals_ms.add(last_interval_ms);
  3573   if (_cm->verbose_medium()) {
  3574       gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, "
  3575                         "scanned = %d%s, refs reached = %d%s",
  3576                         _worker_id, last_interval_ms,
  3577                         _words_scanned,
  3578                         (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
  3579                         _refs_reached,
  3580                         (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
  3582 #endif // _MARKING_STATS_
  3584   // (4) We check whether we should yield. If we have to, then we abort.
  3585   if (_cm->should_yield()) {
  3586     // We should yield. To do this we abort the task. The caller is
  3587     // responsible for yielding.
  3588     set_has_aborted();
  3589     statsOnly( ++_aborted_yield );
  3590     return;
  3593   // (5) We check whether we've reached our time quota. If we have,
  3594   // then we abort.
  3595   double elapsed_time_ms = curr_time_ms - _start_time_ms;
  3596   if (elapsed_time_ms > _time_target_ms) {
  3597     set_has_aborted();
  3598     _has_timed_out = true;
  3599     statsOnly( ++_aborted_timed_out );
  3600     return;
   3603   // (6) Finally, we check whether there are enough completed SATB
  3604   // buffers available for processing. If there are, we abort.
  3605   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  3606   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
  3607     if (_cm->verbose_low()) {
  3608       gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
  3609                              _worker_id);
   3611     // we do need to process SATB buffers, so we'll abort and restart
  3612     // the marking task to do so
  3613     set_has_aborted();
  3614     statsOnly( ++_aborted_satb );
  3615     return;
  3619 void CMTask::recalculate_limits() {
  3620   _real_words_scanned_limit = _words_scanned + words_scanned_period;
  3621   _words_scanned_limit      = _real_words_scanned_limit;
  3623   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
  3624   _refs_reached_limit       = _real_refs_reached_limit;
  3627 void CMTask::decrease_limits() {
  3628   // This is called when we believe that we're going to do an infrequent
  3629   // operation which will increase the per byte scanned cost (i.e. move
  3630   // entries to/from the global stack). It basically tries to decrease the
  3631   // scanning limit so that the clock is called earlier.
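          // For example (with a purely hypothetical period of 1000 words):
          // pulling the limit back by 3/4 of a period means the regular clock
          // will be called after at most ~250 further words are scanned,
          // rather than up to 1000.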
  3633   if (_cm->verbose_medium()) {
  3634     gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id);
  3637   _words_scanned_limit = _real_words_scanned_limit -
  3638     3 * words_scanned_period / 4;
  3639   _refs_reached_limit  = _real_refs_reached_limit -
  3640     3 * refs_reached_period / 4;
  3643 void CMTask::move_entries_to_global_stack() {
  3644   // local array where we'll store the entries that will be popped
  3645   // from the local queue
  3646   oop buffer[global_stack_transfer_size];
  3648   int n = 0;
  3649   oop obj;
  3650   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
  3651     buffer[n] = obj;
  3652     ++n;
  3655   if (n > 0) {
  3656     // we popped at least one entry from the local queue
  3658     statsOnly( ++_global_transfers_to; _local_pops += n );
  3660     if (!_cm->mark_stack_push(buffer, n)) {
  3661       if (_cm->verbose_low()) {
  3662         gclog_or_tty->print_cr("[%u] aborting due to global stack overflow",
  3663                                _worker_id);
  3665       set_has_aborted();
  3666     } else {
  3667       // the transfer was successful
  3669       if (_cm->verbose_medium()) {
  3670         gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack",
  3671                                _worker_id, n);
  3673       statsOnly( int tmp_size = _cm->mark_stack_size();
  3674                  if (tmp_size > _global_max_size) {
  3675                    _global_max_size = tmp_size;
  3677                  _global_pushes += n );
  3681   // this operation was quite expensive, so decrease the limits
  3682   decrease_limits();
  3685 void CMTask::get_entries_from_global_stack() {
  3686   // local array where we'll store the entries that will be popped
  3687   // from the global stack.
  3688   oop buffer[global_stack_transfer_size];
  3689   int n;
  3690   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
  3691   assert(n <= global_stack_transfer_size,
  3692          "we should not pop more than the given limit");
  3693   if (n > 0) {
  3694     // yes, we did actually pop at least one entry
  3696     statsOnly( ++_global_transfers_from; _global_pops += n );
  3697     if (_cm->verbose_medium()) {
  3698       gclog_or_tty->print_cr("[%u] popped %d entries from the global stack",
  3699                              _worker_id, n);
  3701     for (int i = 0; i < n; ++i) {
  3702       bool success = _task_queue->push(buffer[i]);
  3703       // We only call this when the local queue is empty or under a
  3704       // given target limit. So, we do not expect this push to fail.
  3705       assert(success, "invariant");
  3708     statsOnly( int tmp_size = _task_queue->size();
  3709                if (tmp_size > _local_max_size) {
  3710                  _local_max_size = tmp_size;
  3712                _local_pushes += n );
  3715   // this operation was quite expensive, so decrease the limits
  3716   decrease_limits();
  3719 void CMTask::drain_local_queue(bool partially) {
  3720   if (has_aborted()) return;
   3722   // Decide what the target size is, depending on whether we're going to
  3723   // drain it partially (so that other tasks can steal if they run out
  3724   // of things to do) or totally (at the very end).
  3725   size_t target_size;
  3726   if (partially) {
  3727     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
  3728   } else {
  3729     target_size = 0;
  3732   if (_task_queue->size() > target_size) {
  3733     if (_cm->verbose_high()) {
  3734       gclog_or_tty->print_cr("[%u] draining local queue, target size = %d",
  3735                              _worker_id, target_size);
  3738     oop obj;
  3739     bool ret = _task_queue->pop_local(obj);
  3740     while (ret) {
  3741       statsOnly( ++_local_pops );
  3743       if (_cm->verbose_high()) {
  3744         gclog_or_tty->print_cr("[%u] popped "PTR_FORMAT, _worker_id,
  3745                                (void*) obj);
  3748       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
  3749       assert(!_g1h->is_on_master_free_list(
  3750                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
  3752       scan_object(obj);
  3754       if (_task_queue->size() <= target_size || has_aborted()) {
  3755         ret = false;
  3756       } else {
  3757         ret = _task_queue->pop_local(obj);
  3761     if (_cm->verbose_high()) {
  3762       gclog_or_tty->print_cr("[%u] drained local queue, size = %d",
  3763                              _worker_id, _task_queue->size());
  3768 void CMTask::drain_global_stack(bool partially) {
  3769   if (has_aborted()) return;
  3771   // We have a policy to drain the local queue before we attempt to
  3772   // drain the global stack.
  3773   assert(partially || _task_queue->size() == 0, "invariant");
   3775   // Decide what the target size is, depending on whether we're going to
  3776   // drain it partially (so that other tasks can steal if they run out
  3777   // of things to do) or totally (at the very end).  Notice that,
  3778   // because we move entries from the global stack in chunks or
  3779   // because another task might be doing the same, we might in fact
  3780   // drop below the target. But, this is not a problem.
  3781   size_t target_size;
  3782   if (partially) {
  3783     target_size = _cm->partial_mark_stack_size_target();
  3784   } else {
  3785     target_size = 0;
  3788   if (_cm->mark_stack_size() > target_size) {
  3789     if (_cm->verbose_low()) {
  3790       gclog_or_tty->print_cr("[%u] draining global_stack, target size %d",
  3791                              _worker_id, target_size);
  3794     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
  3795       get_entries_from_global_stack();
  3796       drain_local_queue(partially);
  3799     if (_cm->verbose_low()) {
  3800       gclog_or_tty->print_cr("[%u] drained global stack, size = %d",
  3801                              _worker_id, _cm->mark_stack_size());
   3806 // The SATB queue set makes several assumptions about whether to call the
   3807 // par or non-par versions of its methods. This is why some of the code is
  3808 // replicated. We should really get rid of the single-threaded version
  3809 // of the code to simplify things.
  3810 void CMTask::drain_satb_buffers() {
  3811   if (has_aborted()) return;
  3813   // We set this so that the regular clock knows that we're in the
  3814   // middle of draining buffers and doesn't set the abort flag when it
  3815   // notices that SATB buffers are available for draining. It'd be
   3816   // very counterproductive if it did that. :-)
  3817   _draining_satb_buffers = true;
  3819   CMObjectClosure oc(this);
  3820   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  3821   if (G1CollectedHeap::use_parallel_gc_threads()) {
  3822     satb_mq_set.set_par_closure(_worker_id, &oc);
  3823   } else {
  3824     satb_mq_set.set_closure(&oc);
  3827   // This keeps claiming and applying the closure to completed buffers
  3828   // until we run out of buffers or we need to abort.
  3829   if (G1CollectedHeap::use_parallel_gc_threads()) {
  3830     while (!has_aborted() &&
  3831            satb_mq_set.par_apply_closure_to_completed_buffer(_worker_id)) {
  3832       if (_cm->verbose_medium()) {
  3833         gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
  3835       statsOnly( ++_satb_buffers_processed );
  3836       regular_clock_call();
  3838   } else {
  3839     while (!has_aborted() &&
  3840            satb_mq_set.apply_closure_to_completed_buffer()) {
  3841       if (_cm->verbose_medium()) {
  3842         gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
  3844       statsOnly( ++_satb_buffers_processed );
  3845       regular_clock_call();
  3849   if (!concurrent() && !has_aborted()) {
  3850     // We should only do this during remark.
  3851     if (G1CollectedHeap::use_parallel_gc_threads()) {
  3852       satb_mq_set.par_iterate_closure_all_threads(_worker_id);
  3853     } else {
  3854       satb_mq_set.iterate_closure_all_threads();
  3858   _draining_satb_buffers = false;
  3860   assert(has_aborted() ||
  3861          concurrent() ||
  3862          satb_mq_set.completed_buffers_num() == 0, "invariant");
  3864   if (G1CollectedHeap::use_parallel_gc_threads()) {
  3865     satb_mq_set.set_par_closure(_worker_id, NULL);
  3866   } else {
  3867     satb_mq_set.set_closure(NULL);
   3870   // again, this was a potentially expensive operation, so decrease the
  3871   // limits to get the regular clock call early
  3872   decrease_limits();
  3875 void CMTask::print_stats() {
  3876   gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d",
  3877                          _worker_id, _calls);
  3878   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
  3879                          _elapsed_time_ms, _termination_time_ms);
  3880   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
  3881                          _step_times_ms.num(), _step_times_ms.avg(),
  3882                          _step_times_ms.sd());
  3883   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
  3884                          _step_times_ms.maximum(), _step_times_ms.sum());
  3886 #if _MARKING_STATS_
  3887   gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
  3888                          _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
  3889                          _all_clock_intervals_ms.sd());
  3890   gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
  3891                          _all_clock_intervals_ms.maximum(),
  3892                          _all_clock_intervals_ms.sum());
  3893   gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
  3894                          _clock_due_to_scanning, _clock_due_to_marking);
  3895   gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
  3896                          _objs_scanned, _objs_found_on_bitmap);
  3897   gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
  3898                          _local_pushes, _local_pops, _local_max_size);
  3899   gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
  3900                          _global_pushes, _global_pops, _global_max_size);
  3901   gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
  3902                          _global_transfers_to,_global_transfers_from);
  3903   gclog_or_tty->print_cr("  Regions: claimed = %d", _regions_claimed);
  3904   gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
  3905   gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
  3906                          _steal_attempts, _steals);
  3907   gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
  3908   gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
  3909                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
  3910   gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
  3911                          _aborted_timed_out, _aborted_satb, _aborted_termination);
  3912 #endif // _MARKING_STATS_
  3915 /*****************************************************************************
  3917     The do_marking_step(time_target_ms, ...) method is the building
  3918     block of the parallel marking framework. It can be called in parallel
  3919     with other invocations of do_marking_step() on different tasks
  3920     (but only one per task, obviously) and concurrently with the
  3921     mutator threads, or during remark, hence it eliminates the need
  3922     for two versions of the code. When called during remark, it will
  3923     pick up from where the task left off during the concurrent marking
  3924     phase. Interestingly, tasks are also claimable during evacuation
   3925     pauses, since do_marking_step() ensures that it aborts before
  3926     it needs to yield.
  3928     The data structures that it uses to do marking work are the
  3929     following:
  3931       (1) Marking Bitmap. If there are gray objects that appear only
  3932       on the bitmap (this happens either when dealing with an overflow
  3933       or when the initial marking phase has simply marked the roots
  3934       and didn't push them on the stack), then tasks claim heap
  3935       regions whose bitmap they then scan to find gray objects. A
  3936       global finger indicates where the end of the last claimed region
  3937       is. A local finger indicates how far into the region a task has
  3938       scanned. The two fingers are used to determine how to gray an
  3939       object (i.e. whether simply marking it is OK, as it will be
  3940       visited by a task in the future, or whether it needs to be also
  3941       pushed on a stack).
  3943       (2) Local Queue. The local queue of the task which is accessed
  3944       reasonably efficiently by the task. Other tasks can steal from
  3945       it when they run out of work. Throughout the marking phase, a
  3946       task attempts to keep its local queue short but not totally
  3947       empty, so that entries are available for stealing by other
   3948       tasks. Only when there is no more work will a task totally
  3949       drain its local queue.
  3951       (3) Global Mark Stack. This handles local queue overflow. During
  3952       marking only sets of entries are moved between it and the local
   3953       queues, as access to it requires a mutex, and more fine-grained
   3954       interaction with it might cause contention. If it
  3955       overflows, then the marking phase should restart and iterate
  3956       over the bitmap to identify gray objects. Throughout the marking
  3957       phase, tasks attempt to keep the global mark stack at a small
  3958       length but not totally empty, so that entries are available for
   3959       popping by other tasks. Only when there is no more work will tasks
   3960       totally drain the global mark stack.
  3962       (4) SATB Buffer Queue. This is where completed SATB buffers are
  3963       made available. Buffers are regularly removed from this queue
  3964       and scanned for roots, so that the queue doesn't get too
  3965       long. During remark, all completed buffers are processed, as
   3966       well as the filled-in parts of any uncompleted buffers.
  3968     The do_marking_step() method tries to abort when the time target
  3969     has been reached. There are a few other cases when the
  3970     do_marking_step() method also aborts:
  3972       (1) When the marking phase has been aborted (after a Full GC).
  3974       (2) When a global overflow (on the global stack) has been
  3975       triggered. Before the task aborts, it will actually sync up with
  3976       the other tasks to ensure that all the marking data structures
  3977       (local queues, stacks, fingers etc.)  are re-initialized so that
  3978       when do_marking_step() completes, the marking phase can
  3979       immediately restart.
  3981       (3) When enough completed SATB buffers are available. The
  3982       do_marking_step() method only tries to drain SATB buffers right
  3983       at the beginning. So, if enough buffers are available, the
  3984       marking step aborts and the SATB buffers are processed at
  3985       the beginning of the next invocation.
   3987       (4) To yield. When we have to yield, we abort and yield
   3988       right at the end of do_marking_step(). This saves us from a lot
   3989       of hassle as, by yielding, we might allow a Full GC. If this
  3990       happens then objects will be compacted underneath our feet, the
  3991       heap might shrink, etc. We save checking for this by just
  3992       aborting and doing the yield right at the end.
  3994     From the above it follows that the do_marking_step() method should
  3995     be called in a loop (or, otherwise, regularly) until it completes.
  3997     If a marking step completes without its has_aborted() flag being
  3998     true, it means it has completed the current marking phase (and
  3999     also all other marking tasks have done so and have all synced up).
  4001     A method called regular_clock_call() is invoked "regularly" (in
  4002     sub ms intervals) throughout marking. It is this clock method that
  4003     checks all the abort conditions which were mentioned above and
  4004     decides when the task should abort. A work-based scheme is used to
  4005     trigger this clock method: when the number of object words the
  4006     marking phase has scanned or the number of references the marking
   4007     phase has visited reach a given limit. Additional invocations of
   4008     the clock method have been planted in a few other strategic places
  4009     too. The initial reason for the clock method was to avoid calling
  4010     vtime too regularly, as it is quite expensive. So, once it was in
  4011     place, it was natural to piggy-back all the other conditions on it
  4012     too and not constantly check them throughout the code.
  4014     If do_termination is true then do_marking_step will enter its
  4015     termination protocol.
  4017     The value of is_serial must be true when do_marking_step is being
  4018     called serially (i.e. by the VMThread) and do_marking_step should
  4019     skip any synchronization in the termination and overflow code.
  4020     Examples include the serial remark code and the serial reference
  4021     processing closures.
  4023     The value of is_serial must be false when do_marking_step is
  4024     being called by any of the worker threads in a work gang.
  4025     Examples include the concurrent marking code (CMMarkingTask),
  4026     the MT remark code, and the MT reference processing closures.
  4028  *****************************************************************************/
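        // For illustration only (a sketch, not part of the original source):
        // a concurrent worker typically drives its task roughly like
        //
        //   do {
        //     task->do_marking_step(G1ConcMarkStepDurationMillis,
        //                           true  /* do_termination */,
        //                           false /* is_serial */);
        //     // yield / bookkeeping between steps
        //   } while (!cm->has_aborted() && task->has_aborted());
        //
        // i.e. the step is re-invoked for as long as the task keeps aborting
        // (for any of the reasons above) and marking as a whole has not been
        // aborted; task and cm here stand for a CMTask* and the ConcurrentMark
        // instance respectively.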
  4030 void CMTask::do_marking_step(double time_target_ms,
  4031                              bool do_termination,
  4032                              bool is_serial) {
  4033   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
  4034   assert(concurrent() == _cm->concurrent(), "they should be the same");
  4036   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
  4037   assert(_task_queues != NULL, "invariant");
  4038   assert(_task_queue != NULL, "invariant");
  4039   assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
  4041   assert(!_claimed,
  4042          "only one thread should claim this task at any one time");
   4044   // OK, this doesn't safeguard against all possible scenarios, as it is
  4045   // possible for two threads to set the _claimed flag at the same
  4046   // time. But it is only for debugging purposes anyway and it will
  4047   // catch most problems.
  4048   _claimed = true;
  4050   _start_time_ms = os::elapsedVTime() * 1000.0;
  4051   statsOnly( _interval_start_time_ms = _start_time_ms );
  4053   // If do_stealing is true then do_marking_step will attempt to
  4054   // steal work from the other CMTasks. It only makes sense to
  4055   // enable stealing when the termination protocol is enabled
  4056   // and do_marking_step() is not being called serially.
  4057   bool do_stealing = do_termination && !is_serial;
  4059   double diff_prediction_ms =
  4060     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
  4061   _time_target_ms = time_target_ms - diff_prediction_ms;
  4063   // set up the variables that are used in the work-based scheme to
  4064   // call the regular clock method
  4065   _words_scanned = 0;
  4066   _refs_reached  = 0;
  4067   recalculate_limits();
  4069   // clear all flags
  4070   clear_has_aborted();
  4071   _has_timed_out = false;
  4072   _draining_satb_buffers = false;
  4074   ++_calls;
  4076   if (_cm->verbose_low()) {
  4077     gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, "
  4078                            "target = %1.2lfms >>>>>>>>>>",
  4079                            _worker_id, _calls, _time_target_ms);
  4082   // Set up the bitmap and oop closures. Anything that uses them is
  4083   // eventually called from this method, so it is OK to allocate these
  4084   // statically.
  4085   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
  4086   G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
  4087   set_cm_oop_closure(&cm_oop_closure);
  4089   if (_cm->has_overflown()) {
  4090     // This can happen if the mark stack overflows during a GC pause
  4091     // and this task, after a yield point, restarts. We have to abort
  4092     // as we need to get into the overflow protocol which happens
  4093     // right at the end of this task.
  4094     set_has_aborted();
  4097   // First drain any available SATB buffers. After this, we will not
  4098   // look at SATB buffers before the next invocation of this method.
  4099   // If enough completed SATB buffers are queued up, the regular clock
  4100   // will abort this task so that it restarts.
  4101   drain_satb_buffers();
  4102   // ...then partially drain the local queue and the global stack
  4103   drain_local_queue(true);
  4104   drain_global_stack(true);
  4106   do {
  4107     if (!has_aborted() && _curr_region != NULL) {
  4108       // This means that we're already holding on to a region.
  4109       assert(_finger != NULL, "if region is not NULL, then the finger "
  4110              "should not be NULL either");
  4112       // We might have restarted this task after an evacuation pause
  4113       // which might have evacuated the region we're holding on to
  4114       // underneath our feet. Let's read its limit again to make sure
  4115       // that we do not iterate over a region of the heap that
  4116       // contains garbage (update_region_limit() will also move
  4117       // _finger to the start of the region if it is found empty).
  4118       update_region_limit();
  4119       // We will start from _finger not from the start of the region,
  4120       // as we might be restarting this task after aborting half-way
  4121       // through scanning this region. In this case, _finger points to
  4122       // the address where we last found a marked object. If this is a
  4123       // fresh region, _finger points to start().
  4124       MemRegion mr = MemRegion(_finger, _region_limit);
  4126       if (_cm->verbose_low()) {
  4127         gclog_or_tty->print_cr("[%u] we're scanning part "
  4128                                "["PTR_FORMAT", "PTR_FORMAT") "
  4129                                "of region "HR_FORMAT,
  4130                                _worker_id, _finger, _region_limit,
  4131                                HR_FORMAT_PARAMS(_curr_region));
  4132       }
  4134       assert(!_curr_region->isHumongous() || mr.start() == _curr_region->bottom(),
  4135              "humongous regions should go around loop once only");
  4137       // Some special cases:
  4138       // If the memory region is empty, we can just give up the region.
  4139       // If the current region is humongous then we only need to check
  4140       // the bitmap for the bit associated with the start of the object,
  4141       // scan the object if it's live, and give up the region.
  4142       // Otherwise, let's iterate over the bitmap of the part of the region
  4143       // that is left.
  4144       // If the iteration is successful, give up the region.
  4145       if (mr.is_empty()) {
  4146         giveup_current_region();
  4147         regular_clock_call();
  4148       } else if (_curr_region->isHumongous() && mr.start() == _curr_region->bottom()) {
  4149         if (_nextMarkBitMap->isMarked(mr.start())) {
  4150           // The object is marked - apply the closure
  4151           BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
  4152           bitmap_closure.do_bit(offset);
  4153         }
  4154         // Even if this task aborted while scanning the humongous object
  4155         // we can (and should) give up the current region.
  4156         giveup_current_region();
  4157         regular_clock_call();
  4158       } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
  4159         giveup_current_region();
  4160         regular_clock_call();
  4161       } else {
  4162         assert(has_aborted(), "currently the only way to do so");
  4163         // The only way to abort the bitmap iteration is to return
  4164         // false from the do_bit() method. However, inside the
  4165         // do_bit() method we move the _finger to point to the
  4166         // object currently being looked at. So, if we bail out, we
  4167         // have definitely set _finger to something non-null.
  4168         assert(_finger != NULL, "invariant");
  4170         // Region iteration was actually aborted. So now _finger
  4171         // points to the address of the object we last scanned. If we
  4172         // leave it there, when we restart this task, we will rescan
  4173         // the object. It is easy to avoid this. We move the finger by
  4174         // enough to point to the next possible object header (the
  4175         // bitmap knows by how much we need to move it as it knows its
  4176         // granularity).
  4177         assert(_finger < _region_limit, "invariant");
  4178         HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
  4179         // Check if bitmap iteration was aborted while scanning the last object
  4180         if (new_finger >= _region_limit) {
  4181           giveup_current_region();
  4182         } else {
  4183           move_finger_to(new_finger);
  4184         }
  4185       }
  4186     }
  4187     // At this point we have either completed iterating over the
  4188     // region we were holding on to, or we have aborted.
  4190     // We then partially drain the local queue and the global stack.
  4191     // (Do we really need this?)
  4192     drain_local_queue(true);
  4193     drain_global_stack(true);
  4195     // Read the note on the claim_region() method on why it might
  4196     // return NULL with potentially more regions available for
  4197     // claiming and why we have to check out_of_regions() to determine
  4198     // whether we're done or not.
  4199     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
  4200       // We are going to try to claim a new region. We should have
  4201       // given up on the previous one.
  4202       // Separated the asserts so that we know which one fires.
  4203       assert(_curr_region  == NULL, "invariant");
  4204       assert(_finger       == NULL, "invariant");
  4205       assert(_region_limit == NULL, "invariant");
  4206       if (_cm->verbose_low()) {
  4207         gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id);
  4208       }
  4209       HeapRegion* claimed_region = _cm->claim_region(_worker_id);
  4210       if (claimed_region != NULL) {
  4211         // Yes, we managed to claim one
  4212         statsOnly( ++_regions_claimed );
  4214         if (_cm->verbose_low()) {
  4215           gclog_or_tty->print_cr("[%u] we successfully claimed "
  4216                                  "region "PTR_FORMAT,
  4217                                  _worker_id, claimed_region);
  4218         }
  4220         setup_for_region(claimed_region);
  4221         assert(_curr_region == claimed_region, "invariant");
  4222       }
  4223       // It is important to call the regular clock here. It might take
  4224       // a while to claim a region if, for example, we hit a large
  4225       // block of empty regions. So we need to call the regular clock
  4226       // method once round the loop to make sure it's called
  4227       // frequently enough.
  4228       regular_clock_call();
  4229     }
  4231     if (!has_aborted() && _curr_region == NULL) {
  4232       assert(_cm->out_of_regions(),
  4233              "at this point we should be out of regions");
  4234     }
  4235   } while ( _curr_region != NULL && !has_aborted());
  4237   if (!has_aborted()) {
  4238     // We cannot check whether the global stack is empty, since other
  4239     // tasks might be pushing objects to it concurrently.
  4240     assert(_cm->out_of_regions(),
  4241            "at this point we should be out of regions");
  4243     if (_cm->verbose_low()) {
  4244       gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id);
  4245     }
  4247     // Try to reduce the number of available SATB buffers so that
  4248     // remark has less work to do.
  4249     drain_satb_buffers();
  4250   }
  4252   // Since we've done everything else, we can now totally drain the
  4253   // local queue and global stack.
  4254   drain_local_queue(false);
  4255   drain_global_stack(false);
  4257   // Attempt at work stealing from other tasks' queues.
  4258   if (do_stealing && !has_aborted()) {
  4259     // We have not aborted. This means that we have finished all that
  4260     // we could. Let's try to do some stealing...
  4262     // We cannot check whether the global stack is empty, since other
  4263     // tasks might be pushing objects to it concurrently.
  4264     assert(_cm->out_of_regions() && _task_queue->size() == 0,
  4265            "only way to reach here");
  4267     if (_cm->verbose_low()) {
  4268       gclog_or_tty->print_cr("[%u] starting to steal", _worker_id);
  4269     }
  4271     while (!has_aborted()) {
  4272       oop obj;
  4273       statsOnly( ++_steal_attempts );
  4275       if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
  4276         if (_cm->verbose_medium()) {
  4277           gclog_or_tty->print_cr("[%u] stolen "PTR_FORMAT" successfully",
  4278                                  _worker_id, (void*) obj);
  4279         }
  4281         statsOnly( ++_steals );
  4283         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
  4284                "any stolen object should be marked");
  4285         scan_object(obj);
  4287         // And since we're towards the end, let's totally drain the
  4288         // local queue and global stack.
  4289         drain_local_queue(false);
  4290         drain_global_stack(false);
  4291       } else {
  4292         break;
  4293       }
  4294     }
  4295   }
  4297   // If we are about to wrap up and go into termination, check if we
  4298   // should raise the overflow flag.
  4299   if (do_termination && !has_aborted()) {
  4300     if (_cm->force_overflow()->should_force()) {
  4301       _cm->set_has_overflown();
  4302       regular_clock_call();
  4303     }
  4304   }
  4306   // We still haven't aborted. Now, let's try to get into the
  4307   // termination protocol.
  4308   if (do_termination && !has_aborted()) {
  4309     // We cannot check whether the global stack is empty, since other
  4310     // tasks might be concurrently pushing objects on it.
  4311     // Separated the asserts so that we know which one fires.
  4312     assert(_cm->out_of_regions(), "only way to reach here");
  4313     assert(_task_queue->size() == 0, "only way to reach here");
  4315     if (_cm->verbose_low()) {
  4316       gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id);
  4317     }
  4319     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
  4321     // The CMTask class also extends the TerminatorTerminator class,
  4322     // hence its should_exit_termination() method will also decide
  4323     // whether to exit the termination protocol or not.
  4324     bool finished = (is_serial ||
  4325                      _cm->terminator()->offer_termination(this));
  4326     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
  4327     _termination_time_ms +=
  4328       termination_end_time_ms - _termination_start_time_ms;
  4330     if (finished) {
  4331       // We're all done.
  4333       if (_worker_id == 0) {
  4334         // let's allow task 0 to do this
  4335         if (concurrent()) {
  4336           assert(_cm->concurrent_marking_in_progress(), "invariant");
  4337           // we need to set this to false before the next
  4338           // safepoint. This way we ensure that the marking phase
  4339           // doesn't observe any more heap expansions.
  4340           _cm->clear_concurrent_marking_in_progress();
  4341         }
  4342       }
  4344       // We can now guarantee that the global stack is empty, since
  4345       // all other tasks have finished. We separated the guarantees so
  4346       // that, if a condition is false, we can immediately find out
  4347       // which one.
  4348       guarantee(_cm->out_of_regions(), "only way to reach here");
  4349       guarantee(_cm->mark_stack_empty(), "only way to reach here");
  4350       guarantee(_task_queue->size() == 0, "only way to reach here");
  4351       guarantee(!_cm->has_overflown(), "only way to reach here");
  4352       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
  4354       if (_cm->verbose_low()) {
  4355         gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id);
  4356       }
  4357     } else {
  4358       // Apparently there's more work to do. Let's abort this task; it
  4359       // will be restarted and we can hopefully find more things to do.
  4361       if (_cm->verbose_low()) {
  4362         gclog_or_tty->print_cr("[%u] apparently there is more work to do",
  4363                                _worker_id);
  4364       }
  4366       set_has_aborted();
  4367       statsOnly( ++_aborted_termination );
  4368     }
  4369   }
  4371   // Mainly for debugging purposes to make sure that a pointer to the
  4372   // closure which was statically allocated in this frame doesn't
  4373   // escape it by accident.
  4374   set_cm_oop_closure(NULL);
  4375   double end_time_ms = os::elapsedVTime() * 1000.0;
  4376   double elapsed_time_ms = end_time_ms - _start_time_ms;
  4377   // Update the step history.
  4378   _step_times_ms.add(elapsed_time_ms);
  4380   if (has_aborted()) {
  4381     // The task was aborted for some reason.
  4383     statsOnly( ++_aborted );
  4385     if (_has_timed_out) {
  4386       double diff_ms = elapsed_time_ms - _time_target_ms;
  4387       // Keep statistics of how well we did with respect to hitting
  4388       // our target only if we actually timed out (if we aborted for
  4389       // other reasons, then the results might get skewed).
  4390       _marking_step_diffs_ms.add(diff_ms);
  4391     }
  4393     if (_cm->has_overflown()) {
  4394       // This is the interesting one. We aborted because a global
  4395       // overflow was raised. This means we have to restart the
  4396       // marking phase and start iterating over regions. However, in
  4397       // order to do this we have to make sure that all tasks stop
  4398       // what they are doing and re-initialise in a safe manner. We
  4399       // will achieve this with the use of two barrier sync points.
  4401       if (_cm->verbose_low()) {
  4402         gclog_or_tty->print_cr("[%u] detected overflow", _worker_id);
  4403       }
  4405       if (!is_serial) {
  4406         // We only need to enter the sync barrier if being called
  4407         // from a parallel context
  4408         _cm->enter_first_sync_barrier(_worker_id);
  4410         // When we exit this sync barrier we know that all tasks have
  4411         // stopped doing marking work. So, it's now safe to
  4412         // re-initialise our data structures. At the end of this method,
  4413         // task 0 will clear the global data structures.
  4414       }
  4416       statsOnly( ++_aborted_overflow );
  4418       // We clear the local state of this task...
  4419       clear_region_fields();
  4421       if (!is_serial) {
  4422         // ...and enter the second barrier.
  4423         _cm->enter_second_sync_barrier(_worker_id);
  4424       }
  4425       // At this point, if we're in the concurrent phase of
  4426       // marking, everything has been re-initialized and we're
  4427       // ready to restart.
  4428     }
  4430     if (_cm->verbose_low()) {
  4431       gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, "
  4432                              "elapsed = %1.2lfms <<<<<<<<<<",
  4433                              _worker_id, _time_target_ms, elapsed_time_ms);
  4434       if (_cm->has_aborted()) {
  4435         gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========",
  4436                                _worker_id);
  4437       }
  4438     }
  4439   } else {
  4440     if (_cm->verbose_low()) {
  4441       gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, "
  4442                              "elapsed = %1.2lfms <<<<<<<<<<",
  4443                              _worker_id, _time_target_ms, elapsed_time_ms);
  4444     }
  4445   }
  4447   _claimed = false;
  4448 }
  4450 CMTask::CMTask(uint worker_id,
  4451                ConcurrentMark* cm,
  4452                size_t* marked_bytes,
  4453                BitMap* card_bm,
  4454                CMTaskQueue* task_queue,
  4455                CMTaskQueueSet* task_queues)
  4456   : _g1h(G1CollectedHeap::heap()),
  4457     _worker_id(worker_id), _cm(cm),
  4458     _claimed(false),
  4459     _nextMarkBitMap(NULL), _hash_seed(17),
  4460     _task_queue(task_queue),
  4461     _task_queues(task_queues),
  4462     _cm_oop_closure(NULL),
  4463     _marked_bytes_array(marked_bytes),
  4464     _card_bm(card_bm) {
  4465   guarantee(task_queue != NULL, "invariant");
  4466   guarantee(task_queues != NULL, "invariant");
  4468   statsOnly( _clock_due_to_scanning = 0;
  4469              _clock_due_to_marking  = 0 );
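         // Seed the step-time diff history so that get_new_prediction() has
         // at least one sample before the first marking step completes.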
  4471   _marking_step_diffs_ms.add(0.5);
  4472 }
  4474 // These are formatting macros that are used below to ensure
  4475 // consistent formatting. The *_H_* versions are used to format the
  4476 // header for a particular value and they should be kept consistent
  4477 // with the corresponding macro. Also note that most of the macros add
  4478 // the necessary white space (as a prefix) which makes them a bit
  4479 // easier to compose.
  4481 // All the output lines are prefixed with this string to be able to
  4482 // identify them easily in a large log file.
  4483 #define G1PPRL_LINE_PREFIX            "###"
  4485 #define G1PPRL_ADDR_BASE_FORMAT    " "PTR_FORMAT"-"PTR_FORMAT
  4486 #ifdef _LP64
  4487 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
  4488 #else // _LP64
  4489 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
  4490 #endif // _LP64
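       // The header widths above match the printed address range: two
       // "0x"-prefixed pointers (18 characters each on LP64, 10 on 32-bit)
       // joined by '-', i.e. 2*18 + 1 = 37 and 2*10 + 1 = 21 characters.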
  4492 // For per-region info
  4493 #define G1PPRL_TYPE_FORMAT            "   %-4s"
  4494 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
  4495 #define G1PPRL_BYTE_FORMAT            "  "SIZE_FORMAT_W(9)
  4496 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
  4497 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
  4498 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
  4500 // For summary info
  4501 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  "tag":"G1PPRL_ADDR_BASE_FORMAT
  4502 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  "tag": "SIZE_FORMAT
  4503 #define G1PPRL_SUM_MB_FORMAT(tag)      "  "tag": %1.2f MB"
  4504 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
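       // For example, G1PPRL_SUM_MB_PERC_FORMAT("used") expands to the format
       // string "  used: %1.2f MB / %1.2f %%".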
  4506 G1PrintRegionLivenessInfoClosure::
  4507 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
  4508   : _out(out),
  4509     _total_used_bytes(0), _total_capacity_bytes(0),
  4510     _total_prev_live_bytes(0), _total_next_live_bytes(0),
  4511     _hum_used_bytes(0), _hum_capacity_bytes(0),
  4512     _hum_prev_live_bytes(0), _hum_next_live_bytes(0) {
  4513   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  4514   MemRegion g1_committed = g1h->g1_committed();
  4515   MemRegion g1_reserved = g1h->g1_reserved();
  4516   double now = os::elapsedTime();
  4518   // Print the header of the output.
  4519   _out->cr();
  4520   _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
  4521   _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
  4522                  G1PPRL_SUM_ADDR_FORMAT("committed")
  4523                  G1PPRL_SUM_ADDR_FORMAT("reserved")
  4524                  G1PPRL_SUM_BYTE_FORMAT("region-size"),
  4525                  g1_committed.start(), g1_committed.end(),
  4526                  g1_reserved.start(), g1_reserved.end(),
  4527                  HeapRegion::GrainBytes);
  4528   _out->print_cr(G1PPRL_LINE_PREFIX);
  4529   _out->print_cr(G1PPRL_LINE_PREFIX
  4530                  G1PPRL_TYPE_H_FORMAT
  4531                  G1PPRL_ADDR_BASE_H_FORMAT
  4532                  G1PPRL_BYTE_H_FORMAT
  4533                  G1PPRL_BYTE_H_FORMAT
  4534                  G1PPRL_BYTE_H_FORMAT
  4535                  G1PPRL_DOUBLE_H_FORMAT,
  4536                  "type", "address-range",
  4537                  "used", "prev-live", "next-live", "gc-eff");
  4538   _out->print_cr(G1PPRL_LINE_PREFIX
  4539                  G1PPRL_TYPE_H_FORMAT
  4540                  G1PPRL_ADDR_BASE_H_FORMAT
  4541                  G1PPRL_BYTE_H_FORMAT
  4542                  G1PPRL_BYTE_H_FORMAT
  4543                  G1PPRL_BYTE_H_FORMAT
  4544                  G1PPRL_DOUBLE_H_FORMAT,
  4545                  "", "",
  4546                  "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)");
  4547 }
  4549 // It takes as a parameter a reference to one of the _hum_* fields. It
  4550 // deduces the corresponding value for a region in a humongous region
  4551 // series (either the region size, or what's left if the _hum_* field
  4552 // is < the region size), and updates the _hum_* field accordingly.
  4553 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
  4554   size_t bytes = 0;
  4555   // The > 0 check is to deal with the prev and next live bytes which
  4556   // could be 0.
  4557   if (*hum_bytes > 0) {
  4558     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
  4559     *hum_bytes -= bytes;
  4560   }
  4561   return bytes;
  4562 }
  4564 // It deduces the values for a region in a humongous region series
  4565 // from the _hum_* fields and updates those accordingly. It assumes
  4566 // that the _hum_* fields have already been set up from the "starts
  4567 // humongous" region and we visit the regions in address order.
  4568 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
  4569                                                      size_t* capacity_bytes,
  4570                                                      size_t* prev_live_bytes,
  4571                                                      size_t* next_live_bytes) {
  4572   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
  4573   *used_bytes      = get_hum_bytes(&_hum_used_bytes);
  4574   *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
  4575   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
  4576   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
  4577 }
  4579 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
  4580   const char* type = "";
  4581   HeapWord* bottom       = r->bottom();
  4582   HeapWord* end          = r->end();
  4583   size_t capacity_bytes  = r->capacity();
  4584   size_t used_bytes      = r->used();
  4585   size_t prev_live_bytes = r->live_bytes();
  4586   size_t next_live_bytes = r->next_live_bytes();
  4587   double gc_eff          = r->gc_efficiency();
  4588   if (r->used() == 0) {
  4589     type = "FREE";
  4590   } else if (r->is_survivor()) {
  4591     type = "SURV";
  4592   } else if (r->is_young()) {
  4593     type = "EDEN";
  4594   } else if (r->startsHumongous()) {
  4595     type = "HUMS";
  4597     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
  4598            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
  4599            "they should have been zeroed after the last time we used them");
  4600     // Set up the _hum_* fields.
  4601     _hum_capacity_bytes  = capacity_bytes;
  4602     _hum_used_bytes      = used_bytes;
  4603     _hum_prev_live_bytes = prev_live_bytes;
  4604     _hum_next_live_bytes = next_live_bytes;
  4605     get_hum_bytes(&used_bytes, &capacity_bytes,
  4606                   &prev_live_bytes, &next_live_bytes);
  4607     end = bottom + HeapRegion::GrainWords;
  4608   } else if (r->continuesHumongous()) {
  4609     type = "HUMC";
  4610     get_hum_bytes(&used_bytes, &capacity_bytes,
  4611                   &prev_live_bytes, &next_live_bytes);
  4612     assert(end == bottom + HeapRegion::GrainWords, "invariant");
  4613   } else {
  4614     type = "OLD";
  4617   _total_used_bytes      += used_bytes;
  4618   _total_capacity_bytes  += capacity_bytes;
  4619   _total_prev_live_bytes += prev_live_bytes;
  4620   _total_next_live_bytes += next_live_bytes;
  4622   // Print a line for this particular region.
  4623   _out->print_cr(G1PPRL_LINE_PREFIX
  4624                  G1PPRL_TYPE_FORMAT
  4625                  G1PPRL_ADDR_BASE_FORMAT
  4626                  G1PPRL_BYTE_FORMAT
  4627                  G1PPRL_BYTE_FORMAT
  4628                  G1PPRL_BYTE_FORMAT
  4629                  G1PPRL_DOUBLE_FORMAT,
  4630                  type, bottom, end,
  4631                  used_bytes, prev_live_bytes, next_live_bytes, gc_eff);
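         // Returning false keeps the heap region iteration going, so every
         // region gets its own row in the report.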
  4633   return false;
  4634 }
  4636 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
  4637   // Print the footer of the output.
  4638   _out->print_cr(G1PPRL_LINE_PREFIX);
  4639   _out->print_cr(G1PPRL_LINE_PREFIX
  4640                  " SUMMARY"
  4641                  G1PPRL_SUM_MB_FORMAT("capacity")
  4642                  G1PPRL_SUM_MB_PERC_FORMAT("used")
  4643                  G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
  4644                  G1PPRL_SUM_MB_PERC_FORMAT("next-live"),
  4645                  bytes_to_mb(_total_capacity_bytes),
  4646                  bytes_to_mb(_total_used_bytes),
  4647                  perc(_total_used_bytes, _total_capacity_bytes),
  4648                  bytes_to_mb(_total_prev_live_bytes),
  4649                  perc(_total_prev_live_bytes, _total_capacity_bytes),
  4650                  bytes_to_mb(_total_next_live_bytes),
  4651                  perc(_total_next_live_bytes, _total_capacity_bytes));
  4652   _out->cr();
  4653 }
