src/share/vm/gc_implementation/g1/concurrentMark.cpp

author:      tschatzl
date:        Tue, 19 Aug 2014 10:50:27 +0200
changeset:   7050:6701abbc4441
parent:      7049:eec72fa4b108
child:       7051:1f1d373cd044
permissions: -rw-r--r--

8054818: Refactor HeapRegionSeq to manage heap region and auxiliary data
Summary: Let HeapRegionSeq manage the heap region and auxiliary data to decrease the amount of responsibilities of G1CollectedHeap, and encapsulate this work from other code.
Reviewed-by: jwilhelm, jmasa, mgerdin, brutisso

     1 /*
     2  * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.
     8  *
     9  * This code is distributed in the hope that it will be useful, but WITHOUT
    10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    12  * version 2 for more details (a copy is included in the LICENSE file that
    13  * accompanied this code).
    14  *
    15  * You should have received a copy of the GNU General Public License version
    16  * 2 along with this work; if not, write to the Free Software Foundation,
    17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    18  *
    19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    20  * or visit www.oracle.com if you need additional information or have any
    21  * questions.
    22  *
    23  */
    25 #include "precompiled.hpp"
    26 #include "classfile/symbolTable.hpp"
    27 #include "code/codeCache.hpp"
    28 #include "gc_implementation/g1/concurrentMark.inline.hpp"
    29 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
    30 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
    31 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
    32 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
    33 #include "gc_implementation/g1/g1Log.hpp"
    34 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
    35 #include "gc_implementation/g1/g1RemSet.hpp"
    36 #include "gc_implementation/g1/heapRegion.inline.hpp"
    37 #include "gc_implementation/g1/heapRegionRemSet.hpp"
    38 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
    39 #include "gc_implementation/shared/vmGCOperations.hpp"
    40 #include "gc_implementation/shared/gcTimer.hpp"
    41 #include "gc_implementation/shared/gcTrace.hpp"
    42 #include "gc_implementation/shared/gcTraceTime.hpp"
    43 #include "memory/allocation.hpp"
    44 #include "memory/genOopClosures.inline.hpp"
    45 #include "memory/referencePolicy.hpp"
    46 #include "memory/resourceArea.hpp"
    47 #include "oops/oop.inline.hpp"
    48 #include "runtime/handles.inline.hpp"
    49 #include "runtime/java.hpp"
    50 #include "runtime/prefetch.inline.hpp"
    51 #include "services/memTracker.hpp"
    53 // Concurrent marking bit map wrapper
    55 CMBitMapRO::CMBitMapRO(int shifter) :
    56   _bm(),
    57   _shifter(shifter) {
    58   _bmStartWord = 0;
    59   _bmWordSize = 0;
    60 }
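        // Returns the address of the first marked word at or after addr
        // (rounded up to a possible object boundary), or limit if there is
        // no marked word in the range [addr, limit).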
    62 HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
    63                                                const HeapWord* limit) const {
    64   // First we must round addr *up* to a possible object boundary.
    65   addr = (HeapWord*)align_size_up((intptr_t)addr,
    66                                   HeapWordSize << _shifter);
    67   size_t addrOffset = heapWordToOffset(addr);
    68   if (limit == NULL) {
    69     limit = _bmStartWord + _bmWordSize;
    70   }
    71   size_t limitOffset = heapWordToOffset(limit);
    72   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
    73   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
    74   assert(nextAddr >= addr, "get_next_one postcondition");
    75   assert(nextAddr == limit || isMarked(nextAddr),
    76          "get_next_one postcondition");
    77   return nextAddr;
    78 }
    80 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(const HeapWord* addr,
    81                                                  const HeapWord* limit) const {
    82   size_t addrOffset = heapWordToOffset(addr);
    83   if (limit == NULL) {
    84     limit = _bmStartWord + _bmWordSize;
    85   }
    86   size_t limitOffset = heapWordToOffset(limit);
    87   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
    88   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
    89   assert(nextAddr >= addr, "get_next_one postcondition");
    90   assert(nextAddr == limit || !isMarked(nextAddr),
    91          "get_next_one postcondition");
    92   return nextAddr;
    93 }
    95 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
    96   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
    97   return (int) (diff >> _shifter);
    98 }
   100 #ifndef PRODUCT
   101 bool CMBitMapRO::covers(ReservedSpace heap_rs) const {
   102   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
   103   assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
   104          "size inconsistency");
   105   return _bmStartWord == (HeapWord*)(heap_rs.base()) &&
   106          _bmWordSize  == heap_rs.size()>>LogHeapWordSize;
   107 }
   108 #endif
   110 void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
   111   _bm.print_on_error(st, prefix);
   112 }
   114 bool CMBitMap::allocate(ReservedSpace heap_rs) {
   115   _bmStartWord = (HeapWord*)(heap_rs.base());
   116   _bmWordSize  = heap_rs.size()/HeapWordSize;    // heap_rs.size() is in bytes
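          // One bit covers 2^_shifter heap words, so the bitmap needs
          // (_bmWordSize >> _shifter) bits of backing store, i.e. about
          // (_bmWordSize >> (_shifter + LogBitsPerByte)) bytes; the "+ 1"
          // below rounds that up.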
   117   ReservedSpace brs(ReservedSpace::allocation_align_size_up(
   118                      (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
   119   if (!brs.is_reserved()) {
   120     warning("ConcurrentMark marking bit map allocation failure");
   121     return false;
   122   }
   123   MemTracker::record_virtual_memory_type((address)brs.base(), mtGC);
   124   // For now we'll just commit all of the bit map up front.
   125   // Later on we'll try to be more parsimonious with swap.
   126   if (!_virtual_space.initialize(brs, brs.size())) {
   127     warning("ConcurrentMark marking bit map backing store failure");
   128     return false;
   129   }
   130   assert(_virtual_space.committed_size() == brs.size(),
   131          "didn't reserve backing store for all of concurrent marking bit map?");
   132   _bm.set_map((BitMap::bm_word_t*)_virtual_space.low());
   133   assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
   134          _bmWordSize, "inconsistency in bit map sizing");
   135   _bm.set_size(_bmWordSize >> _shifter);
   136   return true;
   137 }
   139 void CMBitMap::clearAll() {
   140   _bm.clear();
   141   return;
   142 }
   144 void CMBitMap::markRange(MemRegion mr) {
   145   mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
   146   assert(!mr.is_empty(), "unexpected empty region");
   147   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
   148           ((HeapWord *) mr.end())),
   149          "markRange memory region end is not card aligned");
   150   // convert address range into offset range
   151   _bm.at_put_range(heapWordToOffset(mr.start()),
   152                    heapWordToOffset(mr.end()), true);
   153 }
   155 void CMBitMap::clearRange(MemRegion mr) {
   156   mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
   157   assert(!mr.is_empty(), "unexpected empty region");
   158   // convert address range into offset range
   159   _bm.at_put_range(heapWordToOffset(mr.start()),
   160                    heapWordToOffset(mr.end()), false);
   161 }
   163 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
   164                                             HeapWord* end_addr) {
   165   HeapWord* start = getNextMarkedWordAddress(addr);
   166   start = MIN2(start, end_addr);
   167   HeapWord* end   = getNextUnmarkedWordAddress(start);
   168   end = MIN2(end, end_addr);
   169   assert(start <= end, "Consistency check");
   170   MemRegion mr(start, end);
   171   if (!mr.is_empty()) {
   172     clearRange(mr);
   173   }
   174   return mr;
   175 }
   177 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
   178   _base(NULL), _cm(cm)
   179 #ifdef ASSERT
   180   , _drain_in_progress(false)
   181   , _drain_in_progress_yields(false)
   182 #endif
   183 {}
   185 bool CMMarkStack::allocate(size_t capacity) {
   186   // allocate a stack of the requisite depth
   187   ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
   188   if (!rs.is_reserved()) {
   189     warning("ConcurrentMark MarkStack allocation failure");
   190     return false;
   191   }
   192   MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
   193   if (!_virtual_space.initialize(rs, rs.size())) {
   194     warning("ConcurrentMark MarkStack backing store failure");
   195     // Release the virtual memory reserved for the marking stack
   196     rs.release();
   197     return false;
   198   }
   199   assert(_virtual_space.committed_size() == rs.size(),
   200          "Didn't reserve backing store for all of ConcurrentMark stack?");
   201   _base = (oop*) _virtual_space.low();
   202   setEmpty();
   203   _capacity = (jint) capacity;
   204   _saved_index = -1;
   205   _should_expand = false;
   206   NOT_PRODUCT(_max_depth = 0);
   207   return true;
   208 }
   210 void CMMarkStack::expand() {
    211   // Called during remark if we've overflowed the marking stack during marking.
    212   assert(isEmpty(), "stack should have been emptied while handling overflow");
   213   assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
   214   // Clear expansion flag
   215   _should_expand = false;
   216   if (_capacity == (jint) MarkStackSizeMax) {
   217     if (PrintGCDetails && Verbose) {
   218       gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
   219     }
   220     return;
   221   }
   222   // Double capacity if possible
   223   jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
   224   // Do not give up existing stack until we have managed to
   225   // get the double capacity that we desired.
   226   ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
   227                                                            sizeof(oop)));
   228   if (rs.is_reserved()) {
   229     // Release the backing store associated with old stack
   230     _virtual_space.release();
   231     // Reinitialize virtual space for new stack
   232     if (!_virtual_space.initialize(rs, rs.size())) {
   233       fatal("Not enough swap for expanded marking stack capacity");
   234     }
   235     _base = (oop*)(_virtual_space.low());
   236     _index = 0;
   237     _capacity = new_capacity;
   238   } else {
   239     if (PrintGCDetails && Verbose) {
   240       // Failed to double capacity, continue;
   241       gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
   242                           SIZE_FORMAT"K to " SIZE_FORMAT"K",
   243                           _capacity / K, new_capacity / K);
   244     }
   245   }
   246 }
   248 void CMMarkStack::set_should_expand() {
    249   // If we're resetting the marking state because of a
   250   // marking stack overflow, record that we should, if
   251   // possible, expand the stack.
   252   _should_expand = _cm->has_overflown();
   253 }
   255 CMMarkStack::~CMMarkStack() {
   256   if (_base != NULL) {
   257     _base = NULL;
   258     _virtual_space.release();
   259   }
   260 }
   262 void CMMarkStack::par_push(oop ptr) {
   263   while (true) {
   264     if (isFull()) {
   265       _overflow = true;
   266       return;
   267     }
   268     // Otherwise...
   269     jint index = _index;
   270     jint next_index = index+1;
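            // Try to claim slot 'index' by atomically advancing _index to
            // next_index. If the CAS fails another thread won the race, so we
            // loop around and retry with a freshly read index.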
   271     jint res = Atomic::cmpxchg(next_index, &_index, index);
   272     if (res == index) {
   273       _base[index] = ptr;
   274       // Note that we don't maintain this atomically.  We could, but it
   275       // doesn't seem necessary.
   276       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
   277       return;
   278     }
   279     // Otherwise, we need to try again.
   280   }
   281 }
   283 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
   284   while (true) {
   285     if (isFull()) {
   286       _overflow = true;
   287       return;
   288     }
   289     // Otherwise...
   290     jint index = _index;
   291     jint next_index = index + n;
   292     if (next_index > _capacity) {
   293       _overflow = true;
   294       return;
   295     }
   296     jint res = Atomic::cmpxchg(next_index, &_index, index);
   297     if (res == index) {
   298       for (int i = 0; i < n; i++) {
   299         int  ind = index + i;
   300         assert(ind < _capacity, "By overflow test above.");
   301         _base[ind] = ptr_arr[i];
   302       }
   303       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
   304       return;
   305     }
   306     // Otherwise, we need to try again.
   307   }
   308 }
   310 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
   311   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
   312   jint start = _index;
   313   jint next_index = start + n;
   314   if (next_index > _capacity) {
   315     _overflow = true;
   316     return;
   317   }
   318   // Otherwise.
   319   _index = next_index;
   320   for (int i = 0; i < n; i++) {
   321     int ind = start + i;
   322     assert(ind < _capacity, "By overflow test above.");
   323     _base[ind] = ptr_arr[i];
   324   }
   325   NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
   326 }
   328 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
   329   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
   330   jint index = _index;
   331   if (index == 0) {
   332     *n = 0;
   333     return false;
   334   } else {
   335     int k = MIN2(max, index);
   336     jint  new_ind = index - k;
   337     for (int j = 0; j < k; j++) {
   338       ptr_arr[j] = _base[new_ind + j];
   339     }
   340     _index = new_ind;
   341     *n = k;
   342     return true;
   343   }
   344 }
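        // Pops entries until the stack is empty, applying 'cl' to each object.
        // If 'yield_after' is true, a yield check is performed after each
        // object and the drain returns false if it was interrupted by a yield.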
   346 template<class OopClosureClass>
   347 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
   348   assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
   349          || SafepointSynchronize::is_at_safepoint(),
   350          "Drain recursion must be yield-safe.");
   351   bool res = true;
   352   debug_only(_drain_in_progress = true);
   353   debug_only(_drain_in_progress_yields = yield_after);
   354   while (!isEmpty()) {
   355     oop newOop = pop();
   356     assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
   357     assert(newOop->is_oop(), "Expected an oop");
   358     assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
   359            "only grey objects on this stack");
   360     newOop->oop_iterate(cl);
   361     if (yield_after && _cm->do_yield_check()) {
   362       res = false;
   363       break;
   364     }
   365   }
   366   debug_only(_drain_in_progress = false);
   367   return res;
   368 }
   370 void CMMarkStack::note_start_of_gc() {
   371   assert(_saved_index == -1,
   372          "note_start_of_gc()/end_of_gc() bracketed incorrectly");
   373   _saved_index = _index;
   374 }
   376 void CMMarkStack::note_end_of_gc() {
   377   // This is intentionally a guarantee, instead of an assert. If we
   378   // accidentally add something to the mark stack during GC, it
    379   // will be a correctness issue so it's better if we crash. We'll
   380   // only check this once per GC anyway, so it won't be a performance
   381   // issue in any way.
   382   guarantee(_saved_index == _index,
   383             err_msg("saved index: %d index: %d", _saved_index, _index));
   384   _saved_index = -1;
   385 }
   387 void CMMarkStack::oops_do(OopClosure* f) {
   388   assert(_saved_index == _index,
   389          err_msg("saved index: %d index: %d", _saved_index, _index));
   390   for (int i = 0; i < _index; i += 1) {
   391     f->do_oop(&_base[i]);
   392   }
   393 }
   395 bool ConcurrentMark::not_yet_marked(oop obj) const {
   396   return _g1h->is_obj_ill(obj);
   397 }
   399 CMRootRegions::CMRootRegions() :
   400   _young_list(NULL), _cm(NULL), _scan_in_progress(false),
   401   _should_abort(false),  _next_survivor(NULL) { }
   403 void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
   404   _young_list = g1h->young_list();
   405   _cm = cm;
   406 }
   408 void CMRootRegions::prepare_for_scan() {
   409   assert(!scan_in_progress(), "pre-condition");
   411   // Currently, only survivors can be root regions.
   412   assert(_next_survivor == NULL, "pre-condition");
   413   _next_survivor = _young_list->first_survivor_region();
   414   _scan_in_progress = (_next_survivor != NULL);
   415   _should_abort = false;
   416 }
   418 HeapRegion* CMRootRegions::claim_next() {
   419   if (_should_abort) {
   420     // If someone has set the should_abort flag, we return NULL to
   421     // force the caller to bail out of their loop.
   422     return NULL;
   423   }
   425   // Currently, only survivors can be root regions.
   426   HeapRegion* res = _next_survivor;
   427   if (res != NULL) {
   428     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
   429     // Read it again in case it changed while we were waiting for the lock.
   430     res = _next_survivor;
   431     if (res != NULL) {
   432       if (res == _young_list->last_survivor_region()) {
   433         // We just claimed the last survivor so store NULL to indicate
   434         // that we're done.
   435         _next_survivor = NULL;
   436       } else {
   437         _next_survivor = res->get_next_young_region();
   438       }
   439     } else {
   440       // Someone else claimed the last survivor while we were trying
   441       // to take the lock so nothing else to do.
   442     }
   443   }
   444   assert(res == NULL || res->is_survivor(), "post-condition");
   446   return res;
   447 }
   449 void CMRootRegions::scan_finished() {
   450   assert(scan_in_progress(), "pre-condition");
   452   // Currently, only survivors can be root regions.
   453   if (!_should_abort) {
   454     assert(_next_survivor == NULL, "we should have claimed all survivors");
   455   }
   456   _next_survivor = NULL;
   458   {
   459     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
   460     _scan_in_progress = false;
   461     RootRegionScan_lock->notify_all();
   462   }
   463 }
   465 bool CMRootRegions::wait_until_scan_finished() {
   466   if (!scan_in_progress()) return false;
   468   {
   469     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
   470     while (scan_in_progress()) {
   471       RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
   472     }
   473   }
   474   return true;
   475 }
   477 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
   478 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
   479 #endif // _MSC_VER
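        // Derive the default number of concurrent marking threads: roughly one
        // quarter of the parallel GC threads, but always at least one.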
   481 uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
   482   return MAX2((n_par_threads + 2) / 4, 1U);
   483 }
   485 ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, ReservedSpace heap_rs) :
   486   _g1h(g1h),
   487   _markBitMap1(log2_intptr(MinObjAlignment)),
   488   _markBitMap2(log2_intptr(MinObjAlignment)),
   489   _parallel_marking_threads(0),
   490   _max_parallel_marking_threads(0),
   491   _sleep_factor(0.0),
   492   _marking_task_overhead(1.0),
   493   _cleanup_sleep_factor(0.0),
   494   _cleanup_task_overhead(1.0),
   495   _cleanup_list("Cleanup List"),
   496   _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
   497   _card_bm((heap_rs.size() + CardTableModRefBS::card_size - 1) >>
   498             CardTableModRefBS::card_shift,
   499             false /* in_resource_area*/),
   501   _prevMarkBitMap(&_markBitMap1),
   502   _nextMarkBitMap(&_markBitMap2),
   504   _markStack(this),
   505   // _finger set in set_non_marking_state
   507   _max_worker_id(MAX2((uint)ParallelGCThreads, 1U)),
   508   // _active_tasks set in set_non_marking_state
   509   // _tasks set inside the constructor
   510   _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
   511   _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),
   513   _has_overflown(false),
   514   _concurrent(false),
   515   _has_aborted(false),
   516   _aborted_gc_id(GCId::undefined()),
   517   _restart_for_overflow(false),
   518   _concurrent_marking_in_progress(false),
   520   // _verbose_level set below
   522   _init_times(),
   523   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
   524   _cleanup_times(),
   525   _total_counting_time(0.0),
   526   _total_rs_scrub_time(0.0),
   528   _parallel_workers(NULL),
   530   _count_card_bitmaps(NULL),
   531   _count_marked_bytes(NULL),
   532   _completed_initialization(false) {
   533   CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
   534   if (verbose_level < no_verbose) {
   535     verbose_level = no_verbose;
   536   }
   537   if (verbose_level > high_verbose) {
   538     verbose_level = high_verbose;
   539   }
   540   _verbose_level = verbose_level;
   542   if (verbose_low()) {
   543     gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
   544                            "heap end = " INTPTR_FORMAT, p2i(_heap_start), p2i(_heap_end));
   545   }
   547   if (!_markBitMap1.allocate(heap_rs)) {
   548     warning("Failed to allocate first CM bit map");
   549     return;
   550   }
   551   if (!_markBitMap2.allocate(heap_rs)) {
   552     warning("Failed to allocate second CM bit map");
   553     return;
   554   }
   556   // Create & start a ConcurrentMark thread.
   557   _cmThread = new ConcurrentMarkThread(this);
   558   assert(cmThread() != NULL, "CM Thread should have been created");
   559   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
   560   if (_cmThread->osthread() == NULL) {
   561       vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
   562   }
   564   assert(CGC_lock != NULL, "Where's the CGC_lock?");
   565   assert(_markBitMap1.covers(heap_rs), "_markBitMap1 inconsistency");
   566   assert(_markBitMap2.covers(heap_rs), "_markBitMap2 inconsistency");
   568   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
   569   satb_qs.set_buffer_size(G1SATBBufferSize);
   571   _root_regions.init(_g1h, this);
   573   if (ConcGCThreads > ParallelGCThreads) {
   574     warning("Can't have more ConcGCThreads (" UINTX_FORMAT ") "
   575             "than ParallelGCThreads (" UINTX_FORMAT ").",
   576             ConcGCThreads, ParallelGCThreads);
   577     return;
   578   }
   579   if (ParallelGCThreads == 0) {
   580     // if we are not running with any parallel GC threads we will not
   581     // spawn any marking threads either
   582     _parallel_marking_threads =       0;
   583     _max_parallel_marking_threads =   0;
   584     _sleep_factor             =     0.0;
   585     _marking_task_overhead    =     1.0;
   586   } else {
   587     if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
   588       // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
   589       // if both are set
   590       _sleep_factor             = 0.0;
   591       _marking_task_overhead    = 1.0;
   592     } else if (G1MarkingOverheadPercent > 0) {
   593       // We will calculate the number of parallel marking threads based
   594       // on a target overhead with respect to the soft real-time goal
   595       double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
   596       double overall_cm_overhead =
   597         (double) MaxGCPauseMillis * marking_overhead /
   598         (double) GCPauseIntervalMillis;
   599       double cpu_ratio = 1.0 / (double) os::processor_count();
   600       double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
   601       double marking_task_overhead =
   602         overall_cm_overhead / marking_thread_num *
   603                                                 (double) os::processor_count();
   604       double sleep_factor =
   605                          (1.0 - marking_task_overhead) / marking_task_overhead;
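              // Illustrative example (not the flag defaults): with
              // G1MarkingOverheadPercent=10, MaxGCPauseMillis=200,
              // GCPauseIntervalMillis=1000 and 8 processors:
              //   overall_cm_overhead   = 200 * 0.10 / 1000   = 0.02
              //   marking_thread_num    = ceil(0.02 / (1/8))  = 1
              //   marking_task_overhead = 0.02 / 1 * 8        = 0.16
              //   sleep_factor          = (1 - 0.16) / 0.16   = 5.25
              // i.e. the single marking thread sleeps about five times as long
              // as it runs.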
   607       FLAG_SET_ERGO(uintx, ConcGCThreads, (uint) marking_thread_num);
   608       _sleep_factor             = sleep_factor;
   609       _marking_task_overhead    = marking_task_overhead;
   610     } else {
   611       // Calculate the number of parallel marking threads by scaling
   612       // the number of parallel GC threads.
   613       uint marking_thread_num = scale_parallel_threads((uint) ParallelGCThreads);
   614       FLAG_SET_ERGO(uintx, ConcGCThreads, marking_thread_num);
   615       _sleep_factor             = 0.0;
   616       _marking_task_overhead    = 1.0;
   617     }
   619     assert(ConcGCThreads > 0, "Should have been set");
   620     _parallel_marking_threads = (uint) ConcGCThreads;
   621     _max_parallel_marking_threads = _parallel_marking_threads;
   623     if (parallel_marking_threads() > 1) {
   624       _cleanup_task_overhead = 1.0;
   625     } else {
   626       _cleanup_task_overhead = marking_task_overhead();
   627     }
   628     _cleanup_sleep_factor =
   629                      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
   631 #if 0
   632     gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
   633     gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
   634     gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
   635     gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
   636     gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
   637 #endif
   639     guarantee(parallel_marking_threads() > 0, "peace of mind");
   640     _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
   641          _max_parallel_marking_threads, false, true);
   642     if (_parallel_workers == NULL) {
   643       vm_exit_during_initialization("Failed necessary allocation.");
   644     } else {
   645       _parallel_workers->initialize_workers();
   646     }
   647   }
   649   if (FLAG_IS_DEFAULT(MarkStackSize)) {
   650     uintx mark_stack_size =
   651       MIN2(MarkStackSizeMax,
   652           MAX2(MarkStackSize, (uintx) (parallel_marking_threads() * TASKQUEUE_SIZE)));
   653     // Verify that the calculated value for MarkStackSize is in range.
   654     // It would be nice to use the private utility routine from Arguments.
   655     if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
   656       warning("Invalid value calculated for MarkStackSize (" UINTX_FORMAT "): "
   657               "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
   658               mark_stack_size, (uintx) 1, MarkStackSizeMax);
   659       return;
   660     }
   661     FLAG_SET_ERGO(uintx, MarkStackSize, mark_stack_size);
   662   } else {
   663     // Verify MarkStackSize is in range.
   664     if (FLAG_IS_CMDLINE(MarkStackSize)) {
   665       if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
   666         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
   667           warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT "): "
   668                   "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
   669                   MarkStackSize, (uintx) 1, MarkStackSizeMax);
   670           return;
   671         }
   672       } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
   673         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
   674           warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT ")"
   675                   " or for MarkStackSizeMax (" UINTX_FORMAT ")",
   676                   MarkStackSize, MarkStackSizeMax);
   677           return;
   678         }
   679       }
   680     }
   681   }
   683   if (!_markStack.allocate(MarkStackSize)) {
   684     warning("Failed to allocate CM marking stack");
   685     return;
   686   }
   688   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
   689   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
   691   _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
   692   _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
   694   BitMap::idx_t card_bm_size = _card_bm.size();
   696   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
   697   _active_tasks = _max_worker_id;
   699   size_t max_regions = (size_t) _g1h->max_regions();
   700   for (uint i = 0; i < _max_worker_id; ++i) {
   701     CMTaskQueue* task_queue = new CMTaskQueue();
   702     task_queue->initialize();
   703     _task_queues->register_queue(i, task_queue);
   705     _count_card_bitmaps[i] = BitMap(card_bm_size, false);
   706     _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);
   708     _tasks[i] = new CMTask(i, this,
   709                            _count_marked_bytes[i],
   710                            &_count_card_bitmaps[i],
   711                            task_queue, _task_queues);
   713     _accum_task_vtime[i] = 0.0;
   714   }
   716   // Calculate the card number for the bottom of the heap. Used
   717   // in biasing indexes into the accounting card bitmaps.
   718   _heap_bottom_card_num =
   719     intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
   720                                 CardTableModRefBS::card_shift);
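          // The card bitmap index for an address is its card number (the
          // address shifted right by card_shift) minus this value, so index 0
          // corresponds to the first card of the reserved heap.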
   722   // Clear all the liveness counting data
   723   clear_all_count_data();
   725   // so that the call below can read a sensible value
   726   _heap_start = (HeapWord*) heap_rs.base();
   727   set_non_marking_state();
   728   _completed_initialization = true;
   729 }
   731 void ConcurrentMark::update_heap_boundaries(MemRegion bounds, bool force) {
   732   // If concurrent marking is not in progress, then we do not need to
   733   // update _heap_end.
   734   if (!concurrent_marking_in_progress() && !force) return;
   736   assert(bounds.start() == _heap_start, "start shouldn't change");
   737   HeapWord* new_end = bounds.end();
   738   if (new_end > _heap_end) {
   739     // The heap has been expanded.
   741     _heap_end = new_end;
   742   }
   743   // Notice that the heap can also shrink. However, this only happens
   744   // during a Full GC (at least currently) and the entire marking
   745   // phase will bail out and the task will not be restarted. So, let's
   746   // do nothing.
   747 }
   749 void ConcurrentMark::reset() {
    750   // Starting values for these two. This should be called in a STW
    751   // phase. CM will be notified of any future g1_committed expansions;
    752   // this will happen at the end of evacuation pauses, when tasks are
    753   // inactive.
   754   MemRegion committed = _g1h->g1_committed();
   755   _heap_start = committed.start();
   756   _heap_end   = committed.end();
   758   // Separated the asserts so that we know which one fires.
   759   assert(_heap_start != NULL, "heap bounds should look ok");
   760   assert(_heap_end != NULL, "heap bounds should look ok");
   761   assert(_heap_start < _heap_end, "heap bounds should look ok");
   763   // Reset all the marking data structures and any necessary flags
   764   reset_marking_state();
   766   if (verbose_low()) {
   767     gclog_or_tty->print_cr("[global] resetting");
   768   }
   770   // We do reset all of them, since different phases will use
    771   // a different number of active threads. So, it's easiest to have all
   772   // of them ready.
   773   for (uint i = 0; i < _max_worker_id; ++i) {
   774     _tasks[i]->reset(_nextMarkBitMap);
   775   }
   777   // we need this to make sure that the flag is on during the evac
   778   // pause with initial mark piggy-backed
   779   set_concurrent_marking_in_progress();
   780 }
   783 void ConcurrentMark::reset_marking_state(bool clear_overflow) {
   784   _markStack.set_should_expand();
   785   _markStack.setEmpty();        // Also clears the _markStack overflow flag
   786   if (clear_overflow) {
   787     clear_has_overflown();
   788   } else {
   789     assert(has_overflown(), "pre-condition");
   790   }
   791   _finger = _heap_start;
   793   for (uint i = 0; i < _max_worker_id; ++i) {
   794     CMTaskQueue* queue = _task_queues->queue(i);
   795     queue->set_empty();
   796   }
   797 }
   799 void ConcurrentMark::set_concurrency(uint active_tasks) {
   800   assert(active_tasks <= _max_worker_id, "we should not have more");
   802   _active_tasks = active_tasks;
   803   // Need to update the three data structures below according to the
   804   // number of active threads for this phase.
   805   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
   806   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
   807   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
   808 }
   810 void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
   811   set_concurrency(active_tasks);
   813   _concurrent = concurrent;
   814   // We propagate this to all tasks, not just the active ones.
   815   for (uint i = 0; i < _max_worker_id; ++i)
   816     _tasks[i]->set_concurrent(concurrent);
   818   if (concurrent) {
   819     set_concurrent_marking_in_progress();
   820   } else {
   821     // We currently assume that the concurrent flag has been set to
   822     // false before we start remark. At this point we should also be
   823     // in a STW phase.
   824     assert(!concurrent_marking_in_progress(), "invariant");
   825     assert(out_of_regions(),
   826            err_msg("only way to get here: _finger: "PTR_FORMAT", _heap_end: "PTR_FORMAT,
   827                    p2i(_finger), p2i(_heap_end)));
   828     update_heap_boundaries(_g1h->g1_committed(), true);
   829   }
   830 }
   832 void ConcurrentMark::set_non_marking_state() {
   833   // We set the global marking state to some default values when we're
   834   // not doing marking.
   835   reset_marking_state();
   836   _active_tasks = 0;
   837   clear_concurrent_marking_in_progress();
   838 }
   840 ConcurrentMark::~ConcurrentMark() {
   841   // The ConcurrentMark instance is never freed.
   842   ShouldNotReachHere();
   843 }
   845 void ConcurrentMark::clearNextBitmap() {
   846   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   847   G1CollectorPolicy* g1p = g1h->g1_policy();
   849   // Make sure that the concurrent mark thread looks to still be in
   850   // the current cycle.
   851   guarantee(cmThread()->during_cycle(), "invariant");
   853   // We are finishing up the current cycle by clearing the next
   854   // marking bitmap and getting it ready for the next cycle. During
   855   // this time no other cycle can start. So, let's make sure that this
   856   // is the case.
   857   guarantee(!g1h->mark_in_progress(), "invariant");
   859   // clear the mark bitmap (no grey objects to start with).
   860   // We need to do this in chunks and offer to yield in between
   861   // each chunk.
   862   HeapWord* start  = _nextMarkBitMap->startWord();
   863   HeapWord* end    = _nextMarkBitMap->endWord();
   864   HeapWord* cur    = start;
   865   size_t chunkSize = M;
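          // chunkSize is in HeapWords (cur is a HeapWord*), so each iteration
          // clears the bitmap for an M-word chunk of the heap before offering
          // to yield.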
   866   while (cur < end) {
   867     HeapWord* next = cur + chunkSize;
   868     if (next > end) {
   869       next = end;
   870     }
   871     MemRegion mr(cur,next);
   872     _nextMarkBitMap->clearRange(mr);
   873     cur = next;
   874     do_yield_check();
   876     // Repeat the asserts from above. We'll do them as asserts here to
   877     // minimize their overhead on the product. However, we'll have
   878     // them as guarantees at the beginning / end of the bitmap
   879     // clearing to get some checking in the product.
   880     assert(cmThread()->during_cycle(), "invariant");
   881     assert(!g1h->mark_in_progress(), "invariant");
   882   }
   884   // Clear the liveness counting data
   885   clear_all_count_data();
   887   // Repeat the asserts from above.
   888   guarantee(cmThread()->during_cycle(), "invariant");
   889   guarantee(!g1h->mark_in_progress(), "invariant");
   890 }
   892 bool ConcurrentMark::nextMarkBitmapIsClear() {
   893   return _nextMarkBitMap->getNextMarkedWordAddress(_heap_start, _heap_end) == _heap_end;
   894 }
   896 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
   897 public:
   898   bool doHeapRegion(HeapRegion* r) {
   899     if (!r->continuesHumongous()) {
   900       r->note_start_of_marking();
   901     }
   902     return false;
   903   }
   904 };
   906 void ConcurrentMark::checkpointRootsInitialPre() {
   907   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
   908   G1CollectorPolicy* g1p = g1h->g1_policy();
   910   _has_aborted = false;
   912 #ifndef PRODUCT
   913   if (G1PrintReachableAtInitialMark) {
   914     print_reachable("at-cycle-start",
   915                     VerifyOption_G1UsePrevMarking, true /* all */);
   916   }
   917 #endif
   919   // Initialise marking structures. This has to be done in a STW phase.
   920   reset();
   922   // For each region note start of marking.
   923   NoteStartOfMarkHRClosure startcl;
   924   g1h->heap_region_iterate(&startcl);
   925 }
   928 void ConcurrentMark::checkpointRootsInitialPost() {
   929   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
   931   // If we force an overflow during remark, the remark operation will
   932   // actually abort and we'll restart concurrent marking. If we always
    933   // force an overflow during remark we'll never actually complete the
    934   // marking phase. So, we initialize this here, at the start of the
    935   // cycle, so that the remaining overflow number will decrease at
   936   // every remark and we'll eventually not need to cause one.
   937   force_overflow_stw()->init();
   939   // Start Concurrent Marking weak-reference discovery.
   940   ReferenceProcessor* rp = g1h->ref_processor_cm();
   941   // enable ("weak") refs discovery
   942   rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
   943   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
   945   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    946   // This is the start of the marking cycle; we expect all
    947   // threads to have SATB queues with active set to false.
   948   satb_mq_set.set_active_all_threads(true, /* new active value */
   949                                      false /* expected_active */);
   951   _root_regions.prepare_for_scan();
   953   // update_g1_committed() will be called at the end of an evac pause
   954   // when marking is on. So, it's also called at the end of the
   955   // initial-mark pause to update the heap end, if the heap expands
   956   // during it. No need to call it here.
   957 }
   959 /*
   960  * Notice that in the next two methods, we actually leave the STS
   961  * during the barrier sync and join it immediately afterwards. If we
   962  * do not do this, the following deadlock can occur: one thread could
   963  * be in the barrier sync code, waiting for the other thread to also
   964  * sync up, whereas another one could be trying to yield, while also
   965  * waiting for the other threads to sync up too.
   966  *
   967  * Note, however, that this code is also used during remark and in
   968  * this case we should not attempt to leave / enter the STS, otherwise
    969  * we'll either hit an assert (debug / fastdebug) or deadlock
   970  * (product). So we should only leave / enter the STS if we are
   971  * operating concurrently.
   972  *
   973  * Because the thread that does the sync barrier has left the STS, it
    974  * is possible for a Full GC or an evacuation pause to occur while the
    975  * thread is suspended. This is actually safe, since entering the sync
    976  * barrier is one of the last things do_marking_step() does, and it
   977  * doesn't manipulate any data structures afterwards.
   978  */
   980 void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
   981   if (verbose_low()) {
   982     gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
   983   }
   985   if (concurrent()) {
   986     SuspendibleThreadSet::leave();
   987   }
   989   bool barrier_aborted = !_first_overflow_barrier_sync.enter();
   991   if (concurrent()) {
   992     SuspendibleThreadSet::join();
   993   }
   994   // at this point everyone should have synced up and not be doing any
   995   // more work
   997   if (verbose_low()) {
   998     if (barrier_aborted) {
   999       gclog_or_tty->print_cr("[%u] aborted first barrier", worker_id);
  1000     } else {
   1001       gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
   1002     }
   1003   }
  1005   if (barrier_aborted) {
  1006     // If the barrier aborted we ignore the overflow condition and
  1007     // just abort the whole marking phase as quickly as possible.
   1008     return;
   1009   }
  1011   // If we're executing the concurrent phase of marking, reset the marking
  1012   // state; otherwise the marking state is reset after reference processing,
  1013   // during the remark pause.
  1014   // If we reset here as a result of an overflow during the remark we will
  1015   // see assertion failures from any subsequent set_concurrency_and_phase()
  1016   // calls.
  1017   if (concurrent()) {
   1018     // let the task associated with worker 0 do this
  1019     if (worker_id == 0) {
  1020       // task 0 is responsible for clearing the global data structures
  1021       // We should be here because of an overflow. During STW we should
  1022       // not clear the overflow flag since we rely on it being true when
   1023       // we exit this method to abort the pause and restart concurrent
  1024       // marking.
  1025       reset_marking_state(true /* clear_overflow */);
  1026       force_overflow()->update();
  1028       if (G1Log::fine()) {
  1029         gclog_or_tty->gclog_stamp(concurrent_gc_id());
   1030         gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
   1031       }
   1032     }
   1033   }
   1035   // after this, each task should reset its own data structures and
   1036   // then go into the second barrier
   1037 }
  1039 void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  1040   if (verbose_low()) {
   1041     gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
   1042   }
   1044   if (concurrent()) {
   1045     SuspendibleThreadSet::leave();
   1046   }
   1048   bool barrier_aborted = !_second_overflow_barrier_sync.enter();
   1050   if (concurrent()) {
   1051     SuspendibleThreadSet::join();
   1052   }
   1053   // at this point everything should be re-initialized and ready to go
   1055   if (verbose_low()) {
   1056     if (barrier_aborted) {
   1057       gclog_or_tty->print_cr("[%u] aborted second barrier", worker_id);
   1058     } else {
   1059       gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
   1060     }
   1061   }
   1062 }
  1064 #ifndef PRODUCT
  1065 void ForceOverflowSettings::init() {
  1066   _num_remaining = G1ConcMarkForceOverflow;
  1067   _force = false;
   1068   update();
   1069 }
  1071 void ForceOverflowSettings::update() {
  1072   if (_num_remaining > 0) {
  1073     _num_remaining -= 1;
  1074     _force = true;
  1075   } else {
   1076     _force = false;
   1077   }
   1078 }
  1080 bool ForceOverflowSettings::should_force() {
  1081   if (_force) {
  1082     _force = false;
  1083     return true;
  1084   } else {
   1085     return false;
   1086   }
   1087 }
  1088 #endif // !PRODUCT
  1090 class CMConcurrentMarkingTask: public AbstractGangTask {
  1091 private:
  1092   ConcurrentMark*       _cm;
  1093   ConcurrentMarkThread* _cmt;
  1095 public:
  1096   void work(uint worker_id) {
  1097     assert(Thread::current()->is_ConcurrentGC_thread(),
  1098            "this should only be done by a conc GC thread");
  1099     ResourceMark rm;
  1101     double start_vtime = os::elapsedVTime();
  1103     SuspendibleThreadSet::join();
  1105     assert(worker_id < _cm->active_tasks(), "invariant");
  1106     CMTask* the_task = _cm->task(worker_id);
  1107     the_task->record_start_time();
  1108     if (!_cm->has_aborted()) {
  1109       do {
  1110         double start_vtime_sec = os::elapsedVTime();
  1111         double start_time_sec = os::elapsedTime();
  1112         double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
  1114         the_task->do_marking_step(mark_step_duration_ms,
  1115                                   true  /* do_termination */,
  1116                                   false /* is_serial*/);
  1118         double end_time_sec = os::elapsedTime();
  1119         double end_vtime_sec = os::elapsedVTime();
  1120         double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
  1121         double elapsed_time_sec = end_time_sec - start_time_sec;
  1122         _cm->clear_has_overflown();
  1124         bool ret = _cm->do_yield_check(worker_id);
  1126         jlong sleep_time_ms;
  1127         if (!_cm->has_aborted() && the_task->has_aborted()) {
  1128           sleep_time_ms =
  1129             (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
  1130           SuspendibleThreadSet::leave();
  1131           os::sleep(Thread::current(), sleep_time_ms, false);
  1132           SuspendibleThreadSet::join();
  1134         double end_time2_sec = os::elapsedTime();
  1135         double elapsed_time2_sec = end_time2_sec - start_time_sec;
  1137 #if 0
  1138           gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
  1139                                  "overhead %1.4lf",
  1140                                  elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
  1141                                  the_task->conc_overhead(os::elapsedTime()) * 8.0);
  1142           gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
  1143                                  elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
  1144 #endif
  1145       } while (!_cm->has_aborted() && the_task->has_aborted());
  1147     the_task->record_end_time();
  1148     guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
  1150     SuspendibleThreadSet::leave();
  1152     double end_vtime = os::elapsedVTime();
  1153     _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  1156   CMConcurrentMarkingTask(ConcurrentMark* cm,
  1157                           ConcurrentMarkThread* cmt) :
  1158       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
  1160   ~CMConcurrentMarkingTask() { }
  1161 };
  1163 // Calculates the number of active workers for a concurrent
  1164 // phase.
  1165 uint ConcurrentMark::calc_parallel_marking_threads() {
  1166   if (G1CollectedHeap::use_parallel_gc_threads()) {
  1167     uint n_conc_workers = 0;
  1168     if (!UseDynamicNumberOfGCThreads ||
  1169         (!FLAG_IS_DEFAULT(ConcGCThreads) &&
  1170          !ForceDynamicNumberOfGCThreads)) {
  1171       n_conc_workers = max_parallel_marking_threads();
  1172     } else {
  1173       n_conc_workers =
  1174         AdaptiveSizePolicy::calc_default_active_workers(
  1175                                      max_parallel_marking_threads(),
  1176                                      1, /* Minimum workers */
  1177                                      parallel_marking_threads(),
  1178                                      Threads::number_of_non_daemon_threads());
  1179       // Don't scale down "n_conc_workers" by scale_parallel_threads() because
  1180       // that scaling has already gone into "_max_parallel_marking_threads".
  1182     assert(n_conc_workers > 0, "Always need at least 1");
  1183     return n_conc_workers;
  1185   // If we are not running with any parallel GC threads we will not
  1186   // have spawned any marking threads either. Hence the number of
  1187   // concurrent workers should be 0.
  1188   return 0;
  1191 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
  1192   // Currently, only survivors can be root regions.
  1193   assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  1194   G1RootRegionScanClosure cl(_g1h, this, worker_id);
  1196   const uintx interval = PrefetchScanIntervalInBytes;
  1197   HeapWord* curr = hr->bottom();
  1198   const HeapWord* end = hr->top();
  1199   while (curr < end) {
  1200     Prefetch::read(curr, interval);
  1201     oop obj = oop(curr);
  1202     int size = obj->oop_iterate(&cl);
  1203     assert(size == obj->size(), "sanity");
  1204     curr += size;
  1208 class CMRootRegionScanTask : public AbstractGangTask {
  1209 private:
  1210   ConcurrentMark* _cm;
  1212 public:
  1213   CMRootRegionScanTask(ConcurrentMark* cm) :
  1214     AbstractGangTask("Root Region Scan"), _cm(cm) { }
  1216   void work(uint worker_id) {
  1217     assert(Thread::current()->is_ConcurrentGC_thread(),
  1218            "this should only be done by a conc GC thread");
  1220     CMRootRegions* root_regions = _cm->root_regions();
  1221     HeapRegion* hr = root_regions->claim_next();
  1222     while (hr != NULL) {
  1223       _cm->scanRootRegion(hr, worker_id);
  1224       hr = root_regions->claim_next();
  1227 };
  1229 void ConcurrentMark::scanRootRegions() {
  1230   // Start of concurrent marking.
  1231   ClassLoaderDataGraph::clear_claimed_marks();
  1233   // scan_in_progress() will have been set to true only if there was
  1234   // at least one root region to scan. So, if it's false, we
  1235   // should not attempt to do any further work.
  1236   if (root_regions()->scan_in_progress()) {
  1237     _parallel_marking_threads = calc_parallel_marking_threads();
  1238     assert(parallel_marking_threads() <= max_parallel_marking_threads(),
  1239            "Maximum number of marking threads exceeded");
  1240     uint active_workers = MAX2(1U, parallel_marking_threads());
  1242     CMRootRegionScanTask task(this);
  1243     if (use_parallel_marking_threads()) {
  1244       _parallel_workers->set_active_workers((int) active_workers);
  1245       _parallel_workers->run_task(&task);
  1246     } else {
  1247       task.work(0);
  1250     // It's possible that has_aborted() is true here without actually
  1251     // aborting the survivor scan earlier. This is OK as it's
  1252     // mainly used for sanity checking.
  1253     root_regions()->scan_finished();
  1257 void ConcurrentMark::markFromRoots() {
  1258   // we might be tempted to assert that:
  1259   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  1260   //        "inconsistent argument?");
  1261   // However that wouldn't be right, because it's possible that
  1262   // a safepoint is indeed in progress as a younger generation
  1263   // stop-the-world GC happens even as we mark in this generation.
  1265   _restart_for_overflow = false;
  1266   force_overflow_conc()->init();
  1268   // _g1h has _n_par_threads
  1269   _parallel_marking_threads = calc_parallel_marking_threads();
  1270   assert(parallel_marking_threads() <= max_parallel_marking_threads(),
  1271     "Maximum number of marking threads exceeded");
  1273   uint active_workers = MAX2(1U, parallel_marking_threads());
  1275   // Parallel task terminator is set in "set_concurrency_and_phase()"
  1276   set_concurrency_and_phase(active_workers, true /* concurrent */);
  1278   CMConcurrentMarkingTask markingTask(this, cmThread());
  1279   if (use_parallel_marking_threads()) {
  1280     _parallel_workers->set_active_workers((int)active_workers);
  1281     // Don't set _n_par_threads because it affects MT in process_roots()
   1282     // and the decisions on that MT processing are made elsewhere.
  1283     assert(_parallel_workers->active_workers() > 0, "Should have been set");
  1284     _parallel_workers->run_task(&markingTask);
  1285   } else {
  1286     markingTask.work(0);
  1288   print_stats();
  1291 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  1292   // world is stopped at this checkpoint
  1293   assert(SafepointSynchronize::is_at_safepoint(),
  1294          "world should be stopped");
  1296   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  1298   // If a full collection has happened, we shouldn't do this.
  1299   if (has_aborted()) {
  1300     g1h->set_marking_complete(); // So bitmap clearing isn't confused
  1301     return;
  1304   SvcGCMarker sgcm(SvcGCMarker::OTHER);
  1306   if (VerifyDuringGC) {
  1307     HandleMark hm;  // handle scope
  1308     Universe::heap()->prepare_for_verify();
  1309     Universe::verify(VerifyOption_G1UsePrevMarking,
  1310                      " VerifyDuringGC:(before)");
  1312   g1h->check_bitmaps("Remark Start");
  1314   G1CollectorPolicy* g1p = g1h->g1_policy();
  1315   g1p->record_concurrent_mark_remark_start();
  1317   double start = os::elapsedTime();
  1319   checkpointRootsFinalWork();
  1321   double mark_work_end = os::elapsedTime();
  1323   weakRefsWork(clear_all_soft_refs);
  1325   if (has_overflown()) {
  1326     // Oops.  We overflowed.  Restart concurrent marking.
  1327     _restart_for_overflow = true;
  1328     if (G1TraceMarkStackOverflow) {
  1329       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
  1332     // Verify the heap w.r.t. the previous marking bitmap.
  1333     if (VerifyDuringGC) {
  1334       HandleMark hm;  // handle scope
  1335       Universe::heap()->prepare_for_verify();
  1336       Universe::verify(VerifyOption_G1UsePrevMarking,
  1337                        " VerifyDuringGC:(overflow)");
  1340     // Clear the marking state because we will be restarting
  1341     // marking due to overflowing the global mark stack.
  1342     reset_marking_state();
  1343   } else {
  1344     // Aggregate the per-task counting data that we have accumulated
  1345     // while marking.
  1346     aggregate_count_data();
  1348     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  1349     // We're done with marking.
   1350     // This is the end of the marking cycle; we expect all
   1351     // threads to have SATB queues with active set to true.
  1352     satb_mq_set.set_active_all_threads(false, /* new active value */
  1353                                        true /* expected_active */);
  1355     if (VerifyDuringGC) {
  1356       HandleMark hm;  // handle scope
  1357       Universe::heap()->prepare_for_verify();
  1358       Universe::verify(VerifyOption_G1UseNextMarking,
  1359                        " VerifyDuringGC:(after)");
  1361     g1h->check_bitmaps("Remark End");
  1362     assert(!restart_for_overflow(), "sanity");
  1363     // Completely reset the marking state since marking completed
  1364     set_non_marking_state();
  1367   // Expand the marking stack, if we have to and if we can.
  1368   if (_markStack.should_expand()) {
  1369     _markStack.expand();
  1372   // Statistics
  1373   double now = os::elapsedTime();
  1374   _remark_mark_times.add((mark_work_end - start) * 1000.0);
  1375   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  1376   _remark_times.add((now - start) * 1000.0);
  1378   g1p->record_concurrent_mark_remark_end();
  1380   G1CMIsAliveClosure is_alive(g1h);
  1381   g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
  1384 // Base class of the closures that finalize and verify the
  1385 // liveness counting data.
  1386 class CMCountDataClosureBase: public HeapRegionClosure {
  1387 protected:
  1388   G1CollectedHeap* _g1h;
  1389   ConcurrentMark* _cm;
  1390   CardTableModRefBS* _ct_bs;
  1392   BitMap* _region_bm;
  1393   BitMap* _card_bm;
   1395   // Takes a region that's not empty (i.e., it has at least one
   1396   // live object in it) and sets its corresponding bit on the region
  1397   // bitmap to 1. If the region is "starts humongous" it will also set
  1398   // to 1 the bits on the region bitmap that correspond to its
  1399   // associated "continues humongous" regions.
  1400   void set_bit_for_region(HeapRegion* hr) {
  1401     assert(!hr->continuesHumongous(), "should have filtered those out");
  1403     BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
  1404     if (!hr->startsHumongous()) {
  1405       // Normal (non-humongous) case: just set the bit.
  1406       _region_bm->par_at_put(index, true);
  1407     } else {
  1408       // Starts humongous case: calculate how many regions are part of
  1409       // this humongous region and then set the bit range.
  1410       BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
  1411       _region_bm->par_at_put_range(index, end_index, true);
  1415 public:
  1416   CMCountDataClosureBase(G1CollectedHeap* g1h,
  1417                          BitMap* region_bm, BitMap* card_bm):
  1418     _g1h(g1h), _cm(g1h->concurrent_mark()),
  1419     _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
  1420     _region_bm(region_bm), _card_bm(card_bm) { }
  1421 };
  1423 // Closure that calculates the # live objects per region. Used
  1424 // for verification purposes during the cleanup pause.
  1425 class CalcLiveObjectsClosure: public CMCountDataClosureBase {
  1426   CMBitMapRO* _bm;
  1427   size_t _region_marked_bytes;
  1429 public:
  1430   CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
  1431                          BitMap* region_bm, BitMap* card_bm) :
  1432     CMCountDataClosureBase(g1h, region_bm, card_bm),
  1433     _bm(bm), _region_marked_bytes(0) { }
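  // Walks the marking bitmap for the given region between bottom() and NTAMS,
  // summing the live bytes and setting the corresponding bits in the supplied
  // region and card bitmaps.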
  1435   bool doHeapRegion(HeapRegion* hr) {
  1437     if (hr->continuesHumongous()) {
  1438       // We will ignore these here and process them when their
  1439       // associated "starts humongous" region is processed (see
  1440       // set_bit_for_heap_region()). Note that we cannot rely on their
   1441   // associated "starts humongous" region to have its bit set to
  1442       // 1 since, due to the region chunking in the parallel region
  1443       // iteration, a "continues humongous" region might be visited
  1444       // before its associated "starts humongous".
  1445       return false;
  1448     HeapWord* ntams = hr->next_top_at_mark_start();
  1449     HeapWord* start = hr->bottom();
  1451     assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
  1452            err_msg("Preconditions not met - "
  1453                    "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT,
  1454                    p2i(start), p2i(ntams), p2i(hr->end())));
  1456     // Find the first marked object at or after "start".
  1457     start = _bm->getNextMarkedWordAddress(start, ntams);
  1459     size_t marked_bytes = 0;
  1461     while (start < ntams) {
  1462       oop obj = oop(start);
  1463       int obj_sz = obj->size();
  1464       HeapWord* obj_end = start + obj_sz;
  1466       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
  1467       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);
   1469       // Note: if we're looking at the last region in the heap, obj_end
   1470       // could actually be just beyond the end of the heap; end_idx
  1471       // will then correspond to a (non-existent) card that is also
  1472       // just beyond the heap.
  1473       if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
  1474         // end of object is not card aligned - increment to cover
  1475         // all the cards spanned by the object
  1476         end_idx += 1;
  1479       // Set the bits in the card BM for the cards spanned by this object.
  1480       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
  1482       // Add the size of this object to the number of marked bytes.
  1483       marked_bytes += (size_t)obj_sz * HeapWordSize;
  1485       // Find the next marked object after this one.
  1486       start = _bm->getNextMarkedWordAddress(obj_end, ntams);
  1489     // Mark the allocated-since-marking portion...
  1490     HeapWord* top = hr->top();
  1491     if (ntams < top) {
  1492       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
  1493       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
   1495       // Note: if we're looking at the last region in the heap, top
   1496       // could actually be just beyond the end of the heap; end_idx
  1497       // will then correspond to a (non-existent) card that is also
  1498       // just beyond the heap.
  1499       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
  1500         // end of object is not card aligned - increment to cover
  1501         // all the cards spanned by the object
  1502         end_idx += 1;
  1504       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
  1506       // This definitely means the region has live objects.
  1507       set_bit_for_region(hr);
  1510     // Update the live region bitmap.
  1511     if (marked_bytes > 0) {
  1512       set_bit_for_region(hr);
  1515     // Set the marked bytes for the current region so that
   1516     // it can be queried by a calling verification routine.
  1517     _region_marked_bytes = marked_bytes;
  1519     return false;
  1522   size_t region_marked_bytes() const { return _region_marked_bytes; }
  1523 };
  1525 // Heap region closure used for verifying the counting data
  1526 // that was accumulated concurrently and aggregated during
  1527 // the remark pause. This closure is applied to the heap
  1528 // regions during the STW cleanup pause.
  1530 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
  1531   G1CollectedHeap* _g1h;
  1532   ConcurrentMark* _cm;
  1533   CalcLiveObjectsClosure _calc_cl;
  1534   BitMap* _region_bm;   // Region BM to be verified
  1535   BitMap* _card_bm;     // Card BM to be verified
  1536   bool _verbose;        // verbose output?
  1538   BitMap* _exp_region_bm; // Expected Region BM values
  1539   BitMap* _exp_card_bm;   // Expected card BM values
  1541   int _failures;
  1543 public:
  1544   VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
  1545                                 BitMap* region_bm,
  1546                                 BitMap* card_bm,
  1547                                 BitMap* exp_region_bm,
  1548                                 BitMap* exp_card_bm,
  1549                                 bool verbose) :
  1550     _g1h(g1h), _cm(g1h->concurrent_mark()),
  1551     _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
  1552     _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
  1553     _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
  1554     _failures(0) { }
  1556   int failures() const { return _failures; }
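  // Recomputes the liveness data for the given region and compares it with
  // the actual counting data that was accumulated during marking, recording
  // any mismatches as failures.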
  1558   bool doHeapRegion(HeapRegion* hr) {
  1559     if (hr->continuesHumongous()) {
  1560       // We will ignore these here and process them when their
  1561       // associated "starts humongous" region is processed (see
  1562       // set_bit_for_heap_region()). Note that we cannot rely on their
   1563   // associated "starts humongous" region to have its bit set to
  1564       // 1 since, due to the region chunking in the parallel region
  1565       // iteration, a "continues humongous" region might be visited
  1566       // before its associated "starts humongous".
  1567       return false;
  1570     int failures = 0;
  1572     // Call the CalcLiveObjectsClosure to walk the marking bitmap for
  1573     // this region and set the corresponding bits in the expected region
  1574     // and card bitmaps.
  1575     bool res = _calc_cl.doHeapRegion(hr);
  1576     assert(res == false, "should be continuing");
  1578     MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
  1579                     Mutex::_no_safepoint_check_flag);
  1581     // Verify the marked bytes for this region.
  1582     size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
  1583     size_t act_marked_bytes = hr->next_marked_bytes();
  1585     // We're not OK if expected marked bytes > actual marked bytes. It means
   1586     // we have missed accounting for some objects during the actual marking.
  1587     if (exp_marked_bytes > act_marked_bytes) {
  1588       if (_verbose) {
  1589         gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
  1590                                "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
  1591                                hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
  1593       failures += 1;
  1596     // Verify the bit, for this region, in the actual and expected
  1597     // (which was just calculated) region bit maps.
  1598     // We're not OK if the bit in the calculated expected region
  1599     // bitmap is set and the bit in the actual region bitmap is not.
  1600     BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
  1602     bool expected = _exp_region_bm->at(index);
  1603     bool actual = _region_bm->at(index);
  1604     if (expected && !actual) {
  1605       if (_verbose) {
  1606         gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
  1607                                "expected: %s, actual: %s",
  1608                                hr->hrs_index(),
  1609                                BOOL_TO_STR(expected), BOOL_TO_STR(actual));
  1611       failures += 1;
  1614     // Verify that the card bit maps for the cards spanned by the current
  1615     // region match. We have an error if we have a set bit in the expected
  1616     // bit map and the corresponding bit in the actual bitmap is not set.
  1618     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
  1619     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
  1621     for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
  1622       expected = _exp_card_bm->at(i);
  1623       actual = _card_bm->at(i);
  1625       if (expected && !actual) {
  1626         if (_verbose) {
  1627           gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
  1628                                  "expected: %s, actual: %s",
  1629                                  hr->hrs_index(), i,
  1630                                  BOOL_TO_STR(expected), BOOL_TO_STR(actual));
  1632         failures += 1;
  1636     if (failures > 0 && _verbose)  {
  1637       gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
  1638                              "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
  1639                              HR_FORMAT_PARAMS(hr), p2i(hr->next_top_at_mark_start()),
  1640                              _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
  1643     _failures += failures;
  1645     // We could stop iteration over the heap when we
  1646     // find the first violating region by returning true.
  1647     return false;
  1649 };
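// Gang task used when VerifyDuringGC is enabled: each worker applies
// VerifyLiveObjectDataHRClosure to a chunk of the heap regions and the
// per-worker failure counts are accumulated atomically.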
  1651 class G1ParVerifyFinalCountTask: public AbstractGangTask {
  1652 protected:
  1653   G1CollectedHeap* _g1h;
  1654   ConcurrentMark* _cm;
  1655   BitMap* _actual_region_bm;
  1656   BitMap* _actual_card_bm;
  1658   uint    _n_workers;
  1660   BitMap* _expected_region_bm;
  1661   BitMap* _expected_card_bm;
  1663   int  _failures;
  1664   bool _verbose;
  1666 public:
  1667   G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
  1668                             BitMap* region_bm, BitMap* card_bm,
  1669                             BitMap* expected_region_bm, BitMap* expected_card_bm)
  1670     : AbstractGangTask("G1 verify final counting"),
  1671       _g1h(g1h), _cm(_g1h->concurrent_mark()),
  1672       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
  1673       _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
  1674       _failures(0), _verbose(false),
  1675       _n_workers(0) {
  1676     assert(VerifyDuringGC, "don't call this otherwise");
  1678     // Use the value already set as the number of active threads
  1679     // in the call to run_task().
  1680     if (G1CollectedHeap::use_parallel_gc_threads()) {
  1681       assert( _g1h->workers()->active_workers() > 0,
  1682         "Should have been previously set");
  1683       _n_workers = _g1h->workers()->active_workers();
  1684     } else {
  1685       _n_workers = 1;
  1688     assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
  1689     assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
  1691     _verbose = _cm->verbose_medium();
  1694   void work(uint worker_id) {
  1695     assert(worker_id < _n_workers, "invariant");
  1697     VerifyLiveObjectDataHRClosure verify_cl(_g1h,
  1698                                             _actual_region_bm, _actual_card_bm,
  1699                                             _expected_region_bm,
  1700                                             _expected_card_bm,
  1701                                             _verbose);
  1703     if (G1CollectedHeap::use_parallel_gc_threads()) {
  1704       _g1h->heap_region_par_iterate_chunked(&verify_cl,
  1705                                             worker_id,
  1706                                             _n_workers,
  1707                                             HeapRegion::VerifyCountClaimValue);
  1708     } else {
  1709       _g1h->heap_region_iterate(&verify_cl);
  1712     Atomic::add(verify_cl.failures(), &_failures);
  1715   int failures() const { return _failures; }
  1716 };
  1718 // Closure that finalizes the liveness counting data.
  1719 // Used during the cleanup pause.
  1720 // Sets the bits corresponding to the interval [NTAMS, top]
  1721 // (which contains the implicitly live objects) in the
  1722 // card liveness bitmap. Also sets the bit for each region,
  1723 // containing live data, in the region liveness bitmap.
  1725 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
  1726  public:
  1727   FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
  1728                               BitMap* region_bm,
  1729                               BitMap* card_bm) :
  1730     CMCountDataClosureBase(g1h, region_bm, card_bm) { }
  1732   bool doHeapRegion(HeapRegion* hr) {
  1734     if (hr->continuesHumongous()) {
  1735       // We will ignore these here and process them when their
  1736       // associated "starts humongous" region is processed (see
  1737       // set_bit_for_heap_region()). Note that we cannot rely on their
   1738   // associated "starts humongous" region to have its bit set to
  1739       // 1 since, due to the region chunking in the parallel region
  1740       // iteration, a "continues humongous" region might be visited
  1741       // before its associated "starts humongous".
  1742       return false;
  1745     HeapWord* ntams = hr->next_top_at_mark_start();
  1746     HeapWord* top   = hr->top();
  1748     assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
  1750     // Mark the allocated-since-marking portion...
  1751     if (ntams < top) {
  1752       // This definitely means the region has live objects.
  1753       set_bit_for_region(hr);
  1755       // Now set the bits in the card bitmap for [ntams, top)
  1756       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
  1757       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
   1759       // Note: if we're looking at the last region in the heap, top
   1760       // could actually be just beyond the end of the heap; end_idx
  1761       // will then correspond to a (non-existent) card that is also
  1762       // just beyond the heap.
  1763       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
  1764         // end of object is not card aligned - increment to cover
  1765         // all the cards spanned by the object
  1766         end_idx += 1;
  1769       assert(end_idx <= _card_bm->size(),
  1770              err_msg("oob: end_idx=  "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
  1771                      end_idx, _card_bm->size()));
  1772       assert(start_idx < _card_bm->size(),
  1773              err_msg("oob: start_idx=  "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
  1774                      start_idx, _card_bm->size()));
  1776       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
  1779     // Set the bit for the region if it contains live data
  1780     if (hr->next_marked_bytes() > 0) {
  1781       set_bit_for_region(hr);
  1784     return false;
  1786 };
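// Gang task that applies FinalCountDataUpdateClosure to all heap regions,
// in parallel when a work gang is available, during the cleanup pause.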
  1788 class G1ParFinalCountTask: public AbstractGangTask {
  1789 protected:
  1790   G1CollectedHeap* _g1h;
  1791   ConcurrentMark* _cm;
  1792   BitMap* _actual_region_bm;
  1793   BitMap* _actual_card_bm;
  1795   uint    _n_workers;
  1797 public:
  1798   G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
  1799     : AbstractGangTask("G1 final counting"),
  1800       _g1h(g1h), _cm(_g1h->concurrent_mark()),
  1801       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
  1802       _n_workers(0) {
  1803     // Use the value already set as the number of active threads
  1804     // in the call to run_task().
  1805     if (G1CollectedHeap::use_parallel_gc_threads()) {
  1806       assert( _g1h->workers()->active_workers() > 0,
  1807         "Should have been previously set");
  1808       _n_workers = _g1h->workers()->active_workers();
  1809     } else {
  1810       _n_workers = 1;
  1814   void work(uint worker_id) {
  1815     assert(worker_id < _n_workers, "invariant");
  1817     FinalCountDataUpdateClosure final_update_cl(_g1h,
  1818                                                 _actual_region_bm,
  1819                                                 _actual_card_bm);
  1821     if (G1CollectedHeap::use_parallel_gc_threads()) {
  1822       _g1h->heap_region_par_iterate_chunked(&final_update_cl,
  1823                                             worker_id,
  1824                                             _n_workers,
  1825                                             HeapRegion::FinalCountClaimValue);
  1826     } else {
  1827       _g1h->heap_region_iterate(&final_update_cl);
  1830 };
  1832 class G1ParNoteEndTask;
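// Closure applied to each region at the end of marking. It records the
// per-region marking results, immediately frees regions that contain no
// live data and gathers per-worker cleanup statistics.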
  1834 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  1835   G1CollectedHeap* _g1;
  1836   size_t _max_live_bytes;
  1837   uint _regions_claimed;
  1838   size_t _freed_bytes;
  1839   FreeRegionList* _local_cleanup_list;
  1840   HeapRegionSetCount _old_regions_removed;
  1841   HeapRegionSetCount _humongous_regions_removed;
  1842   HRRSCleanupTask* _hrrs_cleanup_task;
  1843   double _claimed_region_time;
  1844   double _max_region_time;
  1846 public:
  1847   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
  1848                              FreeRegionList* local_cleanup_list,
  1849                              HRRSCleanupTask* hrrs_cleanup_task) :
  1850     _g1(g1),
  1851     _max_live_bytes(0), _regions_claimed(0),
  1852     _freed_bytes(0),
  1853     _claimed_region_time(0.0), _max_region_time(0.0),
  1854     _local_cleanup_list(local_cleanup_list),
  1855     _old_regions_removed(),
  1856     _humongous_regions_removed(),
  1857     _hrrs_cleanup_task(hrrs_cleanup_task) { }
  1859   size_t freed_bytes() { return _freed_bytes; }
  1860   const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; }
  1861   const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; }
  1863   bool doHeapRegion(HeapRegion *hr) {
  1864     if (hr->continuesHumongous()) {
  1865       return false;
  1867     // We use a claim value of zero here because all regions
  1868     // were claimed with value 1 in the FinalCount task.
  1869     _g1->reset_gc_time_stamps(hr);
  1870     double start = os::elapsedTime();
  1871     _regions_claimed++;
  1872     hr->note_end_of_marking();
  1873     _max_live_bytes += hr->max_live_bytes();
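    // A non-young region that is in use but has no live data left after
    // marking can be reclaimed right away; otherwise we only do the
    // remembered set cleanup work for it.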
  1875     if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
  1876       _freed_bytes += hr->used();
  1877       hr->set_containing_set(NULL);
  1878       if (hr->isHumongous()) {
  1879         assert(hr->startsHumongous(), "we should only see starts humongous");
  1880         _humongous_regions_removed.increment(1u, hr->capacity());
  1881         _g1->free_humongous_region(hr, _local_cleanup_list, true);
  1882       } else {
  1883         _old_regions_removed.increment(1u, hr->capacity());
  1884         _g1->free_region(hr, _local_cleanup_list, true);
  1886     } else {
  1887       hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
  1890     double region_time = (os::elapsedTime() - start);
  1891     _claimed_region_time += region_time;
  1892     if (region_time > _max_region_time) {
  1893       _max_region_time = region_time;
  1895     return false;
  1898   size_t max_live_bytes() { return _max_live_bytes; }
  1899   uint regions_claimed() { return _regions_claimed; }
  1900   double claimed_region_time_sec() { return _claimed_region_time; }
  1901   double max_region_time_sec() { return _max_region_time; }
  1902 };
  1904 class G1ParNoteEndTask: public AbstractGangTask {
  1905   friend class G1NoteEndOfConcMarkClosure;
  1907 protected:
  1908   G1CollectedHeap* _g1h;
  1909   size_t _max_live_bytes;
  1910   size_t _freed_bytes;
  1911   FreeRegionList* _cleanup_list;
  1913 public:
  1914   G1ParNoteEndTask(G1CollectedHeap* g1h,
  1915                    FreeRegionList* cleanup_list) :
  1916     AbstractGangTask("G1 note end"), _g1h(g1h),
  1917     _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
  1919   void work(uint worker_id) {
  1920     double start = os::elapsedTime();
  1921     FreeRegionList local_cleanup_list("Local Cleanup List");
  1922     HRRSCleanupTask hrrs_cleanup_task;
  1923     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
  1924                                            &hrrs_cleanup_task);
  1925     if (G1CollectedHeap::use_parallel_gc_threads()) {
  1926       _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id,
  1927                                             _g1h->workers()->active_workers(),
  1928                                             HeapRegion::NoteEndClaimValue);
  1929     } else {
  1930       _g1h->heap_region_iterate(&g1_note_end);
  1932     assert(g1_note_end.complete(), "Shouldn't have yielded!");
  1934     // Now update the lists
  1935     _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
  1937       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  1938       _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());
  1939       _max_live_bytes += g1_note_end.max_live_bytes();
  1940       _freed_bytes += g1_note_end.freed_bytes();
  1942       // If we iterate over the global cleanup list at the end of
  1943       // cleanup to do this printing we will not guarantee to only
  1944       // generate output for the newly-reclaimed regions (the list
  1945       // might not be empty at the beginning of cleanup; we might
  1946       // still be working on its previous contents). So we do the
  1947       // printing here, before we append the new regions to the global
  1948       // cleanup list.
  1950       G1HRPrinter* hr_printer = _g1h->hr_printer();
  1951       if (hr_printer->is_active()) {
  1952         FreeRegionListIterator iter(&local_cleanup_list);
  1953         while (iter.more_available()) {
  1954           HeapRegion* hr = iter.get_next();
  1955           hr_printer->cleanup(hr);
  1959       _cleanup_list->add_ordered(&local_cleanup_list);
  1960       assert(local_cleanup_list.is_empty(), "post-condition");
  1962       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
  1965   size_t max_live_bytes() { return _max_live_bytes; }
  1966   size_t freed_bytes() { return _freed_bytes; }
  1967 };
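// Gang task that scrubs the remembered sets: entries referring to regions or
// cards that the liveness counting found to be dead are removed (see the
// G1RemSet scrub routines).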
  1969 class G1ParScrubRemSetTask: public AbstractGangTask {
  1970 protected:
  1971   G1RemSet* _g1rs;
  1972   BitMap* _region_bm;
  1973   BitMap* _card_bm;
  1974 public:
  1975   G1ParScrubRemSetTask(G1CollectedHeap* g1h,
  1976                        BitMap* region_bm, BitMap* card_bm) :
  1977     AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
  1978     _region_bm(region_bm), _card_bm(card_bm) { }
  1980   void work(uint worker_id) {
  1981     if (G1CollectedHeap::use_parallel_gc_threads()) {
  1982       _g1rs->scrub_par(_region_bm, _card_bm, worker_id,
  1983                        HeapRegion::ScrubRemSetClaimValue);
  1984     } else {
  1985       _g1rs->scrub(_region_bm, _card_bm);
  1989 };
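// The STW cleanup pause: finalizes the liveness counting data, swaps the
// marking bitmaps, notes the end of marking in every region (freeing regions
// that are completely empty) and optionally scrubs the remembered sets.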
  1991 void ConcurrentMark::cleanup() {
  1992   // world is stopped at this checkpoint
  1993   assert(SafepointSynchronize::is_at_safepoint(),
  1994          "world should be stopped");
  1995   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  1997   // If a full collection has happened, we shouldn't do this.
  1998   if (has_aborted()) {
  1999     g1h->set_marking_complete(); // So bitmap clearing isn't confused
  2000     return;
  2003   g1h->verify_region_sets_optional();
  2005   if (VerifyDuringGC) {
  2006     HandleMark hm;  // handle scope
  2007     Universe::heap()->prepare_for_verify();
  2008     Universe::verify(VerifyOption_G1UsePrevMarking,
  2009                      " VerifyDuringGC:(before)");
  2011   g1h->check_bitmaps("Cleanup Start");
  2013   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
  2014   g1p->record_concurrent_mark_cleanup_start();
  2016   double start = os::elapsedTime();
  2018   HeapRegionRemSet::reset_for_cleanup_tasks();
  2020   uint n_workers;
  2022   // Do counting once more with the world stopped for good measure.
  2023   G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
  2025   if (G1CollectedHeap::use_parallel_gc_threads()) {
  2026    assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
  2027            "sanity check");
  2029     g1h->set_par_threads();
  2030     n_workers = g1h->n_par_threads();
  2031     assert(g1h->n_par_threads() == n_workers,
  2032            "Should not have been reset");
  2033     g1h->workers()->run_task(&g1_par_count_task);
  2034     // Done with the parallel phase so reset to 0.
  2035     g1h->set_par_threads(0);
  2037     assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue),
  2038            "sanity check");
  2039   } else {
  2040     n_workers = 1;
  2041     g1_par_count_task.work(0);
  2044   if (VerifyDuringGC) {
  2045     // Verify that the counting data accumulated during marking matches
  2046     // that calculated by walking the marking bitmap.
  2048     // Bitmaps to hold expected values
  2049     BitMap expected_region_bm(_region_bm.size(), true);
  2050     BitMap expected_card_bm(_card_bm.size(), true);
  2052     G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
  2053                                                  &_region_bm,
  2054                                                  &_card_bm,
  2055                                                  &expected_region_bm,
  2056                                                  &expected_card_bm);
  2058     if (G1CollectedHeap::use_parallel_gc_threads()) {
  2059       g1h->set_par_threads((int)n_workers);
  2060       g1h->workers()->run_task(&g1_par_verify_task);
  2061       // Done with the parallel phase so reset to 0.
  2062       g1h->set_par_threads(0);
  2064       assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
  2065              "sanity check");
  2066     } else {
  2067       g1_par_verify_task.work(0);
  2070     guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
  2073   size_t start_used_bytes = g1h->used();
  2074   g1h->set_marking_complete();
  2076   double count_end = os::elapsedTime();
  2077   double this_final_counting_time = (count_end - start);
  2078   _total_counting_time += this_final_counting_time;
  2080   if (G1PrintRegionLivenessInfo) {
  2081     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
  2082     _g1h->heap_region_iterate(&cl);
  2085   // Install newly created mark bitMap as "prev".
  2086   swapMarkBitMaps();
  2088   g1h->reset_gc_time_stamp();
  2090   // Note end of marking in all heap regions.
  2091   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
  2092   if (G1CollectedHeap::use_parallel_gc_threads()) {
  2093     g1h->set_par_threads((int)n_workers);
  2094     g1h->workers()->run_task(&g1_par_note_end_task);
  2095     g1h->set_par_threads(0);
  2097     assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
  2098            "sanity check");
  2099   } else {
  2100     g1_par_note_end_task.work(0);
  2102   g1h->check_gc_time_stamps();
  2104   if (!cleanup_list_is_empty()) {
  2105     // The cleanup list is not empty, so we'll have to process it
  2106     // concurrently. Notify anyone else that might be wanting free
  2107     // regions that there will be more free regions coming soon.
  2108     g1h->set_free_regions_coming();
   2111   // Do remembered set scrubbing before the cleanup-end call below, since it
   2112   // affects the metric by which we sort the heap regions.
  2113   if (G1ScrubRemSets) {
  2114     double rs_scrub_start = os::elapsedTime();
  2115     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
  2116     if (G1CollectedHeap::use_parallel_gc_threads()) {
  2117       g1h->set_par_threads((int)n_workers);
  2118       g1h->workers()->run_task(&g1_par_scrub_rs_task);
  2119       g1h->set_par_threads(0);
  2121       assert(g1h->check_heap_region_claim_values(
  2122                                             HeapRegion::ScrubRemSetClaimValue),
  2123              "sanity check");
  2124     } else {
  2125       g1_par_scrub_rs_task.work(0);
  2128     double rs_scrub_end = os::elapsedTime();
  2129     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
  2130     _total_rs_scrub_time += this_rs_scrub_time;
  2133   // this will also free any regions totally full of garbage objects,
  2134   // and sort the regions.
  2135   g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
  2137   // Statistics.
  2138   double end = os::elapsedTime();
  2139   _cleanup_times.add((end - start) * 1000.0);
  2141   if (G1Log::fine()) {
  2142     g1h->print_size_transition(gclog_or_tty,
  2143                                start_used_bytes,
  2144                                g1h->used(),
  2145                                g1h->capacity());
  2148   // Clean up will have freed any regions completely full of garbage.
  2149   // Update the soft reference policy with the new heap occupancy.
  2150   Universe::update_heap_info_at_gc();
  2152   if (VerifyDuringGC) {
  2153     HandleMark hm;  // handle scope
  2154     Universe::heap()->prepare_for_verify();
  2155     Universe::verify(VerifyOption_G1UsePrevMarking,
  2156                      " VerifyDuringGC:(after)");
  2158   g1h->check_bitmaps("Cleanup End");
  2160   g1h->verify_region_sets_optional();
  2162   // We need to make this be a "collection" so any collection pause that
  2163   // races with it goes around and waits for completeCleanup to finish.
  2164   g1h->increment_total_collections();
  2166   // Clean out dead classes and update Metaspace sizes.
  2167   if (ClassUnloadingWithConcurrentMark) {
  2168     ClassLoaderDataGraph::purge();
  2170   MetaspaceGC::compute_new_size();
  2172   // We reclaimed old regions so we should calculate the sizes to make
  2173   // sure we update the old gen/space data.
  2174   g1h->g1mm()->update_sizes();
  2176   g1h->trace_heap_after_concurrent_cycle();
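// Concurrently clears the regions on the cleanup list and moves them, in
// batches, to the secondary free list.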
  2179 void ConcurrentMark::completeCleanup() {
  2180   if (has_aborted()) return;
  2182   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  2184   _cleanup_list.verify_optional();
  2185   FreeRegionList tmp_free_list("Tmp Free List");
  2187   if (G1ConcRegionFreeingVerbose) {
  2188     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
  2189                            "cleanup list has %u entries",
  2190                            _cleanup_list.length());
   2193   // No one else should be accessing the _cleanup_list at this point,
   2194   // so it's not necessary to take any locks.
  2195   while (!_cleanup_list.is_empty()) {
  2196     HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
  2197     assert(hr != NULL, "Got NULL from a non-empty list");
  2198     hr->par_clear();
  2199     tmp_free_list.add_ordered(hr);
  2201     // Instead of adding one region at a time to the secondary_free_list,
  2202     // we accumulate them in the local list and move them a few at a
  2203     // time. This also cuts down on the number of notify_all() calls
  2204     // we do during this process. We'll also append the local list when
  2205     // _cleanup_list is empty (which means we just removed the last
  2206     // region from the _cleanup_list).
  2207     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
  2208         _cleanup_list.is_empty()) {
  2209       if (G1ConcRegionFreeingVerbose) {
  2210         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
  2211                                "appending %u entries to the secondary_free_list, "
  2212                                "cleanup list still has %u entries",
  2213                                tmp_free_list.length(),
  2214                                _cleanup_list.length());
  2218         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
  2219         g1h->secondary_free_list_add(&tmp_free_list);
  2220         SecondaryFreeList_lock->notify_all();
  2223       if (G1StressConcRegionFreeing) {
  2224         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
  2225           os::sleep(Thread::current(), (jlong) 1, false);
  2230   assert(tmp_free_list.is_empty(), "post-condition");
  2233 // Supporting Object and Oop closures for reference discovery
   2234 // and processing during marking.
  2236 bool G1CMIsAliveClosure::do_object_b(oop obj) {
  2237   HeapWord* addr = (HeapWord*)obj;
  2238   return addr != NULL &&
  2239          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
   2242 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
  2243 // Uses the CMTask associated with a worker thread (for serial reference
  2244 // processing the CMTask for worker 0 is used) to preserve (mark) and
  2245 // trace referent objects.
  2246 //
  2247 // Using the CMTask and embedded local queues avoids having the worker
  2248 // threads operating on the global mark stack. This reduces the risk
  2249 // of overflowing the stack - which we would rather avoid at this late
   2250 // stage. Also, using the tasks' local queues removes the potential
   2251 // for the workers to interfere with each other, which could occur if
   2252 // they were operating on the global stack.
  2254 class G1CMKeepAliveAndDrainClosure: public OopClosure {
  2255   ConcurrentMark* _cm;
  2256   CMTask*         _task;
  2257   int             _ref_counter_limit;
  2258   int             _ref_counter;
  2259   bool            _is_serial;
  2260  public:
  2261   G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
  2262     _cm(cm), _task(task), _is_serial(is_serial),
  2263     _ref_counter_limit(G1RefProcDrainInterval) {
  2264     assert(_ref_counter_limit > 0, "sanity");
  2265     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
  2266     _ref_counter = _ref_counter_limit;
  2269   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  2270   virtual void do_oop(      oop* p) { do_oop_work(p); }
  2272   template <class T> void do_oop_work(T* p) {
  2273     if (!_cm->has_overflown()) {
  2274       oop obj = oopDesc::load_decode_heap_oop(p);
  2275       if (_cm->verbose_high()) {
  2276         gclog_or_tty->print_cr("\t[%u] we're looking at location "
  2277                                "*"PTR_FORMAT" = "PTR_FORMAT,
  2278                                _task->worker_id(), p2i(p), p2i((void*) obj));
  2281       _task->deal_with_reference(obj);
  2282       _ref_counter--;
  2284       if (_ref_counter == 0) {
  2285         // We have dealt with _ref_counter_limit references, pushing them
  2286         // and objects reachable from them on to the local stack (and
  2287         // possibly the global stack). Call CMTask::do_marking_step() to
  2288         // process these entries.
  2289         //
  2290         // We call CMTask::do_marking_step() in a loop, which we'll exit if
  2291         // there's nothing more to do (i.e. we're done with the entries that
  2292         // were pushed as a result of the CMTask::deal_with_reference() calls
  2293         // above) or we overflow.
  2294         //
  2295         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
  2296         // flag while there may still be some work to do. (See the comment at
  2297         // the beginning of CMTask::do_marking_step() for those conditions -
  2298         // one of which is reaching the specified time target.) It is only
  2299         // when CMTask::do_marking_step() returns without setting the
  2300         // has_aborted() flag that the marking step has completed.
  2301         do {
  2302           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
  2303           _task->do_marking_step(mark_step_duration_ms,
  2304                                  false      /* do_termination */,
  2305                                  _is_serial);
  2306         } while (_task->has_aborted() && !_cm->has_overflown());
  2307         _ref_counter = _ref_counter_limit;
  2309     } else {
  2310       if (_cm->verbose_high()) {
  2311          gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id());
  2315 };
  2317 // 'Drain' oop closure used by both serial and parallel reference processing.
  2318 // Uses the CMTask associated with a given worker thread (for serial
   2319 // reference processing the CMTask for worker 0 is used). Calls the
  2320 // do_marking_step routine, with an unbelievably large timeout value,
  2321 // to drain the marking data structures of the remaining entries
  2322 // added by the 'keep alive' oop closure above.
  2324 class G1CMDrainMarkingStackClosure: public VoidClosure {
  2325   ConcurrentMark* _cm;
  2326   CMTask*         _task;
  2327   bool            _is_serial;
  2328  public:
  2329   G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
  2330     _cm(cm), _task(task), _is_serial(is_serial) {
  2331     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
  2334   void do_void() {
  2335     do {
  2336       if (_cm->verbose_high()) {
  2337         gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step - serial: %s",
  2338                                _task->worker_id(), BOOL_TO_STR(_is_serial));
  2341       // We call CMTask::do_marking_step() to completely drain the local
  2342       // and global marking stacks of entries pushed by the 'keep alive'
  2343       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
  2344       //
  2345       // CMTask::do_marking_step() is called in a loop, which we'll exit
   2346       // if there's nothing more to do (i.e. we've completely drained the
   2347       // entries that were pushed as a result of applying the 'keep alive'
  2348       // closure to the entries on the discovered ref lists) or we overflow
  2349       // the global marking stack.
  2350       //
  2351       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
  2352       // flag while there may still be some work to do. (See the comment at
  2353       // the beginning of CMTask::do_marking_step() for those conditions -
  2354       // one of which is reaching the specified time target.) It is only
  2355       // when CMTask::do_marking_step() returns without setting the
  2356       // has_aborted() flag that the marking step has completed.
  2358       _task->do_marking_step(1000000000.0 /* something very large */,
  2359                              true         /* do_termination */,
  2360                              _is_serial);
  2361     } while (_task->has_aborted() && !_cm->has_overflown());
  2363 };
  2365 // Implementation of AbstractRefProcTaskExecutor for parallel
  2366 // reference processing at the end of G1 concurrent marking
  2368 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
  2369 private:
  2370   G1CollectedHeap* _g1h;
  2371   ConcurrentMark*  _cm;
  2372   WorkGang*        _workers;
  2373   int              _active_workers;
  2375 public:
  2376   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
  2377                         ConcurrentMark* cm,
  2378                         WorkGang* workers,
  2379                         int n_workers) :
  2380     _g1h(g1h), _cm(cm),
  2381     _workers(workers), _active_workers(n_workers) { }
  2383   // Executes the given task using concurrent marking worker threads.
  2384   virtual void execute(ProcessTask& task);
  2385   virtual void execute(EnqueueTask& task);
  2386 };
  2388 class G1CMRefProcTaskProxy: public AbstractGangTask {
  2389   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
  2390   ProcessTask&     _proc_task;
  2391   G1CollectedHeap* _g1h;
  2392   ConcurrentMark*  _cm;
  2394 public:
  2395   G1CMRefProcTaskProxy(ProcessTask& proc_task,
  2396                      G1CollectedHeap* g1h,
  2397                      ConcurrentMark* cm) :
  2398     AbstractGangTask("Process reference objects in parallel"),
  2399     _proc_task(proc_task), _g1h(g1h), _cm(cm) {
  2400     ReferenceProcessor* rp = _g1h->ref_processor_cm();
  2401     assert(rp->processing_is_mt(), "shouldn't be here otherwise");
  2404   virtual void work(uint worker_id) {
  2405     ResourceMark rm;
  2406     HandleMark hm;
  2407     CMTask* task = _cm->task(worker_id);
  2408     G1CMIsAliveClosure g1_is_alive(_g1h);
  2409     G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
  2410     G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
  2412     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
  2414 };
  2416 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
  2417   assert(_workers != NULL, "Need parallel worker threads.");
  2418   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
  2420   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
  2422   // We need to reset the concurrency level before each
  2423   // proxy task execution, so that the termination protocol
  2424   // and overflow handling in CMTask::do_marking_step() knows
  2425   // how many workers to wait for.
  2426   _cm->set_concurrency(_active_workers);
  2427   _g1h->set_par_threads(_active_workers);
  2428   _workers->run_task(&proc_task_proxy);
  2429   _g1h->set_par_threads(0);
  2432 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
  2433   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
  2434   EnqueueTask& _enq_task;
  2436 public:
  2437   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
  2438     AbstractGangTask("Enqueue reference objects in parallel"),
  2439     _enq_task(enq_task) { }
  2441   virtual void work(uint worker_id) {
  2442     _enq_task.work(worker_id);
  2444 };
  2446 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
  2447   assert(_workers != NULL, "Need parallel worker threads.");
  2448   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
  2450   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
  2452   // Not strictly necessary but...
  2453   //
  2454   // We need to reset the concurrency level before each
  2455   // proxy task execution, so that the termination protocol
  2456   // and overflow handling in CMTask::do_marking_step() knows
  2457   // how many workers to wait for.
  2458   _cm->set_concurrency(_active_workers);
  2459   _g1h->set_par_threads(_active_workers);
  2460   _workers->run_task(&enq_task_proxy);
  2461   _g1h->set_par_threads(0);
  2464 void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
  2465   G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes);
  2468 // Helper class to get rid of some boilerplate code.
  2469 class G1RemarkGCTraceTime : public GCTraceTime {
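  // Prints a leading space before the nested trace message when output is
  // enabled, separating it from the enclosing remark log output.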
  2470   static bool doit_and_prepend(bool doit) {
  2471     if (doit) {
  2472       gclog_or_tty->put(' ');
  2474     return doit;
  2477  public:
  2478   G1RemarkGCTraceTime(const char* title, bool doit)
  2479     : GCTraceTime(title, doit_and_prepend(doit), false, G1CollectedHeap::heap()->gc_timer_cm(),
  2480         G1CollectedHeap::heap()->concurrent_mark()->concurrent_gc_id()) {
  2482 };
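// Reference processing and (optionally) class, string and symbol unloading,
// performed as part of the remark pause.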
  2484 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
  2485   if (has_overflown()) {
  2486     // Skip processing the discovered references if we have
  2487     // overflown the global marking stack. Reference objects
  2488     // only get discovered once so it is OK to not
  2489     // de-populate the discovered reference lists. We could have,
  2490     // but the only benefit would be that, when marking restarts,
  2491     // less reference objects are discovered.
  2492     return;
  2495   ResourceMark rm;
  2496   HandleMark   hm;
  2498   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  2500   // Is alive closure.
  2501   G1CMIsAliveClosure g1_is_alive(g1h);
  2503   // Inner scope to exclude the cleaning of the string and symbol
  2504   // tables from the displayed time.
  2506     if (G1Log::finer()) {
  2507       gclog_or_tty->put(' ');
  2509     GCTraceTime t("GC ref-proc", G1Log::finer(), false, g1h->gc_timer_cm(), concurrent_gc_id());
  2511     ReferenceProcessor* rp = g1h->ref_processor_cm();
  2513     // See the comment in G1CollectedHeap::ref_processing_init()
  2514     // about how reference processing currently works in G1.
  2516     // Set the soft reference policy
  2517     rp->setup_policy(clear_all_soft_refs);
  2518     assert(_markStack.isEmpty(), "mark stack should be empty");
  2520     // Instances of the 'Keep Alive' and 'Complete GC' closures used
  2521     // in serial reference processing. Note these closures are also
   2522     // used for serially processing (by the current thread) the
  2523     // JNI references during parallel reference processing.
  2524     //
  2525     // These closures do not need to synchronize with the worker
  2526     // threads involved in parallel reference processing as these
  2527     // instances are executed serially by the current thread (e.g.
  2528     // reference processing is not multi-threaded and is thus
  2529     // performed by the current thread instead of a gang worker).
  2530     //
   2531     // The gang tasks involved in parallel reference processing create
  2532     // their own instances of these closures, which do their own
  2533     // synchronization among themselves.
  2534     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
  2535     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
  2537     // We need at least one active thread. If reference processing
  2538     // is not multi-threaded we use the current (VMThread) thread,
  2539     // otherwise we use the work gang from the G1CollectedHeap and
  2540     // we utilize all the worker threads we can.
  2541     bool processing_is_mt = rp->processing_is_mt() && g1h->workers() != NULL;
  2542     uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
  2543     active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
  2545     // Parallel processing task executor.
  2546     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
  2547                                               g1h->workers(), active_workers);
  2548     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
  2550     // Set the concurrency level. The phase was already set prior to
  2551     // executing the remark task.
  2552     set_concurrency(active_workers);
  2554     // Set the degree of MT processing here.  If the discovery was done MT,
  2555     // the number of threads involved during discovery could differ from
  2556     // the number of active workers.  This is OK as long as the discovered
  2557     // Reference lists are balanced (see balance_all_queues() and balance_queues()).
  2558     rp->set_active_mt_degree(active_workers);
  2560     // Process the weak references.
  2561     const ReferenceProcessorStats& stats =
  2562         rp->process_discovered_references(&g1_is_alive,
  2563                                           &g1_keep_alive,
  2564                                           &g1_drain_mark_stack,
  2565                                           executor,
  2566                                           g1h->gc_timer_cm(),
  2567                                           concurrent_gc_id());
  2568     g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
  2570     // The do_oop work routines of the keep_alive and drain_marking_stack
  2571     // oop closures will set the has_overflown flag if we overflow the
  2572     // global marking stack.
  2574     assert(_markStack.overflow() || _markStack.isEmpty(),
  2575             "mark stack should be empty (unless it overflowed)");
  2577     if (_markStack.overflow()) {
  2578       // This should have been done already when we tried to push an
  2579       // entry on to the global mark stack. But let's do it again.
  2580       set_has_overflown();
  2583     assert(rp->num_q() == active_workers, "why not");
  2585     rp->enqueue_discovered_references(executor);
  2587     rp->verify_no_references_recorded();
  2588     assert(!rp->discovery_enabled(), "Post condition");
  2591   if (has_overflown()) {
   2592     // We cannot trust g1_is_alive if the marking stack overflowed
  2593     return;
  2596   assert(_markStack.isEmpty(), "Marking should have completed");
  2598   // Unload Klasses, String, Symbols, Code Cache, etc.
  2600     G1RemarkGCTraceTime trace("Unloading", G1Log::finer());
  2602     if (ClassUnloadingWithConcurrentMark) {
  2603       bool purged_classes;
  2606         G1RemarkGCTraceTime trace("System Dictionary Unloading", G1Log::finest());
  2607         purged_classes = SystemDictionary::do_unloading(&g1_is_alive);
  2611         G1RemarkGCTraceTime trace("Parallel Unloading", G1Log::finest());
  2612         weakRefsWorkParallelPart(&g1_is_alive, purged_classes);
  2616     if (G1StringDedup::is_enabled()) {
  2617       G1RemarkGCTraceTime trace("String Deduplication Unlink", G1Log::finest());
  2618       G1StringDedup::unlink(&g1_is_alive);
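// Swap the marking bitmaps: after marking, the "next" bitmap holds the
// complete marking information and becomes the new "prev" bitmap.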
  2623 void ConcurrentMark::swapMarkBitMaps() {
  2624   CMBitMapRO* temp = _prevMarkBitMap;
  2625   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
  2626   _nextMarkBitMap  = (CMBitMap*)  temp;
  2629 class CMObjectClosure;
  2631 // Closure for iterating over objects, currently only used for
  2632 // processing SATB buffers.
  2633 class CMObjectClosure : public ObjectClosure {
  2634 private:
  2635   CMTask* _task;
  2637 public:
  2638   void do_object(oop obj) {
  2639     _task->deal_with_reference(obj);
  2642   CMObjectClosure(CMTask* task) : _task(task) { }
  2643 };
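// Thread closure used during remark. For each Java thread it scans the
// thread's nmethods for roots and drains the thread's SATB buffer; for the
// VM thread it drains the shared SATB queue.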
  2645 class G1RemarkThreadsClosure : public ThreadClosure {
  2646   CMObjectClosure _cm_obj;
  2647   G1CMOopClosure _cm_cl;
  2648   MarkingCodeBlobClosure _code_cl;
  2649   int _thread_parity;
  2650   bool _is_par;
  2652  public:
  2653   G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task, bool is_par) :
  2654     _cm_obj(task), _cm_cl(g1h, g1h->concurrent_mark(), task), _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
  2655     _thread_parity(SharedHeap::heap()->strong_roots_parity()), _is_par(is_par) {}
  2657   void do_thread(Thread* thread) {
  2658     if (thread->is_Java_thread()) {
  2659       if (thread->claim_oops_do(_is_par, _thread_parity)) {
  2660         JavaThread* jt = (JavaThread*)thread;
   2662         // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking;
   2663         // however, oops reachable from nmethods have very complex lifecycles:
  2664         // * Alive if on the stack of an executing method
  2665         // * Weakly reachable otherwise
   2666         // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
   2667         // live by the SATB invariant, but other oops recorded in nmethods may behave differently.
  2668         jt->nmethods_do(&_code_cl);
  2670         jt->satb_mark_queue().apply_closure_and_empty(&_cm_obj);
  2672     } else if (thread->is_VM_thread()) {
  2673       if (thread->claim_oops_do(_is_par, _thread_parity)) {
  2674         JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_obj);
  2678 };
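// Gang task for the final (remark) marking step. Each active worker first
// processes the threads (nmethods and SATB buffers) and then calls
// do_marking_step() in a loop until marking completes or the mark stack
// overflows.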
  2680 class CMRemarkTask: public AbstractGangTask {
  2681 private:
  2682   ConcurrentMark* _cm;
  2683   bool            _is_serial;
  2684 public:
  2685   void work(uint worker_id) {
  2686     // Since all available tasks are actually started, we should
   2687     // only proceed if we're supposed to be active.
  2688     if (worker_id < _cm->active_tasks()) {
  2689       CMTask* task = _cm->task(worker_id);
  2690       task->record_start_time();
  2692         ResourceMark rm;
  2693         HandleMark hm;
  2695         G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task, !_is_serial);
  2696         Threads::threads_do(&threads_f);
  2699       do {
  2700         task->do_marking_step(1000000000.0 /* something very large */,
  2701                               true         /* do_termination       */,
  2702                               _is_serial);
  2703       } while (task->has_aborted() && !_cm->has_overflown());
  2704       // If we overflow, then we do not want to restart. We instead
  2705       // want to abort remark and do concurrent marking again.
  2706       task->record_end_time();
  2710   CMRemarkTask(ConcurrentMark* cm, int active_workers, bool is_serial) :
  2711     AbstractGangTask("Par Remark"), _cm(cm), _is_serial(is_serial) {
  2712     _cm->terminator()->reset_for_reuse(active_workers);
  2714 };
  2716 void ConcurrentMark::checkpointRootsFinalWork() {
  2717   ResourceMark rm;
  2718   HandleMark   hm;
  2719   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  2721   G1RemarkGCTraceTime trace("Finalize Marking", G1Log::finer());
  2723   g1h->ensure_parsability(false);
  2725   if (G1CollectedHeap::use_parallel_gc_threads()) {
  2726     G1CollectedHeap::StrongRootsScope srs(g1h);
  2727     // this is remark, so we'll use up all active threads
  2728     uint active_workers = g1h->workers()->active_workers();
  2729     if (active_workers == 0) {
  2730       assert(active_workers > 0, "Should have been set earlier");
  2731       active_workers = (uint) ParallelGCThreads;
  2732       g1h->workers()->set_active_workers(active_workers);
  2734     set_concurrency_and_phase(active_workers, false /* concurrent */);
   2735     // Leave _parallel_marking_threads at its
  2736     // value originally calculated in the ConcurrentMark
  2737     // constructor and pass values of the active workers
  2738     // through the gang in the task.
  2740     CMRemarkTask remarkTask(this, active_workers, false /* is_serial */);
  2741     // We will start all available threads, even if we decide that the
  2742     // active_workers will be fewer. The extra ones will just bail out
  2743     // immediately.
  2744     g1h->set_par_threads(active_workers);
  2745     g1h->workers()->run_task(&remarkTask);
  2746     g1h->set_par_threads(0);
  2747   } else {
  2748     G1CollectedHeap::StrongRootsScope srs(g1h);
  2749     uint active_workers = 1;
  2750     set_concurrency_and_phase(active_workers, false /* concurrent */);
  2752     // Note - if there's no work gang then the VMThread will be
  2753     // the thread to execute the remark - serially. We have
  2754     // to pass true for the is_serial parameter so that
  2755     // CMTask::do_marking_step() doesn't enter the sync
  2756     // barriers in the event of an overflow. Doing so will
  2757     // cause an assert that the current thread is not a
  2758     // concurrent GC thread.
  2759     CMRemarkTask remarkTask(this, active_workers, true /* is_serial*/);
  2760     remarkTask.work(0);
  2762   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  2763   guarantee(has_overflown() ||
  2764             satb_mq_set.completed_buffers_num() == 0,
  2765             err_msg("Invariant: has_overflown = %s, num buffers = %d",
  2766                     BOOL_TO_STR(has_overflown()),
  2767                     satb_mq_set.completed_buffers_num()));
  2769   print_stats();
  2772 #ifndef PRODUCT
  2774 class PrintReachableOopClosure: public OopClosure {
  2775 private:
  2776   G1CollectedHeap* _g1h;
  2777   outputStream*    _out;
  2778   VerifyOption     _vo;
  2779   bool             _all;
  2781 public:
  2782   PrintReachableOopClosure(outputStream* out,
  2783                            VerifyOption  vo,
  2784                            bool          all) :
  2785     _g1h(G1CollectedHeap::heap()),
  2786     _out(out), _vo(vo), _all(all) { }
  2788   void do_oop(narrowOop* p) { do_oop_work(p); }
  2789   void do_oop(      oop* p) { do_oop_work(p); }
  2791   template <class T> void do_oop_work(T* p) {
  2792     oop         obj = oopDesc::load_decode_heap_oop(p);
  2793     const char* str = NULL;
  2794     const char* str2 = "";
  2796     if (obj == NULL) {
  2797       str = "";
  2798     } else if (!_g1h->is_in_g1_reserved(obj)) {
  2799       str = " O";
  2800     } else {
  2801       HeapRegion* hr  = _g1h->heap_region_containing(obj);
  2802       bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo);
  2803       bool marked = _g1h->is_marked(obj, _vo);
  2805       if (over_tams) {
  2806         str = " >";
  2807         if (marked) {
  2808           str2 = " AND MARKED";
  2810       } else if (marked) {
  2811         str = " M";
  2812       } else {
  2813         str = " NOT";
  2817     _out->print_cr("  "PTR_FORMAT": "PTR_FORMAT"%s%s",
  2818                    p2i(p), p2i((void*) obj), str, str2);
  2820 };
  2822 class PrintReachableObjectClosure : public ObjectClosure {
  2823 private:
  2824   G1CollectedHeap* _g1h;
  2825   outputStream*    _out;
  2826   VerifyOption     _vo;
  2827   bool             _all;
  2828   HeapRegion*      _hr;
  2830 public:
  2831   PrintReachableObjectClosure(outputStream* out,
  2832                               VerifyOption  vo,
  2833                               bool          all,
  2834                               HeapRegion*   hr) :
  2835     _g1h(G1CollectedHeap::heap()),
  2836     _out(out), _vo(vo), _all(all), _hr(hr) { }
  2838   void do_object(oop o) {
  2839     bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo);
  2840     bool marked = _g1h->is_marked(o, _vo);
  2841     bool print_it = _all || over_tams || marked;
  2843     if (print_it) {
  2844       _out->print_cr(" "PTR_FORMAT"%s",
  2845                      p2i((void *)o), (over_tams) ? " >" : (marked) ? " M" : "");
  2846       PrintReachableOopClosure oopCl(_out, _vo, _all);
  2847       o->oop_iterate_no_header(&oopCl);
  2850 };
  2852 class PrintReachableRegionClosure : public HeapRegionClosure {
  2853 private:
  2854   G1CollectedHeap* _g1h;
  2855   outputStream*    _out;
  2856   VerifyOption     _vo;
  2857   bool             _all;
  2859 public:
  2860   bool doHeapRegion(HeapRegion* hr) {
  2861     HeapWord* b = hr->bottom();
  2862     HeapWord* e = hr->end();
  2863     HeapWord* t = hr->top();
  2864     HeapWord* p = _g1h->top_at_mark_start(hr, _vo);
  2865     _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
  2866                    "TAMS: " PTR_FORMAT, p2i(b), p2i(e), p2i(t), p2i(p));
  2867     _out->cr();
  2869     HeapWord* from = b;
  2870     HeapWord* to   = t;
  2872     if (to > from) {
  2873       _out->print_cr("Objects in [" PTR_FORMAT ", " PTR_FORMAT "]", p2i(from), p2i(to));
  2874       _out->cr();
  2875       PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
  2876       hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
  2877       _out->cr();
  2880     return false;
  2883   PrintReachableRegionClosure(outputStream* out,
  2884                               VerifyOption  vo,
  2885                               bool          all) :
  2886     _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { }
  2887 };
  2889 void ConcurrentMark::print_reachable(const char* str,
  2890                                      VerifyOption vo,
  2891                                      bool all) {
  2892   gclog_or_tty->cr();
  2893   gclog_or_tty->print_cr("== Doing heap dump... ");
  2895   if (G1PrintReachableBaseFile == NULL) {
  2896     gclog_or_tty->print_cr("  #### error: no base file defined");
  2897     return;
  2900   if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
  2901       (JVM_MAXPATHLEN - 1)) {
  2902     gclog_or_tty->print_cr("  #### error: file name too long");
  2903     return;
  2906   char file_name[JVM_MAXPATHLEN];
  2907   sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
  2908   gclog_or_tty->print_cr("  dumping to file %s", file_name);
  2910   fileStream fout(file_name);
  2911   if (!fout.is_open()) {
  2912     gclog_or_tty->print_cr("  #### error: could not open file");
  2913     return;
  2916   outputStream* out = &fout;
  2917   out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo));
  2918   out->cr();
  2920   out->print_cr("--- ITERATING OVER REGIONS");
  2921   out->cr();
  2922   PrintReachableRegionClosure rcl(out, vo, all);
  2923   _g1h->heap_region_iterate(&rcl);
  2924   out->cr();
  2926   gclog_or_tty->print_cr("  done");
  2927   gclog_or_tty->flush();
  2930 #endif // PRODUCT
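       // For illustration (the file name, str value and addresses below are hypothetical): with
       // G1PrintReachableBaseFile set to /tmp/reach and a str of "remark", the dump above goes
       // to /tmp/reach.remark and contains, per region, a header printed by
       // PrintReachableRegionClosure followed by the objects and fields selected above,
       // along the lines of:
       //
       //   ** [0x00000000f0000000, 0x00000000f0100000] top: 0x00000000f0040000 TAMS: 0x00000000f0020000
       //   Objects in [0x00000000f0000000, 0x00000000f0040000]
       //    0x00000000f0000010 M
       //      0x00000000f0000018: 0x00000000f0020040 >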
  2932 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
  2933   // Note we are overriding the read-only view of the prev map here, via
  2934   // the cast.
  2935   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
  2938 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
  2939   _nextMarkBitMap->clearRange(mr);
  2942 void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) {
  2943   clearRangePrevBitmap(mr);
  2944   clearRangeNextBitmap(mr);
  2947 HeapRegion*
  2948 ConcurrentMark::claim_region(uint worker_id) {
  2949   // "checkpoint" the finger
  2950   HeapWord* finger = _finger;
  2952   // _heap_end will not change underneath our feet; it only changes at
  2953   // yield points.
  2954   while (finger < _heap_end) {
  2955     assert(_g1h->is_in_g1_reserved(finger), "invariant");
  2957     // Note on how this code handles humongous regions. In the
  2958     // normal case the finger will reach the start of a "starts
  2959     // humongous" (SH) region. Its end will either be the end of the
  2960     // last "continues humongous" (CH) region in the sequence, or the
  2961     // standard end of the SH region (if the SH is the only region in
  2962     // the sequence). That way claim_region() will skip over the CH
  2963     // regions. However, there is a subtle race between a CM thread
  2964     // executing this method and a mutator thread doing a humongous
  2965     // object allocation. The two are not mutually exclusive as the CM
  2966     // thread does not need to hold the Heap_lock when it gets
  2967     // here. So there is a chance that claim_region() will come across
  2968     // a free region that's in the process of becoming a SH or a CH
  2969     // region. In the former case, it will either
  2970     //   a) Miss the update to the region's end, in which case it will
  2971     //      visit every subsequent CH region, will find their bitmaps
  2972     //      empty, and do nothing, or
  2973     //   b) Will observe the update of the region's end (in which case
  2974     //      it will skip the subsequent CH regions).
  2975     // If it comes across a region that suddenly becomes CH, the
  2976     // scenario will be similar to b). So, the race between
  2977     // claim_region() and a humongous object allocation might force us
  2978     // to do a bit of unnecessary work (due to some unnecessary bitmap
  2979     // iterations) but it should not introduce any correctness issues.
  2980     HeapRegion* curr_region   = _g1h->heap_region_containing_raw(finger);
  2981     HeapWord*   bottom        = curr_region->bottom();
  2982     HeapWord*   end           = curr_region->end();
  2983     HeapWord*   limit         = curr_region->next_top_at_mark_start();
  2985     if (verbose_low()) {
  2986       gclog_or_tty->print_cr("[%u] curr_region = "PTR_FORMAT" "
  2987                              "["PTR_FORMAT", "PTR_FORMAT"), "
  2988                              "limit = "PTR_FORMAT,
  2989                              worker_id, p2i(curr_region), p2i(bottom), p2i(end), p2i(limit));
  2992     // Is the gap between reading the finger and doing the CAS too long?
  2993     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
  2994     if (res == finger) {
  2995       // we succeeded
  2997       // notice that _finger == end cannot be guaranteed here since
  2998       // someone else might have moved the finger even further.
  2999       assert(_finger >= end, "the finger should have moved forward");
  3001       if (verbose_low()) {
  3002         gclog_or_tty->print_cr("[%u] we were successful with region = "
  3003                                PTR_FORMAT, worker_id, p2i(curr_region));
  3006       if (limit > bottom) {
  3007         if (verbose_low()) {
  3008           gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is not empty, "
  3009                                  "returning it ", worker_id, p2i(curr_region));
  3011         return curr_region;
  3012       } else {
  3013         assert(limit == bottom,
  3014                "the region limit should be at bottom");
  3015         if (verbose_low()) {
  3016           gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is empty, "
  3017                                  "returning NULL", worker_id, p2i(curr_region));
  3019         // we return NULL and the caller should try calling
  3020         // claim_region() again.
  3021         return NULL;
  3023     } else {
  3024       assert(_finger > finger, "the finger should have moved forward");
  3025       if (verbose_low()) {
  3026         gclog_or_tty->print_cr("[%u] somebody else moved the finger, "
  3027                                "global finger = "PTR_FORMAT", "
  3028                                "our finger = "PTR_FORMAT,
  3029                                worker_id, p2i(_finger), p2i(finger));
  3032       // read it again
  3033       finger = _finger;
  3037   return NULL;
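       // A minimal sketch (hypothetical caller, not taken from this file) of the retry pattern
       // that the NULL return above implies: a worker keeps asking for regions until it gets a
       // non-empty one or runs out of regions to claim.
       //
       //   HeapRegion* hr = _cm->claim_region(_worker_id);
       //   while (hr == NULL && !out_of_regions) {   // out_of_regions: hypothetical check
       //     hr = _cm->claim_region(_worker_id);     // NULL for an empty region means "try again"
       //   }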
  3040 #ifndef PRODUCT
  3041 enum VerifyNoCSetOopsPhase {
  3042   VerifyNoCSetOopsStack,
  3043   VerifyNoCSetOopsQueues,
  3044   VerifyNoCSetOopsSATBCompleted,
  3045   VerifyNoCSetOopsSATBThread
  3046 };
  3048 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure  {
  3049 private:
  3050   G1CollectedHeap* _g1h;
  3051   VerifyNoCSetOopsPhase _phase;
  3052   int _info;
  3054   const char* phase_str() {
  3055     switch (_phase) {
  3056     case VerifyNoCSetOopsStack:         return "Stack";
  3057     case VerifyNoCSetOopsQueues:        return "Queue";
  3058     case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers";
  3059     case VerifyNoCSetOopsSATBThread:    return "Thread SATB Buffers";
  3060     default:                            ShouldNotReachHere();
  3062     return NULL;
  3065   void do_object_work(oop obj) {
  3066     guarantee(!_g1h->obj_in_cs(obj),
  3067               err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d",
  3068                       p2i((void*) obj), phase_str(), _info));
  3071 public:
  3072   VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { }
  3074   void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) {
  3075     _phase = phase;
  3076     _info = info;
  3079   virtual void do_oop(oop* p) {
  3080     oop obj = oopDesc::load_decode_heap_oop(p);
  3081     do_object_work(obj);
  3084   virtual void do_oop(narrowOop* p) {
  3085     // We should not come across narrow oops while scanning marking
  3086     // stacks and SATB buffers.
  3087     ShouldNotReachHere();
  3090   virtual void do_object(oop obj) {
  3091     do_object_work(obj);
  3093 };
  3095 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks,
  3096                                          bool verify_enqueued_buffers,
  3097                                          bool verify_thread_buffers,
  3098                                          bool verify_fingers) {
  3099   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
  3100   if (!G1CollectedHeap::heap()->mark_in_progress()) {
  3101     return;
  3104   VerifyNoCSetOopsClosure cl;
  3106   if (verify_stacks) {
  3107     // Verify entries on the global mark stack
  3108     cl.set_phase(VerifyNoCSetOopsStack);
  3109     _markStack.oops_do(&cl);
  3111     // Verify entries on the task queues
  3112     for (uint i = 0; i < _max_worker_id; i += 1) {
  3113       cl.set_phase(VerifyNoCSetOopsQueues, i);
  3114       CMTaskQueue* queue = _task_queues->queue(i);
  3115       queue->oops_do(&cl);
  3119   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  3121   // Verify entries on the enqueued SATB buffers
  3122   if (verify_enqueued_buffers) {
  3123     cl.set_phase(VerifyNoCSetOopsSATBCompleted);
  3124     satb_qs.iterate_completed_buffers_read_only(&cl);
  3127   // Verify entries on the per-thread SATB buffers
  3128   if (verify_thread_buffers) {
  3129     cl.set_phase(VerifyNoCSetOopsSATBThread);
  3130     satb_qs.iterate_thread_buffers_read_only(&cl);
  3133   if (verify_fingers) {
  3134     // Verify the global finger
  3135     HeapWord* global_finger = finger();
  3136     if (global_finger != NULL && global_finger < _heap_end) {
  3137       // The global finger always points to a heap region boundary. We
  3138       // use heap_region_containing_raw() to get the containing region
  3139       // given that the global finger could be pointing to a free region
  3140       // which subsequently becomes continues humongous. If that
  3141       // happens, heap_region_containing() will return the bottom of the
  3142       // corresponding starts humongous region and the check below will
  3143       // not hold any more.
  3144       HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
  3145       guarantee(global_finger == global_hr->bottom(),
  3146                 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT,
  3147                         p2i(global_finger), HR_FORMAT_PARAMS(global_hr)));
  3150     // Verify the task fingers
  3151     assert(parallel_marking_threads() <= _max_worker_id, "sanity");
  3152     for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {
  3153       CMTask* task = _tasks[i];
  3154       HeapWord* task_finger = task->finger();
  3155       if (task_finger != NULL && task_finger < _heap_end) {
  3156         // See above note on the global finger verification.
  3157         HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
  3158         guarantee(task_finger == task_hr->bottom() ||
  3159                   !task_hr->in_collection_set(),
  3160                   err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT,
  3161                           p2i(task_finger), HR_FORMAT_PARAMS(task_hr)));
  3166 #endif // PRODUCT
  3168 // Aggregate the counting data that was constructed concurrently
  3169 // with marking.
  3170 class AggregateCountDataHRClosure: public HeapRegionClosure {
  3171   G1CollectedHeap* _g1h;
  3172   ConcurrentMark* _cm;
  3173   CardTableModRefBS* _ct_bs;
  3174   BitMap* _cm_card_bm;
  3175   uint _max_worker_id;
  3177  public:
  3178   AggregateCountDataHRClosure(G1CollectedHeap* g1h,
  3179                               BitMap* cm_card_bm,
  3180                               uint max_worker_id) :
  3181     _g1h(g1h), _cm(g1h->concurrent_mark()),
  3182     _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
  3183     _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
  3185   bool doHeapRegion(HeapRegion* hr) {
  3186     if (hr->continuesHumongous()) {
  3187       // We will ignore these here and process them when their
  3188       // associated "starts humongous" region is processed.
  3189       // Note that we cannot rely on their associated
  3190       // "starts humongous" region to have their bit set to 1
  3191       // since, due to the region chunking in the parallel region
  3192       // iteration, a "continues humongous" region might be visited
  3193       // before its associated "starts humongous".
  3194       return false;
  3197     HeapWord* start = hr->bottom();
  3198     HeapWord* limit = hr->next_top_at_mark_start();
  3199     HeapWord* end = hr->end();
  3201     assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
  3202            err_msg("Preconditions not met - "
  3203                    "start: "PTR_FORMAT", limit: "PTR_FORMAT", "
  3204                    "top: "PTR_FORMAT", end: "PTR_FORMAT,
  3205                    p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end())));
  3207     assert(hr->next_marked_bytes() == 0, "Precondition");
  3209     if (start == limit) {
  3210       // NTAMS of this region has not been set so nothing to do.
  3211       return false;
  3214     // 'start' should be in the heap.
  3215     assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
  3216     // 'end' *may* be just beyond the end of the heap (if hr is the last region).
  3217     assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
  3219     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
  3220     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
  3221     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
  3223     // If ntams is not card aligned then we bump card bitmap index
  3224     // for limit so that we get all the cards spanned by
  3225     // the object ending at ntams.
  3226     // Note: if this is the last region in the heap then ntams
  3227     // could actually be just beyond the end of the heap;
  3228     // limit_idx will then correspond to a (non-existent) card
  3229     // that is also outside the heap.
  3230     if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
  3231       limit_idx += 1;
  3234     assert(limit_idx <= end_idx, "or else use atomics");
  3236     // Aggregate the "stripe" in the count data associated with hr.
  3237     uint hrs_index = hr->hrs_index();
  3238     size_t marked_bytes = 0;
  3240     for (uint i = 0; i < _max_worker_id; i += 1) {
  3241       size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
  3242       BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
  3244       // Fetch the marked_bytes in this region for task i and
  3245       // add it to the running total for this region.
  3246       marked_bytes += marked_bytes_array[hrs_index];
  3248       // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
  3249       // into the global card bitmap.
  3250       BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
  3252       while (scan_idx < limit_idx) {
  3253         assert(task_card_bm->at(scan_idx) == true, "should be");
  3254         _cm_card_bm->set_bit(scan_idx);
  3255         assert(_cm_card_bm->at(scan_idx) == true, "should be");
  3257         // BitMap::get_next_one_offset() can handle the case when
  3258         // its left_offset parameter is greater than its right_offset
  3259         // parameter. It does, however, have an early exit if
  3260         // left_offset == right_offset. So let's limit the value
  3261         // passed in for left offset here.
  3262         BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
  3263         scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
  3267     // Update the marked bytes for this region.
  3268     hr->add_to_marked_bytes(marked_bytes);
  3270     // Next heap region
  3271     return false;
  3273 };
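       // Worked example (all numbers made up): suppose two workers recorded liveness for the
       // region with hrs_index 7 during marking: worker 0 counted 4096 marked bytes and set
       // cards {120, 121} in its task card bitmap, while worker 1 counted 512 bytes and set
       // card {121}. When doHeapRegion() above visits that region, it adds 4096 + 512 = 4608
       // bytes via add_to_marked_bytes() and leaves bits 120 and 121 set in the global card
       // bitmap.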
  3275 class G1AggregateCountDataTask: public AbstractGangTask {
  3276 protected:
  3277   G1CollectedHeap* _g1h;
  3278   ConcurrentMark* _cm;
  3279   BitMap* _cm_card_bm;
  3280   uint _max_worker_id;
  3281   int _active_workers;
  3283 public:
  3284   G1AggregateCountDataTask(G1CollectedHeap* g1h,
  3285                            ConcurrentMark* cm,
  3286                            BitMap* cm_card_bm,
  3287                            uint max_worker_id,
  3288                            int n_workers) :
  3289     AbstractGangTask("Count Aggregation"),
  3290     _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
  3291     _max_worker_id(max_worker_id),
  3292     _active_workers(n_workers) { }
  3294   void work(uint worker_id) {
  3295     AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);
  3297     if (G1CollectedHeap::use_parallel_gc_threads()) {
  3298       _g1h->heap_region_par_iterate_chunked(&cl, worker_id,
  3299                                             _active_workers,
  3300                                             HeapRegion::AggregateCountClaimValue);
  3301     } else {
  3302       _g1h->heap_region_iterate(&cl);
  3305 };
  3308 void ConcurrentMark::aggregate_count_data() {
  3309   int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
  3310                         _g1h->workers()->active_workers() :
  3311                         1);
  3313   G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
  3314                                            _max_worker_id, n_workers);
  3316   if (G1CollectedHeap::use_parallel_gc_threads()) {
  3317     assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
  3318            "sanity check");
  3319     _g1h->set_par_threads(n_workers);
  3320     _g1h->workers()->run_task(&g1_par_agg_task);
  3321     _g1h->set_par_threads(0);
  3323     assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue),
  3324            "sanity check");
  3325     _g1h->reset_heap_region_claim_values();
  3326   } else {
  3327     g1_par_agg_task.work(0);
  3331 // Clear the per-worker arrays used to store the per-region counting data
  3332 void ConcurrentMark::clear_all_count_data() {
  3333   // Clear the global card bitmap - it will be filled during
  3334   // liveness count aggregation (during remark) and the
  3335   // final counting task.
  3336   _card_bm.clear();
  3338   // Clear the global region bitmap - it will be filled as part
  3339   // of the final counting task.
  3340   _region_bm.clear();
  3342   uint max_regions = _g1h->max_regions();
  3343   assert(_max_worker_id > 0, "uninitialized");
  3345   for (uint i = 0; i < _max_worker_id; i += 1) {
  3346     BitMap* task_card_bm = count_card_bitmap_for(i);
  3347     size_t* marked_bytes_array = count_marked_bytes_array_for(i);
  3349     assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
  3350     assert(marked_bytes_array != NULL, "uninitialized");
  3352     memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
  3353     task_card_bm->clear();
  3357 void ConcurrentMark::print_stats() {
  3358   if (verbose_stats()) {
  3359     gclog_or_tty->print_cr("---------------------------------------------------------------------");
  3360     for (size_t i = 0; i < _active_tasks; ++i) {
  3361       _tasks[i]->print_stats();
  3362       gclog_or_tty->print_cr("---------------------------------------------------------------------");
  3367 // abandon current marking iteration due to a Full GC
  3368 void ConcurrentMark::abort() {
  3369   // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
  3370   // concurrent bitmap clearing.
  3371   _nextMarkBitMap->clearAll();
  3373   // Note we cannot clear the previous marking bitmap here
  3374   // since VerifyDuringGC verifies the objects marked during
  3375   // a full GC against the previous bitmap.
  3377   // Clear the liveness counting data
  3378   clear_all_count_data();
  3379   // Empty mark stack
  3380   reset_marking_state();
  3381   for (uint i = 0; i < _max_worker_id; ++i) {
  3382     _tasks[i]->clear_region_fields();
  3384   _first_overflow_barrier_sync.abort();
  3385   _second_overflow_barrier_sync.abort();
  3386   const GCId& gc_id = _g1h->gc_tracer_cm()->gc_id();
  3387   if (!gc_id.is_undefined()) {
  3388     // We can do multiple full GCs before ConcurrentMarkThread::run() gets a chance
  3389     // to detect that it was aborted. Only keep track of the first GC id that we aborted.
  3390     _aborted_gc_id = gc_id;
  3392   _has_aborted = true;
  3394   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  3395   satb_mq_set.abandon_partial_marking();
  3396   // This can be called either during or outside marking; we'll read
  3397   // the expected_active value from the SATB queue set.
  3398   satb_mq_set.set_active_all_threads(
  3399                                  false, /* new active value */
  3400                                  satb_mq_set.is_active() /* expected_active */);
  3402   _g1h->trace_heap_after_concurrent_cycle();
  3403   _g1h->register_concurrent_cycle_end();
  3406 const GCId& ConcurrentMark::concurrent_gc_id() {
  3407   if (has_aborted()) {
  3408     return _aborted_gc_id;
  3410   return _g1h->gc_tracer_cm()->gc_id();
  3413 static void print_ms_time_info(const char* prefix, const char* name,
  3414                                NumberSeq& ns) {
  3415   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
  3416                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
  3417   if (ns.num() > 0) {
  3418     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
  3419                            prefix, ns.sd(), ns.maximum());
  3423 void ConcurrentMark::print_summary_info() {
  3424   gclog_or_tty->print_cr(" Concurrent marking:");
  3425   print_ms_time_info("  ", "init marks", _init_times);
  3426   print_ms_time_info("  ", "remarks", _remark_times);
  3428     print_ms_time_info("     ", "final marks", _remark_mark_times);
  3429     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
  3432   print_ms_time_info("  ", "cleanups", _cleanup_times);
  3433   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
  3434                          _total_counting_time,
  3435                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
  3436                           (double)_cleanup_times.num()
  3437                          : 0.0));
  3438   if (G1ScrubRemSets) {
  3439     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
  3440                            _total_rs_scrub_time,
  3441                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
  3442                             (double)_cleanup_times.num()
  3443                            : 0.0));
  3445   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
  3446                          (_init_times.sum() + _remark_times.sum() +
  3447                           _cleanup_times.sum())/1000.0);
  3448   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
  3449                 "(%8.2f s marking).",
  3450                 cmThread()->vtime_accum(),
  3451                 cmThread()->vtime_mark_accum());
  3454 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
  3455   if (use_parallel_marking_threads()) {
  3456     _parallel_workers->print_worker_threads_on(st);
  3460 void ConcurrentMark::print_on_error(outputStream* st) const {
  3461   st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
  3462       p2i(_prevMarkBitMap), p2i(_nextMarkBitMap));
  3463   _prevMarkBitMap->print_on_error(st, " Prev Bits: ");
  3464   _nextMarkBitMap->print_on_error(st, " Next Bits: ");
  3467 // We take a break if someone is trying to stop the world.
  3468 bool ConcurrentMark::do_yield_check(uint worker_id) {
  3469   if (SuspendibleThreadSet::should_yield()) {
  3470     if (worker_id == 0) {
  3471       _g1h->g1_policy()->record_concurrent_pause();
  3473     SuspendibleThreadSet::yield();
  3474     return true;
  3475   } else {
  3476     return false;
  3480 bool ConcurrentMark::containing_card_is_marked(void* p) {
  3481   size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
  3482   return _card_bm.at(offset >> CardTableModRefBS::card_shift);
  3485 bool ConcurrentMark::containing_cards_are_marked(void* start,
  3486                                                  void* last) {
  3487   return containing_card_is_marked(start) &&
  3488          containing_card_is_marked(last);
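       // Worked example for containing_card_is_marked() above, assuming the usual 512-byte
       // cards (CardTableModRefBS::card_shift == 9): for a pointer 4660 bytes past the start
       // of the reserved region, offset >> card_shift == 4660 >> 9 == 9, so the query tests
       // bit 9 of _card_bm.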
  3491 #ifndef PRODUCT
  3492 // for debugging purposes
  3493 void ConcurrentMark::print_finger() {
  3494   gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
  3495                          p2i(_heap_start), p2i(_heap_end), p2i(_finger));
  3496   for (uint i = 0; i < _max_worker_id; ++i) {
  3497     gclog_or_tty->print("   %u: " PTR_FORMAT, i, p2i(_tasks[i]->finger()));
  3499   gclog_or_tty->cr();
  3501 #endif
  3503 void CMTask::scan_object(oop obj) {
  3504   assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
  3506   if (_cm->verbose_high()) {
  3507     gclog_or_tty->print_cr("[%u] we're scanning object "PTR_FORMAT,
  3508                            _worker_id, p2i((void*) obj));
  3511   size_t obj_size = obj->size();
  3512   _words_scanned += obj_size;
  3514   obj->oop_iterate(_cm_oop_closure);
  3515   statsOnly( ++_objs_scanned );
  3516   check_limits();
  3519 // Closure for iteration over bitmaps
  3520 class CMBitMapClosure : public BitMapClosure {
  3521 private:
  3522   // the bitmap that is being iterated over
  3523   CMBitMap*                   _nextMarkBitMap;
  3524   ConcurrentMark*             _cm;
  3525   CMTask*                     _task;
  3527 public:
  3528   CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
  3529     _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
  3531   bool do_bit(size_t offset) {
  3532     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
  3533     assert(_nextMarkBitMap->isMarked(addr), "invariant");
  3534     assert( addr < _cm->finger(), "invariant");
  3536     statsOnly( _task->increase_objs_found_on_bitmap() );
  3537     assert(addr >= _task->finger(), "invariant");
  3539     // We move that task's local finger along.
  3540     _task->move_finger_to(addr);
  3542     _task->scan_object(oop(addr));
  3543     // we only partially drain the local queue and global stack
  3544     _task->drain_local_queue(true);
  3545     _task->drain_global_stack(true);
  3547     // if the has_aborted flag has been raised, we need to bail out of
  3548     // the iteration
  3549     return !_task->has_aborted();
  3551 };
  3553 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
  3554                                ConcurrentMark* cm,
  3555                                CMTask* task)
  3556   : _g1h(g1h), _cm(cm), _task(task) {
  3557   assert(_ref_processor == NULL, "should be initialized to NULL");
  3559   if (G1UseConcMarkReferenceProcessing) {
  3560     _ref_processor = g1h->ref_processor_cm();
  3561     assert(_ref_processor != NULL, "should not be NULL");
  3565 void CMTask::setup_for_region(HeapRegion* hr) {
  3566   assert(hr != NULL,
  3567         "claim_region() should have filtered out NULL regions");
  3568   assert(!hr->continuesHumongous(),
  3569         "claim_region() should have filtered out continues humongous regions");
  3571   if (_cm->verbose_low()) {
  3572     gclog_or_tty->print_cr("[%u] setting up for region "PTR_FORMAT,
  3573                            _worker_id, p2i(hr));
  3576   _curr_region  = hr;
  3577   _finger       = hr->bottom();
  3578   update_region_limit();
  3581 void CMTask::update_region_limit() {
  3582   HeapRegion* hr            = _curr_region;
  3583   HeapWord* bottom          = hr->bottom();
  3584   HeapWord* limit           = hr->next_top_at_mark_start();
  3586   if (limit == bottom) {
  3587     if (_cm->verbose_low()) {
  3588       gclog_or_tty->print_cr("[%u] found an empty region "
  3589                              "["PTR_FORMAT", "PTR_FORMAT")",
  3590                              _worker_id, p2i(bottom), p2i(limit));
  3592     // The region was collected underneath our feet.
  3593     // We set the finger to bottom to ensure that the bitmap
  3594     // iteration that will follow this will not do anything.
  3595     // (this is not a condition that holds when we set the region up,
  3596     // as the region is not supposed to be empty in the first place)
  3597     _finger = bottom;
  3598   } else if (limit >= _region_limit) {
  3599     assert(limit >= _finger, "peace of mind");
  3600   } else {
  3601     assert(limit < _region_limit, "only way to get here");
  3602     // This can happen under some pretty unusual circumstances.  An
  3603     // evacuation pause empties the region underneath our feet (NTAMS
  3604     // at bottom). We then do some allocation in the region (NTAMS
  3605     // stays at bottom), followed by the region being used as a GC
  3606     // alloc region (NTAMS will move to top() and the objects
  3607     // originally below it will be grayed). All objects now marked in
  3608     // the region are explicitly grayed, if below the global finger,
  3609     // and in fact we do not need to scan anything else. So, we simply
  3610     // set _finger to be limit to ensure that the bitmap iteration
  3611     // doesn't do anything.
  3612     _finger = limit;
  3615   _region_limit = limit;
  3618 void CMTask::giveup_current_region() {
  3619   assert(_curr_region != NULL, "invariant");
  3620   if (_cm->verbose_low()) {
  3621     gclog_or_tty->print_cr("[%u] giving up region "PTR_FORMAT,
  3622                            _worker_id, p2i(_curr_region));
  3624   clear_region_fields();
  3627 void CMTask::clear_region_fields() {
  3628   // Values for these three fields that indicate that we're not
  3629   // holding on to a region.
  3630   _curr_region   = NULL;
  3631   _finger        = NULL;
  3632   _region_limit  = NULL;
  3635 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
  3636   if (cm_oop_closure == NULL) {
  3637     assert(_cm_oop_closure != NULL, "invariant");
  3638   } else {
  3639     assert(_cm_oop_closure == NULL, "invariant");
  3641   _cm_oop_closure = cm_oop_closure;
  3644 void CMTask::reset(CMBitMap* nextMarkBitMap) {
  3645   guarantee(nextMarkBitMap != NULL, "invariant");
  3647   if (_cm->verbose_low()) {
  3648     gclog_or_tty->print_cr("[%u] resetting", _worker_id);
  3651   _nextMarkBitMap                = nextMarkBitMap;
  3652   clear_region_fields();
  3654   _calls                         = 0;
  3655   _elapsed_time_ms               = 0.0;
  3656   _termination_time_ms           = 0.0;
  3657   _termination_start_time_ms     = 0.0;
  3659 #if _MARKING_STATS_
  3660   _local_pushes                  = 0;
  3661   _local_pops                    = 0;
  3662   _local_max_size                = 0;
  3663   _objs_scanned                  = 0;
  3664   _global_pushes                 = 0;
  3665   _global_pops                   = 0;
  3666   _global_max_size               = 0;
  3667   _global_transfers_to           = 0;
  3668   _global_transfers_from         = 0;
  3669   _regions_claimed               = 0;
  3670   _objs_found_on_bitmap          = 0;
  3671   _satb_buffers_processed        = 0;
  3672   _steal_attempts                = 0;
  3673   _steals                        = 0;
  3674   _aborted                       = 0;
  3675   _aborted_overflow              = 0;
  3676   _aborted_cm_aborted            = 0;
  3677   _aborted_yield                 = 0;
  3678   _aborted_timed_out             = 0;
  3679   _aborted_satb                  = 0;
  3680   _aborted_termination           = 0;
  3681 #endif // _MARKING_STATS_
  3684 bool CMTask::should_exit_termination() {
  3685   regular_clock_call();
  3686   // This is called when we are in the termination protocol. We should
  3687   // quit if, for some reason, this task wants to abort or the global
  3688   // stack is not empty (this means that we can get work from it).
  3689   return !_cm->mark_stack_empty() || has_aborted();
  3692 void CMTask::reached_limit() {
  3693   assert(_words_scanned >= _words_scanned_limit ||
  3694          _refs_reached >= _refs_reached_limit ,
  3695          "shouldn't have been called otherwise");
  3696   regular_clock_call();
  3699 void CMTask::regular_clock_call() {
  3700   if (has_aborted()) return;
  3702   // First, we need to recalculate the words scanned and refs reached
  3703   // limits for the next clock call.
  3704   recalculate_limits();
  3706   // During the regular clock call we do the following
  3708   // (1) If an overflow has been flagged, then we abort.
  3709   if (_cm->has_overflown()) {
  3710     set_has_aborted();
  3711     return;
  3714   // If we are not concurrent (i.e. we're doing remark) we don't need
  3715   // to check anything else. The other steps are only needed during
  3716   // the concurrent marking phase.
  3717   if (!concurrent()) return;
  3719   // (2) If marking has been aborted for Full GC, then we also abort.
  3720   if (_cm->has_aborted()) {
  3721     set_has_aborted();
  3722     statsOnly( ++_aborted_cm_aborted );
  3723     return;
  3726   double curr_time_ms = os::elapsedVTime() * 1000.0;
  3728   // (3) If marking stats are enabled, then we update the step history.
  3729 #if _MARKING_STATS_
  3730   if (_words_scanned >= _words_scanned_limit) {
  3731     ++_clock_due_to_scanning;
  3733   if (_refs_reached >= _refs_reached_limit) {
  3734     ++_clock_due_to_marking;
  3737   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
  3738   _interval_start_time_ms = curr_time_ms;
  3739   _all_clock_intervals_ms.add(last_interval_ms);
  3741   if (_cm->verbose_medium()) {
  3742       gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, "
  3743                         "scanned = %d%s, refs reached = %d%s",
  3744                         _worker_id, last_interval_ms,
  3745                         _words_scanned,
  3746                         (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
  3747                         _refs_reached,
  3748                         (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
  3750 #endif // _MARKING_STATS_
  3752   // (4) We check whether we should yield. If we have to, then we abort.
  3753   if (SuspendibleThreadSet::should_yield()) {
  3754     // We should yield. To do this we abort the task. The caller is
  3755     // responsible for yielding.
  3756     set_has_aborted();
  3757     statsOnly( ++_aborted_yield );
  3758     return;
  3761   // (5) We check whether we've reached our time quota. If we have,
  3762   // then we abort.
  3763   double elapsed_time_ms = curr_time_ms - _start_time_ms;
  3764   if (elapsed_time_ms > _time_target_ms) {
  3765     set_has_aborted();
  3766     _has_timed_out = true;
  3767     statsOnly( ++_aborted_timed_out );
  3768     return;
  3771   // (6) Finally, we check whether there are enough completed SATB
  3772   // buffers available for processing. If there are, we abort.
  3773   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  3774   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
  3775     if (_cm->verbose_low()) {
  3776       gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
  3777                              _worker_id);
  3779     // we do need to process SATB buffers, so we'll abort and restart
  3780     // the marking task to do so
  3781     set_has_aborted();
  3782     statsOnly( ++_aborted_satb );
  3783     return;
  3787 void CMTask::recalculate_limits() {
  3788   _real_words_scanned_limit = _words_scanned + words_scanned_period;
  3789   _words_scanned_limit      = _real_words_scanned_limit;
  3791   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
  3792   _refs_reached_limit       = _real_refs_reached_limit;
  3795 void CMTask::decrease_limits() {
  3796   // This is called when we believe that we're going to do an infrequent
  3797   // operation which will increase the per byte scanned cost (i.e. move
  3798   // entries to/from the global stack). It basically tries to decrease the
  3799   // scanning limit so that the clock is called earlier.
  3801   if (_cm->verbose_medium()) {
  3802     gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id);
  3805   _words_scanned_limit = _real_words_scanned_limit -
  3806     3 * words_scanned_period / 4;
  3807   _refs_reached_limit  = _real_refs_reached_limit -
  3808     3 * refs_reached_period / 4;
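       // For example (hypothetical period value): if words_scanned_period were 4096, the limit
       // above drops to _real_words_scanned_limit - 3072, so at most about 1024 more words are
       // scanned before reached_limit() fires and regular_clock_call() runs. The same reasoning
       // applies to refs_reached_period.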
  3811 void CMTask::move_entries_to_global_stack() {
  3812   // local array where we'll store the entries that will be popped
  3813   // from the local queue
  3814   oop buffer[global_stack_transfer_size];
  3816   int n = 0;
  3817   oop obj;
  3818   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
  3819     buffer[n] = obj;
  3820     ++n;
  3823   if (n > 0) {
  3824     // we popped at least one entry from the local queue
  3826     statsOnly( ++_global_transfers_to; _local_pops += n );
  3828     if (!_cm->mark_stack_push(buffer, n)) {
  3829       if (_cm->verbose_low()) {
  3830         gclog_or_tty->print_cr("[%u] aborting due to global stack overflow",
  3831                                _worker_id);
  3833       set_has_aborted();
  3834     } else {
  3835       // the transfer was successful
  3837       if (_cm->verbose_medium()) {
  3838         gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack",
  3839                                _worker_id, n);
  3841       statsOnly( int tmp_size = _cm->mark_stack_size();
  3842                  if (tmp_size > _global_max_size) {
  3843                    _global_max_size = tmp_size;
  3845                  _global_pushes += n );
  3849   // this operation was quite expensive, so decrease the limits
  3850   decrease_limits();
  3853 void CMTask::get_entries_from_global_stack() {
  3854   // local array where we'll store the entries that will be popped
  3855   // from the global stack.
  3856   oop buffer[global_stack_transfer_size];
  3857   int n;
  3858   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
  3859   assert(n <= global_stack_transfer_size,
  3860          "we should not pop more than the given limit");
  3861   if (n > 0) {
  3862     // yes, we did actually pop at least one entry
  3864     statsOnly( ++_global_transfers_from; _global_pops += n );
  3865     if (_cm->verbose_medium()) {
  3866       gclog_or_tty->print_cr("[%u] popped %d entries from the global stack",
  3867                              _worker_id, n);
  3869     for (int i = 0; i < n; ++i) {
  3870       bool success = _task_queue->push(buffer[i]);
  3871       // We only call this when the local queue is empty or under a
  3872       // given target limit. So, we do not expect this push to fail.
  3873       assert(success, "invariant");
  3876     statsOnly( int tmp_size = _task_queue->size();
  3877                if (tmp_size > _local_max_size) {
  3878                  _local_max_size = tmp_size;
  3880                _local_pushes += n );
  3883   // this operation was quite expensive, so decrease the limits
  3884   decrease_limits();
  3887 void CMTask::drain_local_queue(bool partially) {
  3888   if (has_aborted()) return;
  3890   // Decide what the target size is, depending on whether we're going to
  3891   // drain it partially (so that other tasks can steal if they run out
  3892   // of things to do) or totally (at the very end).
  3893   size_t target_size;
  3894   if (partially) {
  3895     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
  3896   } else {
  3897     target_size = 0;
  3900   if (_task_queue->size() > target_size) {
  3901     if (_cm->verbose_high()) {
  3902       gclog_or_tty->print_cr("[%u] draining local queue, target size = " SIZE_FORMAT,
  3903                              _worker_id, target_size);
  3906     oop obj;
  3907     bool ret = _task_queue->pop_local(obj);
  3908     while (ret) {
  3909       statsOnly( ++_local_pops );
  3911       if (_cm->verbose_high()) {
  3912         gclog_or_tty->print_cr("[%u] popped "PTR_FORMAT, _worker_id,
  3913                                p2i((void*) obj));
  3916       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
  3917       assert(!_g1h->is_on_master_free_list(
  3918                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
  3920       scan_object(obj);
  3922       if (_task_queue->size() <= target_size || has_aborted()) {
  3923         ret = false;
  3924       } else {
  3925         ret = _task_queue->pop_local(obj);
  3929     if (_cm->verbose_high()) {
  3930       gclog_or_tty->print_cr("[%u] drained local queue, size = %d",
  3931                              _worker_id, _task_queue->size());
  3936 void CMTask::drain_global_stack(bool partially) {
  3937   if (has_aborted()) return;
  3939   // We have a policy to drain the local queue before we attempt to
  3940   // drain the global stack.
  3941   assert(partially || _task_queue->size() == 0, "invariant");
  3943   // Decide what the target size is, depending on whether we're going to
  3944   // drain it partially (so that other tasks can steal if they run out
  3945   // of things to do) or totally (at the very end).  Notice that,
  3946   // because we move entries from the global stack in chunks or
  3947   // because another task might be doing the same, we might in fact
  3948   // drop below the target. But, this is not a problem.
  3949   size_t target_size;
  3950   if (partially) {
  3951     target_size = _cm->partial_mark_stack_size_target();
  3952   } else {
  3953     target_size = 0;
  3956   if (_cm->mark_stack_size() > target_size) {
  3957     if (_cm->verbose_low()) {
  3958       gclog_or_tty->print_cr("[%u] draining global_stack, target size " SIZE_FORMAT,
  3959                              _worker_id, target_size);
  3962     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
  3963       get_entries_from_global_stack();
  3964       drain_local_queue(partially);
  3967     if (_cm->verbose_low()) {
  3968       gclog_or_tty->print_cr("[%u] drained global stack, size = " SIZE_FORMAT,
  3969                              _worker_id, _cm->mark_stack_size());
  3974 // The SATB queue code makes several assumptions about whether to call the par or
  3975 // non-par versions of its methods. This is why some of the code is
  3976 // replicated. We should really get rid of the single-threaded version
  3977 // of the code to simplify things.
  3978 void CMTask::drain_satb_buffers() {
  3979   if (has_aborted()) return;
  3981   // We set this so that the regular clock knows that we're in the
  3982   // middle of draining buffers and doesn't set the abort flag when it
  3983   // notices that SATB buffers are available for draining. It'd be
  3984   // very counterproductive if it did that. :-)
  3985   _draining_satb_buffers = true;
  3987   CMObjectClosure oc(this);
  3988   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  3989   if (G1CollectedHeap::use_parallel_gc_threads()) {
  3990     satb_mq_set.set_par_closure(_worker_id, &oc);
  3991   } else {
  3992     satb_mq_set.set_closure(&oc);
  3995   // This keeps claiming and applying the closure to completed buffers
  3996   // until we run out of buffers or we need to abort.
  3997   if (G1CollectedHeap::use_parallel_gc_threads()) {
  3998     while (!has_aborted() &&
  3999            satb_mq_set.par_apply_closure_to_completed_buffer(_worker_id)) {
  4000       if (_cm->verbose_medium()) {
  4001         gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
  4003       statsOnly( ++_satb_buffers_processed );
  4004       regular_clock_call();
  4006   } else {
  4007     while (!has_aborted() &&
  4008            satb_mq_set.apply_closure_to_completed_buffer()) {
  4009       if (_cm->verbose_medium()) {
  4010         gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
  4012       statsOnly( ++_satb_buffers_processed );
  4013       regular_clock_call();
  4017   _draining_satb_buffers = false;
  4019   assert(has_aborted() ||
  4020          concurrent() ||
  4021          satb_mq_set.completed_buffers_num() == 0, "invariant");
  4023   if (G1CollectedHeap::use_parallel_gc_threads()) {
  4024     satb_mq_set.set_par_closure(_worker_id, NULL);
  4025   } else {
  4026     satb_mq_set.set_closure(NULL);
  4029   // again, this was a potentially expensive operation, decrease the
  4030   // limits to get the regular clock call early
  4031   decrease_limits();
  4034 void CMTask::print_stats() {
  4035   gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d",
  4036                          _worker_id, _calls);
  4037   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
  4038                          _elapsed_time_ms, _termination_time_ms);
  4039   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
  4040                          _step_times_ms.num(), _step_times_ms.avg(),
  4041                          _step_times_ms.sd());
  4042   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
  4043                          _step_times_ms.maximum(), _step_times_ms.sum());
  4045 #if _MARKING_STATS_
  4046   gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
  4047                          _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
  4048                          _all_clock_intervals_ms.sd());
  4049   gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
  4050                          _all_clock_intervals_ms.maximum(),
  4051                          _all_clock_intervals_ms.sum());
  4052   gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
  4053                          _clock_due_to_scanning, _clock_due_to_marking);
  4054   gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
  4055                          _objs_scanned, _objs_found_on_bitmap);
  4056   gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
  4057                          _local_pushes, _local_pops, _local_max_size);
  4058   gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
  4059                          _global_pushes, _global_pops, _global_max_size);
  4060   gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
  4061                          _global_transfers_to,_global_transfers_from);
  4062   gclog_or_tty->print_cr("  Regions: claimed = %d", _regions_claimed);
  4063   gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
  4064   gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
  4065                          _steal_attempts, _steals);
  4066   gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
  4067   gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
  4068                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
  4069   gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
  4070                          _aborted_timed_out, _aborted_satb, _aborted_termination);
  4071 #endif // _MARKING_STATS_
  4074 /*****************************************************************************
  4076     The do_marking_step(time_target_ms, ...) method is the building
  4077     block of the parallel marking framework. It can be called in parallel
  4078     with other invocations of do_marking_step() on different tasks
  4079     (but only one per task, obviously) and concurrently with the
  4080     mutator threads, or during remark, hence it eliminates the need
  4081     for two versions of the code. When called during remark, it will
  4082     pick up from where the task left off during the concurrent marking
  4083     phase. Interestingly, tasks are also claimable during evacuation
  4084       pauses, since do_marking_step() ensures that it aborts before
  4085     it needs to yield.
  4087     The data structures that it uses to do marking work are the
  4088     following:
  4090       (1) Marking Bitmap. If there are gray objects that appear only
  4091       on the bitmap (this happens either when dealing with an overflow
  4092       or when the initial marking phase has simply marked the roots
  4093       and didn't push them on the stack), then tasks claim heap
  4094       regions whose bitmap they then scan to find gray objects. A
  4095       global finger indicates where the end of the last claimed region
  4096       is. A local finger indicates how far into the region a task has
  4097       scanned. The two fingers are used to determine how to gray an
  4098       object (i.e. whether simply marking it is OK, as it will be
  4099       visited by a task in the future, or whether it also needs to be
  4100       pushed on a stack).
  4102       (2) Local Queue. The local queue of the task which is accessed
  4103       reasonably efficiently by the task. Other tasks can steal from
  4104       it when they run out of work. Throughout the marking phase, a
  4105       task attempts to keep its local queue short but not totally
  4106       empty, so that entries are available for stealing by other
  4107       tasks. Only when there is no more work will a task totally
  4108       drain its local queue.
  4110       (3) Global Mark Stack. This handles local queue overflow. During
  4111       marking only sets of entries are moved between it and the local
  4112       queues, as access to it requires a mutex and more fine-grained
  4113       interaction with it, which might cause contention. If it
  4114       overflows, then the marking phase should restart and iterate
  4115       over the bitmap to identify gray objects. Throughout the marking
  4116       phase, tasks attempt to keep the global mark stack at a small
  4117       length but not totally empty, so that entries are available for
  4118       popping by other tasks. Only when there is no more work will tasks
  4119       will totally drain the global mark stack.
  4121       (4) SATB Buffer Queue. This is where completed SATB buffers are
  4122       made available. Buffers are regularly removed from this queue
  4123       and scanned for roots, so that the queue doesn't get too
  4124       long. During remark, all completed buffers are processed, as
  4125       well as the filled in parts of any uncompleted buffers.
  4127     The do_marking_step() method tries to abort when the time target
  4128     has been reached. There are a few other cases when the
  4129     do_marking_step() method also aborts:
  4131       (1) When the marking phase has been aborted (after a Full GC).
  4133       (2) When a global overflow (on the global stack) has been
  4134       triggered. Before the task aborts, it will actually sync up with
  4135       the other tasks to ensure that all the marking data structures
  4136       (local queues, stacks, fingers etc.)  are re-initialized so that
  4137       when do_marking_step() completes, the marking phase can
  4138       immediately restart.
  4140       (3) When enough completed SATB buffers are available. The
  4141       do_marking_step() method only tries to drain SATB buffers right
  4142       at the beginning. So, if enough buffers are available, the
  4143       marking step aborts and the SATB buffers are processed at
  4144       the beginning of the next invocation.
  4146       (4) To yield. When we have to yield, we abort and do the yield
  4147       right at the end of do_marking_step(). This saves us a lot
  4148       of hassle as, by yielding, we might allow a Full GC. If this
  4149       happens, then objects will be compacted underneath our feet, the
  4150       heap might shrink, etc. We avoid having to check for this by just
  4151       aborting and doing the yield right at the end.
  4153     From the above it follows that the do_marking_step() method should
  4154     be called in a loop (or, otherwise, regularly) until it completes.
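     A hedged sketch of such a driver loop (illustrative only; task, cm
     and target_ms stand in for the caller's own variables, and the
     actual callers in this file use their own time targets and flags):

       // Keep invoking the step until it completes without aborting
       // (or until marking as a whole has been aborted).
       do {
         task->do_marking_step(target_ms,
                               true  /* do_termination */,
                               false /* is_serial */);
       } while (task->has_aborted() && !cm->has_aborted());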
  4156     If a marking step completes without its has_aborted() flag being
  4157     true, it means it has completed the current marking phase (and
  4158     also all other marking tasks have done so and have all synced up).
  4160     A method called regular_clock_call() is invoked "regularly" (at
  4161     sub-ms intervals) throughout marking. It is this clock method that
  4162     checks all the abort conditions which were mentioned above and
  4163     decides when the task should abort. A work-based scheme is used to
  4164     trigger this clock method: it is called when the number of object
  4165     words the marking phase has scanned, or the number of references
  4166     the marking phase has visited, reaches a given limit. Additional
  4167     invocations of the clock method have been planted in a few other
  4168     strategic places too. The initial reason for the clock method was to avoid calling
  4169     vtime too regularly, as it is quite expensive. So, once it was in
  4170     place, it was natural to piggy-back all the other conditions on it
  4171     too and not constantly check them throughout the code.
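     A hedged sketch of that work-based trigger (illustrative only; the
     real limits are maintained by recalculate_limits() and the check
     appears in more than one place):

       // Once enough words have been scanned or enough references have
       // been visited, run the clock and re-evaluate the abort
       // conditions.
       if (_words_scanned >= _words_scanned_limit ||
           _refs_reached  >= _refs_reached_limit) {
         regular_clock_call();
       }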
  4173     If do_termination is true then do_marking_step will enter its
  4174     termination protocol.
  4176     The value of is_serial must be true when do_marking_step is being
  4177     called serially (i.e. by the VMThread) and do_marking_step should
  4178     skip any synchronization in the termination and overflow code.
  4179     Examples include the serial remark code and the serial reference
  4180     processing closures.
  4182     The value of is_serial must be false when do_marking_step is
  4183     being called by any of the worker threads in a work gang.
  4184     Examples include the concurrent marking code (CMMarkingTask),
  4185     the MT remark code, and the MT reference processing closures.
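     For example (hedged; the surrounding setup is omitted and the time
     target shown for the serial case is only illustrative), a serial
     remark-style caller and a worker-thread caller differ only in the
     last two arguments:

       // Serial caller (e.g. serial remark), skipping termination/overflow sync:
       task->do_marking_step(1000000000.0 /* effectively unlimited */,
                             true /* do_termination */,
                             true /* is_serial */);

       // Worker thread in a work gang:
       task->do_marking_step(target_ms,
                             true  /* do_termination */,
                             false /* is_serial */);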
  4187  *****************************************************************************/
  4189 void CMTask::do_marking_step(double time_target_ms,
  4190                              bool do_termination,
  4191                              bool is_serial) {
  4192   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
  4193   assert(concurrent() == _cm->concurrent(), "they should be the same");
  4195   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
  4196   assert(_task_queues != NULL, "invariant");
  4197   assert(_task_queue != NULL, "invariant");
  4198   assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
  4200   assert(!_claimed,
  4201          "only one thread should claim this task at any one time");
  4203   // OK, this doesn't safeguard against all possible scenarios, as it is
  4204   // possible for two threads to set the _claimed flag at the same
  4205   // time. But it is only for debugging purposes anyway and it will
  4206   // catch most problems.
  4207   _claimed = true;
  4209   _start_time_ms = os::elapsedVTime() * 1000.0;
  4210   statsOnly( _interval_start_time_ms = _start_time_ms );
  4212   // If do_stealing is true then do_marking_step will attempt to
  4213   // steal work from the other CMTasks. It only makes sense to
  4214   // enable stealing when the termination protocol is enabled
  4215   // and do_marking_step() is not being called serially.
  4216   bool do_stealing = do_termination && !is_serial;
  4218   double diff_prediction_ms =
  4219     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
  4220   _time_target_ms = time_target_ms - diff_prediction_ms;
  4222   // set up the variables that are used in the work-based scheme to
  4223   // call the regular clock method
  4224   _words_scanned = 0;
  4225   _refs_reached  = 0;
  4226   recalculate_limits();
  4228   // clear all flags
  4229   clear_has_aborted();
  4230   _has_timed_out = false;
  4231   _draining_satb_buffers = false;
  4233   ++_calls;
  4235   if (_cm->verbose_low()) {
  4236     gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, "
  4237                            "target = %1.2lfms >>>>>>>>>>",
  4238                            _worker_id, _calls, _time_target_ms);
  4241   // Set up the bitmap and oop closures. Anything that uses them is
  4242   // eventually called from this method, so it is OK to allocate them
  4243   // locally, on this method's stack frame.
  4244   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
  4245   G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
  4246   set_cm_oop_closure(&cm_oop_closure);
  4248   if (_cm->has_overflown()) {
  4249     // This can happen if the mark stack overflows during a GC pause
  4250     // and this task, after a yield point, restarts. We have to abort
  4251     // as we need to get into the overflow protocol which happens
  4252     // right at the end of this task.
  4253     set_has_aborted();
  4256   // First drain any available SATB buffers. After this, we will not
  4257   // look at SATB buffers before the next invocation of this method.
  4258   // If enough completed SATB buffers are queued up, the regular clock
  4259   // will abort this task so that it restarts.
  4260   drain_satb_buffers();
  4261   // ...then partially drain the local queue and the global stack
  4262   drain_local_queue(true);
  4263   drain_global_stack(true);
  4265   do {
  4266     if (!has_aborted() && _curr_region != NULL) {
  4267       // This means that we're already holding on to a region.
  4268       assert(_finger != NULL, "if region is not NULL, then the finger "
  4269              "should not be NULL either");
  4271       // We might have restarted this task after an evacuation pause
  4272       // which might have evacuated the region we're holding on to
  4273       // underneath our feet. Let's read its limit again to make sure
  4274       // that we do not iterate over a region of the heap that
  4275       // contains garbage (update_region_limit() will also move
  4276       // _finger to the start of the region if it is found empty).
  4277       update_region_limit();
  4278       // We will start from _finger not from the start of the region,
  4279       // as we might be restarting this task after aborting half-way
  4280       // through scanning this region. In this case, _finger points to
  4281       // the address where we last found a marked object. If this is a
  4282       // fresh region, _finger points to start().
  4283       MemRegion mr = MemRegion(_finger, _region_limit);
  4285       if (_cm->verbose_low()) {
  4286         gclog_or_tty->print_cr("[%u] we're scanning part "
  4287                                "["PTR_FORMAT", "PTR_FORMAT") "
  4288                                "of region "HR_FORMAT,
  4289                                _worker_id, p2i(_finger), p2i(_region_limit),
  4290                                HR_FORMAT_PARAMS(_curr_region));
  4293       assert(!_curr_region->isHumongous() || mr.start() == _curr_region->bottom(),
  4294              "humongous regions should go around loop once only");
  4296       // Some special cases:
  4297       // If the memory region is empty, we can just give up the region.
  4298       // If the current region is humongous then we only need to check
  4299       // the bitmap for the bit associated with the start of the object,
  4300       // scan the object if it's live, and give up the region.
  4301       // Otherwise, let's iterate over the bitmap of the part of the region
  4302       // that is left.
  4303       // If the iteration is successful, give up the region.
  4304       if (mr.is_empty()) {
  4305         giveup_current_region();
  4306         regular_clock_call();
  4307       } else if (_curr_region->isHumongous() && mr.start() == _curr_region->bottom()) {
  4308         if (_nextMarkBitMap->isMarked(mr.start())) {
  4309           // The object is marked - apply the closure
  4310           BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
  4311           bitmap_closure.do_bit(offset);
  4313         // Even if this task aborted while scanning the humongous object
  4314         // we can (and should) give up the current region.
  4315         giveup_current_region();
  4316         regular_clock_call();
  4317       } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
  4318         giveup_current_region();
  4319         regular_clock_call();
  4320       } else {
  4321         assert(has_aborted(), "currently the only way to do so");
  4322         // The only way to abort the bitmap iteration is to return
  4323         // false from the do_bit() method. However, inside the
  4324         // do_bit() method we move the _finger to point to the
  4325         // object currently being looked at. So, if we bail out, we
  4326         // have definitely set _finger to something non-null.
  4327         assert(_finger != NULL, "invariant");
  4329         // Region iteration was actually aborted. So now _finger
  4330         // points to the address of the object we last scanned. If we
  4331         // leave it there, when we restart this task, we will rescan
  4332         // the object. It is easy to avoid this. We move the finger by
  4333         // enough to point to the next possible object header (the
  4334         // bitmap knows by how much we need to move it as it knows its
  4335         // granularity).
  4336         assert(_finger < _region_limit, "invariant");
  4337         HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
  4338         // Check if bitmap iteration was aborted while scanning the last object
  4339         if (new_finger >= _region_limit) {
  4340           giveup_current_region();
  4341         } else {
  4342           move_finger_to(new_finger);
  4346     // At this point we have either completed iterating over the
  4347     // region we were holding on to, or we have aborted.
  4349     // We then partially drain the local queue and the global stack.
  4350     // (Do we really need this?)
  4351     drain_local_queue(true);
  4352     drain_global_stack(true);
  4354     // Read the note on the claim_region() method on why it might
  4355     // return NULL with potentially more regions available for
  4356     // claiming and why we have to check out_of_regions() to determine
  4357     // whether we're done or not.
  4358     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
  4359       // We are going to try to claim a new region. We should have
  4360       // given up on the previous one.
  4361       // Separated the asserts so that we know which one fires.
  4362       assert(_curr_region  == NULL, "invariant");
  4363       assert(_finger       == NULL, "invariant");
  4364       assert(_region_limit == NULL, "invariant");
  4365       if (_cm->verbose_low()) {
  4366         gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id);
  4368       HeapRegion* claimed_region = _cm->claim_region(_worker_id);
  4369       if (claimed_region != NULL) {
  4370         // Yes, we managed to claim one
  4371         statsOnly( ++_regions_claimed );
  4373         if (_cm->verbose_low()) {
  4374           gclog_or_tty->print_cr("[%u] we successfully claimed "
  4375                                  "region "PTR_FORMAT,
  4376                                  _worker_id, p2i(claimed_region));
  4379         setup_for_region(claimed_region);
  4380         assert(_curr_region == claimed_region, "invariant");
  4382       // It is important to call the regular clock here. It might take
  4383       // a while to claim a region if, for example, we hit a large
  4384       // block of empty regions. So we need to call the regular clock
  4385       // method once round the loop to make sure it's called
  4386       // frequently enough.
  4387       regular_clock_call();
  4390     if (!has_aborted() && _curr_region == NULL) {
  4391       assert(_cm->out_of_regions(),
  4392              "at this point we should be out of regions");
  4394   } while ( _curr_region != NULL && !has_aborted());
  4396   if (!has_aborted()) {
  4397     // We cannot check whether the global stack is empty, since other
  4398     // tasks might be pushing objects to it concurrently.
  4399     assert(_cm->out_of_regions(),
  4400            "at this point we should be out of regions");
  4402     if (_cm->verbose_low()) {
  4403       gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id);
  4406     // Try to reduce the number of available SATB buffers so that
  4407     // remark has less work to do.
  4408     drain_satb_buffers();
  4411   // Since we've done everything else, we can now totally drain the
  4412   // local queue and global stack.
  4413   drain_local_queue(false);
  4414   drain_global_stack(false);
  4416   // Attempt at work stealing from other tasks' queues.
  4417   if (do_stealing && !has_aborted()) {
  4418     // We have not aborted. This means that we have finished all that
  4419     // we could. Let's try to do some stealing...
  4421     // We cannot check whether the global stack is empty, since other
  4422     // tasks might be pushing objects to it concurrently.
  4423     assert(_cm->out_of_regions() && _task_queue->size() == 0,
  4424            "only way to reach here");
  4426     if (_cm->verbose_low()) {
  4427       gclog_or_tty->print_cr("[%u] starting to steal", _worker_id);
  4430     while (!has_aborted()) {
  4431       oop obj;
  4432       statsOnly( ++_steal_attempts );
  4434       if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
  4435         if (_cm->verbose_medium()) {
  4436           gclog_or_tty->print_cr("[%u] stolen "PTR_FORMAT" successfully",
  4437                                  _worker_id, p2i((void*) obj));
  4440         statsOnly( ++_steals );
  4442         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
  4443                "any stolen object should be marked");
  4444         scan_object(obj);
  4446         // And since we're towards the end, let's totally drain the
  4447         // local queue and global stack.
  4448         drain_local_queue(false);
  4449         drain_global_stack(false);
  4450       } else {
  4451         break;
  4456   // If we are about to wrap up and go into termination, check if we
  4457   // should raise the overflow flag.
  4458   if (do_termination && !has_aborted()) {
  4459     if (_cm->force_overflow()->should_force()) {
  4460       _cm->set_has_overflown();
  4461       regular_clock_call();
  4465   // We still haven't aborted. Now, let's try to get into the
  4466   // termination protocol.
  4467   if (do_termination && !has_aborted()) {
  4468     // We cannot check whether the global stack is empty, since other
  4469     // tasks might be concurrently pushing objects on it.
  4470     // Separated the asserts so that we know which one fires.
  4471     assert(_cm->out_of_regions(), "only way to reach here");
  4472     assert(_task_queue->size() == 0, "only way to reach here");
  4474     if (_cm->verbose_low()) {
  4475       gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id);
  4478     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
  4480     // The CMTask class also extends the TerminatorTerminator class,
  4481     // hence its should_exit_termination() method will also decide
  4482     // whether to exit the termination protocol or not.
  4483     bool finished = (is_serial ||
  4484                      _cm->terminator()->offer_termination(this));
  4485     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
  4486     _termination_time_ms +=
  4487       termination_end_time_ms - _termination_start_time_ms;
  4489     if (finished) {
  4490       // We're all done.
  4492       if (_worker_id == 0) {
  4493         // let's allow task 0 to do this
  4494         if (concurrent()) {
  4495           assert(_cm->concurrent_marking_in_progress(), "invariant");
  4496           // we need to set this to false before the next
  4497           // safepoint. This way we ensure that the marking phase
  4498           // doesn't observe any more heap expansions.
  4499           _cm->clear_concurrent_marking_in_progress();
  4503       // We can now guarantee that the global stack is empty, since
  4504       // all other tasks have finished. We separated the guarantees so
  4505       // that, if a condition is false, we can immediately find out
  4506       // which one.
  4507       guarantee(_cm->out_of_regions(), "only way to reach here");
  4508       guarantee(_cm->mark_stack_empty(), "only way to reach here");
  4509       guarantee(_task_queue->size() == 0, "only way to reach here");
  4510       guarantee(!_cm->has_overflown(), "only way to reach here");
  4511       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
  4513       if (_cm->verbose_low()) {
  4514         gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id);
  4516     } else {
  4517       // Apparently there's more work to do. Let's abort this task. The
  4518       // caller will restart it and we can hopefully find more things to do.
  4520       if (_cm->verbose_low()) {
  4521         gclog_or_tty->print_cr("[%u] apparently there is more work to do",
  4522                                _worker_id);
  4525       set_has_aborted();
  4526       statsOnly( ++_aborted_termination );
  4530   // Mainly for debugging purposes to make sure that a pointer to the
  4531   // closure which was allocated on the stack in this frame doesn't
  4532   // escape it by accident.
  4533   set_cm_oop_closure(NULL);
  4534   double end_time_ms = os::elapsedVTime() * 1000.0;
  4535   double elapsed_time_ms = end_time_ms - _start_time_ms;
  4536   // Update the step history.
  4537   _step_times_ms.add(elapsed_time_ms);
  4539   if (has_aborted()) {
  4540     // The task was aborted for some reason.
  4542     statsOnly( ++_aborted );
  4544     if (_has_timed_out) {
  4545       double diff_ms = elapsed_time_ms - _time_target_ms;
  4546       // Keep statistics of how well we did with respect to hitting
  4547       // our target only if we actually timed out (if we aborted for
  4548       // other reasons, then the results might get skewed).
  4549       _marking_step_diffs_ms.add(diff_ms);
  4552     if (_cm->has_overflown()) {
  4553       // This is the interesting one. We aborted because a global
  4554       // overflow was raised. This means we have to restart the
  4555       // marking phase and start iterating over regions. However, in
  4556       // order to do this we have to make sure that all tasks stop
  4557       // what they are doing and re-initialise in a safe manner. We
  4558       // will achieve this with the use of two barrier sync points.
  4560       if (_cm->verbose_low()) {
  4561         gclog_or_tty->print_cr("[%u] detected overflow", _worker_id);
  4564       if (!is_serial) {
  4565         // We only need to enter the sync barrier if being called
  4566         // from a parallel context
  4567         _cm->enter_first_sync_barrier(_worker_id);
  4569         // When we exit this sync barrier we know that all tasks have
  4570         // stopped doing marking work. So, it's now safe to
  4571         // re-initialise our data structures. At the end of this method,
  4572         // task 0 will clear the global data structures.
  4575       statsOnly( ++_aborted_overflow );
  4577       // We clear the local state of this task...
  4578       clear_region_fields();
  4580       if (!is_serial) {
  4581         // ...and enter the second barrier.
  4582         _cm->enter_second_sync_barrier(_worker_id);
  4584       // At this point, if we're in the concurrent phase of
  4585       // marking, everything has been re-initialized and we're
  4586       // ready to restart.
  4589     if (_cm->verbose_low()) {
  4590       gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, "
  4591                              "elapsed = %1.2lfms <<<<<<<<<<",
  4592                              _worker_id, _time_target_ms, elapsed_time_ms);
  4593       if (_cm->has_aborted()) {
  4594         gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========",
  4595                                _worker_id);
  4598   } else {
  4599     if (_cm->verbose_low()) {
  4600       gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, "
  4601                              "elapsed = %1.2lfms <<<<<<<<<<",
  4602                              _worker_id, _time_target_ms, elapsed_time_ms);
  4606   _claimed = false;
  4609 CMTask::CMTask(uint worker_id,
  4610                ConcurrentMark* cm,
  4611                size_t* marked_bytes,
  4612                BitMap* card_bm,
  4613                CMTaskQueue* task_queue,
  4614                CMTaskQueueSet* task_queues)
  4615   : _g1h(G1CollectedHeap::heap()),
  4616     _worker_id(worker_id), _cm(cm),
  4617     _claimed(false),
  4618     _nextMarkBitMap(NULL), _hash_seed(17),
  4619     _task_queue(task_queue),
  4620     _task_queues(task_queues),
  4621     _cm_oop_closure(NULL),
  4622     _marked_bytes_array(marked_bytes),
  4623     _card_bm(card_bm) {
  4624   guarantee(task_queue != NULL, "invariant");
  4625   guarantee(task_queues != NULL, "invariant");
  4627   statsOnly( _clock_due_to_scanning = 0;
  4628              _clock_due_to_marking  = 0 );
  4630   _marking_step_diffs_ms.add(0.5);
  4633 // These are formatting macros that are used below to ensure
  4634 // consistent formatting. The *_H_* versions are used to format the
  4635 // header for a particular value and they should be kept consistent
  4636 // with the corresponding macro. Also note that most of the macros add
  4637 // the necessary white space (as a prefix) which makes them a bit
  4638 // easier to compose.
  4640 // All the output lines are prefixed with this string to be able to
  4641 // identify them easily in a large log file.
  4642 #define G1PPRL_LINE_PREFIX            "###"
  4644 #define G1PPRL_ADDR_BASE_FORMAT    " "PTR_FORMAT"-"PTR_FORMAT
  4645 #ifdef _LP64
  4646 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
  4647 #else // _LP64
  4648 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
  4649 #endif // _LP64
  4651 // For per-region info
  4652 #define G1PPRL_TYPE_FORMAT            "   %-4s"
  4653 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
  4654 #define G1PPRL_BYTE_FORMAT            "  "SIZE_FORMAT_W(9)
  4655 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
  4656 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
  4657 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
  4659 // For summary info
  4660 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  "tag":"G1PPRL_ADDR_BASE_FORMAT
  4661 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  "tag": "SIZE_FORMAT
  4662 #define G1PPRL_SUM_MB_FORMAT(tag)      "  "tag": %1.2f MB"
  4663 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
  4665 G1PrintRegionLivenessInfoClosure::
  4666 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
  4667   : _out(out),
  4668     _total_used_bytes(0), _total_capacity_bytes(0),
  4669     _total_prev_live_bytes(0), _total_next_live_bytes(0),
  4670     _hum_used_bytes(0), _hum_capacity_bytes(0),
  4671     _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
  4672     _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
  4673   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  4674   MemRegion g1_committed = g1h->g1_committed();
  4675   MemRegion g1_reserved = g1h->g1_reserved();
  4676   double now = os::elapsedTime();
  4678   // Print the header of the output.
  4679   _out->cr();
  4680   _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
  4681   _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
  4682                  G1PPRL_SUM_ADDR_FORMAT("committed")
  4683                  G1PPRL_SUM_ADDR_FORMAT("reserved")
  4684                  G1PPRL_SUM_BYTE_FORMAT("region-size"),
  4685                  p2i(g1_committed.start()), p2i(g1_committed.end()),
  4686                  p2i(g1_reserved.start()), p2i(g1_reserved.end()),
  4687                  HeapRegion::GrainBytes);
  4688   _out->print_cr(G1PPRL_LINE_PREFIX);
  4689   _out->print_cr(G1PPRL_LINE_PREFIX
  4690                 G1PPRL_TYPE_H_FORMAT
  4691                 G1PPRL_ADDR_BASE_H_FORMAT
  4692                 G1PPRL_BYTE_H_FORMAT
  4693                 G1PPRL_BYTE_H_FORMAT
  4694                 G1PPRL_BYTE_H_FORMAT
  4695                 G1PPRL_DOUBLE_H_FORMAT
  4696                 G1PPRL_BYTE_H_FORMAT
  4697                 G1PPRL_BYTE_H_FORMAT,
  4698                 "type", "address-range",
  4699                 "used", "prev-live", "next-live", "gc-eff",
  4700                 "remset", "code-roots");
  4701   _out->print_cr(G1PPRL_LINE_PREFIX
  4702                 G1PPRL_TYPE_H_FORMAT
  4703                 G1PPRL_ADDR_BASE_H_FORMAT
  4704                 G1PPRL_BYTE_H_FORMAT
  4705                 G1PPRL_BYTE_H_FORMAT
  4706                 G1PPRL_BYTE_H_FORMAT
  4707                 G1PPRL_DOUBLE_H_FORMAT
  4708                 G1PPRL_BYTE_H_FORMAT
  4709                 G1PPRL_BYTE_H_FORMAT,
  4710                 "", "",
  4711                 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
  4712                 "(bytes)", "(bytes)");
  4715 // It takes as a parameter a pointer to one of the _hum_* fields,
  4716 // deduces the corresponding value for a region in a humongous region
  4717 // series (either the region size, or what's left if the _hum_* field
  4718 // is < the region size), and updates the _hum_* field accordingly.
  4719 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
  4720   size_t bytes = 0;
  4721   // The > 0 check is to deal with the prev and next live bytes which
  4722   // could be 0.
  4723   if (*hum_bytes > 0) {
  4724     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
  4725     *hum_bytes -= bytes;
  4727   return bytes;
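// A hedged worked example of the deduction above: with 1 MB regions
// (HeapRegion::GrainBytes) and a humongous series whose "starts
// humongous" region recorded _hum_used_bytes = 2.5 MB, three successive
// calls on that field would return 1 MB, 1 MB and 0.5 MB respectively,
// leaving the field at zero for the next series.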
  4730 // It deduces the values for a region in a humongous region series
  4731 // from the _hum_* fields and updates those accordingly. It assumes
  4732 // that the _hum_* fields have already been set up from the "starts
  4733 // humongous" region and that we visit the regions in address order.
  4734 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
  4735                                                      size_t* capacity_bytes,
  4736                                                      size_t* prev_live_bytes,
  4737                                                      size_t* next_live_bytes) {
  4738   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
  4739   *used_bytes      = get_hum_bytes(&_hum_used_bytes);
  4740   *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
  4741   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
  4742   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
  4745 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
  4746   const char* type = "";
  4747   HeapWord* bottom       = r->bottom();
  4748   HeapWord* end          = r->end();
  4749   size_t capacity_bytes  = r->capacity();
  4750   size_t used_bytes      = r->used();
  4751   size_t prev_live_bytes = r->live_bytes();
  4752   size_t next_live_bytes = r->next_live_bytes();
  4753   double gc_eff          = r->gc_efficiency();
  4754   size_t remset_bytes    = r->rem_set()->mem_size();
  4755   size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
  4757   if (r->used() == 0) {
  4758     type = "FREE";
  4759   } else if (r->is_survivor()) {
  4760     type = "SURV";
  4761   } else if (r->is_young()) {
  4762     type = "EDEN";
  4763   } else if (r->startsHumongous()) {
  4764     type = "HUMS";
  4766     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
  4767            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
  4768            "they should have been zeroed after the last time we used them");
  4769     // Set up the _hum_* fields.
  4770     _hum_capacity_bytes  = capacity_bytes;
  4771     _hum_used_bytes      = used_bytes;
  4772     _hum_prev_live_bytes = prev_live_bytes;
  4773     _hum_next_live_bytes = next_live_bytes;
  4774     get_hum_bytes(&used_bytes, &capacity_bytes,
  4775                   &prev_live_bytes, &next_live_bytes);
  4776     end = bottom + HeapRegion::GrainWords;
  4777   } else if (r->continuesHumongous()) {
  4778     type = "HUMC";
  4779     get_hum_bytes(&used_bytes, &capacity_bytes,
  4780                   &prev_live_bytes, &next_live_bytes);
  4781     assert(end == bottom + HeapRegion::GrainWords, "invariant");
  4782   } else {
  4783     type = "OLD";
  4786   _total_used_bytes      += used_bytes;
  4787   _total_capacity_bytes  += capacity_bytes;
  4788   _total_prev_live_bytes += prev_live_bytes;
  4789   _total_next_live_bytes += next_live_bytes;
  4790   _total_remset_bytes    += remset_bytes;
  4791   _total_strong_code_roots_bytes += strong_code_roots_bytes;
  4793   // Print a line for this particular region.
  4794   _out->print_cr(G1PPRL_LINE_PREFIX
  4795                  G1PPRL_TYPE_FORMAT
  4796                  G1PPRL_ADDR_BASE_FORMAT
  4797                  G1PPRL_BYTE_FORMAT
  4798                  G1PPRL_BYTE_FORMAT
  4799                  G1PPRL_BYTE_FORMAT
  4800                  G1PPRL_DOUBLE_FORMAT
  4801                  G1PPRL_BYTE_FORMAT
  4802                  G1PPRL_BYTE_FORMAT,
  4803                  type, p2i(bottom), p2i(end),
  4804                  used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
  4805                  remset_bytes, strong_code_roots_bytes);
  4807   return false;
  4810 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
  4811   // add static memory usages to remembered set sizes
  4812   _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
  4813   // Print the footer of the output.
  4814   _out->print_cr(G1PPRL_LINE_PREFIX);
  4815   _out->print_cr(G1PPRL_LINE_PREFIX
  4816                  " SUMMARY"
  4817                  G1PPRL_SUM_MB_FORMAT("capacity")
  4818                  G1PPRL_SUM_MB_PERC_FORMAT("used")
  4819                  G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
  4820                  G1PPRL_SUM_MB_PERC_FORMAT("next-live")
  4821                  G1PPRL_SUM_MB_FORMAT("remset")
  4822                  G1PPRL_SUM_MB_FORMAT("code-roots"),
  4823                  bytes_to_mb(_total_capacity_bytes),
  4824                  bytes_to_mb(_total_used_bytes),
  4825                  perc(_total_used_bytes, _total_capacity_bytes),
  4826                  bytes_to_mb(_total_prev_live_bytes),
  4827                  perc(_total_prev_live_bytes, _total_capacity_bytes),
  4828                  bytes_to_mb(_total_next_live_bytes),
  4829                  perc(_total_next_live_bytes, _total_capacity_bytes),
  4830                  bytes_to_mb(_total_remset_bytes),
  4831                  bytes_to_mb(_total_strong_code_roots_bytes));
  4832   _out->cr();
