src/share/vm/gc_implementation/g1/concurrentMark.cpp

author:      mdoerr
date:        Tue, 29 Jul 2014 10:26:09 +0200
changeset:   7020:e02e18f40eae
parent:      7016:3bf2fc51186b
child:       7024:bfba6779654b
permissions: -rw-r--r--

8050973: CMS/G1 GC: add missing Resource and Handle mark
Summary: Add Resource/HandleMark in the work() method of some AbstractGangTask to reclaim these resources earlier.
Reviewed-by: tschatzl, goetz
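For context, a minimal sketch of the pattern this change applies, assuming a hypothetical gang task (ExampleGangTask and its body are illustrative, not taken from this file): placing a ResourceMark, and a HandleMark where handles are created, at the top of work() releases a worker's resource-area allocations and handles when work() returns instead of letting them accumulate. CMConcurrentMarkingTask::work() in the listing below starts with such a ResourceMark.

// Sketch only: a hypothetical AbstractGangTask showing where the marks go.
class ExampleGangTask : public AbstractGangTask {
public:
  ExampleGangTask() : AbstractGangTask("Example Task") { }

  void work(uint worker_id) {
    ResourceMark rm;  // reclaim resource-area allocations made by this worker
    HandleMark   hm;  // pop any handles created while doing the work
    // ... per-worker processing that may allocate in the resource area ...
  }
};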

     1 /*
     2  * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.
     8  *
     9  * This code is distributed in the hope that it will be useful, but WITHOUT
    10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    12  * version 2 for more details (a copy is included in the LICENSE file that
    13  * accompanied this code).
    14  *
    15  * You should have received a copy of the GNU General Public License version
    16  * 2 along with this work; if not, write to the Free Software Foundation,
    17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    18  *
    19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    20  * or visit www.oracle.com if you need additional information or have any
    21  * questions.
    22  *
    23  */
    25 #include "precompiled.hpp"
    26 #include "classfile/symbolTable.hpp"
    27 #include "code/codeCache.hpp"
    28 #include "gc_implementation/g1/concurrentMark.inline.hpp"
    29 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
    30 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
    31 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
    32 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
    33 #include "gc_implementation/g1/g1Log.hpp"
    34 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
    35 #include "gc_implementation/g1/g1RemSet.hpp"
    36 #include "gc_implementation/g1/heapRegion.inline.hpp"
    37 #include "gc_implementation/g1/heapRegionRemSet.hpp"
    38 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
    39 #include "gc_implementation/shared/vmGCOperations.hpp"
    40 #include "gc_implementation/shared/gcTimer.hpp"
    41 #include "gc_implementation/shared/gcTrace.hpp"
    42 #include "gc_implementation/shared/gcTraceTime.hpp"
    43 #include "memory/allocation.hpp"
    44 #include "memory/genOopClosures.inline.hpp"
    45 #include "memory/referencePolicy.hpp"
    46 #include "memory/resourceArea.hpp"
    47 #include "oops/oop.inline.hpp"
    48 #include "runtime/handles.inline.hpp"
    49 #include "runtime/java.hpp"
    50 #include "runtime/prefetch.inline.hpp"
    51 #include "services/memTracker.hpp"
    53 // Concurrent marking bit map wrapper
    55 CMBitMapRO::CMBitMapRO(int shifter) :
    56   _bm(),
    57   _shifter(shifter) {
    58   _bmStartWord = 0;
    59   _bmWordSize = 0;
    60 }
    62 HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
    63                                                const HeapWord* limit) const {
    64   // First we must round addr *up* to a possible object boundary.
    65   addr = (HeapWord*)align_size_up((intptr_t)addr,
    66                                   HeapWordSize << _shifter);
    67   size_t addrOffset = heapWordToOffset(addr);
    68   if (limit == NULL) {
    69     limit = _bmStartWord + _bmWordSize;
    70   }
    71   size_t limitOffset = heapWordToOffset(limit);
    72   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
    73   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
    74   assert(nextAddr >= addr, "get_next_one postcondition");
    75   assert(nextAddr == limit || isMarked(nextAddr),
    76          "get_next_one postcondition");
    77   return nextAddr;
    78 }
    80 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(const HeapWord* addr,
    81                                                  const HeapWord* limit) const {
    82   size_t addrOffset = heapWordToOffset(addr);
    83   if (limit == NULL) {
    84     limit = _bmStartWord + _bmWordSize;
    85   }
    86   size_t limitOffset = heapWordToOffset(limit);
    87   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
    88   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
    89   assert(nextAddr >= addr, "get_next_one postcondition");
    90   assert(nextAddr == limit || !isMarked(nextAddr),
    91          "get_next_one postcondition");
    92   return nextAddr;
    93 }
    95 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
    96   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
    97   return (int) (diff >> _shifter);
    98 }
   100 #ifndef PRODUCT
   101 bool CMBitMapRO::covers(ReservedSpace heap_rs) const {
   102   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
   103   assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
   104          "size inconsistency");
   105   return _bmStartWord == (HeapWord*)(heap_rs.base()) &&
   106          _bmWordSize  == heap_rs.size()>>LogHeapWordSize;
   107 }
   108 #endif
   110 void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
   111   _bm.print_on_error(st, prefix);
   112 }
   114 bool CMBitMap::allocate(ReservedSpace heap_rs) {
   115   _bmStartWord = (HeapWord*)(heap_rs.base());
   116   _bmWordSize  = heap_rs.size()/HeapWordSize;    // heap_rs.size() is in bytes
   117   ReservedSpace brs(ReservedSpace::allocation_align_size_up(
   118                      (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
   119   if (!brs.is_reserved()) {
   120     warning("ConcurrentMark marking bit map allocation failure");
   121     return false;
   122   }
   123   MemTracker::record_virtual_memory_type((address)brs.base(), mtGC);
   124   // For now we'll just commit all of the bit map up front.
   125   // Later on we'll try to be more parsimonious with swap.
   126   if (!_virtual_space.initialize(brs, brs.size())) {
   127     warning("ConcurrentMark marking bit map backing store failure");
   128     return false;
   129   }
   130   assert(_virtual_space.committed_size() == brs.size(),
   131          "didn't reserve backing store for all of concurrent marking bit map?");
   132   _bm.set_map((BitMap::bm_word_t*)_virtual_space.low());
   133   assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
   134          _bmWordSize, "inconsistency in bit map sizing");
   135   _bm.set_size(_bmWordSize >> _shifter);
   136   return true;
   137 }
   139 void CMBitMap::clearAll() {
   140   _bm.clear();
   141   return;
   142 }
   144 void CMBitMap::markRange(MemRegion mr) {
   145   mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
   146   assert(!mr.is_empty(), "unexpected empty region");
   147   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
   148           ((HeapWord *) mr.end())),
   149          "markRange memory region end is not card aligned");
   150   // convert address range into offset range
   151   _bm.at_put_range(heapWordToOffset(mr.start()),
   152                    heapWordToOffset(mr.end()), true);
   153 }
   155 void CMBitMap::clearRange(MemRegion mr) {
   156   mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
   157   assert(!mr.is_empty(), "unexpected empty region");
   158   // convert address range into offset range
   159   _bm.at_put_range(heapWordToOffset(mr.start()),
   160                    heapWordToOffset(mr.end()), false);
   161 }
   163 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
   164                                             HeapWord* end_addr) {
   165   HeapWord* start = getNextMarkedWordAddress(addr);
   166   start = MIN2(start, end_addr);
   167   HeapWord* end   = getNextUnmarkedWordAddress(start);
   168   end = MIN2(end, end_addr);
   169   assert(start <= end, "Consistency check");
   170   MemRegion mr(start, end);
   171   if (!mr.is_empty()) {
   172     clearRange(mr);
   173   }
   174   return mr;
   175 }
   177 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
   178   _base(NULL), _cm(cm)
   179 #ifdef ASSERT
   180   , _drain_in_progress(false)
   181   , _drain_in_progress_yields(false)
   182 #endif
   183 {}
   185 bool CMMarkStack::allocate(size_t capacity) {
   186   // allocate a stack of the requisite depth
   187   ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
   188   if (!rs.is_reserved()) {
   189     warning("ConcurrentMark MarkStack allocation failure");
   190     return false;
   191   }
   192   MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
   193   if (!_virtual_space.initialize(rs, rs.size())) {
   194     warning("ConcurrentMark MarkStack backing store failure");
   195     // Release the virtual memory reserved for the marking stack
   196     rs.release();
   197     return false;
   198   }
   199   assert(_virtual_space.committed_size() == rs.size(),
   200          "Didn't reserve backing store for all of ConcurrentMark stack?");
   201   _base = (oop*) _virtual_space.low();
   202   setEmpty();
   203   _capacity = (jint) capacity;
   204   _saved_index = -1;
   205   _should_expand = false;
   206   NOT_PRODUCT(_max_depth = 0);
   207   return true;
   208 }
   210 void CMMarkStack::expand() {
   211   // Called during remark if we've overflown the marking stack during marking.
   212   assert(isEmpty(), "stack should have been emptied while handling overflow");
   213   assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
   214   // Clear expansion flag
   215   _should_expand = false;
   216   if (_capacity == (jint) MarkStackSizeMax) {
   217     if (PrintGCDetails && Verbose) {
   218       gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
   219     }
   220     return;
   221   }
   222   // Double capacity if possible
   223   jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
   224   // Do not give up existing stack until we have managed to
   225   // get the double capacity that we desired.
   226   ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
   227                                                            sizeof(oop)));
   228   if (rs.is_reserved()) {
   229     // Release the backing store associated with old stack
   230     _virtual_space.release();
   231     // Reinitialize virtual space for new stack
   232     if (!_virtual_space.initialize(rs, rs.size())) {
   233       fatal("Not enough swap for expanded marking stack capacity");
   234     }
   235     _base = (oop*)(_virtual_space.low());
   236     _index = 0;
   237     _capacity = new_capacity;
   238   } else {
   239     if (PrintGCDetails && Verbose) {
   240       // Failed to double capacity, continue;
   241       gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
   242                           SIZE_FORMAT"K to " SIZE_FORMAT"K",
   243                           _capacity / K, new_capacity / K);
   244     }
   245   }
   246 }
   248 void CMMarkStack::set_should_expand() {
   249   // If we're resetting the marking state because of a
   250   // marking stack overflow, record that we should, if
   251   // possible, expand the stack.
   252   _should_expand = _cm->has_overflown();
   253 }
   255 CMMarkStack::~CMMarkStack() {
   256   if (_base != NULL) {
   257     _base = NULL;
   258     _virtual_space.release();
   259   }
   260 }
   262 void CMMarkStack::par_push(oop ptr) {
   263   while (true) {
   264     if (isFull()) {
   265       _overflow = true;
   266       return;
   267     }
   268     // Otherwise...
   269     jint index = _index;
   270     jint next_index = index+1;
   271     jint res = Atomic::cmpxchg(next_index, &_index, index);
   272     if (res == index) {
   273       _base[index] = ptr;
   274       // Note that we don't maintain this atomically.  We could, but it
   275       // doesn't seem necessary.
   276       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
   277       return;
   278     }
   279     // Otherwise, we need to try again.
   280   }
   281 }
   283 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
   284   while (true) {
   285     if (isFull()) {
   286       _overflow = true;
   287       return;
   288     }
   289     // Otherwise...
   290     jint index = _index;
   291     jint next_index = index + n;
   292     if (next_index > _capacity) {
   293       _overflow = true;
   294       return;
   295     }
   296     jint res = Atomic::cmpxchg(next_index, &_index, index);
   297     if (res == index) {
   298       for (int i = 0; i < n; i++) {
   299         int  ind = index + i;
   300         assert(ind < _capacity, "By overflow test above.");
   301         _base[ind] = ptr_arr[i];
   302       }
   303       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
   304       return;
   305     }
   306     // Otherwise, we need to try again.
   307   }
   308 }
   310 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
   311   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
   312   jint start = _index;
   313   jint next_index = start + n;
   314   if (next_index > _capacity) {
   315     _overflow = true;
   316     return;
   317   }
   318   // Otherwise.
   319   _index = next_index;
   320   for (int i = 0; i < n; i++) {
   321     int ind = start + i;
   322     assert(ind < _capacity, "By overflow test above.");
   323     _base[ind] = ptr_arr[i];
   324   }
   325   NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
   326 }
   328 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
   329   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
   330   jint index = _index;
   331   if (index == 0) {
   332     *n = 0;
   333     return false;
   334   } else {
   335     int k = MIN2(max, index);
   336     jint  new_ind = index - k;
   337     for (int j = 0; j < k; j++) {
   338       ptr_arr[j] = _base[new_ind + j];
   339     }
   340     _index = new_ind;
   341     *n = k;
   342     return true;
   343   }
   344 }
   346 template<class OopClosureClass>
   347 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
   348   assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
   349          || SafepointSynchronize::is_at_safepoint(),
   350          "Drain recursion must be yield-safe.");
   351   bool res = true;
   352   debug_only(_drain_in_progress = true);
   353   debug_only(_drain_in_progress_yields = yield_after);
   354   while (!isEmpty()) {
   355     oop newOop = pop();
   356     assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
   357     assert(newOop->is_oop(), "Expected an oop");
   358     assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
   359            "only grey objects on this stack");
   360     newOop->oop_iterate(cl);
   361     if (yield_after && _cm->do_yield_check()) {
   362       res = false;
   363       break;
   364     }
   365   }
   366   debug_only(_drain_in_progress = false);
   367   return res;
   368 }
   370 void CMMarkStack::note_start_of_gc() {
   371   assert(_saved_index == -1,
   372          "note_start_of_gc()/end_of_gc() bracketed incorrectly");
   373   _saved_index = _index;
   374 }
   376 void CMMarkStack::note_end_of_gc() {
   377   // This is intentionally a guarantee, instead of an assert. If we
   378   // accidentally add something to the mark stack during GC, it
   379   // will be a correctness issue so it's better if we crash. We'll
   380   // only check this once per GC anyway, so it won't be a performance
   381   // issue in any way.
   382   guarantee(_saved_index == _index,
   383             err_msg("saved index: %d index: %d", _saved_index, _index));
   384   _saved_index = -1;
   385 }
   387 void CMMarkStack::oops_do(OopClosure* f) {
   388   assert(_saved_index == _index,
   389          err_msg("saved index: %d index: %d", _saved_index, _index));
   390   for (int i = 0; i < _index; i += 1) {
   391     f->do_oop(&_base[i]);
   392   }
   393 }
   395 bool ConcurrentMark::not_yet_marked(oop obj) const {
   396   return _g1h->is_obj_ill(obj);
   397 }
   399 CMRootRegions::CMRootRegions() :
   400   _young_list(NULL), _cm(NULL), _scan_in_progress(false),
   401   _should_abort(false),  _next_survivor(NULL) { }
   403 void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
   404   _young_list = g1h->young_list();
   405   _cm = cm;
   406 }
   408 void CMRootRegions::prepare_for_scan() {
   409   assert(!scan_in_progress(), "pre-condition");
   411   // Currently, only survivors can be root regions.
   412   assert(_next_survivor == NULL, "pre-condition");
   413   _next_survivor = _young_list->first_survivor_region();
   414   _scan_in_progress = (_next_survivor != NULL);
   415   _should_abort = false;
   416 }
   418 HeapRegion* CMRootRegions::claim_next() {
   419   if (_should_abort) {
   420     // If someone has set the should_abort flag, we return NULL to
   421     // force the caller to bail out of their loop.
   422     return NULL;
   423   }
   425   // Currently, only survivors can be root regions.
   426   HeapRegion* res = _next_survivor;
   427   if (res != NULL) {
   428     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
   429     // Read it again in case it changed while we were waiting for the lock.
   430     res = _next_survivor;
   431     if (res != NULL) {
   432       if (res == _young_list->last_survivor_region()) {
   433         // We just claimed the last survivor so store NULL to indicate
   434         // that we're done.
   435         _next_survivor = NULL;
   436       } else {
   437         _next_survivor = res->get_next_young_region();
   438       }
   439     } else {
   440       // Someone else claimed the last survivor while we were trying
   441       // to take the lock so nothing else to do.
   442     }
   443   }
   444   assert(res == NULL || res->is_survivor(), "post-condition");
   446   return res;
   447 }
   449 void CMRootRegions::scan_finished() {
   450   assert(scan_in_progress(), "pre-condition");
   452   // Currently, only survivors can be root regions.
   453   if (!_should_abort) {
   454     assert(_next_survivor == NULL, "we should have claimed all survivors");
   455   }
   456   _next_survivor = NULL;
   458   {
   459     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
   460     _scan_in_progress = false;
   461     RootRegionScan_lock->notify_all();
   462   }
   463 }
   465 bool CMRootRegions::wait_until_scan_finished() {
   466   if (!scan_in_progress()) return false;
   468   {
   469     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
   470     while (scan_in_progress()) {
   471       RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
   472     }
   473   }
   474   return true;
   475 }
   477 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
   478 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
   479 #endif // _MSC_VER
   481 uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
   482   return MAX2((n_par_threads + 2) / 4, 1U);
   483 }
   485 ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, ReservedSpace heap_rs) :
   486   _g1h(g1h),
   487   _markBitMap1(log2_intptr(MinObjAlignment)),
   488   _markBitMap2(log2_intptr(MinObjAlignment)),
   489   _parallel_marking_threads(0),
   490   _max_parallel_marking_threads(0),
   491   _sleep_factor(0.0),
   492   _marking_task_overhead(1.0),
   493   _cleanup_sleep_factor(0.0),
   494   _cleanup_task_overhead(1.0),
   495   _cleanup_list("Cleanup List"),
   496   _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
   497   _card_bm((heap_rs.size() + CardTableModRefBS::card_size - 1) >>
   498             CardTableModRefBS::card_shift,
   499             false /* in_resource_area*/),
   501   _prevMarkBitMap(&_markBitMap1),
   502   _nextMarkBitMap(&_markBitMap2),
   504   _markStack(this),
   505   // _finger set in set_non_marking_state
   507   _max_worker_id(MAX2((uint)ParallelGCThreads, 1U)),
   508   // _active_tasks set in set_non_marking_state
   509   // _tasks set inside the constructor
   510   _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
   511   _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),
   513   _has_overflown(false),
   514   _concurrent(false),
   515   _has_aborted(false),
   516   _aborted_gc_id(GCId::undefined()),
   517   _restart_for_overflow(false),
   518   _concurrent_marking_in_progress(false),
   520   // _verbose_level set below
   522   _init_times(),
   523   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
   524   _cleanup_times(),
   525   _total_counting_time(0.0),
   526   _total_rs_scrub_time(0.0),
   528   _parallel_workers(NULL),
   530   _count_card_bitmaps(NULL),
   531   _count_marked_bytes(NULL),
   532   _completed_initialization(false) {
   533   CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
   534   if (verbose_level < no_verbose) {
   535     verbose_level = no_verbose;
   536   }
   537   if (verbose_level > high_verbose) {
   538     verbose_level = high_verbose;
   539   }
   540   _verbose_level = verbose_level;
   542   if (verbose_low()) {
   543     gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
   544                            "heap end = " INTPTR_FORMAT, p2i(_heap_start), p2i(_heap_end));
   545   }
   547   if (!_markBitMap1.allocate(heap_rs)) {
   548     warning("Failed to allocate first CM bit map");
   549     return;
   550   }
   551   if (!_markBitMap2.allocate(heap_rs)) {
   552     warning("Failed to allocate second CM bit map");
   553     return;
   554   }
   556   // Create & start a ConcurrentMark thread.
   557   _cmThread = new ConcurrentMarkThread(this);
   558   assert(cmThread() != NULL, "CM Thread should have been created");
   559   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
   560   if (_cmThread->osthread() == NULL) {
   561       vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
   562   }
   564   assert(CGC_lock != NULL, "Where's the CGC_lock?");
   565   assert(_markBitMap1.covers(heap_rs), "_markBitMap1 inconsistency");
   566   assert(_markBitMap2.covers(heap_rs), "_markBitMap2 inconsistency");
   568   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
   569   satb_qs.set_buffer_size(G1SATBBufferSize);
   571   _root_regions.init(_g1h, this);
   573   if (ConcGCThreads > ParallelGCThreads) {
   574     warning("Can't have more ConcGCThreads (" UINTX_FORMAT ") "
   575             "than ParallelGCThreads (" UINTX_FORMAT ").",
   576             ConcGCThreads, ParallelGCThreads);
   577     return;
   578   }
   579   if (ParallelGCThreads == 0) {
   580     // if we are not running with any parallel GC threads we will not
   581     // spawn any marking threads either
   582     _parallel_marking_threads =       0;
   583     _max_parallel_marking_threads =   0;
   584     _sleep_factor             =     0.0;
   585     _marking_task_overhead    =     1.0;
   586   } else {
   587     if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
   588       // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
   589       // if both are set
   590       _sleep_factor             = 0.0;
   591       _marking_task_overhead    = 1.0;
   592     } else if (G1MarkingOverheadPercent > 0) {
   593       // We will calculate the number of parallel marking threads based
   594       // on a target overhead with respect to the soft real-time goal
   595       double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
   596       double overall_cm_overhead =
   597         (double) MaxGCPauseMillis * marking_overhead /
   598         (double) GCPauseIntervalMillis;
   599       double cpu_ratio = 1.0 / (double) os::processor_count();
   600       double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
   601       double marking_task_overhead =
   602         overall_cm_overhead / marking_thread_num *
   603                                                 (double) os::processor_count();
   604       double sleep_factor =
   605                          (1.0 - marking_task_overhead) / marking_task_overhead;
   607       FLAG_SET_ERGO(uintx, ConcGCThreads, (uint) marking_thread_num);
   608       _sleep_factor             = sleep_factor;
   609       _marking_task_overhead    = marking_task_overhead;
   610     } else {
   611       // Calculate the number of parallel marking threads by scaling
   612       // the number of parallel GC threads.
   613       uint marking_thread_num = scale_parallel_threads((uint) ParallelGCThreads);
   614       FLAG_SET_ERGO(uintx, ConcGCThreads, marking_thread_num);
   615       _sleep_factor             = 0.0;
   616       _marking_task_overhead    = 1.0;
   617     }
   619     assert(ConcGCThreads > 0, "Should have been set");
   620     _parallel_marking_threads = (uint) ConcGCThreads;
   621     _max_parallel_marking_threads = _parallel_marking_threads;
   623     if (parallel_marking_threads() > 1) {
   624       _cleanup_task_overhead = 1.0;
   625     } else {
   626       _cleanup_task_overhead = marking_task_overhead();
   627     }
   628     _cleanup_sleep_factor =
   629                      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
   631 #if 0
   632     gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
   633     gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
   634     gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
   635     gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
   636     gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
   637 #endif
   639     guarantee(parallel_marking_threads() > 0, "peace of mind");
   640     _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
   641          _max_parallel_marking_threads, false, true);
   642     if (_parallel_workers == NULL) {
   643       vm_exit_during_initialization("Failed necessary allocation.");
   644     } else {
   645       _parallel_workers->initialize_workers();
   646     }
   647   }
   649   if (FLAG_IS_DEFAULT(MarkStackSize)) {
   650     uintx mark_stack_size =
   651       MIN2(MarkStackSizeMax,
   652           MAX2(MarkStackSize, (uintx) (parallel_marking_threads() * TASKQUEUE_SIZE)));
   653     // Verify that the calculated value for MarkStackSize is in range.
   654     // It would be nice to use the private utility routine from Arguments.
   655     if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
   656       warning("Invalid value calculated for MarkStackSize (" UINTX_FORMAT "): "
   657               "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
   658               mark_stack_size, (uintx) 1, MarkStackSizeMax);
   659       return;
   660     }
   661     FLAG_SET_ERGO(uintx, MarkStackSize, mark_stack_size);
   662   } else {
   663     // Verify MarkStackSize is in range.
   664     if (FLAG_IS_CMDLINE(MarkStackSize)) {
   665       if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
   666         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
   667           warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT "): "
   668                   "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
   669                   MarkStackSize, (uintx) 1, MarkStackSizeMax);
   670           return;
   671         }
   672       } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
   673         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
   674           warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT ")"
   675                   " or for MarkStackSizeMax (" UINTX_FORMAT ")",
   676                   MarkStackSize, MarkStackSizeMax);
   677           return;
   678         }
   679       }
   680     }
   681   }
   683   if (!_markStack.allocate(MarkStackSize)) {
   684     warning("Failed to allocate CM marking stack");
   685     return;
   686   }
   688   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
   689   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
   691   _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
   692   _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
   694   BitMap::idx_t card_bm_size = _card_bm.size();
   696   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
   697   _active_tasks = _max_worker_id;
   699   size_t max_regions = (size_t) _g1h->max_regions();
   700   for (uint i = 0; i < _max_worker_id; ++i) {
   701     CMTaskQueue* task_queue = new CMTaskQueue();
   702     task_queue->initialize();
   703     _task_queues->register_queue(i, task_queue);
   705     _count_card_bitmaps[i] = BitMap(card_bm_size, false);
   706     _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);
   708     _tasks[i] = new CMTask(i, this,
   709                            _count_marked_bytes[i],
   710                            &_count_card_bitmaps[i],
   711                            task_queue, _task_queues);
   713     _accum_task_vtime[i] = 0.0;
   714   }
   716   // Calculate the card number for the bottom of the heap. Used
   717   // in biasing indexes into the accounting card bitmaps.
   718   _heap_bottom_card_num =
   719     intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
   720                                 CardTableModRefBS::card_shift);
   722   // Clear all the liveness counting data
   723   clear_all_count_data();
   725   // so that the call below can read a sensible value
   726   _heap_start = (HeapWord*) heap_rs.base();
   727   set_non_marking_state();
   728   _completed_initialization = true;
   729 }
   731 void ConcurrentMark::update_g1_committed(bool force) {
   732   // If concurrent marking is not in progress, then we do not need to
   733   // update _heap_end.
   734   if (!concurrent_marking_in_progress() && !force) return;
   736   MemRegion committed = _g1h->g1_committed();
   737   assert(committed.start() == _heap_start, "start shouldn't change");
   738   HeapWord* new_end = committed.end();
   739   if (new_end > _heap_end) {
   740     // The heap has been expanded.
   742     _heap_end = new_end;
   743   }
   744   // Notice that the heap can also shrink. However, this only happens
   745   // during a Full GC (at least currently) and the entire marking
   746   // phase will bail out and the task will not be restarted. So, let's
   747   // do nothing.
   748 }
   750 void ConcurrentMark::reset() {
   751   // Starting values for these two. This should be called in a STW
   752   // phase. CM will be notified of any future g1_committed expansions;
   753   // these will happen at the end of evacuation pauses, when tasks are
   754   // inactive.
   755   MemRegion committed = _g1h->g1_committed();
   756   _heap_start = committed.start();
   757   _heap_end   = committed.end();
   759   // Separated the asserts so that we know which one fires.
   760   assert(_heap_start != NULL, "heap bounds should look ok");
   761   assert(_heap_end != NULL, "heap bounds should look ok");
   762   assert(_heap_start < _heap_end, "heap bounds should look ok");
   764   // Reset all the marking data structures and any necessary flags
   765   reset_marking_state();
   767   if (verbose_low()) {
   768     gclog_or_tty->print_cr("[global] resetting");
   769   }
   771   // We do reset all of them, since different phases will use
   772   // different number of active threads. So, it's easiest to have all
   773   // of them ready.
   774   for (uint i = 0; i < _max_worker_id; ++i) {
   775     _tasks[i]->reset(_nextMarkBitMap);
   776   }
   778   // we need this to make sure that the flag is on during the evac
   779   // pause with initial mark piggy-backed
   780   set_concurrent_marking_in_progress();
   781 }
   784 void ConcurrentMark::reset_marking_state(bool clear_overflow) {
   785   _markStack.set_should_expand();
   786   _markStack.setEmpty();        // Also clears the _markStack overflow flag
   787   if (clear_overflow) {
   788     clear_has_overflown();
   789   } else {
   790     assert(has_overflown(), "pre-condition");
   791   }
   792   _finger = _heap_start;
   794   for (uint i = 0; i < _max_worker_id; ++i) {
   795     CMTaskQueue* queue = _task_queues->queue(i);
   796     queue->set_empty();
   797   }
   798 }
   800 void ConcurrentMark::set_concurrency(uint active_tasks) {
   801   assert(active_tasks <= _max_worker_id, "we should not have more");
   803   _active_tasks = active_tasks;
   804   // Need to update the three data structures below according to the
   805   // number of active threads for this phase.
   806   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
   807   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
   808   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
   809 }
   811 void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
   812   set_concurrency(active_tasks);
   814   _concurrent = concurrent;
   815   // We propagate this to all tasks, not just the active ones.
   816   for (uint i = 0; i < _max_worker_id; ++i)
   817     _tasks[i]->set_concurrent(concurrent);
   819   if (concurrent) {
   820     set_concurrent_marking_in_progress();
   821   } else {
   822     // We currently assume that the concurrent flag has been set to
   823     // false before we start remark. At this point we should also be
   824     // in a STW phase.
   825     assert(!concurrent_marking_in_progress(), "invariant");
   826     assert(out_of_regions(),
   827            err_msg("only way to get here: _finger: "PTR_FORMAT", _heap_end: "PTR_FORMAT,
   828                    p2i(_finger), p2i(_heap_end)));
   829     update_g1_committed(true);
   830   }
   831 }
   833 void ConcurrentMark::set_non_marking_state() {
   834   // We set the global marking state to some default values when we're
   835   // not doing marking.
   836   reset_marking_state();
   837   _active_tasks = 0;
   838   clear_concurrent_marking_in_progress();
   839 }
   841 ConcurrentMark::~ConcurrentMark() {
   842   // The ConcurrentMark instance is never freed.
   843   ShouldNotReachHere();
   844 }
   846 void ConcurrentMark::clearNextBitmap() {
   847   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   848   G1CollectorPolicy* g1p = g1h->g1_policy();
   850   // Make sure that the concurrent mark thread looks to still be in
   851   // the current cycle.
   852   guarantee(cmThread()->during_cycle(), "invariant");
   854   // We are finishing up the current cycle by clearing the next
   855   // marking bitmap and getting it ready for the next cycle. During
   856   // this time no other cycle can start. So, let's make sure that this
   857   // is the case.
   858   guarantee(!g1h->mark_in_progress(), "invariant");
   860   // clear the mark bitmap (no grey objects to start with).
   861   // We need to do this in chunks and offer to yield in between
   862   // each chunk.
   863   HeapWord* start  = _nextMarkBitMap->startWord();
   864   HeapWord* end    = _nextMarkBitMap->endWord();
   865   HeapWord* cur    = start;
   866   size_t chunkSize = M;
   867   while (cur < end) {
   868     HeapWord* next = cur + chunkSize;
   869     if (next > end) {
   870       next = end;
   871     }
   872     MemRegion mr(cur,next);
   873     _nextMarkBitMap->clearRange(mr);
   874     cur = next;
   875     do_yield_check();
   877     // Repeat the asserts from above. We'll do them as asserts here to
   878     // minimize their overhead on the product. However, we'll have
   879     // them as guarantees at the beginning / end of the bitmap
   880     // clearing to get some checking in the product.
   881     assert(cmThread()->during_cycle(), "invariant");
   882     assert(!g1h->mark_in_progress(), "invariant");
   883   }
   885   // Clear the liveness counting data
   886   clear_all_count_data();
   888   // Repeat the asserts from above.
   889   guarantee(cmThread()->during_cycle(), "invariant");
   890   guarantee(!g1h->mark_in_progress(), "invariant");
   891 }
   893 bool ConcurrentMark::nextMarkBitmapIsClear() {
   894   return _nextMarkBitMap->getNextMarkedWordAddress(_heap_start, _heap_end) == _heap_end;
   895 }
   897 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
   898 public:
   899   bool doHeapRegion(HeapRegion* r) {
   900     if (!r->continuesHumongous()) {
   901       r->note_start_of_marking();
   902     }
   903     return false;
   904   }
   905 };
   907 void ConcurrentMark::checkpointRootsInitialPre() {
   908   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
   909   G1CollectorPolicy* g1p = g1h->g1_policy();
   911   _has_aborted = false;
   913 #ifndef PRODUCT
   914   if (G1PrintReachableAtInitialMark) {
   915     print_reachable("at-cycle-start",
   916                     VerifyOption_G1UsePrevMarking, true /* all */);
   917   }
   918 #endif
   920   // Initialise marking structures. This has to be done in a STW phase.
   921   reset();
   923   // For each region note start of marking.
   924   NoteStartOfMarkHRClosure startcl;
   925   g1h->heap_region_iterate(&startcl);
   926 }
   929 void ConcurrentMark::checkpointRootsInitialPost() {
   930   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
   932   // If we force an overflow during remark, the remark operation will
   933   // actually abort and we'll restart concurrent marking. If we always
   934   // force an overflow during remark we'll never actually complete the
   935   // marking phase. So, we initialize this here, at the start of the
   936   // cycle, so that the remaining overflow number will decrease at
   937   // every remark and we'll eventually not need to cause one.
   938   force_overflow_stw()->init();
   940   // Start Concurrent Marking weak-reference discovery.
   941   ReferenceProcessor* rp = g1h->ref_processor_cm();
   942   // enable ("weak") refs discovery
   943   rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
   944   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
   946   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
   947   // This is the start of the marking cycle; we expect all
   948   // threads to have SATB queues with active set to false.
   949   satb_mq_set.set_active_all_threads(true, /* new active value */
   950                                      false /* expected_active */);
   952   _root_regions.prepare_for_scan();
   954   // update_g1_committed() will be called at the end of an evac pause
   955   // when marking is on. So, it's also called at the end of the
   956   // initial-mark pause to update the heap end, if the heap expands
   957   // during it. No need to call it here.
   958 }
   960 /*
   961  * Notice that in the next two methods, we actually leave the STS
   962  * during the barrier sync and join it immediately afterwards. If we
   963  * do not do this, the following deadlock can occur: one thread could
   964  * be in the barrier sync code, waiting for the other thread to also
   965  * sync up, whereas another one could be trying to yield, while also
   966  * waiting for the other threads to sync up too.
   967  *
   968  * Note, however, that this code is also used during remark and in
   969  * this case we should not attempt to leave / enter the STS, otherwise
   970  * we'll either hit an assert (debug / fastdebug) or deadlock
   971  * (product). So we should only leave / enter the STS if we are
   972  * operating concurrently.
   973  *
   974  * Because the thread that does the sync barrier has left the STS, it
   975  * is possible for it to be suspended while a Full GC or an evacuation
   976  * pause occurs. This is actually safe, since entering the sync
   977  * barrier is one of the last things do_marking_step() does, and it
   978  * doesn't manipulate any data structures afterwards.
   979  */
   981 void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
   982   if (verbose_low()) {
   983     gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
   984   }
   986   if (concurrent()) {
   987     SuspendibleThreadSet::leave();
   988   }
   990   bool barrier_aborted = !_first_overflow_barrier_sync.enter();
   992   if (concurrent()) {
   993     SuspendibleThreadSet::join();
   994   }
   995   // at this point everyone should have synced up and not be doing any
   996   // more work
   998   if (verbose_low()) {
   999     if (barrier_aborted) {
  1000       gclog_or_tty->print_cr("[%u] aborted first barrier", worker_id);
  1001     } else {
  1002       gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
  1003     }
  1004   }
  1006   if (barrier_aborted) {
  1007     // If the barrier aborted we ignore the overflow condition and
  1008     // just abort the whole marking phase as quickly as possible.
  1009     return;
  1010   }
  1012   // If we're executing the concurrent phase of marking, reset the marking
  1013   // state; otherwise the marking state is reset after reference processing,
  1014   // during the remark pause.
  1015   // If we reset here as a result of an overflow during the remark we will
  1016   // see assertion failures from any subsequent set_concurrency_and_phase()
  1017   // calls.
  1018   if (concurrent()) {
  1019     // let the task associated with worker 0 do this
  1020     if (worker_id == 0) {
  1021       // task 0 is responsible for clearing the global data structures
  1022       // We should be here because of an overflow. During STW we should
  1023       // not clear the overflow flag since we rely on it being true when
  1024     // we exit this method to abort the pause and restart concurrent
  1025       // marking.
  1026       reset_marking_state(true /* clear_overflow */);
  1027       force_overflow()->update();
  1029       if (G1Log::fine()) {
  1030         gclog_or_tty->gclog_stamp(concurrent_gc_id());
  1031         gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
  1032       }
  1033     }
  1034   }
  1036   // after this, each task should reset its own data structures and
  1037   // then go into the second barrier
  1038 }
  1040 void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  1041   if (verbose_low()) {
  1042     gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
  1043   }
  1045   if (concurrent()) {
  1046     SuspendibleThreadSet::leave();
  1047   }
  1049   bool barrier_aborted = !_second_overflow_barrier_sync.enter();
  1051   if (concurrent()) {
  1052     SuspendibleThreadSet::join();
  1053   }
  1054   // at this point everything should be re-initialized and ready to go
  1056   if (verbose_low()) {
  1057     if (barrier_aborted) {
  1058       gclog_or_tty->print_cr("[%u] aborted second barrier", worker_id);
  1059     } else {
  1060       gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
  1061     }
  1062   }
  1063 }
  1065 #ifndef PRODUCT
  1066 void ForceOverflowSettings::init() {
  1067   _num_remaining = G1ConcMarkForceOverflow;
  1068   _force = false;
  1069   update();
  1070 }
  1072 void ForceOverflowSettings::update() {
  1073   if (_num_remaining > 0) {
  1074     _num_remaining -= 1;
  1075     _force = true;
  1076   } else {
  1077     _force = false;
  1078   }
  1079 }
  1081 bool ForceOverflowSettings::should_force() {
  1082   if (_force) {
  1083     _force = false;
  1084     return true;
  1085   } else {
  1086     return false;
  1087   }
  1088 }
  1089 #endif // !PRODUCT
  1091 class CMConcurrentMarkingTask: public AbstractGangTask {
  1092 private:
  1093   ConcurrentMark*       _cm;
  1094   ConcurrentMarkThread* _cmt;
  1096 public:
  1097   void work(uint worker_id) {
  1098     assert(Thread::current()->is_ConcurrentGC_thread(),
  1099            "this should only be done by a conc GC thread");
  1100     ResourceMark rm;
  1102     double start_vtime = os::elapsedVTime();
  1104     SuspendibleThreadSet::join();
  1106     assert(worker_id < _cm->active_tasks(), "invariant");
  1107     CMTask* the_task = _cm->task(worker_id);
  1108     the_task->record_start_time();
  1109     if (!_cm->has_aborted()) {
  1110       do {
  1111         double start_vtime_sec = os::elapsedVTime();
  1112         double start_time_sec = os::elapsedTime();
  1113         double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
  1115         the_task->do_marking_step(mark_step_duration_ms,
  1116                                   true  /* do_termination */,
  1117                                   false /* is_serial*/);
  1119         double end_time_sec = os::elapsedTime();
  1120         double end_vtime_sec = os::elapsedVTime();
  1121         double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
  1122         double elapsed_time_sec = end_time_sec - start_time_sec;
  1123         _cm->clear_has_overflown();
  1125         bool ret = _cm->do_yield_check(worker_id);
  1127         jlong sleep_time_ms;
  1128         if (!_cm->has_aborted() && the_task->has_aborted()) {
  1129           sleep_time_ms =
  1130             (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
  1131           SuspendibleThreadSet::leave();
  1132           os::sleep(Thread::current(), sleep_time_ms, false);
  1133           SuspendibleThreadSet::join();
  1134         }
  1135         double end_time2_sec = os::elapsedTime();
  1136         double elapsed_time2_sec = end_time2_sec - start_time_sec;
  1138 #if 0
  1139           gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
  1140                                  "overhead %1.4lf",
  1141                                  elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
  1142                                  the_task->conc_overhead(os::elapsedTime()) * 8.0);
  1143           gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
  1144                                  elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
  1145 #endif
  1146       } while (!_cm->has_aborted() && the_task->has_aborted());
  1147     }
  1148     the_task->record_end_time();
  1149     guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
  1151     SuspendibleThreadSet::leave();
  1153     double end_vtime = os::elapsedVTime();
  1154     _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  1155   }
  1157   CMConcurrentMarkingTask(ConcurrentMark* cm,
  1158                           ConcurrentMarkThread* cmt) :
  1159       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
  1161   ~CMConcurrentMarkingTask() { }
  1162 };
  1164 // Calculates the number of active workers for a concurrent
  1165 // phase.
  1166 uint ConcurrentMark::calc_parallel_marking_threads() {
  1167   if (G1CollectedHeap::use_parallel_gc_threads()) {
  1168     uint n_conc_workers = 0;
  1169     if (!UseDynamicNumberOfGCThreads ||
  1170         (!FLAG_IS_DEFAULT(ConcGCThreads) &&
  1171          !ForceDynamicNumberOfGCThreads)) {
  1172       n_conc_workers = max_parallel_marking_threads();
  1173     } else {
  1174       n_conc_workers =
  1175         AdaptiveSizePolicy::calc_default_active_workers(
  1176                                      max_parallel_marking_threads(),
  1177                                      1, /* Minimum workers */
  1178                                      parallel_marking_threads(),
  1179                                      Threads::number_of_non_daemon_threads());
  1180       // Don't scale down "n_conc_workers" by scale_parallel_threads() because
  1181       // that scaling has already gone into "_max_parallel_marking_threads".
  1182     }
  1183     assert(n_conc_workers > 0, "Always need at least 1");
  1184     return n_conc_workers;
  1185   }
  1186   // If we are not running with any parallel GC threads we will not
  1187   // have spawned any marking threads either. Hence the number of
  1188   // concurrent workers should be 0.
  1189   return 0;
  1190 }
  1192 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
  1193   // Currently, only survivors can be root regions.
  1194   assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  1195   G1RootRegionScanClosure cl(_g1h, this, worker_id);
  1197   const uintx interval = PrefetchScanIntervalInBytes;
  1198   HeapWord* curr = hr->bottom();
  1199   const HeapWord* end = hr->top();
  1200   while (curr < end) {
  1201     Prefetch::read(curr, interval);
  1202     oop obj = oop(curr);
  1203     int size = obj->oop_iterate(&cl);
  1204     assert(size == obj->size(), "sanity");
  1205     curr += size;
  1206   }
  1207 }
  1209 class CMRootRegionScanTask : public AbstractGangTask {
  1210 private:
  1211   ConcurrentMark* _cm;
  1213 public:
  1214   CMRootRegionScanTask(ConcurrentMark* cm) :
  1215     AbstractGangTask("Root Region Scan"), _cm(cm) { }
  1217   void work(uint worker_id) {
  1218     assert(Thread::current()->is_ConcurrentGC_thread(),
  1219            "this should only be done by a conc GC thread");
  1221     CMRootRegions* root_regions = _cm->root_regions();
  1222     HeapRegion* hr = root_regions->claim_next();
  1223     while (hr != NULL) {
  1224       _cm->scanRootRegion(hr, worker_id);
  1225       hr = root_regions->claim_next();
  1226     }
  1227   }
  1228 };
  1230 void ConcurrentMark::scanRootRegions() {
  1231   // Start of concurrent marking.
  1232   ClassLoaderDataGraph::clear_claimed_marks();
  1234   // scan_in_progress() will have been set to true only if there was
  1235   // at least one root region to scan. So, if it's false, we
  1236   // should not attempt to do any further work.
  1237   if (root_regions()->scan_in_progress()) {
  1238     _parallel_marking_threads = calc_parallel_marking_threads();
  1239     assert(parallel_marking_threads() <= max_parallel_marking_threads(),
  1240            "Maximum number of marking threads exceeded");
  1241     uint active_workers = MAX2(1U, parallel_marking_threads());
  1243     CMRootRegionScanTask task(this);
  1244     if (use_parallel_marking_threads()) {
  1245       _parallel_workers->set_active_workers((int) active_workers);
  1246       _parallel_workers->run_task(&task);
  1247     } else {
  1248       task.work(0);
  1249     }
  1251     // It's possible that has_aborted() is true here without actually
  1252     // aborting the survivor scan earlier. This is OK as it's
  1253     // mainly used for sanity checking.
  1254     root_regions()->scan_finished();
  1255   }
  1256 }
  1258 void ConcurrentMark::markFromRoots() {
  1259   // we might be tempted to assert that:
  1260   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  1261   //        "inconsistent argument?");
  1262   // However that wouldn't be right, because it's possible that
  1263   // a safepoint is indeed in progress as a younger generation
  1264   // stop-the-world GC happens even as we mark in this generation.
  1266   _restart_for_overflow = false;
  1267   force_overflow_conc()->init();
  1269   // _g1h has _n_par_threads
  1270   _parallel_marking_threads = calc_parallel_marking_threads();
  1271   assert(parallel_marking_threads() <= max_parallel_marking_threads(),
  1272     "Maximum number of marking threads exceeded");
  1274   uint active_workers = MAX2(1U, parallel_marking_threads());
  1276   // Parallel task terminator is set in "set_concurrency_and_phase()"
  1277   set_concurrency_and_phase(active_workers, true /* concurrent */);
  1279   CMConcurrentMarkingTask markingTask(this, cmThread());
  1280   if (use_parallel_marking_threads()) {
  1281     _parallel_workers->set_active_workers((int)active_workers);
  1282     // Don't set _n_par_threads because it affects MT in process_roots()
  1283     // and the decisions on that MT processing is made elsewhere.
  1284     assert(_parallel_workers->active_workers() > 0, "Should have been set");
  1285     _parallel_workers->run_task(&markingTask);
  1286   } else {
  1287     markingTask.work(0);
  1288   }
  1289   print_stats();
  1290 }
  1292 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  1293   // world is stopped at this checkpoint
  1294   assert(SafepointSynchronize::is_at_safepoint(),
  1295          "world should be stopped");
  1297   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  1299   // If a full collection has happened, we shouldn't do this.
  1300   if (has_aborted()) {
  1301     g1h->set_marking_complete(); // So bitmap clearing isn't confused
  1302     return;
  1303   }
  1305   SvcGCMarker sgcm(SvcGCMarker::OTHER);
  1307   if (VerifyDuringGC) {
  1308     HandleMark hm;  // handle scope
  1309     Universe::heap()->prepare_for_verify();
  1310     Universe::verify(VerifyOption_G1UsePrevMarking,
  1311                      " VerifyDuringGC:(before)");
  1312   }
  1314   G1CollectorPolicy* g1p = g1h->g1_policy();
  1315   g1p->record_concurrent_mark_remark_start();
  1317   double start = os::elapsedTime();
  1319   checkpointRootsFinalWork();
  1321   double mark_work_end = os::elapsedTime();
  1323   weakRefsWork(clear_all_soft_refs);
  1325   if (has_overflown()) {
  1326     // Oops.  We overflowed.  Restart concurrent marking.
  1327     _restart_for_overflow = true;
  1328     if (G1TraceMarkStackOverflow) {
  1329       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
  1330     }
  1332     // Verify the heap w.r.t. the previous marking bitmap.
  1333     if (VerifyDuringGC) {
  1334       HandleMark hm;  // handle scope
  1335       Universe::heap()->prepare_for_verify();
  1336       Universe::verify(VerifyOption_G1UsePrevMarking,
  1337                        " VerifyDuringGC:(overflow)");
  1338     }
  1340     // Clear the marking state because we will be restarting
  1341     // marking due to overflowing the global mark stack.
  1342     reset_marking_state();
  1343   } else {
  1344     // Aggregate the per-task counting data that we have accumulated
  1345     // while marking.
  1346     aggregate_count_data();
  1348     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  1349     // We're done with marking.
  1350     // This is the end of the marking cycle; we expect all
  1351     // threads to have SATB queues with active set to true.
  1352     satb_mq_set.set_active_all_threads(false, /* new active value */
  1353                                        true /* expected_active */);
  1355     if (VerifyDuringGC) {
  1356       HandleMark hm;  // handle scope
  1357       Universe::heap()->prepare_for_verify();
  1358       Universe::verify(VerifyOption_G1UseNextMarking,
  1359                        " VerifyDuringGC:(after)");
  1360     }
  1361     assert(!restart_for_overflow(), "sanity");
  1362     // Completely reset the marking state since marking completed
  1363     set_non_marking_state();
  1364   }
  1366   // Expand the marking stack, if we have to and if we can.
  1367   if (_markStack.should_expand()) {
  1368     _markStack.expand();
  1369   }
  1371   // Statistics
  1372   double now = os::elapsedTime();
  1373   _remark_mark_times.add((mark_work_end - start) * 1000.0);
  1374   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  1375   _remark_times.add((now - start) * 1000.0);
  1377   g1p->record_concurrent_mark_remark_end();
  1379   G1CMIsAliveClosure is_alive(g1h);
  1380   g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
  1381 }
  1383 // Base class of the closures that finalize and verify the
  1384 // liveness counting data.
  1385 class CMCountDataClosureBase: public HeapRegionClosure {
  1386 protected:
  1387   G1CollectedHeap* _g1h;
  1388   ConcurrentMark* _cm;
  1389   CardTableModRefBS* _ct_bs;
  1391   BitMap* _region_bm;
  1392   BitMap* _card_bm;
  1394   // Takes a region that's not empty (i.e., it has at least one
  1395   // live object in it) and sets its corresponding bit on the region
  1396   // bitmap to 1. If the region is "starts humongous" it will also set
  1397   // to 1 the bits on the region bitmap that correspond to its
  1398   // associated "continues humongous" regions.
  1399   void set_bit_for_region(HeapRegion* hr) {
  1400     assert(!hr->continuesHumongous(), "should have filtered those out");
  1402     BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
  1403     if (!hr->startsHumongous()) {
  1404       // Normal (non-humongous) case: just set the bit.
  1405       _region_bm->par_at_put(index, true);
  1406     } else {
  1407       // Starts humongous case: calculate how many regions are part of
  1408       // this humongous region and then set the bit range.
  1409       BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
  1410       _region_bm->par_at_put_range(index, end_index, true);
  1414 public:
  1415   CMCountDataClosureBase(G1CollectedHeap* g1h,
  1416                          BitMap* region_bm, BitMap* card_bm):
  1417     _g1h(g1h), _cm(g1h->concurrent_mark()),
  1418     _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
  1419     _region_bm(region_bm), _card_bm(card_bm) { }
  1420 };
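// ---------------------------------------------------------------------------
// A minimal, standalone sketch (not HotSpot code) of the bit-setting rule in
// set_bit_for_region() above: a normal region sets exactly one bit, while a
// "starts humongous" region sets the bits of every region spanned by the
// humongous object. std::vector<bool> stands in for the HotSpot BitMap and
// the parameter names are assumptions for illustration only.
#include <cstddef>
#include <vector>

static void set_region_bits_sketch(std::vector<bool>& region_bm,
                                   std::size_t index,       // hrs_index() of the region
                                   std::size_t end_index) { // exclusive; index + 1 for a normal region
  for (std::size_t i = index; i < end_index; ++i) {
    region_bm[i] = true;                                    // models par_at_put / par_at_put_range
  }
}
// ---------------------------------------------------------------------------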
  1422 // Closure that calculates the # live objects per region. Used
  1423 // for verification purposes during the cleanup pause.
  1424 class CalcLiveObjectsClosure: public CMCountDataClosureBase {
  1425   CMBitMapRO* _bm;
  1426   size_t _region_marked_bytes;
  1428 public:
  1429   CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
  1430                          BitMap* region_bm, BitMap* card_bm) :
  1431     CMCountDataClosureBase(g1h, region_bm, card_bm),
  1432     _bm(bm), _region_marked_bytes(0) { }
  1434   bool doHeapRegion(HeapRegion* hr) {
  1436     if (hr->continuesHumongous()) {
  1437       // We will ignore these here and process them when their
  1438       // associated "starts humongous" region is processed (see
  1439       // set_bit_for_heap_region()). Note that we cannot rely on their
  1440       // associated "starts humongous" region to have their bit set to
  1441       // 1 since, due to the region chunking in the parallel region
  1442       // iteration, a "continues humongous" region might be visited
  1443       // before its associated "starts humongous".
  1444       return false;
  1447     HeapWord* ntams = hr->next_top_at_mark_start();
  1448     HeapWord* start = hr->bottom();
  1450     assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
  1451            err_msg("Preconditions not met - "
  1452                    "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT,
  1453                    p2i(start), p2i(ntams), p2i(hr->end())));
  1455     // Find the first marked object at or after "start".
  1456     start = _bm->getNextMarkedWordAddress(start, ntams);
  1458     size_t marked_bytes = 0;
  1460     while (start < ntams) {
  1461       oop obj = oop(start);
  1462       int obj_sz = obj->size();
  1463       HeapWord* obj_end = start + obj_sz;
  1465       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
  1466       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);
  1468       // Note: if we're looking at the last region in heap - obj_end
  1469       // could actually be just beyond the end of the heap; end_idx
  1470       // will then correspond to a (non-existent) card that is also
  1471       // just beyond the heap.
  1472       if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
  1473         // end of object is not card aligned - increment to cover
  1474         // all the cards spanned by the object
  1475         end_idx += 1;
  1478       // Set the bits in the card BM for the cards spanned by this object.
  1479       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
  1481       // Add the size of this object to the number of marked bytes.
  1482       marked_bytes += (size_t)obj_sz * HeapWordSize;
  1484       // Find the next marked object after this one.
  1485       start = _bm->getNextMarkedWordAddress(obj_end, ntams);
  1488     // Mark the allocated-since-marking portion...
  1489     HeapWord* top = hr->top();
  1490     if (ntams < top) {
  1491       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
  1492       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
  1494       // Note: if we're looking at the last region in heap - top
  1495       // could actually be just beyond the end of the heap; end_idx
  1496       // will then correspond to a (non-existent) card that is also
  1497       // just beyond the heap.
  1498       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
  1499         // end of object is not card aligned - increment to cover
  1500         // all the cards spanned by the object
  1501         end_idx += 1;
  1503       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
  1505       // This definitely means the region has live objects.
  1506       set_bit_for_region(hr);
  1509     // Update the live region bitmap.
  1510     if (marked_bytes > 0) {
  1511       set_bit_for_region(hr);
  1514     // Set the marked bytes for the current region so that
  1515     // it can be queried by a calling verification routine.
  1516     _region_marked_bytes = marked_bytes;
  1518     return false;
  1521   size_t region_marked_bytes() const { return _region_marked_bytes; }
  1522 };
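// ---------------------------------------------------------------------------
// A minimal, standalone sketch (not HotSpot code) of the card-index
// arithmetic used in the loop above. In the real code card_bitmap_index_for()
// is relative to the start of the G1 reserved space and is_card_aligned()
// comes from the card table; here plain addresses and an assumed 512-byte
// card size (shift of 9) stand in for both.
#include <cstddef>
#include <cstdint>

static const unsigned kCardShiftSketch = 9; // assumed card size of 512 bytes
static const std::uintptr_t kCardMaskSketch =
    (std::uintptr_t(1) << kCardShiftSketch) - 1;

// Cards covered by an object occupying [start, obj_end), returned as the
// half-open index range [*start_idx, *end_idx).
static void cards_spanned_sketch(std::uintptr_t start, std::uintptr_t obj_end,
                                 std::size_t* start_idx, std::size_t* end_idx) {
  *start_idx = static_cast<std::size_t>(start >> kCardShiftSketch);
  *end_idx   = static_cast<std::size_t>(obj_end >> kCardShiftSketch);
  if ((obj_end & kCardMaskSketch) != 0) {
    // obj_end is not card aligned: bump end_idx so the last, partially
    // covered card is included as well.
    *end_idx += 1;
  }
}
// ---------------------------------------------------------------------------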
  1524 // Heap region closure used for verifying the counting data
  1525 // that was accumulated concurrently and aggregated during
  1526 // the remark pause. This closure is applied to the heap
  1527 // regions during the STW cleanup pause.
  1529 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
  1530   G1CollectedHeap* _g1h;
  1531   ConcurrentMark* _cm;
  1532   CalcLiveObjectsClosure _calc_cl;
  1533   BitMap* _region_bm;   // Region BM to be verified
  1534   BitMap* _card_bm;     // Card BM to be verified
  1535   bool _verbose;        // verbose output?
  1537   BitMap* _exp_region_bm; // Expected Region BM values
  1538   BitMap* _exp_card_bm;   // Expected card BM values
  1540   int _failures;
  1542 public:
  1543   VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
  1544                                 BitMap* region_bm,
  1545                                 BitMap* card_bm,
  1546                                 BitMap* exp_region_bm,
  1547                                 BitMap* exp_card_bm,
  1548                                 bool verbose) :
  1549     _g1h(g1h), _cm(g1h->concurrent_mark()),
  1550     _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
  1551     _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
  1552     _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
  1553     _failures(0) { }
  1555   int failures() const { return _failures; }
  1557   bool doHeapRegion(HeapRegion* hr) {
  1558     if (hr->continuesHumongous()) {
  1559       // We will ignore these here and process them when their
  1560       // associated "starts humongous" region is processed (see
  1561       // set_bit_for_region()). Note that we cannot rely on their
  1562       // associated "starts humongous" region to have its bit set to
  1563       // 1 since, due to the region chunking in the parallel region
  1564       // iteration, a "continues humongous" region might be visited
  1565       // before its associated "starts humongous".
  1566       return false;
  1569     int failures = 0;
  1571     // Call the CalcLiveObjectsClosure to walk the marking bitmap for
  1572     // this region and set the corresponding bits in the expected region
  1573     // and card bitmaps.
  1574     bool res = _calc_cl.doHeapRegion(hr);
  1575     assert(res == false, "should be continuing");
  1577     MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
  1578                     Mutex::_no_safepoint_check_flag);
  1580     // Verify the marked bytes for this region.
  1581     size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
  1582     size_t act_marked_bytes = hr->next_marked_bytes();
  1584     // We're not OK if expected marked bytes > actual marked bytes. It means
  1585     // we have missed accounting for some objects during the actual marking.
  1586     if (exp_marked_bytes > act_marked_bytes) {
  1587       if (_verbose) {
  1588         gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
  1589                                "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
  1590                                hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
  1592       failures += 1;
  1595     // Verify the bit, for this region, in the actual and expected
  1596     // (which was just calculated) region bit maps.
  1597     // We're not OK if the bit in the calculated expected region
  1598     // bitmap is set and the bit in the actual region bitmap is not.
  1599     BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
  1601     bool expected = _exp_region_bm->at(index);
  1602     bool actual = _region_bm->at(index);
  1603     if (expected && !actual) {
  1604       if (_verbose) {
  1605         gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
  1606                                "expected: %s, actual: %s",
  1607                                hr->hrs_index(),
  1608                                BOOL_TO_STR(expected), BOOL_TO_STR(actual));
  1610       failures += 1;
  1613     // Verify that the card bit maps for the cards spanned by the current
  1614     // region match. We have an error if we have a set bit in the expected
  1615     // bit map and the corresponding bit in the actual bitmap is not set.
  1617     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
  1618     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
  1620     for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
  1621       expected = _exp_card_bm->at(i);
  1622       actual = _card_bm->at(i);
  1624       if (expected && !actual) {
  1625         if (_verbose) {
  1626           gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
  1627                                  "expected: %s, actual: %s",
  1628                                  hr->hrs_index(), i,
  1629                                  BOOL_TO_STR(expected), BOOL_TO_STR(actual));
  1631         failures += 1;
  1635     if (failures > 0 && _verbose)  {
  1636       gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
  1637                              "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
  1638                              HR_FORMAT_PARAMS(hr), p2i(hr->next_top_at_mark_start()),
  1639                              _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
  1642     _failures += failures;
  1644     // We could stop iteration over the heap when we
  1645     // find the first violating region by returning true.
  1646     return false;
  1648 };
  1650 class G1ParVerifyFinalCountTask: public AbstractGangTask {
  1651 protected:
  1652   G1CollectedHeap* _g1h;
  1653   ConcurrentMark* _cm;
  1654   BitMap* _actual_region_bm;
  1655   BitMap* _actual_card_bm;
  1657   uint    _n_workers;
  1659   BitMap* _expected_region_bm;
  1660   BitMap* _expected_card_bm;
  1662   int  _failures;
  1663   bool _verbose;
  1665 public:
  1666   G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
  1667                             BitMap* region_bm, BitMap* card_bm,
  1668                             BitMap* expected_region_bm, BitMap* expected_card_bm)
  1669     : AbstractGangTask("G1 verify final counting"),
  1670       _g1h(g1h), _cm(_g1h->concurrent_mark()),
  1671       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
  1672       _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
  1673       _failures(0), _verbose(false),
  1674       _n_workers(0) {
  1675     assert(VerifyDuringGC, "don't call this otherwise");
  1677     // Use the value already set as the number of active threads
  1678     // in the call to run_task().
  1679     if (G1CollectedHeap::use_parallel_gc_threads()) {
  1680       assert( _g1h->workers()->active_workers() > 0,
  1681         "Should have been previously set");
  1682       _n_workers = _g1h->workers()->active_workers();
  1683     } else {
  1684       _n_workers = 1;
  1687     assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
  1688     assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
  1690     _verbose = _cm->verbose_medium();
  1693   void work(uint worker_id) {
  1694     assert(worker_id < _n_workers, "invariant");
  1696     VerifyLiveObjectDataHRClosure verify_cl(_g1h,
  1697                                             _actual_region_bm, _actual_card_bm,
  1698                                             _expected_region_bm,
  1699                                             _expected_card_bm,
  1700                                             _verbose);
  1702     if (G1CollectedHeap::use_parallel_gc_threads()) {
  1703       _g1h->heap_region_par_iterate_chunked(&verify_cl,
  1704                                             worker_id,
  1705                                             _n_workers,
  1706                                             HeapRegion::VerifyCountClaimValue);
  1707     } else {
  1708       _g1h->heap_region_iterate(&verify_cl);
  1711     Atomic::add(verify_cl.failures(), &_failures);
  1714   int failures() const { return _failures; }
  1715 };
  1717 // Closure that finalizes the liveness counting data.
  1718 // Used during the cleanup pause.
  1719 // Sets the bits corresponding to the interval [NTAMS, top)
  1720 // (which contains the implicitly live objects) in the
  1721 // card liveness bitmap. Also sets the bit for each region
  1722 // containing live data in the region liveness bitmap.
  1724 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
  1725  public:
  1726   FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
  1727                               BitMap* region_bm,
  1728                               BitMap* card_bm) :
  1729     CMCountDataClosureBase(g1h, region_bm, card_bm) { }
  1731   bool doHeapRegion(HeapRegion* hr) {
  1733     if (hr->continuesHumongous()) {
  1734       // We will ignore these here and process them when their
  1735       // associated "starts humongous" region is processed (see
  1736       // set_bit_for_region()). Note that we cannot rely on their
  1737       // associated "starts humongous" region to have its bit set to
  1738       // 1 since, due to the region chunking in the parallel region
  1739       // iteration, a "continues humongous" region might be visited
  1740       // before its associated "starts humongous".
  1741       return false;
  1744     HeapWord* ntams = hr->next_top_at_mark_start();
  1745     HeapWord* top   = hr->top();
  1747     assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
  1749     // Mark the allocated-since-marking portion...
  1750     if (ntams < top) {
  1751       // This definitely means the region has live objects.
  1752       set_bit_for_region(hr);
  1754       // Now set the bits in the card bitmap for [ntams, top)
  1755       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
  1756       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
  1758       // Note: if we're looking at the last region in heap - top
  1759       // could actually be just beyond the end of the heap; end_idx
  1760       // will then correspond to a (non-existent) card that is also
  1761       // just beyond the heap.
  1762       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
  1763         // end of object is not card aligned - increment to cover
  1764         // all the cards spanned by the object
  1765         end_idx += 1;
  1768       assert(end_idx <= _card_bm->size(),
  1769              err_msg("oob: end_idx=  "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
  1770                      end_idx, _card_bm->size()));
  1771       assert(start_idx < _card_bm->size(),
  1772              err_msg("oob: start_idx=  "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
  1773                      start_idx, _card_bm->size()));
  1775       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
  1778     // Set the bit for the region if it contains live data
  1779     if (hr->next_marked_bytes() > 0) {
  1780       set_bit_for_region(hr);
  1783     return false;
  1785 };
  1787 class G1ParFinalCountTask: public AbstractGangTask {
  1788 protected:
  1789   G1CollectedHeap* _g1h;
  1790   ConcurrentMark* _cm;
  1791   BitMap* _actual_region_bm;
  1792   BitMap* _actual_card_bm;
  1794   uint    _n_workers;
  1796 public:
  1797   G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
  1798     : AbstractGangTask("G1 final counting"),
  1799       _g1h(g1h), _cm(_g1h->concurrent_mark()),
  1800       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
  1801       _n_workers(0) {
  1802     // Use the value already set as the number of active threads
  1803     // in the call to run_task().
  1804     if (G1CollectedHeap::use_parallel_gc_threads()) {
  1805       assert( _g1h->workers()->active_workers() > 0,
  1806         "Should have been previously set");
  1807       _n_workers = _g1h->workers()->active_workers();
  1808     } else {
  1809       _n_workers = 1;
  1813   void work(uint worker_id) {
  1814     assert(worker_id < _n_workers, "invariant");
  1816     FinalCountDataUpdateClosure final_update_cl(_g1h,
  1817                                                 _actual_region_bm,
  1818                                                 _actual_card_bm);
  1820     if (G1CollectedHeap::use_parallel_gc_threads()) {
  1821       _g1h->heap_region_par_iterate_chunked(&final_update_cl,
  1822                                             worker_id,
  1823                                             _n_workers,
  1824                                             HeapRegion::FinalCountClaimValue);
  1825     } else {
  1826       _g1h->heap_region_iterate(&final_update_cl);
  1829 };
  1831 class G1ParNoteEndTask;
  1833 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  1834   G1CollectedHeap* _g1;
  1835   size_t _max_live_bytes;
  1836   uint _regions_claimed;
  1837   size_t _freed_bytes;
  1838   FreeRegionList* _local_cleanup_list;
  1839   HeapRegionSetCount _old_regions_removed;
  1840   HeapRegionSetCount _humongous_regions_removed;
  1841   HRRSCleanupTask* _hrrs_cleanup_task;
  1842   double _claimed_region_time;
  1843   double _max_region_time;
  1845 public:
  1846   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
  1847                              FreeRegionList* local_cleanup_list,
  1848                              HRRSCleanupTask* hrrs_cleanup_task) :
  1849     _g1(g1),
  1850     _max_live_bytes(0), _regions_claimed(0),
  1851     _freed_bytes(0),
  1852     _claimed_region_time(0.0), _max_region_time(0.0),
  1853     _local_cleanup_list(local_cleanup_list),
  1854     _old_regions_removed(),
  1855     _humongous_regions_removed(),
  1856     _hrrs_cleanup_task(hrrs_cleanup_task) { }
  1858   size_t freed_bytes() { return _freed_bytes; }
  1859   const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; }
  1860   const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; }
  1862   bool doHeapRegion(HeapRegion *hr) {
  1863     if (hr->continuesHumongous()) {
  1864       return false;
  1866     // We use a claim value of zero here because all regions
  1867     // were claimed with value 1 in the FinalCount task.
  1868     _g1->reset_gc_time_stamps(hr);
  1869     double start = os::elapsedTime();
  1870     _regions_claimed++;
  1871     hr->note_end_of_marking();
  1872     _max_live_bytes += hr->max_live_bytes();
  1874     if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
  1875       _freed_bytes += hr->used();
  1876       hr->set_containing_set(NULL);
  1877       if (hr->isHumongous()) {
  1878         assert(hr->startsHumongous(), "we should only see starts humongous");
  1879         _humongous_regions_removed.increment(1u, hr->capacity());
  1880         _g1->free_humongous_region(hr, _local_cleanup_list, true);
  1881       } else {
  1882         _old_regions_removed.increment(1u, hr->capacity());
  1883         _g1->free_region(hr, _local_cleanup_list, true);
  1885     } else {
  1886       hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
  1889     double region_time = (os::elapsedTime() - start);
  1890     _claimed_region_time += region_time;
  1891     if (region_time > _max_region_time) {
  1892       _max_region_time = region_time;
  1894     return false;
  1897   size_t max_live_bytes() { return _max_live_bytes; }
  1898   uint regions_claimed() { return _regions_claimed; }
  1899   double claimed_region_time_sec() { return _claimed_region_time; }
  1900   double max_region_time_sec() { return _max_region_time; }
  1901 };
  1903 class G1ParNoteEndTask: public AbstractGangTask {
  1904   friend class G1NoteEndOfConcMarkClosure;
  1906 protected:
  1907   G1CollectedHeap* _g1h;
  1908   size_t _max_live_bytes;
  1909   size_t _freed_bytes;
  1910   FreeRegionList* _cleanup_list;
  1912 public:
  1913   G1ParNoteEndTask(G1CollectedHeap* g1h,
  1914                    FreeRegionList* cleanup_list) :
  1915     AbstractGangTask("G1 note end"), _g1h(g1h),
  1916     _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
  1918   void work(uint worker_id) {
  1919     double start = os::elapsedTime();
  1920     FreeRegionList local_cleanup_list("Local Cleanup List");
  1921     HRRSCleanupTask hrrs_cleanup_task;
  1922     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
  1923                                            &hrrs_cleanup_task);
  1924     if (G1CollectedHeap::use_parallel_gc_threads()) {
  1925       _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id,
  1926                                             _g1h->workers()->active_workers(),
  1927                                             HeapRegion::NoteEndClaimValue);
  1928     } else {
  1929       _g1h->heap_region_iterate(&g1_note_end);
  1931     assert(g1_note_end.complete(), "Shouldn't have yielded!");
  1933     // Now update the lists
  1934     _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
  1936       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  1937       _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());
  1938       _max_live_bytes += g1_note_end.max_live_bytes();
  1939       _freed_bytes += g1_note_end.freed_bytes();
  1941       // If we iterate over the global cleanup list at the end of
  1942       // cleanup to do this printing, we cannot guarantee that we only
  1943       // generate output for the newly-reclaimed regions (the list
  1944       // might not be empty at the beginning of cleanup; we might
  1945       // still be working on its previous contents). So we do the
  1946       // printing here, before we append the new regions to the global
  1947       // cleanup list.
  1949       G1HRPrinter* hr_printer = _g1h->hr_printer();
  1950       if (hr_printer->is_active()) {
  1951         FreeRegionListIterator iter(&local_cleanup_list);
  1952         while (iter.more_available()) {
  1953           HeapRegion* hr = iter.get_next();
  1954           hr_printer->cleanup(hr);
  1958       _cleanup_list->add_ordered(&local_cleanup_list);
  1959       assert(local_cleanup_list.is_empty(), "post-condition");
  1961       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
  1964   size_t max_live_bytes() { return _max_live_bytes; }
  1965   size_t freed_bytes() { return _freed_bytes; }
  1966 };
  1968 class G1ParScrubRemSetTask: public AbstractGangTask {
  1969 protected:
  1970   G1RemSet* _g1rs;
  1971   BitMap* _region_bm;
  1972   BitMap* _card_bm;
  1973 public:
  1974   G1ParScrubRemSetTask(G1CollectedHeap* g1h,
  1975                        BitMap* region_bm, BitMap* card_bm) :
  1976     AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
  1977     _region_bm(region_bm), _card_bm(card_bm) { }
  1979   void work(uint worker_id) {
  1980     if (G1CollectedHeap::use_parallel_gc_threads()) {
  1981       _g1rs->scrub_par(_region_bm, _card_bm, worker_id,
  1982                        HeapRegion::ScrubRemSetClaimValue);
  1983     } else {
  1984       _g1rs->scrub(_region_bm, _card_bm);
  1988 };
  1990 void ConcurrentMark::cleanup() {
  1991   // world is stopped at this checkpoint
  1992   assert(SafepointSynchronize::is_at_safepoint(),
  1993          "world should be stopped");
  1994   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  1996   // If a full collection has happened, we shouldn't do this.
  1997   if (has_aborted()) {
  1998     g1h->set_marking_complete(); // So bitmap clearing isn't confused
  1999     return;
  2002   g1h->verify_region_sets_optional();
  2004   if (VerifyDuringGC) {
  2005     HandleMark hm;  // handle scope
  2006     Universe::heap()->prepare_for_verify();
  2007     Universe::verify(VerifyOption_G1UsePrevMarking,
  2008                      " VerifyDuringGC:(before)");
  2011   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
  2012   g1p->record_concurrent_mark_cleanup_start();
  2014   double start = os::elapsedTime();
  2016   HeapRegionRemSet::reset_for_cleanup_tasks();
  2018   uint n_workers;
  2020   // Do counting once more with the world stopped for good measure.
  2021   G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
  2023   if (G1CollectedHeap::use_parallel_gc_threads()) {
  2024     assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
  2025            "sanity check");
  2027     g1h->set_par_threads();
  2028     n_workers = g1h->n_par_threads();
  2029     assert(g1h->n_par_threads() == n_workers,
  2030            "Should not have been reset");
  2031     g1h->workers()->run_task(&g1_par_count_task);
  2032     // Done with the parallel phase so reset to 0.
  2033     g1h->set_par_threads(0);
  2035     assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue),
  2036            "sanity check");
  2037   } else {
  2038     n_workers = 1;
  2039     g1_par_count_task.work(0);
  2042   if (VerifyDuringGC) {
  2043     // Verify that the counting data accumulated during marking matches
  2044     // that calculated by walking the marking bitmap.
  2046     // Bitmaps to hold expected values
  2047     BitMap expected_region_bm(_region_bm.size(), true);
  2048     BitMap expected_card_bm(_card_bm.size(), true);
  2050     G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
  2051                                                  &_region_bm,
  2052                                                  &_card_bm,
  2053                                                  &expected_region_bm,
  2054                                                  &expected_card_bm);
  2056     if (G1CollectedHeap::use_parallel_gc_threads()) {
  2057       g1h->set_par_threads((int)n_workers);
  2058       g1h->workers()->run_task(&g1_par_verify_task);
  2059       // Done with the parallel phase so reset to 0.
  2060       g1h->set_par_threads(0);
  2062       assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
  2063              "sanity check");
  2064     } else {
  2065       g1_par_verify_task.work(0);
  2068     guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
  2071   size_t start_used_bytes = g1h->used();
  2072   g1h->set_marking_complete();
  2074   double count_end = os::elapsedTime();
  2075   double this_final_counting_time = (count_end - start);
  2076   _total_counting_time += this_final_counting_time;
  2078   if (G1PrintRegionLivenessInfo) {
  2079     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
  2080     _g1h->heap_region_iterate(&cl);
  2083   // Install newly created mark bitMap as "prev".
  2084   swapMarkBitMaps();
  2086   g1h->reset_gc_time_stamp();
  2088   // Note end of marking in all heap regions.
  2089   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
  2090   if (G1CollectedHeap::use_parallel_gc_threads()) {
  2091     g1h->set_par_threads((int)n_workers);
  2092     g1h->workers()->run_task(&g1_par_note_end_task);
  2093     g1h->set_par_threads(0);
  2095     assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
  2096            "sanity check");
  2097   } else {
  2098     g1_par_note_end_task.work(0);
  2100   g1h->check_gc_time_stamps();
  2102   if (!cleanup_list_is_empty()) {
  2103     // The cleanup list is not empty, so we'll have to process it
  2104     // concurrently. Notify anyone else that might be wanting free
  2105     // regions that there will be more free regions coming soon.
  2106     g1h->set_free_regions_coming();
  2109   // call below, since it affects the metric by which we sort the heap
  2110   // regions.
  2111   if (G1ScrubRemSets) {
  2112     double rs_scrub_start = os::elapsedTime();
  2113     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
  2114     if (G1CollectedHeap::use_parallel_gc_threads()) {
  2115       g1h->set_par_threads((int)n_workers);
  2116       g1h->workers()->run_task(&g1_par_scrub_rs_task);
  2117       g1h->set_par_threads(0);
  2119       assert(g1h->check_heap_region_claim_values(
  2120                                             HeapRegion::ScrubRemSetClaimValue),
  2121              "sanity check");
  2122     } else {
  2123       g1_par_scrub_rs_task.work(0);
  2126     double rs_scrub_end = os::elapsedTime();
  2127     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
  2128     _total_rs_scrub_time += this_rs_scrub_time;
  2131   // this will also free any regions totally full of garbage objects,
  2132   // and sort the regions.
  2133   g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
  2135   // Statistics.
  2136   double end = os::elapsedTime();
  2137   _cleanup_times.add((end - start) * 1000.0);
  2139   if (G1Log::fine()) {
  2140     g1h->print_size_transition(gclog_or_tty,
  2141                                start_used_bytes,
  2142                                g1h->used(),
  2143                                g1h->capacity());
  2146   // Clean up will have freed any regions completely full of garbage.
  2147   // Update the soft reference policy with the new heap occupancy.
  2148   Universe::update_heap_info_at_gc();
  2150   if (VerifyDuringGC) {
  2151     HandleMark hm;  // handle scope
  2152     Universe::heap()->prepare_for_verify();
  2153     Universe::verify(VerifyOption_G1UsePrevMarking,
  2154                      " VerifyDuringGC:(after)");
  2157   g1h->verify_region_sets_optional();
  2159   // We need to make this be a "collection" so any collection pause that
  2160   // races with it goes around and waits for completeCleanup to finish.
  2161   g1h->increment_total_collections();
  2163   // Clean out dead classes and update Metaspace sizes.
  2164   if (ClassUnloadingWithConcurrentMark) {
  2165     ClassLoaderDataGraph::purge();
  2167   MetaspaceGC::compute_new_size();
  2169   // We reclaimed old regions so we should calculate the sizes to make
  2170   // sure we update the old gen/space data.
  2171   g1h->g1mm()->update_sizes();
  2173   g1h->trace_heap_after_concurrent_cycle();
  2176 void ConcurrentMark::completeCleanup() {
  2177   if (has_aborted()) return;
  2179   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  2181   _cleanup_list.verify_optional();
  2182   FreeRegionList tmp_free_list("Tmp Free List");
  2184   if (G1ConcRegionFreeingVerbose) {
  2185     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
  2186                            "cleanup list has %u entries",
  2187                            _cleanup_list.length());
  2190   // No one else should be accessing the _cleanup_list at this point,
  2191   // so it's not necessary to take any locks
  2192   while (!_cleanup_list.is_empty()) {
  2193     HeapRegion* hr = _cleanup_list.remove_head();
  2194     assert(hr != NULL, "Got NULL from a non-empty list");
  2195     hr->par_clear();
  2196     tmp_free_list.add_ordered(hr);
  2198     // Instead of adding one region at a time to the secondary_free_list,
  2199     // we accumulate them in the local list and move them a few at a
  2200     // time. This also cuts down on the number of notify_all() calls
  2201     // we do during this process. We'll also append the local list when
  2202     // _cleanup_list is empty (which means we just removed the last
  2203     // region from the _cleanup_list).
  2204     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
  2205         _cleanup_list.is_empty()) {
  2206       if (G1ConcRegionFreeingVerbose) {
  2207         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
  2208                                "appending %u entries to the secondary_free_list, "
  2209                                "cleanup list still has %u entries",
  2210                                tmp_free_list.length(),
  2211                                _cleanup_list.length());
  2215         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
  2216         g1h->secondary_free_list_add(&tmp_free_list);
  2217         SecondaryFreeList_lock->notify_all();
  2220       if (G1StressConcRegionFreeing) {
  2221         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
  2222           os::sleep(Thread::current(), (jlong) 1, false);
  2227   assert(tmp_free_list.is_empty(), "post-condition");
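// ---------------------------------------------------------------------------
// A minimal, standalone sketch (not HotSpot code) of the batching strategy
// used above: regions are moved onto a local list and handed to the shared
// secondary list (under its lock) only every G1SecondaryFreeListAppendLength
// entries, plus once when the source list runs empty. std::vector and a
// callback stand in for the HotSpot free-list and locking types; the names
// are assumptions for illustration only.
#include <cstddef>
#include <functional>
#include <vector>

template <typename T>
static void drain_in_batches_sketch(std::vector<T>& cleanup_list,
                                    std::size_t batch_len, // must be > 0
                                    const std::function<void(std::vector<T>&)>& publish) {
  std::vector<T> local;
  while (!cleanup_list.empty()) {
    local.push_back(cleanup_list.back()); // models remove_head() + add_ordered()
    cleanup_list.pop_back();
    if (local.size() % batch_len == 0 || cleanup_list.empty()) {
      publish(local);                     // append to the shared list and notify waiters
      local.clear();
    }
  }
}
// ---------------------------------------------------------------------------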
  2230 // Supporting Object and Oop closures for reference discovery
  2231 // and processing during marking
  2233 bool G1CMIsAliveClosure::do_object_b(oop obj) {
  2234   HeapWord* addr = (HeapWord*)obj;
  2235   return addr != NULL &&
  2236          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
  2239 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
  2240 // Uses the CMTask associated with a worker thread (for serial reference
  2241 // processing the CMTask for worker 0 is used) to preserve (mark) and
  2242 // trace referent objects.
  2243 //
  2244 // Using the CMTask and embedded local queues avoids having the worker
  2245 // threads operating on the global mark stack. This reduces the risk
  2246 // of overflowing the stack - which we would rather avoid at this late
  2247 // stage. Using the tasks' local queues also removes the potential
  2248 // for the workers to interfere with each other, which could occur if
  2249 // they operated on the global stack.
  2251 class G1CMKeepAliveAndDrainClosure: public OopClosure {
  2252   ConcurrentMark* _cm;
  2253   CMTask*         _task;
  2254   int             _ref_counter_limit;
  2255   int             _ref_counter;
  2256   bool            _is_serial;
  2257  public:
  2258   G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
  2259     _cm(cm), _task(task), _is_serial(is_serial),
  2260     _ref_counter_limit(G1RefProcDrainInterval) {
  2261     assert(_ref_counter_limit > 0, "sanity");
  2262     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
  2263     _ref_counter = _ref_counter_limit;
  2266   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  2267   virtual void do_oop(      oop* p) { do_oop_work(p); }
  2269   template <class T> void do_oop_work(T* p) {
  2270     if (!_cm->has_overflown()) {
  2271       oop obj = oopDesc::load_decode_heap_oop(p);
  2272       if (_cm->verbose_high()) {
  2273         gclog_or_tty->print_cr("\t[%u] we're looking at location "
  2274                                "*"PTR_FORMAT" = "PTR_FORMAT,
  2275                                _task->worker_id(), p2i(p), p2i((void*) obj));
  2278       _task->deal_with_reference(obj);
  2279       _ref_counter--;
  2281       if (_ref_counter == 0) {
  2282         // We have dealt with _ref_counter_limit references, pushing them
  2283         // and objects reachable from them on to the local stack (and
  2284         // possibly the global stack). Call CMTask::do_marking_step() to
  2285         // process these entries.
  2286         //
  2287         // We call CMTask::do_marking_step() in a loop, which we'll exit if
  2288         // there's nothing more to do (i.e. we're done with the entries that
  2289         // were pushed as a result of the CMTask::deal_with_reference() calls
  2290         // above) or we overflow.
  2291         //
  2292         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
  2293         // flag while there may still be some work to do. (See the comment at
  2294         // the beginning of CMTask::do_marking_step() for those conditions -
  2295         // one of which is reaching the specified time target.) It is only
  2296         // when CMTask::do_marking_step() returns without setting the
  2297         // has_aborted() flag that the marking step has completed.
  2298         do {
  2299           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
  2300           _task->do_marking_step(mark_step_duration_ms,
  2301                                  false      /* do_termination */,
  2302                                  _is_serial);
  2303         } while (_task->has_aborted() && !_cm->has_overflown());
  2304         _ref_counter = _ref_counter_limit;
  2306     } else {
  2307       if (_cm->verbose_high()) {
  2308          gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id());
  2312 };
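// ---------------------------------------------------------------------------
// A minimal, standalone sketch (not HotSpot code) of the cadence implemented
// by the closure above: accept G1RefProcDrainInterval references, then drain
// the per-task queues before accepting more. std::function stands in for the
// CMTask::do_marking_step() retry loop; the type and member names are
// assumptions for illustration only.
#include <functional>
#include <utility>

struct ChunkedRefDrainerSketch {
  int limit;                   // models G1RefProcDrainInterval
  int counter;
  std::function<void()> drain; // models the do_marking_step() loop above

  ChunkedRefDrainerSketch(int lim, std::function<void()> d)
    : limit(lim), counter(lim), drain(std::move(d)) {}

  void deal_with_reference() {
    if (--counter == 0) {
      drain();         // process everything pushed for the last 'limit' references
      counter = limit; // start the next chunk
    }
  }
};
// ---------------------------------------------------------------------------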
  2314 // 'Drain' oop closure used by both serial and parallel reference processing.
  2315 // Uses the CMTask associated with a given worker thread (for serial
  2316 // reference processing the CMTask for worker 0 is used). Calls the
  2317 // do_marking_step routine, with an unbelievably large timeout value,
  2318 // to drain the marking data structures of the remaining entries
  2319 // added by the 'keep alive' oop closure above.
  2321 class G1CMDrainMarkingStackClosure: public VoidClosure {
  2322   ConcurrentMark* _cm;
  2323   CMTask*         _task;
  2324   bool            _is_serial;
  2325  public:
  2326   G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
  2327     _cm(cm), _task(task), _is_serial(is_serial) {
  2328     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
  2331   void do_void() {
  2332     do {
  2333       if (_cm->verbose_high()) {
  2334         gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step - serial: %s",
  2335                                _task->worker_id(), BOOL_TO_STR(_is_serial));
  2338       // We call CMTask::do_marking_step() to completely drain the local
  2339       // and global marking stacks of entries pushed by the 'keep alive'
  2340       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
  2341       //
  2342       // CMTask::do_marking_step() is called in a loop, which we'll exit
  2343       // if there's nothing more to do (i.e. we've completely drained the
  2344       // entries that were pushed as a result of applying the 'keep alive'
  2345       // closure to the entries on the discovered ref lists) or we overflow
  2346       // the global marking stack.
  2347       //
  2348       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
  2349       // flag while there may still be some work to do. (See the comment at
  2350       // the beginning of CMTask::do_marking_step() for those conditions -
  2351       // one of which is reaching the specified time target.) It is only
  2352       // when CMTask::do_marking_step() returns without setting the
  2353       // has_aborted() flag that the marking step has completed.
  2355       _task->do_marking_step(1000000000.0 /* something very large */,
  2356                              true         /* do_termination */,
  2357                              _is_serial);
  2358     } while (_task->has_aborted() && !_cm->has_overflown());
  2360 };
  2362 // Implementation of AbstractRefProcTaskExecutor for parallel
  2363 // reference processing at the end of G1 concurrent marking
  2365 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
  2366 private:
  2367   G1CollectedHeap* _g1h;
  2368   ConcurrentMark*  _cm;
  2369   WorkGang*        _workers;
  2370   int              _active_workers;
  2372 public:
  2373   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
  2374                         ConcurrentMark* cm,
  2375                         WorkGang* workers,
  2376                         int n_workers) :
  2377     _g1h(g1h), _cm(cm),
  2378     _workers(workers), _active_workers(n_workers) { }
  2380   // Executes the given task using concurrent marking worker threads.
  2381   virtual void execute(ProcessTask& task);
  2382   virtual void execute(EnqueueTask& task);
  2383 };
  2385 class G1CMRefProcTaskProxy: public AbstractGangTask {
  2386   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
  2387   ProcessTask&     _proc_task;
  2388   G1CollectedHeap* _g1h;
  2389   ConcurrentMark*  _cm;
  2391 public:
  2392   G1CMRefProcTaskProxy(ProcessTask& proc_task,
  2393                      G1CollectedHeap* g1h,
  2394                      ConcurrentMark* cm) :
  2395     AbstractGangTask("Process reference objects in parallel"),
  2396     _proc_task(proc_task), _g1h(g1h), _cm(cm) {
  2397     ReferenceProcessor* rp = _g1h->ref_processor_cm();
  2398     assert(rp->processing_is_mt(), "shouldn't be here otherwise");
  2401   virtual void work(uint worker_id) {
  2402     ResourceMark rm;
  2403     HandleMark hm;
  2404     CMTask* task = _cm->task(worker_id);
  2405     G1CMIsAliveClosure g1_is_alive(_g1h);
  2406     G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
  2407     G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
  2409     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
  2411 };
  2413 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
  2414   assert(_workers != NULL, "Need parallel worker threads.");
  2415   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
  2417   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
  2419   // We need to reset the concurrency level before each
  2420   // proxy task execution, so that the termination protocol
  2421   // and overflow handling in CMTask::do_marking_step() know
  2422   // how many workers to wait for.
  2423   _cm->set_concurrency(_active_workers);
  2424   _g1h->set_par_threads(_active_workers);
  2425   _workers->run_task(&proc_task_proxy);
  2426   _g1h->set_par_threads(0);
  2429 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
  2430   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
  2431   EnqueueTask& _enq_task;
  2433 public:
  2434   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
  2435     AbstractGangTask("Enqueue reference objects in parallel"),
  2436     _enq_task(enq_task) { }
  2438   virtual void work(uint worker_id) {
  2439     _enq_task.work(worker_id);
  2441 };
  2443 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
  2444   assert(_workers != NULL, "Need parallel worker threads.");
  2445   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
  2447   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
  2449   // Not strictly necessary but...
  2450   //
  2451   // We need to reset the concurrency level before each
  2452   // proxy task execution, so that the termination protocol
  2453   // and overflow handling in CMTask::do_marking_step() know
  2454   // how many workers to wait for.
  2455   _cm->set_concurrency(_active_workers);
  2456   _g1h->set_par_threads(_active_workers);
  2457   _workers->run_task(&enq_task_proxy);
  2458   _g1h->set_par_threads(0);
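// ---------------------------------------------------------------------------
// A minimal, standalone sketch (not HotSpot code) of the dispatch shape shared
// by both execute() overloads above: size the termination protocol and the
// work gang, run the proxy task, then reset the parallel thread count. The
// std::function parameters stand in for ConcurrentMark::set_concurrency(),
// G1CollectedHeap::set_par_threads() and WorkGang::run_task(); no such helper
// exists in the HotSpot sources.
#include <functional>

static void run_with_concurrency_sketch(const std::function<void(int)>& set_concurrency,
                                        const std::function<void(int)>& set_par_threads,
                                        const std::function<void()>& run_task,
                                        int n_workers) {
  set_concurrency(n_workers); // so do_marking_step() knows how many workers to wait for
  set_par_threads(n_workers); // size the work gang for this parallel phase
  run_task();
  set_par_threads(0);         // parallel phase done; reset
}
// ---------------------------------------------------------------------------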
  2461 void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
  2462   G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes);
  2465 // Helper class to get rid of some boilerplate code.
  2466 class G1RemarkGCTraceTime : public GCTraceTime {
  2467   static bool doit_and_prepend(bool doit) {
  2468     if (doit) {
  2469       gclog_or_tty->put(' ');
  2471     return doit;
  2474  public:
  2475   G1RemarkGCTraceTime(const char* title, bool doit)
  2476     : GCTraceTime(title, doit_and_prepend(doit), false, G1CollectedHeap::heap()->gc_timer_cm(),
  2477         G1CollectedHeap::heap()->concurrent_mark()->concurrent_gc_id()) {
  2479 };
  2481 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
  2482   if (has_overflown()) {
  2483     // Skip processing the discovered references if we have
  2484     // overflown the global marking stack. Reference objects
  2485     // only get discovered once, so it is OK not to
  2486     // de-populate the discovered reference lists. We could have,
  2487     // but the only benefit would be that, when marking restarts,
  2488     // fewer reference objects are discovered.
  2489     return;
  2492   ResourceMark rm;
  2493   HandleMark   hm;
  2495   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  2497   // Is alive closure.
  2498   G1CMIsAliveClosure g1_is_alive(g1h);
  2500   // Inner scope to exclude the cleaning of the string and symbol
  2501   // tables from the displayed time.
  2503     if (G1Log::finer()) {
  2504       gclog_or_tty->put(' ');
  2506     GCTraceTime t("GC ref-proc", G1Log::finer(), false, g1h->gc_timer_cm(), concurrent_gc_id());
  2508     ReferenceProcessor* rp = g1h->ref_processor_cm();
  2510     // See the comment in G1CollectedHeap::ref_processing_init()
  2511     // about how reference processing currently works in G1.
  2513     // Set the soft reference policy
  2514     rp->setup_policy(clear_all_soft_refs);
  2515     assert(_markStack.isEmpty(), "mark stack should be empty");
  2517     // Instances of the 'Keep Alive' and 'Complete GC' closures used
  2518     // in serial reference processing. Note these closures are also
  2519     // used for serially processing (by the current thread) the
  2520     // JNI references during parallel reference processing.
  2521     //
  2522     // These closures do not need to synchronize with the worker
  2523     // threads involved in parallel reference processing as these
  2524     // instances are executed serially by the current thread (e.g.
  2525     // reference processing is not multi-threaded and is thus
  2526     // performed by the current thread instead of a gang worker).
  2527     //
  2528     // The gang tasks involved in parallel reference processing create
  2529     // their own instances of these closures, which do their own
  2530     // synchronization among themselves.
  2531     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
  2532     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
  2534     // We need at least one active thread. If reference processing
  2535     // is not multi-threaded we use the current (VMThread) thread,
  2536     // otherwise we use the work gang from the G1CollectedHeap and
  2537     // we utilize all the worker threads we can.
  2538     bool processing_is_mt = rp->processing_is_mt() && g1h->workers() != NULL;
  2539     uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
  2540     active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
  2542     // Parallel processing task executor.
  2543     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
  2544                                               g1h->workers(), active_workers);
  2545     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
  2547     // Set the concurrency level. The phase was already set prior to
  2548     // executing the remark task.
  2549     set_concurrency(active_workers);
  2551     // Set the degree of MT processing here.  If the discovery was done MT,
  2552     // the number of threads involved during discovery could differ from
  2553     // the number of active workers.  This is OK as long as the discovered
  2554     // Reference lists are balanced (see balance_all_queues() and balance_queues()).
  2555     rp->set_active_mt_degree(active_workers);
  2557     // Process the weak references.
  2558     const ReferenceProcessorStats& stats =
  2559         rp->process_discovered_references(&g1_is_alive,
  2560                                           &g1_keep_alive,
  2561                                           &g1_drain_mark_stack,
  2562                                           executor,
  2563                                           g1h->gc_timer_cm(),
  2564                                           concurrent_gc_id());
  2565     g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
  2567     // The do_oop work routines of the keep_alive and drain_marking_stack
  2568     // oop closures will set the has_overflown flag if we overflow the
  2569     // global marking stack.
  2571     assert(_markStack.overflow() || _markStack.isEmpty(),
  2572             "mark stack should be empty (unless it overflowed)");
  2574     if (_markStack.overflow()) {
  2575       // This should have been done already when we tried to push an
  2576       // entry on to the global mark stack. But let's do it again.
  2577       set_has_overflown();
  2580     assert(rp->num_q() == active_workers, "why not");
  2582     rp->enqueue_discovered_references(executor);
  2584     rp->verify_no_references_recorded();
  2585     assert(!rp->discovery_enabled(), "Post condition");
  2588   if (has_overflown()) {
  2589     // We cannot trust g1_is_alive if the marking stack overflowed
  2590     return;
  2593   assert(_markStack.isEmpty(), "Marking should have completed");
  2595   // Unload Klasses, String, Symbols, Code Cache, etc.
  2597     G1RemarkGCTraceTime trace("Unloading", G1Log::finer());
  2599     if (ClassUnloadingWithConcurrentMark) {
  2600       bool purged_classes;
  2603         G1RemarkGCTraceTime trace("System Dictionary Unloading", G1Log::finest());
  2604         purged_classes = SystemDictionary::do_unloading(&g1_is_alive);
  2608         G1RemarkGCTraceTime trace("Parallel Unloading", G1Log::finest());
  2609         weakRefsWorkParallelPart(&g1_is_alive, purged_classes);
  2613     if (G1StringDedup::is_enabled()) {
  2614       G1RemarkGCTraceTime trace("String Deduplication Unlink", G1Log::finest());
  2615       G1StringDedup::unlink(&g1_is_alive);
  2620 void ConcurrentMark::swapMarkBitMaps() {
  2621   CMBitMapRO* temp = _prevMarkBitMap;
  2622   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
  2623   _nextMarkBitMap  = (CMBitMap*)  temp;
  2626 class CMObjectClosure;
  2628 // Closure for iterating over objects, currently only used for
  2629 // processing SATB buffers.
  2630 class CMObjectClosure : public ObjectClosure {
  2631 private:
  2632   CMTask* _task;
  2634 public:
  2635   void do_object(oop obj) {
  2636     _task->deal_with_reference(obj);
  2639   CMObjectClosure(CMTask* task) : _task(task) { }
  2640 };
  2642 class G1RemarkThreadsClosure : public ThreadClosure {
  2643   CMObjectClosure _cm_obj;
  2644   G1CMOopClosure _cm_cl;
  2645   MarkingCodeBlobClosure _code_cl;
  2646   int _thread_parity;
  2647   bool _is_par;
  2649  public:
  2650   G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task, bool is_par) :
  2651     _cm_obj(task), _cm_cl(g1h, g1h->concurrent_mark(), task), _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
  2652     _thread_parity(SharedHeap::heap()->strong_roots_parity()), _is_par(is_par) {}
  2654   void do_thread(Thread* thread) {
  2655     if (thread->is_Java_thread()) {
  2656       if (thread->claim_oops_do(_is_par, _thread_parity)) {
  2657         JavaThread* jt = (JavaThread*)thread;
  2659         // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking;
  2660         // however, oops reachable from nmethods have very complex lifecycles:
  2661         // * Alive if on the stack of an executing method
  2662         // * Weakly reachable otherwise
  2663         // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
  2664         // live by the SATB invariant, but other oops recorded in nmethods may behave differently.
  2665         jt->nmethods_do(&_code_cl);
  2667         jt->satb_mark_queue().apply_closure_and_empty(&_cm_obj);
  2669     } else if (thread->is_VM_thread()) {
  2670       if (thread->claim_oops_do(_is_par, _thread_parity)) {
  2671         JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_obj);
  2675 };
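// ---------------------------------------------------------------------------
// A minimal, standalone sketch (not HotSpot code) of the "claim" idea behind
// claim_oops_do() used above: every worker visits every thread, but a
// per-thread parity word lets exactly one worker win the claim for the
// current phase. The atomic below is only an analogy for the real mechanism
// in Thread; names and types are assumptions for illustration only.
#include <atomic>

struct ClaimableThreadSketch {
  std::atomic<int> claimed_parity{0};

  // Returns true for exactly one caller per distinct phase parity.
  bool claim(int phase_parity) {
    int observed = claimed_parity.load();
    while (observed != phase_parity) {
      if (claimed_parity.compare_exchange_weak(observed, phase_parity)) {
        return true;  // this worker processes the thread in this phase
      }
    }
    return false;     // another worker already claimed it
  }
};
// ---------------------------------------------------------------------------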
  2677 class CMRemarkTask: public AbstractGangTask {
  2678 private:
  2679   ConcurrentMark* _cm;
  2680   bool            _is_serial;
  2681 public:
  2682   void work(uint worker_id) {
  2683     // Since all available tasks are actually started, we should
  2684     // only proceed if we're supposed to be active.
  2685     if (worker_id < _cm->active_tasks()) {
  2686       CMTask* task = _cm->task(worker_id);
  2687       task->record_start_time();
  2689         ResourceMark rm;
  2690         HandleMark hm;
  2692         G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task, !_is_serial);
  2693         Threads::threads_do(&threads_f);
  2696       do {
  2697         task->do_marking_step(1000000000.0 /* something very large */,
  2698                               true         /* do_termination       */,
  2699                               _is_serial);
  2700       } while (task->has_aborted() && !_cm->has_overflown());
  2701       // If we overflow, then we do not want to restart. We instead
  2702       // want to abort remark and do concurrent marking again.
  2703       task->record_end_time();
  2707   CMRemarkTask(ConcurrentMark* cm, int active_workers, bool is_serial) :
  2708     AbstractGangTask("Par Remark"), _cm(cm), _is_serial(is_serial) {
  2709     _cm->terminator()->reset_for_reuse(active_workers);
  2711 };
  2713 void ConcurrentMark::checkpointRootsFinalWork() {
  2714   ResourceMark rm;
  2715   HandleMark   hm;
  2716   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  2718   G1RemarkGCTraceTime trace("Finalize Marking", G1Log::finer());
  2720   g1h->ensure_parsability(false);
  2722   if (G1CollectedHeap::use_parallel_gc_threads()) {
  2723     G1CollectedHeap::StrongRootsScope srs(g1h);
  2724     // this is remark, so we'll use up all active threads
  2725     uint active_workers = g1h->workers()->active_workers();
  2726     if (active_workers == 0) {
  2727       assert(active_workers > 0, "Should have been set earlier");
  2728       active_workers = (uint) ParallelGCThreads;
  2729       g1h->workers()->set_active_workers(active_workers);
  2731     set_concurrency_and_phase(active_workers, false /* concurrent */);
  2732     // Leave _parallel_marking_threads at its
  2733     // value originally calculated in the ConcurrentMark
  2734     // constructor and pass values of the active workers
  2735     // through the gang in the task.
  2737     CMRemarkTask remarkTask(this, active_workers, false /* is_serial */);
  2738     // We will start all available threads, even if we decide that the
  2739     // active_workers will be fewer. The extra ones will just bail out
  2740     // immediately.
  2741     g1h->set_par_threads(active_workers);
  2742     g1h->workers()->run_task(&remarkTask);
  2743     g1h->set_par_threads(0);
  2744   } else {
  2745     G1CollectedHeap::StrongRootsScope srs(g1h);
  2746     uint active_workers = 1;
  2747     set_concurrency_and_phase(active_workers, false /* concurrent */);
  2749     // Note - if there's no work gang then the VMThread will be
  2750     // the thread to execute the remark - serially. We have
  2751     // to pass true for the is_serial parameter so that
  2752     // CMTask::do_marking_step() doesn't enter the sync
  2753     // barriers in the event of an overflow. Doing so will
  2754     // cause an assert that the current thread is not a
  2755     // concurrent GC thread.
  2756     CMRemarkTask remarkTask(this, active_workers, true /* is_serial*/);
  2757     remarkTask.work(0);
  2759   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  2760   guarantee(has_overflown() ||
  2761             satb_mq_set.completed_buffers_num() == 0,
  2762             err_msg("Invariant: has_overflown = %s, num buffers = %d",
  2763                     BOOL_TO_STR(has_overflown()),
  2764                     satb_mq_set.completed_buffers_num()));
  2766   print_stats();
  2769 #ifndef PRODUCT
  2771 class PrintReachableOopClosure: public OopClosure {
  2772 private:
  2773   G1CollectedHeap* _g1h;
  2774   outputStream*    _out;
  2775   VerifyOption     _vo;
  2776   bool             _all;
  2778 public:
  2779   PrintReachableOopClosure(outputStream* out,
  2780                            VerifyOption  vo,
  2781                            bool          all) :
  2782     _g1h(G1CollectedHeap::heap()),
  2783     _out(out), _vo(vo), _all(all) { }
  2785   void do_oop(narrowOop* p) { do_oop_work(p); }
  2786   void do_oop(      oop* p) { do_oop_work(p); }
  2788   template <class T> void do_oop_work(T* p) {
  2789     oop         obj = oopDesc::load_decode_heap_oop(p);
  2790     const char* str = NULL;
  2791     const char* str2 = "";
  2793     if (obj == NULL) {
  2794       str = "";
  2795     } else if (!_g1h->is_in_g1_reserved(obj)) {
  2796       str = " O";
  2797     } else {
  2798       HeapRegion* hr  = _g1h->heap_region_containing(obj);
  2799       guarantee(hr != NULL, "invariant");
  2800       bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo);
  2801       bool marked = _g1h->is_marked(obj, _vo);
  2803       if (over_tams) {
  2804         str = " >";
  2805         if (marked) {
  2806           str2 = " AND MARKED";
  2808       } else if (marked) {
  2809         str = " M";
  2810       } else {
  2811         str = " NOT";
  2815     _out->print_cr("  "PTR_FORMAT": "PTR_FORMAT"%s%s",
  2816                    p2i(p), p2i((void*) obj), str, str2);
  2818 };
  2820 class PrintReachableObjectClosure : public ObjectClosure {
  2821 private:
  2822   G1CollectedHeap* _g1h;
  2823   outputStream*    _out;
  2824   VerifyOption     _vo;
  2825   bool             _all;
  2826   HeapRegion*      _hr;
  2828 public:
  2829   PrintReachableObjectClosure(outputStream* out,
  2830                               VerifyOption  vo,
  2831                               bool          all,
  2832                               HeapRegion*   hr) :
  2833     _g1h(G1CollectedHeap::heap()),
  2834     _out(out), _vo(vo), _all(all), _hr(hr) { }
  2836   void do_object(oop o) {
  2837     bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo);
  2838     bool marked = _g1h->is_marked(o, _vo);
  2839     bool print_it = _all || over_tams || marked;
  2841     if (print_it) {
  2842       _out->print_cr(" "PTR_FORMAT"%s",
  2843                      p2i((void *)o), (over_tams) ? " >" : (marked) ? " M" : "");
  2844       PrintReachableOopClosure oopCl(_out, _vo, _all);
  2845       o->oop_iterate_no_header(&oopCl);
  2848 };
  2850 class PrintReachableRegionClosure : public HeapRegionClosure {
  2851 private:
  2852   G1CollectedHeap* _g1h;
  2853   outputStream*    _out;
  2854   VerifyOption     _vo;
  2855   bool             _all;
  2857 public:
  2858   bool doHeapRegion(HeapRegion* hr) {
  2859     HeapWord* b = hr->bottom();
  2860     HeapWord* e = hr->end();
  2861     HeapWord* t = hr->top();
  2862     HeapWord* p = _g1h->top_at_mark_start(hr, _vo);
  2863     _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
  2864                    "TAMS: " PTR_FORMAT, p2i(b), p2i(e), p2i(t), p2i(p));
  2865     _out->cr();
  2867     HeapWord* from = b;
  2868     HeapWord* to   = t;
  2870     if (to > from) {
  2871       _out->print_cr("Objects in [" PTR_FORMAT ", " PTR_FORMAT "]", p2i(from), p2i(to));
  2872       _out->cr();
  2873       PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
  2874       hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
  2875       _out->cr();
  2878     return false;
  2881   PrintReachableRegionClosure(outputStream* out,
  2882                               VerifyOption  vo,
  2883                               bool          all) :
  2884     _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { }
  2885 };
  2887 void ConcurrentMark::print_reachable(const char* str,
  2888                                      VerifyOption vo,
  2889                                      bool all) {
  2890   gclog_or_tty->cr();
  2891   gclog_or_tty->print_cr("== Doing heap dump... ");
  2893   if (G1PrintReachableBaseFile == NULL) {
  2894     gclog_or_tty->print_cr("  #### error: no base file defined");
  2895     return;
  2898   if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
  2899       (JVM_MAXPATHLEN - 1)) {
  2900     gclog_or_tty->print_cr("  #### error: file name too long");
  2901     return;
  2904   char file_name[JVM_MAXPATHLEN];
  2905   sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
  2906   gclog_or_tty->print_cr("  dumping to file %s", file_name);
  2908   fileStream fout(file_name);
  2909   if (!fout.is_open()) {
  2910     gclog_or_tty->print_cr("  #### error: could not open file");
  2911     return;
  2914   outputStream* out = &fout;
  2915   out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo));
  2916   out->cr();
  2918   out->print_cr("--- ITERATING OVER REGIONS");
  2919   out->cr();
  2920   PrintReachableRegionClosure rcl(out, vo, all);
  2921   _g1h->heap_region_iterate(&rcl);
  2922   out->cr();
  2924   gclog_or_tty->print_cr("  done");
  2925   gclog_or_tty->flush();
  2928 #endif // PRODUCT
  2930 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
  2931   // Note we are overriding the read-only view of the prev map here, via
  2932   // the cast.
  2933   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
  2936 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
  2937   _nextMarkBitMap->clearRange(mr);
  2940 void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) {
  2941   clearRangePrevBitmap(mr);
  2942   clearRangeNextBitmap(mr);
  2945 HeapRegion*
  2946 ConcurrentMark::claim_region(uint worker_id) {
  2947   // "checkpoint" the finger
  2948   HeapWord* finger = _finger;
  2950   // _heap_end will not change underneath our feet; it only changes at
  2951   // yield points.
  2952   while (finger < _heap_end) {
  2953     assert(_g1h->is_in_g1_reserved(finger), "invariant");
  2955     // Note on how this code handles humongous regions. In the
  2956     // normal case the finger will reach the start of a "starts
  2957     // humongous" (SH) region. Its end will either be the end of the
  2958     // last "continues humongous" (CH) region in the sequence, or the
  2959     // standard end of the SH region (if the SH is the only region in
  2960     // the sequence). That way claim_region() will skip over the CH
  2961     // regions. However, there is a subtle race between a CM thread
  2962     // executing this method and a mutator thread doing a humongous
  2963     // object allocation. The two are not mutually exclusive as the CM
  2964     // thread does not need to hold the Heap_lock when it gets
  2965     // here. So there is a chance that claim_region() will come across
  2966     // a free region that's in the process of becoming a SH or a CH
  2967     // region. In the former case, it will either
  2968     //   a) Miss the update to the region's end, in which case it will
  2969     //      visit every subsequent CH region, will find their bitmaps
  2970     //      empty, and do nothing, or
  2971     //   b) Will observe the update of the region's end (in which case
  2972     //      it will skip the subsequent CH regions).
  2973     // If it comes across a region that suddenly becomes CH, the
  2974     // scenario will be similar to b). So, the race between
  2975     // claim_region() and a humongous object allocation might force us
  2976     // to do a bit of unnecessary work (due to some unnecessary bitmap
  2977     // iterations) but it should not introduce any correctness issues.
  2978     HeapRegion* curr_region   = _g1h->heap_region_containing_raw(finger);
  2979     HeapWord*   bottom        = curr_region->bottom();
  2980     HeapWord*   end           = curr_region->end();
  2981     HeapWord*   limit         = curr_region->next_top_at_mark_start();
  2983     if (verbose_low()) {
  2984       gclog_or_tty->print_cr("[%u] curr_region = "PTR_FORMAT" "
  2985                              "["PTR_FORMAT", "PTR_FORMAT"), "
  2986                              "limit = "PTR_FORMAT,
  2987                              worker_id, p2i(curr_region), p2i(bottom), p2i(end), p2i(limit));
  2990     // Is the gap between reading the finger and doing the CAS too long?
  2991     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
  2992     if (res == finger) {
  2993       // we succeeded
  2995       // notice that _finger == end cannot be guaranteed here since
  2996       // someone else might have moved the finger even further
  2997       assert(_finger >= end, "the finger should have moved forward");
  2999       if (verbose_low()) {
  3000         gclog_or_tty->print_cr("[%u] we were successful with region = "
  3001                                PTR_FORMAT, worker_id, p2i(curr_region));
  3004       if (limit > bottom) {
  3005         if (verbose_low()) {
  3006           gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is not empty, "
  3007                                  "returning it ", worker_id, p2i(curr_region));
  3009         return curr_region;
  3010       } else {
  3011         assert(limit == bottom,
  3012                "the region limit should be at bottom");
  3013         if (verbose_low()) {
  3014           gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is empty, "
  3015                                  "returning NULL", worker_id, p2i(curr_region));
  3017         // we return NULL and the caller should try calling
  3018         // claim_region() again.
  3019         return NULL;
  3021     } else {
  3022       assert(_finger > finger, "the finger should have moved forward");
  3023       if (verbose_low()) {
  3024         gclog_or_tty->print_cr("[%u] somebody else moved the finger, "
  3025                                "global finger = "PTR_FORMAT", "
  3026                                "our finger = "PTR_FORMAT,
  3027                                worker_id, p2i(_finger), p2i(finger));
  3030       // read it again
  3031       finger = _finger;
  3035   return NULL;
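The compare-and-swap on the global finger above is the core of the lock-free region claiming. As a rough, self-contained illustration (not part of this file), the following sketch shows the same pattern using std::atomic and a hypothetical fixed region size; claim_region() itself works on HeapRegion boundaries and uses Atomic::cmpxchg_ptr.

#include <atomic>
#include <cstdint>

// Illustrative only: MiniClaimer, kRegionBytes and try_claim() are invented
// names with a fixed power-of-two region size.
struct MiniClaimer {
  static const uintptr_t kRegionBytes = 1u << 20;   // hypothetical 1 MB regions
  std::atomic<uintptr_t> finger;                    // global claim cursor
  uintptr_t heap_end;

  // Returns the start of a claimed region, or 0 when the CAS loses the race
  // (mirroring claim_region() returning NULL so the caller retries).
  uintptr_t try_claim() {
    uintptr_t cur = finger.load();
    if (cur >= heap_end) return 0;                  // nothing left to claim
    uintptr_t region_start = cur & ~(kRegionBytes - 1);
    uintptr_t region_end   = region_start + kRegionBytes;
    if (finger.compare_exchange_strong(cur, region_end)) {
      return region_start;                          // we now own [region_start, region_end)
    }
    return 0;                                       // somebody else moved the finger
  }
};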
  3038 #ifndef PRODUCT
  3039 enum VerifyNoCSetOopsPhase {
  3040   VerifyNoCSetOopsStack,
  3041   VerifyNoCSetOopsQueues,
  3042   VerifyNoCSetOopsSATBCompleted,
  3043   VerifyNoCSetOopsSATBThread
  3044 };
  3046 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure  {
  3047 private:
  3048   G1CollectedHeap* _g1h;
  3049   VerifyNoCSetOopsPhase _phase;
  3050   int _info;
  3052   const char* phase_str() {
  3053     switch (_phase) {
  3054     case VerifyNoCSetOopsStack:         return "Stack";
  3055     case VerifyNoCSetOopsQueues:        return "Queue";
  3056     case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers";
  3057     case VerifyNoCSetOopsSATBThread:    return "Thread SATB Buffers";
  3058     default:                            ShouldNotReachHere();
  3060     return NULL;
  3063   void do_object_work(oop obj) {
  3064     guarantee(!_g1h->obj_in_cs(obj),
  3065               err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d",
  3066                       p2i((void*) obj), phase_str(), _info));
  3069 public:
  3070   VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { }
  3072   void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) {
  3073     _phase = phase;
  3074     _info = info;
  3077   virtual void do_oop(oop* p) {
  3078     oop obj = oopDesc::load_decode_heap_oop(p);
  3079     do_object_work(obj);
  3082   virtual void do_oop(narrowOop* p) {
  3083     // We should not come across narrow oops while scanning marking
  3084     // stacks and SATB buffers.
  3085     ShouldNotReachHere();
  3088   virtual void do_object(oop obj) {
  3089     do_object_work(obj);
  3091 };
  3093 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks,
  3094                                          bool verify_enqueued_buffers,
  3095                                          bool verify_thread_buffers,
  3096                                          bool verify_fingers) {
  3097   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
  3098   if (!G1CollectedHeap::heap()->mark_in_progress()) {
  3099     return;
  3102   VerifyNoCSetOopsClosure cl;
  3104   if (verify_stacks) {
  3105     // Verify entries on the global mark stack
  3106     cl.set_phase(VerifyNoCSetOopsStack);
  3107     _markStack.oops_do(&cl);
  3109     // Verify entries on the task queues
  3110     for (uint i = 0; i < _max_worker_id; i += 1) {
  3111       cl.set_phase(VerifyNoCSetOopsQueues, i);
  3112       CMTaskQueue* queue = _task_queues->queue(i);
  3113       queue->oops_do(&cl);
  3117   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  3119   // Verify entries on the enqueued SATB buffers
  3120   if (verify_enqueued_buffers) {
  3121     cl.set_phase(VerifyNoCSetOopsSATBCompleted);
  3122     satb_qs.iterate_completed_buffers_read_only(&cl);
  3125   // Verify entries on the per-thread SATB buffers
  3126   if (verify_thread_buffers) {
  3127     cl.set_phase(VerifyNoCSetOopsSATBThread);
  3128     satb_qs.iterate_thread_buffers_read_only(&cl);
  3131   if (verify_fingers) {
  3132     // Verify the global finger
  3133     HeapWord* global_finger = finger();
  3134     if (global_finger != NULL && global_finger < _heap_end) {
  3135       // The global finger always points to a heap region boundary. We
  3136       // use heap_region_containing_raw() to get the containing region
  3137       // given that the global finger could be pointing to a free region
  3138       // which subsequently becomes continues humongous. If that
  3139       // happens, heap_region_containing() will return the bottom of the
  3140       // corresponding starts humongous region and the check below will
  3141       // not hold any more.
  3142       HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
  3143       guarantee(global_finger == global_hr->bottom(),
  3144                 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT,
  3145                         p2i(global_finger), HR_FORMAT_PARAMS(global_hr)));
  3148     // Verify the task fingers
  3149     assert(parallel_marking_threads() <= _max_worker_id, "sanity");
  3150     for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {
  3151       CMTask* task = _tasks[i];
  3152       HeapWord* task_finger = task->finger();
  3153       if (task_finger != NULL && task_finger < _heap_end) {
  3154         // See above note on the global finger verification.
  3155         HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
  3156         guarantee(task_finger == task_hr->bottom() ||
  3157                   !task_hr->in_collection_set(),
  3158                   err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT,
  3159                           p2i(task_finger), HR_FORMAT_PARAMS(task_hr)));
  3164 #endif // PRODUCT
  3166 // Aggregate the counting data that was constructed concurrently
  3167 // with marking.
  3168 class AggregateCountDataHRClosure: public HeapRegionClosure {
  3169   G1CollectedHeap* _g1h;
  3170   ConcurrentMark* _cm;
  3171   CardTableModRefBS* _ct_bs;
  3172   BitMap* _cm_card_bm;
  3173   uint _max_worker_id;
  3175  public:
  3176   AggregateCountDataHRClosure(G1CollectedHeap* g1h,
  3177                               BitMap* cm_card_bm,
  3178                               uint max_worker_id) :
  3179     _g1h(g1h), _cm(g1h->concurrent_mark()),
  3180     _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
  3181     _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
  3183   bool doHeapRegion(HeapRegion* hr) {
  3184     if (hr->continuesHumongous()) {
  3185       // We will ignore these here and process them when their
  3186       // associated "starts humongous" region is processed.
  3187       // Note that we cannot rely on their associated
  3188       // "starts humongous" region to have their bit set to 1
  3189       // since, due to the region chunking in the parallel region
  3190       // iteration, a "continues humongous" region might be visited
  3191       // before its associated "starts humongous".
  3192       return false;
  3195     HeapWord* start = hr->bottom();
  3196     HeapWord* limit = hr->next_top_at_mark_start();
  3197     HeapWord* end = hr->end();
  3199     assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
  3200            err_msg("Preconditions not met - "
  3201                    "start: "PTR_FORMAT", limit: "PTR_FORMAT", "
  3202                    "top: "PTR_FORMAT", end: "PTR_FORMAT,
  3203                    p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end())));
  3205     assert(hr->next_marked_bytes() == 0, "Precondition");
  3207     if (start == limit) {
  3208       // NTAMS of this region has not been set so nothing to do.
  3209       return false;
  3212     // 'start' should be in the heap.
  3213     assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
  3214     // 'end' *may* be just beyond the end of the heap (if hr is the last region)
  3215     assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
  3217     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
  3218     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
  3219     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
  3221     // If ntams is not card aligned then we bump card bitmap index
  3222     // for limit so that we get all the cards spanned by
  3223     // the object ending at ntams.
  3224     // Note: if this is the last region in the heap then ntams
  3225     // could actually be just beyond the end of the heap;
  3226     // limit_idx will then correspond to a (non-existent) card
  3227     // that is also outside the heap.
  3228     if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
  3229       limit_idx += 1;
  3232     assert(limit_idx <= end_idx, "or else use atomics");
  3234     // Aggregate the "stripe" in the count data associated with hr.
  3235     uint hrs_index = hr->hrs_index();
  3236     size_t marked_bytes = 0;
  3238     for (uint i = 0; i < _max_worker_id; i += 1) {
  3239       size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
  3240       BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
  3242       // Fetch the marked_bytes in this region for task i and
  3243       // add it to the running total for this region.
  3244       marked_bytes += marked_bytes_array[hrs_index];
  3246       // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
  3247       // into the global card bitmap.
  3248       BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
  3250       while (scan_idx < limit_idx) {
  3251         assert(task_card_bm->at(scan_idx) == true, "should be");
  3252         _cm_card_bm->set_bit(scan_idx);
  3253         assert(_cm_card_bm->at(scan_idx) == true, "should be");
  3255         // BitMap::get_next_one_offset() can handle the case when
  3256         // its left_offset parameter is greater than its right_offset
  3257         // parameter. It does, however, have an early exit if
  3258         // left_offset == right_offset. So let's limit the value
  3259         // passed in for left offset here.
  3260         BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
  3261         scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
  3265     // Update the marked bytes for this region.
  3266     hr->add_to_marked_bytes(marked_bytes);
  3268     // Next heap region
  3269     return false;
  3271 };
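As a rough sketch of the card arithmetic and per-worker bitmap union performed by the closure above (illustrative only: the card size, container types and function names below are simplified assumptions, not the HotSpot types):

#include <cstddef>
#include <vector>

static const size_t kCardShift = 9;   // hypothetical 512-byte cards

// Map a byte offset from the heap base to a card bitmap index.
inline size_t card_index_for(size_t heap_offset_bytes) {
  return heap_offset_bytes >> kCardShift;
}

// OR every worker's card bits for [start_idx, limit_idx) into the global
// bitmap. If the region's NTAMS is not card aligned, the caller bumps
// limit_idx by one first, just as doHeapRegion() does above.
void aggregate_cards(const std::vector<std::vector<bool> >& worker_bms,
                     std::vector<bool>& global_bm,
                     size_t start_idx, size_t limit_idx) {
  for (size_t w = 0; w < worker_bms.size(); ++w) {
    for (size_t i = start_idx; i < limit_idx; ++i) {
      if (worker_bms[w][i]) {
        global_bm[i] = true;
      }
    }
  }
}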
  3273 class G1AggregateCountDataTask: public AbstractGangTask {
  3274 protected:
  3275   G1CollectedHeap* _g1h;
  3276   ConcurrentMark* _cm;
  3277   BitMap* _cm_card_bm;
  3278   uint _max_worker_id;
  3279   int _active_workers;
  3281 public:
  3282   G1AggregateCountDataTask(G1CollectedHeap* g1h,
  3283                            ConcurrentMark* cm,
  3284                            BitMap* cm_card_bm,
  3285                            uint max_worker_id,
  3286                            int n_workers) :
  3287     AbstractGangTask("Count Aggregation"),
  3288     _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
  3289     _max_worker_id(max_worker_id),
  3290     _active_workers(n_workers) { }
  3292   void work(uint worker_id) {
  3293     AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);
  3295     if (G1CollectedHeap::use_parallel_gc_threads()) {
  3296       _g1h->heap_region_par_iterate_chunked(&cl, worker_id,
  3297                                             _active_workers,
  3298                                             HeapRegion::AggregateCountClaimValue);
  3299     } else {
  3300       _g1h->heap_region_iterate(&cl);
  3303 };
  3306 void ConcurrentMark::aggregate_count_data() {
  3307   int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
  3308                         _g1h->workers()->active_workers() :
  3309                         1);
  3311   G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
  3312                                            _max_worker_id, n_workers);
  3314   if (G1CollectedHeap::use_parallel_gc_threads()) {
  3315     assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
  3316            "sanity check");
  3317     _g1h->set_par_threads(n_workers);
  3318     _g1h->workers()->run_task(&g1_par_agg_task);
  3319     _g1h->set_par_threads(0);
  3321     assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue),
  3322            "sanity check");
  3323     _g1h->reset_heap_region_claim_values();
  3324   } else {
  3325     g1_par_agg_task.work(0);
  3329 // Clear the per-worker arrays used to store the per-region counting data
  3330 void ConcurrentMark::clear_all_count_data() {
  3331   // Clear the global card bitmap - it will be filled during
  3332   // liveness count aggregation (during remark) and the
  3333   // final counting task.
  3334   _card_bm.clear();
  3336   // Clear the global region bitmap - it will be filled as part
  3337   // of the final counting task.
  3338   _region_bm.clear();
  3340   uint max_regions = _g1h->max_regions();
  3341   assert(_max_worker_id > 0, "uninitialized");
  3343   for (uint i = 0; i < _max_worker_id; i += 1) {
  3344     BitMap* task_card_bm = count_card_bitmap_for(i);
  3345     size_t* marked_bytes_array = count_marked_bytes_array_for(i);
  3347     assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
  3348     assert(marked_bytes_array != NULL, "uninitialized");
  3350     memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
  3351     task_card_bm->clear();
  3355 void ConcurrentMark::print_stats() {
  3356   if (verbose_stats()) {
  3357     gclog_or_tty->print_cr("---------------------------------------------------------------------");
  3358     for (size_t i = 0; i < _active_tasks; ++i) {
  3359       _tasks[i]->print_stats();
  3360       gclog_or_tty->print_cr("---------------------------------------------------------------------");
  3365 // abandon current marking iteration due to a Full GC
  3366 void ConcurrentMark::abort() {
  3367   // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
  3368   // concurrent bitmap clearing.
  3369   _nextMarkBitMap->clearAll();
  3370   // Clear the liveness counting data
  3371   clear_all_count_data();
  3372   // Empty mark stack
  3373   reset_marking_state();
  3374   for (uint i = 0; i < _max_worker_id; ++i) {
  3375     _tasks[i]->clear_region_fields();
  3377   _first_overflow_barrier_sync.abort();
  3378   _second_overflow_barrier_sync.abort();
  3379   const GCId& gc_id = _g1h->gc_tracer_cm()->gc_id();
  3380   if (!gc_id.is_undefined()) {
  3381     // We can do multiple full GCs before ConcurrentMarkThread::run() gets a chance
  3382     // to detect that it was aborted. Only keep track of the first GC id that we aborted.
  3383     _aborted_gc_id = gc_id;
  3385   _has_aborted = true;
  3387   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  3388   satb_mq_set.abandon_partial_marking();
  3389   // This can be called either during or outside marking, we'll read
  3390   // the expected_active value from the SATB queue set.
  3391   satb_mq_set.set_active_all_threads(
  3392                                  false, /* new active value */
  3393                                  satb_mq_set.is_active() /* expected_active */);
  3395   _g1h->trace_heap_after_concurrent_cycle();
  3396   _g1h->register_concurrent_cycle_end();
  3399 const GCId& ConcurrentMark::concurrent_gc_id() {
  3400   if (has_aborted()) {
  3401     return _aborted_gc_id;
  3403   return _g1h->gc_tracer_cm()->gc_id();
  3406 static void print_ms_time_info(const char* prefix, const char* name,
  3407                                NumberSeq& ns) {
  3408   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
  3409                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
  3410   if (ns.num() > 0) {
  3411     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
  3412                            prefix, ns.sd(), ns.maximum());
  3416 void ConcurrentMark::print_summary_info() {
  3417   gclog_or_tty->print_cr(" Concurrent marking:");
  3418   print_ms_time_info("  ", "init marks", _init_times);
  3419   print_ms_time_info("  ", "remarks", _remark_times);
  3421     print_ms_time_info("     ", "final marks", _remark_mark_times);
  3422     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
  3425   print_ms_time_info("  ", "cleanups", _cleanup_times);
  3426   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
  3427                          _total_counting_time,
  3428                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
  3429                           (double)_cleanup_times.num()
  3430                          : 0.0));
  3431   if (G1ScrubRemSets) {
  3432     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
  3433                            _total_rs_scrub_time,
  3434                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
  3435                             (double)_cleanup_times.num()
  3436                            : 0.0));
  3438   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
  3439                          (_init_times.sum() + _remark_times.sum() +
  3440                           _cleanup_times.sum())/1000.0);
  3441   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
  3442                 "(%8.2f s marking).",
  3443                 cmThread()->vtime_accum(),
  3444                 cmThread()->vtime_mark_accum());
  3447 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
  3448   if (use_parallel_marking_threads()) {
  3449     _parallel_workers->print_worker_threads_on(st);
  3453 void ConcurrentMark::print_on_error(outputStream* st) const {
  3454   st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
  3455       p2i(_prevMarkBitMap), p2i(_nextMarkBitMap));
  3456   _prevMarkBitMap->print_on_error(st, " Prev Bits: ");
  3457   _nextMarkBitMap->print_on_error(st, " Next Bits: ");
  3460 // We take a break if someone is trying to stop the world.
  3461 bool ConcurrentMark::do_yield_check(uint worker_id) {
  3462   if (SuspendibleThreadSet::should_yield()) {
  3463     if (worker_id == 0) {
  3464       _g1h->g1_policy()->record_concurrent_pause();
  3466     SuspendibleThreadSet::yield();
  3467     return true;
  3468   } else {
  3469     return false;
  3473 bool ConcurrentMark::containing_card_is_marked(void* p) {
  3474   size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
  3475   return _card_bm.at(offset >> CardTableModRefBS::card_shift);
  3478 bool ConcurrentMark::containing_cards_are_marked(void* start,
  3479                                                  void* last) {
  3480   return containing_card_is_marked(start) &&
  3481          containing_card_is_marked(last);
  3484 #ifndef PRODUCT
  3485 // for debugging purposes
  3486 void ConcurrentMark::print_finger() {
  3487   gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
  3488                          p2i(_heap_start), p2i(_heap_end), p2i(_finger));
  3489   for (uint i = 0; i < _max_worker_id; ++i) {
  3490     gclog_or_tty->print("   %u: " PTR_FORMAT, i, p2i(_tasks[i]->finger()));
  3492   gclog_or_tty->cr();
  3494 #endif
  3496 void CMTask::scan_object(oop obj) {
  3497   assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
  3499   if (_cm->verbose_high()) {
  3500     gclog_or_tty->print_cr("[%u] we're scanning object "PTR_FORMAT,
  3501                            _worker_id, p2i((void*) obj));
  3504   size_t obj_size = obj->size();
  3505   _words_scanned += obj_size;
  3507   obj->oop_iterate(_cm_oop_closure);
  3508   statsOnly( ++_objs_scanned );
  3509   check_limits();
  3512 // Closure for iteration over bitmaps
  3513 class CMBitMapClosure : public BitMapClosure {
  3514 private:
  3515   // the bitmap that is being iterated over
  3516   CMBitMap*                   _nextMarkBitMap;
  3517   ConcurrentMark*             _cm;
  3518   CMTask*                     _task;
  3520 public:
  3521   CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
  3522     _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
  3524   bool do_bit(size_t offset) {
  3525     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
  3526     assert(_nextMarkBitMap->isMarked(addr), "invariant");
  3527     assert( addr < _cm->finger(), "invariant");
  3529     statsOnly( _task->increase_objs_found_on_bitmap() );
  3530     assert(addr >= _task->finger(), "invariant");
  3532     // We move that task's local finger along.
  3533     _task->move_finger_to(addr);
  3535     _task->scan_object(oop(addr));
  3536     // we only partially drain the local queue and global stack
  3537     _task->drain_local_queue(true);
  3538     _task->drain_global_stack(true);
  3540     // if the has_aborted flag has been raised, we need to bail out of
  3541     // the iteration
  3542     return !_task->has_aborted();
  3544 };
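The closure above follows the BitMapClosure protocol: do_bit() is invoked for every set bit in address order and returns false to abort the iteration. A minimal stand-alone sketch of that protocol (hypothetical names, std::vector<bool> standing in for CMBitMap):

#include <cstddef>
#include <vector>

// Visit each set bit in order; stop early if the visitor returns false
// (the real code uses this to honor CMTask::has_aborted()).
template <typename Visitor>   // Visitor: bool operator()(size_t bit_offset)
bool iterate_set_bits(const std::vector<bool>& bm, Visitor visit) {
  for (size_t offset = 0; offset < bm.size(); ++offset) {
    if (bm[offset] && !visit(offset)) {
      return false;           // aborted partway through
    }
  }
  return true;                // completed the whole range
}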
  3546 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
  3547                                ConcurrentMark* cm,
  3548                                CMTask* task)
  3549   : _g1h(g1h), _cm(cm), _task(task) {
  3550   assert(_ref_processor == NULL, "should be initialized to NULL");
  3552   if (G1UseConcMarkReferenceProcessing) {
  3553     _ref_processor = g1h->ref_processor_cm();
  3554     assert(_ref_processor != NULL, "should not be NULL");
  3558 void CMTask::setup_for_region(HeapRegion* hr) {
  3559   // Separated the asserts so that we know which one fires.
  3560   assert(hr != NULL,
  3561         "claim_region() should have filtered out continues humongous regions");
  3562   assert(!hr->continuesHumongous(),
  3563         "claim_region() should have filtered out continues humongous regions");
  3565   if (_cm->verbose_low()) {
  3566     gclog_or_tty->print_cr("[%u] setting up for region "PTR_FORMAT,
  3567                            _worker_id, p2i(hr));
  3570   _curr_region  = hr;
  3571   _finger       = hr->bottom();
  3572   update_region_limit();
  3575 void CMTask::update_region_limit() {
  3576   HeapRegion* hr            = _curr_region;
  3577   HeapWord* bottom          = hr->bottom();
  3578   HeapWord* limit           = hr->next_top_at_mark_start();
  3580   if (limit == bottom) {
  3581     if (_cm->verbose_low()) {
  3582       gclog_or_tty->print_cr("[%u] found an empty region "
  3583                              "["PTR_FORMAT", "PTR_FORMAT")",
  3584                              _worker_id, p2i(bottom), p2i(limit));
  3586     // The region was collected underneath our feet.
  3587     // We set the finger to bottom to ensure that the bitmap
  3588     // iteration that will follow this will not do anything.
  3589     // (this is not a condition that holds when we set the region up,
  3590     // as the region is not supposed to be empty in the first place)
  3591     _finger = bottom;
  3592   } else if (limit >= _region_limit) {
  3593     assert(limit >= _finger, "peace of mind");
  3594   } else {
  3595     assert(limit < _region_limit, "only way to get here");
  3596     // This can happen under some pretty unusual circumstances.  An
  3597     // evacuation pause empties the region underneath our feet (NTAMS
  3598     // at bottom). We then do some allocation in the region (NTAMS
  3599     // stays at bottom), followed by the region being used as a GC
  3600     // alloc region (NTAMS will move to top() and the objects
  3601     // originally below it will be grayed). All objects now marked in
  3602     // the region are explicitly grayed, if below the global finger,
  3603     // and in fact we do not need to scan anything else. So, we simply
  3604     // set _finger to be limit to ensure that the bitmap iteration
  3605     // doesn't do anything.
  3606     _finger = limit;
  3609   _region_limit = limit;
  3612 void CMTask::giveup_current_region() {
  3613   assert(_curr_region != NULL, "invariant");
  3614   if (_cm->verbose_low()) {
  3615     gclog_or_tty->print_cr("[%u] giving up region "PTR_FORMAT,
  3616                            _worker_id, p2i(_curr_region));
  3618   clear_region_fields();
  3621 void CMTask::clear_region_fields() {
  3622   // Values for these three fields that indicate that we're not
  3623   // holding on to a region.
  3624   _curr_region   = NULL;
  3625   _finger        = NULL;
  3626   _region_limit  = NULL;
  3629 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
  3630   if (cm_oop_closure == NULL) {
  3631     assert(_cm_oop_closure != NULL, "invariant");
  3632   } else {
  3633     assert(_cm_oop_closure == NULL, "invariant");
  3635   _cm_oop_closure = cm_oop_closure;
  3638 void CMTask::reset(CMBitMap* nextMarkBitMap) {
  3639   guarantee(nextMarkBitMap != NULL, "invariant");
  3641   if (_cm->verbose_low()) {
  3642     gclog_or_tty->print_cr("[%u] resetting", _worker_id);
  3645   _nextMarkBitMap                = nextMarkBitMap;
  3646   clear_region_fields();
  3648   _calls                         = 0;
  3649   _elapsed_time_ms               = 0.0;
  3650   _termination_time_ms           = 0.0;
  3651   _termination_start_time_ms     = 0.0;
  3653 #if _MARKING_STATS_
  3654   _local_pushes                  = 0;
  3655   _local_pops                    = 0;
  3656   _local_max_size                = 0;
  3657   _objs_scanned                  = 0;
  3658   _global_pushes                 = 0;
  3659   _global_pops                   = 0;
  3660   _global_max_size               = 0;
  3661   _global_transfers_to           = 0;
  3662   _global_transfers_from         = 0;
  3663   _regions_claimed               = 0;
  3664   _objs_found_on_bitmap          = 0;
  3665   _satb_buffers_processed        = 0;
  3666   _steal_attempts                = 0;
  3667   _steals                        = 0;
  3668   _aborted                       = 0;
  3669   _aborted_overflow              = 0;
  3670   _aborted_cm_aborted            = 0;
  3671   _aborted_yield                 = 0;
  3672   _aborted_timed_out             = 0;
  3673   _aborted_satb                  = 0;
  3674   _aborted_termination           = 0;
  3675 #endif // _MARKING_STATS_
  3678 bool CMTask::should_exit_termination() {
  3679   regular_clock_call();
  3680   // This is called when we are in the termination protocol. We should
  3681   // quit if, for some reason, this task wants to abort or the global
  3682   // stack is not empty (this means that we can get work from it).
  3683   return !_cm->mark_stack_empty() || has_aborted();
  3686 void CMTask::reached_limit() {
  3687   assert(_words_scanned >= _words_scanned_limit ||
  3688          _refs_reached >= _refs_reached_limit ,
  3689          "shouldn't have been called otherwise");
  3690   regular_clock_call();
  3693 void CMTask::regular_clock_call() {
  3694   if (has_aborted()) return;
  3696   // First, we need to recalculate the words scanned and refs reached
  3697   // limits for the next clock call.
  3698   recalculate_limits();
  3700   // During the regular clock call we do the following
  3702   // (1) If an overflow has been flagged, then we abort.
  3703   if (_cm->has_overflown()) {
  3704     set_has_aborted();
  3705     return;
  3708   // If we are not concurrent (i.e. we're doing remark) we don't need
  3709   // to check anything else. The other steps are only needed during
  3710   // the concurrent marking phase.
  3711   if (!concurrent()) return;
  3713   // (2) If marking has been aborted for Full GC, then we also abort.
  3714   if (_cm->has_aborted()) {
  3715     set_has_aborted();
  3716     statsOnly( ++_aborted_cm_aborted );
  3717     return;
  3720   double curr_time_ms = os::elapsedVTime() * 1000.0;
  3722   // (3) If marking stats are enabled, then we update the step history.
  3723 #if _MARKING_STATS_
  3724   if (_words_scanned >= _words_scanned_limit) {
  3725     ++_clock_due_to_scanning;
  3727   if (_refs_reached >= _refs_reached_limit) {
  3728     ++_clock_due_to_marking;
  3731   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
  3732   _interval_start_time_ms = curr_time_ms;
  3733   _all_clock_intervals_ms.add(last_interval_ms);
  3735   if (_cm->verbose_medium()) {
  3736       gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, "
  3737                         "scanned = %d%s, refs reached = %d%s",
  3738                         _worker_id, last_interval_ms,
  3739                         _words_scanned,
  3740                         (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
  3741                         _refs_reached,
  3742                         (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
  3744 #endif // _MARKING_STATS_
  3746   // (4) We check whether we should yield. If we have to, then we abort.
  3747   if (SuspendibleThreadSet::should_yield()) {
  3748     // We should yield. To do this we abort the task. The caller is
  3749     // responsible for yielding.
  3750     set_has_aborted();
  3751     statsOnly( ++_aborted_yield );
  3752     return;
  3755   // (5) We check whether we've reached our time quota. If we have,
  3756   // then we abort.
  3757   double elapsed_time_ms = curr_time_ms - _start_time_ms;
  3758   if (elapsed_time_ms > _time_target_ms) {
  3759     set_has_aborted();
  3760     _has_timed_out = true;
  3761     statsOnly( ++_aborted_timed_out );
  3762     return;
  3765   // (6) Finally, we check whether there are enough completed SATB
  3766   // buffers available for processing. If there are, we abort.
  3767   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  3768   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
  3769     if (_cm->verbose_low()) {
  3770       gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
  3771                              _worker_id);
  3773     // we do need to process SATB buffers, we'll abort and restart
  3774     // the marking task to do so
  3775     set_has_aborted();
  3776     statsOnly( ++_aborted_satb );
  3777     return;
  3781 void CMTask::recalculate_limits() {
  3782   _real_words_scanned_limit = _words_scanned + words_scanned_period;
  3783   _words_scanned_limit      = _real_words_scanned_limit;
  3785   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
  3786   _refs_reached_limit       = _real_refs_reached_limit;
  3789 void CMTask::decrease_limits() {
  3790   // This is called when we believe that we're going to do an infrequent
  3791   // operation which will increase the per byte scanned cost (i.e. move
  3792   // entries to/from the global stack). It basically tries to decrease the
  3793   // scanning limit so that the clock is called earlier.
  3795   if (_cm->verbose_medium()) {
  3796     gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id);
  3799   _words_scanned_limit = _real_words_scanned_limit -
  3800     3 * words_scanned_period / 4;
  3801   _refs_reached_limit  = _real_refs_reached_limit -
  3802     3 * refs_reached_period / 4;
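Taken together, recalculate_limits(), decrease_limits() and regular_clock_call() implement a work-based clock: cheap counters of work done trip a periodic check, and expensive operations pull the next check point closer. A simplified, self-contained sketch of that scheme (hypothetical names and period; the real code tracks both words scanned and refs reached):

#include <cstddef>

struct WorkClock {
  size_t words_scanned;
  size_t real_limit;      // limit as set by recalculate()
  size_t limit;           // possibly lowered limit we actually check against
  static const size_t kPeriod = 12 * 1024;   // hypothetical scan period

  WorkClock() : words_scanned(0), real_limit(0), limit(0) { recalculate(); }

  void recalculate() {
    real_limit = words_scanned + kPeriod;
    limit      = real_limit;
  }

  // After an expensive step, keep only a quarter of the period before the
  // next check fires (cf. decrease_limits() above).
  void decrease() {
    limit = real_limit - 3 * kPeriod / 4;
  }

  // Called from the hot scanning loop; cheap unless the limit was reached.
  template <typename PeriodicWork>
  void check(PeriodicWork periodic) {
    if (words_scanned >= limit) {
      periodic();           // e.g. test the abort/yield/SATB conditions
      recalculate();
    }
  }
};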
  3805 void CMTask::move_entries_to_global_stack() {
  3806   // local array where we'll store the entries that will be popped
  3807   // from the local queue
  3808   oop buffer[global_stack_transfer_size];
  3810   int n = 0;
  3811   oop obj;
  3812   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
  3813     buffer[n] = obj;
  3814     ++n;
  3817   if (n > 0) {
  3818     // we popped at least one entry from the local queue
  3820     statsOnly( ++_global_transfers_to; _local_pops += n );
  3822     if (!_cm->mark_stack_push(buffer, n)) {
  3823       if (_cm->verbose_low()) {
  3824         gclog_or_tty->print_cr("[%u] aborting due to global stack overflow",
  3825                                _worker_id);
  3827       set_has_aborted();
  3828     } else {
  3829       // the transfer was successful
  3831       if (_cm->verbose_medium()) {
  3832         gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack",
  3833                                _worker_id, n);
  3835       statsOnly( int tmp_size = _cm->mark_stack_size();
  3836                  if (tmp_size > _global_max_size) {
  3837                    _global_max_size = tmp_size;
  3839                  _global_pushes += n );
  3843   // this operation was quite expensive, so decrease the limits
  3844   decrease_limits();
  3847 void CMTask::get_entries_from_global_stack() {
  3848   // local array where we'll store the entries that will be popped
  3849   // from the global stack.
  3850   oop buffer[global_stack_transfer_size];
  3851   int n;
  3852   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
  3853   assert(n <= global_stack_transfer_size,
  3854          "we should not pop more than the given limit");
  3855   if (n > 0) {
  3856     // yes, we did actually pop at least one entry
  3858     statsOnly( ++_global_transfers_from; _global_pops += n );
  3859     if (_cm->verbose_medium()) {
  3860       gclog_or_tty->print_cr("[%u] popped %d entries from the global stack",
  3861                              _worker_id, n);
  3863     for (int i = 0; i < n; ++i) {
  3864       bool success = _task_queue->push(buffer[i]);
  3865       // We only call this when the local queue is empty or under a
  3866       // given target limit. So, we do not expect this push to fail.
  3867       assert(success, "invariant");
  3870     statsOnly( int tmp_size = _task_queue->size();
  3871                if (tmp_size > _local_max_size) {
  3872                  _local_max_size = tmp_size;
  3874                _local_pushes += n );
  3877   // this operation was quite expensive, so decrease the limits
  3878   decrease_limits();
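Both transfer routines above move entries in chunks so that the mutex-protected global stack is touched as rarely as possible. A rough stand-alone sketch of this chunked hand-off (illustrative types and names; the real mark stack additionally has to report overflow):

#include <deque>
#include <mutex>
#include <vector>

static const int kChunk = 16;                    // hypothetical transfer size

struct MiniGlobalStack {
  std::vector<int> data;
  std::mutex       lock;

  // Push a whole chunk under one lock acquisition to amortize contention.
  void push_chunk(const int* buf, int n) {
    std::lock_guard<std::mutex> g(lock);
    data.insert(data.end(), buf, buf + n);
  }

  // Pop up to 'max' entries into buf; returns how many were taken.
  int pop_chunk(int* buf, int max) {
    std::lock_guard<std::mutex> g(lock);
    int n = 0;
    while (n < max && !data.empty()) {
      buf[n++] = data.back();
      data.pop_back();
    }
    return n;
  }
};

// Move up to one chunk from a task-local queue to the shared stack.
void move_to_global(std::deque<int>& local, MiniGlobalStack& global) {
  int buf[kChunk];
  int n = 0;
  while (n < kChunk && !local.empty()) {
    buf[n++] = local.back();
    local.pop_back();
  }
  if (n > 0) global.push_chunk(buf, n);
}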
  3881 void CMTask::drain_local_queue(bool partially) {
  3882   if (has_aborted()) return;
  3884   // Decide what the target size is, depending on whether we're going to
  3885   // drain it partially (so that other tasks can steal if they run out
  3886   // of things to do) or totally (at the very end).
  3887   size_t target_size;
  3888   if (partially) {
  3889     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
  3890   } else {
  3891     target_size = 0;
  3894   if (_task_queue->size() > target_size) {
  3895     if (_cm->verbose_high()) {
  3896       gclog_or_tty->print_cr("[%u] draining local queue, target size = " SIZE_FORMAT,
  3897                              _worker_id, target_size);
  3900     oop obj;
  3901     bool ret = _task_queue->pop_local(obj);
  3902     while (ret) {
  3903       statsOnly( ++_local_pops );
  3905       if (_cm->verbose_high()) {
  3906         gclog_or_tty->print_cr("[%u] popped "PTR_FORMAT, _worker_id,
  3907                                p2i((void*) obj));
  3910       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
  3911       assert(!_g1h->is_on_master_free_list(
  3912                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
  3914       scan_object(obj);
  3916       if (_task_queue->size() <= target_size || has_aborted()) {
  3917         ret = false;
  3918       } else {
  3919         ret = _task_queue->pop_local(obj);
  3923     if (_cm->verbose_high()) {
  3924       gclog_or_tty->print_cr("[%u] drained local queue, size = %d",
  3925                              _worker_id, _task_queue->size());
  3930 void CMTask::drain_global_stack(bool partially) {
  3931   if (has_aborted()) return;
  3933   // We have a policy to drain the local queue before we attempt to
  3934   // drain the global stack.
  3935   assert(partially || _task_queue->size() == 0, "invariant");
  3937   // Decide what the target size is, depending on whether we're going to
  3938   // drain it partially (so that other tasks can steal if they run out
  3939   // of things to do) or totally (at the very end).  Notice that,
  3940   // because we move entries from the global stack in chunks or
  3941   // because another task might be doing the same, we might in fact
  3942   // drop below the target. But, this is not a problem.
  3943   size_t target_size;
  3944   if (partially) {
  3945     target_size = _cm->partial_mark_stack_size_target();
  3946   } else {
  3947     target_size = 0;
  3950   if (_cm->mark_stack_size() > target_size) {
  3951     if (_cm->verbose_low()) {
  3952       gclog_or_tty->print_cr("[%u] draining global_stack, target size " SIZE_FORMAT,
  3953                              _worker_id, target_size);
  3956     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
  3957       get_entries_from_global_stack();
  3958       drain_local_queue(partially);
  3961     if (_cm->verbose_low()) {
  3962       gclog_or_tty->print_cr("[%u] drained global stack, size = " SIZE_FORMAT,
  3963                              _worker_id, _cm->mark_stack_size());
  3968 // SATB Queue has several assumptions on whether to call the par or
  3969 // non-par versions of the methods. This is why some of the code is
  3970 // replicated. We should really get rid of the single-threaded version
  3971 // of the code to simplify things.
  3972 void CMTask::drain_satb_buffers() {
  3973   if (has_aborted()) return;
  3975   // We set this so that the regular clock knows that we're in the
  3976   // middle of draining buffers and doesn't set the abort flag when it
  3977   // notices that SATB buffers are available for draining. It'd be
  3978   // very counterproductive if it did that. :-)
  3979   _draining_satb_buffers = true;
  3981   CMObjectClosure oc(this);
  3982   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  3983   if (G1CollectedHeap::use_parallel_gc_threads()) {
  3984     satb_mq_set.set_par_closure(_worker_id, &oc);
  3985   } else {
  3986     satb_mq_set.set_closure(&oc);
  3989   // This keeps claiming and applying the closure to completed buffers
  3990   // until we run out of buffers or we need to abort.
  3991   if (G1CollectedHeap::use_parallel_gc_threads()) {
  3992     while (!has_aborted() &&
  3993            satb_mq_set.par_apply_closure_to_completed_buffer(_worker_id)) {
  3994       if (_cm->verbose_medium()) {
  3995         gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
  3997       statsOnly( ++_satb_buffers_processed );
  3998       regular_clock_call();
  4000   } else {
  4001     while (!has_aborted() &&
  4002            satb_mq_set.apply_closure_to_completed_buffer()) {
  4003       if (_cm->verbose_medium()) {
  4004         gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
  4006       statsOnly( ++_satb_buffers_processed );
  4007       regular_clock_call();
  4011   _draining_satb_buffers = false;
  4013   assert(has_aborted() ||
  4014          concurrent() ||
  4015          satb_mq_set.completed_buffers_num() == 0, "invariant");
  4017   if (G1CollectedHeap::use_parallel_gc_threads()) {
  4018     satb_mq_set.set_par_closure(_worker_id, NULL);
  4019   } else {
  4020     satb_mq_set.set_closure(NULL);
  4023   // again, this was a potentially expensive operation, decrease the
  4024   // limits to get the regular clock call early
  4025   decrease_limits();
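The loop above repeatedly claims one completed buffer and applies the marking closure to it until no buffers remain or the task has to abort. A small stand-alone sketch of that claim-and-process pattern (simplified, invented types; the real queue set distinguishes per-worker and shared closures):

#include <mutex>
#include <vector>

struct MiniBufferQueue {
  std::vector<std::vector<int> > completed;   // each inner vector is one buffer
  std::mutex lock;

  // Claim one completed buffer; returns false when none are left.
  bool claim(std::vector<int>& out) {
    std::lock_guard<std::mutex> g(lock);
    if (completed.empty()) return false;
    out.swap(completed.back());
    completed.pop_back();
    return true;
  }
};

// Apply 'process' to every entry of every buffer we manage to claim,
// checking an abort flag between buffers as drain_satb_buffers() does.
template <typename Closure>
void drain_buffers(MiniBufferQueue& q, Closure process, const bool& aborted) {
  std::vector<int> buf;
  while (!aborted && q.claim(buf)) {
    for (size_t i = 0; i < buf.size(); ++i) {
      process(buf[i]);
    }
    buf.clear();
  }
}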
  4028 void CMTask::print_stats() {
  4029   gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d",
  4030                          _worker_id, _calls);
  4031   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
  4032                          _elapsed_time_ms, _termination_time_ms);
  4033   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
  4034                          _step_times_ms.num(), _step_times_ms.avg(),
  4035                          _step_times_ms.sd());
  4036   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
  4037                          _step_times_ms.maximum(), _step_times_ms.sum());
  4039 #if _MARKING_STATS_
  4040   gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
  4041                          _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
  4042                          _all_clock_intervals_ms.sd());
  4043   gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
  4044                          _all_clock_intervals_ms.maximum(),
  4045                          _all_clock_intervals_ms.sum());
  4046   gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
  4047                          _clock_due_to_scanning, _clock_due_to_marking);
  4048   gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
  4049                          _objs_scanned, _objs_found_on_bitmap);
  4050   gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
  4051                          _local_pushes, _local_pops, _local_max_size);
  4052   gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
  4053                          _global_pushes, _global_pops, _global_max_size);
  4054   gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
  4055                          _global_transfers_to,_global_transfers_from);
  4056   gclog_or_tty->print_cr("  Regions: claimed = %d", _regions_claimed);
  4057   gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
  4058   gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
  4059                          _steal_attempts, _steals);
  4060   gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
  4061   gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
  4062                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
  4063   gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
  4064                          _aborted_timed_out, _aborted_satb, _aborted_termination);
  4065 #endif // _MARKING_STATS_
  4068 /*****************************************************************************
  4070     The do_marking_step(time_target_ms, ...) method is the building
  4071     block of the parallel marking framework. It can be called in parallel
  4072     with other invocations of do_marking_step() on different tasks
  4073     (but only one per task, obviously) and concurrently with the
  4074     mutator threads, or during remark, hence it eliminates the need
  4075     for two versions of the code. When called during remark, it will
  4076     pick up from where the task left off during the concurrent marking
  4077     phase. Interestingly, tasks are also claimable during evacuation
  4078     pauses, since do_marking_step() ensures that it aborts before
  4079     it needs to yield.
  4081     The data structures that it uses to do marking work are the
  4082     following:
  4084       (1) Marking Bitmap. If there are gray objects that appear only
  4085       on the bitmap (this happens either when dealing with an overflow
  4086       or when the initial marking phase has simply marked the roots
  4087       and didn't push them on the stack), then tasks claim heap
  4088       regions whose bitmap they then scan to find gray objects. A
  4089       global finger indicates where the end of the last claimed region
  4090       is. A local finger indicates how far into the region a task has
  4091       scanned. The two fingers are used to determine how to gray an
  4092       object (i.e. whether simply marking it is OK, as it will be
  4093       visited by a task in the future, or whether it needs to be also
  4094       pushed on a stack).
  4096       (2) Local Queue. The local queue of the task which is accessed
  4097       reasonably efficiently by the task. Other tasks can steal from
  4098       it when they run out of work. Throughout the marking phase, a
  4099       task attempts to keep its local queue short but not totally
  4100       empty, so that entries are available for stealing by other
  4101     tasks. Only when there is no more work will a task totally
  4102       drain its local queue.
  4104       (3) Global Mark Stack. This handles local queue overflow. During
  4105       marking only sets of entries are moved between it and the local
  4106       queues, as access to it requires a mutex, and finer-grained
  4107       interaction with it might cause contention. If it
  4108       overflows, then the marking phase should restart and iterate
  4109       over the bitmap to identify gray objects. Throughout the marking
  4110       phase, tasks attempt to keep the global mark stack at a small
  4111       length but not totally empty, so that entries are available for
  4112     popping by other tasks. Only when there is no more work will tasks
  4113     totally drain the global mark stack.
  4115       (4) SATB Buffer Queue. This is where completed SATB buffers are
  4116       made available. Buffers are regularly removed from this queue
  4117       and scanned for roots, so that the queue doesn't get too
  4118       long. During remark, all completed buffers are processed, as
  4119       well as the filled in parts of any uncompleted buffers.
  4121     The do_marking_step() method tries to abort when the time target
  4122     has been reached. There are a few other cases when the
  4123     do_marking_step() method also aborts:
  4125       (1) When the marking phase has been aborted (after a Full GC).
  4127       (2) When a global overflow (on the global stack) has been
  4128       triggered. Before the task aborts, it will actually sync up with
  4129       the other tasks to ensure that all the marking data structures
  4130       (local queues, stacks, fingers etc.)  are re-initialized so that
  4131       when do_marking_step() completes, the marking phase can
  4132       immediately restart.
  4134       (3) When enough completed SATB buffers are available. The
  4135       do_marking_step() method only tries to drain SATB buffers right
  4136       at the beginning. So, if enough buffers are available, the
  4137       marking step aborts and the SATB buffers are processed at
  4138       the beginning of the next invocation.
  4140       (4) To yield. When we have to yield, we abort and yield
  4141       right at the end of do_marking_step(). This saves us from a lot
  4143     of hassle as, by yielding, we might allow a Full GC. If this
  4143       happens then objects will be compacted underneath our feet, the
  4144       heap might shrink, etc. We save checking for this by just
  4145       aborting and doing the yield right at the end.
  4147     From the above it follows that the do_marking_step() method should
  4148     be called in a loop (or, otherwise, regularly) until it completes.
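
     For example, the concurrent marking worker threads drive it with,
     roughly, the following loop (a simplified sketch of what the
     CMConcurrentMarkingTask work gang does, not the exact code):

       do {
         // One marking step, bounded by the configured step duration.
         the_task->do_marking_step(G1ConcMarkStepDurationMillis,
                                   true  /* do_termination */,
                                   false /* is_serial      */);
         // If only this task aborted (e.g. it hit its time target or
         // had to yield), give the system some breathing room and then
         // go round the loop again.
       } while (!_cm->has_aborted() && the_task->has_aborted());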
  4150     If a marking step completes without its has_aborted() flag being
  4151     true, it means it has completed the current marking phase (and
  4152     also all other marking tasks have done so and have all synced up).
   4154     A method called regular_clock_call() is invoked "regularly" (at
   4155     sub-ms intervals) throughout marking. It is this clock method that
   4156     checks all the abort conditions mentioned above and decides when
   4157     the task should abort. A work-based scheme is used to trigger the
   4158     clock method: it is called when the number of object words the
   4159     marking step has scanned, or the number of references it has
   4160     visited, reaches a given limit (see the sketch below). Additional
   4161     invocations of the clock method have been planted in a few other
   4162     strategic places too. The initial reason for the clock method was
   4163     to avoid querying the virtual time too often, as it is quite
   4164     expensive. Once it was in place, it was natural to piggy-back the
   4165     other abort conditions on it rather than check them everywhere.
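
     The following is a simplified sketch of that work-based trigger.
     The counters _words_scanned and _refs_reached and the method
     recalculate_limits() appear a few lines below in do_marking_step();
     the corresponding *_limit fields are assumed to be the CMTask limit
     fields, and note_words_scanned() itself is a hypothetical helper,
     not the real code:

       // Called after scanning an object of the given size (sketch).
       void note_words_scanned(size_t words) {
         _words_scanned += words;
         if (_words_scanned >= _words_scanned_limit ||
             _refs_reached  >= _refs_reached_limit) {
           // Enough work has been done since the last check: run the
           // clock, which may set has_aborted(), and re-arm the limits.
           regular_clock_call();
           recalculate_limits();
         }
       }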
  4167     If do_termination is true then do_marking_step will enter its
  4168     termination protocol.
   4170     The value of is_serial must be true when do_marking_step is being
   4171     called serially (i.e. by the VMThread); in that case do_marking_step
   4172     skips any synchronization in the termination and overflow code.
   4173     Examples include the serial remark code and the serial reference
   4174     processing closures.
  4176     The value of is_serial must be false when do_marking_step is
  4177     being called by any of the worker threads in a work gang.
  4178     Examples include the concurrent marking code (CMMarkingTask),
  4179     the MT remark code, and the MT reference processing closures.
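
     For illustration, the two modes are typically invoked along these
     lines (simplified; the argument values are indicative of the real
     call sites rather than copied verbatim):

       // Concurrent marking: one call per worker thread in the gang.
       task->do_marking_step(G1ConcMarkStepDurationMillis,
                             true  /* do_termination */,
                             false /* is_serial      */);

       // Serial remark / serial reference processing on the VMThread,
       // with an effectively unlimited time target.
       task->do_marking_step(1000000000.0 /* something very large */,
                             true /* do_termination */,
                             true /* is_serial      */);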
  4181  *****************************************************************************/
  4183 void CMTask::do_marking_step(double time_target_ms,
  4184                              bool do_termination,
  4185                              bool is_serial) {
  4186   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
  4187   assert(concurrent() == _cm->concurrent(), "they should be the same");
  4189   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
  4190   assert(_task_queues != NULL, "invariant");
  4191   assert(_task_queue != NULL, "invariant");
  4192   assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
  4194   assert(!_claimed,
  4195          "only one thread should claim this task at any one time");
   4197   // OK, this doesn't safeguard against all possible scenarios, as it is
  4198   // possible for two threads to set the _claimed flag at the same
  4199   // time. But it is only for debugging purposes anyway and it will
  4200   // catch most problems.
  4201   _claimed = true;
  4203   _start_time_ms = os::elapsedVTime() * 1000.0;
  4204   statsOnly( _interval_start_time_ms = _start_time_ms );
  4206   // If do_stealing is true then do_marking_step will attempt to
  4207   // steal work from the other CMTasks. It only makes sense to
  4208   // enable stealing when the termination protocol is enabled
  4209   // and do_marking_step() is not being called serially.
  4210   bool do_stealing = do_termination && !is_serial;
  4212   double diff_prediction_ms =
  4213     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
  4214   _time_target_ms = time_target_ms - diff_prediction_ms;
  4216   // set up the variables that are used in the work-based scheme to
  4217   // call the regular clock method
  4218   _words_scanned = 0;
  4219   _refs_reached  = 0;
  4220   recalculate_limits();
  4222   // clear all flags
  4223   clear_has_aborted();
  4224   _has_timed_out = false;
  4225   _draining_satb_buffers = false;
  4227   ++_calls;
  4229   if (_cm->verbose_low()) {
  4230     gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, "
  4231                            "target = %1.2lfms >>>>>>>>>>",
  4232                            _worker_id, _calls, _time_target_ms);
   4235   // Set up the bitmap and oop closures. Anything that uses them is
   4236   // eventually called from this method, so it is OK to allocate them
   4237   // here, on the stack.
  4238   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
  4239   G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
  4240   set_cm_oop_closure(&cm_oop_closure);
  4242   if (_cm->has_overflown()) {
  4243     // This can happen if the mark stack overflows during a GC pause
  4244     // and this task, after a yield point, restarts. We have to abort
  4245     // as we need to get into the overflow protocol which happens
  4246     // right at the end of this task.
  4247     set_has_aborted();
  4250   // First drain any available SATB buffers. After this, we will not
  4251   // look at SATB buffers before the next invocation of this method.
  4252   // If enough completed SATB buffers are queued up, the regular clock
  4253   // will abort this task so that it restarts.
  4254   drain_satb_buffers();
  4255   // ...then partially drain the local queue and the global stack
  4256   drain_local_queue(true);
  4257   drain_global_stack(true);
  4259   do {
  4260     if (!has_aborted() && _curr_region != NULL) {
  4261       // This means that we're already holding on to a region.
  4262       assert(_finger != NULL, "if region is not NULL, then the finger "
  4263              "should not be NULL either");
  4265       // We might have restarted this task after an evacuation pause
  4266       // which might have evacuated the region we're holding on to
  4267       // underneath our feet. Let's read its limit again to make sure
  4268       // that we do not iterate over a region of the heap that
  4269       // contains garbage (update_region_limit() will also move
  4270       // _finger to the start of the region if it is found empty).
  4271       update_region_limit();
  4272       // We will start from _finger not from the start of the region,
  4273       // as we might be restarting this task after aborting half-way
  4274       // through scanning this region. In this case, _finger points to
  4275       // the address where we last found a marked object. If this is a
  4276       // fresh region, _finger points to start().
  4277       MemRegion mr = MemRegion(_finger, _region_limit);
  4279       if (_cm->verbose_low()) {
  4280         gclog_or_tty->print_cr("[%u] we're scanning part "
  4281                                "["PTR_FORMAT", "PTR_FORMAT") "
  4282                                "of region "HR_FORMAT,
  4283                                _worker_id, p2i(_finger), p2i(_region_limit),
  4284                                HR_FORMAT_PARAMS(_curr_region));
  4287       assert(!_curr_region->isHumongous() || mr.start() == _curr_region->bottom(),
  4288              "humongous regions should go around loop once only");
  4290       // Some special cases:
  4291       // If the memory region is empty, we can just give up the region.
  4292       // If the current region is humongous then we only need to check
  4293       // the bitmap for the bit associated with the start of the object,
  4294       // scan the object if it's live, and give up the region.
  4295       // Otherwise, let's iterate over the bitmap of the part of the region
  4296       // that is left.
  4297       // If the iteration is successful, give up the region.
  4298       if (mr.is_empty()) {
  4299         giveup_current_region();
  4300         regular_clock_call();
  4301       } else if (_curr_region->isHumongous() && mr.start() == _curr_region->bottom()) {
  4302         if (_nextMarkBitMap->isMarked(mr.start())) {
  4303           // The object is marked - apply the closure
  4304           BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
  4305           bitmap_closure.do_bit(offset);
  4307         // Even if this task aborted while scanning the humongous object
  4308         // we can (and should) give up the current region.
  4309         giveup_current_region();
  4310         regular_clock_call();
  4311       } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
  4312         giveup_current_region();
  4313         regular_clock_call();
  4314       } else {
  4315         assert(has_aborted(), "currently the only way to do so");
  4316         // The only way to abort the bitmap iteration is to return
  4317         // false from the do_bit() method. However, inside the
  4318         // do_bit() method we move the _finger to point to the
  4319         // object currently being looked at. So, if we bail out, we
  4320         // have definitely set _finger to something non-null.
  4321         assert(_finger != NULL, "invariant");
  4323         // Region iteration was actually aborted. So now _finger
  4324         // points to the address of the object we last scanned. If we
  4325         // leave it there, when we restart this task, we will rescan
  4326         // the object. It is easy to avoid this. We move the finger by
  4327         // enough to point to the next possible object header (the
  4328         // bitmap knows by how much we need to move it as it knows its
  4329         // granularity).
  4330         assert(_finger < _region_limit, "invariant");
  4331         HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
  4332         // Check if bitmap iteration was aborted while scanning the last object
  4333         if (new_finger >= _region_limit) {
  4334           giveup_current_region();
  4335         } else {
  4336           move_finger_to(new_finger);
  4340     // At this point we have either completed iterating over the
  4341     // region we were holding on to, or we have aborted.
  4343     // We then partially drain the local queue and the global stack.
  4344     // (Do we really need this?)
  4345     drain_local_queue(true);
  4346     drain_global_stack(true);
  4348     // Read the note on the claim_region() method on why it might
  4349     // return NULL with potentially more regions available for
  4350     // claiming and why we have to check out_of_regions() to determine
  4351     // whether we're done or not.
  4352     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
  4353       // We are going to try to claim a new region. We should have
  4354       // given up on the previous one.
  4355       // Separated the asserts so that we know which one fires.
  4356       assert(_curr_region  == NULL, "invariant");
  4357       assert(_finger       == NULL, "invariant");
  4358       assert(_region_limit == NULL, "invariant");
  4359       if (_cm->verbose_low()) {
  4360         gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id);
  4362       HeapRegion* claimed_region = _cm->claim_region(_worker_id);
  4363       if (claimed_region != NULL) {
  4364         // Yes, we managed to claim one
  4365         statsOnly( ++_regions_claimed );
  4367         if (_cm->verbose_low()) {
  4368           gclog_or_tty->print_cr("[%u] we successfully claimed "
  4369                                  "region "PTR_FORMAT,
  4370                                  _worker_id, p2i(claimed_region));
  4373         setup_for_region(claimed_region);
  4374         assert(_curr_region == claimed_region, "invariant");
  4376       // It is important to call the regular clock here. It might take
  4377       // a while to claim a region if, for example, we hit a large
  4378       // block of empty regions. So we need to call the regular clock
  4379       // method once round the loop to make sure it's called
  4380       // frequently enough.
  4381       regular_clock_call();
  4384     if (!has_aborted() && _curr_region == NULL) {
  4385       assert(_cm->out_of_regions(),
  4386              "at this point we should be out of regions");
  4388   } while ( _curr_region != NULL && !has_aborted());
  4390   if (!has_aborted()) {
  4391     // We cannot check whether the global stack is empty, since other
  4392     // tasks might be pushing objects to it concurrently.
  4393     assert(_cm->out_of_regions(),
  4394            "at this point we should be out of regions");
  4396     if (_cm->verbose_low()) {
  4397       gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id);
  4400     // Try to reduce the number of available SATB buffers so that
  4401     // remark has less work to do.
  4402     drain_satb_buffers();
  4405   // Since we've done everything else, we can now totally drain the
  4406   // local queue and global stack.
  4407   drain_local_queue(false);
  4408   drain_global_stack(false);
   4410   // Attempt to steal work from other tasks' queues.
  4411   if (do_stealing && !has_aborted()) {
  4412     // We have not aborted. This means that we have finished all that
  4413     // we could. Let's try to do some stealing...
  4415     // We cannot check whether the global stack is empty, since other
  4416     // tasks might be pushing objects to it concurrently.
  4417     assert(_cm->out_of_regions() && _task_queue->size() == 0,
  4418            "only way to reach here");
  4420     if (_cm->verbose_low()) {
  4421       gclog_or_tty->print_cr("[%u] starting to steal", _worker_id);
  4424     while (!has_aborted()) {
  4425       oop obj;
  4426       statsOnly( ++_steal_attempts );
  4428       if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
  4429         if (_cm->verbose_medium()) {
  4430           gclog_or_tty->print_cr("[%u] stolen "PTR_FORMAT" successfully",
  4431                                  _worker_id, p2i((void*) obj));
  4434         statsOnly( ++_steals );
  4436         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
  4437                "any stolen object should be marked");
  4438         scan_object(obj);
  4440         // And since we're towards the end, let's totally drain the
  4441         // local queue and global stack.
  4442         drain_local_queue(false);
  4443         drain_global_stack(false);
  4444       } else {
  4445         break;
  4450   // If we are about to wrap up and go into termination, check if we
  4451   // should raise the overflow flag.
  4452   if (do_termination && !has_aborted()) {
  4453     if (_cm->force_overflow()->should_force()) {
  4454       _cm->set_has_overflown();
  4455       regular_clock_call();
  4459   // We still haven't aborted. Now, let's try to get into the
  4460   // termination protocol.
  4461   if (do_termination && !has_aborted()) {
  4462     // We cannot check whether the global stack is empty, since other
  4463     // tasks might be concurrently pushing objects on it.
  4464     // Separated the asserts so that we know which one fires.
  4465     assert(_cm->out_of_regions(), "only way to reach here");
  4466     assert(_task_queue->size() == 0, "only way to reach here");
  4468     if (_cm->verbose_low()) {
  4469       gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id);
  4472     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
  4474     // The CMTask class also extends the TerminatorTerminator class,
  4475     // hence its should_exit_termination() method will also decide
  4476     // whether to exit the termination protocol or not.
  4477     bool finished = (is_serial ||
  4478                      _cm->terminator()->offer_termination(this));
  4479     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
  4480     _termination_time_ms +=
  4481       termination_end_time_ms - _termination_start_time_ms;
  4483     if (finished) {
  4484       // We're all done.
  4486       if (_worker_id == 0) {
  4487         // let's allow task 0 to do this
  4488         if (concurrent()) {
  4489           assert(_cm->concurrent_marking_in_progress(), "invariant");
  4490           // we need to set this to false before the next
  4491           // safepoint. This way we ensure that the marking phase
  4492           // doesn't observe any more heap expansions.
  4493           _cm->clear_concurrent_marking_in_progress();
  4497       // We can now guarantee that the global stack is empty, since
  4498       // all other tasks have finished. We separated the guarantees so
  4499       // that, if a condition is false, we can immediately find out
  4500       // which one.
  4501       guarantee(_cm->out_of_regions(), "only way to reach here");
  4502       guarantee(_cm->mark_stack_empty(), "only way to reach here");
  4503       guarantee(_task_queue->size() == 0, "only way to reach here");
  4504       guarantee(!_cm->has_overflown(), "only way to reach here");
  4505       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
  4507       if (_cm->verbose_low()) {
  4508         gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id);
  4510     } else {
   4511     // Apparently there's more work to do. Let's abort this task. The
   4512     // caller will restart it and we can hopefully find more things to do.
  4514       if (_cm->verbose_low()) {
  4515         gclog_or_tty->print_cr("[%u] apparently there is more work to do",
  4516                                _worker_id);
  4519       set_has_aborted();
  4520       statsOnly( ++_aborted_termination );
   4524   // Mainly for debugging purposes to make sure that a pointer to the
   4525   // closure which was stack-allocated in this frame doesn't
   4526   // escape it by accident.
  4527   set_cm_oop_closure(NULL);
  4528   double end_time_ms = os::elapsedVTime() * 1000.0;
  4529   double elapsed_time_ms = end_time_ms - _start_time_ms;
  4530   // Update the step history.
  4531   _step_times_ms.add(elapsed_time_ms);
  4533   if (has_aborted()) {
  4534     // The task was aborted for some reason.
  4536     statsOnly( ++_aborted );
  4538     if (_has_timed_out) {
  4539       double diff_ms = elapsed_time_ms - _time_target_ms;
  4540       // Keep statistics of how well we did with respect to hitting
  4541       // our target only if we actually timed out (if we aborted for
  4542       // other reasons, then the results might get skewed).
  4543       _marking_step_diffs_ms.add(diff_ms);
  4546     if (_cm->has_overflown()) {
  4547       // This is the interesting one. We aborted because a global
  4548       // overflow was raised. This means we have to restart the
  4549       // marking phase and start iterating over regions. However, in
  4550       // order to do this we have to make sure that all tasks stop
  4551       // what they are doing and re-initialise in a safe manner. We
  4552       // will achieve this with the use of two barrier sync points.
  4554       if (_cm->verbose_low()) {
  4555         gclog_or_tty->print_cr("[%u] detected overflow", _worker_id);
  4558       if (!is_serial) {
  4559         // We only need to enter the sync barrier if being called
  4560         // from a parallel context
  4561         _cm->enter_first_sync_barrier(_worker_id);
  4563         // When we exit this sync barrier we know that all tasks have
  4564         // stopped doing marking work. So, it's now safe to
  4565         // re-initialise our data structures. At the end of this method,
  4566         // task 0 will clear the global data structures.
  4569       statsOnly( ++_aborted_overflow );
  4571       // We clear the local state of this task...
  4572       clear_region_fields();
  4574       if (!is_serial) {
  4575         // ...and enter the second barrier.
  4576         _cm->enter_second_sync_barrier(_worker_id);
   4578       // At this point, if we're in the concurrent phase of
   4579       // marking, everything has been re-initialized and we're
   4580       // ready to restart.
  4583     if (_cm->verbose_low()) {
  4584       gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, "
  4585                              "elapsed = %1.2lfms <<<<<<<<<<",
  4586                              _worker_id, _time_target_ms, elapsed_time_ms);
  4587       if (_cm->has_aborted()) {
  4588         gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========",
  4589                                _worker_id);
  4592   } else {
  4593     if (_cm->verbose_low()) {
  4594       gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, "
  4595                              "elapsed = %1.2lfms <<<<<<<<<<",
  4596                              _worker_id, _time_target_ms, elapsed_time_ms);
  4600   _claimed = false;
  4603 CMTask::CMTask(uint worker_id,
  4604                ConcurrentMark* cm,
  4605                size_t* marked_bytes,
  4606                BitMap* card_bm,
  4607                CMTaskQueue* task_queue,
  4608                CMTaskQueueSet* task_queues)
  4609   : _g1h(G1CollectedHeap::heap()),
  4610     _worker_id(worker_id), _cm(cm),
  4611     _claimed(false),
  4612     _nextMarkBitMap(NULL), _hash_seed(17),
  4613     _task_queue(task_queue),
  4614     _task_queues(task_queues),
  4615     _cm_oop_closure(NULL),
  4616     _marked_bytes_array(marked_bytes),
  4617     _card_bm(card_bm) {
  4618   guarantee(task_queue != NULL, "invariant");
  4619   guarantee(task_queues != NULL, "invariant");
  4621   statsOnly( _clock_due_to_scanning = 0;
  4622              _clock_due_to_marking  = 0 );
  4624   _marking_step_diffs_ms.add(0.5);
  4627 // These are formatting macros that are used below to ensure
  4628 // consistent formatting. The *_H_* versions are used to format the
  4629 // header for a particular value and they should be kept consistent
  4630 // with the corresponding macro. Also note that most of the macros add
  4631 // the necessary white space (as a prefix) which makes them a bit
  4632 // easier to compose.
  4634 // All the output lines are prefixed with this string to be able to
  4635 // identify them easily in a large log file.
  4636 #define G1PPRL_LINE_PREFIX            "###"
  4638 #define G1PPRL_ADDR_BASE_FORMAT    " "PTR_FORMAT"-"PTR_FORMAT
  4639 #ifdef _LP64
  4640 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
  4641 #else // _LP64
  4642 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
  4643 #endif // _LP64
  4645 // For per-region info
  4646 #define G1PPRL_TYPE_FORMAT            "   %-4s"
  4647 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
  4648 #define G1PPRL_BYTE_FORMAT            "  "SIZE_FORMAT_W(9)
  4649 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
  4650 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
  4651 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
  4653 // For summary info
  4654 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  "tag":"G1PPRL_ADDR_BASE_FORMAT
  4655 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  "tag": "SIZE_FORMAT
  4656 #define G1PPRL_SUM_MB_FORMAT(tag)      "  "tag": %1.2f MB"
  4657 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
  4659 G1PrintRegionLivenessInfoClosure::
  4660 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
  4661   : _out(out),
  4662     _total_used_bytes(0), _total_capacity_bytes(0),
  4663     _total_prev_live_bytes(0), _total_next_live_bytes(0),
  4664     _hum_used_bytes(0), _hum_capacity_bytes(0),
  4665     _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
  4666     _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
  4667   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  4668   MemRegion g1_committed = g1h->g1_committed();
  4669   MemRegion g1_reserved = g1h->g1_reserved();
  4670   double now = os::elapsedTime();
  4672   // Print the header of the output.
  4673   _out->cr();
  4674   _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
  4675   _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
  4676                  G1PPRL_SUM_ADDR_FORMAT("committed")
  4677                  G1PPRL_SUM_ADDR_FORMAT("reserved")
  4678                  G1PPRL_SUM_BYTE_FORMAT("region-size"),
  4679                  p2i(g1_committed.start()), p2i(g1_committed.end()),
  4680                  p2i(g1_reserved.start()), p2i(g1_reserved.end()),
  4681                  HeapRegion::GrainBytes);
  4682   _out->print_cr(G1PPRL_LINE_PREFIX);
  4683   _out->print_cr(G1PPRL_LINE_PREFIX
  4684                 G1PPRL_TYPE_H_FORMAT
  4685                 G1PPRL_ADDR_BASE_H_FORMAT
  4686                 G1PPRL_BYTE_H_FORMAT
  4687                 G1PPRL_BYTE_H_FORMAT
  4688                 G1PPRL_BYTE_H_FORMAT
  4689                 G1PPRL_DOUBLE_H_FORMAT
  4690                 G1PPRL_BYTE_H_FORMAT
  4691                 G1PPRL_BYTE_H_FORMAT,
  4692                 "type", "address-range",
  4693                 "used", "prev-live", "next-live", "gc-eff",
  4694                 "remset", "code-roots");
  4695   _out->print_cr(G1PPRL_LINE_PREFIX
  4696                 G1PPRL_TYPE_H_FORMAT
  4697                 G1PPRL_ADDR_BASE_H_FORMAT
  4698                 G1PPRL_BYTE_H_FORMAT
  4699                 G1PPRL_BYTE_H_FORMAT
  4700                 G1PPRL_BYTE_H_FORMAT
  4701                 G1PPRL_DOUBLE_H_FORMAT
  4702                 G1PPRL_BYTE_H_FORMAT
  4703                 G1PPRL_BYTE_H_FORMAT,
  4704                 "", "",
  4705                 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
  4706                 "(bytes)", "(bytes)");
   4709 // It takes as a parameter a pointer to one of the _hum_* fields, it
  4710 // deduces the corresponding value for a region in a humongous region
  4711 // series (either the region size, or what's left if the _hum_* field
  4712 // is < the region size), and updates the _hum_* field accordingly.
  4713 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
  4714   size_t bytes = 0;
  4715   // The > 0 check is to deal with the prev and next live bytes which
  4716   // could be 0.
  4717   if (*hum_bytes > 0) {
  4718     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
  4719     *hum_bytes -= bytes;
  4721   return bytes;
  4724 // It deduces the values for a region in a humongous region series
  4725 // from the _hum_* fields and updates those accordingly. It assumes
   4726 // that the _hum_* fields have already been set up from the "starts
   4727 // humongous" region and that we visit the regions in address order.
  4728 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
  4729                                                      size_t* capacity_bytes,
  4730                                                      size_t* prev_live_bytes,
  4731                                                      size_t* next_live_bytes) {
  4732   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
  4733   *used_bytes      = get_hum_bytes(&_hum_used_bytes);
  4734   *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
  4735   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
  4736   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
  4739 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
  4740   const char* type = "";
  4741   HeapWord* bottom       = r->bottom();
  4742   HeapWord* end          = r->end();
  4743   size_t capacity_bytes  = r->capacity();
  4744   size_t used_bytes      = r->used();
  4745   size_t prev_live_bytes = r->live_bytes();
  4746   size_t next_live_bytes = r->next_live_bytes();
  4747   double gc_eff          = r->gc_efficiency();
  4748   size_t remset_bytes    = r->rem_set()->mem_size();
  4749   size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
  4751   if (r->used() == 0) {
  4752     type = "FREE";
  4753   } else if (r->is_survivor()) {
  4754     type = "SURV";
  4755   } else if (r->is_young()) {
  4756     type = "EDEN";
  4757   } else if (r->startsHumongous()) {
  4758     type = "HUMS";
  4760     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
  4761            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
  4762            "they should have been zeroed after the last time we used them");
  4763     // Set up the _hum_* fields.
  4764     _hum_capacity_bytes  = capacity_bytes;
  4765     _hum_used_bytes      = used_bytes;
  4766     _hum_prev_live_bytes = prev_live_bytes;
  4767     _hum_next_live_bytes = next_live_bytes;
  4768     get_hum_bytes(&used_bytes, &capacity_bytes,
  4769                   &prev_live_bytes, &next_live_bytes);
  4770     end = bottom + HeapRegion::GrainWords;
  4771   } else if (r->continuesHumongous()) {
  4772     type = "HUMC";
  4773     get_hum_bytes(&used_bytes, &capacity_bytes,
  4774                   &prev_live_bytes, &next_live_bytes);
  4775     assert(end == bottom + HeapRegion::GrainWords, "invariant");
  4776   } else {
  4777     type = "OLD";
  4780   _total_used_bytes      += used_bytes;
  4781   _total_capacity_bytes  += capacity_bytes;
  4782   _total_prev_live_bytes += prev_live_bytes;
  4783   _total_next_live_bytes += next_live_bytes;
  4784   _total_remset_bytes    += remset_bytes;
  4785   _total_strong_code_roots_bytes += strong_code_roots_bytes;
  4787   // Print a line for this particular region.
  4788   _out->print_cr(G1PPRL_LINE_PREFIX
  4789                  G1PPRL_TYPE_FORMAT
  4790                  G1PPRL_ADDR_BASE_FORMAT
  4791                  G1PPRL_BYTE_FORMAT
  4792                  G1PPRL_BYTE_FORMAT
  4793                  G1PPRL_BYTE_FORMAT
  4794                  G1PPRL_DOUBLE_FORMAT
  4795                  G1PPRL_BYTE_FORMAT
  4796                  G1PPRL_BYTE_FORMAT,
  4797                  type, p2i(bottom), p2i(end),
  4798                  used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
  4799                  remset_bytes, strong_code_roots_bytes);
  4801   return false;
  4804 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
  4805   // add static memory usages to remembered set sizes
  4806   _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
  4807   // Print the footer of the output.
  4808   _out->print_cr(G1PPRL_LINE_PREFIX);
  4809   _out->print_cr(G1PPRL_LINE_PREFIX
  4810                  " SUMMARY"
  4811                  G1PPRL_SUM_MB_FORMAT("capacity")
  4812                  G1PPRL_SUM_MB_PERC_FORMAT("used")
  4813                  G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
  4814                  G1PPRL_SUM_MB_PERC_FORMAT("next-live")
  4815                  G1PPRL_SUM_MB_FORMAT("remset")
  4816                  G1PPRL_SUM_MB_FORMAT("code-roots"),
  4817                  bytes_to_mb(_total_capacity_bytes),
  4818                  bytes_to_mb(_total_used_bytes),
  4819                  perc(_total_used_bytes, _total_capacity_bytes),
  4820                  bytes_to_mb(_total_prev_live_bytes),
  4821                  perc(_total_prev_live_bytes, _total_capacity_bytes),
  4822                  bytes_to_mb(_total_next_live_bytes),
  4823                  perc(_total_next_live_bytes, _total_capacity_bytes),
  4824                  bytes_to_mb(_total_remset_bytes),
  4825                  bytes_to_mb(_total_strong_code_roots_bytes));
  4826   _out->cr();
