src/share/vm/gc_implementation/g1/concurrentMark.cpp


author      tschatzl
date        Tue, 02 Sep 2014 15:04:00 +0200
changeset   7094:9337d0e7ea4f
parent      7091:a8ea2f110d87
child       7100:edb5f3b38aab
permissions -rw-r--r--

8055919: Remove dead code in G1 concurrent marking code
Reviewed-by: jmasa, jwilhelm

     1 /*
     2  * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.
     8  *
     9  * This code is distributed in the hope that it will be useful, but WITHOUT
    10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    12  * version 2 for more details (a copy is included in the LICENSE file that
    13  * accompanied this code).
    14  *
    15  * You should have received a copy of the GNU General Public License version
    16  * 2 along with this work; if not, write to the Free Software Foundation,
    17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    18  *
    19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    20  * or visit www.oracle.com if you need additional information or have any
    21  * questions.
    22  *
    23  */
    25 #include "precompiled.hpp"
    26 #include "classfile/symbolTable.hpp"
    27 #include "code/codeCache.hpp"
    28 #include "gc_implementation/g1/concurrentMark.inline.hpp"
    29 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
    30 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
    31 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
    32 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
    33 #include "gc_implementation/g1/g1Log.hpp"
    34 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
    35 #include "gc_implementation/g1/g1RemSet.hpp"
    36 #include "gc_implementation/g1/heapRegion.inline.hpp"
    37 #include "gc_implementation/g1/heapRegionManager.inline.hpp"
    38 #include "gc_implementation/g1/heapRegionRemSet.hpp"
    39 #include "gc_implementation/g1/heapRegionSet.inline.hpp"
    40 #include "gc_implementation/shared/vmGCOperations.hpp"
    41 #include "gc_implementation/shared/gcTimer.hpp"
    42 #include "gc_implementation/shared/gcTrace.hpp"
    43 #include "gc_implementation/shared/gcTraceTime.hpp"
    44 #include "memory/allocation.hpp"
    45 #include "memory/genOopClosures.inline.hpp"
    46 #include "memory/referencePolicy.hpp"
    47 #include "memory/resourceArea.hpp"
    48 #include "oops/oop.inline.hpp"
    49 #include "runtime/handles.inline.hpp"
    50 #include "runtime/java.hpp"
    51 #include "runtime/prefetch.inline.hpp"
    52 #include "services/memTracker.hpp"
    54 // Concurrent marking bit map wrapper
    56 CMBitMapRO::CMBitMapRO(int shifter) :
    57   _bm(),
    58   _shifter(shifter) {
    59   _bmStartWord = 0;
    60   _bmWordSize = 0;
    61 }
    63 HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
    64                                                const HeapWord* limit) const {
    65   // First we must round addr *up* to a possible object boundary.
    66   addr = (HeapWord*)align_size_up((intptr_t)addr,
    67                                   HeapWordSize << _shifter);
    68   size_t addrOffset = heapWordToOffset(addr);
    69   if (limit == NULL) {
    70     limit = _bmStartWord + _bmWordSize;
    71   }
    72   size_t limitOffset = heapWordToOffset(limit);
    73   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
    74   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
    75   assert(nextAddr >= addr, "get_next_one postcondition");
    76   assert(nextAddr == limit || isMarked(nextAddr),
    77          "get_next_one postcondition");
    78   return nextAddr;
    79 }
    81 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(const HeapWord* addr,
    82                                                  const HeapWord* limit) const {
    83   size_t addrOffset = heapWordToOffset(addr);
    84   if (limit == NULL) {
    85     limit = _bmStartWord + _bmWordSize;
    86   }
    87   size_t limitOffset = heapWordToOffset(limit);
    88   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
    89   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
    90   assert(nextAddr >= addr, "get_next_one postcondition");
    91   assert(nextAddr == limit || !isMarked(nextAddr),
    92          "get_next_one postcondition");
    93   return nextAddr;
    94 }
    96 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
    97   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
    98   return (int) (diff >> _shifter);
    99 }
   101 #ifndef PRODUCT
   102 bool CMBitMapRO::covers(MemRegion heap_rs) const {
   103   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
   104   assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
   105          "size inconsistency");
   106   return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
   107          _bmWordSize  == heap_rs.word_size();
   108 }
   109 #endif
   111 void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
   112   _bm.print_on_error(st, prefix);
   113 }
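       // The mark bitmap keeps one bit per MinObjAlignmentInBytes of heap, so
       // its backing storage needs heap_size / (MinObjAlignmentInBytes * BitsPerByte)
       // bytes; compute_size() below returns exactly that via mark_distance().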
   115 size_t CMBitMap::compute_size(size_t heap_size) {
   116   return heap_size / mark_distance();
   117 }
   119 size_t CMBitMap::mark_distance() {
   120   return MinObjAlignmentInBytes * BitsPerByte;
   121 }
   123 void CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
   124   _bmStartWord = heap.start();
   125   _bmWordSize = heap.word_size();
   127   _bm.set_map((BitMap::bm_word_t*) storage->reserved().start());
   128   _bm.set_size(_bmWordSize >> _shifter);
   130   storage->set_mapping_changed_listener(&_listener);
   131 }
   133 void CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions) {
   134   // We need to clear the bitmap on commit, removing any existing information.
   135   MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
   136   _bm->clearRange(mr);
   137 }
   139 // Closure used for clearing the given mark bitmap.
   140 class ClearBitmapHRClosure : public HeapRegionClosure {
   141  private:
   142   ConcurrentMark* _cm;
   143   CMBitMap* _bitmap;
   144   bool _may_yield;      // The closure may yield during iteration. If yielded, abort the iteration.
   145  public:
   146   ClearBitmapHRClosure(ConcurrentMark* cm, CMBitMap* bitmap, bool may_yield) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap), _may_yield(may_yield) {
   147     assert(!may_yield || cm != NULL, "CM must be non-NULL if this closure is expected to yield.");
   148   }
   150   virtual bool doHeapRegion(HeapRegion* r) {
   151     size_t const chunk_size_in_words = M / HeapWordSize;
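           // Clear the bitmap in chunks that each correspond to 1M of heap, so
           // that a yield check can be performed between chunks when requested.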
   153     HeapWord* cur = r->bottom();
   154     HeapWord* const end = r->end();
   156     while (cur < end) {
   157       MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
   158       _bitmap->clearRange(mr);
   160       cur += chunk_size_in_words;
   162       // Abort iteration if, after yielding, the marking has been aborted.
   163       if (_may_yield && _cm->do_yield_check() && _cm->has_aborted()) {
   164         return true;
   165       }
   166       // Repeat the asserts from before the start of the closure. We will do them
   167       // as asserts here to minimize their overhead on the product. However, we
   168       // will have them as guarantees at the beginning / end of the bitmap
   169       // clearing to get some checking in the product.
   170       assert(!_may_yield || _cm->cmThread()->during_cycle(), "invariant");
   171       assert(!_may_yield || !G1CollectedHeap::heap()->mark_in_progress(), "invariant");
   172     }
   174     return false;
   175   }
   176 };
   178 void CMBitMap::clearAll() {
   179   ClearBitmapHRClosure cl(NULL, this, false /* may_yield */);
   180   G1CollectedHeap::heap()->heap_region_iterate(&cl);
   181   guarantee(cl.complete(), "Must have completed iteration.");
   182   return;
   183 }
   185 void CMBitMap::markRange(MemRegion mr) {
   186   mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
   187   assert(!mr.is_empty(), "unexpected empty region");
   188   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
   189           ((HeapWord *) mr.end())),
   190          "markRange memory region end is not card aligned");
   191   // convert address range into offset range
   192   _bm.at_put_range(heapWordToOffset(mr.start()),
   193                    heapWordToOffset(mr.end()), true);
   194 }
   196 void CMBitMap::clearRange(MemRegion mr) {
   197   mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
   198   assert(!mr.is_empty(), "unexpected empty region");
   199   // convert address range into offset range
   200   _bm.at_put_range(heapWordToOffset(mr.start()),
   201                    heapWordToOffset(mr.end()), false);
   202 }
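       // Returns the first contiguous run of marked words at or after addr
       // (clamped to end_addr) and clears those bits before returning the range.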
   204 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
   205                                             HeapWord* end_addr) {
   206   HeapWord* start = getNextMarkedWordAddress(addr);
   207   start = MIN2(start, end_addr);
   208   HeapWord* end   = getNextUnmarkedWordAddress(start);
   209   end = MIN2(end, end_addr);
   210   assert(start <= end, "Consistency check");
   211   MemRegion mr(start, end);
   212   if (!mr.is_empty()) {
   213     clearRange(mr);
   214   }
   215   return mr;
   216 }
   218 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
   219   _base(NULL), _cm(cm)
   220 #ifdef ASSERT
   221   , _drain_in_progress(false)
   222   , _drain_in_progress_yields(false)
   223 #endif
   224 {}
   226 bool CMMarkStack::allocate(size_t capacity) {
   227   // allocate a stack of the requisite depth
   228   ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
   229   if (!rs.is_reserved()) {
   230     warning("ConcurrentMark MarkStack allocation failure");
   231     return false;
   232   }
   233   MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
   234   if (!_virtual_space.initialize(rs, rs.size())) {
   235     warning("ConcurrentMark MarkStack backing store failure");
   236     // Release the virtual memory reserved for the marking stack
   237     rs.release();
   238     return false;
   239   }
   240   assert(_virtual_space.committed_size() == rs.size(),
   241          "Didn't reserve backing store for all of ConcurrentMark stack?");
   242   _base = (oop*) _virtual_space.low();
   243   setEmpty();
   244   _capacity = (jint) capacity;
   245   _saved_index = -1;
   246   _should_expand = false;
   247   NOT_PRODUCT(_max_depth = 0);
   248   return true;
   249 }
   251 void CMMarkStack::expand() {
   252   // Called during remark, if we've overflowed the marking stack during marking.
   253   assert(isEmpty(), "stack should have been emptied while handling overflow");
   254   assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
   255   // Clear expansion flag
   256   _should_expand = false;
   257   if (_capacity == (jint) MarkStackSizeMax) {
   258     if (PrintGCDetails && Verbose) {
   259       gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
   260     }
   261     return;
   262   }
   263   // Double capacity if possible
   264   jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
   265   // Do not give up existing stack until we have managed to
   266   // get the double capacity that we desired.
   267   ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
   268                                                            sizeof(oop)));
   269   if (rs.is_reserved()) {
   270     // Release the backing store associated with old stack
   271     _virtual_space.release();
   272     // Reinitialize virtual space for new stack
   273     if (!_virtual_space.initialize(rs, rs.size())) {
   274       fatal("Not enough swap for expanded marking stack capacity");
   275     }
   276     _base = (oop*)(_virtual_space.low());
   277     _index = 0;
   278     _capacity = new_capacity;
   279   } else {
   280     if (PrintGCDetails && Verbose) {
   281       // Failed to double capacity, continue;
   282       gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
   283                           SIZE_FORMAT"K to " SIZE_FORMAT"K",
   284                           _capacity / K, new_capacity / K);
   285     }
   286   }
   287 }
   289 void CMMarkStack::set_should_expand() {
   290   // If we're resetting the marking state because of a
   291   // marking stack overflow, record that we should, if
   292   // possible, expand the stack.
   293   _should_expand = _cm->has_overflown();
   294 }
   296 CMMarkStack::~CMMarkStack() {
   297   if (_base != NULL) {
   298     _base = NULL;
   299     _virtual_space.release();
   300   }
   301 }
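       // Lock-free push used concurrently by the marking threads: a CAS on
       // _index claims a slot, and the oop is then stored into that slot.
       // On a full stack the push is abandoned and the overflow flag is set.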
   303 void CMMarkStack::par_push(oop ptr) {
   304   while (true) {
   305     if (isFull()) {
   306       _overflow = true;
   307       return;
   308     }
   309     // Otherwise...
   310     jint index = _index;
   311     jint next_index = index+1;
   312     jint res = Atomic::cmpxchg(next_index, &_index, index);
   313     if (res == index) {
   314       _base[index] = ptr;
   315       // Note that we don't maintain this atomically.  We could, but it
   316       // doesn't seem necessary.
   317       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
   318       return;
   319     }
   320     // Otherwise, we need to try again.
   321   }
   322 }
   324 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
   325   while (true) {
   326     if (isFull()) {
   327       _overflow = true;
   328       return;
   329     }
   330     // Otherwise...
   331     jint index = _index;
   332     jint next_index = index + n;
   333     if (next_index > _capacity) {
   334       _overflow = true;
   335       return;
   336     }
   337     jint res = Atomic::cmpxchg(next_index, &_index, index);
   338     if (res == index) {
   339       for (int i = 0; i < n; i++) {
   340         int  ind = index + i;
   341         assert(ind < _capacity, "By overflow test above.");
   342         _base[ind] = ptr_arr[i];
   343       }
   344       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
   345       return;
   346     }
   347     // Otherwise, we need to try again.
   348   }
   349 }
   351 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
   352   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
   353   jint start = _index;
   354   jint next_index = start + n;
   355   if (next_index > _capacity) {
   356     _overflow = true;
   357     return;
   358   }
   359   // Otherwise.
   360   _index = next_index;
   361   for (int i = 0; i < n; i++) {
   362     int ind = start + i;
   363     assert(ind < _capacity, "By overflow test above.");
   364     _base[ind] = ptr_arr[i];
   365   }
   366   NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
   367 }
   369 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
   370   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
   371   jint index = _index;
   372   if (index == 0) {
   373     *n = 0;
   374     return false;
   375   } else {
   376     int k = MIN2(max, index);
   377     jint  new_ind = index - k;
   378     for (int j = 0; j < k; j++) {
   379       ptr_arr[j] = _base[new_ind + j];
   380     }
   381     _index = new_ind;
   382     *n = k;
   383     return true;
   384   }
   385 }
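       // Drains the stack: repeatedly pops an entry and applies the closure to
       // the object's fields; if yield_after is set, a yield check is performed
       // after each object and the drain returns false when it had to stop early.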
   387 template<class OopClosureClass>
   388 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
   389   assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
   390          || SafepointSynchronize::is_at_safepoint(),
   391          "Drain recursion must be yield-safe.");
   392   bool res = true;
   393   debug_only(_drain_in_progress = true);
   394   debug_only(_drain_in_progress_yields = yield_after);
   395   while (!isEmpty()) {
   396     oop newOop = pop();
   397     assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
   398     assert(newOop->is_oop(), "Expected an oop");
   399     assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
   400            "only grey objects on this stack");
   401     newOop->oop_iterate(cl);
   402     if (yield_after && _cm->do_yield_check()) {
   403       res = false;
   404       break;
   405     }
   406   }
   407   debug_only(_drain_in_progress = false);
   408   return res;
   409 }
   411 void CMMarkStack::note_start_of_gc() {
   412   assert(_saved_index == -1,
   413          "note_start_of_gc()/end_of_gc() bracketed incorrectly");
   414   _saved_index = _index;
   415 }
   417 void CMMarkStack::note_end_of_gc() {
   418   // This is intentionally a guarantee, instead of an assert. If we
   419   // accidentally add something to the mark stack during GC, it
   420   // will be a correctness issue so it's better if we crash. We'll
   421   // only check this once per GC anyway, so it won't be a performance
   422   // issue in any way.
   423   guarantee(_saved_index == _index,
   424             err_msg("saved index: %d index: %d", _saved_index, _index));
   425   _saved_index = -1;
   426 }
   428 void CMMarkStack::oops_do(OopClosure* f) {
   429   assert(_saved_index == _index,
   430          err_msg("saved index: %d index: %d", _saved_index, _index));
   431   for (int i = 0; i < _index; i += 1) {
   432     f->do_oop(&_base[i]);
   433   }
   434 }
   436 CMRootRegions::CMRootRegions() :
   437   _young_list(NULL), _cm(NULL), _scan_in_progress(false),
   438   _should_abort(false),  _next_survivor(NULL) { }
   440 void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
   441   _young_list = g1h->young_list();
   442   _cm = cm;
   443 }
   445 void CMRootRegions::prepare_for_scan() {
   446   assert(!scan_in_progress(), "pre-condition");
   448   // Currently, only survivors can be root regions.
   449   assert(_next_survivor == NULL, "pre-condition");
   450   _next_survivor = _young_list->first_survivor_region();
   451   _scan_in_progress = (_next_survivor != NULL);
   452   _should_abort = false;
   453 }
   455 HeapRegion* CMRootRegions::claim_next() {
   456   if (_should_abort) {
   457     // If someone has set the should_abort flag, we return NULL to
   458     // force the caller to bail out of their loop.
   459     return NULL;
   460   }
   462   // Currently, only survivors can be root regions.
   463   HeapRegion* res = _next_survivor;
   464   if (res != NULL) {
   465     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
   466     // Read it again in case it changed while we were waiting for the lock.
   467     res = _next_survivor;
   468     if (res != NULL) {
   469       if (res == _young_list->last_survivor_region()) {
   470         // We just claimed the last survivor so store NULL to indicate
   471         // that we're done.
   472         _next_survivor = NULL;
   473       } else {
   474         _next_survivor = res->get_next_young_region();
   475       }
   476     } else {
   477       // Someone else claimed the last survivor while we were trying
   478       // to take the lock so nothing else to do.
   479     }
   480   }
   481   assert(res == NULL || res->is_survivor(), "post-condition");
   483   return res;
   484 }
   486 void CMRootRegions::scan_finished() {
   487   assert(scan_in_progress(), "pre-condition");
   489   // Currently, only survivors can be root regions.
   490   if (!_should_abort) {
   491     assert(_next_survivor == NULL, "we should have claimed all survivors");
   492   }
   493   _next_survivor = NULL;
   495   {
   496     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
   497     _scan_in_progress = false;
   498     RootRegionScan_lock->notify_all();
   499   }
   500 }
   502 bool CMRootRegions::wait_until_scan_finished() {
   503   if (!scan_in_progress()) return false;
   505   {
   506     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
   507     while (scan_in_progress()) {
   508       RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
   509     }
   510   }
   511   return true;
   512 }
   514 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
   515 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
   516 #endif // _MSC_VER
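       // About a quarter of the parallel GC threads (rounded to the nearest
       // integer), with a minimum of one.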
   518 uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
   519   return MAX2((n_par_threads + 2) / 4, 1U);
   520 }
   522 ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
   523   _g1h(g1h),
   524   _markBitMap1(),
   525   _markBitMap2(),
   526   _parallel_marking_threads(0),
   527   _max_parallel_marking_threads(0),
   528   _sleep_factor(0.0),
   529   _marking_task_overhead(1.0),
   530   _cleanup_sleep_factor(0.0),
   531   _cleanup_task_overhead(1.0),
   532   _cleanup_list("Cleanup List"),
   533   _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
   534   _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >>
   535             CardTableModRefBS::card_shift,
   536             false /* in_resource_area*/),
   538   _prevMarkBitMap(&_markBitMap1),
   539   _nextMarkBitMap(&_markBitMap2),
   541   _markStack(this),
   542   // _finger set in set_non_marking_state
   544   _max_worker_id(MAX2((uint)ParallelGCThreads, 1U)),
   545   // _active_tasks set in set_non_marking_state
   546   // _tasks set inside the constructor
   547   _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
   548   _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),
   550   _has_overflown(false),
   551   _concurrent(false),
   552   _has_aborted(false),
   553   _aborted_gc_id(GCId::undefined()),
   554   _restart_for_overflow(false),
   555   _concurrent_marking_in_progress(false),
   557   // _verbose_level set below
   559   _init_times(),
   560   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
   561   _cleanup_times(),
   562   _total_counting_time(0.0),
   563   _total_rs_scrub_time(0.0),
   565   _parallel_workers(NULL),
   567   _count_card_bitmaps(NULL),
   568   _count_marked_bytes(NULL),
   569   _completed_initialization(false) {
   570   CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
   571   if (verbose_level < no_verbose) {
   572     verbose_level = no_verbose;
   573   }
   574   if (verbose_level > high_verbose) {
   575     verbose_level = high_verbose;
   576   }
   577   _verbose_level = verbose_level;
   579   if (verbose_low()) {
   580     gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
   581                            "heap end = " INTPTR_FORMAT, p2i(_heap_start), p2i(_heap_end));
   582   }
   584   _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
   585   _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);
   587   // Create & start a ConcurrentMark thread.
   588   _cmThread = new ConcurrentMarkThread(this);
   589   assert(cmThread() != NULL, "CM Thread should have been created");
   590   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
   591   if (_cmThread->osthread() == NULL) {
   592       vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
   593   }
   595   assert(CGC_lock != NULL, "Where's the CGC_lock?");
   596   assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
   597   assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");
   599   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
   600   satb_qs.set_buffer_size(G1SATBBufferSize);
   602   _root_regions.init(_g1h, this);
   604   if (ConcGCThreads > ParallelGCThreads) {
   605     warning("Can't have more ConcGCThreads (" UINTX_FORMAT ") "
   606             "than ParallelGCThreads (" UINTX_FORMAT ").",
   607             ConcGCThreads, ParallelGCThreads);
   608     return;
   609   }
   610   if (ParallelGCThreads == 0) {
   611     // if we are not running with any parallel GC threads we will not
   612     // spawn any marking threads either
   613     _parallel_marking_threads =       0;
   614     _max_parallel_marking_threads =   0;
   615     _sleep_factor             =     0.0;
   616     _marking_task_overhead    =     1.0;
   617   } else {
   618     if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
   619       // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
   620       // if both are set
   621       _sleep_factor             = 0.0;
   622       _marking_task_overhead    = 1.0;
   623     } else if (G1MarkingOverheadPercent > 0) {
   624       // We will calculate the number of parallel marking threads based
   625       // on a target overhead with respect to the soft real-time goal
   626       double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
   627       double overall_cm_overhead =
   628         (double) MaxGCPauseMillis * marking_overhead /
   629         (double) GCPauseIntervalMillis;
   630       double cpu_ratio = 1.0 / (double) os::processor_count();
   631       double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
   632       double marking_task_overhead =
   633         overall_cm_overhead / marking_thread_num *
   634                                                 (double) os::processor_count();
   635       double sleep_factor =
   636                          (1.0 - marking_task_overhead) / marking_task_overhead;
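             // Worked example (illustrative values, not necessarily defaults):
             // with MaxGCPauseMillis=200, GCPauseIntervalMillis=1000,
             // G1MarkingOverheadPercent=10 and 8 processors,
             // overall_cm_overhead = 200 * 0.1 / 1000 = 0.02,
             // marking_thread_num = ceil(0.02 * 8) = 1,
             // marking_task_overhead = 0.02 * 8 / 1 = 0.16 and
             // sleep_factor = (1 - 0.16) / 0.16 = 5.25.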
   638       FLAG_SET_ERGO(uintx, ConcGCThreads, (uint) marking_thread_num);
   639       _sleep_factor             = sleep_factor;
   640       _marking_task_overhead    = marking_task_overhead;
   641     } else {
   642       // Calculate the number of parallel marking threads by scaling
   643       // the number of parallel GC threads.
   644       uint marking_thread_num = scale_parallel_threads((uint) ParallelGCThreads);
   645       FLAG_SET_ERGO(uintx, ConcGCThreads, marking_thread_num);
   646       _sleep_factor             = 0.0;
   647       _marking_task_overhead    = 1.0;
   648     }
   650     assert(ConcGCThreads > 0, "Should have been set");
   651     _parallel_marking_threads = (uint) ConcGCThreads;
   652     _max_parallel_marking_threads = _parallel_marking_threads;
   654     if (parallel_marking_threads() > 1) {
   655       _cleanup_task_overhead = 1.0;
   656     } else {
   657       _cleanup_task_overhead = marking_task_overhead();
   658     }
   659     _cleanup_sleep_factor =
   660                      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
   662 #if 0
   663     gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
   664     gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
   665     gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
   666     gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
   667     gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
   668 #endif
   670     guarantee(parallel_marking_threads() > 0, "peace of mind");
   671     _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
   672          _max_parallel_marking_threads, false, true);
   673     if (_parallel_workers == NULL) {
   674       vm_exit_during_initialization("Failed necessary allocation.");
   675     } else {
   676       _parallel_workers->initialize_workers();
   677     }
   678   }
   680   if (FLAG_IS_DEFAULT(MarkStackSize)) {
   681     uintx mark_stack_size =
   682       MIN2(MarkStackSizeMax,
   683           MAX2(MarkStackSize, (uintx) (parallel_marking_threads() * TASKQUEUE_SIZE)));
   684     // Verify that the calculated value for MarkStackSize is in range.
   685     // It would be nice to use the private utility routine from Arguments.
   686     if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
   687       warning("Invalid value calculated for MarkStackSize (" UINTX_FORMAT "): "
   688               "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
   689               mark_stack_size, (uintx) 1, MarkStackSizeMax);
   690       return;
   691     }
   692     FLAG_SET_ERGO(uintx, MarkStackSize, mark_stack_size);
   693   } else {
   694     // Verify MarkStackSize is in range.
   695     if (FLAG_IS_CMDLINE(MarkStackSize)) {
   696       if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
   697         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
   698           warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT "): "
   699                   "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
   700                   MarkStackSize, (uintx) 1, MarkStackSizeMax);
   701           return;
   702         }
   703       } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
   704         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
   705           warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT ")"
   706                   " or for MarkStackSizeMax (" UINTX_FORMAT ")",
   707                   MarkStackSize, MarkStackSizeMax);
   708           return;
   709         }
   710       }
   711     }
   712   }
   714   if (!_markStack.allocate(MarkStackSize)) {
   715     warning("Failed to allocate CM marking stack");
   716     return;
   717   }
   719   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
   720   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
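         // Per-worker liveness counting structures: each worker id gets its own
         // card bitmap and an array of marked bytes indexed by region.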
   722   _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
   723   _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
   725   BitMap::idx_t card_bm_size = _card_bm.size();
   727   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
   728   _active_tasks = _max_worker_id;
   730   size_t max_regions = (size_t) _g1h->max_regions();
   731   for (uint i = 0; i < _max_worker_id; ++i) {
   732     CMTaskQueue* task_queue = new CMTaskQueue();
   733     task_queue->initialize();
   734     _task_queues->register_queue(i, task_queue);
   736     _count_card_bitmaps[i] = BitMap(card_bm_size, false);
   737     _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);
   739     _tasks[i] = new CMTask(i, this,
   740                            _count_marked_bytes[i],
   741                            &_count_card_bitmaps[i],
   742                            task_queue, _task_queues);
   744     _accum_task_vtime[i] = 0.0;
   745   }
   747   // Calculate the card number for the bottom of the heap. Used
   748   // in biasing indexes into the accounting card bitmaps.
   749   _heap_bottom_card_num =
   750     intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
   751                                 CardTableModRefBS::card_shift);
   753   // Clear all the liveness counting data
   754   clear_all_count_data();
   756   // so that the call below can read a sensible value
   757   _heap_start = g1h->reserved_region().start();
   758   set_non_marking_state();
   759   _completed_initialization = true;
   760 }
   762 void ConcurrentMark::reset() {
   763   // Starting values for these two. This should be called in a STW
   764   // phase.
   765   MemRegion reserved = _g1h->g1_reserved();
   766   _heap_start = reserved.start();
   767   _heap_end   = reserved.end();
   769   // Separated the asserts so that we know which one fires.
   770   assert(_heap_start != NULL, "heap bounds should look ok");
   771   assert(_heap_end != NULL, "heap bounds should look ok");
   772   assert(_heap_start < _heap_end, "heap bounds should look ok");
   774   // Reset all the marking data structures and any necessary flags
   775   reset_marking_state();
   777   if (verbose_low()) {
   778     gclog_or_tty->print_cr("[global] resetting");
   779   }
   781   // We do reset all of them, since different phases will use
   782   // different number of active threads. So, it's easiest to have all
   783   // of them ready.
   784   for (uint i = 0; i < _max_worker_id; ++i) {
   785     _tasks[i]->reset(_nextMarkBitMap);
   786   }
   788   // we need this to make sure that the flag is on during the evac
   789   // pause with initial mark piggy-backed
   790   set_concurrent_marking_in_progress();
   791 }
   794 void ConcurrentMark::reset_marking_state(bool clear_overflow) {
   795   _markStack.set_should_expand();
   796   _markStack.setEmpty();        // Also clears the _markStack overflow flag
   797   if (clear_overflow) {
   798     clear_has_overflown();
   799   } else {
   800     assert(has_overflown(), "pre-condition");
   801   }
   802   _finger = _heap_start;
   804   for (uint i = 0; i < _max_worker_id; ++i) {
   805     CMTaskQueue* queue = _task_queues->queue(i);
   806     queue->set_empty();
   807   }
   808 }
   810 void ConcurrentMark::set_concurrency(uint active_tasks) {
   811   assert(active_tasks <= _max_worker_id, "we should not have more");
   813   _active_tasks = active_tasks;
   814   // Need to update the three data structures below according to the
   815   // number of active threads for this phase.
   816   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
   817   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
   818   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
   819 }
   821 void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
   822   set_concurrency(active_tasks);
   824   _concurrent = concurrent;
   825   // We propagate this to all tasks, not just the active ones.
   826   for (uint i = 0; i < _max_worker_id; ++i)
   827     _tasks[i]->set_concurrent(concurrent);
   829   if (concurrent) {
   830     set_concurrent_marking_in_progress();
   831   } else {
   832     // We currently assume that the concurrent flag has been set to
   833     // false before we start remark. At this point we should also be
   834     // in a STW phase.
   835     assert(!concurrent_marking_in_progress(), "invariant");
   836     assert(out_of_regions(),
   837            err_msg("only way to get here: _finger: "PTR_FORMAT", _heap_end: "PTR_FORMAT,
   838                    p2i(_finger), p2i(_heap_end)));
   839   }
   840 }
   842 void ConcurrentMark::set_non_marking_state() {
   843   // We set the global marking state to some default values when we're
   844   // not doing marking.
   845   reset_marking_state();
   846   _active_tasks = 0;
   847   clear_concurrent_marking_in_progress();
   848 }
   850 ConcurrentMark::~ConcurrentMark() {
   851   // The ConcurrentMark instance is never freed.
   852   ShouldNotReachHere();
   853 }
   855 void ConcurrentMark::clearNextBitmap() {
   856   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   858   // Make sure that the concurrent mark thread looks to still be in
   859   // the current cycle.
   860   guarantee(cmThread()->during_cycle(), "invariant");
   862   // We are finishing up the current cycle by clearing the next
   863   // marking bitmap and getting it ready for the next cycle. During
   864   // this time no other cycle can start. So, let's make sure that this
   865   // is the case.
   866   guarantee(!g1h->mark_in_progress(), "invariant");
   868   ClearBitmapHRClosure cl(this, _nextMarkBitMap, true /* may_yield */);
   869   g1h->heap_region_iterate(&cl);
   871   // Clear the liveness counting data. If the marking has been aborted, the abort()
   872   // call already did that.
   873   if (cl.complete()) {
   874     clear_all_count_data();
   875   }
   877   // Repeat the asserts from above.
   878   guarantee(cmThread()->during_cycle(), "invariant");
   879   guarantee(!g1h->mark_in_progress(), "invariant");
   880 }
   882 class CheckBitmapClearHRClosure : public HeapRegionClosure {
   883   CMBitMap* _bitmap;
   884   bool _error;
   885  public:
   886   CheckBitmapClearHRClosure(CMBitMap* bitmap) : _bitmap(bitmap) {
   887   }
   889   virtual bool doHeapRegion(HeapRegion* r) {
   890     return _bitmap->getNextMarkedWordAddress(r->bottom(), r->end()) != r->end();
   891   }
   892 };
   894 bool ConcurrentMark::nextMarkBitmapIsClear() {
   895   CheckBitmapClearHRClosure cl(_nextMarkBitMap);
   896   _g1h->heap_region_iterate(&cl);
   897   return cl.complete();
   898 }
   900 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
   901 public:
   902   bool doHeapRegion(HeapRegion* r) {
   903     if (!r->continuesHumongous()) {
   904       r->note_start_of_marking();
   905     }
   906     return false;
   907   }
   908 };
   910 void ConcurrentMark::checkpointRootsInitialPre() {
   911   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
   912   G1CollectorPolicy* g1p = g1h->g1_policy();
   914   _has_aborted = false;
   916 #ifndef PRODUCT
   917   if (G1PrintReachableAtInitialMark) {
   918     print_reachable("at-cycle-start",
   919                     VerifyOption_G1UsePrevMarking, true /* all */);
   920   }
   921 #endif
   923   // Initialise marking structures. This has to be done in a STW phase.
   924   reset();
   926   // For each region note start of marking.
   927   NoteStartOfMarkHRClosure startcl;
   928   g1h->heap_region_iterate(&startcl);
   929 }
   932 void ConcurrentMark::checkpointRootsInitialPost() {
   933   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
   935   // If we force an overflow during remark, the remark operation will
   936   // actually abort and we'll restart concurrent marking. If we always
   937   // force an overflow during remark we'll never actually complete the
   938   // marking phase. So, we initialize this here, at the start of the
   939   // cycle, so that the remaining overflow number will decrease at
   940   // every remark and we'll eventually not need to cause one.
   941   force_overflow_stw()->init();
   943   // Start Concurrent Marking weak-reference discovery.
   944   ReferenceProcessor* rp = g1h->ref_processor_cm();
   945   // enable ("weak") refs discovery
   946   rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
   947   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
   949   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
   950   // This is the start of the marking cycle; we expect all
   951   // threads to have SATB queues with active set to false.
   952   satb_mq_set.set_active_all_threads(true, /* new active value */
   953                                      false /* expected_active */);
   955   _root_regions.prepare_for_scan();
   957   // update_g1_committed() will be called at the end of an evac pause
   958   // when marking is on. So, it's also called at the end of the
   959   // initial-mark pause to update the heap end, if the heap expands
   960   // during it. No need to call it here.
   961 }
   963 /*
   964  * Notice that in the next two methods, we actually leave the STS
   965  * during the barrier sync and join it immediately afterwards. If we
   966  * do not do this, the following deadlock can occur: one thread could
   967  * be in the barrier sync code, waiting for the other thread to also
   968  * sync up, whereas another one could be trying to yield, while also
   969  * waiting for the other threads to sync up too.
   970  *
   971  * Note, however, that this code is also used during remark and in
   972  * this case we should not attempt to leave / enter the STS, otherwise
   973  * we'll either hit an assert (debug / fastdebug) or deadlock
   974  * (product). So we should only leave / enter the STS if we are
   975  * operating concurrently.
   976  *
   977  * Because the thread that does the sync barrier has left the STS, it
   978  * is possible for it to be suspended for a Full GC, or an evacuation pause
   979  * could occur. This is actually safe, since entering the sync
   980  * barrier is one of the last things do_marking_step() does, and it
   981  * doesn't manipulate any data structures afterwards.
   982  */
   984 void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
   985   if (verbose_low()) {
   986     gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
   987   }
   989   if (concurrent()) {
   990     SuspendibleThreadSet::leave();
   991   }
   993   bool barrier_aborted = !_first_overflow_barrier_sync.enter();
   995   if (concurrent()) {
   996     SuspendibleThreadSet::join();
   997   }
   998   // at this point everyone should have synced up and not be doing any
   999   // more work
  1001   if (verbose_low()) {
  1002     if (barrier_aborted) {
  1003       gclog_or_tty->print_cr("[%u] aborted first barrier", worker_id);
  1004     } else {
  1005       gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
  1006     }
  1007   }
  1009   if (barrier_aborted) {
  1010     // If the barrier aborted we ignore the overflow condition and
  1011     // just abort the whole marking phase as quickly as possible.
  1012     return;
  1013   }
  1015   // If we're executing the concurrent phase of marking, reset the marking
  1016   // state; otherwise the marking state is reset after reference processing,
  1017   // during the remark pause.
  1018   // If we reset here as a result of an overflow during the remark we will
  1019   // see assertion failures from any subsequent set_concurrency_and_phase()
  1020   // calls.
  1021   if (concurrent()) {
  1022     // let the task associated with worker 0 do this
  1023     if (worker_id == 0) {
  1024       // task 0 is responsible for clearing the global data structures
  1025       // We should be here because of an overflow. During STW we should
  1026       // not clear the overflow flag since we rely on it being true when
  1027       // we exit this method to abort the pause and restart concurrent
  1028       // marking.
  1029       reset_marking_state(true /* clear_overflow */);
  1030       force_overflow()->update();
  1032       if (G1Log::fine()) {
  1033         gclog_or_tty->gclog_stamp(concurrent_gc_id());
  1034         gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
  1035       }
  1036     }
  1037   }
  1039   // after this, each task should reset its own data structures and
  1040   // then go into the second barrier
  1041 }
  1043 void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  1044   if (verbose_low()) {
  1045     gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
  1046   }
  1048   if (concurrent()) {
  1049     SuspendibleThreadSet::leave();
  1050   }
  1052   bool barrier_aborted = !_second_overflow_barrier_sync.enter();
  1054   if (concurrent()) {
  1055     SuspendibleThreadSet::join();
  1056   }
  1057   // at this point everything should be re-initialized and ready to go
  1059   if (verbose_low()) {
  1060     if (barrier_aborted) {
  1061       gclog_or_tty->print_cr("[%u] aborted second barrier", worker_id);
  1062     } else {
  1063       gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
  1064     }
  1065   }
  1066 }
  1068 #ifndef PRODUCT
  1069 void ForceOverflowSettings::init() {
  1070   _num_remaining = G1ConcMarkForceOverflow;
  1071   _force = false;
  1072   update();
  1073 }
  1075 void ForceOverflowSettings::update() {
  1076   if (_num_remaining > 0) {
  1077     _num_remaining -= 1;
  1078     _force = true;
  1079   } else {
  1080     _force = false;
  1081   }
  1082 }
  1084 bool ForceOverflowSettings::should_force() {
  1085   if (_force) {
  1086     _force = false;
  1087     return true;
  1088   } else {
  1089     return false;
  1090   }
  1091 }
  1092 #endif // !PRODUCT
  1094 class CMConcurrentMarkingTask: public AbstractGangTask {
  1095 private:
  1096   ConcurrentMark*       _cm;
  1097   ConcurrentMarkThread* _cmt;
  1099 public:
  1100   void work(uint worker_id) {
  1101     assert(Thread::current()->is_ConcurrentGC_thread(),
  1102            "this should only be done by a conc GC thread");
  1103     ResourceMark rm;
  1105     double start_vtime = os::elapsedVTime();
  1107     SuspendibleThreadSet::join();
  1109     assert(worker_id < _cm->active_tasks(), "invariant");
  1110     CMTask* the_task = _cm->task(worker_id);
  1111     the_task->record_start_time();
  1112     if (!_cm->has_aborted()) {
  1113       do {
  1114         double start_vtime_sec = os::elapsedVTime();
  1115         double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
  1117         the_task->do_marking_step(mark_step_duration_ms,
  1118                                   true  /* do_termination */,
  1119                                   false /* is_serial*/);
  1121         double end_vtime_sec = os::elapsedVTime();
  1122         double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
  1123         _cm->clear_has_overflown();
  1125         _cm->do_yield_check(worker_id);
  1127         jlong sleep_time_ms;
  1128         if (!_cm->has_aborted() && the_task->has_aborted()) {
  1129           sleep_time_ms =
  1130             (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
  1131           SuspendibleThreadSet::leave();
  1132           os::sleep(Thread::current(), sleep_time_ms, false);
  1133           SuspendibleThreadSet::join();
  1134         }
  1135       } while (!_cm->has_aborted() && the_task->has_aborted());
  1136     }
  1137     the_task->record_end_time();
  1138     guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
  1140     SuspendibleThreadSet::leave();
  1142     double end_vtime = os::elapsedVTime();
  1143     _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  1144   }
  1146   CMConcurrentMarkingTask(ConcurrentMark* cm,
  1147                           ConcurrentMarkThread* cmt) :
  1148       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
  1150   ~CMConcurrentMarkingTask() { }
  1151 };
  1153 // Calculates the number of active workers for a concurrent
  1154 // phase.
  1155 uint ConcurrentMark::calc_parallel_marking_threads() {
  1156   if (G1CollectedHeap::use_parallel_gc_threads()) {
  1157     uint n_conc_workers = 0;
  1158     if (!UseDynamicNumberOfGCThreads ||
  1159         (!FLAG_IS_DEFAULT(ConcGCThreads) &&
  1160          !ForceDynamicNumberOfGCThreads)) {
  1161       n_conc_workers = max_parallel_marking_threads();
  1162     } else {
  1163       n_conc_workers =
  1164         AdaptiveSizePolicy::calc_default_active_workers(
  1165                                      max_parallel_marking_threads(),
  1166                                      1, /* Minimum workers */
  1167                                      parallel_marking_threads(),
  1168                                      Threads::number_of_non_daemon_threads());
  1169       // Don't scale down "n_conc_workers" by scale_parallel_threads() because
  1170       // that scaling has already gone into "_max_parallel_marking_threads".
  1171     }
  1172     assert(n_conc_workers > 0, "Always need at least 1");
  1173     return n_conc_workers;
  1174   }
  1175   // If we are not running with any parallel GC threads we will not
  1176   // have spawned any marking threads either. Hence the number of
  1177   // concurrent workers should be 0.
  1178   return 0;
  1179 }
  1181 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
  1182   // Currently, only survivors can be root regions.
  1183   assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  1184   G1RootRegionScanClosure cl(_g1h, this, worker_id);
  1186   const uintx interval = PrefetchScanIntervalInBytes;
  1187   HeapWord* curr = hr->bottom();
  1188   const HeapWord* end = hr->top();
  1189   while (curr < end) {
  1190     Prefetch::read(curr, interval);
  1191     oop obj = oop(curr);
  1192     int size = obj->oop_iterate(&cl);
  1193     assert(size == obj->size(), "sanity");
  1194     curr += size;
  1195   }
  1196 }
  1198 class CMRootRegionScanTask : public AbstractGangTask {
  1199 private:
  1200   ConcurrentMark* _cm;
  1202 public:
  1203   CMRootRegionScanTask(ConcurrentMark* cm) :
  1204     AbstractGangTask("Root Region Scan"), _cm(cm) { }
  1206   void work(uint worker_id) {
  1207     assert(Thread::current()->is_ConcurrentGC_thread(),
  1208            "this should only be done by a conc GC thread");
  1210     CMRootRegions* root_regions = _cm->root_regions();
  1211     HeapRegion* hr = root_regions->claim_next();
  1212     while (hr != NULL) {
  1213       _cm->scanRootRegion(hr, worker_id);
  1214       hr = root_regions->claim_next();
  1215     }
  1216   }
  1217 };
  1219 void ConcurrentMark::scanRootRegions() {
  1220   // Start of concurrent marking.
  1221   ClassLoaderDataGraph::clear_claimed_marks();
  1223   // scan_in_progress() will have been set to true only if there was
  1224   // at least one root region to scan. So, if it's false, we
  1225   // should not attempt to do any further work.
  1226   if (root_regions()->scan_in_progress()) {
  1227     _parallel_marking_threads = calc_parallel_marking_threads();
  1228     assert(parallel_marking_threads() <= max_parallel_marking_threads(),
  1229            "Maximum number of marking threads exceeded");
  1230     uint active_workers = MAX2(1U, parallel_marking_threads());
  1232     CMRootRegionScanTask task(this);
  1233     if (use_parallel_marking_threads()) {
  1234       _parallel_workers->set_active_workers((int) active_workers);
  1235       _parallel_workers->run_task(&task);
  1236     } else {
  1237       task.work(0);
  1238     }
  1240     // It's possible that has_aborted() is true here without actually
  1241     // aborting the survivor scan earlier. This is OK as it's
  1242     // mainly used for sanity checking.
  1243     root_regions()->scan_finished();
  1244   }
  1245 }
  1247 void ConcurrentMark::markFromRoots() {
  1248   // we might be tempted to assert that:
  1249   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  1250   //        "inconsistent argument?");
  1251   // However that wouldn't be right, because it's possible that
  1252   // a safepoint is indeed in progress as a younger generation
  1253   // stop-the-world GC happens even as we mark in this generation.
  1255   _restart_for_overflow = false;
  1256   force_overflow_conc()->init();
  1258   // _g1h has _n_par_threads
  1259   _parallel_marking_threads = calc_parallel_marking_threads();
  1260   assert(parallel_marking_threads() <= max_parallel_marking_threads(),
  1261     "Maximum number of marking threads exceeded");
  1263   uint active_workers = MAX2(1U, parallel_marking_threads());
  1265   // Parallel task terminator is set in "set_concurrency_and_phase()"
  1266   set_concurrency_and_phase(active_workers, true /* concurrent */);
  1268   CMConcurrentMarkingTask markingTask(this, cmThread());
  1269   if (use_parallel_marking_threads()) {
  1270     _parallel_workers->set_active_workers((int)active_workers);
  1271     // Don't set _n_par_threads because it affects MT in process_roots()
  1272     // and the decisions on that MT processing is made elsewhere.
  1273     assert(_parallel_workers->active_workers() > 0, "Should have been set");
  1274     _parallel_workers->run_task(&markingTask);
  1275   } else {
  1276     markingTask.work(0);
  1277   }
  1278   print_stats();
  1279 }
  1281 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  1282   // world is stopped at this checkpoint
  1283   assert(SafepointSynchronize::is_at_safepoint(),
  1284          "world should be stopped");
  1286   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  1288   // If a full collection has happened, we shouldn't do this.
  1289   if (has_aborted()) {
  1290     g1h->set_marking_complete(); // So bitmap clearing isn't confused
  1291     return;
  1292   }
  1294   SvcGCMarker sgcm(SvcGCMarker::OTHER);
  1296   if (VerifyDuringGC) {
  1297     HandleMark hm;  // handle scope
  1298     Universe::heap()->prepare_for_verify();
  1299     Universe::verify(VerifyOption_G1UsePrevMarking,
  1300                      " VerifyDuringGC:(before)");
  1301   }
  1302   g1h->check_bitmaps("Remark Start");
  1304   G1CollectorPolicy* g1p = g1h->g1_policy();
  1305   g1p->record_concurrent_mark_remark_start();
  1307   double start = os::elapsedTime();
  1309   checkpointRootsFinalWork();
  1311   double mark_work_end = os::elapsedTime();
  1313   weakRefsWork(clear_all_soft_refs);
  1315   if (has_overflown()) {
  1316     // Oops.  We overflowed.  Restart concurrent marking.
  1317     _restart_for_overflow = true;
  1318     if (G1TraceMarkStackOverflow) {
  1319       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
  1320     }
  1322     // Verify the heap w.r.t. the previous marking bitmap.
  1323     if (VerifyDuringGC) {
  1324       HandleMark hm;  // handle scope
  1325       Universe::heap()->prepare_for_verify();
  1326       Universe::verify(VerifyOption_G1UsePrevMarking,
  1327                        " VerifyDuringGC:(overflow)");
  1328     }
  1330     // Clear the marking state because we will be restarting
  1331     // marking due to overflowing the global mark stack.
  1332     reset_marking_state();
  1333   } else {
  1334     // Aggregate the per-task counting data that we have accumulated
  1335     // while marking.
  1336     aggregate_count_data();
  1338     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  1339     // We're done with marking.
  1340     // This is the end of the marking cycle; we expect all
  1341     // threads to have SATB queues with active set to true.
  1342     satb_mq_set.set_active_all_threads(false, /* new active value */
  1343                                        true /* expected_active */);
  1345     if (VerifyDuringGC) {
  1346       HandleMark hm;  // handle scope
  1347       Universe::heap()->prepare_for_verify();
  1348       Universe::verify(VerifyOption_G1UseNextMarking,
  1349                        " VerifyDuringGC:(after)");
  1350     }
  1351     g1h->check_bitmaps("Remark End");
  1352     assert(!restart_for_overflow(), "sanity");
  1353     // Completely reset the marking state since marking completed
  1354     set_non_marking_state();
  1355   }
  1357   // Expand the marking stack, if we have to and if we can.
  1358   if (_markStack.should_expand()) {
  1359     _markStack.expand();
  1360   }
  1362   // Statistics
  1363   double now = os::elapsedTime();
  1364   _remark_mark_times.add((mark_work_end - start) * 1000.0);
  1365   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  1366   _remark_times.add((now - start) * 1000.0);
  1368   g1p->record_concurrent_mark_remark_end();
  1370   G1CMIsAliveClosure is_alive(g1h);
  1371   g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
  1372 }
  1374 // Base class of the closures that finalize and verify the
  1375 // liveness counting data.
  1376 class CMCountDataClosureBase: public HeapRegionClosure {
  1377 protected:
  1378   G1CollectedHeap* _g1h;
  1379   ConcurrentMark* _cm;
  1380   CardTableModRefBS* _ct_bs;
  1382   BitMap* _region_bm;
  1383   BitMap* _card_bm;
  1385   // Takes a region that's not empty (i.e., it has at least one
  1386   // live object in it) and sets its corresponding bit on the region
  1387   // bitmap to 1. If the region is "starts humongous" it will also set
  1388   // to 1 the bits on the region bitmap that correspond to its
  1389   // associated "continues humongous" regions.
  1390   void set_bit_for_region(HeapRegion* hr) {
  1391     assert(!hr->continuesHumongous(), "should have filtered those out");
  1393     BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
  1394     if (!hr->startsHumongous()) {
  1395       // Normal (non-humongous) case: just set the bit.
  1396       _region_bm->par_at_put(index, true);
  1397     } else {
  1398       // Starts humongous case: calculate how many regions are part of
  1399       // this humongous region and then set the bit range.
  1400       BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
  1401       _region_bm->par_at_put_range(index, end_index, true);
  1402     }
  1403   }
  1405 public:
  1406   CMCountDataClosureBase(G1CollectedHeap* g1h,
  1407                          BitMap* region_bm, BitMap* card_bm):
  1408     _g1h(g1h), _cm(g1h->concurrent_mark()),
  1409     _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
  1410     _region_bm(region_bm), _card_bm(card_bm) { }
  1411 };
  1413 // Closure that calculates the # live objects per region. Used
  1414 // for verification purposes during the cleanup pause.
  1415 class CalcLiveObjectsClosure: public CMCountDataClosureBase {
  1416   CMBitMapRO* _bm;
  1417   size_t _region_marked_bytes;
  1419 public:
  1420   CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
  1421                          BitMap* region_bm, BitMap* card_bm) :
  1422     CMCountDataClosureBase(g1h, region_bm, card_bm),
  1423     _bm(bm), _region_marked_bytes(0) { }
  1425   bool doHeapRegion(HeapRegion* hr) {
  1427     if (hr->continuesHumongous()) {
  1428       // We will ignore these here and process them when their
  1429       // associated "starts humongous" region is processed (see
  1430       // set_bit_for_region()). Note that we cannot rely on their
  1431       // associated "starts humongous" region to have its bit set to
  1432       // 1 since, due to the region chunking in the parallel region
  1433       // iteration, a "continues humongous" region might be visited
  1434       // before its associated "starts humongous".
  1435       return false;
  1438     HeapWord* ntams = hr->next_top_at_mark_start();
  1439     HeapWord* start = hr->bottom();
  1441     assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
  1442            err_msg("Preconditions not met - "
  1443                    "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT,
  1444                    p2i(start), p2i(ntams), p2i(hr->end())));
  1446     // Find the first marked object at or after "start".
  1447     start = _bm->getNextMarkedWordAddress(start, ntams);
  1449     size_t marked_bytes = 0;
  1451     while (start < ntams) {
  1452       oop obj = oop(start);
  1453       int obj_sz = obj->size();
  1454       HeapWord* obj_end = start + obj_sz;
  1456       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
  1457       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);
  1459       // Note: if we're looking at the last region in the heap, obj_end
  1460       // could actually be just beyond the end of the heap; end_idx
  1461       // will then correspond to a (non-existent) card that is also
  1462       // just beyond the heap.
  1463       if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
  1464         // end of object is not card aligned - increment to cover
  1465         // all the cards spanned by the object
  1466         end_idx += 1;
  1469       // Set the bits in the card BM for the cards spanned by this object.
  1470       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
  1472       // Add the size of this object to the number of marked bytes.
  1473       marked_bytes += (size_t)obj_sz * HeapWordSize;
  1475       // Find the next marked object after this one.
  1476       start = _bm->getNextMarkedWordAddress(obj_end, ntams);
  1479     // Mark the allocated-since-marking portion...
  1480     HeapWord* top = hr->top();
  1481     if (ntams < top) {
  1482       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
  1483       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
  1485       // Note: if we're looking at the last region in the heap, top
  1486       // could actually be just beyond the end of the heap; end_idx
  1487       // will then correspond to a (non-existent) card that is also
  1488       // just beyond the heap.
  1489       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
  1490         // top is not card aligned - increment to cover
  1491         // all the cards spanned by the area [ntams, top)
  1492         end_idx += 1;
  1494       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
  1496       // This definitely means the region has live objects.
  1497       set_bit_for_region(hr);
  1500     // Update the live region bitmap.
  1501     if (marked_bytes > 0) {
  1502       set_bit_for_region(hr);
  1505     // Set the marked bytes for the current region so that
  1506     // it can be queried by a calling verification routine.
  1507     _region_marked_bytes = marked_bytes;
  1509     return false;
  1512   size_t region_marked_bytes() const { return _region_marked_bytes; }
  1513 };
  1515 // Heap region closure used for verifying the counting data
  1516 // that was accumulated concurrently and aggregated during
  1517 // the remark pause. This closure is applied to the heap
  1518 // regions during the STW cleanup pause.
  1520 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
  1521   G1CollectedHeap* _g1h;
  1522   ConcurrentMark* _cm;
  1523   CalcLiveObjectsClosure _calc_cl;
  1524   BitMap* _region_bm;   // Region BM to be verified
  1525   BitMap* _card_bm;     // Card BM to be verified
  1526   bool _verbose;        // verbose output?
  1528   BitMap* _exp_region_bm; // Expected Region BM values
  1529   BitMap* _exp_card_bm;   // Expected card BM values
  1531   int _failures;
  1533 public:
  1534   VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
  1535                                 BitMap* region_bm,
  1536                                 BitMap* card_bm,
  1537                                 BitMap* exp_region_bm,
  1538                                 BitMap* exp_card_bm,
  1539                                 bool verbose) :
  1540     _g1h(g1h), _cm(g1h->concurrent_mark()),
  1541     _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
  1542     _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
  1543     _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
  1544     _failures(0) { }
  1546   int failures() const { return _failures; }
  1548   bool doHeapRegion(HeapRegion* hr) {
  1549     if (hr->continuesHumongous()) {
  1550       // We will ignore these here and process them when their
  1551       // associated "starts humongous" region is processed (see
  1552       // set_bit_for_region()). Note that we cannot rely on their
  1553       // associated "starts humongous" region to have its bit set to
  1554       // 1 since, due to the region chunking in the parallel region
  1555       // iteration, a "continues humongous" region might be visited
  1556       // before its associated "starts humongous".
  1557       return false;
  1560     int failures = 0;
  1562     // Call the CalcLiveObjectsClosure to walk the marking bitmap for
  1563     // this region and set the corresponding bits in the expected region
  1564     // and card bitmaps.
  1565     bool res = _calc_cl.doHeapRegion(hr);
  1566     assert(res == false, "should be continuing");
  1568     MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
  1569                     Mutex::_no_safepoint_check_flag);
  1571     // Verify the marked bytes for this region.
  1572     size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
  1573     size_t act_marked_bytes = hr->next_marked_bytes();
  1575     // We're not OK if expected marked bytes > actual marked bytes. It means
  1576     // we have missed accounting for some objects during the actual marking.
  1577     if (exp_marked_bytes > act_marked_bytes) {
  1578       if (_verbose) {
  1579         gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
  1580                                "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
  1581                                hr->hrm_index(), exp_marked_bytes, act_marked_bytes);
  1583       failures += 1;
  1586     // Verify this region's bit in both the actual region bitmap and the
  1587     // expected region bitmap (which was just calculated).
  1588     // We're not OK if the bit in the calculated expected region
  1589     // bitmap is set and the bit in the actual region bitmap is not.
  1590     BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
  1592     bool expected = _exp_region_bm->at(index);
  1593     bool actual = _region_bm->at(index);
  1594     if (expected && !actual) {
  1595       if (_verbose) {
  1596         gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
  1597                                "expected: %s, actual: %s",
  1598                                hr->hrm_index(),
  1599                                BOOL_TO_STR(expected), BOOL_TO_STR(actual));
  1601       failures += 1;
  1604     // Verify that the card bit maps for the cards spanned by the current
  1605     // region match. We have an error if we have a set bit in the expected
  1606     // bit map and the corresponding bit in the actual bitmap is not set.
  1608     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
  1609     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
  1611     for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
  1612       expected = _exp_card_bm->at(i);
  1613       actual = _card_bm->at(i);
  1615       if (expected && !actual) {
  1616         if (_verbose) {
  1617           gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
  1618                                  "expected: %s, actual: %s",
  1619                                  hr->hrm_index(), i,
  1620                                  BOOL_TO_STR(expected), BOOL_TO_STR(actual));
  1622         failures += 1;
  1626     if (failures > 0 && _verbose)  {
  1627       gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
  1628                              "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
  1629                              HR_FORMAT_PARAMS(hr), p2i(hr->next_top_at_mark_start()),
  1630                              _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
  1633     _failures += failures;
  1635     // We could stop iteration over the heap when we
  1636     // find the first violating region by returning true.
  1637     return false;
  1639 };
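       // Gang task that applies VerifyLiveObjectDataHRClosure to the heap
       // regions (in parallel when a work gang is available) and accumulates
       // the number of verification failures. Only used when VerifyDuringGC
       // is enabled.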
  1641 class G1ParVerifyFinalCountTask: public AbstractGangTask {
  1642 protected:
  1643   G1CollectedHeap* _g1h;
  1644   ConcurrentMark* _cm;
  1645   BitMap* _actual_region_bm;
  1646   BitMap* _actual_card_bm;
  1648   uint    _n_workers;
  1650   BitMap* _expected_region_bm;
  1651   BitMap* _expected_card_bm;
  1653   int  _failures;
  1654   bool _verbose;
  1656 public:
  1657   G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
  1658                             BitMap* region_bm, BitMap* card_bm,
  1659                             BitMap* expected_region_bm, BitMap* expected_card_bm)
  1660     : AbstractGangTask("G1 verify final counting"),
  1661       _g1h(g1h), _cm(_g1h->concurrent_mark()),
  1662       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
  1663       _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
  1664       _failures(0), _verbose(false),
  1665       _n_workers(0) {
  1666     assert(VerifyDuringGC, "don't call this otherwise");
  1668     // Use the value already set as the number of active threads
  1669     // in the call to run_task().
  1670     if (G1CollectedHeap::use_parallel_gc_threads()) {
  1671       assert( _g1h->workers()->active_workers() > 0,
  1672         "Should have been previously set");
  1673       _n_workers = _g1h->workers()->active_workers();
  1674     } else {
  1675       _n_workers = 1;
  1678     assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
  1679     assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
  1681     _verbose = _cm->verbose_medium();
  1684   void work(uint worker_id) {
  1685     assert(worker_id < _n_workers, "invariant");
  1687     VerifyLiveObjectDataHRClosure verify_cl(_g1h,
  1688                                             _actual_region_bm, _actual_card_bm,
  1689                                             _expected_region_bm,
  1690                                             _expected_card_bm,
  1691                                             _verbose);
  1693     if (G1CollectedHeap::use_parallel_gc_threads()) {
  1694       _g1h->heap_region_par_iterate_chunked(&verify_cl,
  1695                                             worker_id,
  1696                                             _n_workers,
  1697                                             HeapRegion::VerifyCountClaimValue);
  1698     } else {
  1699       _g1h->heap_region_iterate(&verify_cl);
  1702     Atomic::add(verify_cl.failures(), &_failures);
  1705   int failures() const { return _failures; }
  1706 };
  1708 // Closure that finalizes the liveness counting data.
  1709 // Used during the cleanup pause.
  1710 // Sets the bits corresponding to the interval [NTAMS, top)
  1711 // (which contains the implicitly live objects) in the
  1712 // card liveness bitmap. Also sets the bit, in the region
  1713 // liveness bitmap, for each region containing live data.
  1715 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
  1716  public:
  1717   FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
  1718                               BitMap* region_bm,
  1719                               BitMap* card_bm) :
  1720     CMCountDataClosureBase(g1h, region_bm, card_bm) { }
  1722   bool doHeapRegion(HeapRegion* hr) {
  1724     if (hr->continuesHumongous()) {
  1725       // We will ignore these here and process them when their
  1726       // associated "starts humongous" region is processed (see
  1727       // set_bit_for_region()). Note that we cannot rely on their
  1728       // associated "starts humongous" region to have its bit set to
  1729       // 1 since, due to the region chunking in the parallel region
  1730       // iteration, a "continues humongous" region might be visited
  1731       // before its associated "starts humongous".
  1732       return false;
  1735     HeapWord* ntams = hr->next_top_at_mark_start();
  1736     HeapWord* top   = hr->top();
  1738     assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
  1740     // Mark the allocated-since-marking portion...
  1741     if (ntams < top) {
  1742       // This definitely means the region has live objects.
  1743       set_bit_for_region(hr);
  1745       // Now set the bits in the card bitmap for [ntams, top)
  1746       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
  1747       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
  1749       // Note: if we're looking at the last region in the heap, top
  1750       // could actually be just beyond the end of the heap; end_idx
  1751       // will then correspond to a (non-existent) card that is also
  1752       // just beyond the heap.
  1753       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
  1754         // top is not card aligned - increment to cover
  1755         // all the cards spanned by the area [ntams, top)
  1756         end_idx += 1;
  1759       assert(end_idx <= _card_bm->size(),
  1760              err_msg("oob: end_idx=  "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
  1761                      end_idx, _card_bm->size()));
  1762       assert(start_idx < _card_bm->size(),
  1763              err_msg("oob: start_idx=  "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
  1764                      start_idx, _card_bm->size()));
  1766       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
  1769     // Set the bit for the region if it contains live data
  1770     if (hr->next_marked_bytes() > 0) {
  1771       set_bit_for_region(hr);
  1774     return false;
  1776 };
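       // Gang task that applies FinalCountDataUpdateClosure to the heap
       // regions during the cleanup pause, in parallel when a work gang is
       // available.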
  1778 class G1ParFinalCountTask: public AbstractGangTask {
  1779 protected:
  1780   G1CollectedHeap* _g1h;
  1781   ConcurrentMark* _cm;
  1782   BitMap* _actual_region_bm;
  1783   BitMap* _actual_card_bm;
  1785   uint    _n_workers;
  1787 public:
  1788   G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
  1789     : AbstractGangTask("G1 final counting"),
  1790       _g1h(g1h), _cm(_g1h->concurrent_mark()),
  1791       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
  1792       _n_workers(0) {
  1793     // Use the value already set as the number of active threads
  1794     // in the call to run_task().
  1795     if (G1CollectedHeap::use_parallel_gc_threads()) {
  1796       assert( _g1h->workers()->active_workers() > 0,
  1797         "Should have been previously set");
  1798       _n_workers = _g1h->workers()->active_workers();
  1799     } else {
  1800       _n_workers = 1;
  1804   void work(uint worker_id) {
  1805     assert(worker_id < _n_workers, "invariant");
  1807     FinalCountDataUpdateClosure final_update_cl(_g1h,
  1808                                                 _actual_region_bm,
  1809                                                 _actual_card_bm);
  1811     if (G1CollectedHeap::use_parallel_gc_threads()) {
  1812       _g1h->heap_region_par_iterate_chunked(&final_update_cl,
  1813                                             worker_id,
  1814                                             _n_workers,
  1815                                             HeapRegion::FinalCountClaimValue);
  1816     } else {
  1817       _g1h->heap_region_iterate(&final_update_cl);
  1820 };
  1822 class G1ParNoteEndTask;
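       // Per-worker closure applied to the heap regions at the end of
       // marking. It records the end-of-marking information for each region,
       // moves non-young regions that contain no live data to a local free
       // list, and gathers per-worker timing and liveness statistics.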
  1824 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  1825   G1CollectedHeap* _g1;
  1826   size_t _max_live_bytes;
  1827   uint _regions_claimed;
  1828   size_t _freed_bytes;
  1829   FreeRegionList* _local_cleanup_list;
  1830   HeapRegionSetCount _old_regions_removed;
  1831   HeapRegionSetCount _humongous_regions_removed;
  1832   HRRSCleanupTask* _hrrs_cleanup_task;
  1833   double _claimed_region_time;
  1834   double _max_region_time;
  1836 public:
  1837   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
  1838                              FreeRegionList* local_cleanup_list,
  1839                              HRRSCleanupTask* hrrs_cleanup_task) :
  1840     _g1(g1),
  1841     _max_live_bytes(0), _regions_claimed(0),
  1842     _freed_bytes(0),
  1843     _claimed_region_time(0.0), _max_region_time(0.0),
  1844     _local_cleanup_list(local_cleanup_list),
  1845     _old_regions_removed(),
  1846     _humongous_regions_removed(),
  1847     _hrrs_cleanup_task(hrrs_cleanup_task) { }
  1849   size_t freed_bytes() { return _freed_bytes; }
  1850   const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; }
  1851   const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; }
  1853   bool doHeapRegion(HeapRegion *hr) {
  1854     if (hr->continuesHumongous()) {
  1855       return false;
  1857     // We use a claim value of zero here because all regions
  1858     // were claimed with value 1 in the FinalCount task.
  1859     _g1->reset_gc_time_stamps(hr);
  1860     double start = os::elapsedTime();
  1861     _regions_claimed++;
  1862     hr->note_end_of_marking();
  1863     _max_live_bytes += hr->max_live_bytes();
  1865     if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
  1866       _freed_bytes += hr->used();
  1867       hr->set_containing_set(NULL);
  1868       if (hr->isHumongous()) {
  1869         assert(hr->startsHumongous(), "we should only see starts humongous");
  1870         _humongous_regions_removed.increment(1u, hr->capacity());
  1871         _g1->free_humongous_region(hr, _local_cleanup_list, true);
  1872       } else {
  1873         _old_regions_removed.increment(1u, hr->capacity());
  1874         _g1->free_region(hr, _local_cleanup_list, true);
  1876     } else {
  1877       hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
  1880     double region_time = (os::elapsedTime() - start);
  1881     _claimed_region_time += region_time;
  1882     if (region_time > _max_region_time) {
  1883       _max_region_time = region_time;
  1885     return false;
  1888   size_t max_live_bytes() { return _max_live_bytes; }
  1889   uint regions_claimed() { return _regions_claimed; }
  1890   double claimed_region_time_sec() { return _claimed_region_time; }
  1891   double max_region_time_sec() { return _max_region_time; }
  1892 };
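       // Gang task that runs G1NoteEndOfConcMarkClosure over the heap, then
       // removes the freed regions from the old/humongous sets, updates the
       // summary byte counts and appends the per-worker free lists to the
       // global cleanup list.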
  1894 class G1ParNoteEndTask: public AbstractGangTask {
  1895   friend class G1NoteEndOfConcMarkClosure;
  1897 protected:
  1898   G1CollectedHeap* _g1h;
  1899   size_t _max_live_bytes;
  1900   size_t _freed_bytes;
  1901   FreeRegionList* _cleanup_list;
  1903 public:
  1904   G1ParNoteEndTask(G1CollectedHeap* g1h,
  1905                    FreeRegionList* cleanup_list) :
  1906     AbstractGangTask("G1 note end"), _g1h(g1h),
  1907     _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
  1909   void work(uint worker_id) {
  1910     double start = os::elapsedTime();
  1911     FreeRegionList local_cleanup_list("Local Cleanup List");
  1912     HRRSCleanupTask hrrs_cleanup_task;
  1913     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
  1914                                            &hrrs_cleanup_task);
  1915     if (G1CollectedHeap::use_parallel_gc_threads()) {
  1916       _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id,
  1917                                             _g1h->workers()->active_workers(),
  1918                                             HeapRegion::NoteEndClaimValue);
  1919     } else {
  1920       _g1h->heap_region_iterate(&g1_note_end);
  1922     assert(g1_note_end.complete(), "Shouldn't have yielded!");
  1924     // Now update the lists
  1925     _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
  1927       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  1928       _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());
  1929       _max_live_bytes += g1_note_end.max_live_bytes();
  1930       _freed_bytes += g1_note_end.freed_bytes();
  1932       // If we iterate over the global cleanup list at the end of
  1933       // cleanup to do this printing we will not guarantee to only
  1934       // generate output for the newly-reclaimed regions (the list
  1935       // might not be empty at the beginning of cleanup; we might
  1936       // still be working on its previous contents). So we do the
  1937       // printing here, before we append the new regions to the global
  1938       // cleanup list.
  1940       G1HRPrinter* hr_printer = _g1h->hr_printer();
  1941       if (hr_printer->is_active()) {
  1942         FreeRegionListIterator iter(&local_cleanup_list);
  1943         while (iter.more_available()) {
  1944           HeapRegion* hr = iter.get_next();
  1945           hr_printer->cleanup(hr);
  1949       _cleanup_list->add_ordered(&local_cleanup_list);
  1950       assert(local_cleanup_list.is_empty(), "post-condition");
  1952       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
  1955   size_t max_live_bytes() { return _max_live_bytes; }
  1956   size_t freed_bytes() { return _freed_bytes; }
  1957 };
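       // Gang task that scrubs the remembered sets using the region and card
       // liveness bitmaps built during marking (see G1RemSet::scrub() and
       // G1RemSet::scrub_par()).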
  1959 class G1ParScrubRemSetTask: public AbstractGangTask {
  1960 protected:
  1961   G1RemSet* _g1rs;
  1962   BitMap* _region_bm;
  1963   BitMap* _card_bm;
  1964 public:
  1965   G1ParScrubRemSetTask(G1CollectedHeap* g1h,
  1966                        BitMap* region_bm, BitMap* card_bm) :
  1967     AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
  1968     _region_bm(region_bm), _card_bm(card_bm) { }
  1970   void work(uint worker_id) {
  1971     if (G1CollectedHeap::use_parallel_gc_threads()) {
  1972       _g1rs->scrub_par(_region_bm, _card_bm, worker_id,
  1973                        HeapRegion::ScrubRemSetClaimValue);
  1974     } else {
  1975       _g1rs->scrub(_region_bm, _card_bm);
  1979 };
  1981 void ConcurrentMark::cleanup() {
  1982   // world is stopped at this checkpoint
  1983   assert(SafepointSynchronize::is_at_safepoint(),
  1984          "world should be stopped");
  1985   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  1987   // If a full collection has happened, we shouldn't do this.
  1988   if (has_aborted()) {
  1989     g1h->set_marking_complete(); // So bitmap clearing isn't confused
  1990     return;
  1993   g1h->verify_region_sets_optional();
  1995   if (VerifyDuringGC) {
  1996     HandleMark hm;  // handle scope
  1997     Universe::heap()->prepare_for_verify();
  1998     Universe::verify(VerifyOption_G1UsePrevMarking,
  1999                      " VerifyDuringGC:(before)");
  2001   g1h->check_bitmaps("Cleanup Start");
  2003   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
  2004   g1p->record_concurrent_mark_cleanup_start();
  2006   double start = os::elapsedTime();
  2008   HeapRegionRemSet::reset_for_cleanup_tasks();
  2010   uint n_workers;
  2012   // Do counting once more with the world stopped for good measure.
  2013   G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
  2015   if (G1CollectedHeap::use_parallel_gc_threads()) {
  2016    assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
  2017            "sanity check");
  2019     g1h->set_par_threads();
  2020     n_workers = g1h->n_par_threads();
  2021     assert(g1h->n_par_threads() == n_workers,
  2022            "Should not have been reset");
  2023     g1h->workers()->run_task(&g1_par_count_task);
  2024     // Done with the parallel phase so reset to 0.
  2025     g1h->set_par_threads(0);
  2027     assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue),
  2028            "sanity check");
  2029   } else {
  2030     n_workers = 1;
  2031     g1_par_count_task.work(0);
  2034   if (VerifyDuringGC) {
  2035     // Verify that the counting data accumulated during marking matches
  2036     // that calculated by walking the marking bitmap.
  2038     // Bitmaps to hold expected values
  2039     BitMap expected_region_bm(_region_bm.size(), true);
  2040     BitMap expected_card_bm(_card_bm.size(), true);
  2042     G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
  2043                                                  &_region_bm,
  2044                                                  &_card_bm,
  2045                                                  &expected_region_bm,
  2046                                                  &expected_card_bm);
  2048     if (G1CollectedHeap::use_parallel_gc_threads()) {
  2049       g1h->set_par_threads((int)n_workers);
  2050       g1h->workers()->run_task(&g1_par_verify_task);
  2051       // Done with the parallel phase so reset to 0.
  2052       g1h->set_par_threads(0);
  2054       assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
  2055              "sanity check");
  2056     } else {
  2057       g1_par_verify_task.work(0);
  2060     guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
  2063   size_t start_used_bytes = g1h->used();
  2064   g1h->set_marking_complete();
  2066   double count_end = os::elapsedTime();
  2067   double this_final_counting_time = (count_end - start);
  2068   _total_counting_time += this_final_counting_time;
  2070   if (G1PrintRegionLivenessInfo) {
  2071     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
  2072     _g1h->heap_region_iterate(&cl);
  2075   // Install the newly created mark bitmap as "prev".
  2076   swapMarkBitMaps();
  2078   g1h->reset_gc_time_stamp();
  2080   // Note end of marking in all heap regions.
  2081   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
  2082   if (G1CollectedHeap::use_parallel_gc_threads()) {
  2083     g1h->set_par_threads((int)n_workers);
  2084     g1h->workers()->run_task(&g1_par_note_end_task);
  2085     g1h->set_par_threads(0);
  2087     assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
  2088            "sanity check");
  2089   } else {
  2090     g1_par_note_end_task.work(0);
  2092   g1h->check_gc_time_stamps();
  2094   if (!cleanup_list_is_empty()) {
  2095     // The cleanup list is not empty, so we'll have to process it
  2096     // concurrently. Notify anyone else that might be wanting free
  2097     // regions that there will be more free regions coming soon.
  2098     g1h->set_free_regions_coming();
  2101   // The remembered set scrubbing (if enabled) must happen before the record_concurrent_mark_cleanup_end()
  2102   // call below, since it affects the metric by which we sort the heap regions.
  2103   if (G1ScrubRemSets) {
  2104     double rs_scrub_start = os::elapsedTime();
  2105     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
  2106     if (G1CollectedHeap::use_parallel_gc_threads()) {
  2107       g1h->set_par_threads((int)n_workers);
  2108       g1h->workers()->run_task(&g1_par_scrub_rs_task);
  2109       g1h->set_par_threads(0);
  2111       assert(g1h->check_heap_region_claim_values(
  2112                                             HeapRegion::ScrubRemSetClaimValue),
  2113              "sanity check");
  2114     } else {
  2115       g1_par_scrub_rs_task.work(0);
  2118     double rs_scrub_end = os::elapsedTime();
  2119     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
  2120     _total_rs_scrub_time += this_rs_scrub_time;
  2123   // This call will also free any regions that are totally full of garbage
  2124   // objects, and will sort the regions.
  2125   g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
  2127   // Statistics.
  2128   double end = os::elapsedTime();
  2129   _cleanup_times.add((end - start) * 1000.0);
  2131   if (G1Log::fine()) {
  2132     g1h->print_size_transition(gclog_or_tty,
  2133                                start_used_bytes,
  2134                                g1h->used(),
  2135                                g1h->capacity());
  2138   // Clean up will have freed any regions completely full of garbage.
  2139   // Update the soft reference policy with the new heap occupancy.
  2140   Universe::update_heap_info_at_gc();
  2142   if (VerifyDuringGC) {
  2143     HandleMark hm;  // handle scope
  2144     Universe::heap()->prepare_for_verify();
  2145     Universe::verify(VerifyOption_G1UsePrevMarking,
  2146                      " VerifyDuringGC:(after)");
  2148   g1h->check_bitmaps("Cleanup End");
  2150   g1h->verify_region_sets_optional();
  2152   // We need to make this count as a "collection" so that any collection pause
  2153   // that races with it goes around and waits for completeCleanup to finish.
  2154   g1h->increment_total_collections();
  2156   // Clean out dead classes and update Metaspace sizes.
  2157   if (ClassUnloadingWithConcurrentMark) {
  2158     ClassLoaderDataGraph::purge();
  2160   MetaspaceGC::compute_new_size();
  2162   // We reclaimed old regions so we should calculate the sizes to make
  2163   // sure we update the old gen/space data.
  2164   g1h->g1mm()->update_sizes();
  2166   g1h->trace_heap_after_concurrent_cycle();
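       // Frees the regions left on the cleanup list after the cleanup pause,
       // concurrently: each region is cleared and appended to the secondary
       // free list in batches of G1SecondaryFreeListAppendLength regions.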
  2169 void ConcurrentMark::completeCleanup() {
  2170   if (has_aborted()) return;
  2172   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  2174   _cleanup_list.verify_optional();
  2175   FreeRegionList tmp_free_list("Tmp Free List");
  2177   if (G1ConcRegionFreeingVerbose) {
  2178     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
  2179                            "cleanup list has %u entries",
  2180                            _cleanup_list.length());
  2183   // No one else should be accessing the _cleanup_list at this point,
  2184   // so it is not necessary to take any locks
  2185   while (!_cleanup_list.is_empty()) {
  2186     HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
  2187     assert(hr != NULL, "Got NULL from a non-empty list");
  2188     hr->par_clear();
  2189     tmp_free_list.add_ordered(hr);
  2191     // Instead of adding one region at a time to the secondary_free_list,
  2192     // we accumulate them in the local list and move them a few at a
  2193     // time. This also cuts down on the number of notify_all() calls
  2194     // we do during this process. We'll also append the local list when
  2195     // _cleanup_list is empty (which means we just removed the last
  2196     // region from the _cleanup_list).
  2197     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
  2198         _cleanup_list.is_empty()) {
  2199       if (G1ConcRegionFreeingVerbose) {
  2200         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
  2201                                "appending %u entries to the secondary_free_list, "
  2202                                "cleanup list still has %u entries",
  2203                                tmp_free_list.length(),
  2204                                _cleanup_list.length());
  2208         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
  2209         g1h->secondary_free_list_add(&tmp_free_list);
  2210         SecondaryFreeList_lock->notify_all();
  2213       if (G1StressConcRegionFreeing) {
  2214         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
  2215           os::sleep(Thread::current(), (jlong) 1, false);
  2220   assert(tmp_free_list.is_empty(), "post-condition");
  2223 // Supporting Object and Oop closures for reference discovery
  2224 // and processing during marking
  2226 bool G1CMIsAliveClosure::do_object_b(oop obj) {
  2227   HeapWord* addr = (HeapWord*)obj;
  2228   return addr != NULL &&
  2229          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
  2232 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
  2233 // Uses the CMTask associated with a worker thread (for serial reference
  2234 // processing the CMTask for worker 0 is used) to preserve (mark) and
  2235 // trace referent objects.
  2236 //
  2237 // Using the CMTask and embedded local queues avoids having the worker
  2238 // threads operating on the global mark stack. This reduces the risk
  2239 // of overflowing the stack - which we would rather avoid at this late
  2240 // stage. Also, using the tasks' local queues removes the potential
  2241 // for the workers to interfere with each other, which could occur if
  2242 // they operated on the global stack.
  2244 class G1CMKeepAliveAndDrainClosure: public OopClosure {
  2245   ConcurrentMark* _cm;
  2246   CMTask*         _task;
  2247   int             _ref_counter_limit;
  2248   int             _ref_counter;
  2249   bool            _is_serial;
  2250  public:
  2251   G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
  2252     _cm(cm), _task(task), _is_serial(is_serial),
  2253     _ref_counter_limit(G1RefProcDrainInterval) {
  2254     assert(_ref_counter_limit > 0, "sanity");
  2255     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
  2256     _ref_counter = _ref_counter_limit;
  2259   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  2260   virtual void do_oop(      oop* p) { do_oop_work(p); }
  2262   template <class T> void do_oop_work(T* p) {
  2263     if (!_cm->has_overflown()) {
  2264       oop obj = oopDesc::load_decode_heap_oop(p);
  2265       if (_cm->verbose_high()) {
  2266         gclog_or_tty->print_cr("\t[%u] we're looking at location "
  2267                                "*"PTR_FORMAT" = "PTR_FORMAT,
  2268                                _task->worker_id(), p2i(p), p2i((void*) obj));
  2271       _task->deal_with_reference(obj);
  2272       _ref_counter--;
  2274       if (_ref_counter == 0) {
  2275         // We have dealt with _ref_counter_limit references, pushing them
  2276         // and objects reachable from them on to the local stack (and
  2277         // possibly the global stack). Call CMTask::do_marking_step() to
  2278         // process these entries.
  2279         //
  2280         // We call CMTask::do_marking_step() in a loop, which we'll exit if
  2281         // there's nothing more to do (i.e. we're done with the entries that
  2282         // were pushed as a result of the CMTask::deal_with_reference() calls
  2283         // above) or we overflow.
  2284         //
  2285         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
  2286         // flag while there may still be some work to do. (See the comment at
  2287         // the beginning of CMTask::do_marking_step() for those conditions -
  2288         // one of which is reaching the specified time target.) It is only
  2289         // when CMTask::do_marking_step() returns without setting the
  2290         // has_aborted() flag that the marking step has completed.
  2291         do {
  2292           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
  2293           _task->do_marking_step(mark_step_duration_ms,
  2294                                  false      /* do_termination */,
  2295                                  _is_serial);
  2296         } while (_task->has_aborted() && !_cm->has_overflown());
  2297         _ref_counter = _ref_counter_limit;
  2299     } else {
  2300       if (_cm->verbose_high()) {
  2301          gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id());
  2305 };
  2307 // 'Drain' oop closure used by both serial and parallel reference processing.
  2308 // Uses the CMTask associated with a given worker thread (for serial
  2309 // reference processing the CMTask for worker 0 is used). Calls the
  2310 // do_marking_step routine, with an unbelievably large timeout value,
  2311 // to drain the marking data structures of the remaining entries
  2312 // added by the 'keep alive' oop closure above.
  2314 class G1CMDrainMarkingStackClosure: public VoidClosure {
  2315   ConcurrentMark* _cm;
  2316   CMTask*         _task;
  2317   bool            _is_serial;
  2318  public:
  2319   G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
  2320     _cm(cm), _task(task), _is_serial(is_serial) {
  2321     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
  2324   void do_void() {
  2325     do {
  2326       if (_cm->verbose_high()) {
  2327         gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step - serial: %s",
  2328                                _task->worker_id(), BOOL_TO_STR(_is_serial));
  2331       // We call CMTask::do_marking_step() to completely drain the local
  2332       // and global marking stacks of entries pushed by the 'keep alive'
  2333       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
  2334       //
  2335       // CMTask::do_marking_step() is called in a loop, which we'll exit
  2336       // if there's nothing more to do (i.e. we've completely drained the
  2337       // entries that were pushed as a result of applying the 'keep alive'
  2338       // closure to the entries on the discovered ref lists) or we overflow
  2339       // the global marking stack.
  2340       //
  2341       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
  2342       // flag while there may still be some work to do. (See the comment at
  2343       // the beginning of CMTask::do_marking_step() for those conditions -
  2344       // one of which is reaching the specified time target.) It is only
  2345       // when CMTask::do_marking_step() returns without setting the
  2346       // has_aborted() flag that the marking step has completed.
  2348       _task->do_marking_step(1000000000.0 /* something very large */,
  2349                              true         /* do_termination */,
  2350                              _is_serial);
  2351     } while (_task->has_aborted() && !_cm->has_overflown());
  2353 };
  2355 // Implementation of AbstractRefProcTaskExecutor for parallel
  2356 // reference processing at the end of G1 concurrent marking
  2358 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
  2359 private:
  2360   G1CollectedHeap* _g1h;
  2361   ConcurrentMark*  _cm;
  2362   WorkGang*        _workers;
  2363   int              _active_workers;
  2365 public:
  2366   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
  2367                         ConcurrentMark* cm,
  2368                         WorkGang* workers,
  2369                         int n_workers) :
  2370     _g1h(g1h), _cm(cm),
  2371     _workers(workers), _active_workers(n_workers) { }
  2373   // Executes the given task using concurrent marking worker threads.
  2374   virtual void execute(ProcessTask& task);
  2375   virtual void execute(EnqueueTask& task);
  2376 };
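       // Gang task proxy that allows a reference processing ProcessTask to be
       // run on the G1 work gang. Each worker wraps its own CMTask in the
       // keep-alive and drain closures defined above.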
  2378 class G1CMRefProcTaskProxy: public AbstractGangTask {
  2379   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
  2380   ProcessTask&     _proc_task;
  2381   G1CollectedHeap* _g1h;
  2382   ConcurrentMark*  _cm;
  2384 public:
  2385   G1CMRefProcTaskProxy(ProcessTask& proc_task,
  2386                      G1CollectedHeap* g1h,
  2387                      ConcurrentMark* cm) :
  2388     AbstractGangTask("Process reference objects in parallel"),
  2389     _proc_task(proc_task), _g1h(g1h), _cm(cm) {
  2390     ReferenceProcessor* rp = _g1h->ref_processor_cm();
  2391     assert(rp->processing_is_mt(), "shouldn't be here otherwise");
  2394   virtual void work(uint worker_id) {
  2395     ResourceMark rm;
  2396     HandleMark hm;
  2397     CMTask* task = _cm->task(worker_id);
  2398     G1CMIsAliveClosure g1_is_alive(_g1h);
  2399     G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
  2400     G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
  2402     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
  2404 };
  2406 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
  2407   assert(_workers != NULL, "Need parallel worker threads.");
  2408   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
  2410   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
  2412   // We need to reset the concurrency level before each
  2413   // proxy task execution, so that the termination protocol
  2414   // and overflow handling in CMTask::do_marking_step() knows
  2415   // how many workers to wait for.
  2416   _cm->set_concurrency(_active_workers);
  2417   _g1h->set_par_threads(_active_workers);
  2418   _workers->run_task(&proc_task_proxy);
  2419   _g1h->set_par_threads(0);
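       // Gang task proxy that runs a reference enqueueing EnqueueTask on the
       // work gang.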
  2422 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
  2423   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
  2424   EnqueueTask& _enq_task;
  2426 public:
  2427   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
  2428     AbstractGangTask("Enqueue reference objects in parallel"),
  2429     _enq_task(enq_task) { }
  2431   virtual void work(uint worker_id) {
  2432     _enq_task.work(worker_id);
  2434 };
  2436 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
  2437   assert(_workers != NULL, "Need parallel worker threads.");
  2438   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
  2440   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
  2442   // Not strictly necessary but...
  2443   //
  2444   // We need to reset the concurrency level before each
  2445   // proxy task execution, so that the termination protocol
  2446   // and overflow handling in CMTask::do_marking_step() knows
  2447   // how many workers to wait for.
  2448   _cm->set_concurrency(_active_workers);
  2449   _g1h->set_par_threads(_active_workers);
  2450   _workers->run_task(&enq_task_proxy);
  2451   _g1h->set_par_threads(0);
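       // The parallel part of the unloading work performed after reference
       // processing; delegates to G1CollectedHeap::parallel_cleaning().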
  2454 void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
  2455   G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes);
  2458 // Helper class to get rid of some boilerplate code.
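       // When output is enabled, a leading space is emitted so that the
       // sub-phase line is indented under the enclosing entry in the GC log.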
  2459 class G1RemarkGCTraceTime : public GCTraceTime {
  2460   static bool doit_and_prepend(bool doit) {
  2461     if (doit) {
  2462       gclog_or_tty->put(' ');
  2464     return doit;
  2467  public:
  2468   G1RemarkGCTraceTime(const char* title, bool doit)
  2469     : GCTraceTime(title, doit_and_prepend(doit), false, G1CollectedHeap::heap()->gc_timer_cm(),
  2470         G1CollectedHeap::heap()->concurrent_mark()->concurrent_gc_id()) {
  2472 };
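       // Processes the references discovered during marking (serially or in
       // parallel), enqueues them, and then performs class/code unloading and
       // string deduplication cleanup.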
  2474 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
  2475   if (has_overflown()) {
  2476     // Skip processing the discovered references if we have
  2477     // overflown the global marking stack. Reference objects
  2478     // only get discovered once so it is OK to not
  2479     // de-populate the discovered reference lists. We could have,
  2480     // but the only benefit would be that, when marking restarts,
  2481     // fewer reference objects are discovered.
  2482     return;
  2485   ResourceMark rm;
  2486   HandleMark   hm;
  2488   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  2490   // Is alive closure.
  2491   G1CMIsAliveClosure g1_is_alive(g1h);
  2493   // Inner scope to exclude the cleaning of the string and symbol
  2494   // tables from the displayed time.
  2496     if (G1Log::finer()) {
  2497       gclog_or_tty->put(' ');
  2499     GCTraceTime t("GC ref-proc", G1Log::finer(), false, g1h->gc_timer_cm(), concurrent_gc_id());
  2501     ReferenceProcessor* rp = g1h->ref_processor_cm();
  2503     // See the comment in G1CollectedHeap::ref_processing_init()
  2504     // about how reference processing currently works in G1.
  2506     // Set the soft reference policy
  2507     rp->setup_policy(clear_all_soft_refs);
  2508     assert(_markStack.isEmpty(), "mark stack should be empty");
  2510     // Instances of the 'Keep Alive' and 'Complete GC' closures used
  2511     // in serial reference processing. Note these closures are also
  2512     // used for serially processing (by the current thread) the
  2513     // JNI references during parallel reference processing.
  2514     //
  2515     // These closures do not need to synchronize with the worker
  2516     // threads involved in parallel reference processing as these
  2517     // instances are executed serially by the current thread (e.g.
  2518     // reference processing is not multi-threaded and is thus
  2519     // performed by the current thread instead of a gang worker).
  2520     //
  2521     // The gang tasks involved in parallel reference processing create
  2522     // their own instances of these closures, which do their own
  2523     // synchronization among themselves.
  2524     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
  2525     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
  2527     // We need at least one active thread. If reference processing
  2528     // is not multi-threaded we use the current (VMThread) thread,
  2529     // otherwise we use the work gang from the G1CollectedHeap and
  2530     // we utilize all the worker threads we can.
  2531     bool processing_is_mt = rp->processing_is_mt() && g1h->workers() != NULL;
  2532     uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
  2533     active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
  2535     // Parallel processing task executor.
  2536     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
  2537                                               g1h->workers(), active_workers);
  2538     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
  2540     // Set the concurrency level. The phase was already set prior to
  2541     // executing the remark task.
  2542     set_concurrency(active_workers);
  2544     // Set the degree of MT processing here.  If the discovery was done MT,
  2545     // the number of threads involved during discovery could differ from
  2546     // the number of active workers.  This is OK as long as the discovered
  2547     // Reference lists are balanced (see balance_all_queues() and balance_queues()).
  2548     rp->set_active_mt_degree(active_workers);
  2550     // Process the weak references.
  2551     const ReferenceProcessorStats& stats =
  2552         rp->process_discovered_references(&g1_is_alive,
  2553                                           &g1_keep_alive,
  2554                                           &g1_drain_mark_stack,
  2555                                           executor,
  2556                                           g1h->gc_timer_cm(),
  2557                                           concurrent_gc_id());
  2558     g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
  2560     // The do_oop work routines of the keep_alive and drain_marking_stack
  2561     // oop closures will set the has_overflown flag if we overflow the
  2562     // global marking stack.
  2564     assert(_markStack.overflow() || _markStack.isEmpty(),
  2565             "mark stack should be empty (unless it overflowed)");
  2567     if (_markStack.overflow()) {
  2568       // This should have been done already when we tried to push an
  2569       // entry on to the global mark stack. But let's do it again.
  2570       set_has_overflown();
  2573     assert(rp->num_q() == active_workers, "why not");
  2575     rp->enqueue_discovered_references(executor);
  2577     rp->verify_no_references_recorded();
  2578     assert(!rp->discovery_enabled(), "Post condition");
  2581   if (has_overflown()) {
  2582     // We cannot trust g1_is_alive if the marking stack overflowed
  2583     return;
  2586   assert(_markStack.isEmpty(), "Marking should have completed");
  2588   // Unload Klasses, String, Symbols, Code Cache, etc.
  2590     G1RemarkGCTraceTime trace("Unloading", G1Log::finer());
  2592     if (ClassUnloadingWithConcurrentMark) {
  2593       bool purged_classes;
  2596         G1RemarkGCTraceTime trace("System Dictionary Unloading", G1Log::finest());
  2597         purged_classes = SystemDictionary::do_unloading(&g1_is_alive);
  2601         G1RemarkGCTraceTime trace("Parallel Unloading", G1Log::finest());
  2602         weakRefsWorkParallelPart(&g1_is_alive, purged_classes);
  2606     if (G1StringDedup::is_enabled()) {
  2607       G1RemarkGCTraceTime trace("String Deduplication Unlink", G1Log::finest());
  2608       G1StringDedup::unlink(&g1_is_alive);
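       // Swap the roles of the prev and next marking bitmaps: the bitmap
       // populated by the marking that just completed becomes the new prev
       // bitmap, and the old prev bitmap becomes the next bitmap for the
       // following cycle.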
  2613 void ConcurrentMark::swapMarkBitMaps() {
  2614   CMBitMapRO* temp = _prevMarkBitMap;
  2615   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
  2616   _nextMarkBitMap  = (CMBitMap*)  temp;
  2619 class CMObjectClosure;
  2621 // Closure for iterating over objects, currently only used for
  2622 // processing SATB buffers.
  2623 class CMObjectClosure : public ObjectClosure {
  2624 private:
  2625   CMTask* _task;
  2627 public:
  2628   void do_object(oop obj) {
  2629     _task->deal_with_reference(obj);
  2632   CMObjectClosure(CMTask* task) : _task(task) { }
  2633 };
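       // Thread closure used during remark: claims each thread so that it is
       // processed by exactly one worker, walks the nmethods on Java thread
       // stacks and drains the per-thread (and shared) SATB buffers.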
  2635 class G1RemarkThreadsClosure : public ThreadClosure {
  2636   CMObjectClosure _cm_obj;
  2637   G1CMOopClosure _cm_cl;
  2638   MarkingCodeBlobClosure _code_cl;
  2639   int _thread_parity;
  2640   bool _is_par;
  2642  public:
  2643   G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task, bool is_par) :
  2644     _cm_obj(task), _cm_cl(g1h, g1h->concurrent_mark(), task), _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
  2645     _thread_parity(SharedHeap::heap()->strong_roots_parity()), _is_par(is_par) {}
  2647   void do_thread(Thread* thread) {
  2648     if (thread->is_Java_thread()) {
  2649       if (thread->claim_oops_do(_is_par, _thread_parity)) {
  2650         JavaThread* jt = (JavaThread*)thread;
  2652         // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking;
  2653         // however, the oops reachable from nmethods have very complex lifecycles:
  2654         // * Alive if on the stack of an executing method
  2655         // * Weakly reachable otherwise
  2656         // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
  2657         // kept live by the SATB invariant, but other oops recorded in nmethods may behave differently.
  2658         jt->nmethods_do(&_code_cl);
  2660         jt->satb_mark_queue().apply_closure_and_empty(&_cm_obj);
  2662     } else if (thread->is_VM_thread()) {
  2663       if (thread->claim_oops_do(_is_par, _thread_parity)) {
  2664         JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_obj);
  2668 };
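       // Gang task for the remark phase. Each active worker scans the root
       // threads via G1RemarkThreadsClosure and then calls
       // CMTask::do_marking_step() in a loop until marking completes or the
       // global mark stack overflows.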
  2670 class CMRemarkTask: public AbstractGangTask {
  2671 private:
  2672   ConcurrentMark* _cm;
  2673   bool            _is_serial;
  2674 public:
  2675   void work(uint worker_id) {
  2676     // Since all available tasks are actually started, we should
  2677     // only proceed if we're supposed to be active.
  2678     if (worker_id < _cm->active_tasks()) {
  2679       CMTask* task = _cm->task(worker_id);
  2680       task->record_start_time();
  2682         ResourceMark rm;
  2683         HandleMark hm;
  2685         G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task, !_is_serial);
  2686         Threads::threads_do(&threads_f);
  2689       do {
  2690         task->do_marking_step(1000000000.0 /* something very large */,
  2691                               true         /* do_termination       */,
  2692                               _is_serial);
  2693       } while (task->has_aborted() && !_cm->has_overflown());
  2694       // If we overflow, then we do not want to restart. We instead
  2695       // want to abort remark and do concurrent marking again.
  2696       task->record_end_time();
  2700   CMRemarkTask(ConcurrentMark* cm, int active_workers, bool is_serial) :
  2701     AbstractGangTask("Par Remark"), _cm(cm), _is_serial(is_serial) {
  2702     _cm->terminator()->reset_for_reuse(active_workers);
  2704 };
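       // The actual work of the remark pause: runs CMRemarkTask either on the
       // work gang (parallel) or directly on the VM thread (serial), and then
       // checks that all SATB buffers have been processed unless marking
       // overflowed.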
  2706 void ConcurrentMark::checkpointRootsFinalWork() {
  2707   ResourceMark rm;
  2708   HandleMark   hm;
  2709   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  2711   G1RemarkGCTraceTime trace("Finalize Marking", G1Log::finer());
  2713   g1h->ensure_parsability(false);
  2715   if (G1CollectedHeap::use_parallel_gc_threads()) {
  2716     G1CollectedHeap::StrongRootsScope srs(g1h);
  2717     // this is remark, so we'll use up all active threads
  2718     uint active_workers = g1h->workers()->active_workers();
  2719     if (active_workers == 0) {
  2720       assert(active_workers > 0, "Should have been set earlier");
  2721       active_workers = (uint) ParallelGCThreads;
  2722       g1h->workers()->set_active_workers(active_workers);
  2724     set_concurrency_and_phase(active_workers, false /* concurrent */);
  2725     // Leave _parallel_marking_threads at its
  2726     // value originally calculated in the ConcurrentMark
  2727     // constructor and pass the number of active workers
  2728     // through the gang in the task.
  2730     CMRemarkTask remarkTask(this, active_workers, false /* is_serial */);
  2731     // We will start all available threads, even if we decide that the
  2732     // active_workers will be fewer. The extra ones will just bail out
  2733     // immediately.
  2734     g1h->set_par_threads(active_workers);
  2735     g1h->workers()->run_task(&remarkTask);
  2736     g1h->set_par_threads(0);
  2737   } else {
  2738     G1CollectedHeap::StrongRootsScope srs(g1h);
  2739     uint active_workers = 1;
  2740     set_concurrency_and_phase(active_workers, false /* concurrent */);
  2742     // Note - if there's no work gang then the VMThread will be
  2743     // the thread to execute the remark - serially. We have
  2744     // to pass true for the is_serial parameter so that
  2745     // CMTask::do_marking_step() doesn't enter the sync
  2746     // barriers in the event of an overflow. Entering the sync
  2747     // barriers would trigger an assert that the current thread
  2748     // is not a concurrent GC thread.
  2749     CMRemarkTask remarkTask(this, active_workers, true /* is_serial*/);
  2750     remarkTask.work(0);
  2752   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  2753   guarantee(has_overflown() ||
  2754             satb_mq_set.completed_buffers_num() == 0,
  2755             err_msg("Invariant: has_overflown = %s, num buffers = %d",
  2756                     BOOL_TO_STR(has_overflown()),
  2757                     satb_mq_set.completed_buffers_num()));
  2759   print_stats();
  2762 #ifndef PRODUCT
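       // Debug-only (non-PRODUCT) closures used to print reachable objects
       // together with their marking and TAMS status.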
  2764 class PrintReachableOopClosure: public OopClosure {
  2765 private:
  2766   G1CollectedHeap* _g1h;
  2767   outputStream*    _out;
  2768   VerifyOption     _vo;
  2769   bool             _all;
  2771 public:
  2772   PrintReachableOopClosure(outputStream* out,
  2773                            VerifyOption  vo,
  2774                            bool          all) :
  2775     _g1h(G1CollectedHeap::heap()),
  2776     _out(out), _vo(vo), _all(all) { }
  2778   void do_oop(narrowOop* p) { do_oop_work(p); }
  2779   void do_oop(      oop* p) { do_oop_work(p); }
  2781   template <class T> void do_oop_work(T* p) {
  2782     oop         obj = oopDesc::load_decode_heap_oop(p);
  2783     const char* str = NULL;
  2784     const char* str2 = "";
  2786     if (obj == NULL) {
  2787       str = "";
  2788     } else if (!_g1h->is_in_g1_reserved(obj)) {
  2789       str = " O";
  2790     } else {
  2791       HeapRegion* hr  = _g1h->heap_region_containing(obj);
  2792       bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo);
  2793       bool marked = _g1h->is_marked(obj, _vo);
  2795       if (over_tams) {
  2796         str = " >";
  2797         if (marked) {
  2798           str2 = " AND MARKED";
  2800       } else if (marked) {
  2801         str = " M";
  2802       } else {
  2803         str = " NOT";
  2807     _out->print_cr("  "PTR_FORMAT": "PTR_FORMAT"%s%s",
  2808                    p2i(p), p2i((void*) obj), str, str2);
  2810 };
  2812 class PrintReachableObjectClosure : public ObjectClosure {
  2813 private:
  2814   G1CollectedHeap* _g1h;
  2815   outputStream*    _out;
  2816   VerifyOption     _vo;
  2817   bool             _all;
  2818   HeapRegion*      _hr;
  2820 public:
  2821   PrintReachableObjectClosure(outputStream* out,
  2822                               VerifyOption  vo,
  2823                               bool          all,
  2824                               HeapRegion*   hr) :
  2825     _g1h(G1CollectedHeap::heap()),
  2826     _out(out), _vo(vo), _all(all), _hr(hr) { }
  2828   void do_object(oop o) {
  2829     bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo);
  2830     bool marked = _g1h->is_marked(o, _vo);
  2831     bool print_it = _all || over_tams || marked;
  2833     if (print_it) {
  2834       _out->print_cr(" "PTR_FORMAT"%s",
  2835                      p2i((void *)o), (over_tams) ? " >" : (marked) ? " M" : "");
  2836       PrintReachableOopClosure oopCl(_out, _vo, _all);
  2837       o->oop_iterate_no_header(&oopCl);
  2840 };
  2842 class PrintReachableRegionClosure : public HeapRegionClosure {
  2843 private:
  2844   G1CollectedHeap* _g1h;
  2845   outputStream*    _out;
  2846   VerifyOption     _vo;
  2847   bool             _all;
  2849 public:
  2850   bool doHeapRegion(HeapRegion* hr) {
  2851     HeapWord* b = hr->bottom();
  2852     HeapWord* e = hr->end();
  2853     HeapWord* t = hr->top();
  2854     HeapWord* p = _g1h->top_at_mark_start(hr, _vo);
  2855     _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
  2856                    "TAMS: " PTR_FORMAT, p2i(b), p2i(e), p2i(t), p2i(p));
  2857     _out->cr();
  2859     HeapWord* from = b;
  2860     HeapWord* to   = t;
  2862     if (to > from) {
  2863       _out->print_cr("Objects in [" PTR_FORMAT ", " PTR_FORMAT "]", p2i(from), p2i(to));
  2864       _out->cr();
  2865       PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
  2866       hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
  2867       _out->cr();
  2870     return false;
  2873   PrintReachableRegionClosure(outputStream* out,
  2874                               VerifyOption  vo,
  2875                               bool          all) :
  2876     _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { }
  2877 };
  2879 void ConcurrentMark::print_reachable(const char* str,
  2880                                      VerifyOption vo,
  2881                                      bool all) {
  2882   gclog_or_tty->cr();
  2883   gclog_or_tty->print_cr("== Doing heap dump... ");
  2885   if (G1PrintReachableBaseFile == NULL) {
  2886     gclog_or_tty->print_cr("  #### error: no base file defined");
  2887     return;
  2890   if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
  2891       (JVM_MAXPATHLEN - 1)) {
  2892     gclog_or_tty->print_cr("  #### error: file name too long");
  2893     return;
  2896   char file_name[JVM_MAXPATHLEN];
  2897   sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
  2898   gclog_or_tty->print_cr("  dumping to file %s", file_name);
  2900   fileStream fout(file_name);
  2901   if (!fout.is_open()) {
  2902     gclog_or_tty->print_cr("  #### error: could not open file");
  2903     return;
  2906   outputStream* out = &fout;
  2907   out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo));
  2908   out->cr();
  2910   out->print_cr("--- ITERATING OVER REGIONS");
  2911   out->cr();
  2912   PrintReachableRegionClosure rcl(out, vo, all);
  2913   _g1h->heap_region_iterate(&rcl);
  2914   out->cr();
  2916   gclog_or_tty->print_cr("  done");
  2917   gclog_or_tty->flush();
  2920 #endif // PRODUCT
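// Illustrative usage note (editorial, not part of the original source):
// print_reachable() above only does anything when the non-product flag
// G1PrintReachableBaseFile is set in a debug build, e.g.
// -XX:G1PrintReachableBaseFile=/tmp/g1reach. A call such as
// print_reachable("remark", vo, false) - "remark" is a hypothetical str
// value for this note only - would then dump to "/tmp/g1reach.remark",
// following the sprintf("%s.%s", ...) above.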
  2922 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
  2923   // Note we are overriding the read-only view of the prev map here, via
  2924   // the cast.
  2925   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
  2928 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
  2929   _nextMarkBitMap->clearRange(mr);
  2932 HeapRegion*
  2933 ConcurrentMark::claim_region(uint worker_id) {
  2934   // "checkpoint" the finger
  2935   HeapWord* finger = _finger;
  2937   // _heap_end will not change underneath our feet; it only changes at
  2938   // yield points.
  2939   while (finger < _heap_end) {
  2940     assert(_g1h->is_in_g1_reserved(finger), "invariant");
  2942     // Note on how this code handles humongous regions. In the
  2943     // normal case the finger will reach the start of a "starts
  2944     // humongous" (SH) region. Its end will either be the end of the
  2945     // last "continues humongous" (CH) region in the sequence, or the
  2946     // standard end of the SH region (if the SH is the only region in
  2947     // the sequence). That way claim_region() will skip over the CH
  2948     // regions. However, there is a subtle race between a CM thread
  2949     // executing this method and a mutator thread doing a humongous
  2950     // object allocation. The two are not mutually exclusive as the CM
  2951     // thread does not need to hold the Heap_lock when it gets
  2952     // here. So there is a chance that claim_region() will come across
  2953     // a free region that's in the process of becoming a SH or a CH
  2954     // region. In the former case, it will either
  2955     //   a) Miss the update to the region's end, in which case it will
  2956     //      visit every subsequent CH region, will find their bitmaps
  2957     //      empty, and do nothing, or
  2958     //   b) Will observe the update of the region's end (in which case
  2959     //      it will skip the subsequent CH regions).
  2960     // If it comes across a region that suddenly becomes CH, the
  2961     // scenario will be similar to b). So, the race between
  2962     // claim_region() and a humongous object allocation might force us
  2963     // to do a bit of unnecessary work (due to some unnecessary bitmap
  2964     // iterations) but it should not introduce any correctness issues.
  2965     HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
  2967     // heap_region_containing_raw() above may return NULL because we always scan up to
  2968     // the end of the heap, so the finger can point into an uncommitted region. If so, just jump to the next region.
  2969     HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
  2971     // Is the gap between reading the finger and doing the CAS too long?
  2972     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
  2973     if (res == finger && curr_region != NULL) {
  2974       // we succeeded
  2975       HeapWord*   bottom        = curr_region->bottom();
  2976       HeapWord*   limit         = curr_region->next_top_at_mark_start();
  2978       if (verbose_low()) {
  2979         gclog_or_tty->print_cr("[%u] curr_region = "PTR_FORMAT" "
  2980                                "["PTR_FORMAT", "PTR_FORMAT"), "
  2981                                "limit = "PTR_FORMAT,
  2982                                worker_id, p2i(curr_region), p2i(bottom), p2i(end), p2i(limit));
  2985       // Notice that _finger == end cannot be guaranteed here, since
  2986       // someone else might have moved the finger even further.
  2987       assert(_finger >= end, "the finger should have moved forward");
  2989       if (verbose_low()) {
  2990         gclog_or_tty->print_cr("[%u] we were successful with region = "
  2991                                PTR_FORMAT, worker_id, p2i(curr_region));
  2994       if (limit > bottom) {
  2995         if (verbose_low()) {
  2996           gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is not empty, "
  2997                                  "returning it ", worker_id, p2i(curr_region));
  2999         return curr_region;
  3000       } else {
  3001         assert(limit == bottom,
  3002                "the region limit should be at bottom");
  3003         if (verbose_low()) {
  3004           gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is empty, "
  3005                                  "returning NULL", worker_id, p2i(curr_region));
  3007         // we return NULL and the caller should try calling
  3008         // claim_region() again.
  3009         return NULL;
  3011     } else {
  3012       assert(_finger > finger, "the finger should have moved forward");
  3013       if (verbose_low()) {
  3014         if (curr_region == NULL) {
  3015           gclog_or_tty->print_cr("[%u] found uncommitted region, moving finger, "
  3016                                  "global finger = "PTR_FORMAT", "
  3017                                  "our finger = "PTR_FORMAT,
  3018                                  worker_id, p2i(_finger), p2i(finger));
  3019         } else {
  3020           gclog_or_tty->print_cr("[%u] somebody else moved the finger, "
  3021                                  "global finger = "PTR_FORMAT", "
  3022                                  "our finger = "PTR_FORMAT,
  3023                                  worker_id, p2i(_finger), p2i(finger));
  3027       // read it again
  3028       finger = _finger;
  3032   return NULL;
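// Illustrative sketch (editorial, not part of the original source): the
// comments in claim_region() above assume a caller that simply retries when
// NULL is returned, since NULL can mean "claimed an empty region" or
// "somebody else moved the finger" rather than "no regions left".
// out_of_regions() and scan_region() are placeholder names for this sketch:
//
//   while (!out_of_regions()) {              // e.g. finger >= _heap_end
//     HeapRegion* hr = _cm->claim_region(worker_id);
//     if (hr != NULL) {
//       scan_region(hr);                     // scan the region's bitmap portion
//     }
//     // hr == NULL: either the claimed region was empty or another worker
//     // moved the finger; loop and call claim_region() again.
//   }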
  3035 #ifndef PRODUCT
  3036 enum VerifyNoCSetOopsPhase {
  3037   VerifyNoCSetOopsStack,
  3038   VerifyNoCSetOopsQueues,
  3039   VerifyNoCSetOopsSATBCompleted,
  3040   VerifyNoCSetOopsSATBThread
  3041 };
  3043 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure  {
  3044 private:
  3045   G1CollectedHeap* _g1h;
  3046   VerifyNoCSetOopsPhase _phase;
  3047   int _info;
  3049   const char* phase_str() {
  3050     switch (_phase) {
  3051     case VerifyNoCSetOopsStack:         return "Stack";
  3052     case VerifyNoCSetOopsQueues:        return "Queue";
  3053     case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers";
  3054     case VerifyNoCSetOopsSATBThread:    return "Thread SATB Buffers";
  3055     default:                            ShouldNotReachHere();
  3057     return NULL;
  3060   void do_object_work(oop obj) {
  3061     guarantee(!_g1h->obj_in_cs(obj),
  3062               err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d",
  3063                       p2i((void*) obj), phase_str(), _info));
  3066 public:
  3067   VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { }
  3069   void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) {
  3070     _phase = phase;
  3071     _info = info;
  3074   virtual void do_oop(oop* p) {
  3075     oop obj = oopDesc::load_decode_heap_oop(p);
  3076     do_object_work(obj);
  3079   virtual void do_oop(narrowOop* p) {
  3080     // We should not come across narrow oops while scanning marking
  3081     // stacks and SATB buffers.
  3082     ShouldNotReachHere();
  3085   virtual void do_object(oop obj) {
  3086     do_object_work(obj);
  3088 };
  3090 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks,
  3091                                          bool verify_enqueued_buffers,
  3092                                          bool verify_thread_buffers,
  3093                                          bool verify_fingers) {
  3094   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
  3095   if (!G1CollectedHeap::heap()->mark_in_progress()) {
  3096     return;
  3099   VerifyNoCSetOopsClosure cl;
  3101   if (verify_stacks) {
  3102     // Verify entries on the global mark stack
  3103     cl.set_phase(VerifyNoCSetOopsStack);
  3104     _markStack.oops_do(&cl);
  3106     // Verify entries on the task queues
  3107     for (uint i = 0; i < _max_worker_id; i += 1) {
  3108       cl.set_phase(VerifyNoCSetOopsQueues, i);
  3109       CMTaskQueue* queue = _task_queues->queue(i);
  3110       queue->oops_do(&cl);
  3114   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  3116   // Verify entries on the enqueued SATB buffers
  3117   if (verify_enqueued_buffers) {
  3118     cl.set_phase(VerifyNoCSetOopsSATBCompleted);
  3119     satb_qs.iterate_completed_buffers_read_only(&cl);
  3122   // Verify entries on the per-thread SATB buffers
  3123   if (verify_thread_buffers) {
  3124     cl.set_phase(VerifyNoCSetOopsSATBThread);
  3125     satb_qs.iterate_thread_buffers_read_only(&cl);
  3128   if (verify_fingers) {
  3129     // Verify the global finger
  3130     HeapWord* global_finger = finger();
  3131     if (global_finger != NULL && global_finger < _heap_end) {
  3132       // The global finger always points to a heap region boundary. We
  3133       // use heap_region_containing_raw() to get the containing region
  3134       // given that the global finger could be pointing to a free region
  3135       // which subsequently becomes continues humongous. If that
  3136       // happens, heap_region_containing() will return the bottom of the
  3137       // corresponding starts humongous region and the check below will
  3138       // not hold any more.
  3139       // Since we always iterate over all regions, we might get a NULL HeapRegion
  3140       // here.
  3141       HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
  3142       guarantee(global_hr == NULL || global_finger == global_hr->bottom(),
  3143                 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT,
  3144                         p2i(global_finger), HR_FORMAT_PARAMS(global_hr)));
  3147     // Verify the task fingers
  3148     assert(parallel_marking_threads() <= _max_worker_id, "sanity");
  3149     for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {
  3150       CMTask* task = _tasks[i];
  3151       HeapWord* task_finger = task->finger();
  3152       if (task_finger != NULL && task_finger < _heap_end) {
  3153         // See above note on the global finger verification.
  3154         HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
  3155         guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
  3156                   !task_hr->in_collection_set(),
  3157                   err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT,
  3158                           p2i(task_finger), HR_FORMAT_PARAMS(task_hr)));
  3163 #endif // PRODUCT
  3165 // Aggregate the counting data that was constructed concurrently
  3166 // with marking.
  3167 class AggregateCountDataHRClosure: public HeapRegionClosure {
  3168   G1CollectedHeap* _g1h;
  3169   ConcurrentMark* _cm;
  3170   CardTableModRefBS* _ct_bs;
  3171   BitMap* _cm_card_bm;
  3172   uint _max_worker_id;
  3174  public:
  3175   AggregateCountDataHRClosure(G1CollectedHeap* g1h,
  3176                               BitMap* cm_card_bm,
  3177                               uint max_worker_id) :
  3178     _g1h(g1h), _cm(g1h->concurrent_mark()),
  3179     _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
  3180     _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
  3182   bool doHeapRegion(HeapRegion* hr) {
  3183     if (hr->continuesHumongous()) {
  3184       // We will ignore these here and process them when their
  3185       // associated "starts humongous" region is processed.
  3186       // Note that we cannot rely on their associated
  3187       // "starts humongous" region to have its bit set to 1
  3188       // since, due to the region chunking in the parallel region
  3189       // iteration, a "continues humongous" region might be visited
  3190       // before its associated "starts humongous".
  3191       return false;
  3194     HeapWord* start = hr->bottom();
  3195     HeapWord* limit = hr->next_top_at_mark_start();
  3196     HeapWord* end = hr->end();
  3198     assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
  3199            err_msg("Preconditions not met - "
  3200                    "start: "PTR_FORMAT", limit: "PTR_FORMAT", "
  3201                    "top: "PTR_FORMAT", end: "PTR_FORMAT,
  3202                    p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end())));
  3204     assert(hr->next_marked_bytes() == 0, "Precondition");
  3206     if (start == limit) {
  3207       // NTAMS of this region has not been set so nothing to do.
  3208       return false;
  3211     // 'start' should be in the heap.
  3212     assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
  3213     // 'end' *may* be just beyond the end of the heap (if hr is the last region)
  3214     assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
  3216     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
  3217     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
  3218     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
  3220     // If ntams is not card aligned then we bump the card bitmap index
  3221     // for limit so that we get all the cards spanned by
  3222     // the object ending at ntams.
  3223     // Note: if this is the last region in the heap then ntams
  3224     // could actually be just beyond the end of the heap;
  3225     // limit_idx will then correspond to a (non-existent) card
  3226     // that is also outside the heap.
  3227     if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
  3228       limit_idx += 1;
  3231     assert(limit_idx <= end_idx, "or else use atomics");
  3233     // Aggregate the "stripe" in the count data associated with hr.
  3234     uint hrm_index = hr->hrm_index();
  3235     size_t marked_bytes = 0;
  3237     for (uint i = 0; i < _max_worker_id; i += 1) {
  3238       size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
  3239       BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
  3241       // Fetch the marked_bytes in this region for task i and
  3242       // add it to the running total for this region.
  3243       marked_bytes += marked_bytes_array[hrm_index];
  3245       // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
  3246       // into the global card bitmap.
  3247       BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
  3249       while (scan_idx < limit_idx) {
  3250         assert(task_card_bm->at(scan_idx) == true, "should be");
  3251         _cm_card_bm->set_bit(scan_idx);
  3252         assert(_cm_card_bm->at(scan_idx) == true, "should be");
  3254         // BitMap::get_next_one_offset() can handle the case when
  3255         // its left_offset parameter is greater than its right_offset
  3256         // parameter. It does, however, have an early exit if
  3257         // left_offset == right_offset. So let's limit the value
  3258         // passed in for left offset here.
  3259         BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
  3260         scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
  3264     // Update the marked bytes for this region.
  3265     hr->add_to_marked_bytes(marked_bytes);
  3267     // Next heap region
  3268     return false;
  3270 };
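// Worked example (editorial illustration) for the limit_idx adjustment in
// doHeapRegion() above, assuming the usual 512-byte card size: if NTAMS lies,
// say, 200 bytes into card k, the object ending at NTAMS partially covers
// card k, so the half-open index range [start_idx, limit_idx) must include k
// and limit_idx is bumped to k + 1. If NTAMS is exactly card aligned, card k
// holds no data below NTAMS and no bump is needed.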
  3272 class G1AggregateCountDataTask: public AbstractGangTask {
  3273 protected:
  3274   G1CollectedHeap* _g1h;
  3275   ConcurrentMark* _cm;
  3276   BitMap* _cm_card_bm;
  3277   uint _max_worker_id;
  3278   int _active_workers;
  3280 public:
  3281   G1AggregateCountDataTask(G1CollectedHeap* g1h,
  3282                            ConcurrentMark* cm,
  3283                            BitMap* cm_card_bm,
  3284                            uint max_worker_id,
  3285                            int n_workers) :
  3286     AbstractGangTask("Count Aggregation"),
  3287     _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
  3288     _max_worker_id(max_worker_id),
  3289     _active_workers(n_workers) { }
  3291   void work(uint worker_id) {
  3292     AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);
  3294     if (G1CollectedHeap::use_parallel_gc_threads()) {
  3295       _g1h->heap_region_par_iterate_chunked(&cl, worker_id,
  3296                                             _active_workers,
  3297                                             HeapRegion::AggregateCountClaimValue);
  3298     } else {
  3299       _g1h->heap_region_iterate(&cl);
  3302 };
  3305 void ConcurrentMark::aggregate_count_data() {
  3306   int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
  3307                         _g1h->workers()->active_workers() :
  3308                         1);
  3310   G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
  3311                                            _max_worker_id, n_workers);
  3313   if (G1CollectedHeap::use_parallel_gc_threads()) {
  3314     assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
  3315            "sanity check");
  3316     _g1h->set_par_threads(n_workers);
  3317     _g1h->workers()->run_task(&g1_par_agg_task);
  3318     _g1h->set_par_threads(0);
  3320     assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue),
  3321            "sanity check");
  3322     _g1h->reset_heap_region_claim_values();
  3323   } else {
  3324     g1_par_agg_task.work(0);
  3328 // Clear the per-worker arrays used to store the per-region counting data
  3329 void ConcurrentMark::clear_all_count_data() {
  3330   // Clear the global card bitmap - it will be filled during
  3331   // liveness count aggregation (during remark) and the
  3332   // final counting task.
  3333   _card_bm.clear();
  3335   // Clear the global region bitmap - it will be filled as part
  3336   // of the final counting task.
  3337   _region_bm.clear();
  3339   uint max_regions = _g1h->max_regions();
  3340   assert(_max_worker_id > 0, "uninitialized");
  3342   for (uint i = 0; i < _max_worker_id; i += 1) {
  3343     BitMap* task_card_bm = count_card_bitmap_for(i);
  3344     size_t* marked_bytes_array = count_marked_bytes_array_for(i);
  3346     assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
  3347     assert(marked_bytes_array != NULL, "uninitialized");
  3349     memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
  3350     task_card_bm->clear();
  3354 void ConcurrentMark::print_stats() {
  3355   if (verbose_stats()) {
  3356     gclog_or_tty->print_cr("---------------------------------------------------------------------");
  3357     for (size_t i = 0; i < _active_tasks; ++i) {
  3358       _tasks[i]->print_stats();
  3359       gclog_or_tty->print_cr("---------------------------------------------------------------------");
  3364 // abandon current marking iteration due to a Full GC
  3365 void ConcurrentMark::abort() {
  3366   // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
  3367   // concurrent bitmap clearing.
  3368   _nextMarkBitMap->clearAll();
  3370   // Note we cannot clear the previous marking bitmap here
  3371   // since VerifyDuringGC verifies the objects marked during
  3372   // a full GC against the previous bitmap.
  3374   // Clear the liveness counting data
  3375   clear_all_count_data();
  3376   // Empty mark stack
  3377   reset_marking_state();
  3378   for (uint i = 0; i < _max_worker_id; ++i) {
  3379     _tasks[i]->clear_region_fields();
  3381   _first_overflow_barrier_sync.abort();
  3382   _second_overflow_barrier_sync.abort();
  3383   const GCId& gc_id = _g1h->gc_tracer_cm()->gc_id();
  3384   if (!gc_id.is_undefined()) {
  3385     // We can do multiple full GCs before ConcurrentMarkThread::run() gets a chance
  3386     // to detect that it was aborted. Only keep track of the first GC id that we aborted.
  3387     _aborted_gc_id = gc_id;
  3389   _has_aborted = true;
  3391   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  3392   satb_mq_set.abandon_partial_marking();
  3393   // This can be called either during or outside marking, we'll read
  3394   // the expected_active value from the SATB queue set.
  3395   satb_mq_set.set_active_all_threads(
  3396                                  false, /* new active value */
  3397                                  satb_mq_set.is_active() /* expected_active */);
  3399   _g1h->trace_heap_after_concurrent_cycle();
  3400   _g1h->register_concurrent_cycle_end();
  3403 const GCId& ConcurrentMark::concurrent_gc_id() {
  3404   if (has_aborted()) {
  3405     return _aborted_gc_id;
  3407   return _g1h->gc_tracer_cm()->gc_id();
  3410 static void print_ms_time_info(const char* prefix, const char* name,
  3411                                NumberSeq& ns) {
  3412   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
  3413                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
  3414   if (ns.num() > 0) {
  3415     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
  3416                            prefix, ns.sd(), ns.maximum());
  3420 void ConcurrentMark::print_summary_info() {
  3421   gclog_or_tty->print_cr(" Concurrent marking:");
  3422   print_ms_time_info("  ", "init marks", _init_times);
  3423   print_ms_time_info("  ", "remarks", _remark_times);
  3425     print_ms_time_info("     ", "final marks", _remark_mark_times);
  3426     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
  3429   print_ms_time_info("  ", "cleanups", _cleanup_times);
  3430   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
  3431                          _total_counting_time,
  3432                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
  3433                           (double)_cleanup_times.num()
  3434                          : 0.0));
  3435   if (G1ScrubRemSets) {
  3436     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
  3437                            _total_rs_scrub_time,
  3438                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
  3439                             (double)_cleanup_times.num()
  3440                            : 0.0));
  3442   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
  3443                          (_init_times.sum() + _remark_times.sum() +
  3444                           _cleanup_times.sum())/1000.0);
  3445   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
  3446                 "(%8.2f s marking).",
  3447                 cmThread()->vtime_accum(),
  3448                 cmThread()->vtime_mark_accum());
  3451 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
  3452   if (use_parallel_marking_threads()) {
  3453     _parallel_workers->print_worker_threads_on(st);
  3457 void ConcurrentMark::print_on_error(outputStream* st) const {
  3458   st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
  3459       p2i(_prevMarkBitMap), p2i(_nextMarkBitMap));
  3460   _prevMarkBitMap->print_on_error(st, " Prev Bits: ");
  3461   _nextMarkBitMap->print_on_error(st, " Next Bits: ");
  3464 // We take a break if someone is trying to stop the world.
  3465 bool ConcurrentMark::do_yield_check(uint worker_id) {
  3466   if (SuspendibleThreadSet::should_yield()) {
  3467     if (worker_id == 0) {
  3468       _g1h->g1_policy()->record_concurrent_pause();
  3470     SuspendibleThreadSet::yield();
  3471     return true;
  3472   } else {
  3473     return false;
  3477 #ifndef PRODUCT
  3478 // for debugging purposes
  3479 void ConcurrentMark::print_finger() {
  3480   gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
  3481                          p2i(_heap_start), p2i(_heap_end), p2i(_finger));
  3482   for (uint i = 0; i < _max_worker_id; ++i) {
  3483     gclog_or_tty->print("   %u: " PTR_FORMAT, i, p2i(_tasks[i]->finger()));
  3485   gclog_or_tty->cr();
  3487 #endif
  3489 void CMTask::scan_object(oop obj) {
  3490   assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
  3492   if (_cm->verbose_high()) {
  3493     gclog_or_tty->print_cr("[%u] we're scanning object "PTR_FORMAT,
  3494                            _worker_id, p2i((void*) obj));
  3497   size_t obj_size = obj->size();
  3498   _words_scanned += obj_size;
  3500   obj->oop_iterate(_cm_oop_closure);
  3501   statsOnly( ++_objs_scanned );
  3502   check_limits();
  3505 // Closure for iteration over bitmaps
  3506 class CMBitMapClosure : public BitMapClosure {
  3507 private:
  3508   // the bitmap that is being iterated over
  3509   CMBitMap*                   _nextMarkBitMap;
  3510   ConcurrentMark*             _cm;
  3511   CMTask*                     _task;
  3513 public:
  3514   CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
  3515     _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
  3517   bool do_bit(size_t offset) {
  3518     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
  3519     assert(_nextMarkBitMap->isMarked(addr), "invariant");
  3520     assert( addr < _cm->finger(), "invariant");
  3522     statsOnly( _task->increase_objs_found_on_bitmap() );
  3523     assert(addr >= _task->finger(), "invariant");
  3525     // We move that task's local finger along.
  3526     _task->move_finger_to(addr);
  3528     _task->scan_object(oop(addr));
  3529     // we only partially drain the local queue and global stack
  3530     _task->drain_local_queue(true);
  3531     _task->drain_global_stack(true);
  3533     // if the has_aborted flag has been raised, we need to bail out of
  3534     // the iteration
  3535     return !_task->has_aborted();
  3537 };
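// Illustrative sketch (editorial): a closure like CMBitMapClosure above is
// meant to be driven over the portion of the next marking bitmap covering the
// claimed region; iteration stops as soon as do_bit() returns false, i.e.
// once the task has aborted. The real call site is in CMTask::do_marking_step();
// the shape, with assumed names and follow-up, is roughly:
//
//   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
//   MemRegion mr(_finger, _region_limit);       // unscanned part of the region
//   if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
//     // the whole range was scanned without aborting
//     giveup_current_region();                  // assumed follow-up, see CMTask
//   }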
  3539 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
  3540                                ConcurrentMark* cm,
  3541                                CMTask* task)
  3542   : _g1h(g1h), _cm(cm), _task(task) {
  3543   assert(_ref_processor == NULL, "should be initialized to NULL");
  3545   if (G1UseConcMarkReferenceProcessing) {
  3546     _ref_processor = g1h->ref_processor_cm();
  3547     assert(_ref_processor != NULL, "should not be NULL");
  3551 void CMTask::setup_for_region(HeapRegion* hr) {
  3552   assert(hr != NULL,
  3553         "claim_region() should have filtered out NULL regions");
  3554   assert(!hr->continuesHumongous(),
  3555         "claim_region() should have filtered out continues humongous regions");
  3557   if (_cm->verbose_low()) {
  3558     gclog_or_tty->print_cr("[%u] setting up for region "PTR_FORMAT,
  3559                            _worker_id, p2i(hr));
  3562   _curr_region  = hr;
  3563   _finger       = hr->bottom();
  3564   update_region_limit();
  3567 void CMTask::update_region_limit() {
  3568   HeapRegion* hr            = _curr_region;
  3569   HeapWord* bottom          = hr->bottom();
  3570   HeapWord* limit           = hr->next_top_at_mark_start();
  3572   if (limit == bottom) {
  3573     if (_cm->verbose_low()) {
  3574       gclog_or_tty->print_cr("[%u] found an empty region "
  3575                              "["PTR_FORMAT", "PTR_FORMAT")",
  3576                              _worker_id, p2i(bottom), p2i(limit));
  3578     // The region was collected underneath our feet.
  3579     // We set the finger to bottom to ensure that the bitmap
  3580     // iteration that will follow this will not do anything.
  3581     // (this is not a condition that holds when we set the region up,
  3582     // as the region is not supposed to be empty in the first place)
  3583     _finger = bottom;
  3584   } else if (limit >= _region_limit) {
  3585     assert(limit >= _finger, "peace of mind");
  3586   } else {
  3587     assert(limit < _region_limit, "only way to get here");
  3588     // This can happen under some pretty unusual circumstances.  An
  3589     // evacuation pause empties the region underneath our feet (NTAMS
  3590     // at bottom). We then do some allocation in the region (NTAMS
  3591     // stays at bottom), followed by the region being used as a GC
  3592     // alloc region (NTAMS will move to top() and the objects
  3593     // originally below it will be grayed). All objects now marked in
  3594     // the region are explicitly grayed, if below the global finger,
  3595     // and in fact we do not need to scan anything else. So, we simply
  3596     // set _finger to be limit to ensure that the bitmap iteration
  3597     // doesn't do anything.
  3598     _finger = limit;
  3601   _region_limit = limit;
  3604 void CMTask::giveup_current_region() {
  3605   assert(_curr_region != NULL, "invariant");
  3606   if (_cm->verbose_low()) {
  3607     gclog_or_tty->print_cr("[%u] giving up region "PTR_FORMAT,
  3608                            _worker_id, p2i(_curr_region));
  3610   clear_region_fields();
  3613 void CMTask::clear_region_fields() {
  3614   // Values for these three fields that indicate that we're not
  3615   // holding on to a region.
  3616   _curr_region   = NULL;
  3617   _finger        = NULL;
  3618   _region_limit  = NULL;
  3621 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
  3622   if (cm_oop_closure == NULL) {
  3623     assert(_cm_oop_closure != NULL, "invariant");
  3624   } else {
  3625     assert(_cm_oop_closure == NULL, "invariant");
  3627   _cm_oop_closure = cm_oop_closure;
  3630 void CMTask::reset(CMBitMap* nextMarkBitMap) {
  3631   guarantee(nextMarkBitMap != NULL, "invariant");
  3633   if (_cm->verbose_low()) {
  3634     gclog_or_tty->print_cr("[%u] resetting", _worker_id);
  3637   _nextMarkBitMap                = nextMarkBitMap;
  3638   clear_region_fields();
  3640   _calls                         = 0;
  3641   _elapsed_time_ms               = 0.0;
  3642   _termination_time_ms           = 0.0;
  3643   _termination_start_time_ms     = 0.0;
  3645 #if _MARKING_STATS_
  3646   _local_pushes                  = 0;
  3647   _local_pops                    = 0;
  3648   _local_max_size                = 0;
  3649   _objs_scanned                  = 0;
  3650   _global_pushes                 = 0;
  3651   _global_pops                   = 0;
  3652   _global_max_size               = 0;
  3653   _global_transfers_to           = 0;
  3654   _global_transfers_from         = 0;
  3655   _regions_claimed               = 0;
  3656   _objs_found_on_bitmap          = 0;
  3657   _satb_buffers_processed        = 0;
  3658   _steal_attempts                = 0;
  3659   _steals                        = 0;
  3660   _aborted                       = 0;
  3661   _aborted_overflow              = 0;
  3662   _aborted_cm_aborted            = 0;
  3663   _aborted_yield                 = 0;
  3664   _aborted_timed_out             = 0;
  3665   _aborted_satb                  = 0;
  3666   _aborted_termination           = 0;
  3667 #endif // _MARKING_STATS_
  3670 bool CMTask::should_exit_termination() {
  3671   regular_clock_call();
  3672   // This is called when we are in the termination protocol. We should
  3673   // quit if, for some reason, this task wants to abort or the global
  3674   // stack is not empty (this means that we can get work from it).
  3675   return !_cm->mark_stack_empty() || has_aborted();
  3678 void CMTask::reached_limit() {
  3679   assert(_words_scanned >= _words_scanned_limit ||
  3680          _refs_reached >= _refs_reached_limit ,
  3681          "shouldn't have been called otherwise");
  3682   regular_clock_call();
  3685 void CMTask::regular_clock_call() {
  3686   if (has_aborted()) return;
  3688   // First, we need to recalculate the words scanned and refs reached
  3689   // limits for the next clock call.
  3690   recalculate_limits();
  3692   // During the regular clock call we do the following
  3694   // (1) If an overflow has been flagged, then we abort.
  3695   if (_cm->has_overflown()) {
  3696     set_has_aborted();
  3697     return;
  3700   // If we are not concurrent (i.e. we're doing remark) we don't need
  3701   // to check anything else. The other steps are only needed during
  3702   // the concurrent marking phase.
  3703   if (!concurrent()) return;
  3705   // (2) If marking has been aborted for Full GC, then we also abort.
  3706   if (_cm->has_aborted()) {
  3707     set_has_aborted();
  3708     statsOnly( ++_aborted_cm_aborted );
  3709     return;
  3712   double curr_time_ms = os::elapsedVTime() * 1000.0;
  3714   // (3) If marking stats are enabled, then we update the step history.
  3715 #if _MARKING_STATS_
  3716   if (_words_scanned >= _words_scanned_limit) {
  3717     ++_clock_due_to_scanning;
  3719   if (_refs_reached >= _refs_reached_limit) {
  3720     ++_clock_due_to_marking;
  3723   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
  3724   _interval_start_time_ms = curr_time_ms;
  3725   _all_clock_intervals_ms.add(last_interval_ms);
  3727   if (_cm->verbose_medium()) {
  3728       gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, "
  3729                         "scanned = "SIZE_FORMAT"%s, refs reached = "SIZE_FORMAT"%s",
  3730                         _worker_id, last_interval_ms,
  3731                         _words_scanned,
  3732                         (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
  3733                         _refs_reached,
  3734                         (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
  3736 #endif // _MARKING_STATS_
  3738   // (4) We check whether we should yield. If we have to, then we abort.
  3739   if (SuspendibleThreadSet::should_yield()) {
  3740     // We should yield. To do this we abort the task. The caller is
  3741     // responsible for yielding.
  3742     set_has_aborted();
  3743     statsOnly( ++_aborted_yield );
  3744     return;
  3747   // (5) We check whether we've reached our time quota. If we have,
  3748   // then we abort.
  3749   double elapsed_time_ms = curr_time_ms - _start_time_ms;
  3750   if (elapsed_time_ms > _time_target_ms) {
  3751     set_has_aborted();
  3752     _has_timed_out = true;
  3753     statsOnly( ++_aborted_timed_out );
  3754     return;
  3757   // (6) Finally, we check whether there are enough completed SATB
  3758   // buffers available for processing. If there are, we abort.
  3759   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  3760   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
  3761     if (_cm->verbose_low()) {
  3762       gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
  3763                              _worker_id);
  3765     // we do need to process SATB buffers, we'll abort and restart
  3766     // the marking task to do so
  3767     set_has_aborted();
  3768     statsOnly( ++_aborted_satb );
  3769     return;
  3773 void CMTask::recalculate_limits() {
  3774   _real_words_scanned_limit = _words_scanned + words_scanned_period;
  3775   _words_scanned_limit      = _real_words_scanned_limit;
  3777   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
  3778   _refs_reached_limit       = _real_refs_reached_limit;
  3781 void CMTask::decrease_limits() {
  3782   // This is called when we believe that we're going to do an infrequent
  3783   // operation which will increase the per byte scanned cost (i.e. move
  3784   // entries to/from the global stack). It basically tries to decrease the
  3785   // scanning limit so that the clock is called earlier.
  3787   if (_cm->verbose_medium()) {
  3788     gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id);
  3791   _words_scanned_limit = _real_words_scanned_limit -
  3792     3 * words_scanned_period / 4;
  3793   _refs_reached_limit  = _real_refs_reached_limit -
  3794     3 * refs_reached_period / 4;
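// Worked example (editorial illustration): right after recalculate_limits(),
// _words_scanned_limit == _real_words_scanned_limit == _words_scanned + P,
// where P is words_scanned_period. decrease_limits() then lowers the limit to
// _words_scanned + P - 3P/4 = _words_scanned + P/4, so the regular clock
// fires after only a quarter of the usual scanning work. The refs_reached
// limit is reduced in exactly the same way.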
  3797 void CMTask::move_entries_to_global_stack() {
  3798   // local array where we'll store the entries that will be popped
  3799   // from the local queue
  3800   oop buffer[global_stack_transfer_size];
  3802   int n = 0;
  3803   oop obj;
  3804   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
  3805     buffer[n] = obj;
  3806     ++n;
  3809   if (n > 0) {
  3810     // we popped at least one entry from the local queue
  3812     statsOnly( ++_global_transfers_to; _local_pops += n );
  3814     if (!_cm->mark_stack_push(buffer, n)) {
  3815       if (_cm->verbose_low()) {
  3816         gclog_or_tty->print_cr("[%u] aborting due to global stack overflow",
  3817                                _worker_id);
  3819       set_has_aborted();
  3820     } else {
  3821       // the transfer was successful
  3823       if (_cm->verbose_medium()) {
  3824         gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack",
  3825                                _worker_id, n);
  3827       statsOnly( int tmp_size = _cm->mark_stack_size();
  3828                  if (tmp_size > _global_max_size) {
  3829                    _global_max_size = tmp_size;
  3831                  _global_pushes += n );
  3835   // this operation was quite expensive, so decrease the limits
  3836   decrease_limits();
  3839 void CMTask::get_entries_from_global_stack() {
  3840   // local array where we'll store the entries that will be popped
  3841   // from the global stack.
  3842   oop buffer[global_stack_transfer_size];
  3843   int n;
  3844   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
  3845   assert(n <= global_stack_transfer_size,
  3846          "we should not pop more than the given limit");
  3847   if (n > 0) {
  3848     // yes, we did actually pop at least one entry
  3850     statsOnly( ++_global_transfers_from; _global_pops += n );
  3851     if (_cm->verbose_medium()) {
  3852       gclog_or_tty->print_cr("[%u] popped %d entries from the global stack",
  3853                              _worker_id, n);
  3855     for (int i = 0; i < n; ++i) {
  3856       bool success = _task_queue->push(buffer[i]);
  3857       // We only call this when the local queue is empty or under a
  3858       // given target limit. So, we do not expect this push to fail.
  3859       assert(success, "invariant");
  3862     statsOnly( int tmp_size = _task_queue->size();
  3863                if (tmp_size > _local_max_size) {
  3864                  _local_max_size = tmp_size;
  3866                _local_pushes += n );
  3869   // this operation was quite expensive, so decrease the limits
  3870   decrease_limits();
  3873 void CMTask::drain_local_queue(bool partially) {
  3874   if (has_aborted()) return;
  3876   // Decide what the target size is, depending on whether we're going to
  3877   // drain it partially (so that other tasks can steal if they run out
  3878   // of things to do) or totally (at the very end).
  3879   size_t target_size;
  3880   if (partially) {
  3881     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
  3882   } else {
  3883     target_size = 0;
  3886   if (_task_queue->size() > target_size) {
  3887     if (_cm->verbose_high()) {
  3888       gclog_or_tty->print_cr("[%u] draining local queue, target size = " SIZE_FORMAT,
  3889                              _worker_id, target_size);
  3892     oop obj;
  3893     bool ret = _task_queue->pop_local(obj);
  3894     while (ret) {
  3895       statsOnly( ++_local_pops );
  3897       if (_cm->verbose_high()) {
  3898         gclog_or_tty->print_cr("[%u] popped "PTR_FORMAT, _worker_id,
  3899                                p2i((void*) obj));
  3902       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
  3903       assert(!_g1h->is_on_master_free_list(
  3904                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
  3906       scan_object(obj);
  3908       if (_task_queue->size() <= target_size || has_aborted()) {
  3909         ret = false;
  3910       } else {
  3911         ret = _task_queue->pop_local(obj);
  3915     if (_cm->verbose_high()) {
  3916       gclog_or_tty->print_cr("[%u] drained local queue, size = %d",
  3917                              _worker_id, _task_queue->size());
  3922 void CMTask::drain_global_stack(bool partially) {
  3923   if (has_aborted()) return;
  3925   // We have a policy to drain the local queue before we attempt to
  3926   // drain the global stack.
  3927   assert(partially || _task_queue->size() == 0, "invariant");
  3929   // Decide what the target size is, depending on whether we're going to
  3930   // drain it partially (so that other tasks can steal if they run out
  3931   // of things to do) or totally (at the very end).  Notice that,
  3932   // because we move entries from the global stack in chunks or
  3933   // because another task might be doing the same, we might in fact
  3934   // drop below the target. But, this is not a problem.
  3935   size_t target_size;
  3936   if (partially) {
  3937     target_size = _cm->partial_mark_stack_size_target();
  3938   } else {
  3939     target_size = 0;
  3942   if (_cm->mark_stack_size() > target_size) {
  3943     if (_cm->verbose_low()) {
  3944       gclog_or_tty->print_cr("[%u] draining global_stack, target size " SIZE_FORMAT,
  3945                              _worker_id, target_size);
  3948     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
  3949       get_entries_from_global_stack();
  3950       drain_local_queue(partially);
  3953     if (_cm->verbose_low()) {
  3954       gclog_or_tty->print_cr("[%u] drained global stack, size = " SIZE_FORMAT,
  3955                              _worker_id, _cm->mark_stack_size());
  3960 // The SATB queue set makes several assumptions about whether to call the par or
  3961 // non-par versions of its methods. This is why some of the code is
  3962 // replicated. We should really get rid of the single-threaded version
  3963 // of the code to simplify things.
  3964 void CMTask::drain_satb_buffers() {
  3965   if (has_aborted()) return;
  3967   // We set this so that the regular clock knows that we're in the
  3968   // middle of draining buffers and doesn't set the abort flag when it
  3969   // notices that SATB buffers are available for draining. It'd be
  3970   // very counterproductive if it did that. :-)
  3971   _draining_satb_buffers = true;
  3973   CMObjectClosure oc(this);
  3974   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  3975   if (G1CollectedHeap::use_parallel_gc_threads()) {
  3976     satb_mq_set.set_par_closure(_worker_id, &oc);
  3977   } else {
  3978     satb_mq_set.set_closure(&oc);
  3981   // This keeps claiming and applying the closure to completed buffers
  3982   // until we run out of buffers or we need to abort.
  3983   if (G1CollectedHeap::use_parallel_gc_threads()) {
  3984     while (!has_aborted() &&
  3985            satb_mq_set.par_apply_closure_to_completed_buffer(_worker_id)) {
  3986       if (_cm->verbose_medium()) {
  3987         gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
  3989       statsOnly( ++_satb_buffers_processed );
  3990       regular_clock_call();
  3992   } else {
  3993     while (!has_aborted() &&
  3994            satb_mq_set.apply_closure_to_completed_buffer()) {
  3995       if (_cm->verbose_medium()) {
  3996         gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
  3998       statsOnly( ++_satb_buffers_processed );
  3999       regular_clock_call();
  4003   _draining_satb_buffers = false;
  4005   assert(has_aborted() ||
  4006          concurrent() ||
  4007          satb_mq_set.completed_buffers_num() == 0, "invariant");
  4009   if (G1CollectedHeap::use_parallel_gc_threads()) {
  4010     satb_mq_set.set_par_closure(_worker_id, NULL);
  4011   } else {
  4012     satb_mq_set.set_closure(NULL);
  4015   // again, this was a potentially expensive operation, decrease the
  4016   // limits to get the regular clock call early
  4017   decrease_limits();
  4020 void CMTask::print_stats() {
  4021   gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d",
  4022                          _worker_id, _calls);
  4023   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
  4024                          _elapsed_time_ms, _termination_time_ms);
  4025   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
  4026                          _step_times_ms.num(), _step_times_ms.avg(),
  4027                          _step_times_ms.sd());
  4028   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
  4029                          _step_times_ms.maximum(), _step_times_ms.sum());
  4031 #if _MARKING_STATS_
  4032   gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
  4033                          _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
  4034                          _all_clock_intervals_ms.sd());
  4035   gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
  4036                          _all_clock_intervals_ms.maximum(),
  4037                          _all_clock_intervals_ms.sum());
  4038   gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
  4039                          _clock_due_to_scanning, _clock_due_to_marking);
  4040   gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
  4041                          _objs_scanned, _objs_found_on_bitmap);
  4042   gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
  4043                          _local_pushes, _local_pops, _local_max_size);
  4044   gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
  4045                          _global_pushes, _global_pops, _global_max_size);
  4046   gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
  4047                          _global_transfers_to,_global_transfers_from);
  4048   gclog_or_tty->print_cr("  Regions: claimed = %d", _regions_claimed);
  4049   gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
  4050   gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
  4051                          _steal_attempts, _steals);
  4052   gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
  4053   gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
  4054                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
  4055   gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
  4056                          _aborted_timed_out, _aborted_satb, _aborted_termination);
  4057 #endif // _MARKING_STATS_
  4060 /*****************************************************************************
  4062     The do_marking_step(time_target_ms, ...) method is the building
  4063     block of the parallel marking framework. It can be called in parallel
  4064     with other invocations of do_marking_step() on different tasks
  4065     (but only one per task, obviously) and concurrently with the
  4066     mutator threads, or during remark, hence it eliminates the need
  4067     for two versions of the code. When called during remark, it will
  4068     pick up from where the task left off during the concurrent marking
  4069     phase. Interestingly, tasks are also claimable during evacuation
  4070     pauses, since do_marking_step() ensures that it aborts before
  4071     it needs to yield.
  4073     The data structures that it uses to do marking work are the
  4074     following:
  4076       (1) Marking Bitmap. If there are gray objects that appear only
  4077       on the bitmap (this happens either when dealing with an overflow
  4078       or when the initial marking phase has simply marked the roots
  4079       and didn't push them on the stack), then tasks claim heap
  4080       regions whose bitmap they then scan to find gray objects. A
  4081       global finger indicates where the end of the last claimed region
  4082       is. A local finger indicates how far into the region a task has
  4083       scanned. The two fingers are used to determine how to gray an
  4084       object (i.e. whether simply marking it is OK, as it will be
  4085       visited by a task in the future, or whether it needs to be also
  4086       pushed on a stack).
  4088       (2) Local Queue. The local queue of the task which is accessed
  4089       reasonably efficiently by the task. Other tasks can steal from
  4090       it when they run out of work. Throughout the marking phase, a
  4091       task attempts to keep its local queue short but not totally
  4092       empty, so that entries are available for stealing by other
  4093     tasks. Only when there is no more work does a task totally
  4094     drain its local queue.
  4096       (3) Global Mark Stack. This handles local queue overflow. During
  4097       marking only sets of entries are moved between it and the local
  4098     queues, as access to it requires a mutex and more fine-grained
  4099     interaction with it, which might cause contention. If it
  4100       overflows, then the marking phase should restart and iterate
  4101       over the bitmap to identify gray objects. Throughout the marking
  4102       phase, tasks attempt to keep the global mark stack at a small
  4103       length but not totally empty, so that entries are available for
  4104     popping by other tasks. Only when there is no more work do tasks
  4105     totally drain the global mark stack.
  4107       (4) SATB Buffer Queue. This is where completed SATB buffers are
  4108       made available. Buffers are regularly removed from this queue
  4109       and scanned for roots, so that the queue doesn't get too
  4110       long. During remark, all completed buffers are processed, as
  4111       well as the filled in parts of any uncompleted buffers.
  4113     The do_marking_step() method tries to abort when the time target
  4114     has been reached. There are a few other cases when the
  4115     do_marking_step() method also aborts:
  4117       (1) When the marking phase has been aborted (after a Full GC).
  4119       (2) When a global overflow (on the global stack) has been
  4120       triggered. Before the task aborts, it will actually sync up with
  4121       the other tasks to ensure that all the marking data structures
  4122       (local queues, stacks, fingers etc.)  are re-initialized so that
  4123       when do_marking_step() completes, the marking phase can
  4124       immediately restart.
  4126       (3) When enough completed SATB buffers are available. The
  4127       do_marking_step() method only tries to drain SATB buffers right
  4128       at the beginning. So, if enough buffers are available, the
  4129       marking step aborts and the SATB buffers are processed at
  4130       the beginning of the next invocation.
  4132       (4) To yield. When we have to yield, we abort and yield
  4133       right at the end of do_marking_step(). This saves us from a lot
  4134     of hassle as, by yielding, we might allow a Full GC. If this
  4135       happens then objects will be compacted underneath our feet, the
  4136       heap might shrink, etc. We save checking for this by just
  4137       aborting and doing the yield right at the end.
  4139     From the above it follows that the do_marking_step() method should
  4140     be called in a loop (or, otherwise, regularly) until it completes.
  4142     If a marking step completes without its has_aborted() flag being
  4143     true, it means it has completed the current marking phase (and
  4144     also all other marking tasks have done so and have all synced up).
  4146     A method called regular_clock_call() is invoked "regularly" (in
  4147     sub ms intervals) throughout marking. It is this clock method that
  4148     checks all the abort conditions which were mentioned above and
  4149     decides when the task should abort. A work-based scheme is used to
   4150     trigger this clock method: it is invoked when the number of object
   4151     words the marking phase has scanned or the number of references it
   4152     has visited reaches a given limit. Additional invocations of the
   4153     clock method have been planted in a few other strategic places
   4154     too. The initial reason for the clock method was to avoid calling
   4155     vtime too regularly, as it is quite expensive. So, once it was in
  4156     place, it was natural to piggy-back all the other conditions on it
  4157     too and not constantly check them throughout the code.
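
            As an illustration only: the counters below (_words_scanned,
            _refs_reached) and the methods recalculate_limits() and
            regular_clock_call() appear in this file, but the *_limit field
            names and the exact shape of the check are an assumption, not a
            quote of the real code. The work-based trigger amounts to
            something like:

              // after scanning an object of size "words" (in heap words)
              _words_scanned += words;
              if (_words_scanned >= _words_scanned_limit ||
                  _refs_reached  >= _refs_reached_limit) {
                regular_clock_call(); // time target, overflow, SATB, yield
              }

            where the limits are (re)computed by recalculate_limits() at the
            start of each do_marking_step() invocation.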
  4159     If do_termination is true then do_marking_step will enter its
  4160     termination protocol.
  4162     The value of is_serial must be true when do_marking_step is being
  4163     called serially (i.e. by the VMThread) and do_marking_step should
  4164     skip any synchronization in the termination and overflow code.
  4165     Examples include the serial remark code and the serial reference
  4166     processing closures.
  4168     The value of is_serial must be false when do_marking_step is
  4169     being called by any of the worker threads in a work gang.
  4170     Examples include the concurrent marking code (CMMarkingTask),
  4171     the MT remark code, and the MT reference processing closures.
  4173  *****************************************************************************/
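
        // Caller-side sketch of the contract described above. This is
        // illustrative only: the 10ms time target and the loop condition
        // are assumptions, not a quote of the actual calling code in the
        // concurrent marking task or the remark/reference processing paths.
        //
        //   do {
        //     task->do_marking_step(10.0 /* time_target_ms */,
        //                           true  /* do_termination */,
        //                           false /* is_serial */);
        //     // has_aborted() is true if the step ran out of time, hit a
        //     // global overflow, had to yield, etc.; in that case simply
        //     // invoke the step again (unless marking itself was aborted).
        //   } while (task->has_aborted() && !cm->has_aborted());
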
  4175 void CMTask::do_marking_step(double time_target_ms,
  4176                              bool do_termination,
  4177                              bool is_serial) {
  4178   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
  4179   assert(concurrent() == _cm->concurrent(), "they should be the same");
  4181   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
  4182   assert(_task_queues != NULL, "invariant");
  4183   assert(_task_queue != NULL, "invariant");
  4184   assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
  4186   assert(!_claimed,
  4187          "only one thread should claim this task at any one time");
   4189   // OK, this doesn't safeguard against all possible scenarios, as it is
  4190   // possible for two threads to set the _claimed flag at the same
  4191   // time. But it is only for debugging purposes anyway and it will
  4192   // catch most problems.
  4193   _claimed = true;
  4195   _start_time_ms = os::elapsedVTime() * 1000.0;
  4196   statsOnly( _interval_start_time_ms = _start_time_ms );
  4198   // If do_stealing is true then do_marking_step will attempt to
  4199   // steal work from the other CMTasks. It only makes sense to
  4200   // enable stealing when the termination protocol is enabled
  4201   // and do_marking_step() is not being called serially.
  4202   bool do_stealing = do_termination && !is_serial;
  4204   double diff_prediction_ms =
  4205     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
  4206   _time_target_ms = time_target_ms - diff_prediction_ms;
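          // Worked example (illustrative numbers): if the caller asked for a
          // 10.0ms step and _marking_step_diffs_ms predicts that we typically
          // overshoot the target by about 0.5ms, then _time_target_ms becomes
          // 9.5ms, so the step as a whole should still finish near 10ms.
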
  4208   // set up the variables that are used in the work-based scheme to
  4209   // call the regular clock method
  4210   _words_scanned = 0;
  4211   _refs_reached  = 0;
  4212   recalculate_limits();
  4214   // clear all flags
  4215   clear_has_aborted();
  4216   _has_timed_out = false;
  4217   _draining_satb_buffers = false;
  4219   ++_calls;
  4221   if (_cm->verbose_low()) {
  4222     gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, "
  4223                            "target = %1.2lfms >>>>>>>>>>",
  4224                            _worker_id, _calls, _time_target_ms);
   4225   }
  4227   // Set up the bitmap and oop closures. Anything that uses them is
  4228   // eventually called from this method, so it is OK to allocate these
  4229   // statically.
  4230   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
  4231   G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
  4232   set_cm_oop_closure(&cm_oop_closure);
  4234   if (_cm->has_overflown()) {
  4235     // This can happen if the mark stack overflows during a GC pause
  4236     // and this task, after a yield point, restarts. We have to abort
  4237     // as we need to get into the overflow protocol which happens
  4238     // right at the end of this task.
  4239     set_has_aborted();
   4240   }
  4242   // First drain any available SATB buffers. After this, we will not
  4243   // look at SATB buffers before the next invocation of this method.
  4244   // If enough completed SATB buffers are queued up, the regular clock
  4245   // will abort this task so that it restarts.
  4246   drain_satb_buffers();
  4247   // ...then partially drain the local queue and the global stack
  4248   drain_local_queue(true);
  4249   drain_global_stack(true);
  4251   do {
  4252     if (!has_aborted() && _curr_region != NULL) {
  4253       // This means that we're already holding on to a region.
  4254       assert(_finger != NULL, "if region is not NULL, then the finger "
  4255              "should not be NULL either");
  4257       // We might have restarted this task after an evacuation pause
  4258       // which might have evacuated the region we're holding on to
  4259       // underneath our feet. Let's read its limit again to make sure
  4260       // that we do not iterate over a region of the heap that
  4261       // contains garbage (update_region_limit() will also move
  4262       // _finger to the start of the region if it is found empty).
  4263       update_region_limit();
  4264       // We will start from _finger not from the start of the region,
  4265       // as we might be restarting this task after aborting half-way
  4266       // through scanning this region. In this case, _finger points to
  4267       // the address where we last found a marked object. If this is a
  4268       // fresh region, _finger points to start().
  4269       MemRegion mr = MemRegion(_finger, _region_limit);
  4271       if (_cm->verbose_low()) {
  4272         gclog_or_tty->print_cr("[%u] we're scanning part "
  4273                                "["PTR_FORMAT", "PTR_FORMAT") "
  4274                                "of region "HR_FORMAT,
  4275                                _worker_id, p2i(_finger), p2i(_region_limit),
  4276                                HR_FORMAT_PARAMS(_curr_region));
   4277       }
  4279       assert(!_curr_region->isHumongous() || mr.start() == _curr_region->bottom(),
  4280              "humongous regions should go around loop once only");
  4282       // Some special cases:
  4283       // If the memory region is empty, we can just give up the region.
  4284       // If the current region is humongous then we only need to check
  4285       // the bitmap for the bit associated with the start of the object,
  4286       // scan the object if it's live, and give up the region.
  4287       // Otherwise, let's iterate over the bitmap of the part of the region
  4288       // that is left.
  4289       // If the iteration is successful, give up the region.
  4290       if (mr.is_empty()) {
  4291         giveup_current_region();
  4292         regular_clock_call();
  4293       } else if (_curr_region->isHumongous() && mr.start() == _curr_region->bottom()) {
  4294         if (_nextMarkBitMap->isMarked(mr.start())) {
  4295           // The object is marked - apply the closure
  4296           BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
  4297           bitmap_closure.do_bit(offset);
   4298         }
  4299         // Even if this task aborted while scanning the humongous object
  4300         // we can (and should) give up the current region.
  4301         giveup_current_region();
  4302         regular_clock_call();
  4303       } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
  4304         giveup_current_region();
  4305         regular_clock_call();
  4306       } else {
  4307         assert(has_aborted(), "currently the only way to do so");
  4308         // The only way to abort the bitmap iteration is to return
  4309         // false from the do_bit() method. However, inside the
  4310         // do_bit() method we move the _finger to point to the
  4311         // object currently being looked at. So, if we bail out, we
  4312         // have definitely set _finger to something non-null.
  4313         assert(_finger != NULL, "invariant");
  4315         // Region iteration was actually aborted. So now _finger
  4316         // points to the address of the object we last scanned. If we
  4317         // leave it there, when we restart this task, we will rescan
  4318         // the object. It is easy to avoid this. We move the finger by
  4319         // enough to point to the next possible object header (the
  4320         // bitmap knows by how much we need to move it as it knows its
  4321         // granularity).
  4322         assert(_finger < _region_limit, "invariant");
  4323         HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
  4324         // Check if bitmap iteration was aborted while scanning the last object
  4325         if (new_finger >= _region_limit) {
  4326           giveup_current_region();
  4327         } else {
  4328           move_finger_to(new_finger);
   4329         }
   4330       }
   4331     }
  4332     // At this point we have either completed iterating over the
  4333     // region we were holding on to, or we have aborted.
  4335     // We then partially drain the local queue and the global stack.
  4336     // (Do we really need this?)
  4337     drain_local_queue(true);
  4338     drain_global_stack(true);
  4340     // Read the note on the claim_region() method on why it might
  4341     // return NULL with potentially more regions available for
  4342     // claiming and why we have to check out_of_regions() to determine
  4343     // whether we're done or not.
  4344     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
  4345       // We are going to try to claim a new region. We should have
  4346       // given up on the previous one.
  4347       // Separated the asserts so that we know which one fires.
  4348       assert(_curr_region  == NULL, "invariant");
  4349       assert(_finger       == NULL, "invariant");
  4350       assert(_region_limit == NULL, "invariant");
  4351       if (_cm->verbose_low()) {
  4352         gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id);
   4353       }
  4354       HeapRegion* claimed_region = _cm->claim_region(_worker_id);
  4355       if (claimed_region != NULL) {
  4356         // Yes, we managed to claim one
  4357         statsOnly( ++_regions_claimed );
  4359         if (_cm->verbose_low()) {
  4360           gclog_or_tty->print_cr("[%u] we successfully claimed "
  4361                                  "region "PTR_FORMAT,
  4362                                  _worker_id, p2i(claimed_region));
   4363         }
  4365         setup_for_region(claimed_region);
  4366         assert(_curr_region == claimed_region, "invariant");
   4367       }
  4368       // It is important to call the regular clock here. It might take
  4369       // a while to claim a region if, for example, we hit a large
  4370       // block of empty regions. So we need to call the regular clock
  4371       // method once round the loop to make sure it's called
  4372       // frequently enough.
  4373       regular_clock_call();
   4374     }
  4376     if (!has_aborted() && _curr_region == NULL) {
  4377       assert(_cm->out_of_regions(),
  4378              "at this point we should be out of regions");
   4379     }
  4380   } while ( _curr_region != NULL && !has_aborted());
  4382   if (!has_aborted()) {
  4383     // We cannot check whether the global stack is empty, since other
  4384     // tasks might be pushing objects to it concurrently.
  4385     assert(_cm->out_of_regions(),
  4386            "at this point we should be out of regions");
  4388     if (_cm->verbose_low()) {
  4389       gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id);
   4390     }
  4392     // Try to reduce the number of available SATB buffers so that
  4393     // remark has less work to do.
  4394     drain_satb_buffers();
   4395   }
  4397   // Since we've done everything else, we can now totally drain the
  4398   // local queue and global stack.
  4399   drain_local_queue(false);
  4400   drain_global_stack(false);
   4402   // Attempt at work stealing from other tasks' queues.
  4403   if (do_stealing && !has_aborted()) {
  4404     // We have not aborted. This means that we have finished all that
  4405     // we could. Let's try to do some stealing...
  4407     // We cannot check whether the global stack is empty, since other
  4408     // tasks might be pushing objects to it concurrently.
  4409     assert(_cm->out_of_regions() && _task_queue->size() == 0,
  4410            "only way to reach here");
  4412     if (_cm->verbose_low()) {
  4413       gclog_or_tty->print_cr("[%u] starting to steal", _worker_id);
   4414     }
  4416     while (!has_aborted()) {
  4417       oop obj;
  4418       statsOnly( ++_steal_attempts );
  4420       if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
  4421         if (_cm->verbose_medium()) {
  4422           gclog_or_tty->print_cr("[%u] stolen "PTR_FORMAT" successfully",
  4423                                  _worker_id, p2i((void*) obj));
   4424         }
  4426         statsOnly( ++_steals );
  4428         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
  4429                "any stolen object should be marked");
  4430         scan_object(obj);
  4432         // And since we're towards the end, let's totally drain the
  4433         // local queue and global stack.
  4434         drain_local_queue(false);
  4435         drain_global_stack(false);
  4436       } else {
  4437         break;
   4438       }
   4439     }
   4440   }
  4442   // If we are about to wrap up and go into termination, check if we
  4443   // should raise the overflow flag.
  4444   if (do_termination && !has_aborted()) {
  4445     if (_cm->force_overflow()->should_force()) {
  4446       _cm->set_has_overflown();
  4447       regular_clock_call();
   4448     }
   4449   }
  4451   // We still haven't aborted. Now, let's try to get into the
  4452   // termination protocol.
  4453   if (do_termination && !has_aborted()) {
  4454     // We cannot check whether the global stack is empty, since other
  4455     // tasks might be concurrently pushing objects on it.
  4456     // Separated the asserts so that we know which one fires.
  4457     assert(_cm->out_of_regions(), "only way to reach here");
  4458     assert(_task_queue->size() == 0, "only way to reach here");
  4460     if (_cm->verbose_low()) {
  4461       gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id);
   4462     }
  4464     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
  4466     // The CMTask class also extends the TerminatorTerminator class,
  4467     // hence its should_exit_termination() method will also decide
  4468     // whether to exit the termination protocol or not.
  4469     bool finished = (is_serial ||
  4470                      _cm->terminator()->offer_termination(this));
  4471     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
  4472     _termination_time_ms +=
  4473       termination_end_time_ms - _termination_start_time_ms;
  4475     if (finished) {
  4476       // We're all done.
  4478       if (_worker_id == 0) {
  4479         // let's allow task 0 to do this
  4480         if (concurrent()) {
  4481           assert(_cm->concurrent_marking_in_progress(), "invariant");
  4482           // we need to set this to false before the next
  4483           // safepoint. This way we ensure that the marking phase
  4484           // doesn't observe any more heap expansions.
  4485           _cm->clear_concurrent_marking_in_progress();
   4486         }
   4487       }
  4489       // We can now guarantee that the global stack is empty, since
  4490       // all other tasks have finished. We separated the guarantees so
  4491       // that, if a condition is false, we can immediately find out
  4492       // which one.
  4493       guarantee(_cm->out_of_regions(), "only way to reach here");
  4494       guarantee(_cm->mark_stack_empty(), "only way to reach here");
  4495       guarantee(_task_queue->size() == 0, "only way to reach here");
  4496       guarantee(!_cm->has_overflown(), "only way to reach here");
  4497       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
  4499       if (_cm->verbose_low()) {
  4500         gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id);
   4501       }
  4502     } else {
   4503       // Apparently there's more work to do. Let's abort this task. Its
   4504       // caller will restart it and we can hopefully find more things to do.
  4506       if (_cm->verbose_low()) {
  4507         gclog_or_tty->print_cr("[%u] apparently there is more work to do",
  4508                                _worker_id);
   4509       }
  4511       set_has_aborted();
  4512       statsOnly( ++_aborted_termination );
   4513     }
   4514   }
  4516   // Mainly for debugging purposes to make sure that a pointer to the
  4517   // closure which was statically allocated in this frame doesn't
  4518   // escape it by accident.
  4519   set_cm_oop_closure(NULL);
  4520   double end_time_ms = os::elapsedVTime() * 1000.0;
  4521   double elapsed_time_ms = end_time_ms - _start_time_ms;
  4522   // Update the step history.
  4523   _step_times_ms.add(elapsed_time_ms);
  4525   if (has_aborted()) {
  4526     // The task was aborted for some reason.
  4528     statsOnly( ++_aborted );
  4530     if (_has_timed_out) {
  4531       double diff_ms = elapsed_time_ms - _time_target_ms;
  4532       // Keep statistics of how well we did with respect to hitting
  4533       // our target only if we actually timed out (if we aborted for
  4534       // other reasons, then the results might get skewed).
  4535       _marking_step_diffs_ms.add(diff_ms);
   4536     }
  4538     if (_cm->has_overflown()) {
  4539       // This is the interesting one. We aborted because a global
  4540       // overflow was raised. This means we have to restart the
  4541       // marking phase and start iterating over regions. However, in
  4542       // order to do this we have to make sure that all tasks stop
  4543       // what they are doing and re-initialise in a safe manner. We
  4544       // will achieve this with the use of two barrier sync points.
  4546       if (_cm->verbose_low()) {
  4547         gclog_or_tty->print_cr("[%u] detected overflow", _worker_id);
   4548       }
  4550       if (!is_serial) {
  4551         // We only need to enter the sync barrier if being called
  4552         // from a parallel context
  4553         _cm->enter_first_sync_barrier(_worker_id);
  4555         // When we exit this sync barrier we know that all tasks have
  4556         // stopped doing marking work. So, it's now safe to
  4557         // re-initialise our data structures. At the end of this method,
  4558         // task 0 will clear the global data structures.
   4559       }
  4561       statsOnly( ++_aborted_overflow );
  4563       // We clear the local state of this task...
  4564       clear_region_fields();
  4566       if (!is_serial) {
  4567         // ...and enter the second barrier.
  4568         _cm->enter_second_sync_barrier(_worker_id);
   4569       }
  4570       // At this point, if we're during the concurrent phase of
  4571       // marking, everything has been re-initialized and we're
  4572       // ready to restart.
   4573     }
  4575     if (_cm->verbose_low()) {
  4576       gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, "
  4577                              "elapsed = %1.2lfms <<<<<<<<<<",
  4578                              _worker_id, _time_target_ms, elapsed_time_ms);
  4579       if (_cm->has_aborted()) {
  4580         gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========",
  4581                                _worker_id);
   4582       }
   4583     }
  4584   } else {
  4585     if (_cm->verbose_low()) {
  4586       gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, "
  4587                              "elapsed = %1.2lfms <<<<<<<<<<",
  4588                              _worker_id, _time_target_ms, elapsed_time_ms);
   4589     }
   4590   }
  4592   _claimed = false;
   4593 }
  4595 CMTask::CMTask(uint worker_id,
  4596                ConcurrentMark* cm,
  4597                size_t* marked_bytes,
  4598                BitMap* card_bm,
  4599                CMTaskQueue* task_queue,
  4600                CMTaskQueueSet* task_queues)
  4601   : _g1h(G1CollectedHeap::heap()),
  4602     _worker_id(worker_id), _cm(cm),
  4603     _claimed(false),
  4604     _nextMarkBitMap(NULL), _hash_seed(17),
  4605     _task_queue(task_queue),
  4606     _task_queues(task_queues),
  4607     _cm_oop_closure(NULL),
  4608     _marked_bytes_array(marked_bytes),
  4609     _card_bm(card_bm) {
  4610   guarantee(task_queue != NULL, "invariant");
  4611   guarantee(task_queues != NULL, "invariant");
  4613   statsOnly( _clock_due_to_scanning = 0;
  4614              _clock_due_to_marking  = 0 );
  4616   _marking_step_diffs_ms.add(0.5);
   4617 }
  4619 // These are formatting macros that are used below to ensure
  4620 // consistent formatting. The *_H_* versions are used to format the
  4621 // header for a particular value and they should be kept consistent
  4622 // with the corresponding macro. Also note that most of the macros add
  4623 // the necessary white space (as a prefix) which makes them a bit
  4624 // easier to compose.
  4626 // All the output lines are prefixed with this string to be able to
  4627 // identify them easily in a large log file.
  4628 #define G1PPRL_LINE_PREFIX            "###"
  4630 #define G1PPRL_ADDR_BASE_FORMAT    " "PTR_FORMAT"-"PTR_FORMAT
  4631 #ifdef _LP64
  4632 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
  4633 #else // _LP64
  4634 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
  4635 #endif // _LP64
  4637 // For per-region info
  4638 #define G1PPRL_TYPE_FORMAT            "   %-4s"
  4639 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
  4640 #define G1PPRL_BYTE_FORMAT            "  "SIZE_FORMAT_W(9)
  4641 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
  4642 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
  4643 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
  4645 // For summary info
  4646 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  "tag":"G1PPRL_ADDR_BASE_FORMAT
  4647 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  "tag": "SIZE_FORMAT
  4648 #define G1PPRL_SUM_MB_FORMAT(tag)      "  "tag": %1.2f MB"
  4649 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
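
        // Usage illustration (made-up argument): because adjacent string
        // literals are concatenated, a call such as
        //
        //   _out->print_cr(G1PPRL_LINE_PREFIX
        //                  G1PPRL_SUM_MB_FORMAT("capacity"),
        //                  bytes_to_mb(_total_capacity_bytes));
        //
        // passes the single format string "###  capacity: %1.2f MB" to
        // print_cr(). The header, per-region and summary lines below are
        // all composed this way.
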
  4651 G1PrintRegionLivenessInfoClosure::
  4652 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
  4653   : _out(out),
  4654     _total_used_bytes(0), _total_capacity_bytes(0),
  4655     _total_prev_live_bytes(0), _total_next_live_bytes(0),
  4656     _hum_used_bytes(0), _hum_capacity_bytes(0),
  4657     _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
  4658     _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
  4659   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  4660   MemRegion g1_reserved = g1h->g1_reserved();
  4661   double now = os::elapsedTime();
  4663   // Print the header of the output.
  4664   _out->cr();
  4665   _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
  4666   _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
  4667                  G1PPRL_SUM_ADDR_FORMAT("reserved")
  4668                  G1PPRL_SUM_BYTE_FORMAT("region-size"),
  4669                  p2i(g1_reserved.start()), p2i(g1_reserved.end()),
  4670                  HeapRegion::GrainBytes);
  4671   _out->print_cr(G1PPRL_LINE_PREFIX);
  4672   _out->print_cr(G1PPRL_LINE_PREFIX
  4673                 G1PPRL_TYPE_H_FORMAT
  4674                 G1PPRL_ADDR_BASE_H_FORMAT
  4675                 G1PPRL_BYTE_H_FORMAT
  4676                 G1PPRL_BYTE_H_FORMAT
  4677                 G1PPRL_BYTE_H_FORMAT
  4678                 G1PPRL_DOUBLE_H_FORMAT
  4679                 G1PPRL_BYTE_H_FORMAT
  4680                 G1PPRL_BYTE_H_FORMAT,
  4681                 "type", "address-range",
  4682                 "used", "prev-live", "next-live", "gc-eff",
  4683                 "remset", "code-roots");
  4684   _out->print_cr(G1PPRL_LINE_PREFIX
  4685                 G1PPRL_TYPE_H_FORMAT
  4686                 G1PPRL_ADDR_BASE_H_FORMAT
  4687                 G1PPRL_BYTE_H_FORMAT
  4688                 G1PPRL_BYTE_H_FORMAT
  4689                 G1PPRL_BYTE_H_FORMAT
  4690                 G1PPRL_DOUBLE_H_FORMAT
  4691                 G1PPRL_BYTE_H_FORMAT
  4692                 G1PPRL_BYTE_H_FORMAT,
  4693                 "", "",
  4694                 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
  4695                 "(bytes)", "(bytes)");
   4696 }
   4698 // It takes as a parameter a pointer to one of the _hum_* fields, deduces
   4699 // the corresponding value for a region in a humongous region
   4700 // series (either the region size, or what's left of it if the _hum_*
   4701 // field is < the region size), and updates the _hum_* field accordingly.
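        // Worked example (illustrative numbers): with HeapRegion::GrainBytes
        // at 1M and *hum_bytes currently holding 2.5M, three successive calls
        // return 1M, 1M and 0.5M respectively, leaving *hum_bytes at zero; a
        // further call would simply return 0 because of the > 0 check below.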
  4702 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
  4703   size_t bytes = 0;
  4704   // The > 0 check is to deal with the prev and next live bytes which
  4705   // could be 0.
  4706   if (*hum_bytes > 0) {
  4707     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
  4708     *hum_bytes -= bytes;
   4709   }
  4710   return bytes;
   4711 }
  4713 // It deduces the values for a region in a humongous region series
  4714 // from the _hum_* fields and updates those accordingly. It assumes
   4715 // that the _hum_* fields have already been set up from the "starts
  4716 // humongous" region and we visit the regions in address order.
  4717 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
  4718                                                      size_t* capacity_bytes,
  4719                                                      size_t* prev_live_bytes,
  4720                                                      size_t* next_live_bytes) {
  4721   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
  4722   *used_bytes      = get_hum_bytes(&_hum_used_bytes);
  4723   *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
  4724   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
  4725   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
   4726 }
  4728 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
  4729   const char* type = "";
  4730   HeapWord* bottom       = r->bottom();
  4731   HeapWord* end          = r->end();
  4732   size_t capacity_bytes  = r->capacity();
  4733   size_t used_bytes      = r->used();
  4734   size_t prev_live_bytes = r->live_bytes();
  4735   size_t next_live_bytes = r->next_live_bytes();
  4736   double gc_eff          = r->gc_efficiency();
  4737   size_t remset_bytes    = r->rem_set()->mem_size();
  4738   size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
  4740   if (r->used() == 0) {
  4741     type = "FREE";
  4742   } else if (r->is_survivor()) {
  4743     type = "SURV";
  4744   } else if (r->is_young()) {
  4745     type = "EDEN";
  4746   } else if (r->startsHumongous()) {
  4747     type = "HUMS";
  4749     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
  4750            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
  4751            "they should have been zeroed after the last time we used them");
  4752     // Set up the _hum_* fields.
  4753     _hum_capacity_bytes  = capacity_bytes;
  4754     _hum_used_bytes      = used_bytes;
  4755     _hum_prev_live_bytes = prev_live_bytes;
  4756     _hum_next_live_bytes = next_live_bytes;
  4757     get_hum_bytes(&used_bytes, &capacity_bytes,
  4758                   &prev_live_bytes, &next_live_bytes);
  4759     end = bottom + HeapRegion::GrainWords;
  4760   } else if (r->continuesHumongous()) {
  4761     type = "HUMC";
  4762     get_hum_bytes(&used_bytes, &capacity_bytes,
  4763                   &prev_live_bytes, &next_live_bytes);
  4764     assert(end == bottom + HeapRegion::GrainWords, "invariant");
  4765   } else {
  4766     type = "OLD";
   4767   }
  4769   _total_used_bytes      += used_bytes;
  4770   _total_capacity_bytes  += capacity_bytes;
  4771   _total_prev_live_bytes += prev_live_bytes;
  4772   _total_next_live_bytes += next_live_bytes;
  4773   _total_remset_bytes    += remset_bytes;
  4774   _total_strong_code_roots_bytes += strong_code_roots_bytes;
  4776   // Print a line for this particular region.
  4777   _out->print_cr(G1PPRL_LINE_PREFIX
  4778                  G1PPRL_TYPE_FORMAT
  4779                  G1PPRL_ADDR_BASE_FORMAT
  4780                  G1PPRL_BYTE_FORMAT
  4781                  G1PPRL_BYTE_FORMAT
  4782                  G1PPRL_BYTE_FORMAT
  4783                  G1PPRL_DOUBLE_FORMAT
  4784                  G1PPRL_BYTE_FORMAT
  4785                  G1PPRL_BYTE_FORMAT,
  4786                  type, p2i(bottom), p2i(end),
  4787                  used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
  4788                  remset_bytes, strong_code_roots_bytes);
  4790   return false;
   4791 }
  4793 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
  4794   // add static memory usages to remembered set sizes
  4795   _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
  4796   // Print the footer of the output.
  4797   _out->print_cr(G1PPRL_LINE_PREFIX);
  4798   _out->print_cr(G1PPRL_LINE_PREFIX
  4799                  " SUMMARY"
  4800                  G1PPRL_SUM_MB_FORMAT("capacity")
  4801                  G1PPRL_SUM_MB_PERC_FORMAT("used")
  4802                  G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
  4803                  G1PPRL_SUM_MB_PERC_FORMAT("next-live")
  4804                  G1PPRL_SUM_MB_FORMAT("remset")
  4805                  G1PPRL_SUM_MB_FORMAT("code-roots"),
  4806                  bytes_to_mb(_total_capacity_bytes),
  4807                  bytes_to_mb(_total_used_bytes),
  4808                  perc(_total_used_bytes, _total_capacity_bytes),
  4809                  bytes_to_mb(_total_prev_live_bytes),
  4810                  perc(_total_prev_live_bytes, _total_capacity_bytes),
  4811                  bytes_to_mb(_total_next_live_bytes),
  4812                  perc(_total_next_live_bytes, _total_capacity_bytes),
  4813                  bytes_to_mb(_total_remset_bytes),
  4814                  bytes_to_mb(_total_strong_code_roots_bytes));
  4815   _out->cr();
   4816 }
