src/share/vm/gc_implementation/g1/concurrentMark.cpp

author      stefank
date        Tue, 23 Nov 2010 13:22:55 -0800
changeset   2314:f95d63e2154a
parent      2240:a5c514e74487
child       2316:fd1d227ef1b9
permissions -rw-r--r--

6989984: Use standard include model for HotSpot
Summary: Replaced MakeDeps and the includeDB files with more standardized solutions.
Reviewed-by: coleenp, kvn, kamg
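
For context on the change itself: dependencies for this file used to be declared in includeDB files that the MakeDeps tool expanded at build time; after this changeset each translation unit simply names its headers with ordinary directives. A rough before/after sketch (the includeDB line is illustrative of the old format, not quoted from the repository):

    // before: an includeDB_core entry consumed by MakeDeps, roughly
    //   concurrentMark.cpp    concurrentMark.hpp
    // after: standard includes written directly in the source,
    // exactly as in the listing below
    #include "precompiled.hpp"
    #include "gc_implementation/g1/concurrentMark.hpp"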

     1 /*
     2  * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.
     8  *
     9  * This code is distributed in the hope that it will be useful, but WITHOUT
    10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    12  * version 2 for more details (a copy is included in the LICENSE file that
    13  * accompanied this code).
    14  *
    15  * You should have received a copy of the GNU General Public License version
    16  * 2 along with this work; if not, write to the Free Software Foundation,
    17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    18  *
    19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    20  * or visit www.oracle.com if you need additional information or have any
    21  * questions.
    22  *
    23  */
    25 #include "precompiled.hpp"
    26 #include "classfile/symbolTable.hpp"
    27 #include "gc_implementation/g1/concurrentMark.hpp"
    28 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
    29 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
    30 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
    31 #include "gc_implementation/g1/g1RemSet.hpp"
    32 #include "gc_implementation/g1/heapRegionRemSet.hpp"
    33 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
    34 #include "memory/genOopClosures.inline.hpp"
    35 #include "memory/referencePolicy.hpp"
    36 #include "memory/resourceArea.hpp"
    37 #include "oops/oop.inline.hpp"
    38 #include "runtime/handles.inline.hpp"
    39 #include "runtime/java.hpp"
    41 //
    42 // CMS Bit Map Wrapper
    44 CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter):
    45   _bm((uintptr_t*)NULL,0),
    46   _shifter(shifter) {
    47   _bmStartWord = (HeapWord*)(rs.base());
    48   _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
    49   ReservedSpace brs(ReservedSpace::allocation_align_size_up(
    50                      (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
    52   guarantee(brs.is_reserved(), "couldn't allocate CMS bit map");
    53   // For now we'll just commit all of the bit map up front.
    54   // Later on we'll try to be more parsimonious with swap.
    55   guarantee(_virtual_space.initialize(brs, brs.size()),
    56             "couldn't reseve backing store for CMS bit map");
    57   assert(_virtual_space.committed_size() == brs.size(),
    58          "didn't reserve backing store for all of CMS bit map?");
    59   _bm.set_map((uintptr_t*)_virtual_space.low());
    60   assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
    61          _bmWordSize, "inconsistency in bit map sizing");
    62   _bm.set_size(_bmWordSize >> _shifter);
    63 }
    65 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
    66                                                HeapWord* limit) const {
    67   // First we must round addr *up* to a possible object boundary.
    68   addr = (HeapWord*)align_size_up((intptr_t)addr,
    69                                   HeapWordSize << _shifter);
    70   size_t addrOffset = heapWordToOffset(addr);
    71   if (limit == NULL) limit = _bmStartWord + _bmWordSize;
    72   size_t limitOffset = heapWordToOffset(limit);
    73   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
    74   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
    75   assert(nextAddr >= addr, "get_next_one postcondition");
    76   assert(nextAddr == limit || isMarked(nextAddr),
    77          "get_next_one postcondition");
    78   return nextAddr;
    79 }
    81 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
    82                                                  HeapWord* limit) const {
    83   size_t addrOffset = heapWordToOffset(addr);
    84   if (limit == NULL) limit = _bmStartWord + _bmWordSize;
    85   size_t limitOffset = heapWordToOffset(limit);
    86   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
    87   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
    88   assert(nextAddr >= addr, "get_next_one postcondition");
    89   assert(nextAddr == limit || !isMarked(nextAddr),
    90          "get_next_one postcondition");
    91   return nextAddr;
    92 }
    94 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
    95   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
    96   return (int) (diff >> _shifter);
    97 }
    99 bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
   100   HeapWord* left  = MAX2(_bmStartWord, mr.start());
   101   HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
   102   if (right > left) {
   103     // Right-open interval [leftOffset, rightOffset).
   104     return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
   105   } else {
   106     return true;
   107   }
   108 }
   110 void CMBitMapRO::mostly_disjoint_range_union(BitMap*   from_bitmap,
   111                                              size_t    from_start_index,
   112                                              HeapWord* to_start_word,
   113                                              size_t    word_num) {
   114   _bm.mostly_disjoint_range_union(from_bitmap,
   115                                   from_start_index,
   116                                   heapWordToOffset(to_start_word),
   117                                   word_num);
   118 }
   120 #ifndef PRODUCT
   121 bool CMBitMapRO::covers(ReservedSpace rs) const {
   122   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
   123   assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
   124          "size inconsistency");
   125   return _bmStartWord == (HeapWord*)(rs.base()) &&
   126          _bmWordSize  == rs.size()>>LogHeapWordSize;
   127 }
   128 #endif
   130 void CMBitMap::clearAll() {
   131   _bm.clear();
   132   return;
   133 }
   135 void CMBitMap::markRange(MemRegion mr) {
   136   mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
   137   assert(!mr.is_empty(), "unexpected empty region");
   138   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
   139           ((HeapWord *) mr.end())),
   140          "markRange memory region end is not card aligned");
   141   // convert address range into offset range
   142   _bm.at_put_range(heapWordToOffset(mr.start()),
   143                    heapWordToOffset(mr.end()), true);
   144 }
   146 void CMBitMap::clearRange(MemRegion mr) {
   147   mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
   148   assert(!mr.is_empty(), "unexpected empty region");
   149   // convert address range into offset range
   150   _bm.at_put_range(heapWordToOffset(mr.start()),
   151                    heapWordToOffset(mr.end()), false);
   152 }
   154 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
   155                                             HeapWord* end_addr) {
   156   HeapWord* start = getNextMarkedWordAddress(addr);
   157   start = MIN2(start, end_addr);
   158   HeapWord* end   = getNextUnmarkedWordAddress(start);
   159   end = MIN2(end, end_addr);
   160   assert(start <= end, "Consistency check");
   161   MemRegion mr(start, end);
   162   if (!mr.is_empty()) {
   163     clearRange(mr);
   164   }
   165   return mr;
   166 }
   168 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
   169   _base(NULL), _cm(cm)
   170 #ifdef ASSERT
   171   , _drain_in_progress(false)
   172   , _drain_in_progress_yields(false)
   173 #endif
   174 {}
   176 void CMMarkStack::allocate(size_t size) {
   177   _base = NEW_C_HEAP_ARRAY(oop, size);
   178   if (_base == NULL)
   179     vm_exit_during_initialization("Failed to allocate "
   180                                   "CM region mark stack");
   181   _index = 0;
   182   // QQQQ cast ...
   183   _capacity = (jint) size;
   184   _oops_do_bound = -1;
   185   NOT_PRODUCT(_max_depth = 0);
   186 }
   188 CMMarkStack::~CMMarkStack() {
   189   if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base);
   190 }
   192 void CMMarkStack::par_push(oop ptr) {
   193   while (true) {
   194     if (isFull()) {
   195       _overflow = true;
   196       return;
   197     }
   198     // Otherwise...
   199     jint index = _index;
   200     jint next_index = index+1;
   201     jint res = Atomic::cmpxchg(next_index, &_index, index);
   202     if (res == index) {
   203       _base[index] = ptr;
   204       // Note that we don't maintain this atomically.  We could, but it
   205       // doesn't seem necessary.
   206       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
   207       return;
   208     }
   209     // Otherwise, we need to try again.
   210   }
   211 }
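
The loop above is the standard lock-free reserve-then-publish pattern: a thread claims slot index by CAS-advancing _index, and only the winner stores into its slot. A minimal standalone analogue, purely illustrative (std::atomic standing in for HotSpot's Atomic::cmpxchg; like the original, it assumes no lock-free pops run concurrently, since the element is written after the index moves):

    #include <atomic>

    // Illustrative sketch, not HotSpot code.
    template <typename T>
    bool try_push(T* base, std::atomic<int>& index, int capacity, T value) {
      while (true) {
        int cur = index.load();
        if (cur >= capacity) {
          return false;               // full; the caller records overflow
        }
        // Claim slot 'cur' by advancing the index; only one thread wins.
        if (index.compare_exchange_weak(cur, cur + 1)) {
          base[cur] = value;          // the winner publishes its element
          return true;
        }
        // The CAS lost a race; retry with a freshly loaded index.
      }
    }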
   213 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
   214   while (true) {
   215     if (isFull()) {
   216       _overflow = true;
   217       return;
   218     }
   219     // Otherwise...
   220     jint index = _index;
   221     jint next_index = index + n;
   222     if (next_index > _capacity) {
   223       _overflow = true;
   224       return;
   225     }
   226     jint res = Atomic::cmpxchg(next_index, &_index, index);
   227     if (res == index) {
   228       for (int i = 0; i < n; i++) {
   229         int ind = index + i;
   230         assert(ind < _capacity, "By overflow test above.");
   231         _base[ind] = ptr_arr[i];
   232       }
   233       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
   234       return;
   235     }
   236     // Otherwise, we need to try again.
   237   }
   238 }
   241 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
   242   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
   243   jint start = _index;
   244   jint next_index = start + n;
   245   if (next_index > _capacity) {
   246     _overflow = true;
   247     return;
   248   }
   249   // Otherwise.
   250   _index = next_index;
   251   for (int i = 0; i < n; i++) {
   252     int ind = start + i;
   253     assert(ind < _capacity, "By overflow test above.");
   254     _base[ind] = ptr_arr[i];
   255   }
   256 }
   259 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
   260   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
   261   jint index = _index;
   262   if (index == 0) {
   263     *n = 0;
   264     return false;
   265   } else {
   266     int k = MIN2(max, index);
   267     jint new_ind = index - k;
   268     for (int j = 0; j < k; j++) {
   269       ptr_arr[j] = _base[new_ind + j];
   270     }
   271     _index = new_ind;
   272     *n = k;
   273     return true;
   274   }
   275 }
   278 CMRegionStack::CMRegionStack() : _base(NULL) {}
   280 void CMRegionStack::allocate(size_t size) {
   281   _base = NEW_C_HEAP_ARRAY(MemRegion, size);
   282   if (_base == NULL)
   283     vm_exit_during_initialization("Failed to allocate "
   284                                   "CM region mark stack");
   285   _index = 0;
   286   // QQQQ cast ...
   287   _capacity = (jint) size;
   288 }
   290 CMRegionStack::~CMRegionStack() {
    291   if (_base != NULL) FREE_C_HEAP_ARRAY(MemRegion, _base);
   292 }
   294 void CMRegionStack::push_lock_free(MemRegion mr) {
   295   assert(mr.word_size() > 0, "Precondition");
   296   while (true) {
   297     jint index = _index;
   299     if (index >= _capacity) {
   300       _overflow = true;
   301       return;
   302     }
   303     // Otherwise...
   304     jint next_index = index+1;
   305     jint res = Atomic::cmpxchg(next_index, &_index, index);
   306     if (res == index) {
   307       _base[index] = mr;
   308       return;
   309     }
   310     // Otherwise, we need to try again.
   311   }
   312 }
   314 // Lock-free pop of the region stack. Called during the concurrent
   315 // marking / remark phases. Should only be called in tandem with
   316 // other lock-free pops.
   317 MemRegion CMRegionStack::pop_lock_free() {
   318   while (true) {
   319     jint index = _index;
   321     if (index == 0) {
   322       return MemRegion();
   323     }
   324     // Otherwise...
   325     jint next_index = index-1;
   326     jint res = Atomic::cmpxchg(next_index, &_index, index);
   327     if (res == index) {
   328       MemRegion mr = _base[next_index];
   329       if (mr.start() != NULL) {
   330         assert(mr.end() != NULL, "invariant");
   331         assert(mr.word_size() > 0, "invariant");
   332         return mr;
   333       } else {
   334         // that entry was invalidated... let's skip it
   335         assert(mr.end() == NULL, "invariant");
   336       }
   337     }
   338     // Otherwise, we need to try again.
   339   }
   340 }
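
The pop mirrors the push: claim the current top by CAS-decrementing _index, then read the slot just below the old top. The null-start MemRegion acts as a tombstone: invalidate_entries_into_cset() (below) blanks dead entries in place, and a popper that draws one retries instead of returning it. The decrement side, under the same illustrative assumptions as the push sketch above:

    // Illustrative sketch, not HotSpot code.
    template <typename T>
    bool try_pop(T* base, std::atomic<int>& index, T& out) {
      while (true) {
        int cur = index.load();
        if (cur == 0) {
          return false;               // empty
        }
        if (index.compare_exchange_weak(cur, cur - 1)) {
          out = base[cur - 1];        // the winner takes the old top slot
          return true;                // the caller skips tombstoned entries
        }
      }
    }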
   342 #if 0
   343 // The routines that manipulate the region stack with a lock are
   344 // not currently used. They should be retained, however, as a
   345 // diagnostic aid.
   347 void CMRegionStack::push_with_lock(MemRegion mr) {
   348   assert(mr.word_size() > 0, "Precondition");
   349   MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
   351   if (isFull()) {
   352     _overflow = true;
   353     return;
   354   }
   356   _base[_index] = mr;
   357   _index += 1;
   358 }
   360 MemRegion CMRegionStack::pop_with_lock() {
   361   MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
   363   while (true) {
   364     if (_index == 0) {
   365       return MemRegion();
   366     }
   367     _index -= 1;
   369     MemRegion mr = _base[_index];
   370     if (mr.start() != NULL) {
   371       assert(mr.end() != NULL, "invariant");
   372       assert(mr.word_size() > 0, "invariant");
   373       return mr;
   374     } else {
   375       // that entry was invalidated... let's skip it
   376       assert(mr.end() == NULL, "invariant");
   377     }
   378   }
   379 }
   380 #endif
   382 bool CMRegionStack::invalidate_entries_into_cset() {
   383   bool result = false;
   384   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   385   for (int i = 0; i < _oops_do_bound; ++i) {
   386     MemRegion mr = _base[i];
   387     if (mr.start() != NULL) {
   388       assert(mr.end() != NULL, "invariant");
   389       assert(mr.word_size() > 0, "invariant");
   390       HeapRegion* hr = g1h->heap_region_containing(mr.start());
   391       assert(hr != NULL, "invariant");
   392       if (hr->in_collection_set()) {
   393         // The region points into the collection set
   394         _base[i] = MemRegion();
   395         result = true;
   396       }
   397     } else {
   398       // that entry was invalidated... let's skip it
   399       assert(mr.end() == NULL, "invariant");
   400     }
   401   }
   402   return result;
   403 }
   405 template<class OopClosureClass>
   406 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
   407   assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
   408          || SafepointSynchronize::is_at_safepoint(),
   409          "Drain recursion must be yield-safe.");
   410   bool res = true;
   411   debug_only(_drain_in_progress = true);
   412   debug_only(_drain_in_progress_yields = yield_after);
   413   while (!isEmpty()) {
   414     oop newOop = pop();
   415     assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
   416     assert(newOop->is_oop(), "Expected an oop");
   417     assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
   418            "only grey objects on this stack");
   419     // iterate over the oops in this oop, marking and pushing
   420     // the ones in CMS generation.
   421     newOop->oop_iterate(cl);
   422     if (yield_after && _cm->do_yield_check()) {
   423       res = false; break;
   424     }
   425   }
   426   debug_only(_drain_in_progress = false);
   427   return res;
   428 }
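
drain() is templated on the closure type so each instantiation binds the oop_iterate call statically rather than through a virtual dispatch per object. A typical call shape would be (illustrative; cl stands for some concrete closure owned by the caller):

    // bool finished = _markStack.drain(&cl, _nextMarkBitMap,
    //                                  true /* yield_after */);
    // finished == false means the drain yielded before emptying the stack.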
   430 void CMMarkStack::oops_do(OopClosure* f) {
   431   if (_index == 0) return;
   432   assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
   433          "Bound must be set.");
   434   for (int i = 0; i < _oops_do_bound; i++) {
   435     f->do_oop(&_base[i]);
   436   }
   437   _oops_do_bound = -1;
   438 }
   440 bool ConcurrentMark::not_yet_marked(oop obj) const {
   441   return (_g1h->is_obj_ill(obj)
   442           || (_g1h->is_in_permanent(obj)
   443               && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
   444 }
   446 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
   447 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
   448 #endif // _MSC_VER
   450 ConcurrentMark::ConcurrentMark(ReservedSpace rs,
   451                                int max_regions) :
   452   _markBitMap1(rs, MinObjAlignment - 1),
   453   _markBitMap2(rs, MinObjAlignment - 1),
   455   _parallel_marking_threads(0),
   456   _sleep_factor(0.0),
   457   _marking_task_overhead(1.0),
   458   _cleanup_sleep_factor(0.0),
   459   _cleanup_task_overhead(1.0),
   460   _region_bm(max_regions, false /* in_resource_area*/),
   461   _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
   462            CardTableModRefBS::card_shift,
   463            false /* in_resource_area*/),
   464   _prevMarkBitMap(&_markBitMap1),
   465   _nextMarkBitMap(&_markBitMap2),
   466   _at_least_one_mark_complete(false),
   468   _markStack(this),
   469   _regionStack(),
   470   // _finger set in set_non_marking_state
   472   _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
   473   // _active_tasks set in set_non_marking_state
   474   // _tasks set inside the constructor
   475   _task_queues(new CMTaskQueueSet((int) _max_task_num)),
   476   _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),
   478   _has_overflown(false),
   479   _concurrent(false),
   480   _has_aborted(false),
   481   _restart_for_overflow(false),
   482   _concurrent_marking_in_progress(false),
   483   _should_gray_objects(false),
   485   // _verbose_level set below
   487   _init_times(),
   488   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
   489   _cleanup_times(),
   490   _total_counting_time(0.0),
   491   _total_rs_scrub_time(0.0),
   493   _parallel_workers(NULL)
   494 {
   495   CMVerboseLevel verbose_level =
   496     (CMVerboseLevel) G1MarkingVerboseLevel;
   497   if (verbose_level < no_verbose)
   498     verbose_level = no_verbose;
   499   if (verbose_level > high_verbose)
   500     verbose_level = high_verbose;
   501   _verbose_level = verbose_level;
   503   if (verbose_low())
   504     gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
   505                            "heap end = "PTR_FORMAT, _heap_start, _heap_end);
   507   _markStack.allocate(MarkStackSize);
   508   _regionStack.allocate(G1MarkRegionStackSize);
   510   // Create & start a ConcurrentMark thread.
   511   _cmThread = new ConcurrentMarkThread(this);
   512   assert(cmThread() != NULL, "CM Thread should have been created");
   513   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
   515   _g1h = G1CollectedHeap::heap();
   516   assert(CGC_lock != NULL, "Where's the CGC_lock?");
   517   assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
   518   assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
   520   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
   521   satb_qs.set_buffer_size(G1SATBBufferSize);
   523   int size = (int) MAX2(ParallelGCThreads, (size_t)1);
   524   _par_cleanup_thread_state = NEW_C_HEAP_ARRAY(ParCleanupThreadState*, size);
   525   for (int i = 0 ; i < size; i++) {
   526     _par_cleanup_thread_state[i] = new ParCleanupThreadState;
   527   }
   529   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
   530   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);
   532   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
   533   _active_tasks = _max_task_num;
   534   for (int i = 0; i < (int) _max_task_num; ++i) {
   535     CMTaskQueue* task_queue = new CMTaskQueue();
   536     task_queue->initialize();
   537     _task_queues->register_queue(i, task_queue);
   539     _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
   540     _accum_task_vtime[i] = 0.0;
   541   }
   543   if (ConcGCThreads > ParallelGCThreads) {
   544     vm_exit_during_initialization("Can't have more ConcGCThreads "
   545                                   "than ParallelGCThreads.");
   546   }
   547   if (ParallelGCThreads == 0) {
   548     // if we are not running with any parallel GC threads we will not
   549     // spawn any marking threads either
   550     _parallel_marking_threads =   0;
   551     _sleep_factor             = 0.0;
   552     _marking_task_overhead    = 1.0;
   553   } else {
   554     if (ConcGCThreads > 0) {
   555       // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
   556       // if both are set
   558       _parallel_marking_threads = ConcGCThreads;
   559       _sleep_factor             = 0.0;
   560       _marking_task_overhead    = 1.0;
   561     } else if (G1MarkingOverheadPercent > 0) {
   562       // we will calculate the number of parallel marking threads
   563       // based on a target overhead with respect to the soft real-time
   564       // goal
   566       double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
   567       double overall_cm_overhead =
   568         (double) MaxGCPauseMillis * marking_overhead /
   569         (double) GCPauseIntervalMillis;
   570       double cpu_ratio = 1.0 / (double) os::processor_count();
   571       double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
   572       double marking_task_overhead =
   573         overall_cm_overhead / marking_thread_num *
   574                                                 (double) os::processor_count();
   575       double sleep_factor =
   576                          (1.0 - marking_task_overhead) / marking_task_overhead;
   578       _parallel_marking_threads = (size_t) marking_thread_num;
   579       _sleep_factor             = sleep_factor;
   580       _marking_task_overhead    = marking_task_overhead;
   581     } else {
   582       _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
   583       _sleep_factor             = 0.0;
   584       _marking_task_overhead    = 1.0;
   585     }
   587     if (parallel_marking_threads() > 1)
   588       _cleanup_task_overhead = 1.0;
   589     else
   590       _cleanup_task_overhead = marking_task_overhead();
   591     _cleanup_sleep_factor =
   592                      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
   594 #if 0
   595     gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
   596     gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
   597     gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
   598     gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
   599     gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
   600 #endif
   602     guarantee(parallel_marking_threads() > 0, "peace of mind");
   603     _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
   604          (int) _parallel_marking_threads, false, true);
   605     if (_parallel_workers == NULL) {
   606       vm_exit_during_initialization("Failed necessary allocation.");
   607     } else {
   608       _parallel_workers->initialize_workers();
   609     }
   610   }
   612   // so that the call below can read a sensible value
   613   _heap_start = (HeapWord*) rs.base();
   614   set_non_marking_state();
   615 }
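
A worked instance of the G1MarkingOverheadPercent sizing above, with illustrative (not default) flag values: G1MarkingOverheadPercent=10, MaxGCPauseMillis=200, GCPauseIntervalMillis=1000, on an 8-processor machine:

    marking_overhead      = 10 / 100.0             = 0.10
    overall_cm_overhead   = 200 * 0.10 / 1000      = 0.02
    cpu_ratio             = 1.0 / 8                = 0.125
    marking_thread_num    = ceil(0.02 / 0.125)     = 1
    marking_task_overhead = 0.02 / 1 * 8           = 0.16
    sleep_factor          = (1.0 - 0.16) / 0.16    = 5.25

That is, one marking thread which, via the sleep factor, sleeps roughly 5.25 times as long as each marking burst it completes.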
   617 void ConcurrentMark::update_g1_committed(bool force) {
   618   // If concurrent marking is not in progress, then we do not need to
   619   // update _heap_end. This has a subtle and important
   620   // side-effect. Imagine that two evacuation pauses happen between
   621   // marking completion and remark. The first one can grow the
   622   // heap (hence now the finger is below the heap end). Then, the
   623   // second one could unnecessarily push regions on the region
   624   // stack. This causes the invariant that the region stack is empty
   625   // at the beginning of remark to be false. By ensuring that we do
   626   // not observe heap expansions after marking is complete, then we do
   627   // not have this problem.
   628   if (!concurrent_marking_in_progress() && !force)
   629     return;
   631   MemRegion committed = _g1h->g1_committed();
   632   assert(committed.start() == _heap_start, "start shouldn't change");
   633   HeapWord* new_end = committed.end();
   634   if (new_end > _heap_end) {
   635     // The heap has been expanded.
   637     _heap_end = new_end;
   638   }
   639   // Notice that the heap can also shrink. However, this only happens
   640   // during a Full GC (at least currently) and the entire marking
   641   // phase will bail out and the task will not be restarted. So, let's
   642   // do nothing.
   643 }
   645 void ConcurrentMark::reset() {
   646   // Starting values for these two. This should be called in a STW
   647   // phase. CM will be notified of any future g1_committed expansions,
   648   // which will happen at the end of evacuation pauses, when tasks
   649   // are inactive.
   650   MemRegion committed = _g1h->g1_committed();
   651   _heap_start = committed.start();
   652   _heap_end   = committed.end();
   654   // Separated the asserts so that we know which one fires.
   655   assert(_heap_start != NULL, "heap bounds should look ok");
   656   assert(_heap_end != NULL, "heap bounds should look ok");
   657   assert(_heap_start < _heap_end, "heap bounds should look ok");
   659   // reset all the marking data structures and any necessary flags
   660   clear_marking_state();
   662   if (verbose_low())
   663     gclog_or_tty->print_cr("[global] resetting");
   665   // We do reset all of them, since different phases will use
   666   // different number of active threads. So, it's easiest to have all
   667   // of them ready.
   668   for (int i = 0; i < (int) _max_task_num; ++i) {
   669     _tasks[i]->reset(_nextMarkBitMap);
   670   }
   672   // we need this to make sure that the flag is on during the evac
   673   // pause with initial mark piggy-backed
   674   set_concurrent_marking_in_progress();
   675 }
   677 void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
   678   assert(active_tasks <= _max_task_num, "we should not have more");
   680   _active_tasks = active_tasks;
   681   // Need to update the three data structures below according to the
   682   // number of active threads for this phase.
   683   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
   684   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
   685   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
   687   _concurrent = concurrent;
   688   // We propagate this to all tasks, not just the active ones.
   689   for (int i = 0; i < (int) _max_task_num; ++i)
   690     _tasks[i]->set_concurrent(concurrent);
   692   if (concurrent) {
   693     set_concurrent_marking_in_progress();
   694   } else {
   695     // We currently assume that the concurrent flag has been set to
   696     // false before we start remark. At this point we should also be
   697     // in a STW phase.
   698     assert(!concurrent_marking_in_progress(), "invariant");
   699     assert(_finger == _heap_end, "only way to get here");
   700     update_g1_committed(true);
   701   }
   702 }
   704 void ConcurrentMark::set_non_marking_state() {
   705   // We set the global marking state to some default values when we're
   706   // not doing marking.
   707   clear_marking_state();
   708   _active_tasks = 0;
   709   clear_concurrent_marking_in_progress();
   710 }
   712 ConcurrentMark::~ConcurrentMark() {
   713   int size = (int) MAX2(ParallelGCThreads, (size_t)1);
   714   for (int i = 0; i < size; i++) delete _par_cleanup_thread_state[i];
   715   FREE_C_HEAP_ARRAY(ParCleanupThreadState*,
   716                     _par_cleanup_thread_state);
   718   for (int i = 0; i < (int) _max_task_num; ++i) {
   719     delete _task_queues->queue(i);
   720     delete _tasks[i];
   721   }
   722   delete _task_queues;
   723   FREE_C_HEAP_ARRAY(CMTask*, _tasks);
   724 }
   726 // This closure is used to mark refs into the g1 generation
   727 // from external roots in the CMS bit map.
   728 // Called at the first checkpoint.
   729 //
   731 void ConcurrentMark::clearNextBitmap() {
   732   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   733   G1CollectorPolicy* g1p = g1h->g1_policy();
   735   // Make sure that the concurrent mark thread looks to still be in
   736   // the current cycle.
   737   guarantee(cmThread()->during_cycle(), "invariant");
   739   // We are finishing up the current cycle by clearing the next
   740   // marking bitmap and getting it ready for the next cycle. During
   741   // this time no other cycle can start. So, let's make sure that this
   742   // is the case.
   743   guarantee(!g1h->mark_in_progress(), "invariant");
   745   // clear the mark bitmap (no grey objects to start with).
   746   // We need to do this in chunks and offer to yield in between
   747   // each chunk.
   748   HeapWord* start  = _nextMarkBitMap->startWord();
   749   HeapWord* end    = _nextMarkBitMap->endWord();
   750   HeapWord* cur    = start;
   751   size_t chunkSize = M;
   752   while (cur < end) {
   753     HeapWord* next = cur + chunkSize;
   754     if (next > end)
   755       next = end;
   756     MemRegion mr(cur,next);
   757     _nextMarkBitMap->clearRange(mr);
   758     cur = next;
   759     do_yield_check();
   761     // Repeat the asserts from above. We'll do them as asserts here to
   762     // minimize their overhead on the product. However, we'll have
   763     // them as guarantees at the beginning / end of the bitmap
   764     // clearing to get some checking in the product.
   765     assert(cmThread()->during_cycle(), "invariant");
   766     assert(!g1h->mark_in_progress(), "invariant");
   767   }
   769   // Repeat the asserts from above.
   770   guarantee(cmThread()->during_cycle(), "invariant");
   771   guarantee(!g1h->mark_in_progress(), "invariant");
   772 }
   774 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
   775 public:
   776   bool doHeapRegion(HeapRegion* r) {
   777     if (!r->continuesHumongous()) {
   778       r->note_start_of_marking(true);
   779     }
   780     return false;
   781   }
   782 };
   784 void ConcurrentMark::checkpointRootsInitialPre() {
   785   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
   786   G1CollectorPolicy* g1p = g1h->g1_policy();
   788   _has_aborted = false;
   790 #ifndef PRODUCT
   791   if (G1PrintReachableAtInitialMark) {
   792     print_reachable("at-cycle-start",
   793                     true /* use_prev_marking */, true /* all */);
   794   }
   795 #endif
   797   // Initialise marking structures. This has to be done in a STW phase.
   798   reset();
   799 }
   801 class CMMarkRootsClosure: public OopsInGenClosure {
   802 private:
   803   ConcurrentMark*  _cm;
   804   G1CollectedHeap* _g1h;
   805   bool             _do_barrier;
   807 public:
   808   CMMarkRootsClosure(ConcurrentMark* cm,
   809                      G1CollectedHeap* g1h,
   810                      bool do_barrier) : _cm(cm), _g1h(g1h),
   811                                         _do_barrier(do_barrier) { }
   813   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
   814   virtual void do_oop(      oop* p) { do_oop_work(p); }
   816   template <class T> void do_oop_work(T* p) {
   817     T heap_oop = oopDesc::load_heap_oop(p);
   818     if (!oopDesc::is_null(heap_oop)) {
   819       oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
   820       assert(obj->is_oop() || obj->mark() == NULL,
   821              "expected an oop, possibly with mark word displaced");
   822       HeapWord* addr = (HeapWord*)obj;
   823       if (_g1h->is_in_g1_reserved(addr)) {
   824         _cm->grayRoot(obj);
   825       }
   826     }
   827     if (_do_barrier) {
   828       assert(!_g1h->is_in_g1_reserved(p),
   829              "Should be called on external roots");
   830       do_barrier(p);
   831     }
   832   }
   833 };
   835 void ConcurrentMark::checkpointRootsInitialPost() {
   836   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
   838   // For each region note start of marking.
   839   NoteStartOfMarkHRClosure startcl;
   840   g1h->heap_region_iterate(&startcl);
   842   // Start weak-reference discovery.
   843   ReferenceProcessor* rp = g1h->ref_processor();
   844   rp->verify_no_references_recorded();
   845   rp->enable_discovery(); // enable ("weak") refs discovery
   846   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
   848   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
   849   // This is the start of the marking cycle; we expect all
   850   // threads to have SATB queues with active set to false.
   851   satb_mq_set.set_active_all_threads(true, /* new active value */
   852                                      false /* expected_active */);
   854   // update_g1_committed() will be called at the end of an evac pause
   855   // when marking is on. So, it's also called at the end of the
   856   // initial-mark pause to update the heap end, if the heap expands
   857   // during it. No need to call it here.
   858 }
   860 // Checkpoint the roots into this generation from outside
   861 // this generation. [Note this initial checkpoint need only
   862 // be approximate -- we'll do a catch up phase subsequently.]
   863 void ConcurrentMark::checkpointRootsInitial() {
   864   assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped");
   865   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   867   double start = os::elapsedTime();
   869   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
   870   g1p->record_concurrent_mark_init_start();
   871   checkpointRootsInitialPre();
   873   // YSR: when concurrent precleaning is in place, we'll
   874   // need to clear the cached card table here
   876   ResourceMark rm;
   877   HandleMark  hm;
   879   g1h->ensure_parsability(false);
   880   g1h->perm_gen()->save_marks();
   882   CMMarkRootsClosure notOlder(this, g1h, false);
   883   CMMarkRootsClosure older(this, g1h, true);
   885   g1h->set_marking_started();
   886   g1h->rem_set()->prepare_for_younger_refs_iterate(false);
   888   g1h->process_strong_roots(true,    // activate StrongRootsScope
   889                             false,   // fake perm gen collection
   890                             SharedHeap::SO_AllClasses,
   891                             &notOlder, // Regular roots
   892                             NULL,     // do not visit active blobs
   893                             &older    // Perm Gen Roots
   894                             );
   895   checkpointRootsInitialPost();
   897   // Statistics.
   898   double end = os::elapsedTime();
   899   _init_times.add((end - start) * 1000.0);
   901   g1p->record_concurrent_mark_init_end();
   902 }
   904 /*
   905    Notice that in the next two methods, we actually leave the STS
   906    during the barrier sync and join it immediately afterwards. If we
   907    do not do this, then the following deadlock can occur: one
   908    thread could be in the barrier sync code, waiting for the other
   909    thread to also sync up, whereas another one could be trying to
   910    yield, while also waiting for the other threads to sync up too.
   912    Because the thread that does the sync barrier has left the STS, it
   913    is possible for it to be suspended during a Full GC or for an
   914    evacuation pause to occur. This is actually safe, since entering
   915    the sync barrier is one of the last things do_marking_step() does, and it
   916    doesn't manipulate any data structures afterwards.
   917 */
   919 void ConcurrentMark::enter_first_sync_barrier(int task_num) {
   920   if (verbose_low())
   921     gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
   923   ConcurrentGCThread::stsLeave();
   924   _first_overflow_barrier_sync.enter();
   925   ConcurrentGCThread::stsJoin();
   926   // at this point everyone should have synced up and not be doing any
   927   // more work
   929   if (verbose_low())
   930     gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
   932   // let task 0 do this
   933   if (task_num == 0) {
   934     // task 0 is responsible for clearing the global data structures
   935     clear_marking_state();
   937     if (PrintGC) {
   938       gclog_or_tty->date_stamp(PrintGCDateStamps);
   939       gclog_or_tty->stamp(PrintGCTimeStamps);
   940       gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
   941     }
   942   }
   944   // after this, each task should reset its own data structures and
   945   // then go into the second barrier
   946 }
   948 void ConcurrentMark::enter_second_sync_barrier(int task_num) {
   949   if (verbose_low())
   950     gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
   952   ConcurrentGCThread::stsLeave();
   953   _second_overflow_barrier_sync.enter();
   954   ConcurrentGCThread::stsJoin();
   955   // at this point everything should be re-initialised and ready to go
   957   if (verbose_low())
   958     gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
   959 }
   961 void ConcurrentMark::grayRoot(oop p) {
   962   HeapWord* addr = (HeapWord*) p;
   963   // We can't really check against _heap_start and _heap_end, since it
   964   // is possible during an evacuation pause with piggy-backed
   965   // initial-mark that the committed space is expanded during the
   966   // pause without CM observing this change. So the assertion below
   967   // is a bit conservative, but better than nothing.
   968   assert(_g1h->g1_committed().contains(addr),
   969          "address should be within the heap bounds");
   971   if (!_nextMarkBitMap->isMarked(addr))
   972     _nextMarkBitMap->parMark(addr);
   973 }
   975 void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
   976   // The objects on the region have already been marked "in bulk" by
   977   // the caller. We only need to decide whether to push the region on
   978   // the region stack or not.
   980   if (!concurrent_marking_in_progress() || !_should_gray_objects)
   981     // We're done with marking and waiting for remark. We do not need to
   982     // push anything else on the region stack.
   983     return;
   985   HeapWord* finger = _finger;
   987   if (verbose_low())
   988     gclog_or_tty->print_cr("[global] attempting to push "
   989                            "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
   990                            PTR_FORMAT, mr.start(), mr.end(), finger);
   992   if (mr.start() < finger) {
   993     // The finger is always heap region aligned and it is not possible
   994     // for mr to span heap regions.
   995     assert(mr.end() <= finger, "invariant");
   997     // Separated the asserts so that we know which one fires.
   998     assert(mr.start() <= mr.end(),
   999            "region boundaries should fall within the committed space");
  1000     assert(_heap_start <= mr.start(),
  1001            "region boundaries should fall within the committed space");
  1002     assert(mr.end() <= _heap_end,
  1003            "region boundaries should fall within the committed space");
  1004     if (verbose_low())
  1005       gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
  1006                              "below the finger, pushing it",
  1007                              mr.start(), mr.end());
  1009     if (!region_stack_push_lock_free(mr)) {
  1010       if (verbose_low())
  1011         gclog_or_tty->print_cr("[global] region stack has overflown.");
  1012     }
  1013   }
  1014 }
  1016 void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
  1017   // The object is not marked by the caller. We need to at least mark
  1018   // it and maybe push it on the stack.
  1020   HeapWord* addr = (HeapWord*)p;
  1021   if (!_nextMarkBitMap->isMarked(addr)) {
  1022     // We definitely need to mark it, irrespective of whether we bail out
  1023     // because we're done with marking.
  1024     if (_nextMarkBitMap->parMark(addr)) {
  1025       if (!concurrent_marking_in_progress() || !_should_gray_objects)
  1026         // If we're done with concurrent marking and we're waiting for
  1027         // remark, then we're not pushing anything on the stack.
  1028         return;
  1030       // No OrderAccess:store_load() is needed. It is implicit in the
  1031       // CAS done in parMark(addr) above
  1032       HeapWord* finger = _finger;
  1034       if (addr < finger) {
  1035         if (!mark_stack_push(oop(addr))) {
  1036           if (verbose_low())
  1037             gclog_or_tty->print_cr("[global] global stack overflow "
  1038                                    "during parMark");
  1039         }
  1040       }
  1041     }
  1042   }
  1043 }
  1045 class CMConcurrentMarkingTask: public AbstractGangTask {
  1046 private:
  1047   ConcurrentMark*       _cm;
  1048   ConcurrentMarkThread* _cmt;
  1050 public:
  1051   void work(int worker_i) {
  1052     assert(Thread::current()->is_ConcurrentGC_thread(),
  1053            "this should only be done by a conc GC thread");
  1055     double start_vtime = os::elapsedVTime();
  1057     ConcurrentGCThread::stsJoin();
  1059     assert((size_t) worker_i < _cm->active_tasks(), "invariant");
  1060     CMTask* the_task = _cm->task(worker_i);
  1061     the_task->record_start_time();
  1062     if (!_cm->has_aborted()) {
  1063       do {
  1064         double start_vtime_sec = os::elapsedVTime();
  1065         double start_time_sec = os::elapsedTime();
  1066         the_task->do_marking_step(10.0);
  1067         double end_time_sec = os::elapsedTime();
  1068         double end_vtime_sec = os::elapsedVTime();
  1069         double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
  1070         double elapsed_time_sec = end_time_sec - start_time_sec;
  1071         _cm->clear_has_overflown();
  1073         bool ret = _cm->do_yield_check(worker_i);
  1075         jlong sleep_time_ms;
  1076         if (!_cm->has_aborted() && the_task->has_aborted()) {
  1077           sleep_time_ms =
  1078             (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
  1079           ConcurrentGCThread::stsLeave();
  1080           os::sleep(Thread::current(), sleep_time_ms, false);
  1081           ConcurrentGCThread::stsJoin();
  1082         }
  1083         double end_time2_sec = os::elapsedTime();
  1084         double elapsed_time2_sec = end_time2_sec - start_time_sec;
  1086 #if 0
  1087           gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
  1088                                  "overhead %1.4lf",
  1089                                  elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
  1090                                  the_task->conc_overhead(os::elapsedTime()) * 8.0);
  1091           gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
  1092                                  elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
  1093 #endif
  1094       } while (!_cm->has_aborted() && the_task->has_aborted());
  1095     }
  1096     the_task->record_end_time();
  1097     guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
  1099     ConcurrentGCThread::stsLeave();
  1101     double end_vtime = os::elapsedVTime();
  1102     _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
  1103   }
  1105   CMConcurrentMarkingTask(ConcurrentMark* cm,
  1106                           ConcurrentMarkThread* cmt) :
  1107       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
  1109   ~CMConcurrentMarkingTask() { }
  1110 };
  1112 void ConcurrentMark::markFromRoots() {
  1113   // we might be tempted to assert that:
  1114   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  1115   //        "inconsistent argument?");
  1116   // However that wouldn't be right, because it's possible that
  1117   // a safepoint is indeed in progress as a younger generation
  1118   // stop-the-world GC happens even as we mark in this generation.
  1120   _restart_for_overflow = false;
  1122   set_phase(MAX2((size_t) 1, parallel_marking_threads()), true);
  1124   CMConcurrentMarkingTask markingTask(this, cmThread());
  1125   if (parallel_marking_threads() > 0)
  1126     _parallel_workers->run_task(&markingTask);
  1127   else
  1128     markingTask.work(0);
  1129   print_stats();
  1130 }
  1132 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  1133   // world is stopped at this checkpoint
  1134   assert(SafepointSynchronize::is_at_safepoint(),
  1135          "world should be stopped");
  1136   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  1138   // If a full collection has happened, we shouldn't do this.
  1139   if (has_aborted()) {
  1140     g1h->set_marking_complete(); // So bitmap clearing isn't confused
  1141     return;
  1142   }
  1144   if (VerifyDuringGC) {
  1145     HandleMark hm;  // handle scope
  1146     gclog_or_tty->print(" VerifyDuringGC:(before)");
  1147     Universe::heap()->prepare_for_verify();
  1148     Universe::verify(true, false, true);
  1149   }
  1151   G1CollectorPolicy* g1p = g1h->g1_policy();
  1152   g1p->record_concurrent_mark_remark_start();
  1154   double start = os::elapsedTime();
  1156   checkpointRootsFinalWork();
  1158   double mark_work_end = os::elapsedTime();
  1160   weakRefsWork(clear_all_soft_refs);
  1162   if (has_overflown()) {
  1163     // Oops.  We overflowed.  Restart concurrent marking.
  1164     _restart_for_overflow = true;
  1165     // Clear the flag. We do not need it any more.
  1166     clear_has_overflown();
  1167     if (G1TraceMarkStackOverflow)
  1168       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
  1169   } else {
  1170     // We're done with marking.
  1171     // This is the end of the marking cycle; we expect all
  1172     // threads to have SATB queues with active set to true.
  1173     JavaThread::satb_mark_queue_set().set_active_all_threads(
  1174                                                   false, /* new active value */
  1175                                                   true /* expected_active */);
  1177     if (VerifyDuringGC) {
  1178       HandleMark hm;  // handle scope
  1179       gclog_or_tty->print(" VerifyDuringGC:(after)");
  1180       Universe::heap()->prepare_for_verify();
  1181       Universe::heap()->verify(/* allow_dirty */      true,
  1182                                /* silent */           false,
  1183                                /* use_prev_marking */ false);
  1184     }
  1185   }
  1187 #if VERIFY_OBJS_PROCESSED
  1188   _scan_obj_cl.objs_processed = 0;
  1189   ThreadLocalObjQueue::objs_enqueued = 0;
  1190 #endif
  1192   // Statistics
  1193   double now = os::elapsedTime();
  1194   _remark_mark_times.add((mark_work_end - start) * 1000.0);
  1195   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  1196   _remark_times.add((now - start) * 1000.0);
  1198   g1p->record_concurrent_mark_remark_end();
  1199 }
  1202 #define CARD_BM_TEST_MODE 0
  1204 class CalcLiveObjectsClosure: public HeapRegionClosure {
  1206   CMBitMapRO* _bm;
  1207   ConcurrentMark* _cm;
  1208   bool _changed;
  1209   bool _yield;
  1210   size_t _words_done;
  1211   size_t _tot_live;
  1212   size_t _tot_used;
  1213   size_t _regions_done;
  1214   double _start_vtime_sec;
  1216   BitMap* _region_bm;
  1217   BitMap* _card_bm;
  1218   intptr_t _bottom_card_num;
  1219   bool _final;
  1221   void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
  1222     for (intptr_t i = start_card_num; i <= last_card_num; i++) {
  1223 #if CARD_BM_TEST_MODE
  1224       guarantee(_card_bm->at(i - _bottom_card_num), "Should already be set.");
  1225 #else
  1226       _card_bm->par_at_put(i - _bottom_card_num, 1);
  1227 #endif
  1228     }
  1229   }
  1231 public:
  1232   CalcLiveObjectsClosure(bool final,
  1233                          CMBitMapRO *bm, ConcurrentMark *cm,
  1234                          BitMap* region_bm, BitMap* card_bm) :
  1235     _bm(bm), _cm(cm), _changed(false), _yield(true),
  1236     _words_done(0), _tot_live(0), _tot_used(0),
  1237     _region_bm(region_bm), _card_bm(card_bm),_final(final),
  1238     _regions_done(0), _start_vtime_sec(0.0)
  1239   {
  1240     _bottom_card_num =
  1241       intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
  1242                CardTableModRefBS::card_shift);
  1243   }
  1245   // It takes a region that's not empty (i.e., it has at least one
  1246   // live object in it) and sets its corresponding bit on the region
  1247   // bitmap to 1. If the region is "starts humongous" it will also set
  1248   // to 1 the bits on the region bitmap that correspond to its
  1249   // associated "continues humongous" regions.
  1250   void set_bit_for_region(HeapRegion* hr) {
  1251     assert(!hr->continuesHumongous(), "should have filtered those out");
  1253     size_t index = hr->hrs_index();
  1254     if (!hr->startsHumongous()) {
  1255       // Normal (non-humongous) case: just set the bit.
  1256       _region_bm->par_at_put((BitMap::idx_t) index, true);
  1257     } else {
  1258       // Starts humongous case: calculate how many regions are part of
  1259       // this humongous region and then set the bit range. It might
  1260       // have been a bit more efficient to look at the object that
  1261       // spans these humongous regions to calculate their number from
  1262       // the object's size. However, it's a good idea to calculate
  1263       // this based on the metadata itself, and not the region
  1264       // contents, so that this code is not aware of what goes into
  1265       // the humongous regions (in case this changes in the future).
  1266       G1CollectedHeap* g1h = G1CollectedHeap::heap();
  1267       size_t end_index = index + 1;
  1268       while (end_index < g1h->n_regions()) {
  1269         HeapRegion* chr = g1h->region_at(end_index);
  1270         if (!chr->continuesHumongous()) {
  1271           break;
  1272         }
  1273         end_index += 1;
  1274       }
  1275       _region_bm->par_at_put_range((BitMap::idx_t) index,
  1276                                    (BitMap::idx_t) end_index, true);
  1277     }
  1278   }
  1280   bool doHeapRegion(HeapRegion* hr) {
  1281     if (!_final && _regions_done == 0)
  1282       _start_vtime_sec = os::elapsedVTime();
  1284     if (hr->continuesHumongous()) {
  1285       // We will ignore these here and process them when their
  1286       // associated "starts humongous" region is processed (see
  1287       // set_bit_for_heap_region()). Note that we cannot rely on their
  1288       // associated "starts humongous" region to have their bit set to
  1289       // 1 since, due to the region chunking in the parallel region
  1290       // iteration, a "continues humongous" region might be visited
  1291       // before its associated "starts humongous".
  1292       return false;
  1293     }
  1295     HeapWord* nextTop = hr->next_top_at_mark_start();
  1296     HeapWord* start   = hr->top_at_conc_mark_count();
  1297     assert(hr->bottom() <= start && start <= hr->end() &&
  1298            hr->bottom() <= nextTop && nextTop <= hr->end() &&
  1299            start <= nextTop,
  1300            "Preconditions.");
  1301     // Record the number of words we'll examine.
  1302     size_t words_done = (nextTop - start);
  1303     // Find the first marked object at or after "start".
  1304     start = _bm->getNextMarkedWordAddress(start, nextTop);
  1305     size_t marked_bytes = 0;
  1307     // Below, the term "card num" means the result of shifting an address
  1308     // by the card shift -- address 0 corresponds to card number 0.  One
  1309     // must subtract the card num of the bottom of the heap to obtain a
  1310     // card table index.
  1311     // The first card num of the sequence of live cards currently being
  1312     // constructed.  -1 ==> no sequence.
  1313     intptr_t start_card_num = -1;
  1314     // The last card num of the sequence of live cards currently being
  1315     // constructed.  -1 ==> no sequence.
  1316     intptr_t last_card_num = -1;
  1318     while (start < nextTop) {
  1319       if (_yield && _cm->do_yield_check()) {
  1320         // We yielded.  It might be for a full collection, in which case
  1321         // all bets are off; terminate the traversal.
  1322         if (_cm->has_aborted()) {
  1323           _changed = false;
  1324           return true;
  1325         } else {
  1326           // Otherwise, it might be a collection pause, and the region
  1327           // we're looking at might be in the collection set.  We'll
  1328           // abandon this region.
  1329           return false;
  1330         }
  1331       }
  1332       oop obj = oop(start);
  1333       int obj_sz = obj->size();
  1334       // The card num of the start of the current object.
  1335       intptr_t obj_card_num =
  1336         intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
  1338       HeapWord* obj_last = start + obj_sz - 1;
  1339       intptr_t obj_last_card_num =
  1340         intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);
  1342       if (obj_card_num != last_card_num) {
  1343         if (start_card_num == -1) {
  1344           assert(last_card_num == -1, "Both or neither.");
  1345           start_card_num = obj_card_num;
  1346         } else {
  1347           assert(last_card_num != -1, "Both or neither.");
  1348           assert(obj_card_num >= last_card_num, "Inv");
  1349           if ((obj_card_num - last_card_num) > 1) {
  1350             // Mark the last run, and start a new one.
  1351             mark_card_num_range(start_card_num, last_card_num);
  1352             start_card_num = obj_card_num;
  1353           }
  1354         }
  1355 #if CARD_BM_TEST_MODE
  1356         /*
  1357         gclog_or_tty->print_cr("Setting bits from %d/%d.",
  1358                                obj_card_num - _bottom_card_num,
  1359                                obj_last_card_num - _bottom_card_num);
  1360         */
  1361         for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
  1362           _card_bm->par_at_put(j - _bottom_card_num, 1);
  1363         }
  1364 #endif
  1365       }
  1366       // In any case, we set the last card num.
  1367       last_card_num = obj_last_card_num;
  1369       marked_bytes += (size_t)obj_sz * HeapWordSize;
  1370       // Find the next marked object after this one.
  1371       start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
  1372       _changed = true;
  1373     }
  1374     // Handle the last range, if any.
  1375     if (start_card_num != -1)
  1376       mark_card_num_range(start_card_num, last_card_num);
  1377     if (_final) {
  1378       // Mark the allocated-since-marking portion...
  1379       HeapWord* tp = hr->top();
  1380       if (nextTop < tp) {
  1381         start_card_num =
  1382           intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
  1383         last_card_num =
  1384           intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
  1385         mark_card_num_range(start_card_num, last_card_num);
  1386         // This definitely means the region has live objects.
  1387         set_bit_for_region(hr);
  1388       }
  1389     }
  1391     hr->add_to_marked_bytes(marked_bytes);
  1392     // Update the live region bitmap.
  1393     if (marked_bytes > 0) {
  1394       set_bit_for_region(hr);
  1395     }
  1396     hr->set_top_at_conc_mark_count(nextTop);
  1397     _tot_live += hr->next_live_bytes();
  1398     _tot_used += hr->used();
  1399     _words_done = words_done;
  1401     if (!_final) {
  1402       ++_regions_done;
  1403       if (_regions_done % 10 == 0) {
  1404         double end_vtime_sec = os::elapsedVTime();
  1405         double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
  1406         if (elapsed_vtime_sec > (10.0 / 1000.0)) {
  1407           jlong sleep_time_ms =
  1408             (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
  1409           os::sleep(Thread::current(), sleep_time_ms, false);
  1410           _start_vtime_sec = end_vtime_sec;
  1411         }
  1412       }
  1413     }
  1415     return false;
  1416   }
  1418   bool changed() { return _changed;  }
  1419   void reset()   { _changed = false; _words_done = 0; }
  1420   void no_yield() { _yield = false; }
  1421   size_t words_done() { return _words_done; }
  1422   size_t tot_live() { return _tot_live; }
  1423   size_t tot_used() { return _tot_used; }
  1424 };
  1427 void ConcurrentMark::calcDesiredRegions() {
  1428   _region_bm.clear();
  1429   _card_bm.clear();
  1430   CalcLiveObjectsClosure calccl(false /*final*/,
  1431                                 nextMarkBitMap(), this,
  1432                                 &_region_bm, &_card_bm);
  1433   G1CollectedHeap *g1h = G1CollectedHeap::heap();
  1434   g1h->heap_region_iterate(&calccl);
  1436   do {
  1437     calccl.reset();
  1438     g1h->heap_region_iterate(&calccl);
  1439   } while (calccl.changed());
  1440 }
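  1441 // Parallel task used at cleanup to redo the live-object counting with the world stopped.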
  1442 class G1ParFinalCountTask: public AbstractGangTask {
  1443 protected:
  1444   G1CollectedHeap* _g1h;
  1445   CMBitMap* _bm;
  1446   size_t _n_workers;
  1447   size_t *_live_bytes;
  1448   size_t *_used_bytes;
  1449   BitMap* _region_bm;
  1450   BitMap* _card_bm;
  1451 public:
  1452   G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
  1453                       BitMap* region_bm, BitMap* card_bm) :
  1454     AbstractGangTask("G1 final counting"), _g1h(g1h),
  1455     _bm(bm), _region_bm(region_bm), _card_bm(card_bm)
  1456   {
  1457     if (ParallelGCThreads > 0)
  1458       _n_workers = _g1h->workers()->total_workers();
  1459     else
  1460       _n_workers = 1;
  1461     _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
  1462     _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
  1463   }
  1465   ~G1ParFinalCountTask() {
  1466     FREE_C_HEAP_ARRAY(size_t, _live_bytes);
  1467     FREE_C_HEAP_ARRAY(size_t, _used_bytes);
  1468   }
  1470   void work(int i) {
  1471     CalcLiveObjectsClosure calccl(true /*final*/,
  1472                                   _bm, _g1h->concurrent_mark(),
  1473                                   _region_bm, _card_bm);
  1474     calccl.no_yield();
  1475     if (G1CollectedHeap::use_parallel_gc_threads()) {
  1476       _g1h->heap_region_par_iterate_chunked(&calccl, i,
  1477                                             HeapRegion::FinalCountClaimValue);
  1478     } else {
  1479       _g1h->heap_region_iterate(&calccl);
  1480     }
  1481     assert(calccl.complete(), "Shouldn't have yielded!");
  1483     assert((size_t) i < _n_workers, "invariant");
  1484     _live_bytes[i] = calccl.tot_live();
  1485     _used_bytes[i] = calccl.tot_used();
  1486   }
  1487   size_t live_bytes()  {
  1488     size_t live_bytes = 0;
  1489     for (size_t i = 0; i < _n_workers; ++i)
  1490       live_bytes += _live_bytes[i];
  1491     return live_bytes;
  1492   }
  1493   size_t used_bytes()  {
  1494     size_t used_bytes = 0;
  1495     for (size_t i = 0; i < _n_workers; ++i)
  1496       used_bytes += _used_bytes[i];
  1497     return used_bytes;
  1498   }
  1499 };
  1501 class G1ParNoteEndTask;
  1503 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  1504   G1CollectedHeap* _g1;
  1505   int _worker_num;
  1506   size_t _max_live_bytes;
  1507   size_t _regions_claimed;
  1508   size_t _freed_bytes;
  1509   size_t _cleared_h_regions;
  1510   size_t _freed_regions;
  1511   UncleanRegionList* _unclean_region_list;
  1512   double _claimed_region_time;
  1513   double _max_region_time;
  1515 public:
  1516   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
  1517                              UncleanRegionList* list,
  1518                              int worker_num);
  1519   size_t freed_bytes() { return _freed_bytes; }
  1520   size_t cleared_h_regions() { return _cleared_h_regions; }
  1521   size_t freed_regions() { return  _freed_regions; }
  1522   UncleanRegionList* unclean_region_list() {
  1523     return _unclean_region_list;
  1524   }
  1526   bool doHeapRegion(HeapRegion *r);
  1528   size_t max_live_bytes() { return _max_live_bytes; }
  1529   size_t regions_claimed() { return _regions_claimed; }
  1530   double claimed_region_time_sec() { return _claimed_region_time; }
  1531   double max_region_time_sec() { return _max_region_time; }
  1532 };
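  1533 // Parallel task that notes the end of marking in each region and frees regions found to be totally empty.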
  1534 class G1ParNoteEndTask: public AbstractGangTask {
  1535   friend class G1NoteEndOfConcMarkClosure;
  1536 protected:
  1537   G1CollectedHeap* _g1h;
  1538   size_t _max_live_bytes;
  1539   size_t _freed_bytes;
  1540   ConcurrentMark::ParCleanupThreadState** _par_cleanup_thread_state;
  1541 public:
  1542   G1ParNoteEndTask(G1CollectedHeap* g1h,
  1543                    ConcurrentMark::ParCleanupThreadState**
  1544                    par_cleanup_thread_state) :
  1545     AbstractGangTask("G1 note end"), _g1h(g1h),
  1546     _max_live_bytes(0), _freed_bytes(0),
  1547     _par_cleanup_thread_state(par_cleanup_thread_state)
  1548   {}
  1550   void work(int i) {
  1551     double start = os::elapsedTime();
  1552     G1NoteEndOfConcMarkClosure g1_note_end(_g1h,
  1553                                            &_par_cleanup_thread_state[i]->list,
  1554                                            i);
  1555     if (G1CollectedHeap::use_parallel_gc_threads()) {
  1556       _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
  1557                                             HeapRegion::NoteEndClaimValue);
  1558     } else {
  1559       _g1h->heap_region_iterate(&g1_note_end);
  1560     }
  1561     assert(g1_note_end.complete(), "Shouldn't have yielded!");
  1563     // Now finish up freeing the current thread's regions.
  1564     _g1h->finish_free_region_work(g1_note_end.freed_bytes(),
  1565                                   g1_note_end.cleared_h_regions(),
  1566                                   0, NULL);
  1567     {
  1568       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  1569       _max_live_bytes += g1_note_end.max_live_bytes();
  1570       _freed_bytes += g1_note_end.freed_bytes();
  1571     }
  1572     double end = os::elapsedTime();
  1573     if (G1PrintParCleanupStats) {
  1574       gclog_or_tty->print("     Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
  1575                           "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
  1576                           i, start, end, (end-start)*1000.0,
  1577                           g1_note_end.regions_claimed(),
  1578                           g1_note_end.claimed_region_time_sec()*1000.0,
  1579                           g1_note_end.max_region_time_sec()*1000.0);
  1580     }
  1581   }
  1582   size_t max_live_bytes() { return _max_live_bytes; }
  1583   size_t freed_bytes() { return _freed_bytes; }
  1584 };
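  1585 // Parallel task that scrubs the remembered sets using the liveness information in the region and card bitmaps.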
  1586 class G1ParScrubRemSetTask: public AbstractGangTask {
  1587 protected:
  1588   G1RemSet* _g1rs;
  1589   BitMap* _region_bm;
  1590   BitMap* _card_bm;
  1591 public:
  1592   G1ParScrubRemSetTask(G1CollectedHeap* g1h,
  1593                        BitMap* region_bm, BitMap* card_bm) :
  1594     AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
  1595     _region_bm(region_bm), _card_bm(card_bm)
  1596   {}
  1598   void work(int i) {
  1599     if (G1CollectedHeap::use_parallel_gc_threads()) {
  1600       _g1rs->scrub_par(_region_bm, _card_bm, i,
  1601                        HeapRegion::ScrubRemSetClaimValue);
  1602     } else {
  1603       _g1rs->scrub(_region_bm, _card_bm);
  1604     }
  1605   }
  1607 };
  1609 G1NoteEndOfConcMarkClosure::
  1610 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
  1611                            UncleanRegionList* list,
  1612                            int worker_num)
  1613   : _g1(g1), _worker_num(worker_num),
  1614     _max_live_bytes(0), _regions_claimed(0),
  1615     _freed_bytes(0), _cleared_h_regions(0), _freed_regions(0),
  1616     _claimed_region_time(0.0), _max_region_time(0.0),
  1617     _unclean_region_list(list)
  1618 {}
  1620 bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *r) {
  1621   // We use a claim value of zero here because all regions
  1622   // were claimed with value 1 in the FinalCount task.
  1623   r->reset_gc_time_stamp();
  1624   if (!r->continuesHumongous()) {
  1625     double start = os::elapsedTime();
  1626     _regions_claimed++;
  1627     r->note_end_of_marking();
  1628     _max_live_bytes += r->max_live_bytes();
  1629     _g1->free_region_if_totally_empty_work(r,
  1630                                            _freed_bytes,
  1631                                            _cleared_h_regions,
  1632                                            _freed_regions,
  1633                                            _unclean_region_list,
  1634                                            true /*par*/);
  1635     double region_time = (os::elapsedTime() - start);
  1636     _claimed_region_time += region_time;
  1637     if (region_time > _max_region_time) _max_region_time = region_time;
  1638   }
  1639   return false;
  1640 }
  1642 void ConcurrentMark::cleanup() {
  1643   // world is stopped at this checkpoint
  1644   assert(SafepointSynchronize::is_at_safepoint(),
  1645          "world should be stopped");
  1646   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  1648   // If a full collection has happened, we shouldn't do this.
  1649   if (has_aborted()) {
  1650     g1h->set_marking_complete(); // So bitmap clearing isn't confused
  1651     return;
  1652   }
  1654   if (VerifyDuringGC) {
  1655     HandleMark hm;  // handle scope
  1656     gclog_or_tty->print(" VerifyDuringGC:(before)");
  1657     Universe::heap()->prepare_for_verify();
  1658     Universe::verify(/* allow dirty  */ true,
  1659                      /* silent       */ false,
  1660                      /* prev marking */ true);
  1661   }
  1663   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
  1664   g1p->record_concurrent_mark_cleanup_start();
  1666   double start = os::elapsedTime();
  1668   // Do counting once more with the world stopped for good measure.
  1669   G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
  1670                                         &_region_bm, &_card_bm);
  1671   if (G1CollectedHeap::use_parallel_gc_threads()) {
  1672     assert(g1h->check_heap_region_claim_values(
  1673                                                HeapRegion::InitialClaimValue),
  1674            "sanity check");
  1676     int n_workers = g1h->workers()->total_workers();
  1677     g1h->set_par_threads(n_workers);
  1678     g1h->workers()->run_task(&g1_par_count_task);
  1679     g1h->set_par_threads(0);
  1681     assert(g1h->check_heap_region_claim_values(
  1682                                              HeapRegion::FinalCountClaimValue),
  1683            "sanity check");
  1684   } else {
  1685     g1_par_count_task.work(0);
  1686   }
  1688   size_t known_garbage_bytes =
  1689     g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
  1690 #if 0
  1691   gclog_or_tty->print_cr("used %1.2lf, live %1.2lf, garbage %1.2lf",
  1692                          (double) g1_par_count_task.used_bytes() / (double) (1024 * 1024),
  1693                          (double) g1_par_count_task.live_bytes() / (double) (1024 * 1024),
  1694                          (double) known_garbage_bytes / (double) (1024 * 1024));
  1695 #endif // 0
  1696   g1p->set_known_garbage_bytes(known_garbage_bytes);
  1698   size_t start_used_bytes = g1h->used();
  1699   _at_least_one_mark_complete = true;
  1700   g1h->set_marking_complete();
  1702   double count_end = os::elapsedTime();
  1703   double this_final_counting_time = (count_end - start);
  1704   if (G1PrintParCleanupStats) {
  1705     gclog_or_tty->print_cr("Cleanup:");
  1706     gclog_or_tty->print_cr("  Finalize counting: %8.3f ms",
  1707                            this_final_counting_time*1000.0);
  1708   }
  1709   _total_counting_time += this_final_counting_time;
  1711   // Install newly created mark bitMap as "prev".
  1712   swapMarkBitMaps();
  1714   g1h->reset_gc_time_stamp();
  1716   // Note end of marking in all heap regions.
  1717   double note_end_start = os::elapsedTime();
  1718   G1ParNoteEndTask g1_par_note_end_task(g1h, _par_cleanup_thread_state);
  1719   if (G1CollectedHeap::use_parallel_gc_threads()) {
  1720     int n_workers = g1h->workers()->total_workers();
  1721     g1h->set_par_threads(n_workers);
  1722     g1h->workers()->run_task(&g1_par_note_end_task);
  1723     g1h->set_par_threads(0);
  1725     assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
  1726            "sanity check");
  1727   } else {
  1728     g1_par_note_end_task.work(0);
  1729   }
  1730   g1h->set_unclean_regions_coming(true);
  1731   double note_end_end = os::elapsedTime();
  1732   // Tell the mutators that there might be unclean regions coming...
  1733   if (G1PrintParCleanupStats) {
  1734     gclog_or_tty->print_cr("  note end of marking: %8.3f ms.",
  1735                            (note_end_end - note_end_start)*1000.0);
  1736   }
  1738   // Note that (if enabled) the RS scrubbing must be done before the
  1739   // call below, since it affects the metric by which we sort the heap
  1740   // regions.
  1741   if (G1ScrubRemSets) {
  1742     double rs_scrub_start = os::elapsedTime();
  1743     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
  1744     if (G1CollectedHeap::use_parallel_gc_threads()) {
  1745       int n_workers = g1h->workers()->total_workers();
  1746       g1h->set_par_threads(n_workers);
  1747       g1h->workers()->run_task(&g1_par_scrub_rs_task);
  1748       g1h->set_par_threads(0);
  1750       assert(g1h->check_heap_region_claim_values(
  1751                                             HeapRegion::ScrubRemSetClaimValue),
  1752              "sanity check");
  1753     } else {
  1754       g1_par_scrub_rs_task.work(0);
  1755     }
  1757     double rs_scrub_end = os::elapsedTime();
  1758     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
  1759     _total_rs_scrub_time += this_rs_scrub_time;
  1760   }
  1762   // this will also free any regions totally full of garbage objects,
  1763   // and sort the regions.
  1764   g1h->g1_policy()->record_concurrent_mark_cleanup_end(
  1765                         g1_par_note_end_task.freed_bytes(),
  1766                         g1_par_note_end_task.max_live_bytes());
  1768   // Statistics.
  1769   double end = os::elapsedTime();
  1770   _cleanup_times.add((end - start) * 1000.0);
  1772   // G1CollectedHeap::heap()->print();
  1773   // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
  1774   // G1CollectedHeap::heap()->get_gc_time_stamp());
  1776   if (PrintGC || PrintGCDetails) {
  1777     g1h->print_size_transition(gclog_or_tty,
  1778                                start_used_bytes,
  1779                                g1h->used(),
  1780                                g1h->capacity());
  1781   }
  1783   size_t cleaned_up_bytes = start_used_bytes - g1h->used();
  1784   g1p->decrease_known_garbage_bytes(cleaned_up_bytes);
  1786   // We need to make this be a "collection" so any collection pause that
  1787   // races with it goes around and waits for completeCleanup to finish.
  1788   g1h->increment_total_collections();
  1790   if (VerifyDuringGC) {
  1791     HandleMark hm;  // handle scope
  1792     gclog_or_tty->print(" VerifyDuringGC:(after)");
  1793     Universe::heap()->prepare_for_verify();
  1794     Universe::verify(/* allow dirty  */ true,
  1795                      /* silent       */ false,
  1796                      /* prev marking */ true);
  1797   }
  1798 }
  1800 void ConcurrentMark::completeCleanup() {
  1801   // A full collection intervened.
  1802   if (has_aborted()) return;
  1804   int first = 0;
  1805   int last = (int)MAX2(ParallelGCThreads, (size_t)1);
  1806   for (int t = 0; t < last; t++) {
  1807     UncleanRegionList* list = &_par_cleanup_thread_state[t]->list;
  1808     assert(list->well_formed(), "Inv");
  1809     HeapRegion* hd = list->hd();
  1810     while (hd != NULL) {
  1811       // Now finish up the other stuff.
  1812       hd->rem_set()->clear();
  1813       HeapRegion* next_hd = hd->next_from_unclean_list();
  1814       (void)list->pop();
  1815       assert(list->hd() == next_hd, "how not?");
  1816       _g1h->put_region_on_unclean_list(hd);
  1817       if (!hd->isHumongous()) {
  1818         // Increment the _free_regions count by one.
  1819         _g1h->finish_free_region_work(0, 0, 1, NULL);
  1820       }
  1821       hd = list->hd();
  1822       assert(hd == next_hd, "how not?");
  1823     }
  1824   }
  1825 }
  1828 class G1CMIsAliveClosure: public BoolObjectClosure {
  1829   G1CollectedHeap* _g1;
  1830  public:
  1831   G1CMIsAliveClosure(G1CollectedHeap* g1) :
  1832     _g1(g1)
  1833   {}
  1835   void do_object(oop obj) {
  1836     assert(false, "not to be invoked");
  1837   }
  1838   bool do_object_b(oop obj) {
  1839     HeapWord* addr = (HeapWord*)obj;
  1840     return addr != NULL &&
  1841            (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
  1842   }
  1843 };
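  1844 // Keep-alive closure for reference processing: marks the referenced object in the next bitmap and pushes it on the global mark stack.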
  1845 class G1CMKeepAliveClosure: public OopClosure {
  1846   G1CollectedHeap* _g1;
  1847   ConcurrentMark*  _cm;
  1848   CMBitMap*        _bitMap;
  1849  public:
  1850   G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
  1851                        CMBitMap* bitMap) :
  1852     _g1(g1), _cm(cm),
  1853     _bitMap(bitMap) {}
  1855   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  1856   virtual void do_oop(      oop* p) { do_oop_work(p); }
  1858   template <class T> void do_oop_work(T* p) {
  1859     oop thisOop = oopDesc::load_decode_heap_oop(p);
  1860     HeapWord* addr = (HeapWord*)thisOop;
  1861     if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(thisOop)) {
  1862       _bitMap->mark(addr);
  1863       _cm->mark_stack_push(thisOop);
  1864     }
  1865   }
  1866 };
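  1867 // Drains the global mark stack, applying the keep-alive closure above to each entry.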
  1868 class G1CMDrainMarkingStackClosure: public VoidClosure {
  1869   CMMarkStack*                  _markStack;
  1870   CMBitMap*                     _bitMap;
  1871   G1CMKeepAliveClosure*         _oopClosure;
  1872  public:
  1873   G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
  1874                                G1CMKeepAliveClosure* oopClosure) :
  1875     _bitMap(bitMap),
  1876     _markStack(markStack),
  1877     _oopClosure(oopClosure)
  1878   {}
  1880   void do_void() {
  1881     _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
  1882   }
  1883 };
  1885 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
  1886   ResourceMark rm;
  1887   HandleMark   hm;
  1888   G1CollectedHeap* g1h   = G1CollectedHeap::heap();
  1889   ReferenceProcessor* rp = g1h->ref_processor();
  1891   // Process weak references.
  1892   rp->setup_policy(clear_all_soft_refs);
  1893   assert(_markStack.isEmpty(), "mark stack should be empty");
  1895   G1CMIsAliveClosure   g1IsAliveClosure  (g1h);
  1896   G1CMKeepAliveClosure g1KeepAliveClosure(g1h, this, nextMarkBitMap());
  1897   G1CMDrainMarkingStackClosure
  1898     g1DrainMarkingStackClosure(nextMarkBitMap(), &_markStack,
  1899                                &g1KeepAliveClosure);
  1901   // XXXYYY  Also: copy the parallel ref processing code from CMS.
  1902   rp->process_discovered_references(&g1IsAliveClosure,
  1903                                     &g1KeepAliveClosure,
  1904                                     &g1DrainMarkingStackClosure,
  1905                                     NULL);
  1906   assert(_markStack.overflow() || _markStack.isEmpty(),
  1907          "mark stack should be empty (unless it overflowed)");
  1908   if (_markStack.overflow()) {
  1909     set_has_overflown();
  1910   }
  1912   rp->enqueue_discovered_references();
  1913   rp->verify_no_references_recorded();
  1914   assert(!rp->discovery_enabled(), "should have been disabled");
  1916   // Now clean up stale oops in SymbolTable and StringTable
  1917   SymbolTable::unlink(&g1IsAliveClosure);
  1918   StringTable::unlink(&g1IsAliveClosure);
  1919 }
  1921 void ConcurrentMark::swapMarkBitMaps() {
  1922   CMBitMapRO* temp = _prevMarkBitMap;
  1923   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
  1924   _nextMarkBitMap  = (CMBitMap*)  temp;
  1925 }
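  1926 // Gang task for the remark pause: each worker drives its CMTask until marking completes or the global stack overflows.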
  1927 class CMRemarkTask: public AbstractGangTask {
  1928 private:
  1929   ConcurrentMark *_cm;
  1931 public:
  1932   void work(int worker_i) {
  1933     // Since all available tasks are actually started, we should
  1934     // only proceed if we're supposed to be active.
  1935     if ((size_t)worker_i < _cm->active_tasks()) {
  1936       CMTask* task = _cm->task(worker_i);
  1937       task->record_start_time();
  1938       do {
  1939         task->do_marking_step(1000000000.0 /* something very large */);
  1940       } while (task->has_aborted() && !_cm->has_overflown());
  1941       // If we overflow, then we do not want to restart. We instead
  1942       // want to abort remark and do concurrent marking again.
  1943       task->record_end_time();
  1944     }
  1945   }
  1947   CMRemarkTask(ConcurrentMark* cm) :
  1948     AbstractGangTask("Par Remark"), _cm(cm) { }
  1949 };
  1951 void ConcurrentMark::checkpointRootsFinalWork() {
  1952   ResourceMark rm;
  1953   HandleMark   hm;
  1954   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  1956   g1h->ensure_parsability(false);
  1958   if (G1CollectedHeap::use_parallel_gc_threads()) {
  1959     G1CollectedHeap::StrongRootsScope srs(g1h);
  1960     // this is remark, so we'll use up all available threads
  1961     int active_workers = ParallelGCThreads;
  1962     set_phase(active_workers, false);
  1964     CMRemarkTask remarkTask(this);
  1965     // We will start all available threads, even if we decide that the
  1966     // active_workers will be fewer. The extra ones will just bail out
  1967     // immediately.
  1968     int n_workers = g1h->workers()->total_workers();
  1969     g1h->set_par_threads(n_workers);
  1970     g1h->workers()->run_task(&remarkTask);
  1971     g1h->set_par_threads(0);
  1972   } else {
  1973     G1CollectedHeap::StrongRootsScope srs(g1h);
  1974     // this is remark, so we'll use up all available threads
  1975     int active_workers = 1;
  1976     set_phase(active_workers, false);
  1978     CMRemarkTask remarkTask(this);
  1979     // We will start all available threads, even if we decide that the
  1980     // active_workers will be fewer. The extra ones will just bail out
  1981     // immediately.
  1982     remarkTask.work(0);
  1983   }
  1984   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  1985   guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");
  1987   print_stats();
  1989   if (!restart_for_overflow())
  1990     set_non_marking_state();
  1992 #if VERIFY_OBJS_PROCESSED
  1993   if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
  1994     gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
  1995                            _scan_obj_cl.objs_processed,
  1996                            ThreadLocalObjQueue::objs_enqueued);
  1997     guarantee(_scan_obj_cl.objs_processed ==
  1998               ThreadLocalObjQueue::objs_enqueued,
  1999               "Different number of objs processed and enqueued.");
  2000   }
  2001 #endif
  2002 }
  2004 #ifndef PRODUCT
  2006 class PrintReachableOopClosure: public OopClosure {
  2007 private:
  2008   G1CollectedHeap* _g1h;
  2009   CMBitMapRO*      _bitmap;
  2010   outputStream*    _out;
  2011   bool             _use_prev_marking;
  2012   bool             _all;
  2014 public:
  2015   PrintReachableOopClosure(CMBitMapRO*   bitmap,
  2016                            outputStream* out,
  2017                            bool          use_prev_marking,
  2018                            bool          all) :
  2019     _g1h(G1CollectedHeap::heap()),
  2020     _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking), _all(all) { }
  2022   void do_oop(narrowOop* p) { do_oop_work(p); }
  2023   void do_oop(      oop* p) { do_oop_work(p); }
  2025   template <class T> void do_oop_work(T* p) {
  2026     oop         obj = oopDesc::load_decode_heap_oop(p);
  2027     const char* str = NULL;
  2028     const char* str2 = "";
  2030     if (obj == NULL) {
  2031       str = "";
  2032     } else if (!_g1h->is_in_g1_reserved(obj)) {
  2033       str = " O";
  2034     } else {
  2035       HeapRegion* hr  = _g1h->heap_region_containing(obj);
  2036       guarantee(hr != NULL, "invariant");
  2037       bool over_tams = false;
  2038       if (_use_prev_marking) {
  2039         over_tams = hr->obj_allocated_since_prev_marking(obj);
  2040       } else {
  2041         over_tams = hr->obj_allocated_since_next_marking(obj);
  2042       }
  2043       bool marked = _bitmap->isMarked((HeapWord*) obj);
  2045       if (over_tams) {
  2046         str = " >";
  2047         if (marked) {
  2048           str2 = " AND MARKED";
  2049         }
  2050       } else if (marked) {
  2051         str = " M";
  2052       } else {
  2053         str = " NOT";
  2054       }
  2055     }
  2057     _out->print_cr("  "PTR_FORMAT": "PTR_FORMAT"%s%s",
  2058                    p, (void*) obj, str, str2);
  2059   }
  2060 };
  2062 class PrintReachableObjectClosure : public ObjectClosure {
  2063 private:
  2064   CMBitMapRO*   _bitmap;
  2065   outputStream* _out;
  2066   bool          _use_prev_marking;
  2067   bool          _all;
  2068   HeapRegion*   _hr;
  2070 public:
  2071   PrintReachableObjectClosure(CMBitMapRO*   bitmap,
  2072                               outputStream* out,
  2073                               bool          use_prev_marking,
  2074                               bool          all,
  2075                               HeapRegion*   hr) :
  2076     _bitmap(bitmap), _out(out),
  2077     _use_prev_marking(use_prev_marking), _all(all), _hr(hr) { }
  2079   void do_object(oop o) {
  2080     bool over_tams;
  2081     if (_use_prev_marking) {
  2082       over_tams = _hr->obj_allocated_since_prev_marking(o);
  2083     } else {
  2084       over_tams = _hr->obj_allocated_since_next_marking(o);
  2085     }
  2086     bool marked = _bitmap->isMarked((HeapWord*) o);
  2087     bool print_it = _all || over_tams || marked;
  2089     if (print_it) {
  2090       _out->print_cr(" "PTR_FORMAT"%s",
  2091                      o, (over_tams) ? " >" : (marked) ? " M" : "");
  2092       PrintReachableOopClosure oopCl(_bitmap, _out, _use_prev_marking, _all);
  2093       o->oop_iterate(&oopCl);
  2094     }
  2095   }
  2096 };
  2098 class PrintReachableRegionClosure : public HeapRegionClosure {
  2099 private:
  2100   CMBitMapRO*   _bitmap;
  2101   outputStream* _out;
  2102   bool          _use_prev_marking;
  2103   bool          _all;
  2105 public:
  2106   bool doHeapRegion(HeapRegion* hr) {
  2107     HeapWord* b = hr->bottom();
  2108     HeapWord* e = hr->end();
  2109     HeapWord* t = hr->top();
  2110     HeapWord* p = NULL;
  2111     if (_use_prev_marking) {
  2112       p = hr->prev_top_at_mark_start();
  2113     } else {
  2114       p = hr->next_top_at_mark_start();
  2115     }
  2116     _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
  2117                    "TAMS: "PTR_FORMAT, b, e, t, p);
  2118     _out->cr();
  2120     HeapWord* from = b;
  2121     HeapWord* to   = t;
  2123     if (to > from) {
  2124       _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
  2125       _out->cr();
  2126       PrintReachableObjectClosure ocl(_bitmap, _out,
  2127                                       _use_prev_marking, _all, hr);
  2128       hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
  2129       _out->cr();
  2130     }
  2132     return false;
  2133   }
  2135   PrintReachableRegionClosure(CMBitMapRO*   bitmap,
  2136                               outputStream* out,
  2137                               bool          use_prev_marking,
  2138                               bool          all) :
  2139     _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking), _all(all) { }
  2140 };
  2142 void ConcurrentMark::print_reachable(const char* str,
  2143                                      bool use_prev_marking,
  2144                                      bool all) {
  2145   gclog_or_tty->cr();
  2146   gclog_or_tty->print_cr("== Doing heap dump... ");
  2148   if (G1PrintReachableBaseFile == NULL) {
  2149     gclog_or_tty->print_cr("  #### error: no base file defined");
  2150     return;
  2151   }
  2153   if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
  2154       (JVM_MAXPATHLEN - 1)) {
  2155     gclog_or_tty->print_cr("  #### error: file name too long");
  2156     return;
  2157   }
  2159   char file_name[JVM_MAXPATHLEN];
  2160   sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
  2161   gclog_or_tty->print_cr("  dumping to file %s", file_name);
  2163   fileStream fout(file_name);
  2164   if (!fout.is_open()) {
  2165     gclog_or_tty->print_cr("  #### error: could not open file");
  2166     return;
  2167   }
  2169   outputStream* out = &fout;
  2171   CMBitMapRO* bitmap = NULL;
  2172   if (use_prev_marking) {
  2173     bitmap = _prevMarkBitMap;
  2174   } else {
  2175     bitmap = _nextMarkBitMap;
  2176   }
  2178   out->print_cr("-- USING %s", (use_prev_marking) ? "PTAMS" : "NTAMS");
  2179   out->cr();
  2181   out->print_cr("--- ITERATING OVER REGIONS");
  2182   out->cr();
  2183   PrintReachableRegionClosure rcl(bitmap, out, use_prev_marking, all);
  2184   _g1h->heap_region_iterate(&rcl);
  2185   out->cr();
  2187   gclog_or_tty->print_cr("  done");
  2188   gclog_or_tty->flush();
  2189 }
  2191 #endif // PRODUCT
  2193 // This note is for drainAllSATBBuffers and the code in between.
  2194 // In the future we could reuse a task to do this work during an
  2195 // evacuation pause (since now tasks are not active and can be claimed
  2196 // during an evacuation pause). This was a late change to the code and
  2197 // is currently not being taken advantage of.
  2199 class CMGlobalObjectClosure : public ObjectClosure {
  2200 private:
  2201   ConcurrentMark* _cm;
  2203 public:
  2204   void do_object(oop obj) {
  2205     _cm->deal_with_reference(obj);
  2206   }
  2208   CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
  2209 };
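  2210 // Ensures that obj is marked; if it lies below the global finger it is also pushed on the global mark stack so that it will be scanned.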
  2211 void ConcurrentMark::deal_with_reference(oop obj) {
  2212   if (verbose_high())
  2213     gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
  2214                            (void*) obj);
  2217   HeapWord* objAddr = (HeapWord*) obj;
  2218   assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
  2219   if (_g1h->is_in_g1_reserved(objAddr)) {
  2220     assert(obj != NULL, "is_in_g1_reserved should ensure this");
  2221     HeapRegion* hr = _g1h->heap_region_containing(obj);
  2222     if (_g1h->is_obj_ill(obj, hr)) {
  2223       if (verbose_high())
  2224         gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
  2225                                "marked", (void*) obj);
  2227       // we need to mark it first
  2228       if (_nextMarkBitMap->parMark(objAddr)) {
  2229         // No OrderAccess::store_load() is needed. It is implicit in the
  2230         // CAS done in parMark(objAddr) above
  2231         HeapWord* finger = _finger;
  2232         if (objAddr < finger) {
  2233           if (verbose_high())
  2234             gclog_or_tty->print_cr("[global] below the global finger "
  2235                                    "("PTR_FORMAT"), pushing it", finger);
  2236           if (!mark_stack_push(obj)) {
  2237             if (verbose_low())
  2238               gclog_or_tty->print_cr("[global] global stack overflow during "
  2239                                      "deal_with_reference");
  2240           }
  2241         }
  2242       }
  2243     }
  2244   }
  2245 }
  2247 void ConcurrentMark::drainAllSATBBuffers() {
  2248   CMGlobalObjectClosure oc(this);
  2249   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  2250   satb_mq_set.set_closure(&oc);
  2252   while (satb_mq_set.apply_closure_to_completed_buffer()) {
  2253     if (verbose_medium())
  2254       gclog_or_tty->print_cr("[global] processed an SATB buffer");
  2255   }
  2257   // no need to check whether we should do this, as this is only
  2258   // called during an evacuation pause
  2259   satb_mq_set.iterate_closure_all_threads();
  2261   satb_mq_set.set_closure(NULL);
  2262   assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
  2263 }
  2265 void ConcurrentMark::markPrev(oop p) {
  2266   // Note we are overriding the read-only view of the prev map here, via
  2267   // the cast.
  2268   ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
  2269 }
  2271 void ConcurrentMark::clear(oop p) {
  2272   assert(p != NULL && p->is_oop(), "expected an oop");
  2273   HeapWord* addr = (HeapWord*)p;
  2274   assert(addr >= _nextMarkBitMap->startWord() &&
  2275          addr < _nextMarkBitMap->endWord(), "in a region");
  2277   _nextMarkBitMap->clear(addr);
  2278 }
  2280 void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
  2281   // Note we are overriding the read-only view of the prev map here, via
  2282   // the cast.
  2283   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
  2284   _nextMarkBitMap->clearRange(mr);
  2285 }
  2287 HeapRegion*
  2288 ConcurrentMark::claim_region(int task_num) {
  2289   // "checkpoint" the finger
  2290   HeapWord* finger = _finger;
  2292   // _heap_end will not change underneath our feet; it only changes at
  2293   // yield points.
  2294   while (finger < _heap_end) {
  2295     assert(_g1h->is_in_g1_reserved(finger), "invariant");
  2297     // is the gap between reading the finger and doing the CAS too long?
  2299     HeapRegion* curr_region   = _g1h->heap_region_containing(finger);
  2300     HeapWord*   bottom        = curr_region->bottom();
  2301     HeapWord*   end           = curr_region->end();
  2302     HeapWord*   limit         = curr_region->next_top_at_mark_start();
  2304     if (verbose_low())
  2305       gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
  2306                              "["PTR_FORMAT", "PTR_FORMAT"), "
  2307                              "limit = "PTR_FORMAT,
  2308                              task_num, curr_region, bottom, end, limit);
  2310     HeapWord* res =
  2311       (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
  2312     if (res == finger) {
  2313       // we succeeded
  2315       // notice that _finger == end cannot be guaranteed here since
  2316       // someone else might have moved the finger even further
  2317       assert(_finger >= end, "the finger should have moved forward");
  2319       if (verbose_low())
  2320         gclog_or_tty->print_cr("[%d] we were successful with region = "
  2321                                PTR_FORMAT, task_num, curr_region);
  2323       if (limit > bottom) {
  2324         if (verbose_low())
  2325           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
  2326                                  "returning it ", task_num, curr_region);
  2327         return curr_region;
  2328       } else {
  2329         assert(limit == bottom,
  2330                "the region limit should be at bottom");
  2331         if (verbose_low())
  2332           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
  2333                                  "returning NULL", task_num, curr_region);
  2334         // we return NULL and the caller should try calling
  2335         // claim_region() again.
  2336         return NULL;
  2337       }
  2338     } else {
  2339       assert(_finger > finger, "the finger should have moved forward");
  2340       if (verbose_low())
  2341         gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
  2342                                "global finger = "PTR_FORMAT", "
  2343                                "our finger = "PTR_FORMAT,
  2344                                task_num, _finger, finger);
  2346       // read it again
  2347       finger = _finger;
  2348     }
  2349   }
  2351   return NULL;
  2352 }
  2354 bool ConcurrentMark::invalidate_aborted_regions_in_cset() {
  2355   bool result = false;
  2356   for (int i = 0; i < (int)_max_task_num; ++i) {
  2357     CMTask* the_task = _tasks[i];
  2358     MemRegion mr = the_task->aborted_region();
  2359     if (mr.start() != NULL) {
  2360       assert(mr.end() != NULL, "invariant");
  2361       assert(mr.word_size() > 0, "invariant");
  2362       HeapRegion* hr = _g1h->heap_region_containing(mr.start());
  2363       assert(hr != NULL, "invariant");
  2364       if (hr->in_collection_set()) {
  2365         // The region points into the collection set
  2366         the_task->set_aborted_region(MemRegion());
  2367         result = true;
  2368       }
  2369     }
  2370   }
  2371   return result;
  2372 }
  2374 bool ConcurrentMark::has_aborted_regions() {
  2375   for (int i = 0; i < (int)_max_task_num; ++i) {
  2376     CMTask* the_task = _tasks[i];
  2377     MemRegion mr = the_task->aborted_region();
  2378     if (mr.start() != NULL) {
  2379       assert(mr.end() != NULL, "invariant");
  2380       assert(mr.word_size() > 0, "invariant");
  2381       return true;
  2382     }
  2383   }
  2384   return false;
  2385 }
  2387 void ConcurrentMark::oops_do(OopClosure* cl) {
  2388   if (_markStack.size() > 0 && verbose_low())
  2389     gclog_or_tty->print_cr("[global] scanning the global marking stack, "
  2390                            "size = %d", _markStack.size());
  2391   // we first iterate over the contents of the mark stack...
  2392   _markStack.oops_do(cl);
  2394   for (int i = 0; i < (int)_max_task_num; ++i) {
  2395     OopTaskQueue* queue = _task_queues->queue((int)i);
  2397     if (queue->size() > 0 && verbose_low())
  2398       gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
  2399                              "size = %d", i, queue->size());
  2401     // ...then over the contents of all the task queues.
  2402     queue->oops_do(cl);
  2403   }
  2405   // Invalidate any entries in the region stack that point into
  2406   // the collection set.
  2407   if (_regionStack.invalidate_entries_into_cset()) {
  2408     // otherwise, any gray objects copied during the evacuation pause
  2409     // might not be visited.
  2410     assert(_should_gray_objects, "invariant");
  2411   }
  2413   // Invalidate any aborted regions, recorded in the individual CM
  2414   // tasks, that point into the collection set.
  2415   if (invalidate_aborted_regions_in_cset()) {
  2416     // otherwise, any gray objects copied during the evacuation pause
  2417     // might not be visited.
  2418     assert(_should_gray_objects, "invariant");
  2419   }
  2420 }
  2423 void ConcurrentMark::clear_marking_state() {
  2424   _markStack.setEmpty();
  2425   _markStack.clear_overflow();
  2426   _regionStack.setEmpty();
  2427   _regionStack.clear_overflow();
  2428   clear_has_overflown();
  2429   _finger = _heap_start;
  2431   for (int i = 0; i < (int)_max_task_num; ++i) {
  2432     OopTaskQueue* queue = _task_queues->queue(i);
  2433     queue->set_empty();
  2434     // Clear any partial regions from the CMTasks
  2435     _tasks[i]->clear_aborted_region();
  2436   }
  2437 }
  2439 void ConcurrentMark::print_stats() {
  2440   if (verbose_stats()) {
  2441     gclog_or_tty->print_cr("---------------------------------------------------------------------");
  2442     for (size_t i = 0; i < _active_tasks; ++i) {
  2443       _tasks[i]->print_stats();
  2444       gclog_or_tty->print_cr("---------------------------------------------------------------------");
  2445     }
  2446   }
  2447 }
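  2448 // Closure that marks objects in the collection set and scans them using a bounded local mark stack; object arrays are pushed back in increments to limit stack growth.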
  2449 class CSMarkOopClosure: public OopClosure {
  2450   friend class CSMarkBitMapClosure;
  2452   G1CollectedHeap* _g1h;
  2453   CMBitMap*        _bm;
  2454   ConcurrentMark*  _cm;
  2455   oop*             _ms;
  2456   jint*            _array_ind_stack;
  2457   int              _ms_size;
  2458   int              _ms_ind;
  2459   int              _array_increment;
  2461   bool push(oop obj, int arr_ind = 0) {
  2462     if (_ms_ind == _ms_size) {
  2463       gclog_or_tty->print_cr("Mark stack is full.");
  2464       return false;
  2465     }
  2466     _ms[_ms_ind] = obj;
  2467     if (obj->is_objArray()) _array_ind_stack[_ms_ind] = arr_ind;
  2468     _ms_ind++;
  2469     return true;
  2470   }
  2472   oop pop() {
  2473     if (_ms_ind == 0) return NULL;
  2474     else {
  2475       _ms_ind--;
  2476       return _ms[_ms_ind];
  2477     }
  2478   }
  2480   template <class T> bool drain() {
  2481     while (_ms_ind > 0) {
  2482       oop obj = pop();
  2483       assert(obj != NULL, "Since index was non-zero.");
  2484       if (obj->is_objArray()) {
  2485         jint arr_ind = _array_ind_stack[_ms_ind];
  2486         objArrayOop aobj = objArrayOop(obj);
  2487         jint len = aobj->length();
  2488         jint next_arr_ind = arr_ind + _array_increment;
  2489         if (next_arr_ind < len) {
  2490           push(obj, next_arr_ind);
  2491         }
  2492         // Now process this portion of the array.
  2493         int lim = MIN2(next_arr_ind, len);
  2494         for (int j = arr_ind; j < lim; j++) {
  2495           do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j));
  2496         }
  2498       } else {
  2499         obj->oop_iterate(this);
  2501       if (abort()) return false;
  2502     }
  2503     return true;
  2504   }
  2506 public:
  2507   CSMarkOopClosure(ConcurrentMark* cm, int ms_size) :
  2508     _g1h(G1CollectedHeap::heap()),
  2509     _cm(cm),
  2510     _bm(cm->nextMarkBitMap()),
  2511     _ms_size(ms_size), _ms_ind(0),
  2512     _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
  2513     _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
  2514     _array_increment(MAX2(ms_size/8, 16))
  2515   {}
  2517   ~CSMarkOopClosure() {
  2518     FREE_C_HEAP_ARRAY(oop, _ms);
  2519     FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
  2520   }
  2522   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  2523   virtual void do_oop(      oop* p) { do_oop_work(p); }
  2525   template <class T> void do_oop_work(T* p) {
  2526     T heap_oop = oopDesc::load_heap_oop(p);
  2527     if (oopDesc::is_null(heap_oop)) return;
  2528     oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
  2529     if (obj->is_forwarded()) {
  2530       // If the object has already been forwarded, we have to make sure
  2531       // that it's marked.  So follow the forwarding pointer.  Note that
  2532       // this does the right thing for self-forwarding pointers in the
  2533       // evacuation failure case.
  2534       obj = obj->forwardee();
  2536     HeapRegion* hr = _g1h->heap_region_containing(obj);
  2537     if (hr != NULL) {
  2538       if (hr->in_collection_set()) {
  2539         if (_g1h->is_obj_ill(obj)) {
  2540           _bm->mark((HeapWord*)obj);
  2541           if (!push(obj)) {
  2542             gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
  2543             set_abort();
  2544           }
  2545         }
  2546       } else {
  2547         // Outside the collection set; we need to gray it
  2548         _cm->deal_with_reference(obj);
  2549       }
  2550     }
  2551   }
  2552 };
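  2553 // Walks the marked bits of a region, pushing each unforwarded object through CSMarkOopClosure and draining its stack.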
  2554 class CSMarkBitMapClosure: public BitMapClosure {
  2555   G1CollectedHeap* _g1h;
  2556   CMBitMap*        _bitMap;
  2557   ConcurrentMark*  _cm;
  2558   CSMarkOopClosure _oop_cl;
  2559 public:
  2560   CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :
  2561     _g1h(G1CollectedHeap::heap()),
  2562     _bitMap(cm->nextMarkBitMap()),
  2563     _oop_cl(cm, ms_size)
  2564   {}
  2566   ~CSMarkBitMapClosure() {}
  2568   bool do_bit(size_t offset) {
  2569     // convert offset into a HeapWord*
  2570     HeapWord* addr = _bitMap->offsetToHeapWord(offset);
  2571     assert(_bitMap->startWord() <= addr && addr < _bitMap->endWord(),
  2572            "address out of range");
  2573     assert(_bitMap->isMarked(addr), "tautology");
  2574     oop obj = oop(addr);
  2575     if (!obj->is_forwarded()) {
  2576       if (!_oop_cl.push(obj)) return false;
  2577       if (UseCompressedOops) {
  2578         if (!_oop_cl.drain<narrowOop>()) return false;
  2579       } else {
  2580         if (!_oop_cl.drain<oop>()) return false;
  2581       }
  2582     }
  2583     // Otherwise...
  2584     return true;
  2585   }
  2586 };
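  2587 // Iterates over the marked objects in a collection set region (up to
  2588 // NTAMS), skipping regions whose evacuation failed.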
  2589 class CompleteMarkingInCSHRClosure: public HeapRegionClosure {
  2590   CMBitMap* _bm;
  2591   CSMarkBitMapClosure _bit_cl;
  2592   enum SomePrivateConstants {
  2593     MSSize = 1000
  2594   };
  2595   bool _completed;
  2596 public:
  2597   CompleteMarkingInCSHRClosure(ConcurrentMark* cm) :
  2598     _bm(cm->nextMarkBitMap()),
  2599     _bit_cl(cm, MSSize),
  2600     _completed(true)
  2601   {}
  2603   ~CompleteMarkingInCSHRClosure() {}
  2605   bool doHeapRegion(HeapRegion* r) {
  2606     if (!r->evacuation_failed()) {
  2607       MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
  2608       if (!mr.is_empty()) {
  2609         if (!_bm->iterate(&_bit_cl, mr)) {
  2610           _completed = false;
  2611           return true;
  2612         }
  2613       }
  2614     }
  2615     return false;
  2616   }
  2618   bool completed() { return _completed; }
  2619 };
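  2620 // Clears the next mark bitmap over the used portion of each region, unless the region's evacuation failed.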
  2621 class ClearMarksInHRClosure: public HeapRegionClosure {
  2622   CMBitMap* _bm;
  2623 public:
  2624   ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { }
  2626   bool doHeapRegion(HeapRegion* r) {
  2627     if (!r->used_region().is_empty() && !r->evacuation_failed()) {
  2628       MemRegion usedMR = r->used_region();
  2629       _bm->clearRange(usedMR);
  2630     }
  2631     return false;
  2632   }
  2633 };
  2635 void ConcurrentMark::complete_marking_in_collection_set() {
  2636   G1CollectedHeap* g1h =  G1CollectedHeap::heap();
  2638   if (!g1h->mark_in_progress()) {
  2639     g1h->g1_policy()->record_mark_closure_time(0.0);
  2640     return;
  2641   }
  2643   int i = 1;
  2644   double start = os::elapsedTime();
  2645   while (true) {
  2646     i++;
  2647     CompleteMarkingInCSHRClosure cmplt(this);
  2648     g1h->collection_set_iterate(&cmplt);
  2649     if (cmplt.completed()) break;
  2650   }
  2651   double end_time = os::elapsedTime();
  2652   double elapsed_time_ms = (end_time - start) * 1000.0;
  2653   g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
  2655   ClearMarksInHRClosure clr(nextMarkBitMap());
  2656   g1h->collection_set_iterate(&clr);
  2657 }
  2659 // The next two methods deal with the following optimisation. Some
  2660 // objects are gray by being marked and located above the finger. If
  2661 // they are copied below the finger during an evacuation pause, then
  2662 // they need to be pushed on the stack. The observation is that, if
  2663 // there are no regions in the collection set located above the
  2664 // finger, then the above cannot happen, hence we do not need to
  2665 // explicitly gray any objects when copying them to below the
  2666 // finger. The global stack will be scanned to ensure that, if it
  2667 // points to objects being copied, it will update their
  2668 // location. There is a tricky situation with the gray objects in
  2669 // region stack that are being copied, however. See the comment in
  2670 // newCSet().
  2672 void ConcurrentMark::newCSet() {
  2673   if (!concurrent_marking_in_progress())
  2674     // nothing to do if marking is not in progress
  2675     return;
  2677   // find what the lowest finger is among the global and local fingers
  2678   _min_finger = _finger;
  2679   for (int i = 0; i < (int)_max_task_num; ++i) {
  2680     CMTask* task = _tasks[i];
  2681     HeapWord* task_finger = task->finger();
  2682     if (task_finger != NULL && task_finger < _min_finger)
  2683       _min_finger = task_finger;
  2684   }
  2686   _should_gray_objects = false;
  2688   // This fixes a very subtle and frustrating bug. It might be the case
  2689   // that, during an evacuation pause, heap regions that contain
  2690   // objects that are gray (by being in regions contained in the
  2691   // region stack) are included in the collection set. Since such gray
  2692   // objects will be moved, and because it's not easy to redirect
  2693   // region stack entries to point to a new location (because objects
  2694   // in one region might be scattered to multiple regions after they
  2695   // are copied), one option is to ensure that all marked objects
  2696   // copied during a pause are pushed on the stack. Notice, however,
  2697   // that this problem can only happen when the region stack is not
  2698   // empty during an evacuation pause. So, we make the fix a bit less
  2699   // conservative and ensure that regions are pushed on the stack,
  2700   // irrespective of whether all collection set regions are below the
  2701   // finger, if the region stack is not empty. This is expected to be
  2702   // a rare case, so I don't think it's necessary to be smarter about it.
  2703   if (!region_stack_empty() || has_aborted_regions())
  2704     _should_gray_objects = true;
  2705 }
  2707 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
  2708   if (!concurrent_marking_in_progress())
  2709     return;
  2711   HeapWord* region_end = hr->end();
  2712   if (region_end > _min_finger)
  2713     _should_gray_objects = true;
  2714 }
  2716 // abandon current marking iteration due to a Full GC
  2717 void ConcurrentMark::abort() {
  2718   // Clear all marks to force marking thread to do nothing
  2719   _nextMarkBitMap->clearAll();
  2720   // Empty mark stack
  2721   clear_marking_state();
  2722   for (int i = 0; i < (int)_max_task_num; ++i) {
  2723     _tasks[i]->clear_region_fields();
  2724   }
  2725   _has_aborted = true;
  2727   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  2728   satb_mq_set.abandon_partial_marking();
  2729   // This can be called either during or outside marking; we'll read
  2730   // the expected_active value from the SATB queue set.
  2731   satb_mq_set.set_active_all_threads(
  2732                                  false, /* new active value */
  2733                                  satb_mq_set.is_active() /* expected_active */);
  2734 }
  2736 static void print_ms_time_info(const char* prefix, const char* name,
  2737                                NumberSeq& ns) {
  2738   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
  2739                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
  2740   if (ns.num() > 0) {
  2741     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
  2742                            prefix, ns.sd(), ns.maximum());
  2743   }
  2744 }
  2746 void ConcurrentMark::print_summary_info() {
  2747   gclog_or_tty->print_cr(" Concurrent marking:");
  2748   print_ms_time_info("  ", "init marks", _init_times);
  2749   print_ms_time_info("  ", "remarks", _remark_times);
  2750   {
  2751     print_ms_time_info("     ", "final marks", _remark_mark_times);
  2752     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
  2753   }
  2755   print_ms_time_info("  ", "cleanups", _cleanup_times);
  2756   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
  2757                          _total_counting_time,
  2758                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
  2759                           (double)_cleanup_times.num()
  2760                          : 0.0));
  2761   if (G1ScrubRemSets) {
  2762     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
  2763                            _total_rs_scrub_time,
  2764                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
  2765                             (double)_cleanup_times.num()
  2766                            : 0.0));
  2767   }
  2768   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
  2769                          (_init_times.sum() + _remark_times.sum() +
  2770                           _cleanup_times.sum())/1000.0);
  2771   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
  2772                 "(%8.2f s marking, %8.2f s counting).",
  2773                 cmThread()->vtime_accum(),
  2774                 cmThread()->vtime_mark_accum(),
  2775                 cmThread()->vtime_count_accum());
  2776 }
  2778 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
  2779   _parallel_workers->print_worker_threads_on(st);
  2780 }
  2782 // Closures
  2783 // XXX: there seems to be a lot of code duplication here;
  2784 // should refactor and consolidate the shared code.
  2786 // Yield support for the concurrent phases.
  2789 // We take a break if someone is trying to stop the world.
  2790 bool ConcurrentMark::do_yield_check(int worker_i) {
  2791   if (should_yield()) {
  2792     if (worker_i == 0)
  2793       _g1h->g1_policy()->record_concurrent_pause();
  2794     cmThread()->yield();
  2795     if (worker_i == 0)
  2796       _g1h->g1_policy()->record_concurrent_pause_end();
  2797     return true;
  2798   } else {
  2799     return false;
  2800   }
  2801 }
  2803 bool ConcurrentMark::should_yield() {
  2804   return cmThread()->should_yield();
  2805 }
  2807 bool ConcurrentMark::containing_card_is_marked(void* p) {
  2808   size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
  2809   return _card_bm.at(offset >> CardTableModRefBS::card_shift);
  2810 }
  2812 bool ConcurrentMark::containing_cards_are_marked(void* start,
  2813                                                  void* last) {
  2814   return
  2815     containing_card_is_marked(start) &&
  2816     containing_card_is_marked(last);
  2817 }
  2819 #ifndef PRODUCT
  2820 // for debugging purposes
  2821 void ConcurrentMark::print_finger() {
  2822   gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
  2823                          _heap_start, _heap_end, _finger);
  2824   for (int i = 0; i < (int) _max_task_num; ++i) {
  2825     gclog_or_tty->print("   %d: "PTR_FORMAT, i, _tasks[i]->finger());
  2826   }
  2827   gclog_or_tty->print_cr("");
  2828 }
  2829 #endif
  2831 // Closure for iteration over bitmaps
  2832 class CMBitMapClosure : public BitMapClosure {
  2833 private:
  2834   // the bitmap that is being iterated over
  2835   CMBitMap*                   _nextMarkBitMap;
  2836   ConcurrentMark*             _cm;
  2837   CMTask*                     _task;
  2838   // true if we're scanning a heap region claimed by the task (so that
  2839   // we move the finger along), false if we're not, i.e. currently when
  2840   // scanning a heap region popped from the region stack (so that we
  2841   // do not move the task finger along; it'd be a mistake if we did so).
  2842   bool                        _scanning_heap_region;
  2844 public:
  2845   CMBitMapClosure(CMTask *task,
  2846                   ConcurrentMark* cm,
  2847                   CMBitMap* nextMarkBitMap)
  2848     :  _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
  2850   void set_scanning_heap_region(bool scanning_heap_region) {
  2851     _scanning_heap_region = scanning_heap_region;
  2852   }
  2854   bool do_bit(size_t offset) {
  2855     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
  2856     assert(_nextMarkBitMap->isMarked(addr), "invariant");
  2857     assert( addr < _cm->finger(), "invariant");
  2859     if (_scanning_heap_region) {
  2860       statsOnly( _task->increase_objs_found_on_bitmap() );
  2861       assert(addr >= _task->finger(), "invariant");
  2862       // We move that task's local finger along.
  2863       _task->move_finger_to(addr);
  2864     } else {
  2865       // We move the task's region finger along.
  2866       _task->move_region_finger_to(addr);
  2867     }
  2869     _task->scan_object(oop(addr));
  2870     // we only partially drain the local queue and global stack
  2871     _task->drain_local_queue(true);
  2872     _task->drain_global_stack(true);
  2874     // if the has_aborted flag has been raised, we need to bail out of
  2875     // the iteration
  2876     return !_task->has_aborted();
  2877   }
  2878 };
  2880 // Closure for iterating over objects, currently only used for
  2881 // processing SATB buffers.
  2882 class CMObjectClosure : public ObjectClosure {
  2883 private:
  2884   CMTask* _task;
  2886 public:
  2887   void do_object(oop obj) {
  2888     _task->deal_with_reference(obj);
  2889   }
  2891   CMObjectClosure(CMTask* task) : _task(task) { }
  2892 };
  2894 // Closure for iterating over object fields
  2895 class CMOopClosure : public OopClosure {
  2896 private:
  2897   G1CollectedHeap*   _g1h;
  2898   ConcurrentMark*    _cm;
  2899   CMTask*            _task;
  2901 public:
  2902   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  2903   virtual void do_oop(      oop* p) { do_oop_work(p); }
  2905   template <class T> void do_oop_work(T* p) {
  2906     assert(_g1h->is_in_g1_reserved((HeapWord*) p), "invariant");
  2907     assert(!_g1h->heap_region_containing((HeapWord*) p)->is_on_free_list(),
  2908            "invariant");
  2910     oop obj = oopDesc::load_decode_heap_oop(p);
  2911     if (_cm->verbose_high())
  2912       gclog_or_tty->print_cr("[%d] we're looking at location "
  2913                              "*"PTR_FORMAT" = "PTR_FORMAT,
  2914                              _task->task_id(), p, (void*) obj);
  2915     _task->deal_with_reference(obj);
  2916   }
  2918   CMOopClosure(G1CollectedHeap* g1h,
  2919                ConcurrentMark* cm,
  2920                CMTask* task)
  2921     : _g1h(g1h), _cm(cm), _task(task) { }
  2922 };
  2924 void CMTask::setup_for_region(HeapRegion* hr) {
  2925   // Separated the asserts so that we know which one fires.
  2926   assert(hr != NULL,
  2927         "claim_region() should have filtered out continues humongous regions");
  2928   assert(!hr->continuesHumongous(),
  2929         "claim_region() should have filtered out continues humongous regions");
  2931   if (_cm->verbose_low())
  2932     gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
  2933                            _task_id, hr);
  2935   _curr_region  = hr;
  2936   _finger       = hr->bottom();
  2937   update_region_limit();
  2938 }
  2940 void CMTask::update_region_limit() {
  2941   HeapRegion* hr            = _curr_region;
  2942   HeapWord* bottom          = hr->bottom();
  2943   HeapWord* limit           = hr->next_top_at_mark_start();
  2945   if (limit == bottom) {
  2946     if (_cm->verbose_low())
  2947       gclog_or_tty->print_cr("[%d] found an empty region "
  2948                              "["PTR_FORMAT", "PTR_FORMAT")",
  2949                              _task_id, bottom, limit);
  2950     // The region was collected underneath our feet.
  2951     // We set the finger to bottom to ensure that the bitmap
  2952     // iteration that will follow this will not do anything.
  2953     // (this is not a condition that holds when we set the region up,
  2954     // as the region is not supposed to be empty in the first place)
  2955     _finger = bottom;
  2956   } else if (limit >= _region_limit) {
  2957     assert(limit >= _finger, "peace of mind");
  2958   } else {
  2959     assert(limit < _region_limit, "only way to get here");
  2960     // This can happen under some pretty unusual circumstances.  An
  2961     // evacuation pause empties the region underneath our feet (NTAMS
  2962     // at bottom). We then do some allocation in the region (NTAMS
  2963     // stays at bottom), followed by the region being used as a GC
  2964     // alloc region (NTAMS will move to top() and the objects
  2965     // originally below it will be grayed). All objects now marked in
  2966     // the region are explicitly grayed, if below the global finger,
  2967     // and in fact we do not need to scan anything else. So, we simply
  2968     // set _finger to be limit to ensure that the bitmap iteration
  2969     // doesn't do anything.
  2970     _finger = limit;
  2971   }
  2973   _region_limit = limit;
  2974 }
  2976 void CMTask::giveup_current_region() {
  2977   assert(_curr_region != NULL, "invariant");
  2978   if (_cm->verbose_low())
  2979     gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
  2980                            _task_id, _curr_region);
  2981   clear_region_fields();
  2982 }
  2984 void CMTask::clear_region_fields() {
  2985   // Values for these three fields that indicate that we're not
  2986   // holding on to a region.
  2987   _curr_region   = NULL;
  2988   _finger        = NULL;
  2989   _region_limit  = NULL;
  2991   _region_finger = NULL;
  2992 }
  2994 void CMTask::reset(CMBitMap* nextMarkBitMap) {
  2995   guarantee(nextMarkBitMap != NULL, "invariant");
  2997   if (_cm->verbose_low())
  2998     gclog_or_tty->print_cr("[%d] resetting", _task_id);
  3000   _nextMarkBitMap                = nextMarkBitMap;
  3001   clear_region_fields();
  3002   assert(_aborted_region.is_empty(), "should have been cleared");
  3004   _calls                         = 0;
  3005   _elapsed_time_ms               = 0.0;
  3006   _termination_time_ms           = 0.0;
  3007   _termination_start_time_ms     = 0.0;
  3009 #if _MARKING_STATS_
  3010   _local_pushes                  = 0;
  3011   _local_pops                    = 0;
  3012   _local_max_size                = 0;
  3013   _objs_scanned                  = 0;
  3014   _global_pushes                 = 0;
  3015   _global_pops                   = 0;
  3016   _global_max_size               = 0;
  3017   _global_transfers_to           = 0;
  3018   _global_transfers_from         = 0;
  3019   _region_stack_pops             = 0;
  3020   _regions_claimed               = 0;
  3021   _objs_found_on_bitmap          = 0;
  3022   _satb_buffers_processed        = 0;
  3023   _steal_attempts                = 0;
  3024   _steals                        = 0;
  3025   _aborted                       = 0;
  3026   _aborted_overflow              = 0;
  3027   _aborted_cm_aborted            = 0;
  3028   _aborted_yield                 = 0;
  3029   _aborted_timed_out             = 0;
  3030   _aborted_satb                  = 0;
  3031   _aborted_termination           = 0;
  3032 #endif // _MARKING_STATS_
  3033 }
  3035 bool CMTask::should_exit_termination() {
  3036   regular_clock_call();
  3037   // This is called when we are in the termination protocol. We should
  3038   // quit if, for some reason, this task wants to abort or the global
  3039   // stack is not empty (this means that we can get work from it).
  3040   return !_cm->mark_stack_empty() || has_aborted();
  3041 }
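       // Note: CMTask extends TerminatorTerminator, so the method above is
       // what the parallel task terminator polls during offer_termination()
       // in do_marking_step() below; returning true lets a task leave the
       // termination protocol either to abort or to pick up work that has
       // appeared on the global stack.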
  3043 // This determines whether the method below will check both the local
  3044 // and global fingers when determining whether to push a gray object
  3045 // on the stack (value 1) or whether it will only check the global one
  3046 // (value 0). The tradeoffs are that the former will be a bit more
  3047 // accurate and possibly push less on the stack, but it might also be
  3048 // a little bit slower.
  3050 #define _CHECK_BOTH_FINGERS_      1
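       // Illustrative example (hypothetical addresses, not from the original
       // source): with the local finger at 0x1000, the region limit at 0x2000
       // and the global finger at 0x5000, a newly marked object at 0x0800 is
       // pushed (below the local finger, so the current bitmap iteration has
       // already passed it); one at 0x1800 is not pushed (the iteration will
       // still reach it); one at 0x3000 is pushed (below the global finger,
       // i.e. in an already-claimed area); and one at 0x6000 needs no push,
       // as the region holding it has yet to be claimed and scanned.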
  3052 void CMTask::deal_with_reference(oop obj) {
  3053   if (_cm->verbose_high())
  3054     gclog_or_tty->print_cr("[%d] we're dealing with reference = "PTR_FORMAT,
  3055                            _task_id, (void*) obj);
  3057   ++_refs_reached;
  3059   HeapWord* objAddr = (HeapWord*) obj;
  3060   assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
  3061   if (_g1h->is_in_g1_reserved(objAddr)) {
  3062     assert(obj != NULL, "is_in_g1_reserved should ensure this");
  3063     HeapRegion* hr =  _g1h->heap_region_containing(obj);
  3064     if (_g1h->is_obj_ill(obj, hr)) {
  3065       if (_cm->verbose_high())
  3066         gclog_or_tty->print_cr("[%d] "PTR_FORMAT" is not considered marked",
  3067                                _task_id, (void*) obj);
  3069       // we need to mark it first
  3070       if (_nextMarkBitMap->parMark(objAddr)) {
  3071         // No OrderAccess::store_load() is needed. It is implicit in the
  3072         // CAS done in parMark(objAddr) above
  3073         HeapWord* global_finger = _cm->finger();
  3075 #if _CHECK_BOTH_FINGERS_
  3076         // we will check both the local and global fingers
  3078         if (_finger != NULL && objAddr < _finger) {
  3079           if (_cm->verbose_high())
  3080             gclog_or_tty->print_cr("[%d] below the local finger ("PTR_FORMAT"), "
  3081                                    "pushing it", _task_id, _finger);
  3082           push(obj);
  3083         } else if (_curr_region != NULL && objAddr < _region_limit) {
  3084           // do nothing
  3085         } else if (objAddr < global_finger) {
  3086           // Notice that the global finger might be moving forward
  3087           // concurrently. This is not a problem. In the worst case, we
  3088           // mark the object while it is above the global finger and, by
  3089           // the time we read the global finger, it has moved forward
  3090           // past this object. In this case, the object will probably
  3091           // be visited when a task is scanning the region and will also
  3092           // be pushed on the stack. So, some duplicate work, but no
  3093           // correctness problems.
  3095           if (_cm->verbose_high())
  3096             gclog_or_tty->print_cr("[%d] below the global finger "
  3097                                    "("PTR_FORMAT"), pushing it",
  3098                                    _task_id, global_finger);
  3099           push(obj);
  3100         } else {
  3101           // do nothing
  3102         }
  3103 #else // _CHECK_BOTH_FINGERS_
  3104       // we will only check the global finger
  3106         if (objAddr < global_finger) {
  3107           // see long comment above
  3109           if (_cm->verbose_high())
  3110             gclog_or_tty->print_cr("[%d] below the global finger "
  3111                                    "("PTR_FORMAT"), pushing it",
  3112                                    _task_id, global_finger);
  3113           push(obj);
  3114         }
  3115 #endif // _CHECK_BOTH_FINGERS_
  3116       }
  3117     }
  3118   }
  3119 }
  3121 void CMTask::push(oop obj) {
  3122   HeapWord* objAddr = (HeapWord*) obj;
  3123   assert(_g1h->is_in_g1_reserved(objAddr), "invariant");
  3124   assert(!_g1h->heap_region_containing(objAddr)->is_on_free_list(),
  3125          "invariant");
  3126   assert(!_g1h->is_obj_ill(obj), "invariant");
  3127   assert(_nextMarkBitMap->isMarked(objAddr), "invariant");
  3129   if (_cm->verbose_high())
  3130     gclog_or_tty->print_cr("[%d] pushing "PTR_FORMAT, _task_id, (void*) obj);
  3132   if (!_task_queue->push(obj)) {
  3133     // The local task queue looks full. We need to push some entries
  3134     // to the global stack.
  3136     if (_cm->verbose_medium())
  3137       gclog_or_tty->print_cr("[%d] task queue overflow, "
  3138                              "moving entries to the global stack",
  3139                              _task_id);
  3140     move_entries_to_global_stack();
  3142     // this should succeed since, even if we overflow the global
  3143     // stack, we should have definitely removed some entries from the
  3144     // local queue. So, there must be space on it.
  3145     bool success = _task_queue->push(obj);
  3146     assert(success, "invariant");
  3147   }
  3149   statsOnly( int tmp_size = _task_queue->size();
  3150              if (tmp_size > _local_max_size)
  3151                _local_max_size = tmp_size;
  3152              ++_local_pushes );
  3153 }
  3155 void CMTask::reached_limit() {
  3156   assert(_words_scanned >= _words_scanned_limit ||
  3157          _refs_reached >= _refs_reached_limit ,
  3158          "shouldn't have been called otherwise");
  3159   regular_clock_call();
  3160 }
  3162 void CMTask::regular_clock_call() {
  3163   if (has_aborted())
  3164     return;
  3166   // First, we need to recalculate the words scanned and refs reached
  3167   // limits for the next clock call.
  3168   recalculate_limits();
  3170   // During the regular clock call we do the following
  3172   // (1) If an overflow has been flagged, then we abort.
  3173   if (_cm->has_overflown()) {
  3174     set_has_aborted();
  3175     return;
  3176   }
  3178   // If we are not concurrent (i.e. we're doing remark) we don't need
  3179   // to check anything else. The other steps are only needed during
  3180   // the concurrent marking phase.
  3181   if (!concurrent())
  3182     return;
  3184   // (2) If marking has been aborted for Full GC, then we also abort.
  3185   if (_cm->has_aborted()) {
  3186     set_has_aborted();
  3187     statsOnly( ++_aborted_cm_aborted );
  3188     return;
  3189   }
  3191   double curr_time_ms = os::elapsedVTime() * 1000.0;
  3193   // (3) If marking stats are enabled, then we update the step history.
  3194 #if _MARKING_STATS_
  3195   if (_words_scanned >= _words_scanned_limit)
  3196     ++_clock_due_to_scanning;
  3197   if (_refs_reached >= _refs_reached_limit)
  3198     ++_clock_due_to_marking;
  3200   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
  3201   _interval_start_time_ms = curr_time_ms;
  3202   _all_clock_intervals_ms.add(last_interval_ms);
  3204   if (_cm->verbose_medium()) {
  3205     gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
  3206                            "scanned = %d%s, refs reached = %d%s",
  3207                            _task_id, last_interval_ms,
  3208                            _words_scanned,
  3209                            (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
  3210                            _refs_reached,
  3211                            (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
  3212   }
  3213 #endif // _MARKING_STATS_
  3215   // (4) We check whether we should yield. If we have to, then we abort.
  3216   if (_cm->should_yield()) {
  3217     // We should yield. To do this we abort the task. The caller is
  3218     // responsible for yielding.
  3219     set_has_aborted();
  3220     statsOnly( ++_aborted_yield );
  3221     return;
  3222   }
  3224   // (5) We check whether we've reached our time quota. If we have,
  3225   // then we abort.
  3226   double elapsed_time_ms = curr_time_ms - _start_time_ms;
  3227   if (elapsed_time_ms > _time_target_ms) {
  3228     set_has_aborted();
  3229     _has_aborted_timed_out = true;
  3230     statsOnly( ++_aborted_timed_out );
  3231     return;
  3232   }
  3234   // (6) Finally, we check whether there are enough completed SATB
  3235   // buffers available for processing. If there are, we abort.
  3236   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  3237   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
  3238     if (_cm->verbose_low())
  3239       gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
  3240                              _task_id);
  3241     // we do need to process SATB buffers; we'll abort and restart
  3242     // the marking task to do so
  3243     set_has_aborted();
  3244     statsOnly( ++_aborted_satb );
  3245     return;
  3246   }
  3247 }
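       // Design note: the clock is driven by work counts (words scanned and
       // refs reached) rather than by reading the time on every object,
       // because os::elapsedVTime() is expensive; see the long comment ahead
       // of do_marking_step() below.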
  3249 void CMTask::recalculate_limits() {
  3250   _real_words_scanned_limit = _words_scanned + words_scanned_period;
  3251   _words_scanned_limit      = _real_words_scanned_limit;
  3253   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
  3254   _refs_reached_limit       = _real_refs_reached_limit;
  3255 }
  3257 void CMTask::decrease_limits() {
  3258   // This is called when we believe that we're going to do an infrequent
  3259   // operation which will increase the per byte scanned cost (i.e. move
  3260   // entries to/from the global stack). It basically tries to decrease the
  3261   // scanning limit so that the clock is called earlier.
  3263   if (_cm->verbose_medium())
  3264     gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
  3266   _words_scanned_limit = _real_words_scanned_limit -
  3267     3 * words_scanned_period / 4;
  3268   _refs_reached_limit  = _real_refs_reached_limit -
  3269     3 * refs_reached_period / 4;
  3270 }
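       // Worked example of the arithmetic above: if the limits were last
       // recalculated after S scanned words, the real limit is
       // S + words_scanned_period, so the active limit becomes
       // S + words_scanned_period / 4 and the clock fires after at most a
       // quarter period of further scanning (likewise for refs reached).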
  3272 void CMTask::move_entries_to_global_stack() {
  3273   // local array where we'll store the entries that will be popped
  3274   // from the local queue
  3275   oop buffer[global_stack_transfer_size];
  3277   int n = 0;
  3278   oop obj;
  3279   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
  3280     buffer[n] = obj;
  3281     ++n;
  3282   }
  3284   if (n > 0) {
  3285     // we popped at least one entry from the local queue
  3287     statsOnly( ++_global_transfers_to; _local_pops += n );
  3289     if (!_cm->mark_stack_push(buffer, n)) {
  3290       if (_cm->verbose_low())
  3291         gclog_or_tty->print_cr("[%d] aborting due to global stack overflow", _task_id);
  3292       set_has_aborted();
  3293     } else {
  3294       // the transfer was successful
  3296       if (_cm->verbose_medium())
  3297         gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
  3298                                _task_id, n);
  3299       statsOnly( int tmp_size = _cm->mark_stack_size();
  3300                  if (tmp_size > _global_max_size)
  3301                    _global_max_size = tmp_size;
  3302                  _global_pushes += n );
  3303     }
  3304   }
  3306   // this operation was quite expensive, so decrease the limits
  3307   decrease_limits();
  3308 }
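       // Design note: entries only ever move between a local queue and the
       // global stack in chunks of up to global_stack_transfer_size, which
       // amortizes the synchronization cost of the shared stack (see the
       // description of the Global Mark Stack before do_marking_step()).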
  3310 void CMTask::get_entries_from_global_stack() {
  3311   // local array where we'll store the entries that will be popped
  3312   // from the global stack.
  3313   oop buffer[global_stack_transfer_size];
  3314   int n;
  3315   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
  3316   assert(n <= global_stack_transfer_size,
  3317          "we should not pop more than the given limit");
  3318   if (n > 0) {
  3319     // yes, we did actually pop at least one entry
  3321     statsOnly( ++_global_transfers_from; _global_pops += n );
  3322     if (_cm->verbose_medium())
  3323       gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
  3324                              _task_id, n);
  3325     for (int i = 0; i < n; ++i) {
  3326       bool success = _task_queue->push(buffer[i]);
  3327       // We only call this when the local queue is empty or under a
  3328       // given target limit. So, we do not expect this push to fail.
  3329       assert(success, "invariant");
  3330     }
  3332     statsOnly( int tmp_size = _task_queue->size();
  3333                if (tmp_size > _local_max_size)
  3334                  _local_max_size = tmp_size;
  3335                _local_pushes += n );
  3336   }
  3338   // this operation was quite expensive, so decrease the limits
  3339   decrease_limits();
  3340 }
  3342 void CMTask::drain_local_queue(bool partially) {
  3343   if (has_aborted())
  3344     return;
  3346   // Decide what the target size is, depending on whether we're going to
  3347   // drain it partially (so that other tasks can steal if they run out
  3348   // of things to do) or totally (at the very end).
  3349   size_t target_size;
  3350   if (partially)
  3351     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
  3352   else
  3353     target_size = 0;
  3355   if (_task_queue->size() > target_size) {
  3356     if (_cm->verbose_high())
  3357       gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
  3358                              _task_id, target_size);
  3360     oop obj;
  3361     bool ret = _task_queue->pop_local(obj);
  3362     while (ret) {
  3363       statsOnly( ++_local_pops );
  3365       if (_cm->verbose_high())
  3366         gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
  3367                                (void*) obj);
  3369       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
  3370       assert(!_g1h->heap_region_containing(obj)->is_on_free_list(),
  3371              "invariant");
  3373       scan_object(obj);
  3375       if (_task_queue->size() <= target_size || has_aborted())
  3376         ret = false;
  3377       else
  3378         ret = _task_queue->pop_local(obj);
  3379     }
  3381     if (_cm->verbose_high())
  3382       gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
  3383                              _task_id, _task_queue->size());
  3384   }
  3385 }
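       // Usage note: with partially == true the queue is only drained down
       // to min(max_elems / 3, GCDrainStackTargetSize) entries (e.g. down to
       // 64 under that flag's assumed default), deliberately leaving work
       // available for other tasks to steal; with partially == false the
       // queue is drained completely.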
  3387 void CMTask::drain_global_stack(bool partially) {
  3388   if (has_aborted())
  3389     return;
  3391   // We have a policy to drain the local queue before we attempt to
  3392   // drain the global stack.
  3393   assert(partially || _task_queue->size() == 0, "invariant");
  3395   // Decide what the target size is, depending on whether we're going to
  3396   // drain it partially (so that other tasks can steal if they run out
  3397   // of things to do) or totally (at the very end).  Notice that,
  3398   // because we move entries from the global stack in chunks or
  3399   // because another task might be doing the same, we might in fact
  3400   // drop below the target. But, this is not a problem.
  3401   size_t target_size;
  3402   if (partially)
  3403     target_size = _cm->partial_mark_stack_size_target();
  3404   else
  3405     target_size = 0;
  3407   if (_cm->mark_stack_size() > target_size) {
  3408     if (_cm->verbose_low())
  3409       gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
  3410                              _task_id, target_size);
  3412     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
  3413       get_entries_from_global_stack();
  3414       drain_local_queue(partially);
  3415     }
  3417     if (_cm->verbose_low())
  3418       gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
  3419                              _task_id, _cm->mark_stack_size());
  3420   }
  3421 }
  3423 // SATB Queue has several assumptions on whether to call the par or
  3424 // non-par versions of the methods. This is why some of the code is
  3425 // replicated. We should really get rid of the single-threaded version
  3426 // of the code to simplify things.
  3427 void CMTask::drain_satb_buffers() {
  3428   if (has_aborted())
  3429     return;
  3431   // We set this so that the regular clock knows that we're in the
  3432   // middle of draining buffers and doesn't set the abort flag when it
  3433   // notices that SATB buffers are available for draining. It'd be
  3434   // very counterproductive if it did that. :-)
  3435   _draining_satb_buffers = true;
  3437   CMObjectClosure oc(this);
  3438   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  3439   if (G1CollectedHeap::use_parallel_gc_threads())
  3440     satb_mq_set.set_par_closure(_task_id, &oc);
  3441   else
  3442     satb_mq_set.set_closure(&oc);
  3444   // This keeps claiming and applying the closure to completed buffers
  3445   // until we run out of buffers or we need to abort.
  3446   if (G1CollectedHeap::use_parallel_gc_threads()) {
  3447     while (!has_aborted() &&
  3448            satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
  3449       if (_cm->verbose_medium())
  3450         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
  3451       statsOnly( ++_satb_buffers_processed );
  3452       regular_clock_call();
  3453     }
  3454   } else {
  3455     while (!has_aborted() &&
  3456            satb_mq_set.apply_closure_to_completed_buffer()) {
  3457       if (_cm->verbose_medium())
  3458         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
  3459       statsOnly( ++_satb_buffers_processed );
  3460       regular_clock_call();
  3461     }
  3462   }
  3464   if (!concurrent() && !has_aborted()) {
  3465     // We should only do this during remark.
  3466     if (G1CollectedHeap::use_parallel_gc_threads())
  3467       satb_mq_set.par_iterate_closure_all_threads(_task_id);
  3468     else
  3469       satb_mq_set.iterate_closure_all_threads();
  3470   }
  3472   _draining_satb_buffers = false;
  3474   assert(has_aborted() ||
  3475          concurrent() ||
  3476          satb_mq_set.completed_buffers_num() == 0, "invariant");
  3478   if (G1CollectedHeap::use_parallel_gc_threads())
  3479     satb_mq_set.set_par_closure(_task_id, NULL);
  3480   else
  3481     satb_mq_set.set_closure(NULL);
  3483   // again, this was a potentially expensive operation, decrease the
  3484   // limits to get the regular clock call early
  3485   decrease_limits();
  3486 }
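       // Background note (standard SATB reasoning, not spelled out here):
       // the buffers drained above contain the pre-write values of updated
       // references, logged by the mutators' write barriers. Marking through
       // them ensures every object reachable in the snapshot-at-the-beginning
       // is eventually visited, even if a mutator overwrites the only
       // remaining pointer to it while marking is in progress.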
  3488 void CMTask::drain_region_stack(BitMapClosure* bc) {
  3489   if (has_aborted())
  3490     return;
  3492   assert(_region_finger == NULL,
  3493          "it should be NULL when we're not scanning a region");
  3495   if (!_cm->region_stack_empty() || !_aborted_region.is_empty()) {
  3496     if (_cm->verbose_low())
  3497       gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
  3498                              _task_id, _cm->region_stack_size());
  3500     MemRegion mr;
  3502     if (!_aborted_region.is_empty()) {
  3503       mr = _aborted_region;
  3504       _aborted_region = MemRegion();
  3506       if (_cm->verbose_low())
  3507         gclog_or_tty->print_cr("[%d] scanning aborted region [ " PTR_FORMAT ", " PTR_FORMAT " )",
  3508                              _task_id, mr.start(), mr.end());
  3509     } else {
  3510       mr = _cm->region_stack_pop_lock_free();
  3511       // it returns MemRegion() if the pop fails
  3512       statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
  3513     }
  3515     while (mr.start() != NULL) {
  3516       if (_cm->verbose_medium())
  3517         gclog_or_tty->print_cr("[%d] we are scanning region "
  3518                                "["PTR_FORMAT", "PTR_FORMAT")",
  3519                                _task_id, mr.start(), mr.end());
  3521       assert(mr.end() <= _cm->finger(),
  3522              "otherwise the region shouldn't be on the stack");
  3523       assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
  3524       if (_nextMarkBitMap->iterate(bc, mr)) {
  3525         assert(!has_aborted(),
  3526                "cannot abort the task without aborting the bitmap iteration");
  3528         // We finished iterating over the region without aborting.
  3529         regular_clock_call();
  3530         if (has_aborted())
  3531           mr = MemRegion();
  3532         else {
  3533           mr = _cm->region_stack_pop_lock_free();
  3534           // it returns MemRegion() if the pop fails
  3535           statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
  3536         }
  3537       } else {
  3538         assert(has_aborted(), "currently the only way to do so");
  3540         // The only way to abort the bitmap iteration is to return
  3541         // false from the do_bit() method. However, inside the
  3542         // do_bit() method we move the _region_finger to point to the
  3543         // object currently being looked at. So, if we bail out, we
  3544         // have definitely set _region_finger to something non-null.
  3545         assert(_region_finger != NULL, "invariant");
  3547         // Make sure that any previously aborted region has been
  3548         // cleared.
  3549         assert(_aborted_region.is_empty(), "aborted region not cleared");
  3551         // The iteration was actually aborted. So now _region_finger
  3552         // points to the address of the object we last scanned. If we
  3553         // leave it there, when we restart this task, we will rescan
  3554         // the object. It is easy to avoid this. We move the finger by
  3555         // enough to point to the next possible object header (the
  3556         // bitmap knows by how much we need to move it as it knows its
  3557         // granularity).
  3558         MemRegion newRegion =
  3559           MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());
  3561         if (!newRegion.is_empty()) {
  3562           if (_cm->verbose_low()) {
  3563             gclog_or_tty->print_cr("[%d] recording unscanned region"
  3564                                    "[" PTR_FORMAT "," PTR_FORMAT ") in CMTask",
  3565                                    _task_id,
  3566                                    newRegion.start(), newRegion.end());
  3567           }
  3568           // Now record the part of the region we didn't scan to
  3569           // make sure this task scans it later.
  3570           _aborted_region = newRegion;
  3571         }
  3572         // break from while
  3573         mr = MemRegion();
  3574       }
  3575       _region_finger = NULL;
  3576     }
  3578     if (_cm->verbose_low())
  3579       gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
  3580                              _task_id, _cm->region_stack_size());
  3581   }
  3582 }
  3584 void CMTask::print_stats() {
  3585   gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
  3586                          _task_id, _calls);
  3587   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
  3588                          _elapsed_time_ms, _termination_time_ms);
  3589   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
  3590                          _step_times_ms.num(), _step_times_ms.avg(),
  3591                          _step_times_ms.sd());
  3592   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
  3593                          _step_times_ms.maximum(), _step_times_ms.sum());
  3595 #if _MARKING_STATS_
  3596   gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
  3597                          _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
  3598                          _all_clock_intervals_ms.sd());
  3599   gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
  3600                          _all_clock_intervals_ms.maximum(),
  3601                          _all_clock_intervals_ms.sum());
  3602   gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
  3603                          _clock_due_to_scanning, _clock_due_to_marking);
  3604   gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
  3605                          _objs_scanned, _objs_found_on_bitmap);
  3606   gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
  3607                          _local_pushes, _local_pops, _local_max_size);
  3608   gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
  3609                          _global_pushes, _global_pops, _global_max_size);
  3610   gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
  3611                          _global_transfers_to,_global_transfers_from);
  3612   gclog_or_tty->print_cr("  Regions: claimed = %d, Region Stack: pops = %d",
  3613                          _regions_claimed, _region_stack_pops);
  3614   gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
  3615   gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
  3616                          _steal_attempts, _steals);
  3617   gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
  3618   gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
  3619                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
  3620   gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
  3621                          _aborted_timed_out, _aborted_satb, _aborted_termination);
  3622 #endif // _MARKING_STATS_
  3623 }
  3625 /*****************************************************************************
  3627     The do_marking_step(time_target_ms) method is the building block
  3628     of the parallel marking framework. It can be called in parallel
  3629     with other invocations of do_marking_step() on different tasks
  3630     (but only one per task, obviously) and concurrently with the
  3631     mutator threads, or during remark, hence it eliminates the need
  3632     for two versions of the code. When called during remark, it will
  3633     pick up from where the task left off during the concurrent marking
  3634     phase. Interestingly, tasks are also claimable during evacuation
  3635     pauses, since do_marking_step() ensures that it aborts before
  3636     it needs to yield.
  3638     The data structures that it uses to do marking work are the
  3639     following:
  3641       (1) Marking Bitmap. If there are gray objects that appear only
  3642       on the bitmap (this happens either when dealing with an overflow
  3643       or when the initial marking phase has simply marked the roots
  3644       and didn't push them on the stack), then tasks claim heap
  3645       regions whose bitmap they then scan to find gray objects. A
  3646       global finger indicates where the end of the last claimed region
  3647       is. A local finger indicates how far into the region a task has
  3648       scanned. The two fingers are used to determine how to gray an
  3649       object (i.e. whether simply marking it is OK, as it will be
  3650       visited by a task in the future, or whether it needs to be also
  3651       pushed on a stack).
  3653       (2) Local Queue. The local queue of the task which is accessed
  3654       reasonably efficiently by the task. Other tasks can steal from
  3655       it when they run out of work. Throughout the marking phase, a
  3656       task attempts to keep its local queue short but not totally
  3657       empty, so that entries are available for stealing by other
  3658     tasks. Only when there is no more work will a task totally
  3659       drain its local queue.
  3661       (3) Global Mark Stack. This handles local queue overflow. During
  3662       marking only sets of entries are moved between it and the local
  3663     queues, as access to it requires a mutex, and finer-grained
  3664     interaction with it might cause contention. If it
  3665       overflows, then the marking phase should restart and iterate
  3666       over the bitmap to identify gray objects. Throughout the marking
  3667       phase, tasks attempt to keep the global mark stack at a small
  3668       length but not totally empty, so that entries are available for
  3669     popping by other tasks. Only when there is no more work will
  3670       tasks totally drain the global mark stack.
  3672       (4) Global Region Stack. Entries on it correspond to areas of
  3673       the bitmap that need to be scanned since they contain gray
  3674       objects. Pushes on the region stack only happen during
  3675       evacuation pauses and typically correspond to areas covered by
  3676       GC LABs. If it overflows, then the marking phase should restart
  3677       and iterate over the bitmap to identify gray objects. Tasks will
  3678       try to totally drain the region stack as soon as possible.
  3680       (5) SATB Buffer Queue. This is where completed SATB buffers are
  3681       made available. Buffers are regularly removed from this queue
  3682       and scanned for roots, so that the queue doesn't get too
  3683       long. During remark, all completed buffers are processed, as
  3684       well as the filled in parts of any uncompleted buffers.
  3686     The do_marking_step() method tries to abort when the time target
  3687     has been reached. There are a few other cases when the
  3688     do_marking_step() method also aborts:
  3690       (1) When the marking phase has been aborted (after a Full GC).
  3692       (2) When a global overflow (either on the global stack or the
  3693       region stack) has been triggered. Before the task aborts, it
  3694       will actually sync up with the other tasks to ensure that all
  3695       the marking data structures (local queues, stacks, fingers etc.)
  3696       are re-initialised so that when do_marking_step() completes,
  3697       the marking phase can immediately restart.
  3699       (3) When enough completed SATB buffers are available. The
  3700       do_marking_step() method only tries to drain SATB buffers right
  3701       at the beginning. So, if enough buffers are available, the
  3702       marking step aborts and the SATB buffers are processed at
  3703       the beginning of the next invocation.
  3705       (4) To yield. When we have to yield, we abort and yield
  3706       right at the end of do_marking_step(). This saves us from a lot
  3707     of hassle as, by yielding, we might allow a Full GC. If this
  3708       happens then objects will be compacted underneath our feet, the
  3709       heap might shrink, etc. We save checking for this by just
  3710       aborting and doing the yield right at the end.
  3712     From the above it follows that the do_marking_step() method should
  3713     be called in a loop (or, otherwise, regularly) until it completes.
  3715     If a marking step completes without its has_aborted() flag being
  3716     true, it means it has completed the current marking phase (and
  3717     also all other marking tasks have done so and have all synced up).
  3719     A method called regular_clock_call() is invoked "regularly" (in
  3720     sub-ms intervals) throughout marking. It is this clock method that
  3721     checks all the abort conditions which were mentioned above and
  3722     decides when the task should abort. A work-based scheme is used to
  3723     trigger this clock method: when the number of object words the
  3724     marking phase has scanned or the number of references the marking
  3725     phase has visited reach a given limit. Additional invocations of
  3726     the clock method have been planted in a few other strategic places
  3727     too. The initial reason for the clock method was to avoid calling
  3728     vtime too regularly, as it is quite expensive. So, once it was in
  3729     place, it was natural to piggy-back all the other conditions on it
  3730     too and not constantly check them throughout the code.
  3732  *****************************************************************************/
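       // A minimal sketch of the calling pattern implied by the comment
       // above. The real driver lives in the concurrent marking thread
       // machinery; this is a paraphrase, and the use of
       // G1ConcMarkStepDurationMillis as the time target is an assumption:
       //
       //   CMTask* task = _cm->task(worker_id);
       //   do {
       //     task->do_marking_step(G1ConcMarkStepDurationMillis);
       //     // ...yield to a pending safepoint here if requested...
       //   } while (!_cm->has_aborted() && task->has_aborted());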
  3734 void CMTask::do_marking_step(double time_target_ms) {
  3735   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
  3736   assert(concurrent() == _cm->concurrent(), "they should be the same");
  3738   assert(concurrent() || _cm->region_stack_empty(),
  3739          "the region stack should have been cleared before remark");
  3740   assert(concurrent() || !_cm->has_aborted_regions(),
  3741          "aborted regions should have been cleared before remark");
  3742   assert(_region_finger == NULL,
  3743          "this should be non-null only when a region is being scanned");
  3745   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
  3746   assert(_task_queues != NULL, "invariant");
  3747   assert(_task_queue != NULL, "invariant");
  3748   assert(_task_queues->queue(_task_id) == _task_queue, "invariant");
  3750   assert(!_claimed,
  3751          "only one thread should claim this task at any one time");
  3753   // OK, this doesn't safeguard against all possible scenarios, as it is
  3754   // possible for two threads to set the _claimed flag at the same
  3755   // time. But it is only for debugging purposes anyway and it will
  3756   // catch most problems.
  3757   _claimed = true;
  3759   _start_time_ms = os::elapsedVTime() * 1000.0;
  3760   statsOnly( _interval_start_time_ms = _start_time_ms );
  3762   double diff_prediction_ms =
  3763     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
  3764   _time_target_ms = time_target_ms - diff_prediction_ms;
  3766   // set up the variables that are used in the work-based scheme to
  3767   // call the regular clock method
  3768   _words_scanned = 0;
  3769   _refs_reached  = 0;
  3770   recalculate_limits();
  3772   // clear all flags
  3773   clear_has_aborted();
  3774   _has_aborted_timed_out = false;
  3775   _draining_satb_buffers = false;
  3777   ++_calls;
  3779   if (_cm->verbose_low())
  3780     gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
  3781                            "target = %1.2lfms >>>>>>>>>>",
  3782                            _task_id, _calls, _time_target_ms);
  3784   // Set up the bitmap and oop closures. Anything that uses them is
  3785   // eventually called from this method, so it is OK to allocate these
  3786   // statically.
  3787   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
  3788   CMOopClosure    oop_closure(_g1h, _cm, this);
  3789   set_oop_closure(&oop_closure);
  3791   if (_cm->has_overflown()) {
  3792     // This can happen if the region stack or the mark stack overflows
  3793     // during a GC pause and this task, after a yield point,
  3794     // restarts. We have to abort as we need to get into the overflow
  3795     // protocol which happens right at the end of this task.
  3796     set_has_aborted();
  3797   }
  3799   // First drain any available SATB buffers. After this, we will not
  3800   // look at SATB buffers before the next invocation of this method.
  3801   // If enough completed SATB buffers are queued up, the regular clock
  3802   // will abort this task so that it restarts.
  3803   drain_satb_buffers();
  3804   // ...then partially drain the local queue and the global stack
  3805   drain_local_queue(true);
  3806   drain_global_stack(true);
  3808   // Then totally drain the region stack.  We will not look at
  3809   // it again before the next invocation of this method. Entries on
  3810   // the region stack are only added during evacuation pauses, for
  3811   // which we have to yield. When we do, we abort the task anyway so
  3812   // it will look at the region stack again when it restarts.
  3813   bitmap_closure.set_scanning_heap_region(false);
  3814   drain_region_stack(&bitmap_closure);
  3815   // ...then partially drain the local queue and the global stack
  3816   drain_local_queue(true);
  3817   drain_global_stack(true);
  3819   do {
  3820     if (!has_aborted() && _curr_region != NULL) {
  3821       // This means that we're already holding on to a region.
  3822       assert(_finger != NULL, "if region is not NULL, then the finger "
  3823              "should not be NULL either");
  3825       // We might have restarted this task after an evacuation pause
  3826       // which might have evacuated the region we're holding on to
  3827       // underneath our feet. Let's read its limit again to make sure
  3828       // that we do not iterate over a region of the heap that
  3829       // contains garbage (update_region_limit() will also move
  3830       // _finger to the start of the region if it is found empty).
  3831       update_region_limit();
  3832       // We will start from _finger not from the start of the region,
  3833       // as we might be restarting this task after aborting half-way
  3834       // through scanning this region. In this case, _finger points to
  3835       // the address where we last found a marked object. If this is a
  3836       // fresh region, _finger points to start().
  3837       MemRegion mr = MemRegion(_finger, _region_limit);
  3839       if (_cm->verbose_low())
  3840         gclog_or_tty->print_cr("[%d] we're scanning part "
  3841                                "["PTR_FORMAT", "PTR_FORMAT") "
  3842                                "of region "PTR_FORMAT,
  3843                                _task_id, _finger, _region_limit, _curr_region);
  3845       // Let's iterate over the bitmap of the part of the
  3846       // region that is left.
  3847       bitmap_closure.set_scanning_heap_region(true);
  3848       if (mr.is_empty() ||
  3849           _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
  3850         // We successfully completed iterating over the region. Now,
  3851         // let's give up the region.
  3852         giveup_current_region();
  3853         regular_clock_call();
  3854       } else {
  3855         assert(has_aborted(), "currently the only way to do so");
  3856         // The only way to abort the bitmap iteration is to return
  3857         // false from the do_bit() method. However, inside the
  3858         // do_bit() method we move the _finger to point to the
  3859         // object currently being looked at. So, if we bail out, we
  3860         // have definitely set _finger to something non-null.
  3861         assert(_finger != NULL, "invariant");
  3863         // Region iteration was actually aborted. So now _finger
  3864         // points to the address of the object we last scanned. If we
  3865         // leave it there, when we restart this task, we will rescan
  3866         // the object. It is easy to avoid this. We move the finger by
  3867         // enough to point to the next possible object header (the
  3868         // bitmap knows by how much we need to move it as it knows its
  3869         // granularity).
  3870         assert(_finger < _region_limit, "invariant");
  3871         HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
  3872         // Check if bitmap iteration was aborted while scanning the last object
  3873         if (new_finger >= _region_limit) {
  3874             giveup_current_region();
  3875         } else {
  3876             move_finger_to(new_finger);
  3877         }
  3878       }
  3879     }
  3880     // At this point we have either completed iterating over the
  3881     // region we were holding on to, or we have aborted.
  3883     // We then partially drain the local queue and the global stack.
  3884     // (Do we really need this?)
  3885     drain_local_queue(true);
  3886     drain_global_stack(true);
  3888     // Read the note on the claim_region() method on why it might
  3889     // return NULL with potentially more regions available for
  3890     // claiming and why we have to check out_of_regions() to determine
  3891     // whether we're done or not.
  3892     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
  3893       // We are going to try to claim a new region. We should have
  3894       // given up on the previous one.
  3895       // Separated the asserts so that we know which one fires.
  3896       assert(_curr_region  == NULL, "invariant");
  3897       assert(_finger       == NULL, "invariant");
  3898       assert(_region_limit == NULL, "invariant");
  3899       if (_cm->verbose_low())
  3900         gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
  3901       HeapRegion* claimed_region = _cm->claim_region(_task_id);
  3902       if (claimed_region != NULL) {
  3903         // Yes, we managed to claim one
  3904         statsOnly( ++_regions_claimed );
  3906         if (_cm->verbose_low())
  3907           gclog_or_tty->print_cr("[%d] we successfully claimed "
  3908                                  "region "PTR_FORMAT,
  3909                                  _task_id, claimed_region);
  3911         setup_for_region(claimed_region);
  3912         assert(_curr_region == claimed_region, "invariant");
  3913       }
  3914       // It is important to call the regular clock here. It might take
  3915       // a while to claim a region if, for example, we hit a large
  3916       // block of empty regions. So we need to call the regular clock
  3917       // method once round the loop to make sure it's called
  3918       // frequently enough.
  3919       regular_clock_call();
  3920     }
  3922     if (!has_aborted() && _curr_region == NULL) {
  3923       assert(_cm->out_of_regions(),
  3924              "at this point we should be out of regions");
  3925     }
  3926   } while ( _curr_region != NULL && !has_aborted());
  3928   if (!has_aborted()) {
  3929     // We cannot check whether the global stack is empty, since other
  3930     // tasks might be pushing objects to it concurrently. We also cannot
  3931     // check if the region stack is empty because if a thread is aborting
  3932     // it can push a partially done region back.
  3933     assert(_cm->out_of_regions(),
  3934            "at this point we should be out of regions");
  3936     if (_cm->verbose_low())
  3937       gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
  3939     // Try to reduce the number of available SATB buffers so that
  3940     // remark has less work to do.
  3941     drain_satb_buffers();
  3942   }
  3944   // Since we've done everything else, we can now totally drain the
  3945   // local queue and global stack.
  3946   drain_local_queue(false);
  3947   drain_global_stack(false);
  3949   // Attempt at work stealing from other tasks' queues.
  3950   if (!has_aborted()) {
  3951     // We have not aborted. This means that we have finished all that
  3952     // we could. Let's try to do some stealing...
  3954     // We cannot check whether the global stack is empty, since other
  3955     // tasks might be pushing objects to it concurrently. We also cannot
  3956     // check if the region stack is empty because if a thread is aborting
  3957     // it can push a partially done region back.
  3958     assert(_cm->out_of_regions() && _task_queue->size() == 0,
  3959            "only way to reach here");
  3961     if (_cm->verbose_low())
  3962       gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
  3964     while (!has_aborted()) {
  3965       oop obj;
  3966       statsOnly( ++_steal_attempts );
  3968       if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
  3969         if (_cm->verbose_medium())
  3970           gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
  3971                                  _task_id, (void*) obj);
  3973         statsOnly( ++_steals );
  3975         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
  3976                "any stolen object should be marked");
  3977         scan_object(obj);
  3979         // And since we're towards the end, let's totally drain the
  3980         // local queue and global stack.
  3981         drain_local_queue(false);
  3982         drain_global_stack(false);
  3983       } else {
  3984         break;
  3985       }
  3986     }
  3987   }
  3989   // We still haven't aborted. Now, let's try to get into the
  3990   // termination protocol.
  3991   if (!has_aborted()) {
  3992     // We cannot check whether the global stack is empty, since other
  3993     // tasks might be concurrently pushing objects on it. We also cannot
  3994     // check if the region stack is empty because if a thread is aborting
  3995     // it can push a partially done region back.
  3996     // Separated the asserts so that we know which one fires.
  3997     assert(_cm->out_of_regions(), "only way to reach here");
  3998     assert(_task_queue->size() == 0, "only way to reach here");
  4000     if (_cm->verbose_low())
  4001       gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
  4003     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
  4004     // The CMTask class also extends the TerminatorTerminator class,
  4005     // hence its should_exit_termination() method will also decide
  4006     // whether to exit the termination protocol or not.
  4007     bool finished = _cm->terminator()->offer_termination(this);
  4008     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
  4009     _termination_time_ms +=
  4010       termination_end_time_ms - _termination_start_time_ms;
  4012     if (finished) {
  4013       // We're all done.
  4015       if (_task_id == 0) {
  4016         // let's allow task 0 to do this
  4017         if (concurrent()) {
  4018           assert(_cm->concurrent_marking_in_progress(), "invariant");
  4019           // we need to set this to false before the next
  4020           // safepoint. This way we ensure that the marking phase
  4021           // doesn't observe any more heap expansions.
  4022           _cm->clear_concurrent_marking_in_progress();
  4023         }
  4024       }
  4026       // We can now guarantee that the global stack is empty, since
  4027       // all other tasks have finished. We separated the guarantees so
  4028       // that, if a condition is false, we can immediately find out
  4029       // which one.
  4030       guarantee(_cm->out_of_regions(), "only way to reach here");
  4031       guarantee(_aborted_region.is_empty(), "only way to reach here");
  4032       guarantee(_cm->region_stack_empty(), "only way to reach here");
  4033       guarantee(_cm->mark_stack_empty(), "only way to reach here");
  4034       guarantee(_task_queue->size() == 0, "only way to reach here");
  4035       guarantee(!_cm->has_overflown(), "only way to reach here");
  4036       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
  4037       guarantee(!_cm->region_stack_overflow(), "only way to reach here");
  4039       if (_cm->verbose_low())
  4040         gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
  4041     } else {
  4042       // Apparently there's more work to do. Let's abort this task. It
  4043       // will restart it and we can hopefully find more things to do.
  4045       if (_cm->verbose_low())
  4046         gclog_or_tty->print_cr("[%d] apparently there is more work to do", _task_id);
  4048       set_has_aborted();
  4049       statsOnly( ++_aborted_termination );
  4050     }
  4051   }
  4053   // Mainly for debugging purposes to make sure that a pointer to the
  4054   // closure which was statically allocated in this frame doesn't
  4055   // escape it by accident.
  4056   set_oop_closure(NULL);
  4057   double end_time_ms = os::elapsedVTime() * 1000.0;
  4058   double elapsed_time_ms = end_time_ms - _start_time_ms;
  4059   // Update the step history.
  4060   _step_times_ms.add(elapsed_time_ms);
  4062   if (has_aborted()) {
  4063     // The task was aborted for some reason.
  4065     statsOnly( ++_aborted );
  4067     if (_has_aborted_timed_out) {
  4068       double diff_ms = elapsed_time_ms - _time_target_ms;
  4069       // Keep statistics of how well we did with respect to hitting
  4070       // our target only if we actually timed out (if we aborted for
  4071       // other reasons, then the results might get skewed).
  4072       _marking_step_diffs_ms.add(diff_ms);
  4073     }
  4075     if (_cm->has_overflown()) {
  4076       // This is the interesting one. We aborted because a global
  4077       // overflow was raised. This means we have to restart the
  4078       // marking phase and start iterating over regions. However, in
  4079       // order to do this we have to make sure that all tasks stop
  4080       // what they are doing and re-initialise in a safe manner. We
  4081       // will achieve this with the use of two barrier sync points.
  4083       if (_cm->verbose_low())
  4084         gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
  4086       _cm->enter_first_sync_barrier(_task_id);
  4087       // When we exit this sync barrier we know that all tasks have
  4088       // stopped doing marking work. So, it's now safe to
  4089       // re-initialise our data structures. At the end of this method,
  4090       // task 0 will clear the global data structures.
  4092       statsOnly( ++_aborted_overflow );
  4094       // We clear the local state of this task...
  4095       clear_region_fields();
  4097       // ...and enter the second barrier.
  4098       _cm->enter_second_sync_barrier(_task_id);
  4099     // At this point everything has been re-initialised and we're
  4100     // ready to restart.
  4101     }
  4103     if (_cm->verbose_low()) {
  4104       gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
  4105                              "elapsed = %1.2lfms <<<<<<<<<<",
  4106                              _task_id, _time_target_ms, elapsed_time_ms);
  4107       if (_cm->has_aborted())
  4108         gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
  4109                                _task_id);
  4110     }
  4111   } else {
  4112     if (_cm->verbose_low())
  4113       gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
  4114                              "elapsed = %1.2lfms <<<<<<<<<<",
  4115                              _task_id, _time_target_ms, elapsed_time_ms);
  4116   }
  4118   _claimed = false;
  4119 }
  4121 CMTask::CMTask(int task_id,
  4122                ConcurrentMark* cm,
  4123                CMTaskQueue* task_queue,
  4124                CMTaskQueueSet* task_queues)
  4125   : _g1h(G1CollectedHeap::heap()),
  4126     _task_id(task_id), _cm(cm),
  4127     _claimed(false),
  4128     _nextMarkBitMap(NULL), _hash_seed(17),
  4129     _task_queue(task_queue),
  4130     _task_queues(task_queues),
  4131     _oop_closure(NULL),
  4132     _aborted_region(MemRegion()) {
  4133   guarantee(task_queue != NULL, "invariant");
  4134   guarantee(task_queues != NULL, "invariant");
  4136   statsOnly( _clock_due_to_scanning = 0;
  4137              _clock_due_to_marking  = 0 );
  4139   _marking_step_diffs_ms.add(0.5);
  4140 }
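       // Note on the 0.5 above: this sequence feeds
       // g1_policy->get_new_prediction() at the top of do_marking_step();
       // seeding it with a small non-zero diff presumably keeps the first
       // predictions conservative rather than starting from an empty sample.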
