src/share/vm/gc_implementation/g1/concurrentMark.cpp

author:      johnc
date:        Wed, 01 Dec 2010 17:34:02 -0800
changeset:   2316:fd1d227ef1b9
parent:      2314:f95d63e2154a
child:       2379:b03260081e9b
permissions: -rw-r--r--

6983204: G1: Nightly test nsk/regression/b4958615 failing with +ExplicitGCInvokesConcurrent
Summary: Enable reference discovery during concurrent marking by setting the reference processor field of the concurrent marking closure. Keep reference objects on the discovered reference lists alive during incremental evacuation pauses until they are processed at the end of concurrent marking.
Reviewed-by: ysr, tonyp
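
As a rough illustration of the first part of the fix -- wiring a reference processor into the closure used for concurrent marking, so that java.lang.ref.Reference objects seen while marking are discovered for later processing rather than scanned like ordinary objects -- here is a minimal stand-alone sketch. All types and names in it (Obj, ToyRefProcessor, ToyMarkClosure) are hypothetical stand-ins; they are not the HotSpot API and not the actual patch in the listing below.

    // Minimal sketch only: hypothetical types, not HotSpot code.
    #include <cstddef>
    #include <vector>

    struct Obj {
      bool is_reference;   // true for java.lang.ref.Reference-like objects
    };

    // Records Reference objects found during marking so they can be kept
    // alive and processed once marking completes.
    class ToyRefProcessor {
      std::vector<Obj*> _discovered;
    public:
      void discover(Obj* ref) { _discovered.push_back(ref); }
      const std::vector<Obj*>& discovered() const { return _discovered; }
    };

    // A marking closure. When its reference-processor field is set,
    // Reference objects it visits are put on the discovered list for
    // end-of-marking processing instead of being scanned immediately.
    class ToyMarkClosure {
      ToyRefProcessor* _rp;
    public:
      explicit ToyMarkClosure(ToyRefProcessor* rp) : _rp(rp) { }
      void do_object(Obj* o) {
        if (_rp != NULL && o->is_reference) {
          _rp->discover(o);          // defer to reference processing
          return;
        }
        // ... mark the object and push it on the mark stack ...
      }
    };

The second half of the summary -- keeping objects on the discovered reference lists alive across incremental evacuation pauses until they are processed at the end of marking -- is not sketched here.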

     1 /*
     2  * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.
     8  *
     9  * This code is distributed in the hope that it will be useful, but WITHOUT
    10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    12  * version 2 for more details (a copy is included in the LICENSE file that
    13  * accompanied this code).
    14  *
    15  * You should have received a copy of the GNU General Public License version
    16  * 2 along with this work; if not, write to the Free Software Foundation,
    17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    18  *
    19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    20  * or visit www.oracle.com if you need additional information or have any
    21  * questions.
    22  *
    23  */
    25 #include "precompiled.hpp"
    26 #include "classfile/symbolTable.hpp"
    27 #include "gc_implementation/g1/concurrentMark.hpp"
    28 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
    29 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
    30 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
    31 #include "gc_implementation/g1/g1RemSet.hpp"
    32 #include "gc_implementation/g1/heapRegionRemSet.hpp"
    33 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
    34 #include "memory/genOopClosures.inline.hpp"
    35 #include "memory/referencePolicy.hpp"
    36 #include "memory/resourceArea.hpp"
    37 #include "oops/oop.inline.hpp"
    38 #include "runtime/handles.inline.hpp"
    39 #include "runtime/java.hpp"
    41 //
    42 // CMS Bit Map Wrapper
    44 CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter):
    45   _bm((uintptr_t*)NULL,0),
    46   _shifter(shifter) {
    47   _bmStartWord = (HeapWord*)(rs.base());
    48   _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
    49   ReservedSpace brs(ReservedSpace::allocation_align_size_up(
    50                      (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
    52   guarantee(brs.is_reserved(), "couldn't allocate CMS bit map");
     53   // For now we'll just commit all of the bit map up front.
    54   // Later on we'll try to be more parsimonious with swap.
    55   guarantee(_virtual_space.initialize(brs, brs.size()),
     56             "couldn't reserve backing store for CMS bit map");
    57   assert(_virtual_space.committed_size() == brs.size(),
    58          "didn't reserve backing store for all of CMS bit map?");
    59   _bm.set_map((uintptr_t*)_virtual_space.low());
    60   assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
    61          _bmWordSize, "inconsistency in bit map sizing");
    62   _bm.set_size(_bmWordSize >> _shifter);
    63 }
    65 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
    66                                                HeapWord* limit) const {
    67   // First we must round addr *up* to a possible object boundary.
    68   addr = (HeapWord*)align_size_up((intptr_t)addr,
    69                                   HeapWordSize << _shifter);
    70   size_t addrOffset = heapWordToOffset(addr);
    71   if (limit == NULL) limit = _bmStartWord + _bmWordSize;
    72   size_t limitOffset = heapWordToOffset(limit);
    73   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
    74   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
    75   assert(nextAddr >= addr, "get_next_one postcondition");
    76   assert(nextAddr == limit || isMarked(nextAddr),
    77          "get_next_one postcondition");
    78   return nextAddr;
    79 }
    81 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
    82                                                  HeapWord* limit) const {
    83   size_t addrOffset = heapWordToOffset(addr);
    84   if (limit == NULL) limit = _bmStartWord + _bmWordSize;
    85   size_t limitOffset = heapWordToOffset(limit);
    86   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
    87   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
    88   assert(nextAddr >= addr, "get_next_one postcondition");
    89   assert(nextAddr == limit || !isMarked(nextAddr),
    90          "get_next_one postcondition");
    91   return nextAddr;
    92 }
    94 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
    95   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
    96   return (int) (diff >> _shifter);
    97 }
    99 bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
   100   HeapWord* left  = MAX2(_bmStartWord, mr.start());
   101   HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
   102   if (right > left) {
   103     // Right-open interval [leftOffset, rightOffset).
   104     return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
   105   } else {
   106     return true;
   107   }
   108 }
   110 void CMBitMapRO::mostly_disjoint_range_union(BitMap*   from_bitmap,
   111                                              size_t    from_start_index,
   112                                              HeapWord* to_start_word,
   113                                              size_t    word_num) {
   114   _bm.mostly_disjoint_range_union(from_bitmap,
   115                                   from_start_index,
   116                                   heapWordToOffset(to_start_word),
   117                                   word_num);
   118 }
   120 #ifndef PRODUCT
   121 bool CMBitMapRO::covers(ReservedSpace rs) const {
   122   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
   123   assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
   124          "size inconsistency");
   125   return _bmStartWord == (HeapWord*)(rs.base()) &&
   126          _bmWordSize  == rs.size()>>LogHeapWordSize;
   127 }
   128 #endif
   130 void CMBitMap::clearAll() {
   131   _bm.clear();
   132   return;
   133 }
   135 void CMBitMap::markRange(MemRegion mr) {
    136   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
   137   assert(!mr.is_empty(), "unexpected empty region");
   138   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
   139           ((HeapWord *) mr.end())),
   140          "markRange memory region end is not card aligned");
   141   // convert address range into offset range
   142   _bm.at_put_range(heapWordToOffset(mr.start()),
   143                    heapWordToOffset(mr.end()), true);
   144 }
   146 void CMBitMap::clearRange(MemRegion mr) {
    147   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
   148   assert(!mr.is_empty(), "unexpected empty region");
   149   // convert address range into offset range
   150   _bm.at_put_range(heapWordToOffset(mr.start()),
   151                    heapWordToOffset(mr.end()), false);
   152 }
   154 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
   155                                             HeapWord* end_addr) {
   156   HeapWord* start = getNextMarkedWordAddress(addr);
   157   start = MIN2(start, end_addr);
   158   HeapWord* end   = getNextUnmarkedWordAddress(start);
   159   end = MIN2(end, end_addr);
   160   assert(start <= end, "Consistency check");
   161   MemRegion mr(start, end);
   162   if (!mr.is_empty()) {
   163     clearRange(mr);
   164   }
   165   return mr;
   166 }
   168 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
   169   _base(NULL), _cm(cm)
   170 #ifdef ASSERT
   171   , _drain_in_progress(false)
   172   , _drain_in_progress_yields(false)
   173 #endif
   174 {}
   176 void CMMarkStack::allocate(size_t size) {
   177   _base = NEW_C_HEAP_ARRAY(oop, size);
   178   if (_base == NULL)
   179     vm_exit_during_initialization("Failed to allocate "
   180                                   "CM region mark stack");
   181   _index = 0;
   182   // QQQQ cast ...
   183   _capacity = (jint) size;
   184   _oops_do_bound = -1;
   185   NOT_PRODUCT(_max_depth = 0);
   186 }
   188 CMMarkStack::~CMMarkStack() {
   189   if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base);
   190 }
   192 void CMMarkStack::par_push(oop ptr) {
   193   while (true) {
   194     if (isFull()) {
   195       _overflow = true;
   196       return;
   197     }
   198     // Otherwise...
   199     jint index = _index;
   200     jint next_index = index+1;
   201     jint res = Atomic::cmpxchg(next_index, &_index, index);
   202     if (res == index) {
   203       _base[index] = ptr;
   204       // Note that we don't maintain this atomically.  We could, but it
   205       // doesn't seem necessary.
   206       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
   207       return;
   208     }
   209     // Otherwise, we need to try again.
   210   }
   211 }
   213 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
   214   while (true) {
   215     if (isFull()) {
   216       _overflow = true;
   217       return;
   218     }
   219     // Otherwise...
   220     jint index = _index;
   221     jint next_index = index + n;
   222     if (next_index > _capacity) {
   223       _overflow = true;
   224       return;
   225     }
   226     jint res = Atomic::cmpxchg(next_index, &_index, index);
   227     if (res == index) {
   228       for (int i = 0; i < n; i++) {
   229         int ind = index + i;
   230         assert(ind < _capacity, "By overflow test above.");
   231         _base[ind] = ptr_arr[i];
   232       }
   233       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
   234       return;
   235     }
   236     // Otherwise, we need to try again.
   237   }
   238 }
   241 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
   242   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
   243   jint start = _index;
   244   jint next_index = start + n;
   245   if (next_index > _capacity) {
   246     _overflow = true;
   247     return;
   248   }
   249   // Otherwise.
   250   _index = next_index;
   251   for (int i = 0; i < n; i++) {
   252     int ind = start + i;
   253     assert(ind < _capacity, "By overflow test above.");
   254     _base[ind] = ptr_arr[i];
   255   }
   256 }
   259 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
   260   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
   261   jint index = _index;
   262   if (index == 0) {
   263     *n = 0;
   264     return false;
   265   } else {
   266     int k = MIN2(max, index);
   267     jint new_ind = index - k;
   268     for (int j = 0; j < k; j++) {
   269       ptr_arr[j] = _base[new_ind + j];
   270     }
   271     _index = new_ind;
   272     *n = k;
   273     return true;
   274   }
   275 }
   278 CMRegionStack::CMRegionStack() : _base(NULL) {}
   280 void CMRegionStack::allocate(size_t size) {
   281   _base = NEW_C_HEAP_ARRAY(MemRegion, size);
   282   if (_base == NULL)
   283     vm_exit_during_initialization("Failed to allocate "
   284                                   "CM region mark stack");
   285   _index = 0;
   286   // QQQQ cast ...
   287   _capacity = (jint) size;
   288 }
   290 CMRegionStack::~CMRegionStack() {
    291   if (_base != NULL) FREE_C_HEAP_ARRAY(MemRegion, _base);
   292 }
   294 void CMRegionStack::push_lock_free(MemRegion mr) {
   295   assert(mr.word_size() > 0, "Precondition");
   296   while (true) {
   297     jint index = _index;
   299     if (index >= _capacity) {
   300       _overflow = true;
   301       return;
   302     }
   303     // Otherwise...
   304     jint next_index = index+1;
   305     jint res = Atomic::cmpxchg(next_index, &_index, index);
   306     if (res == index) {
   307       _base[index] = mr;
   308       return;
   309     }
   310     // Otherwise, we need to try again.
   311   }
   312 }
   314 // Lock-free pop of the region stack. Called during the concurrent
   315 // marking / remark phases. Should only be called in tandem with
   316 // other lock-free pops.
   317 MemRegion CMRegionStack::pop_lock_free() {
   318   while (true) {
   319     jint index = _index;
   321     if (index == 0) {
   322       return MemRegion();
   323     }
   324     // Otherwise...
   325     jint next_index = index-1;
   326     jint res = Atomic::cmpxchg(next_index, &_index, index);
   327     if (res == index) {
   328       MemRegion mr = _base[next_index];
   329       if (mr.start() != NULL) {
   330         assert(mr.end() != NULL, "invariant");
   331         assert(mr.word_size() > 0, "invariant");
   332         return mr;
   333       } else {
   334         // that entry was invalidated... let's skip it
   335         assert(mr.end() == NULL, "invariant");
   336       }
   337     }
   338     // Otherwise, we need to try again.
   339   }
   340 }
   342 #if 0
   343 // The routines that manipulate the region stack with a lock are
   344 // not currently used. They should be retained, however, as a
   345 // diagnostic aid.
   347 void CMRegionStack::push_with_lock(MemRegion mr) {
   348   assert(mr.word_size() > 0, "Precondition");
   349   MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
   351   if (isFull()) {
   352     _overflow = true;
   353     return;
   354   }
   356   _base[_index] = mr;
   357   _index += 1;
   358 }
   360 MemRegion CMRegionStack::pop_with_lock() {
   361   MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
   363   while (true) {
   364     if (_index == 0) {
   365       return MemRegion();
   366     }
   367     _index -= 1;
   369     MemRegion mr = _base[_index];
   370     if (mr.start() != NULL) {
   371       assert(mr.end() != NULL, "invariant");
   372       assert(mr.word_size() > 0, "invariant");
   373       return mr;
   374     } else {
   375       // that entry was invalidated... let's skip it
   376       assert(mr.end() == NULL, "invariant");
   377     }
   378   }
   379 }
   380 #endif
   382 bool CMRegionStack::invalidate_entries_into_cset() {
   383   bool result = false;
   384   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   385   for (int i = 0; i < _oops_do_bound; ++i) {
   386     MemRegion mr = _base[i];
   387     if (mr.start() != NULL) {
   388       assert(mr.end() != NULL, "invariant");
   389       assert(mr.word_size() > 0, "invariant");
   390       HeapRegion* hr = g1h->heap_region_containing(mr.start());
   391       assert(hr != NULL, "invariant");
   392       if (hr->in_collection_set()) {
   393         // The region points into the collection set
   394         _base[i] = MemRegion();
   395         result = true;
   396       }
   397     } else {
   398       // that entry was invalidated... let's skip it
   399       assert(mr.end() == NULL, "invariant");
   400     }
   401   }
   402   return result;
   403 }
   405 template<class OopClosureClass>
   406 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
   407   assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
   408          || SafepointSynchronize::is_at_safepoint(),
   409          "Drain recursion must be yield-safe.");
   410   bool res = true;
   411   debug_only(_drain_in_progress = true);
   412   debug_only(_drain_in_progress_yields = yield_after);
   413   while (!isEmpty()) {
   414     oop newOop = pop();
   415     assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
   416     assert(newOop->is_oop(), "Expected an oop");
   417     assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
   418            "only grey objects on this stack");
   419     // iterate over the oops in this oop, marking and pushing
   420     // the ones in CMS generation.
   421     newOop->oop_iterate(cl);
   422     if (yield_after && _cm->do_yield_check()) {
   423       res = false; break;
   424     }
   425   }
   426   debug_only(_drain_in_progress = false);
   427   return res;
   428 }
   430 void CMMarkStack::oops_do(OopClosure* f) {
   431   if (_index == 0) return;
   432   assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
   433          "Bound must be set.");
   434   for (int i = 0; i < _oops_do_bound; i++) {
   435     f->do_oop(&_base[i]);
   436   }
   437   _oops_do_bound = -1;
   438 }
   440 bool ConcurrentMark::not_yet_marked(oop obj) const {
   441   return (_g1h->is_obj_ill(obj)
   442           || (_g1h->is_in_permanent(obj)
   443               && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
   444 }
   446 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
   447 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
   448 #endif // _MSC_VER
   450 ConcurrentMark::ConcurrentMark(ReservedSpace rs,
   451                                int max_regions) :
   452   _markBitMap1(rs, MinObjAlignment - 1),
   453   _markBitMap2(rs, MinObjAlignment - 1),
   455   _parallel_marking_threads(0),
   456   _sleep_factor(0.0),
   457   _marking_task_overhead(1.0),
   458   _cleanup_sleep_factor(0.0),
   459   _cleanup_task_overhead(1.0),
   460   _region_bm(max_regions, false /* in_resource_area*/),
   461   _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
   462            CardTableModRefBS::card_shift,
   463            false /* in_resource_area*/),
   464   _prevMarkBitMap(&_markBitMap1),
   465   _nextMarkBitMap(&_markBitMap2),
   466   _at_least_one_mark_complete(false),
   468   _markStack(this),
   469   _regionStack(),
   470   // _finger set in set_non_marking_state
   472   _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
   473   // _active_tasks set in set_non_marking_state
   474   // _tasks set inside the constructor
   475   _task_queues(new CMTaskQueueSet((int) _max_task_num)),
   476   _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),
   478   _has_overflown(false),
   479   _concurrent(false),
   480   _has_aborted(false),
   481   _restart_for_overflow(false),
   482   _concurrent_marking_in_progress(false),
   483   _should_gray_objects(false),
   485   // _verbose_level set below
   487   _init_times(),
   488   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
   489   _cleanup_times(),
   490   _total_counting_time(0.0),
   491   _total_rs_scrub_time(0.0),
   493   _parallel_workers(NULL)
   494 {
   495   CMVerboseLevel verbose_level =
   496     (CMVerboseLevel) G1MarkingVerboseLevel;
   497   if (verbose_level < no_verbose)
   498     verbose_level = no_verbose;
   499   if (verbose_level > high_verbose)
   500     verbose_level = high_verbose;
   501   _verbose_level = verbose_level;
   503   if (verbose_low())
   504     gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
   505                            "heap end = "PTR_FORMAT, _heap_start, _heap_end);
   507   _markStack.allocate(MarkStackSize);
   508   _regionStack.allocate(G1MarkRegionStackSize);
   510   // Create & start a ConcurrentMark thread.
   511   _cmThread = new ConcurrentMarkThread(this);
   512   assert(cmThread() != NULL, "CM Thread should have been created");
   513   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
   515   _g1h = G1CollectedHeap::heap();
   516   assert(CGC_lock != NULL, "Where's the CGC_lock?");
   517   assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
   518   assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
   520   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
   521   satb_qs.set_buffer_size(G1SATBBufferSize);
   523   int size = (int) MAX2(ParallelGCThreads, (size_t)1);
   524   _par_cleanup_thread_state = NEW_C_HEAP_ARRAY(ParCleanupThreadState*, size);
   525   for (int i = 0 ; i < size; i++) {
   526     _par_cleanup_thread_state[i] = new ParCleanupThreadState;
   527   }
   529   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
   530   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);
   532   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
   533   _active_tasks = _max_task_num;
   534   for (int i = 0; i < (int) _max_task_num; ++i) {
   535     CMTaskQueue* task_queue = new CMTaskQueue();
   536     task_queue->initialize();
   537     _task_queues->register_queue(i, task_queue);
   539     _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
   540     _accum_task_vtime[i] = 0.0;
   541   }
   543   if (ConcGCThreads > ParallelGCThreads) {
   544     vm_exit_during_initialization("Can't have more ConcGCThreads "
   545                                   "than ParallelGCThreads.");
   546   }
   547   if (ParallelGCThreads == 0) {
   548     // if we are not running with any parallel GC threads we will not
   549     // spawn any marking threads either
   550     _parallel_marking_threads =   0;
   551     _sleep_factor             = 0.0;
   552     _marking_task_overhead    = 1.0;
   553   } else {
   554     if (ConcGCThreads > 0) {
   555       // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
   556       // if both are set
   558       _parallel_marking_threads = ConcGCThreads;
   559       _sleep_factor             = 0.0;
   560       _marking_task_overhead    = 1.0;
   561     } else if (G1MarkingOverheadPercent > 0) {
   562       // we will calculate the number of parallel marking threads
   563       // based on a target overhead with respect to the soft real-time
   564       // goal
   566       double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
   567       double overall_cm_overhead =
   568         (double) MaxGCPauseMillis * marking_overhead /
   569         (double) GCPauseIntervalMillis;
   570       double cpu_ratio = 1.0 / (double) os::processor_count();
   571       double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
   572       double marking_task_overhead =
   573         overall_cm_overhead / marking_thread_num *
   574                                                 (double) os::processor_count();
   575       double sleep_factor =
   576                          (1.0 - marking_task_overhead) / marking_task_overhead;
   578       _parallel_marking_threads = (size_t) marking_thread_num;
   579       _sleep_factor             = sleep_factor;
   580       _marking_task_overhead    = marking_task_overhead;
   581     } else {
   582       _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
   583       _sleep_factor             = 0.0;
   584       _marking_task_overhead    = 1.0;
   585     }
   587     if (parallel_marking_threads() > 1)
   588       _cleanup_task_overhead = 1.0;
   589     else
   590       _cleanup_task_overhead = marking_task_overhead();
   591     _cleanup_sleep_factor =
   592                      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
   594 #if 0
   595     gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
   596     gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
   597     gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
   598     gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
   599     gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
   600 #endif
   602     guarantee(parallel_marking_threads() > 0, "peace of mind");
   603     _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
   604          (int) _parallel_marking_threads, false, true);
   605     if (_parallel_workers == NULL) {
   606       vm_exit_during_initialization("Failed necessary allocation.");
   607     } else {
   608       _parallel_workers->initialize_workers();
   609     }
   610   }
   612   // so that the call below can read a sensible value
   613   _heap_start = (HeapWord*) rs.base();
   614   set_non_marking_state();
   615 }
   617 void ConcurrentMark::update_g1_committed(bool force) {
   618   // If concurrent marking is not in progress, then we do not need to
   619   // update _heap_end. This has a subtle and important
   620   // side-effect. Imagine that two evacuation pauses happen between
   621   // marking completion and remark. The first one can grow the
   622   // heap (hence now the finger is below the heap end). Then, the
   623   // second one could unnecessarily push regions on the region
   624   // stack. This causes the invariant that the region stack is empty
   625   // at the beginning of remark to be false. By ensuring that we do
    626   // not observe heap expansions after marking is complete, we do
   627   // not have this problem.
   628   if (!concurrent_marking_in_progress() && !force)
   629     return;
   631   MemRegion committed = _g1h->g1_committed();
   632   assert(committed.start() == _heap_start, "start shouldn't change");
   633   HeapWord* new_end = committed.end();
   634   if (new_end > _heap_end) {
   635     // The heap has been expanded.
   637     _heap_end = new_end;
   638   }
   639   // Notice that the heap can also shrink. However, this only happens
   640   // during a Full GC (at least currently) and the entire marking
   641   // phase will bail out and the task will not be restarted. So, let's
   642   // do nothing.
   643 }
   645 void ConcurrentMark::reset() {
   646   // Starting values for these two. This should be called in a STW
   647   // phase. CM will be notified of any future g1_committed expansions
    648   // at the end of evacuation pauses, when tasks are
   649   // inactive.
   650   MemRegion committed = _g1h->g1_committed();
   651   _heap_start = committed.start();
   652   _heap_end   = committed.end();
   654   // Separated the asserts so that we know which one fires.
   655   assert(_heap_start != NULL, "heap bounds should look ok");
   656   assert(_heap_end != NULL, "heap bounds should look ok");
   657   assert(_heap_start < _heap_end, "heap bounds should look ok");
   659   // reset all the marking data structures and any necessary flags
   660   clear_marking_state();
   662   if (verbose_low())
   663     gclog_or_tty->print_cr("[global] resetting");
    665   // We do reset all of them, since different phases will use a
   666   // different number of active threads. So, it's easiest to have all
   667   // of them ready.
   668   for (int i = 0; i < (int) _max_task_num; ++i) {
   669     _tasks[i]->reset(_nextMarkBitMap);
   670   }
   672   // we need this to make sure that the flag is on during the evac
   673   // pause with initial mark piggy-backed
   674   set_concurrent_marking_in_progress();
   675 }
   677 void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
   678   assert(active_tasks <= _max_task_num, "we should not have more");
   680   _active_tasks = active_tasks;
   681   // Need to update the three data structures below according to the
   682   // number of active threads for this phase.
   683   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
   684   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
   685   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
   687   _concurrent = concurrent;
   688   // We propagate this to all tasks, not just the active ones.
   689   for (int i = 0; i < (int) _max_task_num; ++i)
   690     _tasks[i]->set_concurrent(concurrent);
   692   if (concurrent) {
   693     set_concurrent_marking_in_progress();
   694   } else {
   695     // We currently assume that the concurrent flag has been set to
   696     // false before we start remark. At this point we should also be
   697     // in a STW phase.
   698     assert(!concurrent_marking_in_progress(), "invariant");
   699     assert(_finger == _heap_end, "only way to get here");
   700     update_g1_committed(true);
   701   }
   702 }
   704 void ConcurrentMark::set_non_marking_state() {
   705   // We set the global marking state to some default values when we're
   706   // not doing marking.
   707   clear_marking_state();
   708   _active_tasks = 0;
   709   clear_concurrent_marking_in_progress();
   710 }
   712 ConcurrentMark::~ConcurrentMark() {
   713   int size = (int) MAX2(ParallelGCThreads, (size_t)1);
   714   for (int i = 0; i < size; i++) delete _par_cleanup_thread_state[i];
   715   FREE_C_HEAP_ARRAY(ParCleanupThreadState*,
   716                     _par_cleanup_thread_state);
   718   for (int i = 0; i < (int) _max_task_num; ++i) {
   719     delete _task_queues->queue(i);
   720     delete _tasks[i];
   721   }
   722   delete _task_queues;
    723   FREE_C_HEAP_ARRAY(CMTask*, _tasks);
   724 }
   726 // This closure is used to mark refs into the g1 generation
   727 // from external roots in the CMS bit map.
   728 // Called at the first checkpoint.
   729 //
   731 void ConcurrentMark::clearNextBitmap() {
   732   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   733   G1CollectorPolicy* g1p = g1h->g1_policy();
   735   // Make sure that the concurrent mark thread looks to still be in
   736   // the current cycle.
   737   guarantee(cmThread()->during_cycle(), "invariant");
   739   // We are finishing up the current cycle by clearing the next
   740   // marking bitmap and getting it ready for the next cycle. During
   741   // this time no other cycle can start. So, let's make sure that this
   742   // is the case.
   743   guarantee(!g1h->mark_in_progress(), "invariant");
   745   // clear the mark bitmap (no grey objects to start with).
   746   // We need to do this in chunks and offer to yield in between
   747   // each chunk.
   748   HeapWord* start  = _nextMarkBitMap->startWord();
   749   HeapWord* end    = _nextMarkBitMap->endWord();
   750   HeapWord* cur    = start;
   751   size_t chunkSize = M;
   752   while (cur < end) {
   753     HeapWord* next = cur + chunkSize;
   754     if (next > end)
   755       next = end;
   756     MemRegion mr(cur,next);
   757     _nextMarkBitMap->clearRange(mr);
   758     cur = next;
   759     do_yield_check();
   761     // Repeat the asserts from above. We'll do them as asserts here to
   762     // minimize their overhead on the product. However, we'll have
   763     // them as guarantees at the beginning / end of the bitmap
   764     // clearing to get some checking in the product.
   765     assert(cmThread()->during_cycle(), "invariant");
   766     assert(!g1h->mark_in_progress(), "invariant");
   767   }
   769   // Repeat the asserts from above.
   770   guarantee(cmThread()->during_cycle(), "invariant");
   771   guarantee(!g1h->mark_in_progress(), "invariant");
   772 }
   774 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
   775 public:
   776   bool doHeapRegion(HeapRegion* r) {
   777     if (!r->continuesHumongous()) {
   778       r->note_start_of_marking(true);
   779     }
   780     return false;
   781   }
   782 };
   784 void ConcurrentMark::checkpointRootsInitialPre() {
   785   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
   786   G1CollectorPolicy* g1p = g1h->g1_policy();
   788   _has_aborted = false;
   790 #ifndef PRODUCT
   791   if (G1PrintReachableAtInitialMark) {
   792     print_reachable("at-cycle-start",
   793                     true /* use_prev_marking */, true /* all */);
   794   }
   795 #endif
   797   // Initialise marking structures. This has to be done in a STW phase.
   798   reset();
   799 }
   801 class CMMarkRootsClosure: public OopsInGenClosure {
   802 private:
   803   ConcurrentMark*  _cm;
   804   G1CollectedHeap* _g1h;
   805   bool             _do_barrier;
   807 public:
   808   CMMarkRootsClosure(ConcurrentMark* cm,
   809                      G1CollectedHeap* g1h,
   810                      bool do_barrier) : _cm(cm), _g1h(g1h),
   811                                         _do_barrier(do_barrier) { }
   813   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
   814   virtual void do_oop(      oop* p) { do_oop_work(p); }
   816   template <class T> void do_oop_work(T* p) {
   817     T heap_oop = oopDesc::load_heap_oop(p);
   818     if (!oopDesc::is_null(heap_oop)) {
   819       oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
   820       assert(obj->is_oop() || obj->mark() == NULL,
   821              "expected an oop, possibly with mark word displaced");
   822       HeapWord* addr = (HeapWord*)obj;
   823       if (_g1h->is_in_g1_reserved(addr)) {
   824         _cm->grayRoot(obj);
   825       }
   826     }
   827     if (_do_barrier) {
   828       assert(!_g1h->is_in_g1_reserved(p),
   829              "Should be called on external roots");
   830       do_barrier(p);
   831     }
   832   }
   833 };
   835 void ConcurrentMark::checkpointRootsInitialPost() {
   836   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
   838   // For each region note start of marking.
   839   NoteStartOfMarkHRClosure startcl;
   840   g1h->heap_region_iterate(&startcl);
   842   // Start weak-reference discovery.
   843   ReferenceProcessor* rp = g1h->ref_processor();
   844   rp->verify_no_references_recorded();
   845   rp->enable_discovery(); // enable ("weak") refs discovery
   846   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
   848   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    849   // This is the start of the marking cycle; we expect all threads
    850   // to have SATB queues with active set to false.
   851   satb_mq_set.set_active_all_threads(true, /* new active value */
   852                                      false /* expected_active */);
   854   // update_g1_committed() will be called at the end of an evac pause
   855   // when marking is on. So, it's also called at the end of the
   856   // initial-mark pause to update the heap end, if the heap expands
   857   // during it. No need to call it here.
   858 }
   860 // Checkpoint the roots into this generation from outside
   861 // this generation. [Note this initial checkpoint need only
   862 // be approximate -- we'll do a catch up phase subsequently.]
   863 void ConcurrentMark::checkpointRootsInitial() {
   864   assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped");
   865   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   867   double start = os::elapsedTime();
   869   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
   870   g1p->record_concurrent_mark_init_start();
   871   checkpointRootsInitialPre();
   873   // YSR: when concurrent precleaning is in place, we'll
   874   // need to clear the cached card table here
   876   ResourceMark rm;
   877   HandleMark  hm;
   879   g1h->ensure_parsability(false);
   880   g1h->perm_gen()->save_marks();
   882   CMMarkRootsClosure notOlder(this, g1h, false);
   883   CMMarkRootsClosure older(this, g1h, true);
   885   g1h->set_marking_started();
   886   g1h->rem_set()->prepare_for_younger_refs_iterate(false);
   888   g1h->process_strong_roots(true,    // activate StrongRootsScope
   889                             false,   // fake perm gen collection
   890                             SharedHeap::SO_AllClasses,
   891                             &notOlder, // Regular roots
   892                             NULL,     // do not visit active blobs
   893                             &older    // Perm Gen Roots
   894                             );
   895   checkpointRootsInitialPost();
   897   // Statistics.
   898   double end = os::elapsedTime();
   899   _init_times.add((end - start) * 1000.0);
   901   g1p->record_concurrent_mark_init_end();
   902 }
    904 /*
    905    Notice that in the next two methods, we actually leave the STS
    906    during the barrier sync and join it immediately afterwards. If we
    907    did not do this, the following deadlock could occur: one thread
    908    could be in the barrier sync code, waiting for the other thread
    909    to also sync up, whereas another one could be trying to yield,
    910    while also waiting for the other threads to sync up too.
    912    Because the thread that does the sync barrier has left the STS, it
    913    is possible for it to be suspended while a Full GC or an evacuation
    914    pause occurs. This is actually safe, since entering the sync
    915    barrier is one of the last things do_marking_step() does, and it
    916    doesn't manipulate any data structures afterwards.
    917 */
   919 void ConcurrentMark::enter_first_sync_barrier(int task_num) {
   920   if (verbose_low())
   921     gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
   923   ConcurrentGCThread::stsLeave();
   924   _first_overflow_barrier_sync.enter();
   925   ConcurrentGCThread::stsJoin();
   926   // at this point everyone should have synced up and not be doing any
   927   // more work
   929   if (verbose_low())
   930     gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
   932   // let task 0 do this
   933   if (task_num == 0) {
   934     // task 0 is responsible for clearing the global data structures
   935     clear_marking_state();
   937     if (PrintGC) {
   938       gclog_or_tty->date_stamp(PrintGCDateStamps);
   939       gclog_or_tty->stamp(PrintGCTimeStamps);
   940       gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
   941     }
   942   }
    944   // after this, each task should reset its own data structures and
    945   // then go into the second barrier
   946 }
   948 void ConcurrentMark::enter_second_sync_barrier(int task_num) {
   949   if (verbose_low())
   950     gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
   952   ConcurrentGCThread::stsLeave();
   953   _second_overflow_barrier_sync.enter();
   954   ConcurrentGCThread::stsJoin();
   955   // at this point everything should be re-initialised and ready to go
   957   if (verbose_low())
   958     gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
   959 }
   961 void ConcurrentMark::grayRoot(oop p) {
   962   HeapWord* addr = (HeapWord*) p;
   963   // We can't really check against _heap_start and _heap_end, since it
   964   // is possible during an evacuation pause with piggy-backed
   965   // initial-mark that the committed space is expanded during the
    966   // pause without CM observing this change. So the assertion below
    967   // is a bit conservative, but better than nothing.
   968   assert(_g1h->g1_committed().contains(addr),
   969          "address should be within the heap bounds");
   971   if (!_nextMarkBitMap->isMarked(addr))
   972     _nextMarkBitMap->parMark(addr);
   973 }
   975 void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
   976   // The objects on the region have already been marked "in bulk" by
   977   // the caller. We only need to decide whether to push the region on
   978   // the region stack or not.
   980   if (!concurrent_marking_in_progress() || !_should_gray_objects)
   981     // We're done with marking and waiting for remark. We do not need to
   982     // push anything else on the region stack.
   983     return;
   985   HeapWord* finger = _finger;
   987   if (verbose_low())
   988     gclog_or_tty->print_cr("[global] attempting to push "
   989                            "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
   990                            PTR_FORMAT, mr.start(), mr.end(), finger);
   992   if (mr.start() < finger) {
   993     // The finger is always heap region aligned and it is not possible
   994     // for mr to span heap regions.
   995     assert(mr.end() <= finger, "invariant");
   997     // Separated the asserts so that we know which one fires.
   998     assert(mr.start() <= mr.end(),
   999            "region boundaries should fall within the committed space");
  1000     assert(_heap_start <= mr.start(),
  1001            "region boundaries should fall within the committed space");
  1002     assert(mr.end() <= _heap_end,
  1003            "region boundaries should fall within the committed space");
  1004     if (verbose_low())
  1005       gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
  1006                              "below the finger, pushing it",
  1007                              mr.start(), mr.end());
  1009     if (!region_stack_push_lock_free(mr)) {
  1010       if (verbose_low())
   1011         gclog_or_tty->print_cr("[global] region stack has overflown.");
   1012     }
   1013   }
   1014 }
  1016 void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
  1017   // The object is not marked by the caller. We need to at least mark
   1018   // it and maybe push it on the stack.
  1020   HeapWord* addr = (HeapWord*)p;
  1021   if (!_nextMarkBitMap->isMarked(addr)) {
   1022     // We definitely need to mark it, irrespective of whether we bail out
  1023     // because we're done with marking.
  1024     if (_nextMarkBitMap->parMark(addr)) {
  1025       if (!concurrent_marking_in_progress() || !_should_gray_objects)
  1026         // If we're done with concurrent marking and we're waiting for
  1027         // remark, then we're not pushing anything on the stack.
  1028         return;
   1030       // No OrderAccess::store_load() is needed. It is implicit in the
  1031       // CAS done in parMark(addr) above
  1032       HeapWord* finger = _finger;
  1034       if (addr < finger) {
  1035         if (!mark_stack_push(oop(addr))) {
  1036           if (verbose_low())
  1037             gclog_or_tty->print_cr("[global] global stack overflow "
   1038                                    "during parMark");
   1039         }
   1040       }
   1041     }
   1042   }
   1043 }
  1045 class CMConcurrentMarkingTask: public AbstractGangTask {
  1046 private:
  1047   ConcurrentMark*       _cm;
  1048   ConcurrentMarkThread* _cmt;
  1050 public:
  1051   void work(int worker_i) {
  1052     assert(Thread::current()->is_ConcurrentGC_thread(),
  1053            "this should only be done by a conc GC thread");
  1054     ResourceMark rm;
  1056     double start_vtime = os::elapsedVTime();
  1058     ConcurrentGCThread::stsJoin();
  1060     assert((size_t) worker_i < _cm->active_tasks(), "invariant");
  1061     CMTask* the_task = _cm->task(worker_i);
  1062     the_task->record_start_time();
  1063     if (!_cm->has_aborted()) {
  1064       do {
  1065         double start_vtime_sec = os::elapsedVTime();
  1066         double start_time_sec = os::elapsedTime();
  1067         the_task->do_marking_step(10.0);
  1068         double end_time_sec = os::elapsedTime();
  1069         double end_vtime_sec = os::elapsedVTime();
  1070         double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
  1071         double elapsed_time_sec = end_time_sec - start_time_sec;
  1072         _cm->clear_has_overflown();
  1074         bool ret = _cm->do_yield_check(worker_i);
  1076         jlong sleep_time_ms;
  1077         if (!_cm->has_aborted() && the_task->has_aborted()) {
  1078           sleep_time_ms =
  1079             (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
  1080           ConcurrentGCThread::stsLeave();
  1081           os::sleep(Thread::current(), sleep_time_ms, false);
  1082           ConcurrentGCThread::stsJoin();
  1084         double end_time2_sec = os::elapsedTime();
  1085         double elapsed_time2_sec = end_time2_sec - start_time_sec;
  1087 #if 0
  1088           gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
  1089                                  "overhead %1.4lf",
  1090                                  elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
  1091                                  the_task->conc_overhead(os::elapsedTime()) * 8.0);
  1092           gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
  1093                                  elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
  1094 #endif
  1095       } while (!_cm->has_aborted() && the_task->has_aborted());
  1097     the_task->record_end_time();
  1098     guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
  1100     ConcurrentGCThread::stsLeave();
  1102     double end_vtime = os::elapsedVTime();
  1103     _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
  1106   CMConcurrentMarkingTask(ConcurrentMark* cm,
  1107                           ConcurrentMarkThread* cmt) :
  1108       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
  1110   ~CMConcurrentMarkingTask() { }
  1111 };
  1113 void ConcurrentMark::markFromRoots() {
  1114   // we might be tempted to assert that:
  1115   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  1116   //        "inconsistent argument?");
  1117   // However that wouldn't be right, because it's possible that
  1118   // a safepoint is indeed in progress as a younger generation
  1119   // stop-the-world GC happens even as we mark in this generation.
  1121   _restart_for_overflow = false;
  1123   set_phase(MAX2((size_t) 1, parallel_marking_threads()), true);
  1125   CMConcurrentMarkingTask markingTask(this, cmThread());
  1126   if (parallel_marking_threads() > 0)
  1127     _parallel_workers->run_task(&markingTask);
  1128   else
  1129     markingTask.work(0);
   1130   print_stats();
   1131 }
  1133 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  1134   // world is stopped at this checkpoint
  1135   assert(SafepointSynchronize::is_at_safepoint(),
  1136          "world should be stopped");
  1137   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  1139   // If a full collection has happened, we shouldn't do this.
  1140   if (has_aborted()) {
  1141     g1h->set_marking_complete(); // So bitmap clearing isn't confused
  1142     return;
  1145   if (VerifyDuringGC) {
  1146     HandleMark hm;  // handle scope
  1147     gclog_or_tty->print(" VerifyDuringGC:(before)");
  1148     Universe::heap()->prepare_for_verify();
  1149     Universe::verify(true, false, true);
  1152   G1CollectorPolicy* g1p = g1h->g1_policy();
  1153   g1p->record_concurrent_mark_remark_start();
  1155   double start = os::elapsedTime();
  1157   checkpointRootsFinalWork();
  1159   double mark_work_end = os::elapsedTime();
  1161   weakRefsWork(clear_all_soft_refs);
  1163   if (has_overflown()) {
  1164     // Oops.  We overflowed.  Restart concurrent marking.
  1165     _restart_for_overflow = true;
  1166     // Clear the flag. We do not need it any more.
  1167     clear_has_overflown();
  1168     if (G1TraceMarkStackOverflow)
  1169       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
  1170   } else {
  1171     // We're done with marking.
   1172     // This is the end of the marking cycle; we expect all threads
   1173     // to have SATB queues with active set to true.
  1174     JavaThread::satb_mark_queue_set().set_active_all_threads(
  1175                                                   false, /* new active value */
  1176                                                   true /* expected_active */);
  1178     if (VerifyDuringGC) {
  1179       HandleMark hm;  // handle scope
  1180       gclog_or_tty->print(" VerifyDuringGC:(after)");
  1181       Universe::heap()->prepare_for_verify();
  1182       Universe::heap()->verify(/* allow_dirty */      true,
  1183                                /* silent */           false,
  1184                                /* use_prev_marking */ false);
  1188 #if VERIFY_OBJS_PROCESSED
  1189   _scan_obj_cl.objs_processed = 0;
  1190   ThreadLocalObjQueue::objs_enqueued = 0;
  1191 #endif
  1193   // Statistics
  1194   double now = os::elapsedTime();
  1195   _remark_mark_times.add((mark_work_end - start) * 1000.0);
  1196   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  1197   _remark_times.add((now - start) * 1000.0);
  1199   g1p->record_concurrent_mark_remark_end();
  1203 #define CARD_BM_TEST_MODE 0
  1205 class CalcLiveObjectsClosure: public HeapRegionClosure {
  1207   CMBitMapRO* _bm;
  1208   ConcurrentMark* _cm;
  1209   bool _changed;
  1210   bool _yield;
  1211   size_t _words_done;
  1212   size_t _tot_live;
  1213   size_t _tot_used;
  1214   size_t _regions_done;
  1215   double _start_vtime_sec;
  1217   BitMap* _region_bm;
  1218   BitMap* _card_bm;
  1219   intptr_t _bottom_card_num;
  1220   bool _final;
  1222   void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
  1223     for (intptr_t i = start_card_num; i <= last_card_num; i++) {
  1224 #if CARD_BM_TEST_MODE
  1225       guarantee(_card_bm->at(i - _bottom_card_num), "Should already be set.");
  1226 #else
  1227       _card_bm->par_at_put(i - _bottom_card_num, 1);
  1228 #endif
  1232 public:
  1233   CalcLiveObjectsClosure(bool final,
  1234                          CMBitMapRO *bm, ConcurrentMark *cm,
  1235                          BitMap* region_bm, BitMap* card_bm) :
  1236     _bm(bm), _cm(cm), _changed(false), _yield(true),
  1237     _words_done(0), _tot_live(0), _tot_used(0),
  1238     _region_bm(region_bm), _card_bm(card_bm),_final(final),
  1239     _regions_done(0), _start_vtime_sec(0.0)
  1241     _bottom_card_num =
  1242       intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
  1243                CardTableModRefBS::card_shift);
  1246   // It takes a region that's not empty (i.e., it has at least one
   1247   // live object in it) and sets its corresponding bit on the region
  1248   // bitmap to 1. If the region is "starts humongous" it will also set
  1249   // to 1 the bits on the region bitmap that correspond to its
  1250   // associated "continues humongous" regions.
  1251   void set_bit_for_region(HeapRegion* hr) {
  1252     assert(!hr->continuesHumongous(), "should have filtered those out");
  1254     size_t index = hr->hrs_index();
  1255     if (!hr->startsHumongous()) {
  1256       // Normal (non-humongous) case: just set the bit.
  1257       _region_bm->par_at_put((BitMap::idx_t) index, true);
  1258     } else {
  1259       // Starts humongous case: calculate how many regions are part of
  1260       // this humongous region and then set the bit range. It might
  1261       // have been a bit more efficient to look at the object that
  1262       // spans these humongous regions to calculate their number from
  1263       // the object's size. However, it's a good idea to calculate
  1264       // this based on the metadata itself, and not the region
  1265       // contents, so that this code is not aware of what goes into
  1266       // the humongous regions (in case this changes in the future).
  1267       G1CollectedHeap* g1h = G1CollectedHeap::heap();
  1268       size_t end_index = index + 1;
  1269       while (end_index < g1h->n_regions()) {
  1270         HeapRegion* chr = g1h->region_at(end_index);
  1271         if (!chr->continuesHumongous()) {
  1272           break;
  1274         end_index += 1;
  1276       _region_bm->par_at_put_range((BitMap::idx_t) index,
  1277                                    (BitMap::idx_t) end_index, true);
  1281   bool doHeapRegion(HeapRegion* hr) {
  1282     if (!_final && _regions_done == 0)
  1283       _start_vtime_sec = os::elapsedVTime();
  1285     if (hr->continuesHumongous()) {
  1286       // We will ignore these here and process them when their
  1287       // associated "starts humongous" region is processed (see
  1288       // set_bit_for_heap_region()). Note that we cannot rely on their
  1289       // associated "starts humongous" region to have their bit set to
  1290       // 1 since, due to the region chunking in the parallel region
  1291       // iteration, a "continues humongous" region might be visited
  1292       // before its associated "starts humongous".
  1293       return false;
  1296     HeapWord* nextTop = hr->next_top_at_mark_start();
  1297     HeapWord* start   = hr->top_at_conc_mark_count();
  1298     assert(hr->bottom() <= start && start <= hr->end() &&
  1299            hr->bottom() <= nextTop && nextTop <= hr->end() &&
  1300            start <= nextTop,
  1301            "Preconditions.");
   1302     // Otherwise, record the number of words we'll examine.
  1303     size_t words_done = (nextTop - start);
  1304     // Find the first marked object at or after "start".
  1305     start = _bm->getNextMarkedWordAddress(start, nextTop);
  1306     size_t marked_bytes = 0;
  1308     // Below, the term "card num" means the result of shifting an address
  1309     // by the card shift -- address 0 corresponds to card number 0.  One
  1310     // must subtract the card num of the bottom of the heap to obtain a
  1311     // card table index.
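            // For example, with 512-byte cards (card shift 9) an object at
            // address 0x2400 lies on card num 0x2400 >> 9 = 18; if the
            // bottom of the heap is at 0x2000 (card num 16), its card
            // table index is 18 - 16 = 2.  (Addresses illustrative only.)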
  1312     // The first card num of the sequence of live cards currently being
  1313     // constructed.  -1 ==> no sequence.
  1314     intptr_t start_card_num = -1;
  1315     // The last card num of the sequence of live cards currently being
  1316     // constructed.  -1 ==> no sequence.
  1317     intptr_t last_card_num = -1;
  1319     while (start < nextTop) {
  1320       if (_yield && _cm->do_yield_check()) {
  1321         // We yielded.  It might be for a full collection, in which case
  1322         // all bets are off; terminate the traversal.
  1323         if (_cm->has_aborted()) {
  1324           _changed = false;
  1325           return true;
  1326         } else {
  1327           // Otherwise, it might be a collection pause, and the region
  1328           // we're looking at might be in the collection set.  We'll
  1329           // abandon this region.
  1330           return false;
  1333       oop obj = oop(start);
  1334       int obj_sz = obj->size();
  1335       // The card num of the start of the current object.
  1336       intptr_t obj_card_num =
  1337         intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
  1339       HeapWord* obj_last = start + obj_sz - 1;
  1340       intptr_t obj_last_card_num =
  1341         intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);
  1343       if (obj_card_num != last_card_num) {
  1344         if (start_card_num == -1) {
  1345           assert(last_card_num == -1, "Both or neither.");
  1346           start_card_num = obj_card_num;
  1347         } else {
  1348           assert(last_card_num != -1, "Both or neither.");
  1349           assert(obj_card_num >= last_card_num, "Inv");
  1350           if ((obj_card_num - last_card_num) > 1) {
  1351             // Mark the last run, and start a new one.
  1352             mark_card_num_range(start_card_num, last_card_num);
  1353             start_card_num = obj_card_num;
  1356 #if CARD_BM_TEST_MODE
  1357         /*
  1358         gclog_or_tty->print_cr("Setting bits from %d/%d.",
  1359                                obj_card_num - _bottom_card_num,
  1360                                obj_last_card_num - _bottom_card_num);
  1361         */
  1362         for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
  1363           _card_bm->par_at_put(j - _bottom_card_num, 1);
  1365 #endif
  1367       // In any case, we set the last card num.
  1368       last_card_num = obj_last_card_num;
  1370       marked_bytes += (size_t)obj_sz * HeapWordSize;
  1371       // Find the next marked object after this one.
  1372       start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
  1373       _changed = true;
  1375     // Handle the last range, if any.
  1376     if (start_card_num != -1)
  1377       mark_card_num_range(start_card_num, last_card_num);
  1378     if (_final) {
  1379       // Mark the allocated-since-marking portion...
  1380       HeapWord* tp = hr->top();
  1381       if (nextTop < tp) {
  1382         start_card_num =
  1383           intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
  1384         last_card_num =
  1385           intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
  1386         mark_card_num_range(start_card_num, last_card_num);
  1387         // This definitely means the region has live objects.
  1388         set_bit_for_region(hr);
  1392     hr->add_to_marked_bytes(marked_bytes);
  1393     // Update the live region bitmap.
  1394     if (marked_bytes > 0) {
  1395       set_bit_for_region(hr);
  1397     hr->set_top_at_conc_mark_count(nextTop);
  1398     _tot_live += hr->next_live_bytes();
  1399     _tot_used += hr->used();
  1400     _words_done = words_done;
  1402     if (!_final) {
  1403       ++_regions_done;
  1404       if (_regions_done % 10 == 0) {
  1405         double end_vtime_sec = os::elapsedVTime();
  1406         double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
  1407         if (elapsed_vtime_sec > (10.0 / 1000.0)) {
  1408           jlong sleep_time_ms =
  1409             (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
  1410           os::sleep(Thread::current(), sleep_time_ms, false);
  1411           _start_vtime_sec = end_vtime_sec;
  1416     return false;
  1419   bool changed() { return _changed;  }
  1420   void reset()   { _changed = false; _words_done = 0; }
  1421   void no_yield() { _yield = false; }
  1422   size_t words_done() { return _words_done; }
  1423   size_t tot_live() { return _tot_live; }
  1424   size_t tot_used() { return _tot_used; }
  1425 };
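        // Concurrent (non-final) liveness counting: clear the region and card
        // bitmaps and repeatedly apply a non-final CalcLiveObjectsClosure
        // (which may yield) until a pass completes without observing any
        // further changes.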
  1428 void ConcurrentMark::calcDesiredRegions() {
  1429   _region_bm.clear();
  1430   _card_bm.clear();
  1431   CalcLiveObjectsClosure calccl(false /*final*/,
  1432                                 nextMarkBitMap(), this,
  1433                                 &_region_bm, &_card_bm);
  1434   G1CollectedHeap *g1h = G1CollectedHeap::heap();
  1435   g1h->heap_region_iterate(&calccl);
  1437   do {
  1438     calccl.reset();
  1439     g1h->heap_region_iterate(&calccl);
  1440   } while (calccl.changed());
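        // Parallel task for the final (stop-the-world) counting pass: each
        // worker re-runs CalcLiveObjectsClosure in "final" mode (no yielding)
        // over the regions it claims and records per-worker live/used byte
        // totals, which live_bytes()/used_bytes() then sum up.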
  1443 class G1ParFinalCountTask: public AbstractGangTask {
  1444 protected:
  1445   G1CollectedHeap* _g1h;
  1446   CMBitMap* _bm;
  1447   size_t _n_workers;
  1448   size_t *_live_bytes;
  1449   size_t *_used_bytes;
  1450   BitMap* _region_bm;
  1451   BitMap* _card_bm;
  1452 public:
  1453   G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
  1454                       BitMap* region_bm, BitMap* card_bm) :
  1455     AbstractGangTask("G1 final counting"), _g1h(g1h),
  1456     _bm(bm), _region_bm(region_bm), _card_bm(card_bm)
  1458     if (ParallelGCThreads > 0)
  1459       _n_workers = _g1h->workers()->total_workers();
  1460     else
  1461       _n_workers = 1;
  1462     _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
  1463     _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
  1466   ~G1ParFinalCountTask() {
  1467     FREE_C_HEAP_ARRAY(size_t, _live_bytes);
  1468     FREE_C_HEAP_ARRAY(size_t, _used_bytes);
  1471   void work(int i) {
  1472     CalcLiveObjectsClosure calccl(true /*final*/,
  1473                                   _bm, _g1h->concurrent_mark(),
  1474                                   _region_bm, _card_bm);
  1475     calccl.no_yield();
  1476     if (G1CollectedHeap::use_parallel_gc_threads()) {
  1477       _g1h->heap_region_par_iterate_chunked(&calccl, i,
  1478                                             HeapRegion::FinalCountClaimValue);
  1479     } else {
  1480       _g1h->heap_region_iterate(&calccl);
  1482     assert(calccl.complete(), "Shouldn't have yielded!");
  1484     assert((size_t) i < _n_workers, "invariant");
  1485     _live_bytes[i] = calccl.tot_live();
  1486     _used_bytes[i] = calccl.tot_used();
  1488   size_t live_bytes()  {
  1489     size_t live_bytes = 0;
  1490     for (size_t i = 0; i < _n_workers; ++i)
  1491       live_bytes += _live_bytes[i];
  1492     return live_bytes;
  1494   size_t used_bytes()  {
  1495     size_t used_bytes = 0;
  1496     for (size_t i = 0; i < _n_workers; ++i)
  1497       used_bytes += _used_bytes[i];
  1498     return used_bytes;
  1500 };
  1502 class G1ParNoteEndTask;
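        // Per-worker closure used by G1ParNoteEndTask below. For each region
        // (other than "continues humongous" ones) it notes the end of marking,
        // sums the regions' maximum live bytes, and frees regions that turned
        // out to contain no live data, putting them on this worker's unclean
        // region list.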
  1504 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  1505   G1CollectedHeap* _g1;
  1506   int _worker_num;
  1507   size_t _max_live_bytes;
  1508   size_t _regions_claimed;
  1509   size_t _freed_bytes;
  1510   size_t _cleared_h_regions;
  1511   size_t _freed_regions;
  1512   UncleanRegionList* _unclean_region_list;
  1513   double _claimed_region_time;
  1514   double _max_region_time;
  1516 public:
  1517   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
  1518                              UncleanRegionList* list,
  1519                              int worker_num);
  1520   size_t freed_bytes() { return _freed_bytes; }
  1521   size_t cleared_h_regions() { return _cleared_h_regions; }
  1522   size_t freed_regions() { return  _freed_regions; }
  1523   UncleanRegionList* unclean_region_list() {
  1524     return _unclean_region_list;
  1527   bool doHeapRegion(HeapRegion *r);
  1529   size_t max_live_bytes() { return _max_live_bytes; }
  1530   size_t regions_claimed() { return _regions_claimed; }
  1531   double claimed_region_time_sec() { return _claimed_region_time; }
  1532   double max_region_time_sec() { return _max_region_time; }
  1533 };
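        // Gang task that applies G1NoteEndOfConcMarkClosure to the heap (in
        // parallel chunks when available), finishes freeing each worker's
        // regions, and merges the per-worker freed/live byte counts under
        // ParGCRareEvent_lock.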
  1535 class G1ParNoteEndTask: public AbstractGangTask {
  1536   friend class G1NoteEndOfConcMarkClosure;
  1537 protected:
  1538   G1CollectedHeap* _g1h;
  1539   size_t _max_live_bytes;
  1540   size_t _freed_bytes;
  1541   ConcurrentMark::ParCleanupThreadState** _par_cleanup_thread_state;
  1542 public:
  1543   G1ParNoteEndTask(G1CollectedHeap* g1h,
  1544                    ConcurrentMark::ParCleanupThreadState**
  1545                    par_cleanup_thread_state) :
  1546     AbstractGangTask("G1 note end"), _g1h(g1h),
  1547     _max_live_bytes(0), _freed_bytes(0),
  1548     _par_cleanup_thread_state(par_cleanup_thread_state)
  1549   {}
  1551   void work(int i) {
  1552     double start = os::elapsedTime();
  1553     G1NoteEndOfConcMarkClosure g1_note_end(_g1h,
  1554                                            &_par_cleanup_thread_state[i]->list,
  1555                                            i);
  1556     if (G1CollectedHeap::use_parallel_gc_threads()) {
  1557       _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
  1558                                             HeapRegion::NoteEndClaimValue);
  1559     } else {
  1560       _g1h->heap_region_iterate(&g1_note_end);
  1562     assert(g1_note_end.complete(), "Shouldn't have yielded!");
  1564     // Now finish up freeing the current thread's regions.
  1565     _g1h->finish_free_region_work(g1_note_end.freed_bytes(),
  1566                                   g1_note_end.cleared_h_regions(),
  1567                                   0, NULL);
  1569       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  1570       _max_live_bytes += g1_note_end.max_live_bytes();
  1571       _freed_bytes += g1_note_end.freed_bytes();
  1573     double end = os::elapsedTime();
  1574     if (G1PrintParCleanupStats) {
  1575       gclog_or_tty->print("     Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
  1576                           "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
  1577                           i, start, end, (end-start)*1000.0,
  1578                           g1_note_end.regions_claimed(),
  1579                           g1_note_end.claimed_region_time_sec()*1000.0,
  1580                           g1_note_end.max_region_time_sec()*1000.0);
  1583   size_t max_live_bytes() { return _max_live_bytes; }
  1584   size_t freed_bytes() { return _freed_bytes; }
  1585 };
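        // Gang task that scrubs the remembered sets, using the liveness
        // information collected in the region and card bitmaps so that
        // entries that no longer refer to live data can be dropped.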
  1587 class G1ParScrubRemSetTask: public AbstractGangTask {
  1588 protected:
  1589   G1RemSet* _g1rs;
  1590   BitMap* _region_bm;
  1591   BitMap* _card_bm;
  1592 public:
  1593   G1ParScrubRemSetTask(G1CollectedHeap* g1h,
  1594                        BitMap* region_bm, BitMap* card_bm) :
  1595     AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
  1596     _region_bm(region_bm), _card_bm(card_bm)
  1597   {}
  1599   void work(int i) {
  1600     if (G1CollectedHeap::use_parallel_gc_threads()) {
  1601       _g1rs->scrub_par(_region_bm, _card_bm, i,
  1602                        HeapRegion::ScrubRemSetClaimValue);
  1603     } else {
  1604       _g1rs->scrub(_region_bm, _card_bm);
  1608 };
  1610 G1NoteEndOfConcMarkClosure::
  1611 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
  1612                            UncleanRegionList* list,
  1613                            int worker_num)
  1614   : _g1(g1), _worker_num(worker_num),
  1615     _max_live_bytes(0), _regions_claimed(0),
  1616     _freed_bytes(0), _cleared_h_regions(0), _freed_regions(0),
  1617     _claimed_region_time(0.0), _max_region_time(0.0),
  1618     _unclean_region_list(list)
  1619 {}
  1621 bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *r) {
  1622   // We use a claim value of zero here because all regions
  1623   // were claimed with value 1 in the FinalCount task.
  1624   r->reset_gc_time_stamp();
  1625   if (!r->continuesHumongous()) {
  1626     double start = os::elapsedTime();
  1627     _regions_claimed++;
  1628     r->note_end_of_marking();
  1629     _max_live_bytes += r->max_live_bytes();
  1630     _g1->free_region_if_totally_empty_work(r,
  1631                                            _freed_bytes,
  1632                                            _cleared_h_regions,
  1633                                            _freed_regions,
  1634                                            _unclean_region_list,
  1635                                            true /*par*/);
  1636     double region_time = (os::elapsedTime() - start);
  1637     _claimed_region_time += region_time;
  1638     if (region_time > _max_region_time) _max_region_time = region_time;
  1640   return false;
  1643 void ConcurrentMark::cleanup() {
  1644   // world is stopped at this checkpoint
  1645   assert(SafepointSynchronize::is_at_safepoint(),
  1646          "world should be stopped");
  1647   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  1649   // If a full collection has happened, we shouldn't do this.
  1650   if (has_aborted()) {
  1651     g1h->set_marking_complete(); // So bitmap clearing isn't confused
  1652     return;
  1655   if (VerifyDuringGC) {
  1656     HandleMark hm;  // handle scope
  1657     gclog_or_tty->print(" VerifyDuringGC:(before)");
  1658     Universe::heap()->prepare_for_verify();
  1659     Universe::verify(/* allow dirty  */ true,
  1660                      /* silent       */ false,
  1661                      /* prev marking */ true);
  1664   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
  1665   g1p->record_concurrent_mark_cleanup_start();
  1667   double start = os::elapsedTime();
  1669   // Do counting once more with the world stopped for good measure.
  1670   G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
  1671                                         &_region_bm, &_card_bm);
  1672   if (G1CollectedHeap::use_parallel_gc_threads()) {
  1673     assert(g1h->check_heap_region_claim_values(
  1674                                                HeapRegion::InitialClaimValue),
  1675            "sanity check");
  1677     int n_workers = g1h->workers()->total_workers();
  1678     g1h->set_par_threads(n_workers);
  1679     g1h->workers()->run_task(&g1_par_count_task);
  1680     g1h->set_par_threads(0);
  1682     assert(g1h->check_heap_region_claim_values(
  1683                                              HeapRegion::FinalCountClaimValue),
  1684            "sanity check");
  1685   } else {
  1686     g1_par_count_task.work(0);
  1689   size_t known_garbage_bytes =
  1690     g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
  1691 #if 0
  1692   gclog_or_tty->print_cr("used %1.2lf, live %1.2lf, garbage %1.2lf",
  1693                          (double) g1_par_count_task.used_bytes() / (double) (1024 * 1024),
  1694                          (double) g1_par_count_task.live_bytes() / (double) (1024 * 1024),
  1695                          (double) known_garbage_bytes / (double) (1024 * 1024));
  1696 #endif // 0
  1697   g1p->set_known_garbage_bytes(known_garbage_bytes);
  1699   size_t start_used_bytes = g1h->used();
  1700   _at_least_one_mark_complete = true;
  1701   g1h->set_marking_complete();
  1703   double count_end = os::elapsedTime();
  1704   double this_final_counting_time = (count_end - start);
  1705   if (G1PrintParCleanupStats) {
  1706     gclog_or_tty->print_cr("Cleanup:");
  1707     gclog_or_tty->print_cr("  Finalize counting: %8.3f ms",
  1708                            this_final_counting_time*1000.0);
  1710   _total_counting_time += this_final_counting_time;
  1712   // Install newly created mark bitMap as "prev".
  1713   swapMarkBitMaps();
  1715   g1h->reset_gc_time_stamp();
  1717   // Note end of marking in all heap regions.
  1718   double note_end_start = os::elapsedTime();
  1719   G1ParNoteEndTask g1_par_note_end_task(g1h, _par_cleanup_thread_state);
  1720   if (G1CollectedHeap::use_parallel_gc_threads()) {
  1721     int n_workers = g1h->workers()->total_workers();
  1722     g1h->set_par_threads(n_workers);
  1723     g1h->workers()->run_task(&g1_par_note_end_task);
  1724     g1h->set_par_threads(0);
  1726     assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
  1727            "sanity check");
  1728   } else {
  1729     g1_par_note_end_task.work(0);
  1731   g1h->set_unclean_regions_coming(true);
  1732   double note_end_end = os::elapsedTime();
  1733   // Tell the mutators that there might be unclean regions coming...
  1734   if (G1PrintParCleanupStats) {
  1735     gclog_or_tty->print_cr("  note end of marking: %8.3f ms.",
  1736                            (note_end_end - note_end_start)*1000.0);
   1740   // Scrub the remembered sets (if enabled) before the sorting call
   1741   // below, since that affects the metric by which we sort the heap regions.
  1742   if (G1ScrubRemSets) {
  1743     double rs_scrub_start = os::elapsedTime();
  1744     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
  1745     if (G1CollectedHeap::use_parallel_gc_threads()) {
  1746       int n_workers = g1h->workers()->total_workers();
  1747       g1h->set_par_threads(n_workers);
  1748       g1h->workers()->run_task(&g1_par_scrub_rs_task);
  1749       g1h->set_par_threads(0);
  1751       assert(g1h->check_heap_region_claim_values(
  1752                                             HeapRegion::ScrubRemSetClaimValue),
  1753              "sanity check");
  1754     } else {
  1755       g1_par_scrub_rs_task.work(0);
  1758     double rs_scrub_end = os::elapsedTime();
  1759     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
  1760     _total_rs_scrub_time += this_rs_scrub_time;
  1763   // this will also free any regions totally full of garbage objects,
  1764   // and sort the regions.
  1765   g1h->g1_policy()->record_concurrent_mark_cleanup_end(
  1766                         g1_par_note_end_task.freed_bytes(),
  1767                         g1_par_note_end_task.max_live_bytes());
  1769   // Statistics.
  1770   double end = os::elapsedTime();
  1771   _cleanup_times.add((end - start) * 1000.0);
  1773   // G1CollectedHeap::heap()->print();
  1774   // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
  1775   // G1CollectedHeap::heap()->get_gc_time_stamp());
  1777   if (PrintGC || PrintGCDetails) {
  1778     g1h->print_size_transition(gclog_or_tty,
  1779                                start_used_bytes,
  1780                                g1h->used(),
  1781                                g1h->capacity());
  1784   size_t cleaned_up_bytes = start_used_bytes - g1h->used();
  1785   g1p->decrease_known_garbage_bytes(cleaned_up_bytes);
   1787   // We need to make this a "collection" so that any collection pause that
   1788   // races with it goes around and waits for completeCleanup to finish.
  1789   g1h->increment_total_collections();
  1791   if (VerifyDuringGC) {
  1792     HandleMark hm;  // handle scope
  1793     gclog_or_tty->print(" VerifyDuringGC:(after)");
  1794     Universe::heap()->prepare_for_verify();
  1795     Universe::verify(/* allow dirty  */ true,
  1796                      /* silent       */ false,
  1797                      /* prev marking */ true);
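        // Concurrent continuation of cleanup: walk each worker's list of
        // unclean regions, clear their remembered sets and hand them back to
        // the heap's unclean region list, bumping the free-region count for
        // non-humongous regions.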
  1801 void ConcurrentMark::completeCleanup() {
  1802   // A full collection intervened.
  1803   if (has_aborted()) return;
  1805   int first = 0;
  1806   int last = (int)MAX2(ParallelGCThreads, (size_t)1);
  1807   for (int t = 0; t < last; t++) {
  1808     UncleanRegionList* list = &_par_cleanup_thread_state[t]->list;
  1809     assert(list->well_formed(), "Inv");
  1810     HeapRegion* hd = list->hd();
  1811     while (hd != NULL) {
   1812       // Finish up this region: clear its remembered set and move it to the unclean list.
  1813       hd->rem_set()->clear();
  1814       HeapRegion* next_hd = hd->next_from_unclean_list();
  1815       (void)list->pop();
  1816       assert(list->hd() == next_hd, "how not?");
  1817       _g1h->put_region_on_unclean_list(hd);
  1818       if (!hd->isHumongous()) {
   1819         // Increment the _free_regions count by 1.
  1820         _g1h->finish_free_region_work(0, 0, 1, NULL);
  1822       hd = list->hd();
  1823       assert(hd == next_hd, "how not?");
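        // Liveness predicate given to the reference processor: an object is
        // treated as alive if it lies outside the G1 reserved heap or is not
        // "ill" (i.e. not known to be dead) according to the marking
        // information.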
  1829 class G1CMIsAliveClosure: public BoolObjectClosure {
  1830   G1CollectedHeap* _g1;
  1831  public:
  1832   G1CMIsAliveClosure(G1CollectedHeap* g1) :
  1833     _g1(g1)
  1834   {}
  1836   void do_object(oop obj) {
  1837     assert(false, "not to be invoked");
  1839   bool do_object_b(oop obj) {
  1840     HeapWord* addr = (HeapWord*)obj;
  1841     return addr != NULL &&
  1842            (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
  1844 };
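        // Keep-alive closure for reference processing: referents that are in
        // the G1 heap and still "ill" are marked on the supplied (next)
        // bitmap and pushed onto the global mark stack so that they get
        // traced.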
  1846 class G1CMKeepAliveClosure: public OopClosure {
  1847   G1CollectedHeap* _g1;
  1848   ConcurrentMark*  _cm;
  1849   CMBitMap*        _bitMap;
  1850  public:
  1851   G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
  1852                        CMBitMap* bitMap) :
  1853     _g1(g1), _cm(cm),
  1854     _bitMap(bitMap) {}
  1856   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  1857   virtual void do_oop(      oop* p) { do_oop_work(p); }
  1859   template <class T> void do_oop_work(T* p) {
  1860     oop thisOop = oopDesc::load_decode_heap_oop(p);
  1861     HeapWord* addr = (HeapWord*)thisOop;
  1862     if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(thisOop)) {
  1863       _bitMap->mark(addr);
  1864       _cm->mark_stack_push(thisOop);
  1867 };
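        // Drain closure for reference processing: empties the global mark
        // stack through the keep-alive closure above.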
  1869 class G1CMDrainMarkingStackClosure: public VoidClosure {
  1870   CMMarkStack*                  _markStack;
  1871   CMBitMap*                     _bitMap;
  1872   G1CMKeepAliveClosure*         _oopClosure;
  1873  public:
  1874   G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
  1875                                G1CMKeepAliveClosure* oopClosure) :
  1876     _bitMap(bitMap),
  1877     _markStack(markStack),
  1878     _oopClosure(oopClosure)
  1879   {}
  1881   void do_void() {
  1882     _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
  1884 };
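        // Reference processing at the end of marking: process the references
        // discovered during marking with the closures above, note a mark
        // stack overflow if one occurred, enqueue the remaining references,
        // and finally unlink stale entries from the symbol and string tables.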
  1886 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
  1887   ResourceMark rm;
  1888   HandleMark   hm;
  1889   G1CollectedHeap* g1h   = G1CollectedHeap::heap();
  1890   ReferenceProcessor* rp = g1h->ref_processor();
  1892   // See the comment in G1CollectedHeap::ref_processing_init()
  1893   // about how reference processing currently works in G1.
  1895   // Process weak references.
  1896   rp->setup_policy(clear_all_soft_refs);
  1897   assert(_markStack.isEmpty(), "mark stack should be empty");
  1899   G1CMIsAliveClosure   g1IsAliveClosure  (g1h);
  1900   G1CMKeepAliveClosure g1KeepAliveClosure(g1h, this, nextMarkBitMap());
  1901   G1CMDrainMarkingStackClosure
  1902     g1DrainMarkingStackClosure(nextMarkBitMap(), &_markStack,
  1903                                &g1KeepAliveClosure);
  1905   // XXXYYY  Also: copy the parallel ref processing code from CMS.
  1906   rp->process_discovered_references(&g1IsAliveClosure,
  1907                                     &g1KeepAliveClosure,
  1908                                     &g1DrainMarkingStackClosure,
  1909                                     NULL);
  1910   assert(_markStack.overflow() || _markStack.isEmpty(),
  1911          "mark stack should be empty (unless it overflowed)");
  1912   if (_markStack.overflow()) {
  1913     set_has_overflown();
  1916   rp->enqueue_discovered_references();
  1917   rp->verify_no_references_recorded();
  1918   assert(!rp->discovery_enabled(), "should have been disabled");
  1920   // Now clean up stale oops in SymbolTable and StringTable
  1921   SymbolTable::unlink(&g1IsAliveClosure);
  1922   StringTable::unlink(&g1IsAliveClosure);
  1925 void ConcurrentMark::swapMarkBitMaps() {
  1926   CMBitMapRO* temp = _prevMarkBitMap;
  1927   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
  1928   _nextMarkBitMap  = (CMBitMap*)  temp;
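        // Parallel remark task: each worker that corresponds to an active
        // marking task keeps calling do_marking_step() with an effectively
        // unlimited time budget until the task finishes, unless the global
        // mark stack overflows (in which case remark is abandoned and
        // marking will be restarted).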
  1931 class CMRemarkTask: public AbstractGangTask {
  1932 private:
  1933   ConcurrentMark *_cm;
  1935 public:
  1936   void work(int worker_i) {
  1937     // Since all available tasks are actually started, we should
   1938     // only proceed if we're supposed to be active.
  1939     if ((size_t)worker_i < _cm->active_tasks()) {
  1940       CMTask* task = _cm->task(worker_i);
  1941       task->record_start_time();
  1942       do {
  1943         task->do_marking_step(1000000000.0 /* something very large */);
  1944       } while (task->has_aborted() && !_cm->has_overflown());
  1945       // If we overflow, then we do not want to restart. We instead
  1946       // want to abort remark and do concurrent marking again.
  1947       task->record_end_time();
  1951   CMRemarkTask(ConcurrentMark* cm) :
  1952     AbstractGangTask("Par Remark"), _cm(cm) { }
  1953 };
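        // The work done at the remark safepoint: make the heap parsable, run
        // the remark task (in parallel when possible) to finish marking, and
        // check that all SATB buffers have been processed. The non-marking
        // state is only entered if we do not need to restart marking because
        // of an overflow.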
  1955 void ConcurrentMark::checkpointRootsFinalWork() {
  1956   ResourceMark rm;
  1957   HandleMark   hm;
  1958   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  1960   g1h->ensure_parsability(false);
  1962   if (G1CollectedHeap::use_parallel_gc_threads()) {
  1963     G1CollectedHeap::StrongRootsScope srs(g1h);
  1964     // this is remark, so we'll use up all available threads
  1965     int active_workers = ParallelGCThreads;
  1966     set_phase(active_workers, false);
  1968     CMRemarkTask remarkTask(this);
  1969     // We will start all available threads, even if we decide that the
  1970     // active_workers will be fewer. The extra ones will just bail out
  1971     // immediately.
  1972     int n_workers = g1h->workers()->total_workers();
  1973     g1h->set_par_threads(n_workers);
  1974     g1h->workers()->run_task(&remarkTask);
  1975     g1h->set_par_threads(0);
  1976   } else {
  1977     G1CollectedHeap::StrongRootsScope srs(g1h);
  1978     // this is remark, so we'll use up all available threads
  1979     int active_workers = 1;
  1980     set_phase(active_workers, false);
  1982     CMRemarkTask remarkTask(this);
  1983     // We will start all available threads, even if we decide that the
  1984     // active_workers will be fewer. The extra ones will just bail out
  1985     // immediately.
  1986     remarkTask.work(0);
  1988   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  1989   guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");
  1991   print_stats();
  1993   if (!restart_for_overflow())
  1994     set_non_marking_state();
  1996 #if VERIFY_OBJS_PROCESSED
  1997   if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
  1998     gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
  1999                            _scan_obj_cl.objs_processed,
  2000                            ThreadLocalObjQueue::objs_enqueued);
  2001     guarantee(_scan_obj_cl.objs_processed ==
  2002               ThreadLocalObjQueue::objs_enqueued,
  2003               "Different number of objs processed and enqueued.");
  2005 #endif
  2008 #ifndef PRODUCT
  2010 class PrintReachableOopClosure: public OopClosure {
  2011 private:
  2012   G1CollectedHeap* _g1h;
  2013   CMBitMapRO*      _bitmap;
  2014   outputStream*    _out;
  2015   bool             _use_prev_marking;
  2016   bool             _all;
  2018 public:
  2019   PrintReachableOopClosure(CMBitMapRO*   bitmap,
  2020                            outputStream* out,
  2021                            bool          use_prev_marking,
  2022                            bool          all) :
  2023     _g1h(G1CollectedHeap::heap()),
  2024     _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking), _all(all) { }
  2026   void do_oop(narrowOop* p) { do_oop_work(p); }
  2027   void do_oop(      oop* p) { do_oop_work(p); }
  2029   template <class T> void do_oop_work(T* p) {
  2030     oop         obj = oopDesc::load_decode_heap_oop(p);
  2031     const char* str = NULL;
  2032     const char* str2 = "";
  2034     if (obj == NULL) {
  2035       str = "";
  2036     } else if (!_g1h->is_in_g1_reserved(obj)) {
  2037       str = " O";
  2038     } else {
  2039       HeapRegion* hr  = _g1h->heap_region_containing(obj);
  2040       guarantee(hr != NULL, "invariant");
  2041       bool over_tams = false;
  2042       if (_use_prev_marking) {
  2043         over_tams = hr->obj_allocated_since_prev_marking(obj);
  2044       } else {
  2045         over_tams = hr->obj_allocated_since_next_marking(obj);
  2047       bool marked = _bitmap->isMarked((HeapWord*) obj);
  2049       if (over_tams) {
  2050         str = " >";
  2051         if (marked) {
  2052           str2 = " AND MARKED";
  2054       } else if (marked) {
  2055         str = " M";
  2056       } else {
  2057         str = " NOT";
  2061     _out->print_cr("  "PTR_FORMAT": "PTR_FORMAT"%s%s",
  2062                    p, (void*) obj, str, str2);
  2064 };
  2066 class PrintReachableObjectClosure : public ObjectClosure {
  2067 private:
  2068   CMBitMapRO*   _bitmap;
  2069   outputStream* _out;
  2070   bool          _use_prev_marking;
  2071   bool          _all;
  2072   HeapRegion*   _hr;
  2074 public:
  2075   PrintReachableObjectClosure(CMBitMapRO*   bitmap,
  2076                               outputStream* out,
  2077                               bool          use_prev_marking,
  2078                               bool          all,
  2079                               HeapRegion*   hr) :
  2080     _bitmap(bitmap), _out(out),
  2081     _use_prev_marking(use_prev_marking), _all(all), _hr(hr) { }
  2083   void do_object(oop o) {
  2084     bool over_tams;
  2085     if (_use_prev_marking) {
  2086       over_tams = _hr->obj_allocated_since_prev_marking(o);
  2087     } else {
  2088       over_tams = _hr->obj_allocated_since_next_marking(o);
  2090     bool marked = _bitmap->isMarked((HeapWord*) o);
  2091     bool print_it = _all || over_tams || marked;
  2093     if (print_it) {
  2094       _out->print_cr(" "PTR_FORMAT"%s",
  2095                      o, (over_tams) ? " >" : (marked) ? " M" : "");
  2096       PrintReachableOopClosure oopCl(_bitmap, _out, _use_prev_marking, _all);
  2097       o->oop_iterate(&oopCl);
  2100 };
  2102 class PrintReachableRegionClosure : public HeapRegionClosure {
  2103 private:
  2104   CMBitMapRO*   _bitmap;
  2105   outputStream* _out;
  2106   bool          _use_prev_marking;
  2107   bool          _all;
  2109 public:
  2110   bool doHeapRegion(HeapRegion* hr) {
  2111     HeapWord* b = hr->bottom();
  2112     HeapWord* e = hr->end();
  2113     HeapWord* t = hr->top();
  2114     HeapWord* p = NULL;
  2115     if (_use_prev_marking) {
  2116       p = hr->prev_top_at_mark_start();
  2117     } else {
  2118       p = hr->next_top_at_mark_start();
  2120     _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
  2121                    "TAMS: "PTR_FORMAT, b, e, t, p);
  2122     _out->cr();
  2124     HeapWord* from = b;
  2125     HeapWord* to   = t;
  2127     if (to > from) {
  2128       _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
  2129       _out->cr();
  2130       PrintReachableObjectClosure ocl(_bitmap, _out,
  2131                                       _use_prev_marking, _all, hr);
  2132       hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
  2133       _out->cr();
  2136     return false;
  2139   PrintReachableRegionClosure(CMBitMapRO*   bitmap,
  2140                               outputStream* out,
  2141                               bool          use_prev_marking,
  2142                               bool          all) :
  2143     _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking), _all(all) { }
  2144 };
  2146 void ConcurrentMark::print_reachable(const char* str,
  2147                                      bool use_prev_marking,
  2148                                      bool all) {
  2149   gclog_or_tty->cr();
  2150   gclog_or_tty->print_cr("== Doing heap dump... ");
  2152   if (G1PrintReachableBaseFile == NULL) {
  2153     gclog_or_tty->print_cr("  #### error: no base file defined");
  2154     return;
  2157   if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
  2158       (JVM_MAXPATHLEN - 1)) {
  2159     gclog_or_tty->print_cr("  #### error: file name too long");
  2160     return;
  2163   char file_name[JVM_MAXPATHLEN];
  2164   sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
  2165   gclog_or_tty->print_cr("  dumping to file %s", file_name);
  2167   fileStream fout(file_name);
  2168   if (!fout.is_open()) {
  2169     gclog_or_tty->print_cr("  #### error: could not open file");
  2170     return;
  2173   outputStream* out = &fout;
  2175   CMBitMapRO* bitmap = NULL;
  2176   if (use_prev_marking) {
  2177     bitmap = _prevMarkBitMap;
  2178   } else {
  2179     bitmap = _nextMarkBitMap;
  2182   out->print_cr("-- USING %s", (use_prev_marking) ? "PTAMS" : "NTAMS");
  2183   out->cr();
  2185   out->print_cr("--- ITERATING OVER REGIONS");
  2186   out->cr();
  2187   PrintReachableRegionClosure rcl(bitmap, out, use_prev_marking, all);
  2188   _g1h->heap_region_iterate(&rcl);
  2189   out->cr();
  2191   gclog_or_tty->print_cr("  done");
  2192   gclog_or_tty->flush();
  2195 #endif // PRODUCT
  2197 // This note is for drainAllSATBBuffers and the code in between.
  2198 // In the future we could reuse a task to do this work during an
  2199 // evacuation pause (since now tasks are not active and can be claimed
  2200 // during an evacuation pause). This was a late change to the code and
  2201 // is currently not being taken advantage of.
  2203 class CMGlobalObjectClosure : public ObjectClosure {
  2204 private:
  2205   ConcurrentMark* _cm;
  2207 public:
  2208   void do_object(oop obj) {
  2209     _cm->deal_with_reference(obj);
  2212   CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
  2213 };
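        // Handle a single object found, e.g., in an SATB buffer: if it is in
        // the G1 heap and not yet marked, mark it on the next bitmap and, if
        // it lies below the global finger, push it on the global mark stack
        // so that it will still be scanned.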
  2215 void ConcurrentMark::deal_with_reference(oop obj) {
  2216   if (verbose_high())
  2217     gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
  2218                            (void*) obj);
  2221   HeapWord* objAddr = (HeapWord*) obj;
  2222   assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
  2223   if (_g1h->is_in_g1_reserved(objAddr)) {
  2224     assert(obj != NULL, "is_in_g1_reserved should ensure this");
  2225     HeapRegion* hr = _g1h->heap_region_containing(obj);
  2226     if (_g1h->is_obj_ill(obj, hr)) {
  2227       if (verbose_high())
  2228         gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
  2229                                "marked", (void*) obj);
  2231       // we need to mark it first
  2232       if (_nextMarkBitMap->parMark(objAddr)) {
   2233         // No OrderAccess::store_load() is needed. It is implicit in the
   2234         // CAS done in parMark(objAddr) above.
  2235         HeapWord* finger = _finger;
  2236         if (objAddr < finger) {
  2237           if (verbose_high())
  2238             gclog_or_tty->print_cr("[global] below the global finger "
  2239                                    "("PTR_FORMAT"), pushing it", finger);
  2240           if (!mark_stack_push(obj)) {
  2241             if (verbose_low())
  2242               gclog_or_tty->print_cr("[global] global stack overflow during "
  2243                                      "deal_with_reference");
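        // Apply the closure above to all completed SATB buffers and then to
        // the per-thread active buffers (only called during an evacuation
        // pause).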
  2251 void ConcurrentMark::drainAllSATBBuffers() {
  2252   CMGlobalObjectClosure oc(this);
  2253   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  2254   satb_mq_set.set_closure(&oc);
  2256   while (satb_mq_set.apply_closure_to_completed_buffer()) {
  2257     if (verbose_medium())
  2258       gclog_or_tty->print_cr("[global] processed an SATB buffer");
  2261   // no need to check whether we should do this, as this is only
  2262   // called during an evacuation pause
  2263   satb_mq_set.iterate_closure_all_threads();
  2265   satb_mq_set.set_closure(NULL);
  2266   assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
  2269 void ConcurrentMark::markPrev(oop p) {
  2270   // Note we are overriding the read-only view of the prev map here, via
  2271   // the cast.
  2272   ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
  2275 void ConcurrentMark::clear(oop p) {
  2276   assert(p != NULL && p->is_oop(), "expected an oop");
  2277   HeapWord* addr = (HeapWord*)p;
   2278   assert(addr >= _nextMarkBitMap->startWord() &&
   2279          addr < _nextMarkBitMap->endWord(), "in a region");
  2281   _nextMarkBitMap->clear(addr);
  2284 void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
  2285   // Note we are overriding the read-only view of the prev map here, via
  2286   // the cast.
  2287   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
  2288   _nextMarkBitMap->clearRange(mr);
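        // Claim the region containing the global finger on behalf of the given
        // task by CAS-ing the finger forward to the region's end. Returns the
        // region if it has something to scan; returns NULL if the claimed
        // region was empty (the caller should simply retry) or if the finger
        // has reached the end of the heap. A failed CAS means another task
        // moved the finger, so we re-read it and try again.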
  2291 HeapRegion*
  2292 ConcurrentMark::claim_region(int task_num) {
  2293   // "checkpoint" the finger
  2294   HeapWord* finger = _finger;
  2296   // _heap_end will not change underneath our feet; it only changes at
  2297   // yield points.
  2298   while (finger < _heap_end) {
  2299     assert(_g1h->is_in_g1_reserved(finger), "invariant");
  2301     // is the gap between reading the finger and doing the CAS too long?
  2303     HeapRegion* curr_region   = _g1h->heap_region_containing(finger);
  2304     HeapWord*   bottom        = curr_region->bottom();
  2305     HeapWord*   end           = curr_region->end();
  2306     HeapWord*   limit         = curr_region->next_top_at_mark_start();
  2308     if (verbose_low())
  2309       gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
  2310                              "["PTR_FORMAT", "PTR_FORMAT"), "
  2311                              "limit = "PTR_FORMAT,
  2312                              task_num, curr_region, bottom, end, limit);
  2314     HeapWord* res =
  2315       (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
  2316     if (res == finger) {
  2317       // we succeeded
   2319       // notice that _finger == end cannot be guaranteed here since
   2320       // someone else might have moved the finger even further
  2321       assert(_finger >= end, "the finger should have moved forward");
  2323       if (verbose_low())
  2324         gclog_or_tty->print_cr("[%d] we were successful with region = "
  2325                                PTR_FORMAT, task_num, curr_region);
  2327       if (limit > bottom) {
  2328         if (verbose_low())
  2329           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
  2330                                  "returning it ", task_num, curr_region);
  2331         return curr_region;
  2332       } else {
  2333         assert(limit == bottom,
  2334                "the region limit should be at bottom");
  2335         if (verbose_low())
  2336           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
  2337                                  "returning NULL", task_num, curr_region);
  2338         // we return NULL and the caller should try calling
  2339         // claim_region() again.
  2340         return NULL;
  2342     } else {
  2343       assert(_finger > finger, "the finger should have moved forward");
  2344       if (verbose_low())
  2345         gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
  2346                                "global finger = "PTR_FORMAT", "
  2347                                "our finger = "PTR_FORMAT,
  2348                                task_num, _finger, finger);
  2350       // read it again
  2351       finger = _finger;
  2355   return NULL;
  2358 bool ConcurrentMark::invalidate_aborted_regions_in_cset() {
  2359   bool result = false;
  2360   for (int i = 0; i < (int)_max_task_num; ++i) {
  2361     CMTask* the_task = _tasks[i];
  2362     MemRegion mr = the_task->aborted_region();
  2363     if (mr.start() != NULL) {
  2364       assert(mr.end() != NULL, "invariant");
  2365       assert(mr.word_size() > 0, "invariant");
  2366       HeapRegion* hr = _g1h->heap_region_containing(mr.start());
  2367       assert(hr != NULL, "invariant");
  2368       if (hr->in_collection_set()) {
  2369         // The region points into the collection set
  2370         the_task->set_aborted_region(MemRegion());
  2371         result = true;
  2375   return result;
  2378 bool ConcurrentMark::has_aborted_regions() {
  2379   for (int i = 0; i < (int)_max_task_num; ++i) {
  2380     CMTask* the_task = _tasks[i];
  2381     MemRegion mr = the_task->aborted_region();
  2382     if (mr.start() != NULL) {
  2383       assert(mr.end() != NULL, "invariant");
  2384       assert(mr.word_size() > 0, "invariant");
  2385       return true;
  2388   return false;
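        // Apply the closure to every oop that concurrent marking is holding
        // on to: the contents of the global mark stack and of each task's
        // queue. Entries in the region stack and per-task aborted regions
        // that point into the collection set are invalidated, since the
        // objects there are about to move.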
  2391 void ConcurrentMark::oops_do(OopClosure* cl) {
  2392   if (_markStack.size() > 0 && verbose_low())
  2393     gclog_or_tty->print_cr("[global] scanning the global marking stack, "
  2394                            "size = %d", _markStack.size());
  2395   // we first iterate over the contents of the mark stack...
  2396   _markStack.oops_do(cl);
  2398   for (int i = 0; i < (int)_max_task_num; ++i) {
  2399     OopTaskQueue* queue = _task_queues->queue((int)i);
  2401     if (queue->size() > 0 && verbose_low())
  2402       gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
  2403                              "size = %d", i, queue->size());
  2405     // ...then over the contents of the all the task queues.
  2406     queue->oops_do(cl);
   2409   // Invalidate any region stack entries that point into the
   2410   // collection set.
  2411   if (_regionStack.invalidate_entries_into_cset()) {
  2412     // otherwise, any gray objects copied during the evacuation pause
  2413     // might not be visited.
  2414     assert(_should_gray_objects, "invariant");
  2417   // Invalidate any aborted regions, recorded in the individual CM
  2418   // tasks, that point into the collection set.
  2419   if (invalidate_aborted_regions_in_cset()) {
  2420     // otherwise, any gray objects copied during the evacuation pause
  2421     // might not be visited.
  2422     assert(_should_gray_objects, "invariant");
  2427 void ConcurrentMark::clear_marking_state() {
  2428   _markStack.setEmpty();
  2429   _markStack.clear_overflow();
  2430   _regionStack.setEmpty();
  2431   _regionStack.clear_overflow();
  2432   clear_has_overflown();
  2433   _finger = _heap_start;
  2435   for (int i = 0; i < (int)_max_task_num; ++i) {
  2436     OopTaskQueue* queue = _task_queues->queue(i);
  2437     queue->set_empty();
  2438     // Clear any partial regions from the CMTasks
  2439     _tasks[i]->clear_aborted_region();
  2443 void ConcurrentMark::print_stats() {
  2444   if (verbose_stats()) {
  2445     gclog_or_tty->print_cr("---------------------------------------------------------------------");
  2446     for (size_t i = 0; i < _active_tasks; ++i) {
  2447       _tasks[i]->print_stats();
  2448       gclog_or_tty->print_cr("---------------------------------------------------------------------");
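        // The closures below are used by complete_marking_in_collection_set()
        // during an evacuation pause. Starting from objects already marked in
        // collection-set regions, they trace and mark what is reachable inside
        // the collection set (following forwarding pointers where necessary),
        // handing references that lead outside the collection set to
        // deal_with_reference(). A bounded local mark stack is used; if it
        // fills up, the traversal is aborted and retried.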
  2453 class CSMarkOopClosure: public OopClosure {
  2454   friend class CSMarkBitMapClosure;
  2456   G1CollectedHeap* _g1h;
  2457   CMBitMap*        _bm;
  2458   ConcurrentMark*  _cm;
  2459   oop*             _ms;
  2460   jint*            _array_ind_stack;
  2461   int              _ms_size;
  2462   int              _ms_ind;
  2463   int              _array_increment;
  2465   bool push(oop obj, int arr_ind = 0) {
  2466     if (_ms_ind == _ms_size) {
  2467       gclog_or_tty->print_cr("Mark stack is full.");
  2468       return false;
  2470     _ms[_ms_ind] = obj;
  2471     if (obj->is_objArray()) _array_ind_stack[_ms_ind] = arr_ind;
  2472     _ms_ind++;
  2473     return true;
  2476   oop pop() {
  2477     if (_ms_ind == 0) return NULL;
  2478     else {
  2479       _ms_ind--;
  2480       return _ms[_ms_ind];
  2484   template <class T> bool drain() {
  2485     while (_ms_ind > 0) {
  2486       oop obj = pop();
  2487       assert(obj != NULL, "Since index was non-zero.");
  2488       if (obj->is_objArray()) {
  2489         jint arr_ind = _array_ind_stack[_ms_ind];
  2490         objArrayOop aobj = objArrayOop(obj);
  2491         jint len = aobj->length();
  2492         jint next_arr_ind = arr_ind + _array_increment;
  2493         if (next_arr_ind < len) {
  2494           push(obj, next_arr_ind);
  2496         // Now process this portion of this one.
  2497         int lim = MIN2(next_arr_ind, len);
  2498         for (int j = arr_ind; j < lim; j++) {
  2499           do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j));
  2502       } else {
  2503         obj->oop_iterate(this);
  2505       if (abort()) return false;
  2507     return true;
  2510 public:
  2511   CSMarkOopClosure(ConcurrentMark* cm, int ms_size) :
  2512     _g1h(G1CollectedHeap::heap()),
  2513     _cm(cm),
  2514     _bm(cm->nextMarkBitMap()),
  2515     _ms_size(ms_size), _ms_ind(0),
  2516     _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
  2517     _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
  2518     _array_increment(MAX2(ms_size/8, 16))
  2519   {}
  2521   ~CSMarkOopClosure() {
  2522     FREE_C_HEAP_ARRAY(oop, _ms);
  2523     FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
  2526   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  2527   virtual void do_oop(      oop* p) { do_oop_work(p); }
  2529   template <class T> void do_oop_work(T* p) {
  2530     T heap_oop = oopDesc::load_heap_oop(p);
  2531     if (oopDesc::is_null(heap_oop)) return;
  2532     oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
  2533     if (obj->is_forwarded()) {
  2534       // If the object has already been forwarded, we have to make sure
  2535       // that it's marked.  So follow the forwarding pointer.  Note that
  2536       // this does the right thing for self-forwarding pointers in the
  2537       // evacuation failure case.
  2538       obj = obj->forwardee();
  2540     HeapRegion* hr = _g1h->heap_region_containing(obj);
  2541     if (hr != NULL) {
  2542       if (hr->in_collection_set()) {
  2543         if (_g1h->is_obj_ill(obj)) {
  2544           _bm->mark((HeapWord*)obj);
  2545           if (!push(obj)) {
  2546             gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
  2547             set_abort();
  2550       } else {
  2551         // Outside the collection set; we need to gray it
  2552         _cm->deal_with_reference(obj);
  2556 };
  2558 class CSMarkBitMapClosure: public BitMapClosure {
  2559   G1CollectedHeap* _g1h;
  2560   CMBitMap*        _bitMap;
  2561   ConcurrentMark*  _cm;
  2562   CSMarkOopClosure _oop_cl;
  2563 public:
  2564   CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :
  2565     _g1h(G1CollectedHeap::heap()),
  2566     _bitMap(cm->nextMarkBitMap()),
  2567     _oop_cl(cm, ms_size)
  2568   {}
  2570   ~CSMarkBitMapClosure() {}
  2572   bool do_bit(size_t offset) {
  2573     // convert offset into a HeapWord*
  2574     HeapWord* addr = _bitMap->offsetToHeapWord(offset);
   2575     assert(addr >= _bitMap->startWord() && addr < _bitMap->endWord(),
   2576            "address out of range");
  2577     assert(_bitMap->isMarked(addr), "tautology");
  2578     oop obj = oop(addr);
  2579     if (!obj->is_forwarded()) {
  2580       if (!_oop_cl.push(obj)) return false;
  2581       if (UseCompressedOops) {
  2582         if (!_oop_cl.drain<narrowOop>()) return false;
  2583       } else {
  2584         if (!_oop_cl.drain<oop>()) return false;
  2587     // Otherwise...
  2588     return true;
  2590 };
  2593 class CompleteMarkingInCSHRClosure: public HeapRegionClosure {
  2594   CMBitMap* _bm;
  2595   CSMarkBitMapClosure _bit_cl;
  2596   enum SomePrivateConstants {
  2597     MSSize = 1000
  2598   };
  2599   bool _completed;
  2600 public:
  2601   CompleteMarkingInCSHRClosure(ConcurrentMark* cm) :
  2602     _bm(cm->nextMarkBitMap()),
  2603     _bit_cl(cm, MSSize),
  2604     _completed(true)
  2605   {}
  2607   ~CompleteMarkingInCSHRClosure() {}
  2609   bool doHeapRegion(HeapRegion* r) {
  2610     if (!r->evacuation_failed()) {
  2611       MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
  2612       if (!mr.is_empty()) {
  2613         if (!_bm->iterate(&_bit_cl, mr)) {
  2614           _completed = false;
  2615           return true;
  2619     return false;
  2622   bool completed() { return _completed; }
  2623 };
  2625 class ClearMarksInHRClosure: public HeapRegionClosure {
  2626   CMBitMap* _bm;
  2627 public:
  2628   ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { }
  2630   bool doHeapRegion(HeapRegion* r) {
  2631     if (!r->used_region().is_empty() && !r->evacuation_failed()) {
  2632       MemRegion usedMR = r->used_region();
   2633       _bm->clearRange(usedMR);
  2635     return false;
  2637 };
  2639 void ConcurrentMark::complete_marking_in_collection_set() {
  2640   G1CollectedHeap* g1h =  G1CollectedHeap::heap();
  2642   if (!g1h->mark_in_progress()) {
  2643     g1h->g1_policy()->record_mark_closure_time(0.0);
  2644     return;
  2647   int i = 1;
  2648   double start = os::elapsedTime();
  2649   while (true) {
  2650     i++;
  2651     CompleteMarkingInCSHRClosure cmplt(this);
  2652     g1h->collection_set_iterate(&cmplt);
  2653     if (cmplt.completed()) break;
  2655   double end_time = os::elapsedTime();
  2656   double elapsed_time_ms = (end_time - start) * 1000.0;
  2657   g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
  2659   ClearMarksInHRClosure clr(nextMarkBitMap());
  2660   g1h->collection_set_iterate(&clr);
  2663 // The next two methods deal with the following optimisation. Some
  2664 // objects are gray by being marked and located above the finger. If
   2665 // they are copied, during an evacuation pause, below the finger then
   2666 // they need to be pushed on the stack. The observation is that, if
  2667 // there are no regions in the collection set located above the
  2668 // finger, then the above cannot happen, hence we do not need to
  2669 // explicitly gray any objects when copying them to below the
  2670 // finger. The global stack will be scanned to ensure that, if it
  2671 // points to objects being copied, it will update their
   2672 // location. There is a tricky situation with the gray objects in the
   2673 // region stack that are being copied, however. See the comment in
  2674 // newCSet().
  2676 void ConcurrentMark::newCSet() {
  2677   if (!concurrent_marking_in_progress())
  2678     // nothing to do if marking is not in progress
  2679     return;
  2681   // find what the lowest finger is among the global and local fingers
  2682   _min_finger = _finger;
  2683   for (int i = 0; i < (int)_max_task_num; ++i) {
  2684     CMTask* task = _tasks[i];
  2685     HeapWord* task_finger = task->finger();
  2686     if (task_finger != NULL && task_finger < _min_finger)
  2687       _min_finger = task_finger;
  2690   _should_gray_objects = false;
   2692   // This fixes a very subtle and frustrating bug. It might be the case
   2693   // that, during an evacuation pause, heap regions that contain
  2694   // objects that are gray (by being in regions contained in the
  2695   // region stack) are included in the collection set. Since such gray
  2696   // objects will be moved, and because it's not easy to redirect
  2697   // region stack entries to point to a new location (because objects
  2698   // in one region might be scattered to multiple regions after they
  2699   // are copied), one option is to ensure that all marked objects
  2700   // copied during a pause are pushed on the stack. Notice, however,
  2701   // that this problem can only happen when the region stack is not
  2702   // empty during an evacuation pause. So, we make the fix a bit less
  2703   // conservative and ensure that regions are pushed on the stack,
   2704   // irrespective of whether all collection set regions are below the
  2705   // finger, if the region stack is not empty. This is expected to be
   2706   // a rare case, so I don't think it's necessary to be smarter about it.
  2707   if (!region_stack_empty() || has_aborted_regions())
  2708     _should_gray_objects = true;
  2711 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
  2712   if (!concurrent_marking_in_progress())
  2713     return;
  2715   HeapWord* region_end = hr->end();
  2716   if (region_end > _min_finger)
  2717     _should_gray_objects = true;
  2720 // abandon current marking iteration due to a Full GC
  2721 void ConcurrentMark::abort() {
  2722   // Clear all marks to force marking thread to do nothing
  2723   _nextMarkBitMap->clearAll();
  2724   // Empty mark stack
  2725   clear_marking_state();
  2726   for (int i = 0; i < (int)_max_task_num; ++i) {
  2727     _tasks[i]->clear_region_fields();
  2729   _has_aborted = true;
  2731   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  2732   satb_mq_set.abandon_partial_marking();
   2733   // This can be called either during or outside marking; we'll read
  2734   // the expected_active value from the SATB queue set.
  2735   satb_mq_set.set_active_all_threads(
  2736                                  false, /* new active value */
  2737                                  satb_mq_set.is_active() /* expected_active */);
  2740 static void print_ms_time_info(const char* prefix, const char* name,
  2741                                NumberSeq& ns) {
  2742   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
  2743                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
  2744   if (ns.num() > 0) {
  2745     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
  2746                            prefix, ns.sd(), ns.maximum());
  2750 void ConcurrentMark::print_summary_info() {
  2751   gclog_or_tty->print_cr(" Concurrent marking:");
  2752   print_ms_time_info("  ", "init marks", _init_times);
  2753   print_ms_time_info("  ", "remarks", _remark_times);
  2755     print_ms_time_info("     ", "final marks", _remark_mark_times);
  2756     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
  2759   print_ms_time_info("  ", "cleanups", _cleanup_times);
  2760   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
  2761                          _total_counting_time,
  2762                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
  2763                           (double)_cleanup_times.num()
  2764                          : 0.0));
  2765   if (G1ScrubRemSets) {
  2766     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
  2767                            _total_rs_scrub_time,
  2768                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
  2769                             (double)_cleanup_times.num()
  2770                            : 0.0));
  2772   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
  2773                          (_init_times.sum() + _remark_times.sum() +
  2774                           _cleanup_times.sum())/1000.0);
  2775   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
  2776                 "(%8.2f s marking, %8.2f s counting).",
  2777                 cmThread()->vtime_accum(),
  2778                 cmThread()->vtime_mark_accum(),
  2779                 cmThread()->vtime_count_accum());
  2782 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
  2783   _parallel_workers->print_worker_threads_on(st);
  2786 // Closures
   2787 // XXX: there seems to be a lot of code duplication here;
  2788 // should refactor and consolidate the shared code.
   2790 // do_yield_check() below is called periodically from code that runs
   2791 // concurrently with the mutators (see, e.g., CalcLiveObjectsClosure above).
  2793 // We take a break if someone is trying to stop the world.
  2794 bool ConcurrentMark::do_yield_check(int worker_i) {
  2795   if (should_yield()) {
  2796     if (worker_i == 0)
  2797       _g1h->g1_policy()->record_concurrent_pause();
  2798     cmThread()->yield();
  2799     if (worker_i == 0)
  2800       _g1h->g1_policy()->record_concurrent_pause_end();
  2801     return true;
  2802   } else {
  2803     return false;
  2807 bool ConcurrentMark::should_yield() {
  2808   return cmThread()->should_yield();
  2811 bool ConcurrentMark::containing_card_is_marked(void* p) {
  2812   size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
  2813   return _card_bm.at(offset >> CardTableModRefBS::card_shift);
  2816 bool ConcurrentMark::containing_cards_are_marked(void* start,
  2817                                                  void* last) {
  2818   return
  2819     containing_card_is_marked(start) &&
  2820     containing_card_is_marked(last);
  2823 #ifndef PRODUCT
  2824 // for debugging purposes
  2825 void ConcurrentMark::print_finger() {
  2826   gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
  2827                          _heap_start, _heap_end, _finger);
  2828   for (int i = 0; i < (int) _max_task_num; ++i) {
  2829     gclog_or_tty->print("   %d: "PTR_FORMAT, i, _tasks[i]->finger());
  2831   gclog_or_tty->print_cr("");
  2833 #endif
  2835 // Closure for iteration over bitmaps
  2836 class CMBitMapClosure : public BitMapClosure {
  2837 private:
  2838   // the bitmap that is being iterated over
  2839   CMBitMap*                   _nextMarkBitMap;
  2840   ConcurrentMark*             _cm;
  2841   CMTask*                     _task;
  2842   // true if we're scanning a heap region claimed by the task (so that
  2843   // we move the finger along), false if we're not, i.e. currently when
  2844   // scanning a heap region popped from the region stack (so that we
  2845   // do not move the task finger along; it'd be a mistake if we did so).
  2846   bool                        _scanning_heap_region;
  2848 public:
  2849   CMBitMapClosure(CMTask *task,
  2850                   ConcurrentMark* cm,
  2851                   CMBitMap* nextMarkBitMap)
  2852     :  _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
  2854   void set_scanning_heap_region(bool scanning_heap_region) {
  2855     _scanning_heap_region = scanning_heap_region;
  2858   bool do_bit(size_t offset) {
  2859     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
  2860     assert(_nextMarkBitMap->isMarked(addr), "invariant");
  2861     assert( addr < _cm->finger(), "invariant");
  2863     if (_scanning_heap_region) {
  2864       statsOnly( _task->increase_objs_found_on_bitmap() );
  2865       assert(addr >= _task->finger(), "invariant");
  2866       // We move that task's local finger along.
  2867       _task->move_finger_to(addr);
  2868     } else {
  2869       // We move the task's region finger along.
  2870       _task->move_region_finger_to(addr);
  2873     _task->scan_object(oop(addr));
  2874     // we only partially drain the local queue and global stack
  2875     _task->drain_local_queue(true);
  2876     _task->drain_global_stack(true);
  2878     // if the has_aborted flag has been raised, we need to bail out of
  2879     // the iteration
  2880     return !_task->has_aborted();
  2882 };
  2884 // Closure for iterating over objects, currently only used for
  2885 // processing SATB buffers.
  2886 class CMObjectClosure : public ObjectClosure {
  2887 private:
  2888   CMTask* _task;
  2890 public:
  2891   void do_object(oop obj) {
  2892     _task->deal_with_reference(obj);
  2895   CMObjectClosure(CMTask* task) : _task(task) { }
  2896 };
  2898 // Closure for iterating over object fields
  2899 class CMOopClosure : public OopClosure {
  2900 private:
  2901   G1CollectedHeap*   _g1h;
  2902   ConcurrentMark*    _cm;
  2903   CMTask*            _task;
  2905 public:
  2906   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  2907   virtual void do_oop(      oop* p) { do_oop_work(p); }
  2909   template <class T> void do_oop_work(T* p) {
  2910     assert(_g1h->is_in_g1_reserved((HeapWord*) p), "invariant");
  2911     assert(!_g1h->heap_region_containing((HeapWord*) p)->is_on_free_list(),
  2912            "invariant");
  2914     oop obj = oopDesc::load_decode_heap_oop(p);
  2915     if (_cm->verbose_high())
  2916       gclog_or_tty->print_cr("[%d] we're looking at location "
  2917                              "*"PTR_FORMAT" = "PTR_FORMAT,
  2918                              _task->task_id(), p, (void*) obj);
  2919     _task->deal_with_reference(obj);
  2922   CMOopClosure(G1CollectedHeap* g1h,
  2923                ConcurrentMark* cm,
  2924                CMTask* task)
  2925     : _g1h(g1h), _cm(cm), _task(task)
  2927     _ref_processor = g1h->ref_processor();
  2928     assert(_ref_processor != NULL, "should not be NULL");
  2930 };
  2932 void CMTask::setup_for_region(HeapRegion* hr) {
  2933   // Separated the asserts so that we know which one fires.
  2934   assert(hr != NULL,
  2935         "claim_region() should have returned a non-NULL region");
  2936   assert(!hr->continuesHumongous(),
  2937         "claim_region() should have filtered out continues humongous regions");
  2939   if (_cm->verbose_low())
  2940     gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
  2941                            _task_id, hr);
  2943   _curr_region  = hr;
  2944   _finger       = hr->bottom();
  2945   update_region_limit();
  2948 void CMTask::update_region_limit() {
  2949   HeapRegion* hr            = _curr_region;
  2950   HeapWord* bottom          = hr->bottom();
  2951   HeapWord* limit           = hr->next_top_at_mark_start();
  2953   if (limit == bottom) {
  2954     if (_cm->verbose_low())
  2955       gclog_or_tty->print_cr("[%d] found an empty region "
  2956                              "["PTR_FORMAT", "PTR_FORMAT")",
  2957                              _task_id, bottom, limit);
  2958     // The region was collected underneath our feet.
  2959     // We set the finger to bottom to ensure that the bitmap
  2960     // iteration that will follow this will not do anything.
  2961     // (this is not a condition that holds when we set the region up,
  2962     // as the region is not supposed to be empty in the first place)
  2963     _finger = bottom;
  2964   } else if (limit >= _region_limit) {
  2965     assert(limit >= _finger, "peace of mind");
  2966   } else {
  2967     assert(limit < _region_limit, "only way to get here");
  2968     // This can happen under some pretty unusual circumstances.  An
  2969     // evacuation pause empties the region underneath our feet (NTAMS
  2970     // at bottom). We then do some allocation in the region (NTAMS
  2971     // stays at bottom), followed by the region being used as a GC
  2972     // alloc region (NTAMS will move to top() and the objects
  2973     // originally below it will be grayed). All objects now marked in
  2974     // the region are explicitly grayed, if below the global finger,
  2975     // and, in fact, we do not need to scan anything else. So, we simply
  2976     // set _finger to be limit to ensure that the bitmap iteration
  2977     // doesn't do anything.
  2978     _finger = limit;
  2981   _region_limit = limit;
  2984 void CMTask::giveup_current_region() {
  2985   assert(_curr_region != NULL, "invariant");
  2986   if (_cm->verbose_low())
  2987     gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
  2988                            _task_id, _curr_region);
  2989   clear_region_fields();
  2992 void CMTask::clear_region_fields() {
  2993   // Values for these three fields that indicate that we're not
  2994   // holding on to a region.
  2995   _curr_region   = NULL;
  2996   _finger        = NULL;
  2997   _region_limit  = NULL;
  2999   _region_finger = NULL;
  3002 void CMTask::reset(CMBitMap* nextMarkBitMap) {
  3003   guarantee(nextMarkBitMap != NULL, "invariant");
  3005   if (_cm->verbose_low())
  3006     gclog_or_tty->print_cr("[%d] resetting", _task_id);
  3008   _nextMarkBitMap                = nextMarkBitMap;
  3009   clear_region_fields();
  3010   assert(_aborted_region.is_empty(), "should have been cleared");
  3012   _calls                         = 0;
  3013   _elapsed_time_ms               = 0.0;
  3014   _termination_time_ms           = 0.0;
  3015   _termination_start_time_ms     = 0.0;
  3017 #if _MARKING_STATS_
  3018   _local_pushes                  = 0;
  3019   _local_pops                    = 0;
  3020   _local_max_size                = 0;
  3021   _objs_scanned                  = 0;
  3022   _global_pushes                 = 0;
  3023   _global_pops                   = 0;
  3024   _global_max_size               = 0;
  3025   _global_transfers_to           = 0;
  3026   _global_transfers_from         = 0;
  3027   _region_stack_pops             = 0;
  3028   _regions_claimed               = 0;
  3029   _objs_found_on_bitmap          = 0;
  3030   _satb_buffers_processed        = 0;
  3031   _steal_attempts                = 0;
  3032   _steals                        = 0;
  3033   _aborted                       = 0;
  3034   _aborted_overflow              = 0;
  3035   _aborted_cm_aborted            = 0;
  3036   _aborted_yield                 = 0;
  3037   _aborted_timed_out             = 0;
  3038   _aborted_satb                  = 0;
  3039   _aborted_termination           = 0;
  3040 #endif // _MARKING_STATS_
  3043 bool CMTask::should_exit_termination() {
  3044   regular_clock_call();
  3045   // This is called when we are in the termination protocol. We should
  3046   // quit if, for some reason, this task wants to abort or the global
  3047   // stack is not empty (this means that we can get work from it).
  3048   return !_cm->mark_stack_empty() || has_aborted();
  3051 // This determines whether the method below will check both the local
  3052 // and global fingers when determining whether to push a gray object
  3053 // on the stack (value 1) or whether it will only check the global one
  3054 // (value 0). The tradeoff is that the former will be a bit more
  3055 // accurate and possibly push fewer entries on the stack, but it might
  3056 // also be a little bit slower.
  3058 #define _CHECK_BOTH_FINGERS_      1
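       // In outline, with _CHECK_BOTH_FINGERS_ == 1 the method below pushes a
       // freshly marked object if it lies below the task's local finger; it
       // skips the push if the object lies elsewhere within the currently
       // claimed region (the bitmap scan will reach it); otherwise it pushes
       // the object only if it lies below the global finger. With the value
       // 0, only the global finger check is applied.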
  3060 void CMTask::deal_with_reference(oop obj) {
  3061   if (_cm->verbose_high())
  3062     gclog_or_tty->print_cr("[%d] we're dealing with reference = "PTR_FORMAT,
  3063                            _task_id, (void*) obj);
  3065   ++_refs_reached;
  3067   HeapWord* objAddr = (HeapWord*) obj;
  3068   assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
  3069   if (_g1h->is_in_g1_reserved(objAddr)) {
  3070     assert(obj != NULL, "is_in_g1_reserved should ensure this");
  3071     HeapRegion* hr =  _g1h->heap_region_containing(obj);
  3072     if (_g1h->is_obj_ill(obj, hr)) {
  3073       if (_cm->verbose_high())
  3074         gclog_or_tty->print_cr("[%d] "PTR_FORMAT" is not considered marked",
  3075                                _task_id, (void*) obj);
  3077       // we need to mark it first
  3078       if (_nextMarkBitMap->parMark(objAddr)) {
  3079         // No OrderAccess::store_load() is needed. It is implicit in the
  3080         // CAS done in parMark(objAddr) above
  3081         HeapWord* global_finger = _cm->finger();
  3083 #if _CHECK_BOTH_FINGERS_
  3084         // we will check both the local and global fingers
  3086         if (_finger != NULL && objAddr < _finger) {
  3087           if (_cm->verbose_high())
  3088             gclog_or_tty->print_cr("[%d] below the local finger ("PTR_FORMAT"), "
  3089                                    "pushing it", _task_id, _finger);
  3090           push(obj);
  3091         } else if (_curr_region != NULL && objAddr < _region_limit) {
  3092           // do nothing
  3093         } else if (objAddr < global_finger) {
  3094           // Notice that the global finger might be moving forward
  3095           // concurrently. This is not a problem. In the worst case, we
  3096           // mark the object while it is above the global finger and, by
  3097           // the time we read the global finger, it has moved forward
  3098           // past this object. In this case, the object will probably
  3099           // be visited when a task is scanning the region and will also
  3100           // be pushed on the stack. So, some duplicate work, but no
  3101           // correctness problems.
  3103           if (_cm->verbose_high())
  3104             gclog_or_tty->print_cr("[%d] below the global finger "
  3105                                    "("PTR_FORMAT"), pushing it",
  3106                                    _task_id, global_finger);
  3107           push(obj);
  3108         } else {
  3109           // do nothing
  3111 #else // _CHECK_BOTH_FINGERS_
  3112       // we will only check the global finger
  3114         if (objAddr < global_finger) {
  3115           // see long comment above
  3117           if (_cm->verbose_high())
  3118             gclog_or_tty->print_cr("[%d] below the global finger "
  3119                                    "("PTR_FORMAT"), pushing it",
  3120                                    _task_id, global_finger);
  3121           push(obj);
  3123 #endif // _CHECK_BOTH_FINGERS_
  3129 void CMTask::push(oop obj) {
  3130   HeapWord* objAddr = (HeapWord*) obj;
  3131   assert(_g1h->is_in_g1_reserved(objAddr), "invariant");
  3132   assert(!_g1h->heap_region_containing(objAddr)->is_on_free_list(),
  3133          "invariant");
  3134   assert(!_g1h->is_obj_ill(obj), "invariant");
  3135   assert(_nextMarkBitMap->isMarked(objAddr), "invariant");
  3137   if (_cm->verbose_high())
  3138     gclog_or_tty->print_cr("[%d] pushing "PTR_FORMAT, _task_id, (void*) obj);
  3140   if (!_task_queue->push(obj)) {
  3141     // The local task queue looks full. We need to push some entries
  3142     // to the global stack.
  3144     if (_cm->verbose_medium())
  3145       gclog_or_tty->print_cr("[%d] task queue overflow, "
  3146                              "moving entries to the global stack",
  3147                              _task_id);
  3148     move_entries_to_global_stack();
  3150     // this should succeed since, even if we overflow the global
  3151     // stack, we should have definitely removed some entries from the
  3152     // local queue. So, there must be space on it.
  3153     bool success = _task_queue->push(obj);
  3154     assert(success, "invariant");
  3157   statsOnly( int tmp_size = _task_queue->size();
  3158              if (tmp_size > _local_max_size)
  3159                _local_max_size = tmp_size;
  3160              ++_local_pushes );
  3163 void CMTask::reached_limit() {
  3164   assert(_words_scanned >= _words_scanned_limit ||
  3165          _refs_reached >= _refs_reached_limit ,
  3166          "shouldn't have been called otherwise");
  3167   regular_clock_call();
  3170 void CMTask::regular_clock_call() {
  3171   if (has_aborted())
  3172     return;
  3174   // First, we need to recalculate the words scanned and refs reached
  3175   // limits for the next clock call.
  3176   recalculate_limits();
  3178   // During the regular clock call we do the following
  3180   // (1) If an overflow has been flagged, then we abort.
  3181   if (_cm->has_overflown()) {
  3182     set_has_aborted();
  3183     return;
  3186   // If we are not concurrent (i.e. we're doing remark) we don't need
  3187   // to check anything else. The other steps are only needed during
  3188   // the concurrent marking phase.
  3189   if (!concurrent())
  3190     return;
  3192   // (2) If marking has been aborted for Full GC, then we also abort.
  3193   if (_cm->has_aborted()) {
  3194     set_has_aborted();
  3195     statsOnly( ++_aborted_cm_aborted );
  3196     return;
  3199   double curr_time_ms = os::elapsedVTime() * 1000.0;
  3201   // (3) If marking stats are enabled, then we update the clock interval stats.
  3202 #if _MARKING_STATS_
  3203   if (_words_scanned >= _words_scanned_limit)
  3204     ++_clock_due_to_scanning;
  3205   if (_refs_reached >= _refs_reached_limit)
  3206     ++_clock_due_to_marking;
  3208   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
  3209   _interval_start_time_ms = curr_time_ms;
  3210   _all_clock_intervals_ms.add(last_interval_ms);
  3212   if (_cm->verbose_medium()) {
  3213     gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
  3214                            "scanned = %d%s, refs reached = %d%s",
  3215                            _task_id, last_interval_ms,
  3216                            _words_scanned,
  3217                            (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
  3218                            _refs_reached,
  3219                            (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
  3221 #endif // _MARKING_STATS_
  3223   // (4) We check whether we should yield. If we have to, then we abort.
  3224   if (_cm->should_yield()) {
  3225     // We should yield. To do this we abort the task. The caller is
  3226     // responsible for yielding.
  3227     set_has_aborted();
  3228     statsOnly( ++_aborted_yield );
  3229     return;
  3232   // (5) We check whether we've reached our time quota. If we have,
  3233   // then we abort.
  3234   double elapsed_time_ms = curr_time_ms - _start_time_ms;
  3235   if (elapsed_time_ms > _time_target_ms) {
  3236     set_has_aborted();
  3237     _has_aborted_timed_out = true;
  3238     statsOnly( ++_aborted_timed_out );
  3239     return;
  3242   // (6) Finally, we check whether there are enough completed SATB
  3243   // buffers available for processing. If there are, we abort.
  3244   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  3245   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
  3246     if (_cm->verbose_low())
  3247       gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
  3248                              _task_id);
  3249     // We do need to process SATB buffers; we'll abort and restart
  3250     // the marking task to do so.
  3251     set_has_aborted();
  3252     statsOnly( ++_aborted_satb );
  3253     return;
  3257 void CMTask::recalculate_limits() {
  3258   _real_words_scanned_limit = _words_scanned + words_scanned_period;
  3259   _words_scanned_limit      = _real_words_scanned_limit;
  3261   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
  3262   _refs_reached_limit       = _real_refs_reached_limit;
  3265 void CMTask::decrease_limits() {
  3266   // This is called when we believe that we're going to do an infrequent
  3267   // operation which will increase the per byte scanned cost (i.e. move
  3268   // entries to/from the global stack). It basically tries to decrease the
  3269   // scanning limit so that the clock is called earlier.
  3271   if (_cm->verbose_medium())
  3272     gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
  3274   _words_scanned_limit = _real_words_scanned_limit -
  3275     3 * words_scanned_period / 4;
  3276   _refs_reached_limit  = _real_refs_reached_limit -
  3277     3 * refs_reached_period / 4;
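       // Worked example (the period value is illustrative only): if
       // words_scanned_period were 12288 words, the assignment above would
       // leave _words_scanned_limit at _real_words_scanned_limit - 9216
       // (i.e. 3 * 12288 / 4), so the regular clock fires after only a
       // quarter of the usual scanning period.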
  3280 void CMTask::move_entries_to_global_stack() {
  3281   // local array where we'll store the entries that will be popped
  3282   // from the local queue
  3283   oop buffer[global_stack_transfer_size];
  3285   int n = 0;
  3286   oop obj;
  3287   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
  3288     buffer[n] = obj;
  3289     ++n;
  3292   if (n > 0) {
  3293     // we popped at least one entry from the local queue
  3295     statsOnly( ++_global_transfers_to; _local_pops += n );
  3297     if (!_cm->mark_stack_push(buffer, n)) {
  3298       if (_cm->verbose_low())
  3299         gclog_or_tty->print_cr("[%d] aborting due to global stack overflow", _task_id);
  3300       set_has_aborted();
  3301     } else {
  3302       // the transfer was successful
  3304       if (_cm->verbose_medium())
  3305         gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
  3306                                _task_id, n);
  3307       statsOnly( int tmp_size = _cm->mark_stack_size();
  3308                  if (tmp_size > _global_max_size)
  3309                    _global_max_size = tmp_size;
  3310                  _global_pushes += n );
  3314   // this operation was quite expensive, so decrease the limits
  3315   decrease_limits();
  3318 void CMTask::get_entries_from_global_stack() {
  3319   // local array where we'll store the entries that will be popped
  3320   // from the global stack.
  3321   oop buffer[global_stack_transfer_size];
  3322   int n;
  3323   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
  3324   assert(n <= global_stack_transfer_size,
  3325          "we should not pop more than the given limit");
  3326   if (n > 0) {
  3327     // yes, we did actually pop at least one entry
  3329     statsOnly( ++_global_transfers_from; _global_pops += n );
  3330     if (_cm->verbose_medium())
  3331       gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
  3332                              _task_id, n);
  3333     for (int i = 0; i < n; ++i) {
  3334       bool success = _task_queue->push(buffer[i]);
  3335       // We only call this when the local queue is empty or under a
  3336       // given target limit. So, we do not expect this push to fail.
  3337       assert(success, "invariant");
  3340     statsOnly( int tmp_size = _task_queue->size();
  3341                if (tmp_size > _local_max_size)
  3342                  _local_max_size = tmp_size;
  3343                _local_pushes += n );
  3346   // this operation was quite expensive, so decrease the limits
  3347   decrease_limits();
  3350 void CMTask::drain_local_queue(bool partially) {
  3351   if (has_aborted())
  3352     return;
  3354   // Decide what the target size is, depending on whether we're going to
  3355   // drain it partially (so that other tasks can steal if they run out
  3356   // of things to do) or totally (at the very end).
  3357   size_t target_size;
  3358   if (partially)
  3359     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
  3360   else
  3361     target_size = 0;
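       // For example (numbers illustrative only): with max_elems() == 16384
       // and GCDrainStackTargetSize == 64, a partial drain stops once the
       // queue is down to MIN2(16384 / 3, 64) == 64 entries, leaving work
       // available for other tasks to steal.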
  3363   if (_task_queue->size() > target_size) {
  3364     if (_cm->verbose_high())
  3365       gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
  3366                              _task_id, target_size);
  3368     oop obj;
  3369     bool ret = _task_queue->pop_local(obj);
  3370     while (ret) {
  3371       statsOnly( ++_local_pops );
  3373       if (_cm->verbose_high())
  3374         gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
  3375                                (void*) obj);
  3377       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
  3378       assert(!_g1h->heap_region_containing(obj)->is_on_free_list(),
  3379              "invariant");
  3381       scan_object(obj);
  3383       if (_task_queue->size() <= target_size || has_aborted())
  3384         ret = false;
  3385       else
  3386         ret = _task_queue->pop_local(obj);
  3389     if (_cm->verbose_high())
  3390       gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
  3391                              _task_id, _task_queue->size());
  3395 void CMTask::drain_global_stack(bool partially) {
  3396   if (has_aborted())
  3397     return;
  3399   // We have a policy to drain the local queue before we attempt to
  3400   // drain the global stack.
  3401   assert(partially || _task_queue->size() == 0, "invariant");
  3403   // Decide what the target size is, depending on whether we're going to
  3404   // drain it partially (so that other tasks can steal if they run out
  3405   // of things to do) or totally (at the very end).  Notice that,
  3406   // because we move entries from the global stack in chunks or
  3407   // because another task might be doing the same, we might in fact
  3408   // drop below the target. But, this is not a problem.
  3409   size_t target_size;
  3410   if (partially)
  3411     target_size = _cm->partial_mark_stack_size_target();
  3412   else
  3413     target_size = 0;
  3415   if (_cm->mark_stack_size() > target_size) {
  3416     if (_cm->verbose_low())
  3417       gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
  3418                              _task_id, target_size);
  3420     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
  3421       get_entries_from_global_stack();
  3422       drain_local_queue(partially);
  3425     if (_cm->verbose_low())
  3426       gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
  3427                              _task_id, _cm->mark_stack_size());
  3431 // The SATB queue set makes several assumptions about whether to call
  3432 // the par or non-par versions of its methods. This is why some of the
  3433 // code is replicated. We should really get rid of the single-threaded
  3434 // version of the code to simplify things.
  3435 void CMTask::drain_satb_buffers() {
  3436   if (has_aborted())
  3437     return;
  3439   // We set this so that the regular clock knows that we're in the
  3440   // middle of draining buffers and doesn't set the abort flag when it
  3441   // notices that SATB buffers are available for draining. It'd be
  3442   // very counterproductive if it did that. :-)
  3443   _draining_satb_buffers = true;
  3445   CMObjectClosure oc(this);
  3446   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  3447   if (G1CollectedHeap::use_parallel_gc_threads())
  3448     satb_mq_set.set_par_closure(_task_id, &oc);
  3449   else
  3450     satb_mq_set.set_closure(&oc);
  3452   // This keeps claiming and applying the closure to completed buffers
  3453   // until we run out of buffers or we need to abort.
  3454   if (G1CollectedHeap::use_parallel_gc_threads()) {
  3455     while (!has_aborted() &&
  3456            satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
  3457       if (_cm->verbose_medium())
  3458         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
  3459       statsOnly( ++_satb_buffers_processed );
  3460       regular_clock_call();
  3462   } else {
  3463     while (!has_aborted() &&
  3464            satb_mq_set.apply_closure_to_completed_buffer()) {
  3465       if (_cm->verbose_medium())
  3466         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
  3467       statsOnly( ++_satb_buffers_processed );
  3468       regular_clock_call();
  3472   if (!concurrent() && !has_aborted()) {
  3473     // We should only do this during remark.
  3474     if (G1CollectedHeap::use_parallel_gc_threads())
  3475       satb_mq_set.par_iterate_closure_all_threads(_task_id);
  3476     else
  3477       satb_mq_set.iterate_closure_all_threads();
  3480   _draining_satb_buffers = false;
  3482   assert(has_aborted() ||
  3483          concurrent() ||
  3484          satb_mq_set.completed_buffers_num() == 0, "invariant");
  3486   if (G1CollectedHeap::use_parallel_gc_threads())
  3487     satb_mq_set.set_par_closure(_task_id, NULL);
  3488   else
  3489     satb_mq_set.set_closure(NULL);
  3491   // Again, this was a potentially expensive operation, so decrease the
  3492   // limits to get the regular clock call earlier.
  3493   decrease_limits();
  3496 void CMTask::drain_region_stack(BitMapClosure* bc) {
  3497   if (has_aborted())
  3498     return;
  3500   assert(_region_finger == NULL,
  3501          "it should be NULL when we're not scanning a region");
  3503   if (!_cm->region_stack_empty() || !_aborted_region.is_empty()) {
  3504     if (_cm->verbose_low())
  3505       gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
  3506                              _task_id, _cm->region_stack_size());
  3508     MemRegion mr;
  3510     if (!_aborted_region.is_empty()) {
  3511       mr = _aborted_region;
  3512       _aborted_region = MemRegion();
  3514       if (_cm->verbose_low())
  3515         gclog_or_tty->print_cr("[%d] scanning aborted region [ " PTR_FORMAT ", " PTR_FORMAT " )",
  3516                              _task_id, mr.start(), mr.end());
  3517     } else {
  3518       mr = _cm->region_stack_pop_lock_free();
  3519       // it returns MemRegion() if the pop fails
  3520       statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
  3523     while (mr.start() != NULL) {
  3524       if (_cm->verbose_medium())
  3525         gclog_or_tty->print_cr("[%d] we are scanning region "
  3526                                "["PTR_FORMAT", "PTR_FORMAT")",
  3527                                _task_id, mr.start(), mr.end());
  3529       assert(mr.end() <= _cm->finger(),
  3530              "otherwise the region shouldn't be on the stack");
  3531       assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
  3532       if (_nextMarkBitMap->iterate(bc, mr)) {
  3533         assert(!has_aborted(),
  3534                "cannot abort the task without aborting the bitmap iteration");
  3536         // We finished iterating over the region without aborting.
  3537         regular_clock_call();
  3538         if (has_aborted())
  3539           mr = MemRegion();
  3540         else {
  3541           mr = _cm->region_stack_pop_lock_free();
  3542           // it returns MemRegion() if the pop fails
  3543           statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
  3545       } else {
  3546         assert(has_aborted(), "currently the only way to do so");
  3548         // The only way to abort the bitmap iteration is to return
  3549         // false from the do_bit() method. However, inside the
  3550         // do_bit() method we move the _region_finger to point to the
  3551         // object currently being looked at. So, if we bail out, we
  3552         // have definitely set _region_finger to something non-null.
  3553         assert(_region_finger != NULL, "invariant");
  3555         // Make sure that any previously aborted region has been
  3556         // cleared.
  3557         assert(_aborted_region.is_empty(), "aborted region not cleared");
  3559         // The iteration was actually aborted. So now _region_finger
  3560         // points to the address of the object we last scanned. If we
  3561         // leave it there, when we restart this task, we will rescan
  3562         // the object. It is easy to avoid this. We move the finger by
  3563         // enough to point to the next possible object header (the
  3564         // bitmap knows by how much we need to move it as it knows its
  3565         // granularity).
  3566         MemRegion newRegion =
  3567           MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());
  3569         if (!newRegion.is_empty()) {
  3570           if (_cm->verbose_low()) {
  3571             gclog_or_tty->print_cr("[%d] recording unscanned region "
  3572                                    "[" PTR_FORMAT "," PTR_FORMAT ") in CMTask",
  3573                                    _task_id,
  3574                                    newRegion.start(), newRegion.end());
  3576           // Now record the part of the region we didn't scan to
  3577           // make sure this task scans it later.
  3578           _aborted_region = newRegion;
  3580         // break from while
  3581         mr = MemRegion();
  3583       _region_finger = NULL;
  3586     if (_cm->verbose_low())
  3587       gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
  3588                              _task_id, _cm->region_stack_size());
  3592 void CMTask::print_stats() {
  3593   gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
  3594                          _task_id, _calls);
  3595   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
  3596                          _elapsed_time_ms, _termination_time_ms);
  3597   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
  3598                          _step_times_ms.num(), _step_times_ms.avg(),
  3599                          _step_times_ms.sd());
  3600   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
  3601                          _step_times_ms.maximum(), _step_times_ms.sum());
  3603 #if _MARKING_STATS_
  3604   gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
  3605                          _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
  3606                          _all_clock_intervals_ms.sd());
  3607   gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
  3608                          _all_clock_intervals_ms.maximum(),
  3609                          _all_clock_intervals_ms.sum());
  3610   gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
  3611                          _clock_due_to_scanning, _clock_due_to_marking);
  3612   gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
  3613                          _objs_scanned, _objs_found_on_bitmap);
  3614   gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
  3615                          _local_pushes, _local_pops, _local_max_size);
  3616   gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
  3617                          _global_pushes, _global_pops, _global_max_size);
  3618   gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
  3619                          _global_transfers_to,_global_transfers_from);
  3620   gclog_or_tty->print_cr("  Regions: claimed = %d, Region Stack: pops = %d",
  3621                          _regions_claimed, _region_stack_pops);
  3622   gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
  3623   gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
  3624                          _steal_attempts, _steals);
  3625   gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
  3626   gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
  3627                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
  3628   gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
  3629                          _aborted_timed_out, _aborted_satb, _aborted_termination);
  3630 #endif // _MARKING_STATS_
  3633 /*****************************************************************************
  3635     The do_marking_step(time_target_ms) method is the building block
  3636     of the parallel marking framework. It can be called in parallel
  3637     with other invocations of do_marking_step() on different tasks
  3638     (but only one per task, obviously) and concurrently with the
  3639     mutator threads, or during remark, hence it eliminates the need
  3640     for two versions of the code. When called during remark, it will
  3641     pick up from where the task left off during the concurrent marking
  3642     phase. Interestingly, tasks are also claimable during evacuation
  3643     pauses, since do_marking_step() ensures that it aborts before
  3644     it needs to yield.
  3646     The data structures that it uses to do marking work are the
  3647     following:
  3649       (1) Marking Bitmap. If there are gray objects that appear only
  3650       on the bitmap (this happens either when dealing with an overflow
  3651       or when the initial marking phase has simply marked the roots
  3652       and didn't push them on the stack), then tasks claim heap
  3653       regions whose bitmap they then scan to find gray objects. A
  3654       global finger indicates where the end of the last claimed region
  3655       is. A local finger indicates how far into the region a task has
  3656       scanned. The two fingers are used to determine how to gray an
  3657       object (i.e. whether simply marking it is OK, as it will be
  3658       visited by a task in the future, or whether it needs to be also
  3659       pushed on a stack).
  3661       (2) Local Queue. Each task has a local queue, which it can
  3662       access reasonably efficiently. Other tasks can steal from
  3663       it when they run out of work. Throughout the marking phase, a
  3664       task attempts to keep its local queue short but not totally
  3665       empty, so that entries are available for stealing by other
  3666       tasks. Only when there is no more work will a task totally
  3667       drain its local queue.
  3669       (3) Global Mark Stack. This handles local queue overflow. During
  3670       marking only sets of entries are moved between it and the local
  3671       queues, as access to it requires a mutex and more fine-grain
  3672       interaction with it which might cause contention. If it
  3673       overflows, then the marking phase should restart and iterate
  3674       over the bitmap to identify gray objects. Throughout the marking
  3675       phase, tasks attempt to keep the global mark stack at a small
  3676       length but not totally empty, so that entries are available for
  3677       popping by other tasks. Only when there is no more work will
  3678       tasks totally drain the global mark stack.
  3680       (4) Global Region Stack. Entries on it correspond to areas of
  3681       the bitmap that need to be scanned since they contain gray
  3682       objects. Pushes on the region stack only happen during
  3683       evacuation pauses and typically correspond to areas covered by
  3684       GC LABs. If it overflows, then the marking phase should restart
  3685       and iterate over the bitmap to identify gray objects. Tasks will
  3686       try to totally drain the region stack as soon as possible.
  3688       (5) SATB Buffer Queue. This is where completed SATB buffers are
  3689       made available. Buffers are regularly removed from this queue
  3690       and scanned for roots, so that the queue doesn't get too
  3691       long. During remark, all completed buffers are processed, as
  3692       well as the filled in parts of any uncompleted buffers.
  3694     The do_marking_step() method tries to abort when the time target
  3695     has been reached. There are a few other cases when the
  3696     do_marking_step() method also aborts:
  3698       (1) When the marking phase has been aborted (after a Full GC).
  3700       (2) When a global overflow (either on the global stack or the
  3701       region stack) has been triggered. Before the task aborts, it
  3702       will actually sync up with the other tasks to ensure that all
  3703       the marking data structures (local queues, stacks, fingers etc.)
  3704       are re-initialised so that when do_marking_step() completes,
  3705       the marking phase can immediately restart.
  3707       (3) When enough completed SATB buffers are available. The
  3708       do_marking_step() method only tries to drain SATB buffers right
  3709       at the beginning. So, if enough buffers are available, the
  3710       marking step aborts and the SATB buffers are processed at
  3711       the beginning of the next invocation.
  3713       (4) To yield. When we have to yield, we abort and yield
  3714       right at the end of do_marking_step(). This saves us from a lot
  3715       of hassle as, by yielding, we might allow a Full GC. If this
  3716       happens then objects will be compacted underneath our feet, the
  3717       heap might shrink, etc. We save checking for this by just
  3718       aborting and doing the yield right at the end.
  3720     From the above it follows that the do_marking_step() method should
  3721     be called in a loop (or, otherwise, regularly) until it completes.
  3723     If a marking step completes without its has_aborted() flag being
  3724     true, it means it has completed the current marking phase (and
  3725     also all other marking tasks have done so and have all synced up).
  3727     A method called regular_clock_call() is invoked "regularly" (in
  3728     sub ms intervals) throughout marking. It is this clock method that
  3729     checks all the abort conditions which were mentioned above and
  3730     decides when the task should abort. A work-based scheme is used to
  3731     trigger this clock method: when the number of object words the
  3732     marking phase has scanned or the number of references the marking
  3733     phase has visited reach a given limit. Additional invocations of
  3734     the clock method have been planted in a few other strategic places
  3735     too. The initial reason for the clock method was to avoid calling
  3736     vtime too regularly, as it is quite expensive. So, once it was in
  3737     place, it was natural to piggy-back all the other conditions on it
  3738     too and not constantly check them throughout the code.
  3740  *****************************************************************************/
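       // A minimal sketch (not part of this file; names and values are
       // illustrative only) of how a caller is expected to drive
       // do_marking_step(), per the comment above: invoke it repeatedly,
       // yielding and handling overflow between invocations, until it
       // returns without its has_aborted() flag set.
       //
       //   CMTask* task = ...;          // hypothetical task
       //   double target_ms = 10.0;     // hypothetical time slice
       //   do {
       //     task->do_marking_step(target_ms);
       //     // yield / handle a pending safepoint or overflow here
       //   } while (task->has_aborted());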
  3742 void CMTask::do_marking_step(double time_target_ms) {
  3743   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
  3744   assert(concurrent() == _cm->concurrent(), "they should be the same");
  3746   assert(concurrent() || _cm->region_stack_empty(),
  3747          "the region stack should have been cleared before remark");
  3748   assert(concurrent() || !_cm->has_aborted_regions(),
  3749          "aborted regions should have been cleared before remark");
  3750   assert(_region_finger == NULL,
  3751          "this should be non-null only when a region is being scanned");
  3753   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
  3754   assert(_task_queues != NULL, "invariant");
  3755   assert(_task_queue != NULL, "invariant");
  3756   assert(_task_queues->queue(_task_id) == _task_queue, "invariant");
  3758   assert(!_claimed,
  3759          "only one thread should claim this task at any one time");
  3761   // OK, this doesn't safeguard against all possible scenarios, as it is
  3762   // possible for two threads to set the _claimed flag at the same
  3763   // time. But it is only for debugging purposes anyway and it will
  3764   // catch most problems.
  3765   _claimed = true;
  3767   _start_time_ms = os::elapsedVTime() * 1000.0;
  3768   statsOnly( _interval_start_time_ms = _start_time_ms );
  3770   double diff_prediction_ms =
  3771     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
  3772   _time_target_ms = time_target_ms - diff_prediction_ms;
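       // For illustration (numbers hypothetical): with time_target_ms == 10.0
       // and a predicted overshoot of 0.5ms from previous steps, the
       // effective target used below becomes 10.0 - 0.5 == 9.5ms.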
  3774   // set up the variables that are used in the work-based scheme to
  3775   // call the regular clock method
  3776   _words_scanned = 0;
  3777   _refs_reached  = 0;
  3778   recalculate_limits();
  3780   // clear all flags
  3781   clear_has_aborted();
  3782   _has_aborted_timed_out = false;
  3783   _draining_satb_buffers = false;
  3785   ++_calls;
  3787   if (_cm->verbose_low())
  3788     gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
  3789                            "target = %1.2lfms >>>>>>>>>>",
  3790                            _task_id, _calls, _time_target_ms);
  3792   // Set up the bitmap and oop closures. Anything that uses them is
  3793   // eventually called from this method, so it is OK to allocate them
  3794   // locally, on this method's stack.
  3795   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
  3796   CMOopClosure    oop_closure(_g1h, _cm, this);
  3797   set_oop_closure(&oop_closure);
  3799   if (_cm->has_overflown()) {
  3800     // This can happen if the region stack or the mark stack overflows
  3801     // during a GC pause and this task, after a yield point,
  3802     // restarts. We have to abort as we need to get into the overflow
  3803     // protocol which happens right at the end of this task.
  3804     set_has_aborted();
  3807   // First drain any available SATB buffers. After this, we will not
  3808   // look at SATB buffers before the next invocation of this method.
  3809   // If enough completed SATB buffers are queued up, the regular clock
  3810   // will abort this task so that it restarts.
  3811   drain_satb_buffers();
  3812   // ...then partially drain the local queue and the global stack
  3813   drain_local_queue(true);
  3814   drain_global_stack(true);
  3816   // Then totally drain the region stack.  We will not look at
  3817   // it again before the next invocation of this method. Entries on
  3818   // the region stack are only added during evacuation pauses, for
  3819   // which we have to yield. When we do, we abort the task anyway so
  3820   // it will look at the region stack again when it restarts.
  3821   bitmap_closure.set_scanning_heap_region(false);
  3822   drain_region_stack(&bitmap_closure);
  3823   // ...then partially drain the local queue and the global stack
  3824   drain_local_queue(true);
  3825   drain_global_stack(true);
  3827   do {
  3828     if (!has_aborted() && _curr_region != NULL) {
  3829       // This means that we're already holding on to a region.
  3830       assert(_finger != NULL, "if region is not NULL, then the finger "
  3831              "should not be NULL either");
  3833       // We might have restarted this task after an evacuation pause
  3834       // which might have evacuated the region we're holding on to
  3835       // underneath our feet. Let's read its limit again to make sure
  3836       // that we do not iterate over a region of the heap that
  3837       // contains garbage (update_region_limit() will also move
  3838       // _finger to the start of the region if it is found empty).
  3839       update_region_limit();
  3840       // We will start from _finger not from the start of the region,
  3841       // as we might be restarting this task after aborting half-way
  3842       // through scanning this region. In this case, _finger points to
  3843       // the address where we last found a marked object. If this is a
  3844       // fresh region, _finger points to start().
  3845       MemRegion mr = MemRegion(_finger, _region_limit);
  3847       if (_cm->verbose_low())
  3848         gclog_or_tty->print_cr("[%d] we're scanning part "
  3849                                "["PTR_FORMAT", "PTR_FORMAT") "
  3850                                "of region "PTR_FORMAT,
  3851                                _task_id, _finger, _region_limit, _curr_region);
  3853       // Let's iterate over the bitmap of the part of the
  3854       // region that is left.
  3855       bitmap_closure.set_scanning_heap_region(true);
  3856       if (mr.is_empty() ||
  3857           _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
  3858         // We successfully completed iterating over the region. Now,
  3859         // let's give up the region.
  3860         giveup_current_region();
  3861         regular_clock_call();
  3862       } else {
  3863         assert(has_aborted(), "currently the only way to do so");
  3864         // The only way to abort the bitmap iteration is to return
  3865         // false from the do_bit() method. However, inside the
  3866         // do_bit() method we move the _finger to point to the
  3867         // object currently being looked at. So, if we bail out, we
  3868         // have definitely set _finger to something non-null.
  3869         assert(_finger != NULL, "invariant");
  3871         // Region iteration was actually aborted. So now _finger
  3872         // points to the address of the object we last scanned. If we
  3873         // leave it there, when we restart this task, we will rescan
  3874         // the object. It is easy to avoid this. We move the finger by
  3875         // enough to point to the next possible object header (the
  3876         // bitmap knows by how much we need to move it as it knows its
  3877         // granularity).
  3878         assert(_finger < _region_limit, "invariant");
  3879         HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
  3880         // Check if bitmap iteration was aborted while scanning the last object
  3881         if (new_finger >= _region_limit) {
  3882             giveup_current_region();
  3883         } else {
  3884             move_finger_to(new_finger);
  3888     // At this point we have either completed iterating over the
  3889     // region we were holding on to, or we have aborted.
  3891     // We then partially drain the local queue and the global stack.
  3892     // (Do we really need this?)
  3893     drain_local_queue(true);
  3894     drain_global_stack(true);
  3896     // Read the note on the claim_region() method on why it might
  3897     // return NULL with potentially more regions available for
  3898     // claiming and why we have to check out_of_regions() to determine
  3899     // whether we're done or not.
  3900     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
  3901       // We are going to try to claim a new region. We should have
  3902       // given up on the previous one.
  3903       // Separated the asserts so that we know which one fires.
  3904       assert(_curr_region  == NULL, "invariant");
  3905       assert(_finger       == NULL, "invariant");
  3906       assert(_region_limit == NULL, "invariant");
  3907       if (_cm->verbose_low())
  3908         gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
  3909       HeapRegion* claimed_region = _cm->claim_region(_task_id);
  3910       if (claimed_region != NULL) {
  3911         // Yes, we managed to claim one
  3912         statsOnly( ++_regions_claimed );
  3914         if (_cm->verbose_low())
  3915           gclog_or_tty->print_cr("[%d] we successfully claimed "
  3916                                  "region "PTR_FORMAT,
  3917                                  _task_id, claimed_region);
  3919         setup_for_region(claimed_region);
  3920         assert(_curr_region == claimed_region, "invariant");
  3922       // It is important to call the regular clock here. It might take
  3923       // a while to claim a region if, for example, we hit a large
  3924       // block of empty regions. So we need to call the regular clock
  3925       // method once round the loop to make sure it's called
  3926       // frequently enough.
  3927       regular_clock_call();
  3930     if (!has_aborted() && _curr_region == NULL) {
  3931       assert(_cm->out_of_regions(),
  3932              "at this point we should be out of regions");
  3934   } while ( _curr_region != NULL && !has_aborted());
  3936   if (!has_aborted()) {
  3937     // We cannot check whether the global stack is empty, since other
  3938     // tasks might be pushing objects to it concurrently. We also cannot
  3939     // check if the region stack is empty because if a thread is aborting
  3940     // it can push a partially done region back.
  3941     assert(_cm->out_of_regions(),
  3942            "at this point we should be out of regions");
  3944     if (_cm->verbose_low())
  3945       gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
  3947     // Try to reduce the number of available SATB buffers so that
  3948     // remark has less work to do.
  3949     drain_satb_buffers();
  3952   // Since we've done everything else, we can now totally drain the
  3953   // local queue and global stack.
  3954   drain_local_queue(false);
  3955   drain_global_stack(false);
  3957   // Attempt at work stealing from other tasks' queues.
  3958   if (!has_aborted()) {
  3959     // We have not aborted. This means that we have finished all that
  3960     // we could. Let's try to do some stealing...
  3962     // We cannot check whether the global stack is empty, since other
  3963     // tasks might be pushing objects to it concurrently. We also cannot
  3964     // check if the region stack is empty because if a thread is aborting
  3965     // it can push a partially done region back.
  3966     assert(_cm->out_of_regions() && _task_queue->size() == 0,
  3967            "only way to reach here");
  3969     if (_cm->verbose_low())
  3970       gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
  3972     while (!has_aborted()) {
  3973       oop obj;
  3974       statsOnly( ++_steal_attempts );
  3976       if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
  3977         if (_cm->verbose_medium())
  3978           gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
  3979                                  _task_id, (void*) obj);
  3981         statsOnly( ++_steals );
  3983         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
  3984                "any stolen object should be marked");
  3985         scan_object(obj);
  3987         // And since we're towards the end, let's totally drain the
  3988         // local queue and global stack.
  3989         drain_local_queue(false);
  3990         drain_global_stack(false);
  3991       } else {
  3992         break;
  3997   // We still haven't aborted. Now, let's try to get into the
  3998   // termination protocol.
  3999   if (!has_aborted()) {
  4000     // We cannot check whether the global stack is empty, since other
  4001     // tasks might be concurrently pushing objects on it. We also cannot
  4002     // check if the region stack is empty because if a thread is aborting
  4003     // it can push a partially done region back.
  4004     // Separated the asserts so that we know which one fires.
  4005     assert(_cm->out_of_regions(), "only way to reach here");
  4006     assert(_task_queue->size() == 0, "only way to reach here");
  4008     if (_cm->verbose_low())
  4009       gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
  4011     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
  4012     // The CMTask class also extends the TerminatorTerminator class,
  4013     // hence its should_exit_termination() method will also decide
  4014     // whether to exit the termination protocol or not.
  4015     bool finished = _cm->terminator()->offer_termination(this);
  4016     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
  4017     _termination_time_ms +=
  4018       termination_end_time_ms - _termination_start_time_ms;
  4020     if (finished) {
  4021       // We're all done.
  4023       if (_task_id == 0) {
  4024         // let's allow task 0 to do this
  4025         if (concurrent()) {
  4026           assert(_cm->concurrent_marking_in_progress(), "invariant");
  4027           // we need to set this to false before the next
  4028           // safepoint. This way we ensure that the marking phase
  4029           // doesn't observe any more heap expansions.
  4030           _cm->clear_concurrent_marking_in_progress();
  4034       // We can now guarantee that the global stack is empty, since
  4035       // all other tasks have finished. We separated the guarantees so
  4036       // that, if a condition is false, we can immediately find out
  4037       // which one.
  4038       guarantee(_cm->out_of_regions(), "only way to reach here");
  4039       guarantee(_aborted_region.is_empty(), "only way to reach here");
  4040       guarantee(_cm->region_stack_empty(), "only way to reach here");
  4041       guarantee(_cm->mark_stack_empty(), "only way to reach here");
  4042       guarantee(_task_queue->size() == 0, "only way to reach here");
  4043       guarantee(!_cm->has_overflown(), "only way to reach here");
  4044       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
  4045       guarantee(!_cm->region_stack_overflow(), "only way to reach here");
  4047       if (_cm->verbose_low())
  4048         gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
  4049     } else {
  4050       // Apparently there's more work to do. Let's abort this task. The
  4051       // caller will restart it and we can hopefully find more things to do.
  4053       if (_cm->verbose_low())
  4054         gclog_or_tty->print_cr("[%d] apparently there is more work to do", _task_id);
  4056       set_has_aborted();
  4057       statsOnly( ++_aborted_termination );
  4061   // Mainly for debugging purposes to make sure that a pointer to the
  4062   // closure which was statically allocated in this frame doesn't
  4063   // escape it by accident.
  4064   set_oop_closure(NULL);
  4065   double end_time_ms = os::elapsedVTime() * 1000.0;
  4066   double elapsed_time_ms = end_time_ms - _start_time_ms;
  4067   // Update the step history.
  4068   _step_times_ms.add(elapsed_time_ms);
  4070   if (has_aborted()) {
  4071     // The task was aborted for some reason.
  4073     statsOnly( ++_aborted );
  4075     if (_has_aborted_timed_out) {
  4076       double diff_ms = elapsed_time_ms - _time_target_ms;
  4077       // Keep statistics of how well we did with respect to hitting
  4078       // our target only if we actually timed out (if we aborted for
  4079       // other reasons, then the results might get skewed).
  4080       _marking_step_diffs_ms.add(diff_ms);
  4083     if (_cm->has_overflown()) {
  4084       // This is the interesting one. We aborted because a global
  4085       // overflow was raised. This means we have to restart the
  4086       // marking phase and start iterating over regions. However, in
  4087       // order to do this we have to make sure that all tasks stop
  4088       // what they are doing and re-initialise in a safe manner. We
  4089       // will achieve this with the use of two barrier sync points.
  4091       if (_cm->verbose_low())
  4092         gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
  4094       _cm->enter_first_sync_barrier(_task_id);
  4095       // When we exit this sync barrier we know that all tasks have
  4096       // stopped doing marking work. So, it's now safe to
  4097       // re-initialise our data structures. At the end of this method,
  4098       // task 0 will clear the global data structures.
  4100       statsOnly( ++_aborted_overflow );
  4102       // We clear the local state of this task...
  4103       clear_region_fields();
  4105       // ...and enter the second barrier.
  4106       _cm->enter_second_sync_barrier(_task_id);
  4107       // At this point everything has been re-initialised and we're
  4108       // ready to restart.
  4111     if (_cm->verbose_low()) {
  4112       gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
  4113                              "elapsed = %1.2lfms <<<<<<<<<<",
  4114                              _task_id, _time_target_ms, elapsed_time_ms);
  4115       if (_cm->has_aborted())
  4116         gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
  4117                                _task_id);
  4119   } else {
  4120     if (_cm->verbose_low())
  4121       gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
  4122                              "elapsed = %1.2lfms <<<<<<<<<<",
  4123                              _task_id, _time_target_ms, elapsed_time_ms);
  4126   _claimed = false;
  4129 CMTask::CMTask(int task_id,
  4130                ConcurrentMark* cm,
  4131                CMTaskQueue* task_queue,
  4132                CMTaskQueueSet* task_queues)
  4133   : _g1h(G1CollectedHeap::heap()),
  4134     _task_id(task_id), _cm(cm),
  4135     _claimed(false),
  4136     _nextMarkBitMap(NULL), _hash_seed(17),
  4137     _task_queue(task_queue),
  4138     _task_queues(task_queues),
  4139     _oop_closure(NULL),
  4140     _aborted_region(MemRegion()) {
  4141   guarantee(task_queue != NULL, "invariant");
  4142   guarantee(task_queues != NULL, "invariant");
  4144   statsOnly( _clock_due_to_scanning = 0;
  4145              _clock_due_to_marking  = 0 );
  4147   _marking_step_diffs_ms.add(0.5);
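       // (Presumably the 0.5ms seed above just gives the step-diff predictor
       // a small, non-zero starting sample before any real measurements
       // exist.)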
