src/share/vm/gc_implementation/g1/concurrentMark.cpp

author      tonyp
date        Wed, 19 Jan 2011 09:35:17 -0500
changeset   2469:7e37af9d69ef
parent      2445:7246a374a9f2
child       2472:0fa27f37d4d4
permissions -rw-r--r--

7011379: G1: overly long concurrent marking cycles
Summary: This changeset introduces filtering of SATB buffers at the point when they are about to be enqueued. If this filtering clears enough entries on each buffer, the buffer can then be re-used and not enqueued. This cuts down the number of SATB buffers that need to be processed by the concurrent marking threads.
Reviewed-by: johnc, ysr
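
To illustrate the mechanism the summary describes, here is a minimal, hypothetical C++ sketch of enqueue-time SATB buffer filtering. All names and the threshold parameter below are invented for illustration and are not the HotSpot API; the actual changeset applies the idea inside the SATB queue code.

// Hypothetical sketch of SATB buffer filtering -- illustrative only.
#include <cstddef>

// Stand-in predicate: does this entry still need to be processed by the
// concurrent marking threads? (G1 would consult the mark bitmap and the
// state of the region containing the entry.)
static bool entry_needs_processing(const void* entry) {
  return entry != NULL;  // placeholder condition for the sketch
}

struct SATBBufferSketch {
  void** buf;       // entry slots; the buffer fills from the end towards 0
  size_t capacity;  // total number of slots
  size_t index;     // first used slot; slots [index, capacity) are in use

  // Compact the surviving entries towards the end of the buffer and
  // reclaim the space occupied by the filtered-out ones.
  void filter() {
    size_t dst = capacity;
    for (size_t src = capacity; src > index; ) {
      --src;
      if (entry_needs_processing(buf[src])) {
        buf[--dst] = buf[src];  // keep this entry
      }
    }
    index = dst;  // slots [0, index) are free for re-use
  }

  // Called when the buffer fills up: filter it first, and hand it to the
  // marking threads only if it is still fuller than the given threshold.
  bool should_enqueue(double threshold_fraction) {
    filter();
    double used = (double)(capacity - index) / (double)capacity;
    return used > threshold_fraction;  // false => caller re-uses the buffer
  }
};

With a threshold of 0.5, for example, a full buffer whose occupancy drops to 40% after filtering would be re-used in place rather than enqueued, which is what cuts down the number of buffers the concurrent marking threads must process.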

     1 /*
     2  * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.
     8  *
     9  * This code is distributed in the hope that it will be useful, but WITHOUT
    10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    12  * version 2 for more details (a copy is included in the LICENSE file that
    13  * accompanied this code).
    14  *
    15  * You should have received a copy of the GNU General Public License version
    16  * 2 along with this work; if not, write to the Free Software Foundation,
    17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    18  *
    19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    20  * or visit www.oracle.com if you need additional information or have any
    21  * questions.
    22  *
    23  */
    25 #include "precompiled.hpp"
    26 #include "classfile/symbolTable.hpp"
    27 #include "gc_implementation/g1/concurrentMark.hpp"
    28 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
    29 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
    30 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
    31 #include "gc_implementation/g1/g1RemSet.hpp"
    32 #include "gc_implementation/g1/heapRegionRemSet.hpp"
    33 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
    34 #include "gc_implementation/shared/vmGCOperations.hpp"
    35 #include "memory/genOopClosures.inline.hpp"
    36 #include "memory/referencePolicy.hpp"
    37 #include "memory/resourceArea.hpp"
    38 #include "oops/oop.inline.hpp"
    39 #include "runtime/handles.inline.hpp"
    40 #include "runtime/java.hpp"
    42 //
    43 // CMS Bit Map Wrapper
    45 CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter):
    46   _bm((uintptr_t*)NULL,0),
    47   _shifter(shifter) {
    48   _bmStartWord = (HeapWord*)(rs.base());
    49   _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
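         // Sizing note: one bitmap bit covers 2^_shifter heap words, so the
         // backing store needs (_bmWordSize >> _shifter) bits, i.e.
         // (_bmWordSize >> (_shifter + LogBitsPerByte)) bytes; the "+ 1"
         // below guards against truncation from the shift, and the result
         // is then aligned up.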
    50   ReservedSpace brs(ReservedSpace::allocation_align_size_up(
    51                      (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
    53   guarantee(brs.is_reserved(), "couldn't allocate CMS bit map");
    54   // For now we'll just commit all of the bit map up front.
    55   // Later on we'll try to be more parsimonious with swap.
    56   guarantee(_virtual_space.initialize(brs, brs.size()),
    57             "couldn't reserve backing store for CMS bit map");
    58   assert(_virtual_space.committed_size() == brs.size(),
    59          "didn't reserve backing store for all of CMS bit map?");
    60   _bm.set_map((uintptr_t*)_virtual_space.low());
    61   assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
    62          _bmWordSize, "inconsistency in bit map sizing");
    63   _bm.set_size(_bmWordSize >> _shifter);
    64 }
    66 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
    67                                                HeapWord* limit) const {
    68   // First we must round addr *up* to a possible object boundary.
    69   addr = (HeapWord*)align_size_up((intptr_t)addr,
    70                                   HeapWordSize << _shifter);
    71   size_t addrOffset = heapWordToOffset(addr);
    72   if (limit == NULL) limit = _bmStartWord + _bmWordSize;
    73   size_t limitOffset = heapWordToOffset(limit);
    74   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
    75   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
    76   assert(nextAddr >= addr, "get_next_one postcondition");
    77   assert(nextAddr == limit || isMarked(nextAddr),
    78          "get_next_one postcondition");
    79   return nextAddr;
    80 }
    82 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
    83                                                  HeapWord* limit) const {
    84   size_t addrOffset = heapWordToOffset(addr);
    85   if (limit == NULL) limit = _bmStartWord + _bmWordSize;
    86   size_t limitOffset = heapWordToOffset(limit);
    87   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
    88   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
    89   assert(nextAddr >= addr, "get_next_one postcondition");
    90   assert(nextAddr == limit || !isMarked(nextAddr),
    91          "get_next_one postcondition");
    92   return nextAddr;
    93 }
    95 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
    96   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
    97   return (int) (diff >> _shifter);
    98 }
   100 bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
   101   HeapWord* left  = MAX2(_bmStartWord, mr.start());
   102   HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
   103   if (right > left) {
   104     // Right-open interval [leftOffset, rightOffset).
   105     return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
   106   } else {
   107     return true;
   108   }
   109 }
   111 void CMBitMapRO::mostly_disjoint_range_union(BitMap*   from_bitmap,
   112                                              size_t    from_start_index,
   113                                              HeapWord* to_start_word,
   114                                              size_t    word_num) {
   115   _bm.mostly_disjoint_range_union(from_bitmap,
   116                                   from_start_index,
   117                                   heapWordToOffset(to_start_word),
   118                                   word_num);
   119 }
   121 #ifndef PRODUCT
   122 bool CMBitMapRO::covers(ReservedSpace rs) const {
   123   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
   124   assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
   125          "size inconsistency");
   126   return _bmStartWord == (HeapWord*)(rs.base()) &&
   127          _bmWordSize  == rs.size()>>LogHeapWordSize;
   128 }
   129 #endif
   131 void CMBitMap::clearAll() {
   132   _bm.clear();
   133   return;
   134 }
   136 void CMBitMap::markRange(MemRegion mr) {
   137   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
   138   assert(!mr.is_empty(), "unexpected empty region");
   139   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
   140           ((HeapWord *) mr.end())),
   141          "markRange memory region end is not card aligned");
   142   // convert address range into offset range
   143   _bm.at_put_range(heapWordToOffset(mr.start()),
   144                    heapWordToOffset(mr.end()), true);
   145 }
   147 void CMBitMap::clearRange(MemRegion mr) {
   148   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
   149   assert(!mr.is_empty(), "unexpected empty region");
   150   // convert address range into offset range
   151   _bm.at_put_range(heapWordToOffset(mr.start()),
   152                    heapWordToOffset(mr.end()), false);
   153 }
   155 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
   156                                             HeapWord* end_addr) {
   157   HeapWord* start = getNextMarkedWordAddress(addr);
   158   start = MIN2(start, end_addr);
   159   HeapWord* end   = getNextUnmarkedWordAddress(start);
   160   end = MIN2(end, end_addr);
   161   assert(start <= end, "Consistency check");
   162   MemRegion mr(start, end);
   163   if (!mr.is_empty()) {
   164     clearRange(mr);
   165   }
   166   return mr;
   167 }
   169 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
   170   _base(NULL), _cm(cm)
   171 #ifdef ASSERT
   172   , _drain_in_progress(false)
   173   , _drain_in_progress_yields(false)
   174 #endif
   175 {}
   177 void CMMarkStack::allocate(size_t size) {
   178   _base = NEW_C_HEAP_ARRAY(oop, size);
   179   if (_base == NULL)
   180     vm_exit_during_initialization("Failed to allocate "
   181                                   "CM region mark stack");
   182   _index = 0;
   183   // QQQQ cast ...
   184   _capacity = (jint) size;
   185   _oops_do_bound = -1;
   186   NOT_PRODUCT(_max_depth = 0);
   187 }
   189 CMMarkStack::~CMMarkStack() {
   190   if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base);
   191 }
   193 void CMMarkStack::par_push(oop ptr) {
   194   while (true) {
   195     if (isFull()) {
   196       _overflow = true;
   197       return;
   198     }
   199     // Otherwise...
   200     jint index = _index;
   201     jint next_index = index+1;
   202     jint res = Atomic::cmpxchg(next_index, &_index, index);
   203     if (res == index) {
   204       _base[index] = ptr;
   205       // Note that we don't maintain this atomically.  We could, but it
   206       // doesn't seem necessary.
   207       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
   208       return;
   209     }
   210     // Otherwise, we need to try again.
   211   }
   212 }
   214 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
   215   while (true) {
   216     if (isFull()) {
   217       _overflow = true;
   218       return;
   219     }
   220     // Otherwise...
   221     jint index = _index;
   222     jint next_index = index + n;
   223     if (next_index > _capacity) {
   224       _overflow = true;
   225       return;
   226     }
   227     jint res = Atomic::cmpxchg(next_index, &_index, index);
   228     if (res == index) {
   229       for (int i = 0; i < n; i++) {
   230         int ind = index + i;
   231         assert(ind < _capacity, "By overflow test above.");
   232         _base[ind] = ptr_arr[i];
   233       }
   234       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
   235       return;
   236     }
   237     // Otherwise, we need to try again.
   238   }
   239 }
   242 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
   243   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
   244   jint start = _index;
   245   jint next_index = start + n;
   246   if (next_index > _capacity) {
   247     _overflow = true;
   248     return;
   249   }
   250   // Otherwise.
   251   _index = next_index;
   252   for (int i = 0; i < n; i++) {
   253     int ind = start + i;
   254     assert(ind < _capacity, "By overflow test above.");
   255     _base[ind] = ptr_arr[i];
   256   }
   257 }
   260 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
   261   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
   262   jint index = _index;
   263   if (index == 0) {
   264     *n = 0;
   265     return false;
   266   } else {
   267     int k = MIN2(max, index);
   268     jint new_ind = index - k;
   269     for (int j = 0; j < k; j++) {
   270       ptr_arr[j] = _base[new_ind + j];
   271     }
   272     _index = new_ind;
   273     *n = k;
   274     return true;
   275   }
   276 }
   279 CMRegionStack::CMRegionStack() : _base(NULL) {}
   281 void CMRegionStack::allocate(size_t size) {
   282   _base = NEW_C_HEAP_ARRAY(MemRegion, size);
   283   if (_base == NULL)
   284     vm_exit_during_initialization("Failed to allocate "
   285                                   "CM region mark stack");
   286   _index = 0;
   287   // QQQQ cast ...
   288   _capacity = (jint) size;
   289 }
   291 CMRegionStack::~CMRegionStack() {
   292   if (_base != NULL) FREE_C_HEAP_ARRAY(MemRegion, _base);
   293 }
   295 void CMRegionStack::push_lock_free(MemRegion mr) {
   296   assert(mr.word_size() > 0, "Precondition");
   297   while (true) {
   298     jint index = _index;
   300     if (index >= _capacity) {
   301       _overflow = true;
   302       return;
   303     }
   304     // Otherwise...
   305     jint next_index = index+1;
   306     jint res = Atomic::cmpxchg(next_index, &_index, index);
   307     if (res == index) {
   308       _base[index] = mr;
   309       return;
   310     }
   311     // Otherwise, we need to try again.
   312   }
   313 }
   315 // Lock-free pop of the region stack. Called during the concurrent
   316 // marking / remark phases. Should only be called in tandem with
   317 // other lock-free pops.
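       // (The with-lock variants further below synchronize on
       // CMRegionStack_lock, which does not exclude a concurrent CAS on
       // _index, so lock-free and with-lock operations on the same stack
       // must not be interleaved.)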
   318 MemRegion CMRegionStack::pop_lock_free() {
   319   while (true) {
   320     jint index = _index;
   322     if (index == 0) {
   323       return MemRegion();
   324     }
   325     // Otherwise...
   326     jint next_index = index-1;
   327     jint res = Atomic::cmpxchg(next_index, &_index, index);
   328     if (res == index) {
   329       MemRegion mr = _base[next_index];
   330       if (mr.start() != NULL) {
   331         assert(mr.end() != NULL, "invariant");
   332         assert(mr.word_size() > 0, "invariant");
   333         return mr;
   334       } else {
   335         // that entry was invalidated... let's skip it
   336         assert(mr.end() == NULL, "invariant");
   337       }
   338     }
   339     // Otherwise, we need to try again.
   340   }
   341 }
   343 #if 0
   344 // The routines that manipulate the region stack with a lock are
   345 // not currently used. They should be retained, however, as a
   346 // diagnostic aid.
   348 void CMRegionStack::push_with_lock(MemRegion mr) {
   349   assert(mr.word_size() > 0, "Precondition");
   350   MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
   352   if (isFull()) {
   353     _overflow = true;
   354     return;
   355   }
   357   _base[_index] = mr;
   358   _index += 1;
   359 }
   361 MemRegion CMRegionStack::pop_with_lock() {
   362   MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
   364   while (true) {
   365     if (_index == 0) {
   366       return MemRegion();
   367     }
   368     _index -= 1;
   370     MemRegion mr = _base[_index];
   371     if (mr.start() != NULL) {
   372       assert(mr.end() != NULL, "invariant");
   373       assert(mr.word_size() > 0, "invariant");
   374       return mr;
   375     } else {
   376       // that entry was invalidated... let's skip it
   377       assert(mr.end() == NULL, "invariant");
   378     }
   379   }
   380 }
   381 #endif
   383 bool CMRegionStack::invalidate_entries_into_cset() {
   384   bool result = false;
   385   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   386   for (int i = 0; i < _oops_do_bound; ++i) {
   387     MemRegion mr = _base[i];
   388     if (mr.start() != NULL) {
   389       assert(mr.end() != NULL, "invariant");
   390       assert(mr.word_size() > 0, "invariant");
   391       HeapRegion* hr = g1h->heap_region_containing(mr.start());
   392       assert(hr != NULL, "invariant");
   393       if (hr->in_collection_set()) {
   394         // The region points into the collection set
   395         _base[i] = MemRegion();
   396         result = true;
   397       }
   398     } else {
   399       // that entry was invalidated... let's skip it
   400       assert(mr.end() == NULL, "invariant");
   401     }
   402   }
   403   return result;
   404 }
   406 template<class OopClosureClass>
   407 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
   408   assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
   409          || SafepointSynchronize::is_at_safepoint(),
   410          "Drain recursion must be yield-safe.");
   411   bool res = true;
   412   debug_only(_drain_in_progress = true);
   413   debug_only(_drain_in_progress_yields = yield_after);
   414   while (!isEmpty()) {
   415     oop newOop = pop();
   416     assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
   417     assert(newOop->is_oop(), "Expected an oop");
   418     assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
   419            "only grey objects on this stack");
   420     // iterate over the oops in this oop, marking and pushing
   421     // the ones in CMS generation.
   422     newOop->oop_iterate(cl);
   423     if (yield_after && _cm->do_yield_check()) {
   424       res = false; break;
   425     }
   426   }
   427   debug_only(_drain_in_progress = false);
   428   return res;
   429 }
   431 void CMMarkStack::oops_do(OopClosure* f) {
   432   if (_index == 0) return;
   433   assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
   434          "Bound must be set.");
   435   for (int i = 0; i < _oops_do_bound; i++) {
   436     f->do_oop(&_base[i]);
   437   }
   438   _oops_do_bound = -1;
   439 }
   441 bool ConcurrentMark::not_yet_marked(oop obj) const {
   442   return (_g1h->is_obj_ill(obj)
   443           || (_g1h->is_in_permanent(obj)
   444               && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
   445 }
   447 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
   448 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
   449 #endif // _MSC_VER
   451 ConcurrentMark::ConcurrentMark(ReservedSpace rs,
   452                                int max_regions) :
   453   _markBitMap1(rs, MinObjAlignment - 1),
   454   _markBitMap2(rs, MinObjAlignment - 1),
   456   _parallel_marking_threads(0),
   457   _sleep_factor(0.0),
   458   _marking_task_overhead(1.0),
   459   _cleanup_sleep_factor(0.0),
   460   _cleanup_task_overhead(1.0),
   461   _region_bm(max_regions, false /* in_resource_area*/),
   462   _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
   463            CardTableModRefBS::card_shift,
   464            false /* in_resource_area*/),
   465   _prevMarkBitMap(&_markBitMap1),
   466   _nextMarkBitMap(&_markBitMap2),
   467   _at_least_one_mark_complete(false),
   469   _markStack(this),
   470   _regionStack(),
   471   // _finger set in set_non_marking_state
   473   _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
   474   // _active_tasks set in set_non_marking_state
   475   // _tasks set inside the constructor
   476   _task_queues(new CMTaskQueueSet((int) _max_task_num)),
   477   _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),
   479   _has_overflown(false),
   480   _concurrent(false),
   481   _has_aborted(false),
   482   _restart_for_overflow(false),
   483   _concurrent_marking_in_progress(false),
   484   _should_gray_objects(false),
   486   // _verbose_level set below
   488   _init_times(),
   489   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
   490   _cleanup_times(),
   491   _total_counting_time(0.0),
   492   _total_rs_scrub_time(0.0),
   494   _parallel_workers(NULL)
   495 {
   496   CMVerboseLevel verbose_level =
   497     (CMVerboseLevel) G1MarkingVerboseLevel;
   498   if (verbose_level < no_verbose)
   499     verbose_level = no_verbose;
   500   if (verbose_level > high_verbose)
   501     verbose_level = high_verbose;
   502   _verbose_level = verbose_level;
   504   if (verbose_low())
   505     gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
   506                            "heap end = "PTR_FORMAT, _heap_start, _heap_end);
   508   _markStack.allocate(MarkStackSize);
   509   _regionStack.allocate(G1MarkRegionStackSize);
   511   // Create & start a ConcurrentMark thread.
   512   _cmThread = new ConcurrentMarkThread(this);
   513   assert(cmThread() != NULL, "CM Thread should have been created");
   514   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
   516   _g1h = G1CollectedHeap::heap();
   517   assert(CGC_lock != NULL, "Where's the CGC_lock?");
   518   assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
   519   assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
   521   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
   522   satb_qs.set_buffer_size(G1SATBBufferSize);
   524   int size = (int) MAX2(ParallelGCThreads, (size_t)1);
   525   _par_cleanup_thread_state = NEW_C_HEAP_ARRAY(ParCleanupThreadState*, size);
   526   for (int i = 0 ; i < size; i++) {
   527     _par_cleanup_thread_state[i] = new ParCleanupThreadState;
   528   }
   530   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
   531   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);
   533   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
   534   _active_tasks = _max_task_num;
   535   for (int i = 0; i < (int) _max_task_num; ++i) {
   536     CMTaskQueue* task_queue = new CMTaskQueue();
   537     task_queue->initialize();
   538     _task_queues->register_queue(i, task_queue);
   540     _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
   541     _accum_task_vtime[i] = 0.0;
   542   }
   544   if (ConcGCThreads > ParallelGCThreads) {
   545     vm_exit_during_initialization("Can't have more ConcGCThreads "
   546                                   "than ParallelGCThreads.");
   547   }
   548   if (ParallelGCThreads == 0) {
   549     // if we are not running with any parallel GC threads we will not
   550     // spawn any marking threads either
   551     _parallel_marking_threads =   0;
   552     _sleep_factor             = 0.0;
   553     _marking_task_overhead    = 1.0;
   554   } else {
   555     if (ConcGCThreads > 0) {
   556       // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
   557       // if both are set
   559       _parallel_marking_threads = ConcGCThreads;
   560       _sleep_factor             = 0.0;
   561       _marking_task_overhead    = 1.0;
   562     } else if (G1MarkingOverheadPercent > 0) {
   563       // we will calculate the number of parallel marking threads
   564       // based on a target overhead with respect to the soft real-time
   565       // goal
   567       double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
   568       double overall_cm_overhead =
   569         (double) MaxGCPauseMillis * marking_overhead /
   570         (double) GCPauseIntervalMillis;
   571       double cpu_ratio = 1.0 / (double) os::processor_count();
   572       double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
   573       double marking_task_overhead =
   574         overall_cm_overhead / marking_thread_num *
   575                                                 (double) os::processor_count();
   576       double sleep_factor =
   577                          (1.0 - marking_task_overhead) / marking_task_overhead;
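             // Worked example with illustrative numbers (not defaults):
             // MaxGCPauseMillis = 200, GCPauseIntervalMillis = 1000,
             // G1MarkingOverheadPercent = 10 and 8 processors give
             // overall_cm_overhead = 200 * 0.10 / 1000 = 0.02,
             // cpu_ratio = 0.125, marking_thread_num = ceil(0.16) = 1,
             // marking_task_overhead = 0.02 * 8 / 1 = 0.16 and
             // sleep_factor = (1 - 0.16) / 0.16 = 5.25.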
   579       _parallel_marking_threads = (size_t) marking_thread_num;
   580       _sleep_factor             = sleep_factor;
   581       _marking_task_overhead    = marking_task_overhead;
   582     } else {
   583       _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
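             // e.g. ParallelGCThreads = 8 gives (8 + 2) / 4 = 2 marking threads.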
   584       _sleep_factor             = 0.0;
   585       _marking_task_overhead    = 1.0;
   586     }
   588     if (parallel_marking_threads() > 1)
   589       _cleanup_task_overhead = 1.0;
   590     else
   591       _cleanup_task_overhead = marking_task_overhead();
   592     _cleanup_sleep_factor =
   593                      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
   595 #if 0
   596     gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
   597     gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
   598     gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
   599     gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
   600     gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
   601 #endif
   603     guarantee(parallel_marking_threads() > 0, "peace of mind");
   604     _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
   605          (int) _parallel_marking_threads, false, true);
   606     if (_parallel_workers == NULL) {
   607       vm_exit_during_initialization("Failed necessary allocation.");
   608     } else {
   609       _parallel_workers->initialize_workers();
   610     }
   611   }
   613   // so that the call below can read a sensible value
   614   _heap_start = (HeapWord*) rs.base();
   615   set_non_marking_state();
   616 }
   618 void ConcurrentMark::update_g1_committed(bool force) {
   619   // If concurrent marking is not in progress, then we do not need to
   620   // update _heap_end. This has a subtle and important
   621   // side-effect. Imagine that two evacuation pauses happen between
   622   // marking completion and remark. The first one can grow the
   623   // heap (hence now the finger is below the heap end). Then, the
   624   // second one could unnecessarily push regions on the region
   625   // stack. This causes the invariant that the region stack is empty
   626   // at the beginning of remark to be false. By ensuring that we do
   627   // not observe heap expansions after marking is complete, we avoid
   628   // this problem.
   629   if (!concurrent_marking_in_progress() && !force)
   630     return;
   632   MemRegion committed = _g1h->g1_committed();
   633   assert(committed.start() == _heap_start, "start shouldn't change");
   634   HeapWord* new_end = committed.end();
   635   if (new_end > _heap_end) {
   636     // The heap has been expanded.
   638     _heap_end = new_end;
   639   }
   640   // Notice that the heap can also shrink. However, this only happens
   641   // during a Full GC (at least currently) and the entire marking
   642   // phase will bail out and the task will not be restarted. So, let's
   643   // do nothing.
   644 }
   646 void ConcurrentMark::reset() {
   647   // Starting values for these two. This should be called in a STW
   648   // phase. CM will be notified of any future g1_committed expansions;
   649   // these will happen at the end of evacuation pauses, when tasks
   650   // are inactive.
   651   MemRegion committed = _g1h->g1_committed();
   652   _heap_start = committed.start();
   653   _heap_end   = committed.end();
   655   // Separated the asserts so that we know which one fires.
   656   assert(_heap_start != NULL, "heap bounds should look ok");
   657   assert(_heap_end != NULL, "heap bounds should look ok");
   658   assert(_heap_start < _heap_end, "heap bounds should look ok");
   660   // reset all the marking data structures and any necessary flags
   661   clear_marking_state();
   663   if (verbose_low())
   664     gclog_or_tty->print_cr("[global] resetting");
   666   // We do reset all of them, since different phases will use
   667   // different number of active threads. So, it's easiest to have all
   668   // of them ready.
   669   for (int i = 0; i < (int) _max_task_num; ++i) {
   670     _tasks[i]->reset(_nextMarkBitMap);
   671   }
   673   // we need this to make sure that the flag is on during the evac
   674   // pause with initial mark piggy-backed
   675   set_concurrent_marking_in_progress();
   676 }
   678 void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
   679   assert(active_tasks <= _max_task_num, "we should not have more");
   681   _active_tasks = active_tasks;
   682   // Need to update the three data structures below according to the
   683   // number of active threads for this phase.
   684   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
   685   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
   686   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
   688   _concurrent = concurrent;
   689   // We propagate this to all tasks, not just the active ones.
   690   for (int i = 0; i < (int) _max_task_num; ++i)
   691     _tasks[i]->set_concurrent(concurrent);
   693   if (concurrent) {
   694     set_concurrent_marking_in_progress();
   695   } else {
   696     // We currently assume that the concurrent flag has been set to
   697     // false before we start remark. At this point we should also be
   698     // in a STW phase.
   699     assert(!concurrent_marking_in_progress(), "invariant");
   700     assert(_finger == _heap_end, "only way to get here");
   701     update_g1_committed(true);
   702   }
   703 }
   705 void ConcurrentMark::set_non_marking_state() {
   706   // We set the global marking state to some default values when we're
   707   // not doing marking.
   708   clear_marking_state();
   709   _active_tasks = 0;
   710   clear_concurrent_marking_in_progress();
   711 }
   713 ConcurrentMark::~ConcurrentMark() {
   714   int size = (int) MAX2(ParallelGCThreads, (size_t)1);
   715   for (int i = 0; i < size; i++) delete _par_cleanup_thread_state[i];
   716   FREE_C_HEAP_ARRAY(ParCleanupThreadState*,
   717                     _par_cleanup_thread_state);
   719   for (int i = 0; i < (int) _max_task_num; ++i) {
   720     delete _task_queues->queue(i);
   721     delete _tasks[i];
   722   }
   723   delete _task_queues;
   724   FREE_C_HEAP_ARRAY(CMTask*, _max_task_num);
   725 }
   727 // This closure is used to mark refs into the g1 generation
   728 // from external roots in the CMS bit map.
   729 // Called at the first checkpoint.
   730 //
   732 void ConcurrentMark::clearNextBitmap() {
   733   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   734   G1CollectorPolicy* g1p = g1h->g1_policy();
   736   // Make sure that the concurrent mark thread still appears to be in
   737   // the current cycle.
   738   guarantee(cmThread()->during_cycle(), "invariant");
   740   // We are finishing up the current cycle by clearing the next
   741   // marking bitmap and getting it ready for the next cycle. During
   742   // this time no other cycle can start. So, let's make sure that this
   743   // is the case.
   744   guarantee(!g1h->mark_in_progress(), "invariant");
   746   // clear the mark bitmap (no grey objects to start with).
   747   // We need to do this in chunks and offer to yield in between
   748   // each chunk.
   749   HeapWord* start  = _nextMarkBitMap->startWord();
   750   HeapWord* end    = _nextMarkBitMap->endWord();
   751   HeapWord* cur    = start;
   752   size_t chunkSize = M;
   753   while (cur < end) {
   754     HeapWord* next = cur + chunkSize;
   755     if (next > end)
   756       next = end;
   757     MemRegion mr(cur,next);
   758     _nextMarkBitMap->clearRange(mr);
   759     cur = next;
   760     do_yield_check();
   762     // Repeat the asserts from above. We'll do them as asserts here to
   763     // minimize their overhead on the product. However, we'll have
   764     // them as guarantees at the beginning / end of the bitmap
   765     // clearing to get some checking in the product.
   766     assert(cmThread()->during_cycle(), "invariant");
   767     assert(!g1h->mark_in_progress(), "invariant");
   768   }
   770   // Repeat the asserts from above.
   771   guarantee(cmThread()->during_cycle(), "invariant");
   772   guarantee(!g1h->mark_in_progress(), "invariant");
   773 }
   775 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
   776 public:
   777   bool doHeapRegion(HeapRegion* r) {
   778     if (!r->continuesHumongous()) {
   779       r->note_start_of_marking(true);
   780     }
   781     return false;
   782   }
   783 };
   785 void ConcurrentMark::checkpointRootsInitialPre() {
   786   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
   787   G1CollectorPolicy* g1p = g1h->g1_policy();
   789   _has_aborted = false;
   791 #ifndef PRODUCT
   792   if (G1PrintReachableAtInitialMark) {
   793     print_reachable("at-cycle-start",
   794                     true /* use_prev_marking */, true /* all */);
   795   }
   796 #endif
   798   // Initialise marking structures. This has to be done in a STW phase.
   799   reset();
   800 }
   802 class CMMarkRootsClosure: public OopsInGenClosure {
   803 private:
   804   ConcurrentMark*  _cm;
   805   G1CollectedHeap* _g1h;
   806   bool             _do_barrier;
   808 public:
   809   CMMarkRootsClosure(ConcurrentMark* cm,
   810                      G1CollectedHeap* g1h,
   811                      bool do_barrier) : _cm(cm), _g1h(g1h),
   812                                         _do_barrier(do_barrier) { }
   814   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
   815   virtual void do_oop(      oop* p) { do_oop_work(p); }
   817   template <class T> void do_oop_work(T* p) {
   818     T heap_oop = oopDesc::load_heap_oop(p);
   819     if (!oopDesc::is_null(heap_oop)) {
   820       oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
   821       assert(obj->is_oop() || obj->mark() == NULL,
   822              "expected an oop, possibly with mark word displaced");
   823       HeapWord* addr = (HeapWord*)obj;
   824       if (_g1h->is_in_g1_reserved(addr)) {
   825         _cm->grayRoot(obj);
   826       }
   827     }
   828     if (_do_barrier) {
   829       assert(!_g1h->is_in_g1_reserved(p),
   830              "Should be called on external roots");
   831       do_barrier(p);
   832     }
   833   }
   834 };
   836 void ConcurrentMark::checkpointRootsInitialPost() {
   837   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
   839   // For each region note start of marking.
   840   NoteStartOfMarkHRClosure startcl;
   841   g1h->heap_region_iterate(&startcl);
   843   // Start weak-reference discovery.
   844   ReferenceProcessor* rp = g1h->ref_processor();
   845   rp->verify_no_references_recorded();
   846   rp->enable_discovery(); // enable ("weak") refs discovery
   847   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
   849   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
   850   // This is the start of the marking cycle; we expect all
   851   // threads to have SATB queues with active set to false.
   852   satb_mq_set.set_active_all_threads(true, /* new active value */
   853                                      false /* expected_active */);
   855   // update_g1_committed() will be called at the end of an evac pause
   856   // when marking is on. So, it's also called at the end of the
   857   // initial-mark pause to update the heap end, if the heap expands
   858   // during it. No need to call it here.
   859 }
   861 // Checkpoint the roots into this generation from outside
   862 // this generation. [Note this initial checkpoint need only
   863 // be approximate -- we'll do a catch up phase subsequently.]
   864 void ConcurrentMark::checkpointRootsInitial() {
   865   assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped");
   866   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   868   double start = os::elapsedTime();
   870   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
   871   g1p->record_concurrent_mark_init_start();
   872   checkpointRootsInitialPre();
   874   // YSR: when concurrent precleaning is in place, we'll
   875   // need to clear the cached card table here
   877   ResourceMark rm;
   878   HandleMark  hm;
   880   g1h->ensure_parsability(false);
   881   g1h->perm_gen()->save_marks();
   883   CMMarkRootsClosure notOlder(this, g1h, false);
   884   CMMarkRootsClosure older(this, g1h, true);
   886   g1h->set_marking_started();
   887   g1h->rem_set()->prepare_for_younger_refs_iterate(false);
   889   g1h->process_strong_roots(true,    // activate StrongRootsScope
   890                             false,   // fake perm gen collection
   891                             SharedHeap::SO_AllClasses,
   892                             &notOlder, // Regular roots
   893                             NULL,     // do not visit active blobs
   894                             &older    // Perm Gen Roots
   895                             );
   896   checkpointRootsInitialPost();
   898   // Statistics.
   899   double end = os::elapsedTime();
   900   _init_times.add((end - start) * 1000.0);
   902   g1p->record_concurrent_mark_init_end();
   903 }
   905 /*
   906    Notice that in the next two methods, we actually leave the STS
   907    during the barrier sync and join it immediately afterwards. If we
   908    do not do this, then the following deadlock can occur: one
   909    thread could be in the barrier sync code, waiting for the other
   910    thread to also sync up, whereas another one could be trying to
   911    yield, while also waiting for the other threads to sync up too.
   913    Because the thread that does the sync barrier has left the STS, it
   914    is possible for a Full GC or an evacuation pause to occur while
   915    it is suspended. This is actually safe, since entering the sync
   916    barrier is one of the last things do_marking_step() does, and it
   917    doesn't manipulate any data structures afterwards.
   918 */
   920 void ConcurrentMark::enter_first_sync_barrier(int task_num) {
   921   if (verbose_low())
   922     gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
   924   ConcurrentGCThread::stsLeave();
   925   _first_overflow_barrier_sync.enter();
   926   ConcurrentGCThread::stsJoin();
   927   // at this point everyone should have synced up and not be doing any
   928   // more work
   930   if (verbose_low())
   931     gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
   933   // let task 0 do this
   934   if (task_num == 0) {
   935     // task 0 is responsible for clearing the global data structures
   936     clear_marking_state();
   938     if (PrintGC) {
   939       gclog_or_tty->date_stamp(PrintGCDateStamps);
   940       gclog_or_tty->stamp(PrintGCTimeStamps);
   941       gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
   942     }
   943   }
   945   // after this, each task should reset its own data structures and
   946   // then go into the second barrier
   947 }
   949 void ConcurrentMark::enter_second_sync_barrier(int task_num) {
   950   if (verbose_low())
   951     gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
   953   ConcurrentGCThread::stsLeave();
   954   _second_overflow_barrier_sync.enter();
   955   ConcurrentGCThread::stsJoin();
   956   // at this point everything should be re-initialised and ready to go
   958   if (verbose_low())
   959     gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
   960 }
   962 void ConcurrentMark::grayRoot(oop p) {
   963   HeapWord* addr = (HeapWord*) p;
   964   // We can't really check against _heap_start and _heap_end, since it
   965   // is possible during an evacuation pause with piggy-backed
   966   // initial-mark that the committed space is expanded during the
   967   // pause without CM observing this change. So the assertion below
   968   // is a bit conservative, but better than nothing.
   969   assert(_g1h->g1_committed().contains(addr),
   970          "address should be within the heap bounds");
   972   if (!_nextMarkBitMap->isMarked(addr))
   973     _nextMarkBitMap->parMark(addr);
   974 }
   976 void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
   977   // The objects on the region have already been marked "in bulk" by
   978   // the caller. We only need to decide whether to push the region on
   979   // the region stack or not.
   981   if (!concurrent_marking_in_progress() || !_should_gray_objects)
   982     // We're done with marking and waiting for remark. We do not need to
   983     // push anything else on the region stack.
   984     return;
   986   HeapWord* finger = _finger;
   988   if (verbose_low())
   989     gclog_or_tty->print_cr("[global] attempting to push "
   990                            "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
   991                            PTR_FORMAT, mr.start(), mr.end(), finger);
   993   if (mr.start() < finger) {
   994     // The finger is always heap region aligned and it is not possible
   995     // for mr to span heap regions.
   996     assert(mr.end() <= finger, "invariant");
   998     // Separated the asserts so that we know which one fires.
   999     assert(mr.start() <= mr.end(),
  1000            "region boundaries should fall within the committed space");
  1001     assert(_heap_start <= mr.start(),
  1002            "region boundaries should fall within the committed space");
  1003     assert(mr.end() <= _heap_end,
  1004            "region boundaries should fall within the committed space");
  1005     if (verbose_low())
  1006       gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
  1007                              "below the finger, pushing it",
  1008                              mr.start(), mr.end());
  1010     if (!region_stack_push_lock_free(mr)) {
  1011       if (verbose_low())
  1012         gclog_or_tty->print_cr("[global] region stack has overflown.");
  1013     }
  1014   }
  1015 }
  1017 void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
  1018   // The object is not marked by the caller. We need to at least mark
  1019   // it and maybe push it on the stack.
  1021   HeapWord* addr = (HeapWord*)p;
  1022   if (!_nextMarkBitMap->isMarked(addr)) {
  1023     // We definitely need to mark it, irrespective of whether we bail out
  1024     // because we're done with marking.
  1025     if (_nextMarkBitMap->parMark(addr)) {
  1026       if (!concurrent_marking_in_progress() || !_should_gray_objects)
  1027         // If we're done with concurrent marking and we're waiting for
  1028         // remark, then we're not pushing anything on the stack.
  1029         return;
  1031       // No OrderAccess::store_load() is needed. It is implicit in the
  1032       // CAS done in parMark(addr) above
  1033       HeapWord* finger = _finger;
  1035       if (addr < finger) {
  1036         if (!mark_stack_push(oop(addr))) {
  1037           if (verbose_low())
  1038             gclog_or_tty->print_cr("[global] global stack overflow "
  1039                                    "during parMark");
  1040         }
  1041       }
  1042     }
  1043   }
  1044 }
  1046 class CMConcurrentMarkingTask: public AbstractGangTask {
  1047 private:
  1048   ConcurrentMark*       _cm;
  1049   ConcurrentMarkThread* _cmt;
  1051 public:
  1052   void work(int worker_i) {
  1053     assert(Thread::current()->is_ConcurrentGC_thread(),
  1054            "this should only be done by a conc GC thread");
  1055     ResourceMark rm;
  1057     double start_vtime = os::elapsedVTime();
  1059     ConcurrentGCThread::stsJoin();
  1061     assert((size_t) worker_i < _cm->active_tasks(), "invariant");
  1062     CMTask* the_task = _cm->task(worker_i);
  1063     the_task->record_start_time();
  1064     if (!_cm->has_aborted()) {
  1065       do {
  1066         double start_vtime_sec = os::elapsedVTime();
  1067         double start_time_sec = os::elapsedTime();
  1068         the_task->do_marking_step(10.0);
  1069         double end_time_sec = os::elapsedTime();
  1070         double end_vtime_sec = os::elapsedVTime();
  1071         double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
  1072         double elapsed_time_sec = end_time_sec - start_time_sec;
  1073         _cm->clear_has_overflown();
  1075         bool ret = _cm->do_yield_check(worker_i);
  1077         jlong sleep_time_ms;
  1078         if (!_cm->has_aborted() && the_task->has_aborted()) {
  1079           sleep_time_ms =
  1080             (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
  1081           ConcurrentGCThread::stsLeave();
  1082           os::sleep(Thread::current(), sleep_time_ms, false);
  1083           ConcurrentGCThread::stsJoin();
  1084         }
  1085         double end_time2_sec = os::elapsedTime();
  1086         double elapsed_time2_sec = end_time2_sec - start_time_sec;
  1088 #if 0
  1089           gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
  1090                                  "overhead %1.4lf",
  1091                                  elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
  1092                                  the_task->conc_overhead(os::elapsedTime()) * 8.0);
  1093           gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
  1094                                  elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
  1095 #endif
  1096       } while (!_cm->has_aborted() && the_task->has_aborted());
  1097     }
  1098     the_task->record_end_time();
  1099     guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
  1101     ConcurrentGCThread::stsLeave();
  1103     double end_vtime = os::elapsedVTime();
  1104     _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
  1105   }
  1107   CMConcurrentMarkingTask(ConcurrentMark* cm,
  1108                           ConcurrentMarkThread* cmt) :
  1109       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
  1111   ~CMConcurrentMarkingTask() { }
  1112 };
  1114 void ConcurrentMark::markFromRoots() {
  1115   // we might be tempted to assert that:
  1116   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  1117   //        "inconsistent argument?");
  1118   // However that wouldn't be right, because it's possible that
  1119   // a safepoint is indeed in progress as a younger generation
  1120   // stop-the-world GC happens even as we mark in this generation.
  1122   _restart_for_overflow = false;
  1124   set_phase(MAX2((size_t) 1, parallel_marking_threads()), true);
  1126   CMConcurrentMarkingTask markingTask(this, cmThread());
  1127   if (parallel_marking_threads() > 0)
  1128     _parallel_workers->run_task(&markingTask);
  1129   else
  1130     markingTask.work(0);
  1131   print_stats();
  1132 }
  1134 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  1135   // world is stopped at this checkpoint
  1136   assert(SafepointSynchronize::is_at_safepoint(),
  1137          "world should be stopped");
  1138   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  1140   // If a full collection has happened, we shouldn't do this.
  1141   if (has_aborted()) {
  1142     g1h->set_marking_complete(); // So bitmap clearing isn't confused
  1143     return;
  1144   }
  1146   SvcGCMarker sgcm(SvcGCMarker::OTHER);
  1148   if (VerifyDuringGC) {
  1149     HandleMark hm;  // handle scope
  1150     gclog_or_tty->print(" VerifyDuringGC:(before)");
  1151     Universe::heap()->prepare_for_verify();
  1152     Universe::verify(true, false, true);
  1153   }
  1155   G1CollectorPolicy* g1p = g1h->g1_policy();
  1156   g1p->record_concurrent_mark_remark_start();
  1158   double start = os::elapsedTime();
  1160   checkpointRootsFinalWork();
  1162   double mark_work_end = os::elapsedTime();
  1164   weakRefsWork(clear_all_soft_refs);
  1166   if (has_overflown()) {
  1167     // Oops.  We overflowed.  Restart concurrent marking.
  1168     _restart_for_overflow = true;
  1169     // Clear the flag. We do not need it any more.
  1170     clear_has_overflown();
  1171     if (G1TraceMarkStackOverflow)
  1172       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
  1173   } else {
  1174     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  1175     // We're done with marking.
  1176     // This is the end of the marking cycle; we expect all
  1177     // threads to have SATB queues with active set to true.
  1178     satb_mq_set.set_active_all_threads(false, /* new active value */
  1179                                        true /* expected_active */);
  1181     if (VerifyDuringGC) {
  1182       HandleMark hm;  // handle scope
  1183       gclog_or_tty->print(" VerifyDuringGC:(after)");
  1184       Universe::heap()->prepare_for_verify();
  1185       Universe::heap()->verify(/* allow_dirty */      true,
  1186                                /* silent */           false,
  1187                                /* use_prev_marking */ false);
  1188     }
  1189   }
  1191 #if VERIFY_OBJS_PROCESSED
  1192   _scan_obj_cl.objs_processed = 0;
  1193   ThreadLocalObjQueue::objs_enqueued = 0;
  1194 #endif
  1196   // Statistics
  1197   double now = os::elapsedTime();
  1198   _remark_mark_times.add((mark_work_end - start) * 1000.0);
  1199   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  1200   _remark_times.add((now - start) * 1000.0);
  1202   g1p->record_concurrent_mark_remark_end();
  1203 }
  1206 #define CARD_BM_TEST_MODE 0
  1208 class CalcLiveObjectsClosure: public HeapRegionClosure {
  1210   CMBitMapRO* _bm;
  1211   ConcurrentMark* _cm;
  1212   bool _changed;
  1213   bool _yield;
  1214   size_t _words_done;
  1215   size_t _tot_live;
  1216   size_t _tot_used;
  1217   size_t _regions_done;
  1218   double _start_vtime_sec;
  1220   BitMap* _region_bm;
  1221   BitMap* _card_bm;
  1222   intptr_t _bottom_card_num;
  1223   bool _final;
  1225   void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
  1226     for (intptr_t i = start_card_num; i <= last_card_num; i++) {
  1227 #if CARD_BM_TEST_MODE
  1228       guarantee(_card_bm->at(i - _bottom_card_num), "Should already be set.");
  1229 #else
  1230       _card_bm->par_at_put(i - _bottom_card_num, 1);
  1231 #endif
  1232     }
  1233   }
  1235 public:
  1236   CalcLiveObjectsClosure(bool final,
  1237                          CMBitMapRO *bm, ConcurrentMark *cm,
  1238                          BitMap* region_bm, BitMap* card_bm) :
  1239     _bm(bm), _cm(cm), _changed(false), _yield(true),
  1240     _words_done(0), _tot_live(0), _tot_used(0),
  1241     _region_bm(region_bm), _card_bm(card_bm),_final(final),
  1242     _regions_done(0), _start_vtime_sec(0.0)
  1243   {
  1244     _bottom_card_num =
  1245       intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
  1246                CardTableModRefBS::card_shift);
  1247   }
  1249   // It takes a region that's not empty (i.e., it has at least one
  1250   // live object in it) and sets its corresponding bit on the region
  1251   // bitmap to 1. If the region is "starts humongous" it will also set
  1252   // to 1 the bits on the region bitmap that correspond to its
  1253   // associated "continues humongous" regions.
  1254   void set_bit_for_region(HeapRegion* hr) {
  1255     assert(!hr->continuesHumongous(), "should have filtered those out");
  1257     size_t index = hr->hrs_index();
  1258     if (!hr->startsHumongous()) {
  1259       // Normal (non-humongous) case: just set the bit.
  1260       _region_bm->par_at_put((BitMap::idx_t) index, true);
  1261     } else {
  1262       // Starts humongous case: calculate how many regions are part of
  1263       // this humongous region and then set the bit range. It might
  1264       // have been a bit more efficient to look at the object that
  1265       // spans these humongous regions to calculate their number from
  1266       // the object's size. However, it's a good idea to calculate
  1267       // this based on the metadata itself, and not the region
  1268       // contents, so that this code is not aware of what goes into
  1269       // the humongous regions (in case this changes in the future).
  1270       G1CollectedHeap* g1h = G1CollectedHeap::heap();
  1271       size_t end_index = index + 1;
  1272       while (end_index < g1h->n_regions()) {
  1273         HeapRegion* chr = g1h->region_at(end_index);
  1274         if (!chr->continuesHumongous()) {
  1275           break;
  1276         }
  1277         end_index += 1;
  1278       }
  1279       _region_bm->par_at_put_range((BitMap::idx_t) index,
  1280                                    (BitMap::idx_t) end_index, true);
  1281     }
  1282   }
  1284   bool doHeapRegion(HeapRegion* hr) {
  1285     if (!_final && _regions_done == 0)
  1286       _start_vtime_sec = os::elapsedVTime();
  1288     if (hr->continuesHumongous()) {
  1289       // We will ignore these here and process them when their
  1290       // associated "starts humongous" region is processed (see
  1291       // set_bit_for_heap_region()). Note that we cannot rely on their
  1292       // associated "starts humongous" region to have their bit set to
  1293       // 1 since, due to the region chunking in the parallel region
  1294       // iteration, a "continues humongous" region might be visited
  1295       // before its associated "starts humongous".
  1296       return false;
  1297     }
  1299     HeapWord* nextTop = hr->next_top_at_mark_start();
  1300     HeapWord* start   = hr->top_at_conc_mark_count();
  1301     assert(hr->bottom() <= start && start <= hr->end() &&
  1302            hr->bottom() <= nextTop && nextTop <= hr->end() &&
  1303            start <= nextTop,
  1304            "Preconditions.");
  1305     // Otherwise, record the number of words we'll examine.
  1306     size_t words_done = (nextTop - start);
  1307     // Find the first marked object at or after "start".
  1308     start = _bm->getNextMarkedWordAddress(start, nextTop);
  1309     size_t marked_bytes = 0;
  1311     // Below, the term "card num" means the result of shifting an address
  1312     // by the card shift -- address 0 corresponds to card number 0.  One
  1313     // must subtract the card num of the bottom of the heap to obtain a
  1314     // card table index.
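           // (For example, with the usual 512-byte cards, card_shift == 9,
           // so address 0x1200 has card num 0x1200 >> 9 == 9.)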
  1315     // The first card num of the sequence of live cards currently being
  1316     // constructed.  -1 ==> no sequence.
  1317     intptr_t start_card_num = -1;
  1318     // The last card num of the sequence of live cards currently being
  1319     // constructed.  -1 ==> no sequence.
  1320     intptr_t last_card_num = -1;
  1322     while (start < nextTop) {
  1323       if (_yield && _cm->do_yield_check()) {
  1324         // We yielded.  It might be for a full collection, in which case
  1325         // all bets are off; terminate the traversal.
  1326         if (_cm->has_aborted()) {
  1327           _changed = false;
  1328           return true;
  1329         } else {
  1330           // Otherwise, it might be a collection pause, and the region
  1331           // we're looking at might be in the collection set.  We'll
  1332           // abandon this region.
  1333           return false;
  1334         }
  1335       }
  1336       oop obj = oop(start);
  1337       int obj_sz = obj->size();
  1338       // The card num of the start of the current object.
  1339       intptr_t obj_card_num =
  1340         intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
  1342       HeapWord* obj_last = start + obj_sz - 1;
  1343       intptr_t obj_last_card_num =
  1344         intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);
  1346       if (obj_card_num != last_card_num) {
  1347         if (start_card_num == -1) {
  1348           assert(last_card_num == -1, "Both or neither.");
  1349           start_card_num = obj_card_num;
  1350         } else {
  1351           assert(last_card_num != -1, "Both or neither.");
  1352           assert(obj_card_num >= last_card_num, "Inv");
  1353           if ((obj_card_num - last_card_num) > 1) {
  1354             // Mark the last run, and start a new one.
  1355             mark_card_num_range(start_card_num, last_card_num);
  1356             start_card_num = obj_card_num;
  1357           }
  1358         }
  1359 #if CARD_BM_TEST_MODE
  1360         /*
  1361         gclog_or_tty->print_cr("Setting bits from %d/%d.",
  1362                                obj_card_num - _bottom_card_num,
  1363                                obj_last_card_num - _bottom_card_num);
  1364         */
  1365         for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
  1366           _card_bm->par_at_put(j - _bottom_card_num, 1);
  1367         }
  1368 #endif
  1369       }
  1370       // In any case, we set the last card num.
  1371       last_card_num = obj_last_card_num;
  1373       marked_bytes += (size_t)obj_sz * HeapWordSize;
  1374       // Find the next marked object after this one.
  1375       start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
  1376       _changed = true;
  1377     }
  1378     // Handle the last range, if any.
  1379     if (start_card_num != -1)
  1380       mark_card_num_range(start_card_num, last_card_num);
  1381     if (_final) {
  1382       // Mark the allocated-since-marking portion...
  1383       HeapWord* tp = hr->top();
  1384       if (nextTop < tp) {
  1385         start_card_num =
  1386           intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
  1387         last_card_num =
  1388           intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
  1389         mark_card_num_range(start_card_num, last_card_num);
  1390         // This definitely means the region has live objects.
  1391         set_bit_for_region(hr);
  1395     hr->add_to_marked_bytes(marked_bytes);
  1396     // Update the live region bitmap.
  1397     if (marked_bytes > 0) {
  1398       set_bit_for_region(hr);
  1400     hr->set_top_at_conc_mark_count(nextTop);
  1401     _tot_live += hr->next_live_bytes();
  1402     _tot_used += hr->used();
  1403     _words_done = words_done;
  1405     if (!_final) {
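             // A concurrent (non-final) pass throttles itself here: every 10
             // regions, if more than 10ms of vtime has been consumed, sleep
             // for a period proportional to it (scaled by cleanup_sleep_factor)
             // so the concurrent thread does not hog the CPU.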
  1406       ++_regions_done;
  1407       if (_regions_done % 10 == 0) {
  1408         double end_vtime_sec = os::elapsedVTime();
  1409         double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
  1410         if (elapsed_vtime_sec > (10.0 / 1000.0)) {
  1411           jlong sleep_time_ms =
  1412             (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
  1413           os::sleep(Thread::current(), sleep_time_ms, false);
  1414           _start_vtime_sec = end_vtime_sec;
  1419     return false;
  1422   bool changed() { return _changed;  }
  1423   void reset()   { _changed = false; _words_done = 0; }
  1424   void no_yield() { _yield = false; }
  1425   size_t words_done() { return _words_done; }
  1426   size_t tot_live() { return _tot_live; }
  1427   size_t tot_used() { return _tot_used; }
  1428 };
  1431 void ConcurrentMark::calcDesiredRegions() {
  1432   _region_bm.clear();
  1433   _card_bm.clear();
  1434   CalcLiveObjectsClosure calccl(false /*final*/,
  1435                                 nextMarkBitMap(), this,
  1436                                 &_region_bm, &_card_bm);
  1437   G1CollectedHeap *g1h = G1CollectedHeap::heap();
  1438   g1h->heap_region_iterate(&calccl);
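         // Regions may be abandoned when the closure yields mid-pass, so
         // repeat full passes until one completes without making changes.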
  1440   do {
  1441     calccl.reset();
  1442     g1h->heap_region_iterate(&calccl);
  1443   } while (calccl.changed());
  1446 class G1ParFinalCountTask: public AbstractGangTask {
  1447 protected:
  1448   G1CollectedHeap* _g1h;
  1449   CMBitMap* _bm;
  1450   size_t _n_workers;
  1451   size_t *_live_bytes;
  1452   size_t *_used_bytes;
  1453   BitMap* _region_bm;
  1454   BitMap* _card_bm;
  1455 public:
  1456   G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
  1457                       BitMap* region_bm, BitMap* card_bm) :
  1458     AbstractGangTask("G1 final counting"), _g1h(g1h),
  1459     _bm(bm), _region_bm(region_bm), _card_bm(card_bm)
  1461     if (ParallelGCThreads > 0)
  1462       _n_workers = _g1h->workers()->total_workers();
  1463     else
  1464       _n_workers = 1;
  1465     _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
  1466     _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
  1469   ~G1ParFinalCountTask() {
  1470     FREE_C_HEAP_ARRAY(size_t, _live_bytes);
  1471     FREE_C_HEAP_ARRAY(size_t, _used_bytes);
  1474   void work(int i) {
  1475     CalcLiveObjectsClosure calccl(true /*final*/,
  1476                                   _bm, _g1h->concurrent_mark(),
  1477                                   _region_bm, _card_bm);
  1478     calccl.no_yield();
  1479     if (G1CollectedHeap::use_parallel_gc_threads()) {
  1480       _g1h->heap_region_par_iterate_chunked(&calccl, i,
  1481                                             HeapRegion::FinalCountClaimValue);
  1482     } else {
  1483       _g1h->heap_region_iterate(&calccl);
  1485     assert(calccl.complete(), "Shouldn't have yielded!");
  1487     assert((size_t) i < _n_workers, "invariant");
  1488     _live_bytes[i] = calccl.tot_live();
  1489     _used_bytes[i] = calccl.tot_used();
  1491   size_t live_bytes()  {
  1492     size_t live_bytes = 0;
  1493     for (size_t i = 0; i < _n_workers; ++i)
  1494       live_bytes += _live_bytes[i];
  1495     return live_bytes;
  1497   size_t used_bytes()  {
  1498     size_t used_bytes = 0;
  1499     for (size_t i = 0; i < _n_workers; ++i)
  1500       used_bytes += _used_bytes[i];
  1501     return used_bytes;
  1503 };
  1505 class G1ParNoteEndTask;
  1507 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  1508   G1CollectedHeap* _g1;
  1509   int _worker_num;
  1510   size_t _max_live_bytes;
  1511   size_t _regions_claimed;
  1512   size_t _freed_bytes;
  1513   size_t _cleared_h_regions;
  1514   size_t _freed_regions;
  1515   UncleanRegionList* _unclean_region_list;
  1516   double _claimed_region_time;
  1517   double _max_region_time;
  1519 public:
  1520   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
  1521                              UncleanRegionList* list,
  1522                              int worker_num);
  1523   size_t freed_bytes() { return _freed_bytes; }
  1524   size_t cleared_h_regions() { return _cleared_h_regions; }
  1525   size_t freed_regions() { return  _freed_regions; }
  1526   UncleanRegionList* unclean_region_list() {
  1527     return _unclean_region_list;
  1530   bool doHeapRegion(HeapRegion *r);
  1532   size_t max_live_bytes() { return _max_live_bytes; }
  1533   size_t regions_claimed() { return _regions_claimed; }
  1534   double claimed_region_time_sec() { return _claimed_region_time; }
  1535   double max_region_time_sec() { return _max_region_time; }
  1536 };
  1538 class G1ParNoteEndTask: public AbstractGangTask {
  1539   friend class G1NoteEndOfConcMarkClosure;
  1540 protected:
  1541   G1CollectedHeap* _g1h;
  1542   size_t _max_live_bytes;
  1543   size_t _freed_bytes;
  1544   ConcurrentMark::ParCleanupThreadState** _par_cleanup_thread_state;
  1545 public:
  1546   G1ParNoteEndTask(G1CollectedHeap* g1h,
  1547                    ConcurrentMark::ParCleanupThreadState**
  1548                    par_cleanup_thread_state) :
  1549     AbstractGangTask("G1 note end"), _g1h(g1h),
  1550     _max_live_bytes(0), _freed_bytes(0),
  1551     _par_cleanup_thread_state(par_cleanup_thread_state)
  1552   {}
  1554   void work(int i) {
  1555     double start = os::elapsedTime();
  1556     G1NoteEndOfConcMarkClosure g1_note_end(_g1h,
  1557                                            &_par_cleanup_thread_state[i]->list,
  1558                                            i);
  1559     if (G1CollectedHeap::use_parallel_gc_threads()) {
  1560       _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
  1561                                             HeapRegion::NoteEndClaimValue);
  1562     } else {
  1563       _g1h->heap_region_iterate(&g1_note_end);
  1565     assert(g1_note_end.complete(), "Shouldn't have yielded!");
  1567     // Now finish up freeing the current thread's regions.
  1568     _g1h->finish_free_region_work(g1_note_end.freed_bytes(),
  1569                                   g1_note_end.cleared_h_regions(),
  1570                                   0, NULL);
  1572       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  1573       _max_live_bytes += g1_note_end.max_live_bytes();
  1574       _freed_bytes += g1_note_end.freed_bytes();
  1576     double end = os::elapsedTime();
  1577     if (G1PrintParCleanupStats) {
  1578       gclog_or_tty->print("     Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
  1579                           "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
  1580                           i, start, end, (end-start)*1000.0,
  1581                           g1_note_end.regions_claimed(),
  1582                           g1_note_end.claimed_region_time_sec()*1000.0,
  1583                           g1_note_end.max_region_time_sec()*1000.0);
  1586   size_t max_live_bytes() { return _max_live_bytes; }
  1587   size_t freed_bytes() { return _freed_bytes; }
  1588 };
  1590 class G1ParScrubRemSetTask: public AbstractGangTask {
  1591 protected:
  1592   G1RemSet* _g1rs;
  1593   BitMap* _region_bm;
  1594   BitMap* _card_bm;
  1595 public:
  1596   G1ParScrubRemSetTask(G1CollectedHeap* g1h,
  1597                        BitMap* region_bm, BitMap* card_bm) :
  1598     AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
  1599     _region_bm(region_bm), _card_bm(card_bm)
  1600   {}
  1602   void work(int i) {
  1603     if (G1CollectedHeap::use_parallel_gc_threads()) {
  1604       _g1rs->scrub_par(_region_bm, _card_bm, i,
  1605                        HeapRegion::ScrubRemSetClaimValue);
  1606     } else {
  1607       _g1rs->scrub(_region_bm, _card_bm);
  1611 };
  1613 G1NoteEndOfConcMarkClosure::
  1614 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
  1615                            UncleanRegionList* list,
  1616                            int worker_num)
  1617   : _g1(g1), _worker_num(worker_num),
  1618     _max_live_bytes(0), _regions_claimed(0),
  1619     _freed_bytes(0), _cleared_h_regions(0), _freed_regions(0),
  1620     _claimed_region_time(0.0), _max_region_time(0.0),
  1621     _unclean_region_list(list)
  1622 {}
  1624 bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *r) {
  1625   // We use a claim value of zero here because all regions
  1626   // were claimed with value 1 in the FinalCount task.
  1627   r->reset_gc_time_stamp();
  1628   if (!r->continuesHumongous()) {
  1629     double start = os::elapsedTime();
  1630     _regions_claimed++;
  1631     r->note_end_of_marking();
  1632     _max_live_bytes += r->max_live_bytes();
  1633     _g1->free_region_if_totally_empty_work(r,
  1634                                            _freed_bytes,
  1635                                            _cleared_h_regions,
  1636                                            _freed_regions,
  1637                                            _unclean_region_list,
  1638                                            true /*par*/);
  1639     double region_time = (os::elapsedTime() - start);
  1640     _claimed_region_time += region_time;
  1641     if (region_time > _max_region_time) _max_region_time = region_time;
  1643   return false;
  1646 void ConcurrentMark::cleanup() {
  1647   // world is stopped at this checkpoint
  1648   assert(SafepointSynchronize::is_at_safepoint(),
  1649          "world should be stopped");
  1650   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  1652   // If a full collection has happened, we shouldn't do this.
  1653   if (has_aborted()) {
  1654     g1h->set_marking_complete(); // So bitmap clearing isn't confused
  1655     return;
  1658   if (VerifyDuringGC) {
  1659     HandleMark hm;  // handle scope
  1660     gclog_or_tty->print(" VerifyDuringGC:(before)");
  1661     Universe::heap()->prepare_for_verify();
  1662     Universe::verify(/* allow dirty  */ true,
  1663                      /* silent       */ false,
  1664                      /* prev marking */ true);
  1667   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
  1668   g1p->record_concurrent_mark_cleanup_start();
  1670   double start = os::elapsedTime();
  1672   // Do counting once more with the world stopped for good measure.
  1673   G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
  1674                                         &_region_bm, &_card_bm);
  1675   if (G1CollectedHeap::use_parallel_gc_threads()) {
  1676     assert(g1h->check_heap_region_claim_values(
  1677                                                HeapRegion::InitialClaimValue),
  1678            "sanity check");
  1680     int n_workers = g1h->workers()->total_workers();
  1681     g1h->set_par_threads(n_workers);
  1682     g1h->workers()->run_task(&g1_par_count_task);
  1683     g1h->set_par_threads(0);
  1685     assert(g1h->check_heap_region_claim_values(
  1686                                              HeapRegion::FinalCountClaimValue),
  1687            "sanity check");
  1688   } else {
  1689     g1_par_count_task.work(0);
  1692   size_t known_garbage_bytes =
  1693     g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
  1694 #if 0
  1695   gclog_or_tty->print_cr("used %1.2lf, live %1.2lf, garbage %1.2lf",
  1696                          (double) g1_par_count_task.used_bytes() / (double) (1024 * 1024),
  1697                          (double) g1_par_count_task.live_bytes() / (double) (1024 * 1024),
  1698                          (double) known_garbage_bytes / (double) (1024 * 1024));
  1699 #endif // 0
  1700   g1p->set_known_garbage_bytes(known_garbage_bytes);
  1702   size_t start_used_bytes = g1h->used();
  1703   _at_least_one_mark_complete = true;
  1704   g1h->set_marking_complete();
  1706   double count_end = os::elapsedTime();
  1707   double this_final_counting_time = (count_end - start);
  1708   if (G1PrintParCleanupStats) {
  1709     gclog_or_tty->print_cr("Cleanup:");
  1710     gclog_or_tty->print_cr("  Finalize counting: %8.3f ms",
  1711                            this_final_counting_time*1000.0);
  1713   _total_counting_time += this_final_counting_time;
  1715   // Install newly created mark bitMap as "prev".
  1716   swapMarkBitMaps();
  1718   g1h->reset_gc_time_stamp();
  1720   // Note end of marking in all heap regions.
  1721   double note_end_start = os::elapsedTime();
  1722   G1ParNoteEndTask g1_par_note_end_task(g1h, _par_cleanup_thread_state);
  1723   if (G1CollectedHeap::use_parallel_gc_threads()) {
  1724     int n_workers = g1h->workers()->total_workers();
  1725     g1h->set_par_threads(n_workers);
  1726     g1h->workers()->run_task(&g1_par_note_end_task);
  1727     g1h->set_par_threads(0);
  1729     assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
  1730            "sanity check");
  1731   } else {
  1732     g1_par_note_end_task.work(0);
  1734   g1h->set_unclean_regions_coming(true);
  1735   double note_end_end = os::elapsedTime();
  1736   // Tell the mutators that there might be unclean regions coming...
  1737   if (G1PrintParCleanupStats) {
  1738     gclog_or_tty->print_cr("  note end of marking: %8.3f ms.",
  1739                            (note_end_end - note_end_start)*1000.0);
  1743   // Scrub the remembered sets. This must be done *before* the cleanup-end
  1744   // call below, since it affects the metric by which we sort the heap regions.
  1745   if (G1ScrubRemSets) {
  1746     double rs_scrub_start = os::elapsedTime();
  1747     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
  1748     if (G1CollectedHeap::use_parallel_gc_threads()) {
  1749       int n_workers = g1h->workers()->total_workers();
  1750       g1h->set_par_threads(n_workers);
  1751       g1h->workers()->run_task(&g1_par_scrub_rs_task);
  1752       g1h->set_par_threads(0);
  1754       assert(g1h->check_heap_region_claim_values(
  1755                                             HeapRegion::ScrubRemSetClaimValue),
  1756              "sanity check");
  1757     } else {
  1758       g1_par_scrub_rs_task.work(0);
  1761     double rs_scrub_end = os::elapsedTime();
  1762     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
  1763     _total_rs_scrub_time += this_rs_scrub_time;
  1766   // Let the policy finish up cleanup; this will also free any regions
  1767   // totally full of garbage objects, and sort the regions.
  1768   g1h->g1_policy()->record_concurrent_mark_cleanup_end(
  1769                         g1_par_note_end_task.freed_bytes(),
  1770                         g1_par_note_end_task.max_live_bytes());
  1772   // Statistics.
  1773   double end = os::elapsedTime();
  1774   _cleanup_times.add((end - start) * 1000.0);
  1776   // G1CollectedHeap::heap()->print();
  1777   // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
  1778   // G1CollectedHeap::heap()->get_gc_time_stamp());
  1780   if (PrintGC || PrintGCDetails) {
  1781     g1h->print_size_transition(gclog_or_tty,
  1782                                start_used_bytes,
  1783                                g1h->used(),
  1784                                g1h->capacity());
  1787   size_t cleaned_up_bytes = start_used_bytes - g1h->used();
  1788   g1p->decrease_known_garbage_bytes(cleaned_up_bytes);
  1790   // We need to make this be a "collection" so any collection pause that
  1791   // races with it goes around and waits for completeCleanup to finish.
  1792   g1h->increment_total_collections();
  1794   if (VerifyDuringGC) {
  1795     HandleMark hm;  // handle scope
  1796     gclog_or_tty->print(" VerifyDuringGC:(after)");
  1797     Universe::heap()->prepare_for_verify();
  1798     Universe::verify(/* allow dirty  */ true,
  1799                      /* silent       */ false,
  1800                      /* prev marking */ true);
  1804 void ConcurrentMark::completeCleanup() {
  1805   // A full collection intervened.
  1806   if (has_aborted()) return;
  1808   int first = 0;
  1809   int last = (int)MAX2(ParallelGCThreads, (size_t)1);
  1810   for (int t = 0; t < last; t++) {
  1811     UncleanRegionList* list = &_par_cleanup_thread_state[t]->list;
  1812     assert(list->well_formed(), "Inv");
  1813     HeapRegion* hd = list->hd();
  1814     while (hd != NULL) {
  1815       // Clear the remembered set and move the region to the unclean list.
  1816       hd->rem_set()->clear();
  1817       HeapRegion* next_hd = hd->next_from_unclean_list();
  1818       (void)list->pop();
  1819       assert(list->hd() == next_hd, "how not?");
  1820       _g1h->put_region_on_unclean_list(hd);
  1821       if (!hd->isHumongous()) {
  1822         // This adds one to the _free_regions count.
  1823         _g1h->finish_free_region_work(0, 0, 1, NULL);
  1825       hd = list->hd();
  1826       assert(hd == next_hd, "how not?");
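       // Liveness predicate used during reference processing: anything
       // outside the G1 reserved heap (e.g. a perm gen oop) is treated as
       // live, and a heap object is live unless the marking information
       // says otherwise (is_obj_ill()).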
  1831 bool G1CMIsAliveClosure::do_object_b(oop obj) {
  1832   HeapWord* addr = (HeapWord*)obj;
  1833   return addr != NULL &&
  1834          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
  1837 class G1CMKeepAliveClosure: public OopClosure {
  1838   G1CollectedHeap* _g1;
  1839   ConcurrentMark*  _cm;
  1840   CMBitMap*        _bitMap;
  1841  public:
  1842   G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
  1843                        CMBitMap* bitMap) :
  1844     _g1(g1), _cm(cm),
  1845     _bitMap(bitMap) {}
  1847   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  1848   virtual void do_oop(      oop* p) { do_oop_work(p); }
  1850   template <class T> void do_oop_work(T* p) {
  1851     oop thisOop = oopDesc::load_decode_heap_oop(p);
  1852     HeapWord* addr = (HeapWord*)thisOop;
  1853     if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(thisOop)) {
  1854       _bitMap->mark(addr);
  1855       _cm->mark_stack_push(thisOop);
  1858 };
  1860 class G1CMDrainMarkingStackClosure: public VoidClosure {
  1861   CMMarkStack*                  _markStack;
  1862   CMBitMap*                     _bitMap;
  1863   G1CMKeepAliveClosure*         _oopClosure;
  1864  public:
  1865   G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
  1866                                G1CMKeepAliveClosure* oopClosure) :
  1867     _bitMap(bitMap),
  1868     _markStack(markStack),
  1869     _oopClosure(oopClosure)
  1870   {}
  1872   void do_void() {
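           // The 'false' argument tells CMMarkStack::drain() not to yield
           // while draining; this runs inside the remark pause, so there is
           // nothing to yield to.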
  1873     _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
  1875 };
  1877 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
  1878   ResourceMark rm;
  1879   HandleMark   hm;
  1880   G1CollectedHeap* g1h   = G1CollectedHeap::heap();
  1881   ReferenceProcessor* rp = g1h->ref_processor();
  1883   // See the comment in G1CollectedHeap::ref_processing_init()
  1884   // about how reference processing currently works in G1.
  1886   // Process weak references.
  1887   rp->setup_policy(clear_all_soft_refs);
  1888   assert(_markStack.isEmpty(), "mark stack should be empty");
  1890   G1CMIsAliveClosure   g1_is_alive(g1h);
  1891   G1CMKeepAliveClosure g1_keep_alive(g1h, this, nextMarkBitMap());
  1892   G1CMDrainMarkingStackClosure
  1893     g1_drain_mark_stack(nextMarkBitMap(), &_markStack, &g1_keep_alive);
  1895   // XXXYYY  Also: copy the parallel ref processing code from CMS.
  1896   rp->process_discovered_references(&g1_is_alive,
  1897                                     &g1_keep_alive,
  1898                                     &g1_drain_mark_stack,
  1899                                     NULL);
  1900   assert(_markStack.overflow() || _markStack.isEmpty(),
  1901          "mark stack should be empty (unless it overflowed)");
  1902   if (_markStack.overflow()) {
  1903     set_has_overflown();
  1906   rp->enqueue_discovered_references();
  1907   rp->verify_no_references_recorded();
  1908   assert(!rp->discovery_enabled(), "should have been disabled");
  1910   // Now clean up stale oops in SymbolTable and StringTable
  1911   SymbolTable::unlink(&g1_is_alive);
  1912   StringTable::unlink(&g1_is_alive);
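       // After remark the "next" bitmap holds complete marking information,
       // so it becomes the new "prev" bitmap; the old "prev" storage will be
       // cleared and reused for the next marking cycle.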
  1915 void ConcurrentMark::swapMarkBitMaps() {
  1916   CMBitMapRO* temp = _prevMarkBitMap;
  1917   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
  1918   _nextMarkBitMap  = (CMBitMap*)  temp;
  1921 class CMRemarkTask: public AbstractGangTask {
  1922 private:
  1923   ConcurrentMark *_cm;
  1925 public:
  1926   void work(int worker_i) {
  1927     // Since all available tasks are actually started, we should
  1928     // only proceed if we're supposed to be active.
  1929     if ((size_t)worker_i < _cm->active_tasks()) {
  1930       CMTask* task = _cm->task(worker_i);
  1931       task->record_start_time();
  1932       do {
  1933         task->do_marking_step(1000000000.0 /* something very large */);
  1934       } while (task->has_aborted() && !_cm->has_overflown());
  1935       // If we overflow, then we do not want to restart. We instead
  1936       // want to abort remark and do concurrent marking again.
  1937       task->record_end_time();
  1941   CMRemarkTask(ConcurrentMark* cm) :
  1942     AbstractGangTask("Par Remark"), _cm(cm) { }
  1943 };
  1945 void ConcurrentMark::checkpointRootsFinalWork() {
  1946   ResourceMark rm;
  1947   HandleMark   hm;
  1948   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  1950   g1h->ensure_parsability(false);
  1952   if (G1CollectedHeap::use_parallel_gc_threads()) {
  1953     G1CollectedHeap::StrongRootsScope srs(g1h);
  1954     // this is remark, so we'll use up all available threads
  1955     int active_workers = ParallelGCThreads;
  1956     set_phase(active_workers, false);
  1958     CMRemarkTask remarkTask(this);
  1959     // We will start all available threads, even if we decide that the
  1960     // active_workers will be fewer. The extra ones will just bail out
  1961     // immediately.
  1962     int n_workers = g1h->workers()->total_workers();
  1963     g1h->set_par_threads(n_workers);
  1964     g1h->workers()->run_task(&remarkTask);
  1965     g1h->set_par_threads(0);
  1966   } else {
  1967     G1CollectedHeap::StrongRootsScope srs(g1h);
  1968     // this is remark, so we'll use up all available threads
  1969     int active_workers = 1;
  1970     set_phase(active_workers, false);
  1972     CMRemarkTask remarkTask(this);
  1973     // In the serial case there is only one thread, so we simply call
  1974     // work(0) directly.
  1976     remarkTask.work(0);
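         // The remark tasks above must have processed all SATB buffers, so
         // check that no completed buffers remain.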
  1978   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  1979   guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");
  1981   print_stats();
  1983   if (!restart_for_overflow())
  1984     set_non_marking_state();
  1986 #if VERIFY_OBJS_PROCESSED
  1987   if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
  1988     gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
  1989                            _scan_obj_cl.objs_processed,
  1990                            ThreadLocalObjQueue::objs_enqueued);
  1991     guarantee(_scan_obj_cl.objs_processed ==
  1992               ThreadLocalObjQueue::objs_enqueued,
  1993               "Different number of objs processed and enqueued.");
  1995 #endif
  1998 #ifndef PRODUCT
  2000 class PrintReachableOopClosure: public OopClosure {
  2001 private:
  2002   G1CollectedHeap* _g1h;
  2003   CMBitMapRO*      _bitmap;
  2004   outputStream*    _out;
  2005   bool             _use_prev_marking;
  2006   bool             _all;
  2008 public:
  2009   PrintReachableOopClosure(CMBitMapRO*   bitmap,
  2010                            outputStream* out,
  2011                            bool          use_prev_marking,
  2012                            bool          all) :
  2013     _g1h(G1CollectedHeap::heap()),
  2014     _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking), _all(all) { }
  2016   void do_oop(narrowOop* p) { do_oop_work(p); }
  2017   void do_oop(      oop* p) { do_oop_work(p); }
  2019   template <class T> void do_oop_work(T* p) {
  2020     oop         obj = oopDesc::load_decode_heap_oop(p);
  2021     const char* str = NULL;
  2022     const char* str2 = "";
  2024     if (obj == NULL) {
  2025       str = "";
  2026     } else if (!_g1h->is_in_g1_reserved(obj)) {
  2027       str = " O";
  2028     } else {
  2029       HeapRegion* hr  = _g1h->heap_region_containing(obj);
  2030       guarantee(hr != NULL, "invariant");
  2031       bool over_tams = false;
  2032       if (_use_prev_marking) {
  2033         over_tams = hr->obj_allocated_since_prev_marking(obj);
  2034       } else {
  2035         over_tams = hr->obj_allocated_since_next_marking(obj);
  2037       bool marked = _bitmap->isMarked((HeapWord*) obj);
  2039       if (over_tams) {
  2040         str = " >";
  2041         if (marked) {
  2042           str2 = " AND MARKED";
  2044       } else if (marked) {
  2045         str = " M";
  2046       } else {
  2047         str = " NOT";
  2051     _out->print_cr("  "PTR_FORMAT": "PTR_FORMAT"%s%s",
  2052                    p, (void*) obj, str, str2);
  2054 };
  2056 class PrintReachableObjectClosure : public ObjectClosure {
  2057 private:
  2058   CMBitMapRO*   _bitmap;
  2059   outputStream* _out;
  2060   bool          _use_prev_marking;
  2061   bool          _all;
  2062   HeapRegion*   _hr;
  2064 public:
  2065   PrintReachableObjectClosure(CMBitMapRO*   bitmap,
  2066                               outputStream* out,
  2067                               bool          use_prev_marking,
  2068                               bool          all,
  2069                               HeapRegion*   hr) :
  2070     _bitmap(bitmap), _out(out),
  2071     _use_prev_marking(use_prev_marking), _all(all), _hr(hr) { }
  2073   void do_object(oop o) {
  2074     bool over_tams;
  2075     if (_use_prev_marking) {
  2076       over_tams = _hr->obj_allocated_since_prev_marking(o);
  2077     } else {
  2078       over_tams = _hr->obj_allocated_since_next_marking(o);
  2080     bool marked = _bitmap->isMarked((HeapWord*) o);
  2081     bool print_it = _all || over_tams || marked;
  2083     if (print_it) {
  2084       _out->print_cr(" "PTR_FORMAT"%s",
  2085                      o, (over_tams) ? " >" : (marked) ? " M" : "");
  2086       PrintReachableOopClosure oopCl(_bitmap, _out, _use_prev_marking, _all);
  2087       o->oop_iterate(&oopCl);
  2090 };
  2092 class PrintReachableRegionClosure : public HeapRegionClosure {
  2093 private:
  2094   CMBitMapRO*   _bitmap;
  2095   outputStream* _out;
  2096   bool          _use_prev_marking;
  2097   bool          _all;
  2099 public:
  2100   bool doHeapRegion(HeapRegion* hr) {
  2101     HeapWord* b = hr->bottom();
  2102     HeapWord* e = hr->end();
  2103     HeapWord* t = hr->top();
  2104     HeapWord* p = NULL;
  2105     if (_use_prev_marking) {
  2106       p = hr->prev_top_at_mark_start();
  2107     } else {
  2108       p = hr->next_top_at_mark_start();
  2110     _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
  2111                    "TAMS: "PTR_FORMAT, b, e, t, p);
  2112     _out->cr();
  2114     HeapWord* from = b;
  2115     HeapWord* to   = t;
  2117     if (to > from) {
  2118       _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
  2119       _out->cr();
  2120       PrintReachableObjectClosure ocl(_bitmap, _out,
  2121                                       _use_prev_marking, _all, hr);
  2122       hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
  2123       _out->cr();
  2126     return false;
  2129   PrintReachableRegionClosure(CMBitMapRO*   bitmap,
  2130                               outputStream* out,
  2131                               bool          use_prev_marking,
  2132                               bool          all) :
  2133     _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking), _all(all) { }
  2134 };
  2136 void ConcurrentMark::print_reachable(const char* str,
  2137                                      bool use_prev_marking,
  2138                                      bool all) {
  2139   gclog_or_tty->cr();
  2140   gclog_or_tty->print_cr("== Doing heap dump... ");
  2142   if (G1PrintReachableBaseFile == NULL) {
  2143     gclog_or_tty->print_cr("  #### error: no base file defined");
  2144     return;
  2147   if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
  2148       (JVM_MAXPATHLEN - 1)) {
  2149     gclog_or_tty->print_cr("  #### error: file name too long");
  2150     return;
  2153   char file_name[JVM_MAXPATHLEN];
  2154   sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
  2155   gclog_or_tty->print_cr("  dumping to file %s", file_name);
  2157   fileStream fout(file_name);
  2158   if (!fout.is_open()) {
  2159     gclog_or_tty->print_cr("  #### error: could not open file");
  2160     return;
  2163   outputStream* out = &fout;
  2165   CMBitMapRO* bitmap = NULL;
  2166   if (use_prev_marking) {
  2167     bitmap = _prevMarkBitMap;
  2168   } else {
  2169     bitmap = _nextMarkBitMap;
  2172   out->print_cr("-- USING %s", (use_prev_marking) ? "PTAMS" : "NTAMS");
  2173   out->cr();
  2175   out->print_cr("--- ITERATING OVER REGIONS");
  2176   out->cr();
  2177   PrintReachableRegionClosure rcl(bitmap, out, use_prev_marking, all);
  2178   _g1h->heap_region_iterate(&rcl);
  2179   out->cr();
  2181   gclog_or_tty->print_cr("  done");
  2182   gclog_or_tty->flush();
  2185 #endif // PRODUCT
  2187 // This note applies to drainAllSATBBuffers and the code from here down
       // to that method.
  2188 // In the future we could reuse a task to do this work during an
  2189 // evacuation pause (since now tasks are not active and can be claimed
  2190 // during an evacuation pause). This was a late change to the code and
  2191 // is currently not being taken advantage of.
  2193 class CMGlobalObjectClosure : public ObjectClosure {
  2194 private:
  2195   ConcurrentMark* _cm;
  2197 public:
  2198   void do_object(oop obj) {
  2199     _cm->deal_with_reference(obj);
  2202   CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
  2203 };
  2205 void ConcurrentMark::deal_with_reference(oop obj) {
  2206   if (verbose_high())
  2207     gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
  2208                            (void*) obj);
  2211   HeapWord* objAddr = (HeapWord*) obj;
  2212   assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
  2213   if (_g1h->is_in_g1_reserved(objAddr)) {
  2214     assert(obj != NULL, "is_in_g1_reserved should ensure this");
  2215     HeapRegion* hr = _g1h->heap_region_containing(obj);
  2216     if (_g1h->is_obj_ill(obj, hr)) {
  2217       if (verbose_high())
  2218         gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
  2219                                "marked", (void*) obj);
  2221       // we need to mark it first
  2222       if (_nextMarkBitMap->parMark(objAddr)) {
  2223         // No OrderAccess:store_load() is needed. It is implicit in the
  2224         // CAS done in parMark(objAddr) above
  2225         HeapWord* finger = _finger;
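                 // Only an object below the global finger needs an explicit
                 // push: anything at or above it will be visited anyway when
                 // the finger advances past its location.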
  2226         if (objAddr < finger) {
  2227           if (verbose_high())
  2228             gclog_or_tty->print_cr("[global] below the global finger "
  2229                                    "("PTR_FORMAT"), pushing it", finger);
  2230           if (!mark_stack_push(obj)) {
  2231             if (verbose_low())
  2232               gclog_or_tty->print_cr("[global] global stack overflow during "
  2233                                      "deal_with_reference");
  2241 void ConcurrentMark::drainAllSATBBuffers() {
  2242   CMGlobalObjectClosure oc(this);
  2243   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  2244   satb_mq_set.set_closure(&oc);
  2246   while (satb_mq_set.apply_closure_to_completed_buffer()) {
  2247     if (verbose_medium())
  2248       gclog_or_tty->print_cr("[global] processed an SATB buffer");
  2251   // no need to check whether we should do this, as this is only
  2252   // called during an evacuation pause
  2253   satb_mq_set.iterate_closure_all_threads();
  2255   satb_mq_set.set_closure(NULL);
  2256   assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
  2259 void ConcurrentMark::markPrev(oop p) {
  2260   // Note we are overriding the read-only view of the prev map here, via
  2261   // the cast.
  2262   ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
  2265 void ConcurrentMark::clear(oop p) {
  2266   assert(p != NULL && p->is_oop(), "expected an oop");
  2267   HeapWord* addr = (HeapWord*)p;
  2268   assert(addr >= _nextMarkBitMap->startWord() &&
  2269          addr < _nextMarkBitMap->endWord(), "in a region");
  2271   _nextMarkBitMap->clear(addr);
  2274 void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
  2275   // Note we are overriding the read-only view of the prev map here, via
  2276   // the cast.
  2277   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
  2278   _nextMarkBitMap->clearRange(mr);
  2281 HeapRegion*
  2282 ConcurrentMark::claim_region(int task_num) {
  2283   // "checkpoint" the finger
  2284   HeapWord* finger = _finger;
  2286   // _heap_end will not change underneath our feet; it only changes at
  2287   // yield points.
  2288   while (finger < _heap_end) {
  2289     assert(_g1h->is_in_g1_reserved(finger), "invariant");
  2291     // is the gap between reading the finger and doing the CAS too long?
  2293     HeapRegion* curr_region   = _g1h->heap_region_containing(finger);
  2294     HeapWord*   bottom        = curr_region->bottom();
  2295     HeapWord*   end           = curr_region->end();
  2296     HeapWord*   limit         = curr_region->next_top_at_mark_start();
  2298     if (verbose_low())
  2299       gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
  2300                              "["PTR_FORMAT", "PTR_FORMAT"), "
  2301                              "limit = "PTR_FORMAT,
  2302                              task_num, curr_region, bottom, end, limit);
  2304     HeapWord* res =
  2305       (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
  2306     if (res == finger) {
  2307       // we succeeded
  2309       // notice that _finger == end cannot be guaranteed here, since
  2310       // someone else might have moved the finger even further
  2311       assert(_finger >= end, "the finger should have moved forward");
  2313       if (verbose_low())
  2314         gclog_or_tty->print_cr("[%d] we were successful with region = "
  2315                                PTR_FORMAT, task_num, curr_region);
  2317       if (limit > bottom) {
  2318         if (verbose_low())
  2319           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
  2320                                  "returning it ", task_num, curr_region);
  2321         return curr_region;
  2322       } else {
  2323         assert(limit == bottom,
  2324                "the region limit should be at bottom");
  2325         if (verbose_low())
  2326           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
  2327                                  "returning NULL", task_num, curr_region);
  2328         // we return NULL and the caller should try calling
  2329         // claim_region() again.
  2330         return NULL;
  2332     } else {
  2333       assert(_finger > finger, "the finger should have moved forward");
  2334       if (verbose_low())
  2335         gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
  2336                                "global finger = "PTR_FORMAT", "
  2337                                "our finger = "PTR_FORMAT,
  2338                                task_num, _finger, finger);
  2340       // read it again
  2341       finger = _finger;
  2345   return NULL;
  2348 bool ConcurrentMark::invalidate_aborted_regions_in_cset() {
  2349   bool result = false;
  2350   for (int i = 0; i < (int)_max_task_num; ++i) {
  2351     CMTask* the_task = _tasks[i];
  2352     MemRegion mr = the_task->aborted_region();
  2353     if (mr.start() != NULL) {
  2354       assert(mr.end() != NULL, "invariant");
  2355       assert(mr.word_size() > 0, "invariant");
  2356       HeapRegion* hr = _g1h->heap_region_containing(mr.start());
  2357       assert(hr != NULL, "invariant");
  2358       if (hr->in_collection_set()) {
  2359         // The region points into the collection set
  2360         the_task->set_aborted_region(MemRegion());
  2361         result = true;
  2365   return result;
  2368 bool ConcurrentMark::has_aborted_regions() {
  2369   for (int i = 0; i < (int)_max_task_num; ++i) {
  2370     CMTask* the_task = _tasks[i];
  2371     MemRegion mr = the_task->aborted_region();
  2372     if (mr.start() != NULL) {
  2373       assert(mr.end() != NULL, "invariant");
  2374       assert(mr.word_size() > 0, "invariant");
  2375       return true;
  2378   return false;
  2381 void ConcurrentMark::oops_do(OopClosure* cl) {
  2382   if (_markStack.size() > 0 && verbose_low())
  2383     gclog_or_tty->print_cr("[global] scanning the global marking stack, "
  2384                            "size = %d", _markStack.size());
  2385   // we first iterate over the contents of the mark stack...
  2386   _markStack.oops_do(cl);
  2388   for (int i = 0; i < (int)_max_task_num; ++i) {
  2389     OopTaskQueue* queue = _task_queues->queue((int)i);
  2391     if (queue->size() > 0 && verbose_low())
  2392       gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
  2393                              "size = %d", i, queue->size());
  2395     // ...then over the contents of all the task queues.
  2396     queue->oops_do(cl);
  2399   // Invalidate any entries in the region stack that point into
  2400   // the collection set.
  2401   if (_regionStack.invalidate_entries_into_cset()) {
  2402     // otherwise, any gray objects copied during the evacuation pause
  2403     // might not be visited.
  2404     assert(_should_gray_objects, "invariant");
  2407   // Invalidate any aborted regions, recorded in the individual CM
  2408   // tasks, that point into the collection set.
  2409   if (invalidate_aborted_regions_in_cset()) {
  2410     // otherwise, any gray objects copied during the evacuation pause
  2411     // might not be visited.
  2412     assert(_should_gray_objects, "invariant");
  2417 void ConcurrentMark::clear_marking_state() {
  2418   _markStack.setEmpty();
  2419   _markStack.clear_overflow();
  2420   _regionStack.setEmpty();
  2421   _regionStack.clear_overflow();
  2422   clear_has_overflown();
  2423   _finger = _heap_start;
  2425   for (int i = 0; i < (int)_max_task_num; ++i) {
  2426     OopTaskQueue* queue = _task_queues->queue(i);
  2427     queue->set_empty();
  2428     // Clear any partial regions from the CMTasks
  2429     _tasks[i]->clear_aborted_region();
  2433 void ConcurrentMark::print_stats() {
  2434   if (verbose_stats()) {
  2435     gclog_or_tty->print_cr("---------------------------------------------------------------------");
  2436     for (size_t i = 0; i < _active_tasks; ++i) {
  2437       _tasks[i]->print_stats();
  2438       gclog_or_tty->print_cr("---------------------------------------------------------------------");
  2443 class CSMarkOopClosure: public OopClosure {
  2444   friend class CSMarkBitMapClosure;
  2446   G1CollectedHeap* _g1h;
  2447   CMBitMap*        _bm;
  2448   ConcurrentMark*  _cm;
  2449   oop*             _ms;
  2450   jint*            _array_ind_stack;
  2451   int              _ms_size;
  2452   int              _ms_ind;
  2453   int              _array_increment;
  2455   bool push(oop obj, int arr_ind = 0) {
  2456     if (_ms_ind == _ms_size) {
  2457       gclog_or_tty->print_cr("Mark stack is full.");
  2458       return false;
  2460     _ms[_ms_ind] = obj;
  2461     if (obj->is_objArray()) _array_ind_stack[_ms_ind] = arr_ind;
  2462     _ms_ind++;
  2463     return true;
  2466   oop pop() {
  2467     if (_ms_ind == 0) return NULL;
  2468     else {
  2469       _ms_ind--;
  2470       return _ms[_ms_ind];
  2474   template <class T> bool drain() {
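           // Object arrays are scanned in chunks of _array_increment
           // elements: if more of the array remains, the array is pushed
           // back with the next start index before this chunk's elements
           // are processed.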
  2475     while (_ms_ind > 0) {
  2476       oop obj = pop();
  2477       assert(obj != NULL, "Since index was non-zero.");
  2478       if (obj->is_objArray()) {
  2479         jint arr_ind = _array_ind_stack[_ms_ind];
  2480         objArrayOop aobj = objArrayOop(obj);
  2481         jint len = aobj->length();
  2482         jint next_arr_ind = arr_ind + _array_increment;
  2483         if (next_arr_ind < len) {
  2484           push(obj, next_arr_ind);
  2486         // Now process this portion of this one.
  2487         int lim = MIN2(next_arr_ind, len);
  2488         for (int j = arr_ind; j < lim; j++) {
  2489           do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j));
  2492       } else {
  2493         obj->oop_iterate(this);
  2495       if (abort()) return false;
  2497     return true;
  2500 public:
  2501   CSMarkOopClosure(ConcurrentMark* cm, int ms_size) :
  2502     _g1h(G1CollectedHeap::heap()),
  2503     _cm(cm),
  2504     _bm(cm->nextMarkBitMap()),
  2505     _ms_size(ms_size), _ms_ind(0),
  2506     _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
  2507     _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
  2508     _array_increment(MAX2(ms_size/8, 16))
  2509   {}
  2511   ~CSMarkOopClosure() {
  2512     FREE_C_HEAP_ARRAY(oop, _ms);
  2513     FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
  2516   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  2517   virtual void do_oop(      oop* p) { do_oop_work(p); }
  2519   template <class T> void do_oop_work(T* p) {
  2520     T heap_oop = oopDesc::load_heap_oop(p);
  2521     if (oopDesc::is_null(heap_oop)) return;
  2522     oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
  2523     if (obj->is_forwarded()) {
  2524       // If the object has already been forwarded, we have to make sure
  2525       // that it's marked.  So follow the forwarding pointer.  Note that
  2526       // this does the right thing for self-forwarding pointers in the
  2527       // evacuation failure case.
  2528       obj = obj->forwardee();
  2530     HeapRegion* hr = _g1h->heap_region_containing(obj);
  2531     if (hr != NULL) {
  2532       if (hr->in_collection_set()) {
  2533         if (_g1h->is_obj_ill(obj)) {
  2534           _bm->mark((HeapWord*)obj);
  2535           if (!push(obj)) {
  2536             gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
  2537             set_abort();
  2540       } else {
  2541         // Outside the collection set; we need to gray it
  2542         _cm->deal_with_reference(obj);
  2546 };
  2548 class CSMarkBitMapClosure: public BitMapClosure {
  2549   G1CollectedHeap* _g1h;
  2550   CMBitMap*        _bitMap;
  2551   ConcurrentMark*  _cm;
  2552   CSMarkOopClosure _oop_cl;
  2553 public:
  2554   CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :
  2555     _g1h(G1CollectedHeap::heap()),
  2556     _bitMap(cm->nextMarkBitMap()),
  2557     _oop_cl(cm, ms_size)
  2558   {}
  2560   ~CSMarkBitMapClosure() {}
  2562   bool do_bit(size_t offset) {
  2563     // convert offset into a HeapWord*
  2564     HeapWord* addr = _bitMap->offsetToHeapWord(offset);
  2565     assert(addr >= _bitMap->startWord() && addr < _bitMap->endWord(),
  2566            "address out of range");
  2567     assert(_bitMap->isMarked(addr), "tautology");
  2568     oop obj = oop(addr);
  2569     if (!obj->is_forwarded()) {
  2570       if (!_oop_cl.push(obj)) return false;
  2571       if (UseCompressedOops) {
  2572         if (!_oop_cl.drain<narrowOop>()) return false;
  2573       } else {
  2574         if (!_oop_cl.drain<oop>()) return false;
  2577     // Otherwise...
  2578     return true;
  2580 };
  2583 class CompleteMarkingInCSHRClosure: public HeapRegionClosure {
  2584   CMBitMap* _bm;
  2585   CSMarkBitMapClosure _bit_cl;
  2586   enum SomePrivateConstants {
  2587     MSSize = 1000
  2588   };
  2589   bool _completed;
  2590 public:
  2591   CompleteMarkingInCSHRClosure(ConcurrentMark* cm) :
  2592     _bm(cm->nextMarkBitMap()),
  2593     _bit_cl(cm, MSSize),
  2594     _completed(true)
  2595   {}
  2597   ~CompleteMarkingInCSHRClosure() {}
  2599   bool doHeapRegion(HeapRegion* r) {
  2600     if (!r->evacuation_failed()) {
  2601       MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
  2602       if (!mr.is_empty()) {
  2603         if (!_bm->iterate(&_bit_cl, mr)) {
  2604           _completed = false;
  2605           return true;
  2609     return false;
  2612   bool completed() { return _completed; }
  2613 };
  2615 class ClearMarksInHRClosure: public HeapRegionClosure {
  2616   CMBitMap* _bm;
  2617 public:
  2618   ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { }
  2620   bool doHeapRegion(HeapRegion* r) {
  2621     if (!r->used_region().is_empty() && !r->evacuation_failed()) {
  2622       MemRegion usedMR = r->used_region();
  2623       _bm->clearRange(usedMR);
  2625     return false;
  2627 };
  2629 void ConcurrentMark::complete_marking_in_collection_set() {
  2630   G1CollectedHeap* g1h =  G1CollectedHeap::heap();
  2632   if (!g1h->mark_in_progress()) {
  2633     g1h->g1_policy()->record_mark_closure_time(0.0);
  2634     return;
  2637   int i = 1;
  2638   double start = os::elapsedTime();
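         // The bitmap iteration bails out early if the closure's local mark
         // stack fills up, so re-iterate over the collection set until a
         // pass manages to complete.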
  2639   while (true) {
  2640     i++;
  2641     CompleteMarkingInCSHRClosure cmplt(this);
  2642     g1h->collection_set_iterate(&cmplt);
  2643     if (cmplt.completed()) break;
  2645   double end_time = os::elapsedTime();
  2646   double elapsed_time_ms = (end_time - start) * 1000.0;
  2647   g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
  2649   ClearMarksInHRClosure clr(nextMarkBitMap());
  2650   g1h->collection_set_iterate(&clr);
  2653 // The next two methods deal with the following optimisation. Some
  2654 // objects are gray by being marked and located above the finger. If
  2655 // they are copied, during an evacuation pause, below the finger then
  2656 // they need to be pushed on the stack. The observation is that, if
  2657 // there are no regions in the collection set located above the
  2658 // finger, then the above cannot happen, hence we do not need to
  2659 // explicitly gray any objects when copying them to below the
  2660 // finger. The global stack will be scanned to ensure that, if it
  2661 // points to objects being copied, it will update their
  2662 // location. There is a tricky situation with the gray objects in
  2663 // the region stack that are being copied, however. See the comment in
  2664 // newCSet().
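       // Example: an object is marked while above the finger, i.e. gray. An
       // evacuation pause then copies it below the finger. Unless it is
       // grayed again at copy time, no task will ever rescan it, since the
       // finger has already passed its new location.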
  2666 void ConcurrentMark::newCSet() {
  2667   if (!concurrent_marking_in_progress())
  2668     // nothing to do if marking is not in progress
  2669     return;
  2671   // find what the lowest finger is among the global and local fingers
  2672   _min_finger = _finger;
  2673   for (int i = 0; i < (int)_max_task_num; ++i) {
  2674     CMTask* task = _tasks[i];
  2675     HeapWord* task_finger = task->finger();
  2676     if (task_finger != NULL && task_finger < _min_finger)
  2677       _min_finger = task_finger;
  2680   _should_gray_objects = false;
  2682   // This fixes a very subtle and frustrating bug. It might be the case
  2683   // that, during an evacuation pause, heap regions that contain
  2684   // objects that are gray (by being in regions contained in the
  2685   // region stack) are included in the collection set. Since such gray
  2686   // objects will be moved, and because it's not easy to redirect
  2687   // region stack entries to point to a new location (because objects
  2688   // in one region might be scattered to multiple regions after they
  2689   // are copied), one option is to ensure that all marked objects
  2690   // copied during a pause are pushed on the stack. Notice, however,
  2691   // that this problem can only happen when the region stack is not
  2692   // empty during an evacuation pause. So, we make the fix a bit less
  2693   // conservative and ensure that regions are pushed on the stack,
  2694   // irrespective of whether all collection set regions are below the
  2695   // finger, if the region stack is not empty. This is expected to be
  2696   // a rare case, so I don't think it's necessary to be smarter about it.
  2697   if (!region_stack_empty() || has_aborted_regions())
  2698     _should_gray_objects = true;
  2701 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
  2702   if (!concurrent_marking_in_progress())
  2703     return;
  2705   HeapWord* region_end = hr->end();
  2706   if (region_end > _min_finger)
  2707     _should_gray_objects = true;
  2710 // abandon current marking iteration due to a Full GC
  2711 void ConcurrentMark::abort() {
  2712   // Clear all marks to force marking thread to do nothing
  2713   _nextMarkBitMap->clearAll();
  2714   // Empty mark stack
  2715   clear_marking_state();
  2716   for (int i = 0; i < (int)_max_task_num; ++i) {
  2717     _tasks[i]->clear_region_fields();
  2719   _has_aborted = true;
  2721   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  2722   satb_mq_set.abandon_partial_marking();
  2723   // This can be called either during or outside marking, we'll read
  2724   // the expected_active value from the SATB queue set.
  2725   satb_mq_set.set_active_all_threads(
  2726                                  false, /* new active value */
  2727                                  satb_mq_set.is_active() /* expected_active */);
  2730 static void print_ms_time_info(const char* prefix, const char* name,
  2731                                NumberSeq& ns) {
  2732   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
  2733                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
  2734   if (ns.num() > 0) {
  2735     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
  2736                            prefix, ns.sd(), ns.maximum());
  2740 void ConcurrentMark::print_summary_info() {
  2741   gclog_or_tty->print_cr(" Concurrent marking:");
  2742   print_ms_time_info("  ", "init marks", _init_times);
  2743   print_ms_time_info("  ", "remarks", _remark_times);
  2745     print_ms_time_info("     ", "final marks", _remark_mark_times);
  2746     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
  2749   print_ms_time_info("  ", "cleanups", _cleanup_times);
  2750   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
  2751                          _total_counting_time,
  2752                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
  2753                           (double)_cleanup_times.num()
  2754                          : 0.0));
  2755   if (G1ScrubRemSets) {
  2756     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
  2757                            _total_rs_scrub_time,
  2758                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
  2759                             (double)_cleanup_times.num()
  2760                            : 0.0));
  2762   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
  2763                          (_init_times.sum() + _remark_times.sum() +
  2764                           _cleanup_times.sum())/1000.0);
  2765   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
  2766                 "(%8.2f s marking, %8.2f s counting).",
  2767                 cmThread()->vtime_accum(),
  2768                 cmThread()->vtime_mark_accum(),
  2769                 cmThread()->vtime_count_accum());
  2772 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
  2773   _parallel_workers->print_worker_threads_on(st);
  2776 // Closures
  2777 // XXX: there seems to be a lot of code duplication here;
  2778 // should refactor and consolidate the shared code.
  2783 // We take a break if someone is trying to stop the world.
  2784 bool ConcurrentMark::do_yield_check(int worker_i) {
  2785   if (should_yield()) {
  2786     if (worker_i == 0)
  2787       _g1h->g1_policy()->record_concurrent_pause();
  2788     cmThread()->yield();
  2789     if (worker_i == 0)
  2790       _g1h->g1_policy()->record_concurrent_pause_end();
  2791     return true;
  2792   } else {
  2793     return false;
  2797 bool ConcurrentMark::should_yield() {
  2798   return cmThread()->should_yield();
  2801 bool ConcurrentMark::containing_card_is_marked(void* p) {
  2802   size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
  2803   return _card_bm.at(offset >> CardTableModRefBS::card_shift);
  2806 bool ConcurrentMark::containing_cards_are_marked(void* start,
  2807                                                  void* last) {
  2808   return
  2809     containing_card_is_marked(start) &&
  2810     containing_card_is_marked(last);
  2813 #ifndef PRODUCT
  2814 // for debugging purposes
  2815 void ConcurrentMark::print_finger() {
  2816   gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
  2817                          _heap_start, _heap_end, _finger);
  2818   for (int i = 0; i < (int) _max_task_num; ++i) {
  2819     gclog_or_tty->print("   %d: "PTR_FORMAT, i, _tasks[i]->finger());
  2821   gclog_or_tty->print_cr("");
  2823 #endif
  2825 // Closure for iteration over bitmaps
  2826 class CMBitMapClosure : public BitMapClosure {
  2827 private:
  2828   // the bitmap that is being iterated over
  2829   CMBitMap*                   _nextMarkBitMap;
  2830   ConcurrentMark*             _cm;
  2831   CMTask*                     _task;
  2832   // true if we're scanning a heap region claimed by the task (so that
  2833   // we move the finger along), false if we're not, i.e. currently when
  2834   // scanning a heap region popped from the region stack (so that we
  2835   // do not move the task finger along; it'd be a mistake if we did so).
  2836   bool                        _scanning_heap_region;
  2838 public:
  2839   CMBitMapClosure(CMTask *task,
  2840                   ConcurrentMark* cm,
  2841                   CMBitMap* nextMarkBitMap)
  2842     :  _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
  2844   void set_scanning_heap_region(bool scanning_heap_region) {
  2845     _scanning_heap_region = scanning_heap_region;
  2848   bool do_bit(size_t offset) {
  2849     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
  2850     assert(_nextMarkBitMap->isMarked(addr), "invariant");
  2851     assert( addr < _cm->finger(), "invariant");
  2853     if (_scanning_heap_region) {
  2854       statsOnly( _task->increase_objs_found_on_bitmap() );
  2855       assert(addr >= _task->finger(), "invariant");
  2856       // We move that task's local finger along.
  2857       _task->move_finger_to(addr);
  2858     } else {
  2859       // We move the task's region finger along.
  2860       _task->move_region_finger_to(addr);
  2863     _task->scan_object(oop(addr));
  2864     // we only partially drain the local queue and global stack
  2865     _task->drain_local_queue(true);
  2866     _task->drain_global_stack(true);
  2868     // if the has_aborted flag has been raised, we need to bail out of
  2869     // the iteration
  2870     return !_task->has_aborted();
  2872 };
  2874 // Closure for iterating over objects, currently only used for
  2875 // processing SATB buffers.
  2876 class CMObjectClosure : public ObjectClosure {
  2877 private:
  2878   CMTask* _task;
  2880 public:
  2881   void do_object(oop obj) {
  2882     _task->deal_with_reference(obj);
  2885   CMObjectClosure(CMTask* task) : _task(task) { }
  2886 };
  2888 // Closure for iterating over object fields
  2889 class CMOopClosure : public OopClosure {
  2890 private:
  2891   G1CollectedHeap*   _g1h;
  2892   ConcurrentMark*    _cm;
  2893   CMTask*            _task;
  2895 public:
  2896   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  2897   virtual void do_oop(      oop* p) { do_oop_work(p); }
  2899   template <class T> void do_oop_work(T* p) {
  2900     assert(_g1h->is_in_g1_reserved((HeapWord*) p), "invariant");
  2901     assert(!_g1h->heap_region_containing((HeapWord*) p)->is_on_free_list(),
  2902            "invariant");
  2904     oop obj = oopDesc::load_decode_heap_oop(p);
  2905     if (_cm->verbose_high())
  2906       gclog_or_tty->print_cr("[%d] we're looking at location "
  2907                              "*"PTR_FORMAT" = "PTR_FORMAT,
  2908                              _task->task_id(), p, (void*) obj);
  2909     _task->deal_with_reference(obj);
  2912   CMOopClosure(G1CollectedHeap* g1h,
  2913                ConcurrentMark* cm,
  2914                CMTask* task)
  2915     : _g1h(g1h), _cm(cm), _task(task)
  2917     _ref_processor = g1h->ref_processor();
  2918     assert(_ref_processor != NULL, "should not be NULL");
  2920 };
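// (An instance of this closure is installed via set_oop_closure() in
// do_marking_step() below; scan_object() then applies it to the fields
// of every object the task visits.)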
  2922 void CMTask::setup_for_region(HeapRegion* hr) {
  2923   // Separated the asserts so that we know which one fires.
  2924   assert(hr != NULL,
  2925         "claim_region() should have returned a non-NULL region");
  2926   assert(!hr->continuesHumongous(),
  2927         "claim_region() should have filtered out continues humongous regions");
  2929   if (_cm->verbose_low())
  2930     gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
  2931                            _task_id, hr);
  2933   _curr_region  = hr;
  2934   _finger       = hr->bottom();
  2935   update_region_limit();
  2938 void CMTask::update_region_limit() {
  2939   HeapRegion* hr            = _curr_region;
  2940   HeapWord* bottom          = hr->bottom();
  2941   HeapWord* limit           = hr->next_top_at_mark_start();
  2943   if (limit == bottom) {
  2944     if (_cm->verbose_low())
  2945       gclog_or_tty->print_cr("[%d] found an empty region "
  2946                              "["PTR_FORMAT", "PTR_FORMAT")",
  2947                              _task_id, bottom, limit);
  2948     // The region was collected underneath our feet.
  2949     // We set the finger to bottom to ensure that the bitmap
  2950     // iteration that will follow this will not do anything.
  2951     // (this is not a condition that holds when we set the region up,
  2952     // as the region is not supposed to be empty in the first place)
  2953     _finger = bottom;
  2954   } else if (limit >= _region_limit) {
  2955     assert(limit >= _finger, "peace of mind");
  2956   } else {
  2957     assert(limit < _region_limit, "only way to get here");
  2958     // This can happen under some pretty unusual circumstances.  An
  2959     // evacuation pause empties the region underneath our feet (NTAMS
  2960     // at bottom). We then do some allocation in the region (NTAMS
  2961     // stays at bottom), followed by the region being used as a GC
  2962     // alloc region (NTAMS will move to top() and the objects
  2963     // originally below it will be grayed). All objects now marked in
  2964     // the region are explicitly grayed, if below the global finger,
  2965     // and in fact we do not need to scan anything else. So, we simply
  2966     // set _finger to be limit to ensure that the bitmap iteration
  2967     // doesn't do anything.
  2968     _finger = limit;
  2971   _region_limit = limit;
  2974 void CMTask::giveup_current_region() {
  2975   assert(_curr_region != NULL, "invariant");
  2976   if (_cm->verbose_low())
  2977     gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
  2978                            _task_id, _curr_region);
  2979   clear_region_fields();
  2982 void CMTask::clear_region_fields() {
  2983   // Set these three fields to values that indicate that we're not
  2984   // holding on to a region.
  2985   _curr_region   = NULL;
  2986   _finger        = NULL;
  2987   _region_limit  = NULL;
  2989   _region_finger = NULL;
  2992 void CMTask::reset(CMBitMap* nextMarkBitMap) {
  2993   guarantee(nextMarkBitMap != NULL, "invariant");
  2995   if (_cm->verbose_low())
  2996     gclog_or_tty->print_cr("[%d] resetting", _task_id);
  2998   _nextMarkBitMap                = nextMarkBitMap;
  2999   clear_region_fields();
  3000   assert(_aborted_region.is_empty(), "should have been cleared");
  3002   _calls                         = 0;
  3003   _elapsed_time_ms               = 0.0;
  3004   _termination_time_ms           = 0.0;
  3005   _termination_start_time_ms     = 0.0;
  3007 #if _MARKING_STATS_
  3008   _local_pushes                  = 0;
  3009   _local_pops                    = 0;
  3010   _local_max_size                = 0;
  3011   _objs_scanned                  = 0;
  3012   _global_pushes                 = 0;
  3013   _global_pops                   = 0;
  3014   _global_max_size               = 0;
  3015   _global_transfers_to           = 0;
  3016   _global_transfers_from         = 0;
  3017   _region_stack_pops             = 0;
  3018   _regions_claimed               = 0;
  3019   _objs_found_on_bitmap          = 0;
  3020   _satb_buffers_processed        = 0;
  3021   _steal_attempts                = 0;
  3022   _steals                        = 0;
  3023   _aborted                       = 0;
  3024   _aborted_overflow              = 0;
  3025   _aborted_cm_aborted            = 0;
  3026   _aborted_yield                 = 0;
  3027   _aborted_timed_out             = 0;
  3028   _aborted_satb                  = 0;
  3029   _aborted_termination           = 0;
  3030 #endif // _MARKING_STATS_
  3033 bool CMTask::should_exit_termination() {
  3034   regular_clock_call();
  3035   // This is called when we are in the termination protocol. We should
  3036   // quit if, for some reason, this task wants to abort or the global
  3037   // stack is not empty (this means that we can get work from it).
  3038   return !_cm->mark_stack_empty() || has_aborted();
  3041 // This determines whether the method below will check both the local
  3042 // and global fingers when determining whether to push on the stack a
  3043 // gray object (value 1) or whether it will only check the global one
  3044 // (value 0). The tradeoff is that the former is a bit more accurate
  3045 // and possibly pushes fewer entries on the stack, but it might also be
  3046 // a little bit slower.
  3048 #define _CHECK_BOTH_FINGERS_      1
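// Illustrative sketch only: the push decision that deal_with_reference()
// makes below when _CHECK_BOTH_FINGERS_ is 1, extracted as a hypothetical
// free function (example_needs_push is not part of this file; the task's
// fields are passed in explicitly).
static bool example_needs_push(HeapWord* objAddr,
                               HeapWord* local_finger,   // _finger
                               HeapWord* region_limit,   // _region_limit
                               HeapWord* global_finger,  // _cm->finger()
                               bool      holding_region) // _curr_region != NULL
{
  if (local_finger != NULL && objAddr < local_finger) {
    // Below the local finger: this task's bitmap scan will not revisit
    // the object, so it has to be pushed.
    return true;
  }
  if (holding_region && objAddr < region_limit) {
    // Within the currently claimed region: the ongoing scan will reach
    // the object, so marking it is enough.
    return false;
  }
  // Otherwise, push only if the object lies below the global finger,
  // i.e. in a part of the heap that no task will scan again.
  return objAddr < global_finger;
}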
  3050 void CMTask::deal_with_reference(oop obj) {
  3051   if (_cm->verbose_high())
  3052     gclog_or_tty->print_cr("[%d] we're dealing with reference = "PTR_FORMAT,
  3053                            _task_id, (void*) obj);
  3055   ++_refs_reached;
  3057   HeapWord* objAddr = (HeapWord*) obj;
  3058   assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
  3059   if (_g1h->is_in_g1_reserved(objAddr)) {
  3060     assert(obj != NULL, "is_in_g1_reserved should ensure this");
  3061     HeapRegion* hr =  _g1h->heap_region_containing(obj);
  3062     if (_g1h->is_obj_ill(obj, hr)) {
  3063       if (_cm->verbose_high())
  3064         gclog_or_tty->print_cr("[%d] "PTR_FORMAT" is not considered marked",
  3065                                _task_id, (void*) obj);
  3067       // we need to mark it first
  3068       if (_nextMarkBitMap->parMark(objAddr)) {
  3069         // No OrderAccess::store_load() is needed. It is implicit in the
  3070         // CAS done in parMark(objAddr) above
  3071         HeapWord* global_finger = _cm->finger();
  3073 #if _CHECK_BOTH_FINGERS_
  3074         // we will check both the local and global fingers
  3076         if (_finger != NULL && objAddr < _finger) {
  3077           if (_cm->verbose_high())
  3078             gclog_or_tty->print_cr("[%d] below the local finger ("PTR_FORMAT"), "
  3079                                    "pushing it", _task_id, _finger);
  3080           push(obj);
  3081         } else if (_curr_region != NULL && objAddr < _region_limit) {
  3082           // do nothing
  3083         } else if (objAddr < global_finger) {
  3084           // Notice that the global finger might be moving forward
  3085           // concurrently. This is not a problem. In the worst case, we
  3086           // mark the object while it is above the global finger and, by
  3087           // the time we read the global finger, it has moved forward
  3088           // past this object. In this case, the object will probably
  3089           // be visited when a task is scanning the region and will also
  3090           // be pushed on the stack. So, some duplicate work, but no
  3091           // correctness problems.
  3093           if (_cm->verbose_high())
  3094             gclog_or_tty->print_cr("[%d] below the global finger "
  3095                                    "("PTR_FORMAT"), pushing it",
  3096                                    _task_id, global_finger);
  3097           push(obj);
  3098         } else {
  3099           // do nothing
  3101 #else // _CHECK_BOTH_FINGERS_
  3102       // we will only check the global finger
  3104         if (objAddr < global_finger) {
  3105           // see long comment above
  3107           if (_cm->verbose_high())
  3108             gclog_or_tty->print_cr("[%d] below the global finger "
  3109                                    "("PTR_FORMAT"), pushing it",
  3110                                    _task_id, global_finger);
  3111           push(obj);
  3113 #endif // _CHECK_BOTH_FINGERS_
  3119 void CMTask::push(oop obj) {
  3120   HeapWord* objAddr = (HeapWord*) obj;
  3121   assert(_g1h->is_in_g1_reserved(objAddr), "invariant");
  3122   assert(!_g1h->heap_region_containing(objAddr)->is_on_free_list(),
  3123          "invariant");
  3124   assert(!_g1h->is_obj_ill(obj), "invariant");
  3125   assert(_nextMarkBitMap->isMarked(objAddr), "invariant");
  3127   if (_cm->verbose_high())
  3128     gclog_or_tty->print_cr("[%d] pushing "PTR_FORMAT, _task_id, (void*) obj);
  3130   if (!_task_queue->push(obj)) {
  3131     // The local task queue looks full. We need to push some entries
  3132     // to the global stack.
  3134     if (_cm->verbose_medium())
  3135       gclog_or_tty->print_cr("[%d] task queue overflow, "
  3136                              "moving entries to the global stack",
  3137                              _task_id);
  3138     move_entries_to_global_stack();
  3140     // this should succeed since, even if we overflow the global
  3141     // stack, we should have definitely removed some entries from the
  3142     // local queue. So, there must be space on it.
  3143     bool success = _task_queue->push(obj);
  3144     assert(success, "invariant");
  3147   statsOnly( int tmp_size = _task_queue->size();
  3148              if (tmp_size > _local_max_size)
  3149                _local_max_size = tmp_size;
  3150              ++_local_pushes );
  3153 void CMTask::reached_limit() {
  3154   assert(_words_scanned >= _words_scanned_limit ||
  3155          _refs_reached >= _refs_reached_limit,
  3156          "shouldn't have been called otherwise");
  3157   regular_clock_call();
  3160 void CMTask::regular_clock_call() {
  3161   if (has_aborted())
  3162     return;
  3164   // First, we need to recalculate the words scanned and refs reached
  3165   // limits for the next clock call.
  3166   recalculate_limits();
  3168   // During the regular clock call we do the following
  3170   // (1) If an overflow has been flagged, then we abort.
  3171   if (_cm->has_overflown()) {
  3172     set_has_aborted();
  3173     return;
  3176   // If we are not concurrent (i.e. we're doing remark) we don't need
  3177   // to check anything else. The other steps are only needed during
  3178   // the concurrent marking phase.
  3179   if (!concurrent())
  3180     return;
  3182   // (2) If marking has been aborted for Full GC, then we also abort.
  3183   if (_cm->has_aborted()) {
  3184     set_has_aborted();
  3185     statsOnly( ++_aborted_cm_aborted );
  3186     return;
  3189   double curr_time_ms = os::elapsedVTime() * 1000.0;
  3191   // (3) If marking stats are enabled, then we update the clock call statistics.
  3192 #if _MARKING_STATS_
  3193   if (_words_scanned >= _words_scanned_limit)
  3194     ++_clock_due_to_scanning;
  3195   if (_refs_reached >= _refs_reached_limit)
  3196     ++_clock_due_to_marking;
  3198   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
  3199   _interval_start_time_ms = curr_time_ms;
  3200   _all_clock_intervals_ms.add(last_interval_ms);
  3202   if (_cm->verbose_medium()) {
  3203     gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
  3204                            "scanned = %d%s, refs reached = %d%s",
  3205                            _task_id, last_interval_ms,
  3206                            _words_scanned,
  3207                            (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
  3208                            _refs_reached,
  3209                            (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
  3211 #endif // _MARKING_STATS_
  3213   // (4) We check whether we should yield. If we have to, then we abort.
  3214   if (_cm->should_yield()) {
  3215     // We should yield. To do this we abort the task. The caller is
  3216     // responsible for yielding.
  3217     set_has_aborted();
  3218     statsOnly( ++_aborted_yield );
  3219     return;
  3222   // (5) We check whether we've reached our time quota. If we have,
  3223   // then we abort.
  3224   double elapsed_time_ms = curr_time_ms - _start_time_ms;
  3225   if (elapsed_time_ms > _time_target_ms) {
  3226     set_has_aborted();
  3227     _has_aborted_timed_out = true;
  3228     statsOnly( ++_aborted_timed_out );
  3229     return;
  3232   // (6) Finally, we check whether there are enough completed SATB
  3233   // buffers available for processing. If there are, we abort.
  3234   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  3235   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
  3236     if (_cm->verbose_low())
  3237       gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
  3238                              _task_id);
  3239     // we do need to process SATB buffers, so we'll abort and restart
  3240     // the marking task to do so
  3241     set_has_aborted();
  3242     statsOnly( ++_aborted_satb );
  3243     return;
  3247 void CMTask::recalculate_limits() {
  3248   _real_words_scanned_limit = _words_scanned + words_scanned_period;
  3249   _words_scanned_limit      = _real_words_scanned_limit;
  3251   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
  3252   _refs_reached_limit       = _real_refs_reached_limit;
  3255 void CMTask::decrease_limits() {
  3256   // This is called when we believe that we're going to do an infrequent
  3257   // operation which will increase the per byte scanned cost (i.e. move
  3258   // entries to/from the global stack). It basically tries to decrease the
  3259   // scanning limit so that the clock is called earlier.
  3261   if (_cm->verbose_medium())
  3262     gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
  3264   _words_scanned_limit = _real_words_scanned_limit -
  3265     3 * words_scanned_period / 4;
  3266   _refs_reached_limit  = _real_refs_reached_limit -
  3267     3 * refs_reached_period / 4;
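  // For example, assuming (hypothetically) that words_scanned_period is
  // 12*1024, the limit above lands only a quarter-period beyond the last
  // recalculation, so the next regular_clock_call() comes after at most
  // about 3*1024 more scanned words instead of a full period.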
  3270 void CMTask::move_entries_to_global_stack() {
  3271   // local array where we'll store the entries that will be popped
  3272   // from the local queue
  3273   oop buffer[global_stack_transfer_size];
  3275   int n = 0;
  3276   oop obj;
  3277   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
  3278     buffer[n] = obj;
  3279     ++n;
  3282   if (n > 0) {
  3283     // we popped at least one entry from the local queue
  3285     statsOnly( ++_global_transfers_to; _local_pops += n );
  3287     if (!_cm->mark_stack_push(buffer, n)) {
  3288       if (_cm->verbose_low())
  3289         gclog_or_tty->print_cr("[%d] aborting due to global stack overflow", _task_id);
  3290       set_has_aborted();
  3291     } else {
  3292       // the transfer was successful
  3294       if (_cm->verbose_medium())
  3295         gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
  3296                                _task_id, n);
  3297       statsOnly( int tmp_size = _cm->mark_stack_size();
  3298                  if (tmp_size > _global_max_size)
  3299                    _global_max_size = tmp_size;
  3300                  _global_pushes += n );
  3304   // this operation was quite expensive, so decrease the limits
  3305   decrease_limits();
  3308 void CMTask::get_entries_from_global_stack() {
  3309   // local array where we'll store the entries that will be popped
  3310   // from the global stack.
  3311   oop buffer[global_stack_transfer_size];
  3312   int n;
  3313   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
  3314   assert(n <= global_stack_transfer_size,
  3315          "we should not pop more than the given limit");
  3316   if (n > 0) {
  3317     // yes, we did actually pop at least one entry
  3319     statsOnly( ++_global_transfers_from; _global_pops += n );
  3320     if (_cm->verbose_medium())
  3321       gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
  3322                              _task_id, n);
  3323     for (int i = 0; i < n; ++i) {
  3324       bool success = _task_queue->push(buffer[i]);
  3325       // We only call this when the local queue is empty or under a
  3326       // given target limit. So, we do not expect this push to fail.
  3327       assert(success, "invariant");
  3330     statsOnly( int tmp_size = _task_queue->size();
  3331                if (tmp_size > _local_max_size)
  3332                  _local_max_size = tmp_size;
  3333                _local_pushes += n );
  3336   // this operation was quite expensive, so decrease the limits
  3337   decrease_limits();
  3340 void CMTask::drain_local_queue(bool partially) {
  3341   if (has_aborted())
  3342     return;
  3344   // Decide what the target size is, depending on whether we're going to
  3345   // drain it partially (so that other tasks can steal if they run out
  3346   // of things to do) or totally (at the very end).
  3347   size_t target_size;
  3348   if (partially)
  3349     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
  3350   else
  3351     target_size = 0;
  3353   if (_task_queue->size() > target_size) {
  3354     if (_cm->verbose_high())
  3355       gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
  3356                              _task_id, target_size);
  3358     oop obj;
  3359     bool ret = _task_queue->pop_local(obj);
  3360     while (ret) {
  3361       statsOnly( ++_local_pops );
  3363       if (_cm->verbose_high())
  3364         gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
  3365                                (void*) obj);
  3367       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
  3368       assert(!_g1h->heap_region_containing(obj)->is_on_free_list(),
  3369              "invariant");
  3371       scan_object(obj);
  3373       if (_task_queue->size() <= target_size || has_aborted())
  3374         ret = false;
  3375       else
  3376         ret = _task_queue->pop_local(obj);
  3379     if (_cm->verbose_high())
  3380       gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
  3381                              _task_id, _task_queue->size());
  3385 void CMTask::drain_global_stack(bool partially) {
  3386   if (has_aborted())
  3387     return;
  3389   // We have a policy to drain the local queue before we attempt to
  3390   // drain the global stack.
  3391   assert(partially || _task_queue->size() == 0, "invariant");
  3393   // Decide what the target size is, depending on whether we're going to
  3394   // drain it partially (so that other tasks can steal if they run out
  3395   // of things to do) or totally (at the very end).  Notice that,
  3396   // because we move entries from the global stack in chunks or
  3397   // because another task might be doing the same, we might in fact
  3398   // drop below the target. But, this is not a problem.
  3399   size_t target_size;
  3400   if (partially)
  3401     target_size = _cm->partial_mark_stack_size_target();
  3402   else
  3403     target_size = 0;
  3405   if (_cm->mark_stack_size() > target_size) {
  3406     if (_cm->verbose_low())
  3407       gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
  3408                              _task_id, target_size);
  3410     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
  3411       get_entries_from_global_stack();
  3412       drain_local_queue(partially);
  3415     if (_cm->verbose_low())
  3416       gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
  3417                              _task_id, _cm->mark_stack_size());
  3421 // The SATB queue code makes several assumptions about whether to call
  3422 // the par or non-par versions of the methods; this is why some of the
  3423 // code is replicated. We should really get rid of the single-threaded
  3424 // version of the code to simplify things.
  3425 void CMTask::drain_satb_buffers() {
  3426   if (has_aborted())
  3427     return;
  3429   // We set this so that the regular clock knows that we're in the
  3430   // middle of draining buffers and doesn't set the abort flag when it
  3431   // notices that SATB buffers are available for draining. It'd be
  3432   // very counterproductive if it did that. :-)
  3433   _draining_satb_buffers = true;
  3435   CMObjectClosure oc(this);
  3436   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  3437   if (G1CollectedHeap::use_parallel_gc_threads())
  3438     satb_mq_set.set_par_closure(_task_id, &oc);
  3439   else
  3440     satb_mq_set.set_closure(&oc);
  3442   // This keeps claiming and applying the closure to completed buffers
  3443   // until we run out of buffers or we need to abort.
  3444   if (G1CollectedHeap::use_parallel_gc_threads()) {
  3445     while (!has_aborted() &&
  3446            satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
  3447       if (_cm->verbose_medium())
  3448         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
  3449       statsOnly( ++_satb_buffers_processed );
  3450       regular_clock_call();
  3452   } else {
  3453     while (!has_aborted() &&
  3454            satb_mq_set.apply_closure_to_completed_buffer()) {
  3455       if (_cm->verbose_medium())
  3456         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
  3457       statsOnly( ++_satb_buffers_processed );
  3458       regular_clock_call();
  3462   if (!concurrent() && !has_aborted()) {
  3463     // We should only do this during remark.
  3464     if (G1CollectedHeap::use_parallel_gc_threads())
  3465       satb_mq_set.par_iterate_closure_all_threads(_task_id);
  3466     else
  3467       satb_mq_set.iterate_closure_all_threads();
  3470   _draining_satb_buffers = false;
  3472   assert(has_aborted() ||
  3473          concurrent() ||
  3474          satb_mq_set.completed_buffers_num() == 0, "invariant");
  3476   if (G1CollectedHeap::use_parallel_gc_threads())
  3477     satb_mq_set.set_par_closure(_task_id, NULL);
  3478   else
  3479     satb_mq_set.set_closure(NULL);
  3481   // again, this was a potentially expensive operation, decrease the
  3482   // limits to get the regular clock call early
  3483   decrease_limits();
  3486 void CMTask::drain_region_stack(BitMapClosure* bc) {
  3487   if (has_aborted())
  3488     return;
  3490   assert(_region_finger == NULL,
  3491          "it should be NULL when we're not scanning a region");
  3493   if (!_cm->region_stack_empty() || !_aborted_region.is_empty()) {
  3494     if (_cm->verbose_low())
  3495       gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
  3496                              _task_id, _cm->region_stack_size());
  3498     MemRegion mr;
  3500     if (!_aborted_region.is_empty()) {
  3501       mr = _aborted_region;
  3502       _aborted_region = MemRegion();
  3504       if (_cm->verbose_low())
  3505         gclog_or_tty->print_cr("[%d] scanning aborted region [ " PTR_FORMAT ", " PTR_FORMAT " )",
  3506                              _task_id, mr.start(), mr.end());
  3507     } else {
  3508       mr = _cm->region_stack_pop_lock_free();
  3509       // it returns MemRegion() if the pop fails
  3510       statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
  3513     while (mr.start() != NULL) {
  3514       if (_cm->verbose_medium())
  3515         gclog_or_tty->print_cr("[%d] we are scanning region "
  3516                                "["PTR_FORMAT", "PTR_FORMAT")",
  3517                                _task_id, mr.start(), mr.end());
  3519       assert(mr.end() <= _cm->finger(),
  3520              "otherwise the region shouldn't be on the stack");
  3521       assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
  3522       if (_nextMarkBitMap->iterate(bc, mr)) {
  3523         assert(!has_aborted(),
  3524                "cannot abort the task without aborting the bitmap iteration");
  3526         // We finished iterating over the region without aborting.
  3527         regular_clock_call();
  3528         if (has_aborted())
  3529           mr = MemRegion();
  3530         else {
  3531           mr = _cm->region_stack_pop_lock_free();
  3532           // it returns MemRegion() if the pop fails
  3533           statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
  3535       } else {
  3536         assert(has_aborted(), "currently the only way to do so");
  3538         // The only way to abort the bitmap iteration is to return
  3539         // false from the do_bit() method. However, inside the
  3540         // do_bit() method we move the _region_finger to point to the
  3541         // object currently being looked at. So, if we bail out, we
  3542         // have definitely set _region_finger to something non-null.
  3543         assert(_region_finger != NULL, "invariant");
  3545         // Make sure that any previously aborted region has been
  3546         // cleared.
  3547         assert(_aborted_region.is_empty(), "aborted region not cleared");
  3549         // The iteration was actually aborted. So now _region_finger
  3550         // points to the address of the object we last scanned. If we
  3551         // leave it there, when we restart this task, we will rescan
  3552         // the object. It is easy to avoid this. We move the finger by
  3553         // enough to point to the next possible object header (the
  3554         // bitmap knows by how much we need to move it as it knows its
  3555         // granularity).
  3556         MemRegion newRegion =
  3557           MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());
  3559         if (!newRegion.is_empty()) {
  3560           if (_cm->verbose_low()) {
  3561             gclog_or_tty->print_cr("[%d] recording unscanned region "
  3562                                    "[" PTR_FORMAT "," PTR_FORMAT ") in CMTask",
  3563                                    _task_id,
  3564                                    newRegion.start(), newRegion.end());
  3566           // Now record the part of the region we didn't scan to
  3567           // make sure this task scans it later.
  3568           _aborted_region = newRegion;
  3570         // break from while
  3571         mr = MemRegion();
  3573       _region_finger = NULL;
  3576     if (_cm->verbose_low())
  3577       gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
  3578                              _task_id, _cm->region_stack_size());
  3582 void CMTask::print_stats() {
  3583   gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
  3584                          _task_id, _calls);
  3585   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
  3586                          _elapsed_time_ms, _termination_time_ms);
  3587   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
  3588                          _step_times_ms.num(), _step_times_ms.avg(),
  3589                          _step_times_ms.sd());
  3590   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
  3591                          _step_times_ms.maximum(), _step_times_ms.sum());
  3593 #if _MARKING_STATS_
  3594   gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
  3595                          _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
  3596                          _all_clock_intervals_ms.sd());
  3597   gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
  3598                          _all_clock_intervals_ms.maximum(),
  3599                          _all_clock_intervals_ms.sum());
  3600   gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
  3601                          _clock_due_to_scanning, _clock_due_to_marking);
  3602   gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
  3603                          _objs_scanned, _objs_found_on_bitmap);
  3604   gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
  3605                          _local_pushes, _local_pops, _local_max_size);
  3606   gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
  3607                          _global_pushes, _global_pops, _global_max_size);
  3608   gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
  3609                          _global_transfers_to,_global_transfers_from);
  3610   gclog_or_tty->print_cr("  Regions: claimed = %d, Region Stack: pops = %d",
  3611                          _regions_claimed, _region_stack_pops);
  3612   gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
  3613   gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
  3614                          _steal_attempts, _steals);
  3615   gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
  3616   gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
  3617                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
  3618   gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
  3619                          _aborted_timed_out, _aborted_satb, _aborted_termination);
  3620 #endif // _MARKING_STATS_
  3623 /*****************************************************************************
  3625     The do_marking_step(time_target_ms) method is the building block
  3626     of the parallel marking framework. It can be called in parallel
  3627     with other invocations of do_marking_step() on different tasks
  3628     (but only one per task, obviously) and concurrently with the
  3629     mutator threads, or during remark, hence it eliminates the need
  3630     for two versions of the code. When called during remark, it will
  3631     pick up from where the task left off during the concurrent marking
  3632     phase. Interestingly, tasks are also claimable during evacuation
  3633     pauses, since do_marking_step() ensures that it aborts before
  3634     it needs to yield.
  3636     The data structures that it uses to do marking work are the
  3637     following:
  3639       (1) Marking Bitmap. If there are gray objects that appear only
  3640       on the bitmap (this happens either when dealing with an overflow
  3641       or when the initial marking phase has simply marked the roots
  3642       and didn't push them on the stack), then tasks claim heap
  3643       regions whose bitmap they then scan to find gray objects. A
  3644       global finger indicates where the end of the last claimed region
  3645       is. A local finger indicates how far into the region a task has
  3646       scanned. The two fingers are used to determine how to gray an
  3647       object (i.e. whether simply marking it is OK, as it will be
  3648       visited by a task in the future, or whether it needs to be also
  3649       pushed on a stack).
  3651       (2) Local Queue. The local queue of the task, which the task
  3652       accesses reasonably efficiently. Other tasks can steal from
  3653       it when they run out of work. Throughout the marking phase, a
  3654       task attempts to keep its local queue short but not totally
  3655       empty, so that entries are available for stealing by other
  3656     tasks. Only when there is no more work will a task totally
  3657     drain its local queue.
  3659       (3) Global Mark Stack. This handles local queue overflow. During
  3660       marking only sets of entries are moved between it and the local
  3661     queues, as access to it requires a mutex and more fine-grained
  3662     interaction with it, which might cause contention. If it
  3663       overflows, then the marking phase should restart and iterate
  3664       over the bitmap to identify gray objects. Throughout the marking
  3665       phase, tasks attempt to keep the global mark stack at a small
  3666       length but not totally empty, so that entries are available for
  3667     popping by other tasks. Only when there is no more work will
  3668     tasks totally drain the global mark stack.
  3670       (4) Global Region Stack. Entries on it correspond to areas of
  3671       the bitmap that need to be scanned since they contain gray
  3672       objects. Pushes on the region stack only happen during
  3673       evacuation pauses and typically correspond to areas covered by
  3674     GC LABs. If it overflows, then the marking phase should restart
  3675       and iterate over the bitmap to identify gray objects. Tasks will
  3676       try to totally drain the region stack as soon as possible.
  3678       (5) SATB Buffer Queue. This is where completed SATB buffers are
  3679       made available. Buffers are regularly removed from this queue
  3680       and scanned for roots, so that the queue doesn't get too
  3681       long. During remark, all completed buffers are processed, as
  3682     well as the filled-in parts of any uncompleted buffers.
  3684     The do_marking_step() method tries to abort when the time target
  3685     has been reached. There are a few other cases when the
  3686     do_marking_step() method also aborts:
  3688       (1) When the marking phase has been aborted (after a Full GC).
  3690       (2) When a global overflow (either on the global stack or the
  3691       region stack) has been triggered. Before the task aborts, it
  3692       will actually sync up with the other tasks to ensure that all
  3693       the marking data structures (local queues, stacks, fingers etc.)
  3694       are re-initialised so that when do_marking_step() completes,
  3695       the marking phase can immediately restart.
  3697       (3) When enough completed SATB buffers are available. The
  3698       do_marking_step() method only tries to drain SATB buffers right
  3699       at the beginning. So, if enough buffers are available, the
  3700       marking step aborts and the SATB buffers are processed at
  3701       the beginning of the next invocation.
  3703       (4) To yield. When we have to yield, we abort and do the yield
  3704       right at the end of do_marking_step(). This saves us from a lot
  3705       of hassle as, by yielding, we might allow a Full GC. If this
  3706       happens, then objects will be compacted underneath our feet, the
  3707       heap might shrink, etc. We avoid having to check for all this by
  3708       just aborting and doing the yield right at the end.
  3710     From the above it follows that the do_marking_step() method should
  3711     be called in a loop (or, otherwise, regularly) until it completes.
  3713     If a marking step completes without its has_aborted() flag being
  3714     true, it means it has completed the current marking phase (and
  3715     also all other marking tasks have done so and have all synced up).
  3717     A method called regular_clock_call() is invoked "regularly" (in
  3718     sub ms intervals) throughout marking. It is this clock method that
  3719     checks all the abort conditions which were mentioned above and
  3720     decides when the task should abort. A work-based scheme is used to
  3721     trigger this clock method: when the number of object words the
  3722     marking phase has scanned or the number of references the marking
  3723     phase has visited reach a given limit. Additional invocations of
  3724     the clock method have been planted in a few other strategic places
  3725     too. The initial reason for the clock method was to avoid calling
  3726     vtime too regularly, as it is quite expensive. So, once it was in
  3727     place, it was natural to piggy-back all the other conditions on it
  3728     too and not constantly check them throughout the code.
  3730  *****************************************************************************/
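// A hedged, illustrative sketch of the calling protocol described above:
// do_marking_step() is invoked in a loop until it completes without
// aborting. Neither example_marking_loop nor target_ms exists in this
// file; the real drivers live in the concurrent mark thread and in the
// remark VM operation.
static void example_marking_loop(CMTask* task, double target_ms) {
  do {
    task->do_marking_step(target_ms);
    // If the step aborted (overflow, yield request, time-out, pending
    // SATB buffers), the caller is expected to yield and/or sync with
    // the other tasks as appropriate before calling it again.
  } while (task->has_aborted());
  // Completing without aborting means this task, and all other tasks,
  // have finished the current marking phase.
}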
  3732 void CMTask::do_marking_step(double time_target_ms) {
  3733   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
  3734   assert(concurrent() == _cm->concurrent(), "they should be the same");
  3736   assert(concurrent() || _cm->region_stack_empty(),
  3737          "the region stack should have been cleared before remark");
  3738   assert(concurrent() || !_cm->has_aborted_regions(),
  3739          "aborted regions should have been cleared before remark");
  3740   assert(_region_finger == NULL,
  3741          "this should be non-null only when a region is being scanned");
  3743   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
  3744   assert(_task_queues != NULL, "invariant");
  3745   assert(_task_queue != NULL, "invariant");
  3746   assert(_task_queues->queue(_task_id) == _task_queue, "invariant");
  3748   assert(!_claimed,
  3749          "only one thread should claim this task at any one time");
  3751   // OK, this doesn't safeguard against all possible scenarios, as it is
  3752   // possible for two threads to set the _claimed flag at the same
  3753   // time. But it is only for debugging purposes anyway and it will
  3754   // catch most problems.
  3755   _claimed = true;
  3757   _start_time_ms = os::elapsedVTime() * 1000.0;
  3758   statsOnly( _interval_start_time_ms = _start_time_ms );
  3760   double diff_prediction_ms =
  3761     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
  3762   _time_target_ms = time_target_ms - diff_prediction_ms;
  3764   // set up the variables that are used in the work-based scheme to
  3765   // call the regular clock method
  3766   _words_scanned = 0;
  3767   _refs_reached  = 0;
  3768   recalculate_limits();
  3770   // clear all flags
  3771   clear_has_aborted();
  3772   _has_aborted_timed_out = false;
  3773   _draining_satb_buffers = false;
  3775   ++_calls;
  3777   if (_cm->verbose_low())
  3778     gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
  3779                            "target = %1.2lfms >>>>>>>>>>",
  3780                            _task_id, _calls, _time_target_ms);
  3782   // Set up the bitmap and oop closures. Anything that uses them is
  3783   // eventually called from this method, so it is OK to allocate these
  3784   // statically.
  3785   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
  3786   CMOopClosure    oop_closure(_g1h, _cm, this);
  3787   set_oop_closure(&oop_closure);
  3789   if (_cm->has_overflown()) {
  3790     // This can happen if the region stack or the mark stack overflows
  3791     // during a GC pause and this task, after a yield point,
  3792     // restarts. We have to abort as we need to get into the overflow
  3793     // protocol which happens right at the end of this task.
  3794     set_has_aborted();
  3797   // First drain any available SATB buffers. After this, we will not
  3798   // look at SATB buffers before the next invocation of this method.
  3799   // If enough completed SATB buffers are queued up, the regular clock
  3800   // will abort this task so that it restarts.
  3801   drain_satb_buffers();
  3802   // ...then partially drain the local queue and the global stack
  3803   drain_local_queue(true);
  3804   drain_global_stack(true);
  3806   // Then totally drain the region stack.  We will not look at
  3807   // it again before the next invocation of this method. Entries on
  3808   // the region stack are only added during evacuation pauses, for
  3809   // which we have to yield. When we do, we abort the task anyway so
  3810   // it will look at the region stack again when it restarts.
  3811   bitmap_closure.set_scanning_heap_region(false);
  3812   drain_region_stack(&bitmap_closure);
  3813   // ...then partially drain the local queue and the global stack
  3814   drain_local_queue(true);
  3815   drain_global_stack(true);
  3817   do {
  3818     if (!has_aborted() && _curr_region != NULL) {
  3819       // This means that we're already holding on to a region.
  3820       assert(_finger != NULL, "if region is not NULL, then the finger "
  3821              "should not be NULL either");
  3823       // We might have restarted this task after an evacuation pause
  3824       // which might have evacuated the region we're holding on to
  3825       // underneath our feet. Let's read its limit again to make sure
  3826       // that we do not iterate over a region of the heap that
  3827       // contains garbage (update_region_limit() will also move
  3828       // _finger to the start of the region if it is found empty).
  3829       update_region_limit();
  3830       // We will start from _finger not from the start of the region,
  3831       // as we might be restarting this task after aborting half-way
  3832       // through scanning this region. In this case, _finger points to
  3833       // the address where we last found a marked object. If this is a
  3834       // fresh region, _finger points to start().
  3835       MemRegion mr = MemRegion(_finger, _region_limit);
  3837       if (_cm->verbose_low())
  3838         gclog_or_tty->print_cr("[%d] we're scanning part "
  3839                                "["PTR_FORMAT", "PTR_FORMAT") "
  3840                                "of region "PTR_FORMAT,
  3841                                _task_id, _finger, _region_limit, _curr_region);
  3843       // Let's iterate over the bitmap of the part of the
  3844       // region that is left.
  3845       bitmap_closure.set_scanning_heap_region(true);
  3846       if (mr.is_empty() ||
  3847           _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
  3848         // We successfully completed iterating over the region. Now,
  3849         // let's give up the region.
  3850         giveup_current_region();
  3851         regular_clock_call();
  3852       } else {
  3853         assert(has_aborted(), "currently the only way to do so");
  3854         // The only way to abort the bitmap iteration is to return
  3855         // false from the do_bit() method. However, inside the
  3856         // do_bit() method we move the _finger to point to the
  3857         // object currently being looked at. So, if we bail out, we
  3858         // have definitely set _finger to something non-null.
  3859         assert(_finger != NULL, "invariant");
  3861         // Region iteration was actually aborted. So now _finger
  3862         // points to the address of the object we last scanned. If we
  3863         // leave it there, when we restart this task, we will rescan
  3864         // the object. It is easy to avoid this. We move the finger by
  3865         // enough to point to the next possible object header (the
  3866         // bitmap knows by how much we need to move it as it knows its
  3867         // granularity).
  3868         assert(_finger < _region_limit, "invariant");
  3869         HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
  3870         // Check if bitmap iteration was aborted while scanning the last object
  3871         if (new_finger >= _region_limit) {
  3872             giveup_current_region();
  3873         } else {
  3874             move_finger_to(new_finger);
  3878     // At this point we have either completed iterating over the
  3879     // region we were holding on to, or we have aborted.
  3881     // We then partially drain the local queue and the global stack.
  3882     // (Do we really need this?)
  3883     drain_local_queue(true);
  3884     drain_global_stack(true);
  3886     // Read the note on the claim_region() method on why it might
  3887     // return NULL with potentially more regions available for
  3888     // claiming and why we have to check out_of_regions() to determine
  3889     // whether we're done or not.
  3890     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
  3891       // We are going to try to claim a new region. We should have
  3892       // given up on the previous one.
  3893       // Separated the asserts so that we know which one fires.
  3894       assert(_curr_region  == NULL, "invariant");
  3895       assert(_finger       == NULL, "invariant");
  3896       assert(_region_limit == NULL, "invariant");
  3897       if (_cm->verbose_low())
  3898         gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
  3899       HeapRegion* claimed_region = _cm->claim_region(_task_id);
  3900       if (claimed_region != NULL) {
  3901         // Yes, we managed to claim one
  3902         statsOnly( ++_regions_claimed );
  3904         if (_cm->verbose_low())
  3905           gclog_or_tty->print_cr("[%d] we successfully claimed "
  3906                                  "region "PTR_FORMAT,
  3907                                  _task_id, claimed_region);
  3909         setup_for_region(claimed_region);
  3910         assert(_curr_region == claimed_region, "invariant");
  3912       // It is important to call the regular clock here. It might take
  3913       // a while to claim a region if, for example, we hit a large
  3914       // block of empty regions. So we need to call the regular clock
  3915       // method once round the loop to make sure it's called
  3916       // frequently enough.
  3917       regular_clock_call();
  3920     if (!has_aborted() && _curr_region == NULL) {
  3921       assert(_cm->out_of_regions(),
  3922              "at this point we should be out of regions");
  3924   } while ( _curr_region != NULL && !has_aborted());
  3926   if (!has_aborted()) {
  3927     // We cannot check whether the global stack is empty, since other
  3928     // tasks might be pushing objects to it concurrently. We also cannot
  3929     // check if the region stack is empty because if a thread is aborting
  3930     // it can push a partially done region back.
  3931     assert(_cm->out_of_regions(),
  3932            "at this point we should be out of regions");
  3934     if (_cm->verbose_low())
  3935       gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
  3937     // Try to reduce the number of available SATB buffers so that
  3938     // remark has less work to do.
  3939     drain_satb_buffers();
  3942   // Since we've done everything else, we can now totally drain the
  3943   // local queue and global stack.
  3944   drain_local_queue(false);
  3945   drain_global_stack(false);
  3947   // Attempt to steal work from other tasks' queues.
  3948   if (!has_aborted()) {
  3949     // We have not aborted. This means that we have finished all that
  3950     // we could. Let's try to do some stealing...
  3952     // We cannot check whether the global stack is empty, since other
  3953     // tasks might be pushing objects to it concurrently. We also cannot
  3954     // check if the region stack is empty because if a thread is aborting
  3955     // it can push a partially done region back.
  3956     assert(_cm->out_of_regions() && _task_queue->size() == 0,
  3957            "only way to reach here");
  3959     if (_cm->verbose_low())
  3960       gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
  3962     while (!has_aborted()) {
  3963       oop obj;
  3964       statsOnly( ++_steal_attempts );
  3966       if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
  3967         if (_cm->verbose_medium())
  3968           gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
  3969                                  _task_id, (void*) obj);
  3971         statsOnly( ++_steals );
  3973         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
  3974                "any stolen object should be marked");
  3975         scan_object(obj);
  3977         // And since we're towards the end, let's totally drain the
  3978         // local queue and global stack.
  3979         drain_local_queue(false);
  3980         drain_global_stack(false);
  3981       } else {
  3982         break;
  3987   // We still haven't aborted. Now, let's try to get into the
  3988   // termination protocol.
  3989   if (!has_aborted()) {
  3990     // We cannot check whether the global stack is empty, since other
  3991     // tasks might be concurrently pushing objects on it. We also cannot
  3992     // check if the region stack is empty because if a thread is aborting
  3993     // it can push a partially done region back.
  3994     // Separated the asserts so that we know which one fires.
  3995     assert(_cm->out_of_regions(), "only way to reach here");
  3996     assert(_task_queue->size() == 0, "only way to reach here");
  3998     if (_cm->verbose_low())
  3999       gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
  4001     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
  4002     // The CMTask class also extends the TerminatorTerminator class,
  4003     // hence its should_exit_termination() method will also decide
  4004     // whether to exit the termination protocol or not.
  4005     bool finished = _cm->terminator()->offer_termination(this);
  4006     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
  4007     _termination_time_ms +=
  4008       termination_end_time_ms - _termination_start_time_ms;
  4010     if (finished) {
  4011       // We're all done.
  4013       if (_task_id == 0) {
  4014         // let's allow task 0 to do this
  4015         if (concurrent()) {
  4016           assert(_cm->concurrent_marking_in_progress(), "invariant");
  4017           // we need to set this to false before the next
  4018           // safepoint. This way we ensure that the marking phase
  4019           // doesn't observe any more heap expansions.
  4020           _cm->clear_concurrent_marking_in_progress();
  4024       // We can now guarantee that the global stack is empty, since
  4025       // all other tasks have finished. We separated the guarantees so
  4026       // that, if a condition is false, we can immediately find out
  4027       // which one.
  4028       guarantee(_cm->out_of_regions(), "only way to reach here");
  4029       guarantee(_aborted_region.is_empty(), "only way to reach here");
  4030       guarantee(_cm->region_stack_empty(), "only way to reach here");
  4031       guarantee(_cm->mark_stack_empty(), "only way to reach here");
  4032       guarantee(_task_queue->size() == 0, "only way to reach here");
  4033       guarantee(!_cm->has_overflown(), "only way to reach here");
  4034       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
  4035       guarantee(!_cm->region_stack_overflow(), "only way to reach here");
  4037       if (_cm->verbose_low())
  4038         gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
  4039     } else {
  4040       // Apparently there's more work to do. Let's abort this task. It
  4041       // will restart it and we can hopefully find more things to do.
  4043       if (_cm->verbose_low())
  4044         gclog_or_tty->print_cr("[%d] apparently there is more work to do", _task_id);
  4046       set_has_aborted();
  4047       statsOnly( ++_aborted_termination );
  4051   // Mainly for debugging purposes to make sure that a pointer to the
  4052   // closure which was statically allocated in this frame doesn't
  4053   // escape it by accident.
  4054   set_oop_closure(NULL);
  4055   double end_time_ms = os::elapsedVTime() * 1000.0;
  4056   double elapsed_time_ms = end_time_ms - _start_time_ms;
  4057   // Update the step history.
  4058   _step_times_ms.add(elapsed_time_ms);
  4060   if (has_aborted()) {
  4061     // The task was aborted for some reason.
  4063     statsOnly( ++_aborted );
  4065     if (_has_aborted_timed_out) {
  4066       double diff_ms = elapsed_time_ms - _time_target_ms;
  4067       // Keep statistics of how well we did with respect to hitting
  4068       // our target only if we actually timed out (if we aborted for
  4069       // other reasons, then the results might get skewed).
  4070       _marking_step_diffs_ms.add(diff_ms);
  4073     if (_cm->has_overflown()) {
  4074       // This is the interesting one. We aborted because a global
  4075       // overflow was raised. This means we have to restart the
  4076       // marking phase and start iterating over regions. However, in
  4077       // order to do this we have to make sure that all tasks stop
  4078       // what they are doing and re-initialise in a safe manner. We
  4079       // will achieve this with the use of two barrier sync points.
  4081       if (_cm->verbose_low())
  4082         gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
  4084       _cm->enter_first_sync_barrier(_task_id);
  4085       // When we exit this sync barrier we know that all tasks have
  4086       // stopped doing marking work. So, it's now safe to
  4087       // re-initialise our data structures. At the end of this method,
  4088       // task 0 will clear the global data structures.
  4090       statsOnly( ++_aborted_overflow );
  4092       // We clear the local state of this task...
  4093       clear_region_fields();
  4095       // ...and enter the second barrier.
  4096       _cm->enter_second_sync_barrier(_task_id);
  4097   // At this point everything has been re-initialised and we're
  4098       // ready to restart.
  4101     if (_cm->verbose_low()) {
  4102       gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
  4103                              "elapsed = %1.2lfms <<<<<<<<<<",
  4104                              _task_id, _time_target_ms, elapsed_time_ms);
  4105       if (_cm->has_aborted())
  4106         gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
  4107                                _task_id);
  4109   } else {
  4110     if (_cm->verbose_low())
  4111       gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
  4112                              "elapsed = %1.2lfms <<<<<<<<<<",
  4113                              _task_id, _time_target_ms, elapsed_time_ms);
  4116   _claimed = false;
  4119 CMTask::CMTask(int task_id,
  4120                ConcurrentMark* cm,
  4121                CMTaskQueue* task_queue,
  4122                CMTaskQueueSet* task_queues)
  4123   : _g1h(G1CollectedHeap::heap()),
  4124     _task_id(task_id), _cm(cm),
  4125     _claimed(false),
  4126     _nextMarkBitMap(NULL), _hash_seed(17),
  4127     _task_queue(task_queue),
  4128     _task_queues(task_queues),
  4129     _oop_closure(NULL),
  4130     _aborted_region(MemRegion()) {
  4131   guarantee(task_queue != NULL, "invariant");
  4132   guarantee(task_queues != NULL, "invariant");
  4134   statsOnly( _clock_due_to_scanning = 0;
  4135              _clock_due_to_marking  = 0 );
  4137   _marking_step_diffs_ms.add(0.5);
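// A hypothetical sketch of how a marking task is typically created and
// wired up; ConcurrentMark's constructor performs the real setup, and
// example_make_task with its local names is purely illustrative.
static CMTask* example_make_task(int id,
                                 ConcurrentMark* cm,
                                 CMTaskQueueSet* queues) {
  CMTaskQueue* queue = new CMTaskQueue();  // this task's local queue
  queue->initialize();
  queues->register_queue(id, queue);       // make it stealable by others
  return new CMTask(id, cm, queue, queues);
}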
