src/share/vm/gc_implementation/g1/concurrentMark.cpp


author       tonyp
date         Wed, 19 Jan 2011 19:30:42 -0500
changeset    2472:0fa27f37d4d4
parent       2469:7e37af9d69ef
child        2493:97ba643ea3ed
child        2494:234761c55641
permissions  -rw-r--r--

6977804: G1: remove the zero-filling thread
Summary: This changeset removes the zero-filling thread from G1 and collapses the two free region lists we had before (the "free" and "unclean" lists) into one. The new free list uses the new heap region sets / lists abstractions that we'll ultimately use to keep track of all regions in the heap. A heap region set was also introduced for the humongous regions. Finally, this change increases the concurrency between the thread that completes freeing regions (after a cleanup pause) and the rest of the system (previously, we had to wait for that thread to complete before allocating a new region). The changeset also includes a lot of refactoring and code simplification.
Reviewed-by: jcoomes, johnc

     1 /*
     2  * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.
     8  *
     9  * This code is distributed in the hope that it will be useful, but WITHOUT
    10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    12  * version 2 for more details (a copy is included in the LICENSE file that
    13  * accompanied this code).
    14  *
    15  * You should have received a copy of the GNU General Public License version
    16  * 2 along with this work; if not, write to the Free Software Foundation,
    17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    18  *
    19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    20  * or visit www.oracle.com if you need additional information or have any
    21  * questions.
    22  *
    23  */
    25 #include "precompiled.hpp"
    26 #include "classfile/symbolTable.hpp"
    27 #include "gc_implementation/g1/concurrentMark.hpp"
    28 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
    29 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
    30 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
    31 #include "gc_implementation/g1/g1RemSet.hpp"
    32 #include "gc_implementation/g1/heapRegionRemSet.hpp"
    33 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
    34 #include "gc_implementation/shared/vmGCOperations.hpp"
    35 #include "memory/genOopClosures.inline.hpp"
    36 #include "memory/referencePolicy.hpp"
    37 #include "memory/resourceArea.hpp"
    38 #include "oops/oop.inline.hpp"
    39 #include "runtime/handles.inline.hpp"
    40 #include "runtime/java.hpp"
    42 //
    43 // CMS Bit Map Wrapper
    45 CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter):
    46   _bm((uintptr_t*)NULL,0),
    47   _shifter(shifter) {
    48   _bmStartWord = (HeapWord*)(rs.base());
    49   _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
    50   ReservedSpace brs(ReservedSpace::allocation_align_size_up(
    51                      (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
    53   guarantee(brs.is_reserved(), "couldn't allocate CMS bit map");
    54   // For now we'll just commit all of the bit map up front.
    55   // Later on we'll try to be more parsimonious with swap.
    56   guarantee(_virtual_space.initialize(brs, brs.size()),
    57             "couldn't reserve backing store for CMS bit map");
    58   assert(_virtual_space.committed_size() == brs.size(),
    59          "didn't reserve backing store for all of CMS bit map?");
    60   _bm.set_map((uintptr_t*)_virtual_space.low());
    61   assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
    62          _bmWordSize, "inconsistency in bit map sizing");
    63   _bm.set_size(_bmWordSize >> _shifter);
    64 }
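       // A rough sizing sketch (assumptions, not taken from this file): each
       // bit of the bitmap covers 2^_shifter heap words, and the backing store
       // reserved above is (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1
       // bytes. On a 64-bit VM where MinObjAlignment is one heap word (so the
       // _shifter passed in by ConcurrentMark is 0), a 1 GB heap has 2^27 heap
       // words, so each marking bitmap needs about 2^27 / 8 bytes = 16 MB,
       // i.e. roughly 32 MB for the prev/next bitmap pair.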
    66 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
    67                                                HeapWord* limit) const {
    68   // First we must round addr *up* to a possible object boundary.
    69   addr = (HeapWord*)align_size_up((intptr_t)addr,
    70                                   HeapWordSize << _shifter);
    71   size_t addrOffset = heapWordToOffset(addr);
    72   if (limit == NULL) limit = _bmStartWord + _bmWordSize;
    73   size_t limitOffset = heapWordToOffset(limit);
    74   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
    75   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
    76   assert(nextAddr >= addr, "get_next_one postcondition");
    77   assert(nextAddr == limit || isMarked(nextAddr),
    78          "get_next_one postcondition");
    79   return nextAddr;
    80 }
    82 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
    83                                                  HeapWord* limit) const {
    84   size_t addrOffset = heapWordToOffset(addr);
    85   if (limit == NULL) limit = _bmStartWord + _bmWordSize;
    86   size_t limitOffset = heapWordToOffset(limit);
    87   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
    88   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
    89   assert(nextAddr >= addr, "get_next_one postcondition");
    90   assert(nextAddr == limit || !isMarked(nextAddr),
    91          "get_next_one postcondition");
    92   return nextAddr;
    93 }
    95 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
    96   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
    97   return (int) (diff >> _shifter);
    98 }
   100 bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
   101   HeapWord* left  = MAX2(_bmStartWord, mr.start());
   102   HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
   103   if (right > left) {
   104     // Right-open interval [leftOffset, rightOffset).
   105     return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
   106   } else {
   107     return true;
   108   }
   109 }
   111 void CMBitMapRO::mostly_disjoint_range_union(BitMap*   from_bitmap,
   112                                              size_t    from_start_index,
   113                                              HeapWord* to_start_word,
   114                                              size_t    word_num) {
   115   _bm.mostly_disjoint_range_union(from_bitmap,
   116                                   from_start_index,
   117                                   heapWordToOffset(to_start_word),
   118                                   word_num);
   119 }
   121 #ifndef PRODUCT
   122 bool CMBitMapRO::covers(ReservedSpace rs) const {
   123   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
   124   assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
   125          "size inconsistency");
   126   return _bmStartWord == (HeapWord*)(rs.base()) &&
   127          _bmWordSize  == rs.size()>>LogHeapWordSize;
   128 }
   129 #endif
   131 void CMBitMap::clearAll() {
   132   _bm.clear();
   133   return;
   134 }
   136 void CMBitMap::markRange(MemRegion mr) {
   137   mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
   138   assert(!mr.is_empty(), "unexpected empty region");
   139   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
   140           ((HeapWord *) mr.end())),
   141          "markRange memory region end is not card aligned");
   142   // convert address range into offset range
   143   _bm.at_put_range(heapWordToOffset(mr.start()),
   144                    heapWordToOffset(mr.end()), true);
   145 }
   147 void CMBitMap::clearRange(MemRegion mr) {
   148   mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
   149   assert(!mr.is_empty(), "unexpected empty region");
   150   // convert address range into offset range
   151   _bm.at_put_range(heapWordToOffset(mr.start()),
   152                    heapWordToOffset(mr.end()), false);
   153 }
   155 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
   156                                             HeapWord* end_addr) {
   157   HeapWord* start = getNextMarkedWordAddress(addr);
   158   start = MIN2(start, end_addr);
   159   HeapWord* end   = getNextUnmarkedWordAddress(start);
   160   end = MIN2(end, end_addr);
   161   assert(start <= end, "Consistency check");
   162   MemRegion mr(start, end);
   163   if (!mr.is_empty()) {
   164     clearRange(mr);
   165   }
   166   return mr;
   167 }
   169 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
   170   _base(NULL), _cm(cm)
   171 #ifdef ASSERT
   172   , _drain_in_progress(false)
   173   , _drain_in_progress_yields(false)
   174 #endif
   175 {}
   177 void CMMarkStack::allocate(size_t size) {
   178   _base = NEW_C_HEAP_ARRAY(oop, size);
   179   if (_base == NULL)
   180     vm_exit_during_initialization("Failed to allocate "
   181                                   "CM region mark stack");
   182   _index = 0;
   183   // QQQQ cast ...
   184   _capacity = (jint) size;
   185   _oops_do_bound = -1;
   186   NOT_PRODUCT(_max_depth = 0);
   187 }
   189 CMMarkStack::~CMMarkStack() {
   190   if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base);
   191 }
   193 void CMMarkStack::par_push(oop ptr) {
   194   while (true) {
   195     if (isFull()) {
   196       _overflow = true;
   197       return;
   198     }
   199     // Otherwise...
   200     jint index = _index;
   201     jint next_index = index+1;
   202     jint res = Atomic::cmpxchg(next_index, &_index, index);
   203     if (res == index) {
   204       _base[index] = ptr;
   205       // Note that we don't maintain this atomically.  We could, but it
   206       // doesn't seem necessary.
   207       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
   208       return;
   209     }
   210     // Otherwise, we need to try again.
   211   }
   212 }
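       // The loop above is a bounded lock-free push: read _index, CAS it
       // forward, and only the CAS winner writes its slot. The following is a
       // minimal standalone sketch of the same technique using std::atomic in
       // place of the HotSpot Atomic / jint / oop types; it is an illustration
       // only and, like the loop above, publishes the slot after the CAS, so
       // it is not meant to be popped while a push to the same slot is still
       // in flight.
       #if 0
       #include <atomic>
       #include <cstddef>

       template <typename T, std::size_t Capacity>
       class BoundedLockFreeStack {
         T                 _base[Capacity];
         std::atomic<int>  _index{0};
         std::atomic<bool> _overflow{false};
       public:
         void par_push(const T& value) {
           while (true) {
             int index = _index.load();
             if (index >= (int) Capacity) {
               _overflow = true;            // full: record overflow and give up
               return;
             }
             int next_index = index + 1;
             // Only the thread whose CAS succeeds owns slot 'index'.
             if (_index.compare_exchange_strong(index, next_index)) {
               _base[index] = value;
               return;
             }
             // CAS failed: another thread won the race; retry with a fresh index.
           }
         }
         bool overflowed() const { return _overflow; }
       };
       #endif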
   214 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
   215   while (true) {
   216     if (isFull()) {
   217       _overflow = true;
   218       return;
   219     }
   220     // Otherwise...
   221     jint index = _index;
   222     jint next_index = index + n;
   223     if (next_index > _capacity) {
   224       _overflow = true;
   225       return;
   226     }
   227     jint res = Atomic::cmpxchg(next_index, &_index, index);
   228     if (res == index) {
   229       for (int i = 0; i < n; i++) {
   230         int ind = index + i;
   231         assert(ind < _capacity, "By overflow test above.");
   232         _base[ind] = ptr_arr[i];
   233       }
   234       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
   235       return;
   236     }
   237     // Otherwise, we need to try again.
   238   }
   239 }
   242 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
   243   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
   244   jint start = _index;
   245   jint next_index = start + n;
   246   if (next_index > _capacity) {
   247     _overflow = true;
   248     return;
   249   }
   250   // Otherwise.
   251   _index = next_index;
   252   for (int i = 0; i < n; i++) {
   253     int ind = start + i;
   254     assert(ind < _capacity, "By overflow test above.");
   255     _base[ind] = ptr_arr[i];
   256   }
   257 }
   260 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
   261   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
   262   jint index = _index;
   263   if (index == 0) {
   264     *n = 0;
   265     return false;
   266   } else {
   267     int k = MIN2(max, index);
   268     jint new_ind = index - k;
   269     for (int j = 0; j < k; j++) {
   270       ptr_arr[j] = _base[new_ind + j];
   271     }
   272     _index = new_ind;
   273     *n = k;
   274     return true;
   275   }
   276 }
   279 CMRegionStack::CMRegionStack() : _base(NULL) {}
   281 void CMRegionStack::allocate(size_t size) {
   282   _base = NEW_C_HEAP_ARRAY(MemRegion, size);
   283   if (_base == NULL)
   284     vm_exit_during_initialization("Failed to allocate "
   285                                   "CM region mark stack");
   286   _index = 0;
   287   // QQQQ cast ...
   288   _capacity = (jint) size;
   289 }
   291 CMRegionStack::~CMRegionStack() {
   292   if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base);
   293 }
   295 void CMRegionStack::push_lock_free(MemRegion mr) {
   296   assert(mr.word_size() > 0, "Precondition");
   297   while (true) {
   298     jint index = _index;
   300     if (index >= _capacity) {
   301       _overflow = true;
   302       return;
   303     }
   304     // Otherwise...
   305     jint next_index = index+1;
   306     jint res = Atomic::cmpxchg(next_index, &_index, index);
   307     if (res == index) {
   308       _base[index] = mr;
   309       return;
   310     }
   311     // Otherwise, we need to try again.
   312   }
   313 }
   315 // Lock-free pop of the region stack. Called during the concurrent
   316 // marking / remark phases. Should only be called in tandem with
   317 // other lock-free pops.
   318 MemRegion CMRegionStack::pop_lock_free() {
   319   while (true) {
   320     jint index = _index;
   322     if (index == 0) {
   323       return MemRegion();
   324     }
   325     // Otherwise...
   326     jint next_index = index-1;
   327     jint res = Atomic::cmpxchg(next_index, &_index, index);
   328     if (res == index) {
   329       MemRegion mr = _base[next_index];
   330       if (mr.start() != NULL) {
   331         assert(mr.end() != NULL, "invariant");
   332         assert(mr.word_size() > 0, "invariant");
   333         return mr;
   334       } else {
   335         // that entry was invalidated... let's skip it
   336         assert(mr.end() == NULL, "invariant");
   337       }
   338     }
   339     // Otherwise, we need to try again.
   340   }
   341 }
   343 #if 0
   344 // The routines that manipulate the region stack with a lock are
   345 // not currently used. They should be retained, however, as a
   346 // diagnostic aid.
   348 void CMRegionStack::push_with_lock(MemRegion mr) {
   349   assert(mr.word_size() > 0, "Precondition");
   350   MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
   352   if (isFull()) {
   353     _overflow = true;
   354     return;
   355   }
   357   _base[_index] = mr;
   358   _index += 1;
   359 }
   361 MemRegion CMRegionStack::pop_with_lock() {
   362   MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
   364   while (true) {
   365     if (_index == 0) {
   366       return MemRegion();
   367     }
   368     _index -= 1;
   370     MemRegion mr = _base[_index];
   371     if (mr.start() != NULL) {
   372       assert(mr.end() != NULL, "invariant");
   373       assert(mr.word_size() > 0, "invariant");
   374       return mr;
   375     } else {
   376       // that entry was invalidated... let's skip it
   377       assert(mr.end() == NULL, "invariant");
   378     }
   379   }
   380 }
   381 #endif
   383 bool CMRegionStack::invalidate_entries_into_cset() {
   384   bool result = false;
   385   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   386   for (int i = 0; i < _oops_do_bound; ++i) {
   387     MemRegion mr = _base[i];
   388     if (mr.start() != NULL) {
   389       assert(mr.end() != NULL, "invariant");
   390       assert(mr.word_size() > 0, "invariant");
   391       HeapRegion* hr = g1h->heap_region_containing(mr.start());
   392       assert(hr != NULL, "invariant");
   393       if (hr->in_collection_set()) {
   394         // The region points into the collection set
   395         _base[i] = MemRegion();
   396         result = true;
   397       }
   398     } else {
   399       // that entry was invalidated... let's skip it
   400       assert(mr.end() == NULL, "invariant");
   401     }
   402   }
   403   return result;
   404 }
   406 template<class OopClosureClass>
   407 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
   408   assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
   409          || SafepointSynchronize::is_at_safepoint(),
   410          "Drain recursion must be yield-safe.");
   411   bool res = true;
   412   debug_only(_drain_in_progress = true);
   413   debug_only(_drain_in_progress_yields = yield_after);
   414   while (!isEmpty()) {
   415     oop newOop = pop();
   416     assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
   417     assert(newOop->is_oop(), "Expected an oop");
   418     assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
   419            "only grey objects on this stack");
   420     // iterate over the oops in this oop, marking and pushing
   421     // the ones in CMS generation.
   422     newOop->oop_iterate(cl);
   423     if (yield_after && _cm->do_yield_check()) {
   424       res = false; break;
   425     }
   426   }
   427   debug_only(_drain_in_progress = false);
   428   return res;
   429 }
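       // drain() above is the classic tri-color worklist loop: pop a grey
       // object, apply the closure to its fields (which may mark and push
       // further objects), and optionally yield between objects. A minimal
       // standalone sketch of that pattern, with hypothetical Node /
       // visit_child / should_yield stand-ins for oop / oop_iterate(cl) /
       // _cm->do_yield_check(), is given below for illustration only.
       #if 0
       #include <vector>
       #include <functional>

       struct Node { std::vector<Node*> children; };

       // Returns false if the drain stopped early because of a yield request,
       // mirroring the return value of CMMarkStack::drain().
       static bool drain_worklist(std::vector<Node*>& stack,
                                  const std::function<void(Node*)>& visit_child,
                                  const std::function<bool()>& should_yield,
                                  bool yield_after) {
         while (!stack.empty()) {
           Node* n = stack.back();            // pop one grey node
           stack.pop_back();
           for (Node* c : n->children) {      // "iterate over the oops in this oop"
             visit_child(c);                  // the closure may mark c and push it
           }
           if (yield_after && should_yield()) {
             return false;                    // caller will resume the drain later
           }
         }
         return true;
       }
       #endif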
   431 void CMMarkStack::oops_do(OopClosure* f) {
   432   if (_index == 0) return;
   433   assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
   434          "Bound must be set.");
   435   for (int i = 0; i < _oops_do_bound; i++) {
   436     f->do_oop(&_base[i]);
   437   }
   438   _oops_do_bound = -1;
   439 }
   441 bool ConcurrentMark::not_yet_marked(oop obj) const {
   442   return (_g1h->is_obj_ill(obj)
   443           || (_g1h->is_in_permanent(obj)
   444               && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
   445 }
   447 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
   448 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
   449 #endif // _MSC_VER
   451 ConcurrentMark::ConcurrentMark(ReservedSpace rs,
   452                                int max_regions) :
   453   _markBitMap1(rs, MinObjAlignment - 1),
   454   _markBitMap2(rs, MinObjAlignment - 1),
   456   _parallel_marking_threads(0),
   457   _sleep_factor(0.0),
   458   _marking_task_overhead(1.0),
   459   _cleanup_sleep_factor(0.0),
   460   _cleanup_task_overhead(1.0),
   461   _cleanup_list("Cleanup List"),
   462   _region_bm(max_regions, false /* in_resource_area*/),
   463   _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
   464            CardTableModRefBS::card_shift,
   465            false /* in_resource_area*/),
   466   _prevMarkBitMap(&_markBitMap1),
   467   _nextMarkBitMap(&_markBitMap2),
   468   _at_least_one_mark_complete(false),
   470   _markStack(this),
   471   _regionStack(),
   472   // _finger set in set_non_marking_state
   474   _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
   475   // _active_tasks set in set_non_marking_state
   476   // _tasks set inside the constructor
   477   _task_queues(new CMTaskQueueSet((int) _max_task_num)),
   478   _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),
   480   _has_overflown(false),
   481   _concurrent(false),
   482   _has_aborted(false),
   483   _restart_for_overflow(false),
   484   _concurrent_marking_in_progress(false),
   485   _should_gray_objects(false),
   487   // _verbose_level set below
   489   _init_times(),
   490   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
   491   _cleanup_times(),
   492   _total_counting_time(0.0),
   493   _total_rs_scrub_time(0.0),
   495   _parallel_workers(NULL)
   496 {
   497   CMVerboseLevel verbose_level =
   498     (CMVerboseLevel) G1MarkingVerboseLevel;
   499   if (verbose_level < no_verbose)
   500     verbose_level = no_verbose;
   501   if (verbose_level > high_verbose)
   502     verbose_level = high_verbose;
   503   _verbose_level = verbose_level;
   505   if (verbose_low())
   506     gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
   507                            "heap end = "PTR_FORMAT, _heap_start, _heap_end);
   509   _markStack.allocate(MarkStackSize);
   510   _regionStack.allocate(G1MarkRegionStackSize);
   512   // Create & start a ConcurrentMark thread.
   513   _cmThread = new ConcurrentMarkThread(this);
   514   assert(cmThread() != NULL, "CM Thread should have been created");
   515   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
   517   _g1h = G1CollectedHeap::heap();
   518   assert(CGC_lock != NULL, "Where's the CGC_lock?");
   519   assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
   520   assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
   522   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
   523   satb_qs.set_buffer_size(G1SATBBufferSize);
   525   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
   526   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);
   528   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
   529   _active_tasks = _max_task_num;
   530   for (int i = 0; i < (int) _max_task_num; ++i) {
   531     CMTaskQueue* task_queue = new CMTaskQueue();
   532     task_queue->initialize();
   533     _task_queues->register_queue(i, task_queue);
   535     _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
   536     _accum_task_vtime[i] = 0.0;
   537   }
   539   if (ConcGCThreads > ParallelGCThreads) {
   540     vm_exit_during_initialization("Can't have more ConcGCThreads "
   541                                   "than ParallelGCThreads.");
   542   }
   543   if (ParallelGCThreads == 0) {
   544     // if we are not running with any parallel GC threads we will not
   545     // spawn any marking threads either
   546     _parallel_marking_threads =   0;
   547     _sleep_factor             = 0.0;
   548     _marking_task_overhead    = 1.0;
   549   } else {
   550     if (ConcGCThreads > 0) {
   551       // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
   552       // if both are set
   554       _parallel_marking_threads = ConcGCThreads;
   555       _sleep_factor             = 0.0;
   556       _marking_task_overhead    = 1.0;
   557     } else if (G1MarkingOverheadPercent > 0) {
   558       // we will calculate the number of parallel marking threads
   559       // based on a target overhead with respect to the soft real-time
   560       // goal
   562       double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
   563       double overall_cm_overhead =
   564         (double) MaxGCPauseMillis * marking_overhead /
   565         (double) GCPauseIntervalMillis;
   566       double cpu_ratio = 1.0 / (double) os::processor_count();
   567       double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
   568       double marking_task_overhead =
   569         overall_cm_overhead / marking_thread_num *
   570                                                 (double) os::processor_count();
   571       double sleep_factor =
   572                          (1.0 - marking_task_overhead) / marking_task_overhead;
   574       _parallel_marking_threads = (size_t) marking_thread_num;
   575       _sleep_factor             = sleep_factor;
   576       _marking_task_overhead    = marking_task_overhead;
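             // Worked example (hypothetical flag values, for illustration
             // only): with G1MarkingOverheadPercent=10, MaxGCPauseMillis=200,
             // GCPauseIntervalMillis=1000 and 8 processors:
             //   marking_overhead      = 10 / 100            = 0.10
             //   overall_cm_overhead   = 200 * 0.10 / 1000   = 0.02
             //   cpu_ratio             = 1 / 8               = 0.125
             //   marking_thread_num    = ceil(0.02 / 0.125)  = 1
             //   marking_task_overhead = 0.02 / 1 * 8        = 0.16
             //   sleep_factor          = (1 - 0.16) / 0.16   = 5.25
             // i.e. one concurrent marking thread that sleeps about 5.25x as
             // long as it runs, keeping marking near the 2% overhead target.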
   577     } else {
   578       _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
   579       _sleep_factor             = 0.0;
   580       _marking_task_overhead    = 1.0;
   581     }
   583     if (parallel_marking_threads() > 1)
   584       _cleanup_task_overhead = 1.0;
   585     else
   586       _cleanup_task_overhead = marking_task_overhead();
   587     _cleanup_sleep_factor =
   588                      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
   590 #if 0
   591     gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
   592     gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
   593     gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
   594     gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
   595     gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
   596 #endif
   598     guarantee(parallel_marking_threads() > 0, "peace of mind");
   599     _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
   600          (int) _parallel_marking_threads, false, true);
   601     if (_parallel_workers == NULL) {
   602       vm_exit_during_initialization("Failed necessary allocation.");
   603     } else {
   604       _parallel_workers->initialize_workers();
   605     }
   606   }
   608   // so that the call below can read a sensible value
   609   _heap_start = (HeapWord*) rs.base();
   610   set_non_marking_state();
   611 }
   613 void ConcurrentMark::update_g1_committed(bool force) {
   614   // If concurrent marking is not in progress, then we do not need to
   615   // update _heap_end. This has a subtle and important
   616   // side-effect. Imagine that two evacuation pauses happen between
   617   // marking completion and remark. The first one can grow the
   618   // heap (hence now the finger is below the heap end). Then, the
   619   // second one could unnecessarily push regions on the region
   620   // stack. This causes the invariant that the region stack is empty
   621   // at the beginning of remark to be false. By ensuring that we do
    622   // not observe heap expansions after marking is complete, we do
   623   // not have this problem.
   624   if (!concurrent_marking_in_progress() && !force)
   625     return;
   627   MemRegion committed = _g1h->g1_committed();
   628   assert(committed.start() == _heap_start, "start shouldn't change");
   629   HeapWord* new_end = committed.end();
   630   if (new_end > _heap_end) {
   631     // The heap has been expanded.
   633     _heap_end = new_end;
   634   }
   635   // Notice that the heap can also shrink. However, this only happens
   636   // during a Full GC (at least currently) and the entire marking
   637   // phase will bail out and the task will not be restarted. So, let's
   638   // do nothing.
   639 }
   641 void ConcurrentMark::reset() {
   642   // Starting values for these two. This should be called in a STW
   643   // phase. CM will be notified of any future g1_committed expansions
    644   // at the end of evacuation pauses, when tasks are
   645   // inactive.
   646   MemRegion committed = _g1h->g1_committed();
   647   _heap_start = committed.start();
   648   _heap_end   = committed.end();
   650   // Separated the asserts so that we know which one fires.
   651   assert(_heap_start != NULL, "heap bounds should look ok");
   652   assert(_heap_end != NULL, "heap bounds should look ok");
   653   assert(_heap_start < _heap_end, "heap bounds should look ok");
   655   // reset all the marking data structures and any necessary flags
   656   clear_marking_state();
   658   if (verbose_low())
   659     gclog_or_tty->print_cr("[global] resetting");
   661   // We do reset all of them, since different phases will use
    662   // a different number of active threads. So, it's easiest to have all
   663   // of them ready.
   664   for (int i = 0; i < (int) _max_task_num; ++i) {
   665     _tasks[i]->reset(_nextMarkBitMap);
   666   }
   668   // we need this to make sure that the flag is on during the evac
   669   // pause with initial mark piggy-backed
   670   set_concurrent_marking_in_progress();
   671 }
   673 void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
   674   assert(active_tasks <= _max_task_num, "we should not have more");
   676   _active_tasks = active_tasks;
   677   // Need to update the three data structures below according to the
   678   // number of active threads for this phase.
   679   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
   680   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
   681   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
   683   _concurrent = concurrent;
   684   // We propagate this to all tasks, not just the active ones.
   685   for (int i = 0; i < (int) _max_task_num; ++i)
   686     _tasks[i]->set_concurrent(concurrent);
   688   if (concurrent) {
   689     set_concurrent_marking_in_progress();
   690   } else {
   691     // We currently assume that the concurrent flag has been set to
   692     // false before we start remark. At this point we should also be
   693     // in a STW phase.
   694     assert(!concurrent_marking_in_progress(), "invariant");
   695     assert(_finger == _heap_end, "only way to get here");
   696     update_g1_committed(true);
   697   }
   698 }
   700 void ConcurrentMark::set_non_marking_state() {
   701   // We set the global marking state to some default values when we're
   702   // not doing marking.
   703   clear_marking_state();
   704   _active_tasks = 0;
   705   clear_concurrent_marking_in_progress();
   706 }
   708 ConcurrentMark::~ConcurrentMark() {
   709   for (int i = 0; i < (int) _max_task_num; ++i) {
   710     delete _task_queues->queue(i);
   711     delete _tasks[i];
   712   }
   713   delete _task_queues;
   714   FREE_C_HEAP_ARRAY(CMTask*, _max_task_num);
   715 }
   717 // This closure is used to mark refs into the g1 generation
   718 // from external roots in the CMS bit map.
   719 // Called at the first checkpoint.
   720 //
   722 void ConcurrentMark::clearNextBitmap() {
   723   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   724   G1CollectorPolicy* g1p = g1h->g1_policy();
   726   // Make sure that the concurrent mark thread looks to still be in
   727   // the current cycle.
   728   guarantee(cmThread()->during_cycle(), "invariant");
   730   // We are finishing up the current cycle by clearing the next
   731   // marking bitmap and getting it ready for the next cycle. During
   732   // this time no other cycle can start. So, let's make sure that this
   733   // is the case.
   734   guarantee(!g1h->mark_in_progress(), "invariant");
   736   // clear the mark bitmap (no grey objects to start with).
   737   // We need to do this in chunks and offer to yield in between
   738   // each chunk.
   739   HeapWord* start  = _nextMarkBitMap->startWord();
   740   HeapWord* end    = _nextMarkBitMap->endWord();
   741   HeapWord* cur    = start;
   742   size_t chunkSize = M;
   743   while (cur < end) {
   744     HeapWord* next = cur + chunkSize;
   745     if (next > end)
   746       next = end;
   747     MemRegion mr(cur,next);
   748     _nextMarkBitMap->clearRange(mr);
   749     cur = next;
   750     do_yield_check();
   752     // Repeat the asserts from above. We'll do them as asserts here to
   753     // minimize their overhead on the product. However, we'll have
   754     // them as guarantees at the beginning / end of the bitmap
   755     // clearing to get some checking in the product.
   756     assert(cmThread()->during_cycle(), "invariant");
   757     assert(!g1h->mark_in_progress(), "invariant");
   758   }
   760   // Repeat the asserts from above.
   761   guarantee(cmThread()->during_cycle(), "invariant");
   762   guarantee(!g1h->mark_in_progress(), "invariant");
   763 }
   765 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
   766 public:
   767   bool doHeapRegion(HeapRegion* r) {
   768     if (!r->continuesHumongous()) {
   769       r->note_start_of_marking(true);
   770     }
   771     return false;
   772   }
   773 };
   775 void ConcurrentMark::checkpointRootsInitialPre() {
   776   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
   777   G1CollectorPolicy* g1p = g1h->g1_policy();
   779   _has_aborted = false;
   781 #ifndef PRODUCT
   782   if (G1PrintReachableAtInitialMark) {
   783     print_reachable("at-cycle-start",
   784                     true /* use_prev_marking */, true /* all */);
   785   }
   786 #endif
   788   // Initialise marking structures. This has to be done in a STW phase.
   789   reset();
   790 }
   792 class CMMarkRootsClosure: public OopsInGenClosure {
   793 private:
   794   ConcurrentMark*  _cm;
   795   G1CollectedHeap* _g1h;
   796   bool             _do_barrier;
   798 public:
   799   CMMarkRootsClosure(ConcurrentMark* cm,
   800                      G1CollectedHeap* g1h,
   801                      bool do_barrier) : _cm(cm), _g1h(g1h),
   802                                         _do_barrier(do_barrier) { }
   804   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
   805   virtual void do_oop(      oop* p) { do_oop_work(p); }
   807   template <class T> void do_oop_work(T* p) {
   808     T heap_oop = oopDesc::load_heap_oop(p);
   809     if (!oopDesc::is_null(heap_oop)) {
   810       oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
   811       assert(obj->is_oop() || obj->mark() == NULL,
   812              "expected an oop, possibly with mark word displaced");
   813       HeapWord* addr = (HeapWord*)obj;
   814       if (_g1h->is_in_g1_reserved(addr)) {
   815         _cm->grayRoot(obj);
   816       }
   817     }
   818     if (_do_barrier) {
   819       assert(!_g1h->is_in_g1_reserved(p),
   820              "Should be called on external roots");
   821       do_barrier(p);
   822     }
   823   }
   824 };
   826 void ConcurrentMark::checkpointRootsInitialPost() {
   827   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
   829   // For each region note start of marking.
   830   NoteStartOfMarkHRClosure startcl;
   831   g1h->heap_region_iterate(&startcl);
   833   // Start weak-reference discovery.
   834   ReferenceProcessor* rp = g1h->ref_processor();
   835   rp->verify_no_references_recorded();
   836   rp->enable_discovery(); // enable ("weak") refs discovery
   837   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
   839   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    840   // This is the start of the marking cycle; we expect all
    841   // threads to have SATB queues with active set to false.
   842   satb_mq_set.set_active_all_threads(true, /* new active value */
   843                                      false /* expected_active */);
   845   // update_g1_committed() will be called at the end of an evac pause
   846   // when marking is on. So, it's also called at the end of the
   847   // initial-mark pause to update the heap end, if the heap expands
   848   // during it. No need to call it here.
   849 }
   851 // Checkpoint the roots into this generation from outside
   852 // this generation. [Note this initial checkpoint need only
   853 // be approximate -- we'll do a catch up phase subsequently.]
   854 void ConcurrentMark::checkpointRootsInitial() {
   855   assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped");
   856   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   858   double start = os::elapsedTime();
   860   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
   861   g1p->record_concurrent_mark_init_start();
   862   checkpointRootsInitialPre();
   864   // YSR: when concurrent precleaning is in place, we'll
   865   // need to clear the cached card table here
   867   ResourceMark rm;
   868   HandleMark  hm;
   870   g1h->ensure_parsability(false);
   871   g1h->perm_gen()->save_marks();
   873   CMMarkRootsClosure notOlder(this, g1h, false);
   874   CMMarkRootsClosure older(this, g1h, true);
   876   g1h->set_marking_started();
   877   g1h->rem_set()->prepare_for_younger_refs_iterate(false);
   879   g1h->process_strong_roots(true,    // activate StrongRootsScope
   880                             false,   // fake perm gen collection
   881                             SharedHeap::SO_AllClasses,
   882                             &notOlder, // Regular roots
   883                             NULL,     // do not visit active blobs
   884                             &older    // Perm Gen Roots
   885                             );
   886   checkpointRootsInitialPost();
   888   // Statistics.
   889   double end = os::elapsedTime();
   890   _init_times.add((end - start) * 1000.0);
   892   g1p->record_concurrent_mark_init_end();
   893 }
   895 /*
   896    Notice that in the next two methods, we actually leave the STS
   897    during the barrier sync and join it immediately afterwards. If we
   898    do not do this, then the following deadlock can occur: one
   899    thread could be in the barrier sync code, waiting for the other
   900    thread to also sync up, whereas another one could be trying to
   901    yield, while also waiting for the other threads to sync up too.
   903    Because the thread that does the sync barrier has left the STS, it
   904    is possible that a Full GC or an evacuation pause could occur
   905    while it waits. This is actually safe, since entering the sync
   906    barrier is one of the last things do_marking_step() does, and it
   907    doesn't manipulate any data structures afterwards.
   908 */
   910 void ConcurrentMark::enter_first_sync_barrier(int task_num) {
   911   if (verbose_low())
   912     gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
   914   ConcurrentGCThread::stsLeave();
   915   _first_overflow_barrier_sync.enter();
   916   ConcurrentGCThread::stsJoin();
   917   // at this point everyone should have synced up and not be doing any
   918   // more work
   920   if (verbose_low())
   921     gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
   923   // let task 0 do this
   924   if (task_num == 0) {
   925     // task 0 is responsible for clearing the global data structures
   926     clear_marking_state();
   928     if (PrintGC) {
   929       gclog_or_tty->date_stamp(PrintGCDateStamps);
   930       gclog_or_tty->stamp(PrintGCTimeStamps);
   931       gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
   932     }
   933   }
    935   // after this, each task should reset its own data structures and
    936   // then go into the second barrier
   937 }
   939 void ConcurrentMark::enter_second_sync_barrier(int task_num) {
   940   if (verbose_low())
   941     gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
   943   ConcurrentGCThread::stsLeave();
   944   _second_overflow_barrier_sync.enter();
   945   ConcurrentGCThread::stsJoin();
   946   // at this point everything should be re-initialised and ready to go
   948   if (verbose_low())
   949     gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
   950 }
   952 void ConcurrentMark::grayRoot(oop p) {
   953   HeapWord* addr = (HeapWord*) p;
   954   // We can't really check against _heap_start and _heap_end, since it
   955   // is possible during an evacuation pause with piggy-backed
   956   // initial-mark that the committed space is expanded during the
    957   // pause without CM observing this change. So the assertion below
    958   // is a bit conservative, but better than nothing.
   959   assert(_g1h->g1_committed().contains(addr),
   960          "address should be within the heap bounds");
   962   if (!_nextMarkBitMap->isMarked(addr))
   963     _nextMarkBitMap->parMark(addr);
   964 }
   966 void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
   967   // The objects on the region have already been marked "in bulk" by
   968   // the caller. We only need to decide whether to push the region on
   969   // the region stack or not.
   971   if (!concurrent_marking_in_progress() || !_should_gray_objects)
   972     // We're done with marking and waiting for remark. We do not need to
   973     // push anything else on the region stack.
   974     return;
   976   HeapWord* finger = _finger;
   978   if (verbose_low())
   979     gclog_or_tty->print_cr("[global] attempting to push "
   980                            "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
   981                            PTR_FORMAT, mr.start(), mr.end(), finger);
   983   if (mr.start() < finger) {
   984     // The finger is always heap region aligned and it is not possible
   985     // for mr to span heap regions.
   986     assert(mr.end() <= finger, "invariant");
   988     // Separated the asserts so that we know which one fires.
   989     assert(mr.start() <= mr.end(),
   990            "region boundaries should fall within the committed space");
   991     assert(_heap_start <= mr.start(),
   992            "region boundaries should fall within the committed space");
   993     assert(mr.end() <= _heap_end,
   994            "region boundaries should fall within the committed space");
   995     if (verbose_low())
   996       gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
   997                              "below the finger, pushing it",
   998                              mr.start(), mr.end());
  1000     if (!region_stack_push_lock_free(mr)) {
  1001       if (verbose_low())
  1002         gclog_or_tty->print_cr("[global] region stack has overflown.");
  1007 void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
  1008   // The object is not marked by the caller. We need to at least mark
  1009   // it and maybe push it on the stack.
  1011   HeapWord* addr = (HeapWord*)p;
  1012   if (!_nextMarkBitMap->isMarked(addr)) {
  1013     // We definitely need to mark it, irrespective of whether we bail out
  1014     // because we're done with marking.
  1015     if (_nextMarkBitMap->parMark(addr)) {
  1016       if (!concurrent_marking_in_progress() || !_should_gray_objects)
  1017         // If we're done with concurrent marking and we're waiting for
  1018         // remark, then we're not pushing anything on the stack.
  1019         return;
  1021       // No OrderAccess::store_load() is needed. It is implicit in the
  1022       // CAS done in parMark(addr) above
  1023       HeapWord* finger = _finger;
  1025       if (addr < finger) {
  1026         if (!mark_stack_push(oop(addr))) {
  1027           if (verbose_low())
  1028             gclog_or_tty->print_cr("[global] global stack overflow "
  1029                                    "during parMark");
  1036 class CMConcurrentMarkingTask: public AbstractGangTask {
  1037 private:
  1038   ConcurrentMark*       _cm;
  1039   ConcurrentMarkThread* _cmt;
  1041 public:
  1042   void work(int worker_i) {
  1043     assert(Thread::current()->is_ConcurrentGC_thread(),
  1044            "this should only be done by a conc GC thread");
  1045     ResourceMark rm;
  1047     double start_vtime = os::elapsedVTime();
  1049     ConcurrentGCThread::stsJoin();
  1051     assert((size_t) worker_i < _cm->active_tasks(), "invariant");
  1052     CMTask* the_task = _cm->task(worker_i);
  1053     the_task->record_start_time();
  1054     if (!_cm->has_aborted()) {
  1055       do {
  1056         double start_vtime_sec = os::elapsedVTime();
  1057         double start_time_sec = os::elapsedTime();
  1058         the_task->do_marking_step(10.0);
  1059         double end_time_sec = os::elapsedTime();
  1060         double end_vtime_sec = os::elapsedVTime();
  1061         double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
  1062         double elapsed_time_sec = end_time_sec - start_time_sec;
  1063         _cm->clear_has_overflown();
  1065         bool ret = _cm->do_yield_check(worker_i);
  1067         jlong sleep_time_ms;
  1068         if (!_cm->has_aborted() && the_task->has_aborted()) {
  1069           sleep_time_ms =
  1070             (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
  1071           ConcurrentGCThread::stsLeave();
  1072           os::sleep(Thread::current(), sleep_time_ms, false);
  1073           ConcurrentGCThread::stsJoin();
  1075         double end_time2_sec = os::elapsedTime();
  1076         double elapsed_time2_sec = end_time2_sec - start_time_sec;
  1078 #if 0
  1079           gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
  1080                                  "overhead %1.4lf",
  1081                                  elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
  1082                                  the_task->conc_overhead(os::elapsedTime()) * 8.0);
  1083           gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
  1084                                  elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
  1085 #endif
  1086       } while (!_cm->has_aborted() && the_task->has_aborted());
  1088     the_task->record_end_time();
  1089     guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
  1091     ConcurrentGCThread::stsLeave();
  1093     double end_vtime = os::elapsedVTime();
  1094     _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
  1097   CMConcurrentMarkingTask(ConcurrentMark* cm,
  1098                           ConcurrentMarkThread* cmt) :
  1099       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
  1101   ~CMConcurrentMarkingTask() { }
  1102 };
  1104 void ConcurrentMark::markFromRoots() {
  1105   // we might be tempted to assert that:
  1106   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  1107   //        "inconsistent argument?");
  1108   // However that wouldn't be right, because it's possible that
  1109   // a safepoint is indeed in progress as a younger generation
  1110   // stop-the-world GC happens even as we mark in this generation.
  1112   _restart_for_overflow = false;
  1114   set_phase(MAX2((size_t) 1, parallel_marking_threads()), true);
  1116   CMConcurrentMarkingTask markingTask(this, cmThread());
  1117   if (parallel_marking_threads() > 0)
  1118     _parallel_workers->run_task(&markingTask);
  1119   else
  1120     markingTask.work(0);
  1121   print_stats();
  1124 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  1125   // world is stopped at this checkpoint
  1126   assert(SafepointSynchronize::is_at_safepoint(),
  1127          "world should be stopped");
  1128   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  1130   // If a full collection has happened, we shouldn't do this.
  1131   if (has_aborted()) {
  1132     g1h->set_marking_complete(); // So bitmap clearing isn't confused
  1133     return;
  1136   SvcGCMarker sgcm(SvcGCMarker::OTHER);
  1138   if (VerifyDuringGC) {
  1139     HandleMark hm;  // handle scope
  1140     gclog_or_tty->print(" VerifyDuringGC:(before)");
  1141     Universe::heap()->prepare_for_verify();
  1142     Universe::verify(true, false, true);
  1145   G1CollectorPolicy* g1p = g1h->g1_policy();
  1146   g1p->record_concurrent_mark_remark_start();
  1148   double start = os::elapsedTime();
  1150   checkpointRootsFinalWork();
  1152   double mark_work_end = os::elapsedTime();
  1154   weakRefsWork(clear_all_soft_refs);
  1156   if (has_overflown()) {
  1157     // Oops.  We overflowed.  Restart concurrent marking.
  1158     _restart_for_overflow = true;
  1159     // Clear the flag. We do not need it any more.
  1160     clear_has_overflown();
  1161     if (G1TraceMarkStackOverflow)
  1162       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
  1163   } else {
  1164     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  1165     // We're done with marking.
  1166     // This is the end of the marking cycle; we expect all
  1167     // threads to have SATB queues with active set to true.
  1168     satb_mq_set.set_active_all_threads(false, /* new active value */
  1169                                        true /* expected_active */);
  1171     if (VerifyDuringGC) {
  1172       HandleMark hm;  // handle scope
  1173       gclog_or_tty->print(" VerifyDuringGC:(after)");
  1174       Universe::heap()->prepare_for_verify();
  1175       Universe::heap()->verify(/* allow_dirty */      true,
  1176                                /* silent */           false,
  1177                                /* use_prev_marking */ false);
  1181 #if VERIFY_OBJS_PROCESSED
  1182   _scan_obj_cl.objs_processed = 0;
  1183   ThreadLocalObjQueue::objs_enqueued = 0;
  1184 #endif
  1186   // Statistics
  1187   double now = os::elapsedTime();
  1188   _remark_mark_times.add((mark_work_end - start) * 1000.0);
  1189   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  1190   _remark_times.add((now - start) * 1000.0);
  1192   g1p->record_concurrent_mark_remark_end();
  1196 #define CARD_BM_TEST_MODE 0
  1198 class CalcLiveObjectsClosure: public HeapRegionClosure {
  1200   CMBitMapRO* _bm;
  1201   ConcurrentMark* _cm;
  1202   bool _changed;
  1203   bool _yield;
  1204   size_t _words_done;
  1205   size_t _tot_live;
  1206   size_t _tot_used;
  1207   size_t _regions_done;
  1208   double _start_vtime_sec;
  1210   BitMap* _region_bm;
  1211   BitMap* _card_bm;
  1212   intptr_t _bottom_card_num;
  1213   bool _final;
  1215   void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
  1216     for (intptr_t i = start_card_num; i <= last_card_num; i++) {
  1217 #if CARD_BM_TEST_MODE
  1218       guarantee(_card_bm->at(i - _bottom_card_num), "Should already be set.");
  1219 #else
  1220       _card_bm->par_at_put(i - _bottom_card_num, 1);
  1221 #endif
  1225 public:
  1226   CalcLiveObjectsClosure(bool final,
  1227                          CMBitMapRO *bm, ConcurrentMark *cm,
  1228                          BitMap* region_bm, BitMap* card_bm) :
  1229     _bm(bm), _cm(cm), _changed(false), _yield(true),
  1230     _words_done(0), _tot_live(0), _tot_used(0),
  1231     _region_bm(region_bm), _card_bm(card_bm),_final(final),
  1232     _regions_done(0), _start_vtime_sec(0.0)
  1234     _bottom_card_num =
  1235       intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
  1236                CardTableModRefBS::card_shift);
  1239   // It takes a region that's not empty (i.e., it has at least one
  1240   // live object in it) and sets its corresponding bit on the region
  1241   // bitmap to 1. If the region is "starts humongous" it will also set
  1242   // to 1 the bits on the region bitmap that correspond to its
  1243   // associated "continues humongous" regions.
  1244   void set_bit_for_region(HeapRegion* hr) {
  1245     assert(!hr->continuesHumongous(), "should have filtered those out");
  1247     size_t index = hr->hrs_index();
  1248     if (!hr->startsHumongous()) {
  1249       // Normal (non-humongous) case: just set the bit.
  1250       _region_bm->par_at_put((BitMap::idx_t) index, true);
  1251     } else {
  1252       // Starts humongous case: calculate how many regions are part of
  1253       // this humongous region and then set the bit range. It might
  1254       // have been a bit more efficient to look at the object that
  1255       // spans these humongous regions to calculate their number from
  1256       // the object's size. However, it's a good idea to calculate
  1257       // this based on the metadata itself, and not the region
  1258       // contents, so that this code is not aware of what goes into
  1259       // the humongous regions (in case this changes in the future).
  1260       G1CollectedHeap* g1h = G1CollectedHeap::heap();
  1261       size_t end_index = index + 1;
  1262       while (end_index < g1h->n_regions()) {
  1263         HeapRegion* chr = g1h->region_at(end_index);
  1264         if (!chr->continuesHumongous()) {
  1265           break;
  1267         end_index += 1;
  1269       _region_bm->par_at_put_range((BitMap::idx_t) index,
  1270                                    (BitMap::idx_t) end_index, true);
  1274   bool doHeapRegion(HeapRegion* hr) {
  1275     if (!_final && _regions_done == 0)
  1276       _start_vtime_sec = os::elapsedVTime();
  1278     if (hr->continuesHumongous()) {
  1279       // We will ignore these here and process them when their
  1280       // associated "starts humongous" region is processed (see
  1281       // set_bit_for_heap_region()). Note that we cannot rely on their
  1282       // associated "starts humongous" region to have their bit set to
  1283       // 1 since, due to the region chunking in the parallel region
  1284       // iteration, a "continues humongous" region might be visited
  1285       // before its associated "starts humongous".
  1286       return false;
  1289     HeapWord* nextTop = hr->next_top_at_mark_start();
  1290     HeapWord* start   = hr->top_at_conc_mark_count();
  1291     assert(hr->bottom() <= start && start <= hr->end() &&
  1292            hr->bottom() <= nextTop && nextTop <= hr->end() &&
  1293            start <= nextTop,
  1294            "Preconditions.");
  1295     // Otherwise, record the number of words we'll examine.
  1296     size_t words_done = (nextTop - start);
  1297     // Find the first marked object at or after "start".
  1298     start = _bm->getNextMarkedWordAddress(start, nextTop);
  1299     size_t marked_bytes = 0;
  1301     // Below, the term "card num" means the result of shifting an address
  1302     // by the card shift -- address 0 corresponds to card number 0.  One
  1303     // must subtract the card num of the bottom of the heap to obtain a
  1304     // card table index.
  1305     // The first card num of the sequence of live cards currently being
  1306     // constructed.  -1 ==> no sequence.
  1307     intptr_t start_card_num = -1;
  1308     // The last card num of the sequence of live cards currently being
  1309     // constructed.  -1 ==> no sequence.
  1310     intptr_t last_card_num = -1;
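           // Worked example of the card-num arithmetic above (assuming the
           // usual 512-byte cards, i.e. CardTableModRefBS::card_shift == 9):
           // if the heap is reserved at 0x80000000, _bottom_card_num is
           // 0x80000000 >> 9 = 0x00400000; an object starting at 0x80000e00
           // has card num 0x80000e00 >> 9 = 0x00400007, which corresponds to
           // card table index 0x00400007 - 0x00400000 = 7.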
  1312     while (start < nextTop) {
  1313       if (_yield && _cm->do_yield_check()) {
  1314         // We yielded.  It might be for a full collection, in which case
  1315         // all bets are off; terminate the traversal.
  1316         if (_cm->has_aborted()) {
  1317           _changed = false;
  1318           return true;
  1319         } else {
  1320           // Otherwise, it might be a collection pause, and the region
  1321           // we're looking at might be in the collection set.  We'll
  1322           // abandon this region.
  1323           return false;
  1326       oop obj = oop(start);
  1327       int obj_sz = obj->size();
  1328       // The card num of the start of the current object.
  1329       intptr_t obj_card_num =
  1330         intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
  1332       HeapWord* obj_last = start + obj_sz - 1;
  1333       intptr_t obj_last_card_num =
  1334         intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);
  1336       if (obj_card_num != last_card_num) {
  1337         if (start_card_num == -1) {
  1338           assert(last_card_num == -1, "Both or neither.");
  1339           start_card_num = obj_card_num;
  1340         } else {
  1341           assert(last_card_num != -1, "Both or neither.");
  1342           assert(obj_card_num >= last_card_num, "Inv");
  1343           if ((obj_card_num - last_card_num) > 1) {
  1344             // Mark the last run, and start a new one.
  1345             mark_card_num_range(start_card_num, last_card_num);
  1346             start_card_num = obj_card_num;
  1349 #if CARD_BM_TEST_MODE
  1350         /*
  1351         gclog_or_tty->print_cr("Setting bits from %d/%d.",
  1352                                obj_card_num - _bottom_card_num,
  1353                                obj_last_card_num - _bottom_card_num);
  1354         */
  1355         for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
  1356           _card_bm->par_at_put(j - _bottom_card_num, 1);
  1358 #endif
  1360       // In any case, we set the last card num.
  1361       last_card_num = obj_last_card_num;
  1363       marked_bytes += (size_t)obj_sz * HeapWordSize;
  1364       // Find the next marked object after this one.
  1365       start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
  1366       _changed = true;
  1368     // Handle the last range, if any.
  1369     if (start_card_num != -1)
  1370       mark_card_num_range(start_card_num, last_card_num);
  1371     if (_final) {
  1372       // Mark the allocated-since-marking portion...
  1373       HeapWord* tp = hr->top();
  1374       if (nextTop < tp) {
  1375         start_card_num =
  1376           intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
  1377         last_card_num =
  1378           intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
  1379         mark_card_num_range(start_card_num, last_card_num);
  1380         // This definitely means the region has live objects.
  1381         set_bit_for_region(hr);
  1385     hr->add_to_marked_bytes(marked_bytes);
  1386     // Update the live region bitmap.
  1387     if (marked_bytes > 0) {
  1388       set_bit_for_region(hr);
  1390     hr->set_top_at_conc_mark_count(nextTop);
  1391     _tot_live += hr->next_live_bytes();
  1392     _tot_used += hr->used();
  1393     _words_done = words_done;
  1395     if (!_final) {
  1396       ++_regions_done;
  1397       if (_regions_done % 10 == 0) {
  1398         double end_vtime_sec = os::elapsedVTime();
  1399         double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
  1400         if (elapsed_vtime_sec > (10.0 / 1000.0)) {
  1401           jlong sleep_time_ms =
  1402             (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
  1403           os::sleep(Thread::current(), sleep_time_ms, false);
  1404           _start_vtime_sec = end_vtime_sec;
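                 // For example, if the closure has consumed ~20 ms of vtime
                 // since the last check and the cleanup sleep factor were 0.5,
                 // the worker would sleep for roughly 10 ms here.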
  1409     return false;
  1412   bool changed() { return _changed;  }
  1413   void reset()   { _changed = false; _words_done = 0; }
  1414   void no_yield() { _yield = false; }
  1415   size_t words_done() { return _words_done; }
  1416   size_t tot_live() { return _tot_live; }
  1417   size_t tot_used() { return _tot_used; }
  1418 };
  1421 void ConcurrentMark::calcDesiredRegions() {
  1422   _region_bm.clear();
  1423   _card_bm.clear();
  1424   CalcLiveObjectsClosure calccl(false /*final*/,
  1425                                 nextMarkBitMap(), this,
  1426                                 &_region_bm, &_card_bm);
  1427   G1CollectedHeap *g1h = G1CollectedHeap::heap();
  1428   g1h->heap_region_iterate(&calccl);
  1430   do {
  1431     calccl.reset();
  1432     g1h->heap_region_iterate(&calccl);
  1433   } while (calccl.changed());
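         // A single pass may not settle everything: the closure yields at
         // do_yield_check() and can abandon the region it is scanning, so the
         // iteration is simply repeated until a full pass reports no changes.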
  1436 class G1ParFinalCountTask: public AbstractGangTask {
  1437 protected:
  1438   G1CollectedHeap* _g1h;
  1439   CMBitMap* _bm;
  1440   size_t _n_workers;
  1441   size_t *_live_bytes;
  1442   size_t *_used_bytes;
  1443   BitMap* _region_bm;
  1444   BitMap* _card_bm;
  1445 public:
  1446   G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
  1447                       BitMap* region_bm, BitMap* card_bm) :
  1448     AbstractGangTask("G1 final counting"), _g1h(g1h),
  1449     _bm(bm), _region_bm(region_bm), _card_bm(card_bm)
  1451     if (ParallelGCThreads > 0)
  1452       _n_workers = _g1h->workers()->total_workers();
  1453     else
  1454       _n_workers = 1;
  1455     _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
  1456     _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
  1459   ~G1ParFinalCountTask() {
  1460     FREE_C_HEAP_ARRAY(size_t, _live_bytes);
  1461     FREE_C_HEAP_ARRAY(size_t, _used_bytes);
  1464   void work(int i) {
  1465     CalcLiveObjectsClosure calccl(true /*final*/,
  1466                                   _bm, _g1h->concurrent_mark(),
  1467                                   _region_bm, _card_bm);
  1468     calccl.no_yield();
  1469     if (G1CollectedHeap::use_parallel_gc_threads()) {
  1470       _g1h->heap_region_par_iterate_chunked(&calccl, i,
  1471                                             HeapRegion::FinalCountClaimValue);
  1472     } else {
  1473       _g1h->heap_region_iterate(&calccl);
  1475     assert(calccl.complete(), "Shouldn't have yielded!");
  1477     assert((size_t) i < _n_workers, "invariant");
  1478     _live_bytes[i] = calccl.tot_live();
  1479     _used_bytes[i] = calccl.tot_used();
  1481   size_t live_bytes()  {
  1482     size_t live_bytes = 0;
  1483     for (size_t i = 0; i < _n_workers; ++i)
  1484       live_bytes += _live_bytes[i];
  1485     return live_bytes;
  1487   size_t used_bytes()  {
  1488     size_t used_bytes = 0;
  1489     for (size_t i = 0; i < _n_workers; ++i)
  1490       used_bytes += _used_bytes[i];
  1491     return used_bytes;
  1493 };
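       // Each worker records its totals in its own slot of _live_bytes /
       // _used_bytes, so work() needs no synchronization; live_bytes() and
       // used_bytes() just sum the per-worker slots afterwards.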
  1495 class G1ParNoteEndTask;
  1497 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  1498   G1CollectedHeap* _g1;
  1499   int _worker_num;
  1500   size_t _max_live_bytes;
  1501   size_t _regions_claimed;
  1502   size_t _freed_bytes;
  1503   FreeRegionList _local_cleanup_list;
  1504   HumongousRegionSet _humongous_proxy_set;
  1505   double _claimed_region_time;
  1506   double _max_region_time;
  1508 public:
  1509   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
  1510                              int worker_num);
  1511   size_t freed_bytes() { return _freed_bytes; }
  1512   FreeRegionList* local_cleanup_list() {
  1513     return &_local_cleanup_list;
  1515   HumongousRegionSet* humongous_proxy_set() {
  1516     return &_humongous_proxy_set;
  1519   bool doHeapRegion(HeapRegion *r);
  1521   size_t max_live_bytes() { return _max_live_bytes; }
  1522   size_t regions_claimed() { return _regions_claimed; }
  1523   double claimed_region_time_sec() { return _claimed_region_time; }
  1524   double max_region_time_sec() { return _max_region_time; }
  1525 };
  1527 class G1ParNoteEndTask: public AbstractGangTask {
  1528   friend class G1NoteEndOfConcMarkClosure;
  1530 protected:
  1531   G1CollectedHeap* _g1h;
  1532   size_t _max_live_bytes;
  1533   size_t _freed_bytes;
  1534   FreeRegionList* _cleanup_list;
  1536 public:
  1537   G1ParNoteEndTask(G1CollectedHeap* g1h,
  1538                    FreeRegionList* cleanup_list) :
  1539     AbstractGangTask("G1 note end"), _g1h(g1h),
  1540     _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
  1542   void work(int i) {
  1543     double start = os::elapsedTime();
  1544     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, i);
  1545     if (G1CollectedHeap::use_parallel_gc_threads()) {
  1546       _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
  1547                                             HeapRegion::NoteEndClaimValue);
  1548     } else {
  1549       _g1h->heap_region_iterate(&g1_note_end);
  1551     assert(g1_note_end.complete(), "Shouldn't have yielded!");
  1553     // Now update the lists
  1554     _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
  1555                                             NULL /* free_list */,
  1556                                             g1_note_end.humongous_proxy_set(),
  1557                                             true /* par */);
  1559       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  1560       _max_live_bytes += g1_note_end.max_live_bytes();
  1561       _freed_bytes += g1_note_end.freed_bytes();
  1563       _cleanup_list->add_as_tail(g1_note_end.local_cleanup_list());
  1564       assert(g1_note_end.local_cleanup_list()->is_empty(), "post-condition");
  1566     double end = os::elapsedTime();
  1567     if (G1PrintParCleanupStats) {
  1568       gclog_or_tty->print("     Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
  1569                           "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
  1570                           i, start, end, (end-start)*1000.0,
  1571                           g1_note_end.regions_claimed(),
  1572                           g1_note_end.claimed_region_time_sec()*1000.0,
  1573                           g1_note_end.max_region_time_sec()*1000.0);
  1576   size_t max_live_bytes() { return _max_live_bytes; }
  1577   size_t freed_bytes() { return _freed_bytes; }
  1578 };
  1580 class G1ParScrubRemSetTask: public AbstractGangTask {
  1581 protected:
  1582   G1RemSet* _g1rs;
  1583   BitMap* _region_bm;
  1584   BitMap* _card_bm;
  1585 public:
  1586   G1ParScrubRemSetTask(G1CollectedHeap* g1h,
  1587                        BitMap* region_bm, BitMap* card_bm) :
  1588     AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
  1589     _region_bm(region_bm), _card_bm(card_bm)
  1590   {}
  1592   void work(int i) {
  1593     if (G1CollectedHeap::use_parallel_gc_threads()) {
  1594       _g1rs->scrub_par(_region_bm, _card_bm, i,
  1595                        HeapRegion::ScrubRemSetClaimValue);
  1596     } else {
  1597       _g1rs->scrub(_region_bm, _card_bm);
  1601 };
  1603 G1NoteEndOfConcMarkClosure::
  1604 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
  1605                            int worker_num)
  1606   : _g1(g1), _worker_num(worker_num),
  1607     _max_live_bytes(0), _regions_claimed(0),
  1608     _freed_bytes(0),
  1609     _claimed_region_time(0.0), _max_region_time(0.0),
  1610     _local_cleanup_list("Local Cleanup List"),
  1611     _humongous_proxy_set("Local Cleanup Humongous Proxy Set") { }
  1613 bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *hr) {
  1614   // We use a claim value of zero here because all regions
  1615   // were claimed with value 1 in the FinalCount task.
  1616   hr->reset_gc_time_stamp();
  1617   if (!hr->continuesHumongous()) {
  1618     double start = os::elapsedTime();
  1619     _regions_claimed++;
  1620     hr->note_end_of_marking();
  1621     _max_live_bytes += hr->max_live_bytes();
  1622     _g1->free_region_if_totally_empty(hr,
  1623                                       &_freed_bytes,
  1624                                       &_local_cleanup_list,
  1625                                       &_humongous_proxy_set,
  1626                                       true /* par */);
  1627     double region_time = (os::elapsedTime() - start);
  1628     _claimed_region_time += region_time;
  1629     if (region_time > _max_region_time) _max_region_time = region_time;
  1631   return false;
  1634 void ConcurrentMark::cleanup() {
  1635   // world is stopped at this checkpoint
  1636   assert(SafepointSynchronize::is_at_safepoint(),
  1637          "world should be stopped");
  1638   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  1640   // If a full collection has happened, we shouldn't do this.
  1641   if (has_aborted()) {
  1642     g1h->set_marking_complete(); // So bitmap clearing isn't confused
  1643     return;
  1646   g1h->verify_region_sets_optional();
  1648   if (VerifyDuringGC) {
  1649     HandleMark hm;  // handle scope
  1650     gclog_or_tty->print(" VerifyDuringGC:(before)");
  1651     Universe::heap()->prepare_for_verify();
  1652     Universe::verify(/* allow dirty  */ true,
  1653                      /* silent       */ false,
  1654                      /* prev marking */ true);
  1657   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
  1658   g1p->record_concurrent_mark_cleanup_start();
  1660   double start = os::elapsedTime();
  1662   // Do counting once more with the world stopped for good measure.
  1663   G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
  1664                                         &_region_bm, &_card_bm);
  1665   if (G1CollectedHeap::use_parallel_gc_threads()) {
  1666     assert(g1h->check_heap_region_claim_values(
  1667                                                HeapRegion::InitialClaimValue),
  1668            "sanity check");
  1670     int n_workers = g1h->workers()->total_workers();
  1671     g1h->set_par_threads(n_workers);
  1672     g1h->workers()->run_task(&g1_par_count_task);
  1673     g1h->set_par_threads(0);
  1675     assert(g1h->check_heap_region_claim_values(
  1676                                              HeapRegion::FinalCountClaimValue),
  1677            "sanity check");
  1678   } else {
  1679     g1_par_count_task.work(0);
  1682   size_t known_garbage_bytes =
  1683     g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
  1684 #if 0
  1685   gclog_or_tty->print_cr("used %1.2lf, live %1.2lf, garbage %1.2lf",
  1686                          (double) g1_par_count_task.used_bytes() / (double) (1024 * 1024),
  1687                          (double) g1_par_count_task.live_bytes() / (double) (1024 * 1024),
  1688                          (double) known_garbage_bytes / (double) (1024 * 1024));
  1689 #endif // 0
  1690   g1p->set_known_garbage_bytes(known_garbage_bytes);
  1692   size_t start_used_bytes = g1h->used();
  1693   _at_least_one_mark_complete = true;
  1694   g1h->set_marking_complete();
  1696   double count_end = os::elapsedTime();
  1697   double this_final_counting_time = (count_end - start);
  1698   if (G1PrintParCleanupStats) {
  1699     gclog_or_tty->print_cr("Cleanup:");
  1700     gclog_or_tty->print_cr("  Finalize counting: %8.3f ms",
  1701                            this_final_counting_time*1000.0);
  1703   _total_counting_time += this_final_counting_time;
  1705   // Install newly created mark bitMap as "prev".
  1706   swapMarkBitMaps();
  1708   g1h->reset_gc_time_stamp();
  1710   // Note end of marking in all heap regions.
  1711   double note_end_start = os::elapsedTime();
  1712   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
  1713   if (G1CollectedHeap::use_parallel_gc_threads()) {
  1714     int n_workers = g1h->workers()->total_workers();
  1715     g1h->set_par_threads(n_workers);
  1716     g1h->workers()->run_task(&g1_par_note_end_task);
  1717     g1h->set_par_threads(0);
  1719     assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
  1720            "sanity check");
  1721   } else {
  1722     g1_par_note_end_task.work(0);
  1725   if (!cleanup_list_is_empty()) {
  1726     // The cleanup list is not empty, so we'll have to process it
  1727     // concurrently. Notify anyone else that might be wanting free
  1728     // regions that there will be more free regions coming soon.
  1729     g1h->set_free_regions_coming();
  1731   double note_end_end = os::elapsedTime();
  1732   if (G1PrintParCleanupStats) {
  1733     gclog_or_tty->print_cr("  note end of marking: %8.3f ms.",
  1734                            (note_end_end - note_end_start)*1000.0);
  1738   // call below, since it affects the metric by which we sort the heap
  1739   // regions.
  1740   if (G1ScrubRemSets) {
  1741     double rs_scrub_start = os::elapsedTime();
  1742     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
  1743     if (G1CollectedHeap::use_parallel_gc_threads()) {
  1744       int n_workers = g1h->workers()->total_workers();
  1745       g1h->set_par_threads(n_workers);
  1746       g1h->workers()->run_task(&g1_par_scrub_rs_task);
  1747       g1h->set_par_threads(0);
  1749       assert(g1h->check_heap_region_claim_values(
  1750                                             HeapRegion::ScrubRemSetClaimValue),
  1751              "sanity check");
  1752     } else {
  1753       g1_par_scrub_rs_task.work(0);
  1756     double rs_scrub_end = os::elapsedTime();
  1757     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
  1758     _total_rs_scrub_time += this_rs_scrub_time;
  1761   // this will also free any regions totally full of garbage objects,
  1762   // and sort the regions.
  1763   g1h->g1_policy()->record_concurrent_mark_cleanup_end(
  1764                         g1_par_note_end_task.freed_bytes(),
  1765                         g1_par_note_end_task.max_live_bytes());
  1767   // Statistics.
  1768   double end = os::elapsedTime();
  1769   _cleanup_times.add((end - start) * 1000.0);
  1771   // G1CollectedHeap::heap()->print();
  1772   // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
  1773   // G1CollectedHeap::heap()->get_gc_time_stamp());
  1775   if (PrintGC || PrintGCDetails) {
  1776     g1h->print_size_transition(gclog_or_tty,
  1777                                start_used_bytes,
  1778                                g1h->used(),
  1779                                g1h->capacity());
  1782   size_t cleaned_up_bytes = start_used_bytes - g1h->used();
  1783   g1p->decrease_known_garbage_bytes(cleaned_up_bytes);
  1785   // We need to make this be a "collection" so any collection pause that
  1786   // races with it goes around and waits for completeCleanup to finish.
  1787   g1h->increment_total_collections();
  1789   if (VerifyDuringGC) {
  1790     HandleMark hm;  // handle scope
  1791     gclog_or_tty->print(" VerifyDuringGC:(after)");
  1792     Universe::heap()->prepare_for_verify();
  1793     Universe::verify(/* allow dirty  */ true,
  1794                      /* silent       */ false,
  1795                      /* prev marking */ true);
  1798   g1h->verify_region_sets_optional();
  1801 void ConcurrentMark::completeCleanup() {
  1802   if (has_aborted()) return;
  1804   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  1806   _cleanup_list.verify_optional();
  1807   FreeRegionList local_free_list("Local Cleanup List");
  1809   if (G1ConcRegionFreeingVerbose) {
  1810     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
  1811                            "cleanup list has "SIZE_FORMAT" entries",
  1812                            _cleanup_list.length());
  1815   // No one else should be accessing the _cleanup_list at this point,
  1816   // so it's not necessary to take any locks
  1817   while (!_cleanup_list.is_empty()) {
  1818     HeapRegion* hr = _cleanup_list.remove_head();
  1819     assert(hr != NULL, "the list was not empty");
  1820     hr->rem_set()->clear();
  1821     local_free_list.add_as_tail(hr);
  1823     // Instead of adding one region at a time to the secondary_free_list,
  1824     // we accumulate them in the local list and move them a few at a
  1825     // time. This also cuts down on the number of notify_all() calls
  1826     // we do during this process. We'll also append the local list when
  1827     // _cleanup_list is empty (which means we just removed the last
  1828     // region from the _cleanup_list).
  1829     if ((local_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
  1830         _cleanup_list.is_empty()) {
  1831       if (G1ConcRegionFreeingVerbose) {
  1832         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
  1833                                "appending "SIZE_FORMAT" entries to the "
  1834                                "secondary_free_list, clean list still has "
  1835                                SIZE_FORMAT" entries",
  1836                                local_free_list.length(),
  1837                                _cleanup_list.length());
  1841         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
  1842         g1h->secondary_free_list_add_as_tail(&local_free_list);
  1843         SecondaryFreeList_lock->notify_all();
  1846       if (G1StressConcRegionFreeing) {
  1847         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
  1848           os::sleep(Thread::current(), (jlong) 1, false);
  1853   assert(local_free_list.is_empty(), "post-condition");
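       // The batching above means that if G1SecondaryFreeListAppendLength were,
       // say, 5, regions would be appended to the secondary_free_list in groups
       // of five (after the 5th, 10th, ... region is moved to the local list),
       // plus one final append for whatever remains once the cleanup list is
       // drained.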
  1856 bool G1CMIsAliveClosure::do_object_b(oop obj) {
  1857   HeapWord* addr = (HeapWord*)obj;
  1858   return addr != NULL &&
  1859          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
  1862 class G1CMKeepAliveClosure: public OopClosure {
  1863   G1CollectedHeap* _g1;
  1864   ConcurrentMark*  _cm;
  1865   CMBitMap*        _bitMap;
  1866  public:
  1867   G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
  1868                        CMBitMap* bitMap) :
  1869     _g1(g1), _cm(cm),
  1870     _bitMap(bitMap) {}
  1872   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  1873   virtual void do_oop(      oop* p) { do_oop_work(p); }
  1875   template <class T> void do_oop_work(T* p) {
  1876     oop thisOop = oopDesc::load_decode_heap_oop(p);
  1877     HeapWord* addr = (HeapWord*)thisOop;
  1878     if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(thisOop)) {
  1879       _bitMap->mark(addr);
  1880       _cm->mark_stack_push(thisOop);
  1883 };
  1885 class G1CMDrainMarkingStackClosure: public VoidClosure {
  1886   CMMarkStack*                  _markStack;
  1887   CMBitMap*                     _bitMap;
  1888   G1CMKeepAliveClosure*         _oopClosure;
  1889  public:
  1890   G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
  1891                                G1CMKeepAliveClosure* oopClosure) :
  1892     _bitMap(bitMap),
  1893     _markStack(markStack),
  1894     _oopClosure(oopClosure)
  1895   {}
  1897   void do_void() {
  1898     _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
  1900 };
  1902 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
  1903   ResourceMark rm;
  1904   HandleMark   hm;
  1905   G1CollectedHeap* g1h   = G1CollectedHeap::heap();
  1906   ReferenceProcessor* rp = g1h->ref_processor();
  1908   // See the comment in G1CollectedHeap::ref_processing_init()
  1909   // about how reference processing currently works in G1.
  1911   // Process weak references.
  1912   rp->setup_policy(clear_all_soft_refs);
  1913   assert(_markStack.isEmpty(), "mark stack should be empty");
  1915   G1CMIsAliveClosure   g1_is_alive(g1h);
  1916   G1CMKeepAliveClosure g1_keep_alive(g1h, this, nextMarkBitMap());
  1917   G1CMDrainMarkingStackClosure
  1918     g1_drain_mark_stack(nextMarkBitMap(), &_markStack, &g1_keep_alive);
  1920   // XXXYYY  Also: copy the parallel ref processing code from CMS.
  1921   rp->process_discovered_references(&g1_is_alive,
  1922                                     &g1_keep_alive,
  1923                                     &g1_drain_mark_stack,
  1924                                     NULL);
  1925   assert(_markStack.overflow() || _markStack.isEmpty(),
  1926          "mark stack should be empty (unless it overflowed)");
  1927   if (_markStack.overflow()) {
  1928     set_has_overflown();
  1931   rp->enqueue_discovered_references();
  1932   rp->verify_no_references_recorded();
  1933   assert(!rp->discovery_enabled(), "should have been disabled");
  1935   // Now clean up stale oops in SymbolTable and StringTable
  1936   SymbolTable::unlink(&g1_is_alive);
  1937   StringTable::unlink(&g1_is_alive);
  1940 void ConcurrentMark::swapMarkBitMaps() {
  1941   CMBitMapRO* temp = _prevMarkBitMap;
  1942   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
  1943   _nextMarkBitMap  = (CMBitMap*)  temp;
  1946 class CMRemarkTask: public AbstractGangTask {
  1947 private:
  1948   ConcurrentMark *_cm;
  1950 public:
  1951   void work(int worker_i) {
  1952     // Since all available tasks are actually started, we should
  1953     // only proceed if we're supposed to be active.
  1954     if ((size_t)worker_i < _cm->active_tasks()) {
  1955       CMTask* task = _cm->task(worker_i);
  1956       task->record_start_time();
  1957       do {
  1958         task->do_marking_step(1000000000.0 /* something very large */);
  1959       } while (task->has_aborted() && !_cm->has_overflown());
  1960       // If we overflow, then we do not want to restart. We instead
  1961       // want to abort remark and do concurrent marking again.
  1962       task->record_end_time();
  1966   CMRemarkTask(ConcurrentMark* cm) :
  1967     AbstractGangTask("Par Remark"), _cm(cm) { }
  1968 };
  1970 void ConcurrentMark::checkpointRootsFinalWork() {
  1971   ResourceMark rm;
  1972   HandleMark   hm;
  1973   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  1975   g1h->ensure_parsability(false);
  1977   if (G1CollectedHeap::use_parallel_gc_threads()) {
  1978     G1CollectedHeap::StrongRootsScope srs(g1h);
  1979     // this is remark, so we'll use up all available threads
  1980     int active_workers = ParallelGCThreads;
  1981     set_phase(active_workers, false);
  1983     CMRemarkTask remarkTask(this);
  1984     // We will start all available threads, even if we decide that the
  1985     // active_workers will be fewer. The extra ones will just bail out
  1986     // immediately.
  1987     int n_workers = g1h->workers()->total_workers();
  1988     g1h->set_par_threads(n_workers);
  1989     g1h->workers()->run_task(&remarkTask);
  1990     g1h->set_par_threads(0);
  1991   } else {
  1992     G1CollectedHeap::StrongRootsScope srs(g1h);
  1993     // this is remark, so we'll use up all available threads
  1994     int active_workers = 1;
  1995     set_phase(active_workers, false);
  1997     CMRemarkTask remarkTask(this);
  1998     // We will start all available threads, even if we decide that the
  1999     // active_workers will be fewer. The extra ones will just bail out
  2000     // immediately.
  2001     remarkTask.work(0);
  2003   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  2004   guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");
  2006   print_stats();
  2008   if (!restart_for_overflow())
  2009     set_non_marking_state();
  2011 #if VERIFY_OBJS_PROCESSED
  2012   if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
  2013     gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
  2014                            _scan_obj_cl.objs_processed,
  2015                            ThreadLocalObjQueue::objs_enqueued);
  2016     guarantee(_scan_obj_cl.objs_processed ==
  2017               ThreadLocalObjQueue::objs_enqueued,
  2018               "Different number of objs processed and enqueued.");
  2020 #endif
  2023 #ifndef PRODUCT
  2025 class PrintReachableOopClosure: public OopClosure {
  2026 private:
  2027   G1CollectedHeap* _g1h;
  2028   CMBitMapRO*      _bitmap;
  2029   outputStream*    _out;
  2030   bool             _use_prev_marking;
  2031   bool             _all;
  2033 public:
  2034   PrintReachableOopClosure(CMBitMapRO*   bitmap,
  2035                            outputStream* out,
  2036                            bool          use_prev_marking,
  2037                            bool          all) :
  2038     _g1h(G1CollectedHeap::heap()),
  2039     _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking), _all(all) { }
  2041   void do_oop(narrowOop* p) { do_oop_work(p); }
  2042   void do_oop(      oop* p) { do_oop_work(p); }
  2044   template <class T> void do_oop_work(T* p) {
  2045     oop         obj = oopDesc::load_decode_heap_oop(p);
  2046     const char* str = NULL;
  2047     const char* str2 = "";
  2049     if (obj == NULL) {
  2050       str = "";
  2051     } else if (!_g1h->is_in_g1_reserved(obj)) {
  2052       str = " O";
  2053     } else {
  2054       HeapRegion* hr  = _g1h->heap_region_containing(obj);
  2055       guarantee(hr != NULL, "invariant");
  2056       bool over_tams = false;
  2057       if (_use_prev_marking) {
  2058         over_tams = hr->obj_allocated_since_prev_marking(obj);
  2059       } else {
  2060         over_tams = hr->obj_allocated_since_next_marking(obj);
  2062       bool marked = _bitmap->isMarked((HeapWord*) obj);
  2064       if (over_tams) {
  2065         str = " >";
  2066         if (marked) {
  2067           str2 = " AND MARKED";
  2069       } else if (marked) {
  2070         str = " M";
  2071       } else {
  2072         str = " NOT";
  2076     _out->print_cr("  "PTR_FORMAT": "PTR_FORMAT"%s%s",
  2077                    p, (void*) obj, str, str2);
  2079 };
  2081 class PrintReachableObjectClosure : public ObjectClosure {
  2082 private:
  2083   CMBitMapRO*   _bitmap;
  2084   outputStream* _out;
  2085   bool          _use_prev_marking;
  2086   bool          _all;
  2087   HeapRegion*   _hr;
  2089 public:
  2090   PrintReachableObjectClosure(CMBitMapRO*   bitmap,
  2091                               outputStream* out,
  2092                               bool          use_prev_marking,
  2093                               bool          all,
  2094                               HeapRegion*   hr) :
  2095     _bitmap(bitmap), _out(out),
  2096     _use_prev_marking(use_prev_marking), _all(all), _hr(hr) { }
  2098   void do_object(oop o) {
  2099     bool over_tams;
  2100     if (_use_prev_marking) {
  2101       over_tams = _hr->obj_allocated_since_prev_marking(o);
  2102     } else {
  2103       over_tams = _hr->obj_allocated_since_next_marking(o);
  2105     bool marked = _bitmap->isMarked((HeapWord*) o);
  2106     bool print_it = _all || over_tams || marked;
  2108     if (print_it) {
  2109       _out->print_cr(" "PTR_FORMAT"%s",
  2110                      o, (over_tams) ? " >" : (marked) ? " M" : "");
  2111       PrintReachableOopClosure oopCl(_bitmap, _out, _use_prev_marking, _all);
  2112       o->oop_iterate(&oopCl);
  2115 };
  2117 class PrintReachableRegionClosure : public HeapRegionClosure {
  2118 private:
  2119   CMBitMapRO*   _bitmap;
  2120   outputStream* _out;
  2121   bool          _use_prev_marking;
  2122   bool          _all;
  2124 public:
  2125   bool doHeapRegion(HeapRegion* hr) {
  2126     HeapWord* b = hr->bottom();
  2127     HeapWord* e = hr->end();
  2128     HeapWord* t = hr->top();
  2129     HeapWord* p = NULL;
  2130     if (_use_prev_marking) {
  2131       p = hr->prev_top_at_mark_start();
  2132     } else {
  2133       p = hr->next_top_at_mark_start();
  2135     _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
  2136                    "TAMS: "PTR_FORMAT, b, e, t, p);
  2137     _out->cr();
  2139     HeapWord* from = b;
  2140     HeapWord* to   = t;
  2142     if (to > from) {
  2143       _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
  2144       _out->cr();
  2145       PrintReachableObjectClosure ocl(_bitmap, _out,
  2146                                       _use_prev_marking, _all, hr);
  2147       hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
  2148       _out->cr();
  2151     return false;
  2154   PrintReachableRegionClosure(CMBitMapRO*   bitmap,
  2155                               outputStream* out,
  2156                               bool          use_prev_marking,
  2157                               bool          all) :
  2158     _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking), _all(all) { }
  2159 };
  2161 void ConcurrentMark::print_reachable(const char* str,
  2162                                      bool use_prev_marking,
  2163                                      bool all) {
  2164   gclog_or_tty->cr();
  2165   gclog_or_tty->print_cr("== Doing heap dump... ");
  2167   if (G1PrintReachableBaseFile == NULL) {
  2168     gclog_or_tty->print_cr("  #### error: no base file defined");
  2169     return;
  2172   if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
  2173       (JVM_MAXPATHLEN - 1)) {
  2174     gclog_or_tty->print_cr("  #### error: file name too long");
  2175     return;
  2178   char file_name[JVM_MAXPATHLEN];
  2179   sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
  2180   gclog_or_tty->print_cr("  dumping to file %s", file_name);
  2182   fileStream fout(file_name);
  2183   if (!fout.is_open()) {
  2184     gclog_or_tty->print_cr("  #### error: could not open file");
  2185     return;
  2188   outputStream* out = &fout;
  2190   CMBitMapRO* bitmap = NULL;
  2191   if (use_prev_marking) {
  2192     bitmap = _prevMarkBitMap;
  2193   } else {
  2194     bitmap = _nextMarkBitMap;
  2197   out->print_cr("-- USING %s", (use_prev_marking) ? "PTAMS" : "NTAMS");
  2198   out->cr();
  2200   out->print_cr("--- ITERATING OVER REGIONS");
  2201   out->cr();
  2202   PrintReachableRegionClosure rcl(bitmap, out, use_prev_marking, all);
  2203   _g1h->heap_region_iterate(&rcl);
  2204   out->cr();
  2206   gclog_or_tty->print_cr("  done");
  2207   gclog_or_tty->flush();
  2210 #endif // PRODUCT
  2212 // This note is for drainAllSATBBuffers and the code in between.
  2213 // In the future we could reuse a task to do this work during an
  2214 // evacuation pause (since now tasks are not active and can be claimed
  2215 // during an evacuation pause). This was a late change to the code and
  2216 // is currently not being taken advantage of.
  2218 class CMGlobalObjectClosure : public ObjectClosure {
  2219 private:
  2220   ConcurrentMark* _cm;
  2222 public:
  2223   void do_object(oop obj) {
  2224     _cm->deal_with_reference(obj);
  2227   CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
  2228 };
  2230 void ConcurrentMark::deal_with_reference(oop obj) {
  2231   if (verbose_high())
  2232     gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
  2233                            (void*) obj);
  2236   HeapWord* objAddr = (HeapWord*) obj;
  2237   assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
  2238   if (_g1h->is_in_g1_reserved(objAddr)) {
  2239     assert(obj != NULL, "is_in_g1_reserved should ensure this");
  2240     HeapRegion* hr = _g1h->heap_region_containing(obj);
  2241     if (_g1h->is_obj_ill(obj, hr)) {
  2242       if (verbose_high())
  2243         gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
  2244                                "marked", (void*) obj);
  2246       // we need to mark it first
  2247       if (_nextMarkBitMap->parMark(objAddr)) {
  2248         // No OrderAccess::store_load() is needed. It is implicit in the
  2249         // CAS done in parMark(objAddr) above
  2250         HeapWord* finger = _finger;
  2251         if (objAddr < finger) {
  2252           if (verbose_high())
  2253             gclog_or_tty->print_cr("[global] below the global finger "
  2254                                    "("PTR_FORMAT"), pushing it", finger);
  2255           if (!mark_stack_push(obj)) {
  2256             if (verbose_low())
  2257               gclog_or_tty->print_cr("[global] global stack overflow during "
  2258                                      "deal_with_reference");
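       // The objAddr < finger check above reflects the usual marking invariant:
       // marked objects at or above the global finger will still be visited by
       // the bitmap scan when it reaches them, so only objects marked below the
       // finger need to be pushed on the global mark stack here.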
  2266 void ConcurrentMark::drainAllSATBBuffers() {
  2267   CMGlobalObjectClosure oc(this);
  2268   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  2269   satb_mq_set.set_closure(&oc);
  2271   while (satb_mq_set.apply_closure_to_completed_buffer()) {
  2272     if (verbose_medium())
  2273       gclog_or_tty->print_cr("[global] processed an SATB buffer");
  2276   // no need to check whether we should do this, as this is only
  2277   // called during an evacuation pause
  2278   satb_mq_set.iterate_closure_all_threads();
  2280   satb_mq_set.set_closure(NULL);
  2281   assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
  2284 void ConcurrentMark::markPrev(oop p) {
  2285   // Note we are overriding the read-only view of the prev map here, via
  2286   // the cast.
  2287   ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
  2290 void ConcurrentMark::clear(oop p) {
  2291   assert(p != NULL && p->is_oop(), "expected an oop");
  2292   HeapWord* addr = (HeapWord*)p;
  2293   assert(addr >= _nextMarkBitMap->startWord() &&
  2294          addr < _nextMarkBitMap->endWord(), "in a region");
  2296   _nextMarkBitMap->clear(addr);
  2299 void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
  2300   // Note we are overriding the read-only view of the prev map here, via
  2301   // the cast.
  2302   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
  2303   _nextMarkBitMap->clearRange(mr);
  2306 HeapRegion*
  2307 ConcurrentMark::claim_region(int task_num) {
  2308   // "checkpoint" the finger
  2309   HeapWord* finger = _finger;
  2311   // _heap_end will not change underneath our feet; it only changes at
  2312   // yield points.
  2313   while (finger < _heap_end) {
  2314     assert(_g1h->is_in_g1_reserved(finger), "invariant");
  2316     // is the gap between reading the finger and doing the CAS too long?
  2318     HeapRegion* curr_region   = _g1h->heap_region_containing(finger);
  2319     HeapWord*   bottom        = curr_region->bottom();
  2320     HeapWord*   end           = curr_region->end();
  2321     HeapWord*   limit         = curr_region->next_top_at_mark_start();
  2323     if (verbose_low())
  2324       gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
  2325                              "["PTR_FORMAT", "PTR_FORMAT"), "
  2326                              "limit = "PTR_FORMAT,
  2327                              task_num, curr_region, bottom, end, limit);
  2329     HeapWord* res =
  2330       (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
  2331     if (res == finger) {
  2332       // we succeeded
  2334       // notice that _finger == end cannot be guaranteed here, since
  2335       // someone else might have moved the finger even further
  2336       assert(_finger >= end, "the finger should have moved forward");
  2338       if (verbose_low())
  2339         gclog_or_tty->print_cr("[%d] we were successful with region = "
  2340                                PTR_FORMAT, task_num, curr_region);
  2342       if (limit > bottom) {
  2343         if (verbose_low())
  2344           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
  2345                                  "returning it ", task_num, curr_region);
  2346         return curr_region;
  2347       } else {
  2348         assert(limit == bottom,
  2349                "the region limit should be at bottom");
  2350         if (verbose_low())
  2351           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
  2352                                  "returning NULL", task_num, curr_region);
  2353         // we return NULL and the caller should try calling
  2354         // claim_region() again.
  2355         return NULL;
  2357     } else {
  2358       assert(_finger > finger, "the finger should have moved forward");
  2359       if (verbose_low())
  2360         gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
  2361                                "global finger = "PTR_FORMAT", "
  2362                                "our finger = "PTR_FORMAT,
  2363                                task_num, _finger, finger);
  2365       // read it again
  2366       finger = _finger;
  2370   return NULL;
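       // The loop above is a plain CAS-claim protocol over the global finger:
       // snapshot _finger, compute the end of the region it points into, and
       // attempt Atomic::cmpxchg_ptr(end, &_finger, finger). On success the
       // caller owns [bottom, end); on failure somebody else advanced the
       // finger first, so we re-read it and retry with the next region.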
  2373 bool ConcurrentMark::invalidate_aborted_regions_in_cset() {
  2374   bool result = false;
  2375   for (int i = 0; i < (int)_max_task_num; ++i) {
  2376     CMTask* the_task = _tasks[i];
  2377     MemRegion mr = the_task->aborted_region();
  2378     if (mr.start() != NULL) {
  2379       assert(mr.end() != NULL, "invariant");
  2380       assert(mr.word_size() > 0, "invariant");
  2381       HeapRegion* hr = _g1h->heap_region_containing(mr.start());
  2382       assert(hr != NULL, "invariant");
  2383       if (hr->in_collection_set()) {
  2384         // The region points into the collection set
  2385         the_task->set_aborted_region(MemRegion());
  2386         result = true;
  2390   return result;
  2393 bool ConcurrentMark::has_aborted_regions() {
  2394   for (int i = 0; i < (int)_max_task_num; ++i) {
  2395     CMTask* the_task = _tasks[i];
  2396     MemRegion mr = the_task->aborted_region();
  2397     if (mr.start() != NULL) {
  2398       assert(mr.end() != NULL, "invariant");
  2399       assert(mr.word_size() > 0, "invariant");
  2400       return true;
  2403   return false;
  2406 void ConcurrentMark::oops_do(OopClosure* cl) {
  2407   if (_markStack.size() > 0 && verbose_low())
  2408     gclog_or_tty->print_cr("[global] scanning the global marking stack, "
  2409                            "size = %d", _markStack.size());
  2410   // we first iterate over the contents of the mark stack...
  2411   _markStack.oops_do(cl);
  2413   for (int i = 0; i < (int)_max_task_num; ++i) {
  2414     OopTaskQueue* queue = _task_queues->queue((int)i);
  2416     if (queue->size() > 0 && verbose_low())
  2417       gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
  2418                              "size = %d", i, queue->size());
  2420     // ...then over the contents of all the task queues.
  2421     queue->oops_do(cl);
  2424   // Invalidate any entries in the region stack that point into
  2425   // the collection set.
  2426   if (_regionStack.invalidate_entries_into_cset()) {
  2427     // otherwise, any gray objects copied during the evacuation pause
  2428     // might not be visited.
  2429     assert(_should_gray_objects, "invariant");
  2432   // Invalidate any aborted regions, recorded in the individual CM
  2433   // tasks, that point into the collection set.
  2434   if (invalidate_aborted_regions_in_cset()) {
  2435     // otherwise, any gray objects copied during the evacuation pause
  2436     // might not be visited.
  2437     assert(_should_gray_objects, "invariant");
  2442 void ConcurrentMark::clear_marking_state() {
  2443   _markStack.setEmpty();
  2444   _markStack.clear_overflow();
  2445   _regionStack.setEmpty();
  2446   _regionStack.clear_overflow();
  2447   clear_has_overflown();
  2448   _finger = _heap_start;
  2450   for (int i = 0; i < (int)_max_task_num; ++i) {
  2451     OopTaskQueue* queue = _task_queues->queue(i);
  2452     queue->set_empty();
  2453     // Clear any partial regions from the CMTasks
  2454     _tasks[i]->clear_aborted_region();
  2458 void ConcurrentMark::print_stats() {
  2459   if (verbose_stats()) {
  2460     gclog_or_tty->print_cr("---------------------------------------------------------------------");
  2461     for (size_t i = 0; i < _active_tasks; ++i) {
  2462       _tasks[i]->print_stats();
  2463       gclog_or_tty->print_cr("---------------------------------------------------------------------");
  2468 class CSMarkOopClosure: public OopClosure {
  2469   friend class CSMarkBitMapClosure;
  2471   G1CollectedHeap* _g1h;
  2472   CMBitMap*        _bm;
  2473   ConcurrentMark*  _cm;
  2474   oop*             _ms;
  2475   jint*            _array_ind_stack;
  2476   int              _ms_size;
  2477   int              _ms_ind;
  2478   int              _array_increment;
  2480   bool push(oop obj, int arr_ind = 0) {
  2481     if (_ms_ind == _ms_size) {
  2482       gclog_or_tty->print_cr("Mark stack is full.");
  2483       return false;
  2485     _ms[_ms_ind] = obj;
  2486     if (obj->is_objArray()) _array_ind_stack[_ms_ind] = arr_ind;
  2487     _ms_ind++;
  2488     return true;
  2491   oop pop() {
  2492     if (_ms_ind == 0) return NULL;
  2493     else {
  2494       _ms_ind--;
  2495       return _ms[_ms_ind];
  2499   template <class T> bool drain() {
  2500     while (_ms_ind > 0) {
  2501       oop obj = pop();
  2502       assert(obj != NULL, "Since index was non-zero.");
  2503       if (obj->is_objArray()) {
  2504         jint arr_ind = _array_ind_stack[_ms_ind];
  2505         objArrayOop aobj = objArrayOop(obj);
  2506         jint len = aobj->length();
  2507         jint next_arr_ind = arr_ind + _array_increment;
  2508         if (next_arr_ind < len) {
  2509           push(obj, next_arr_ind);
  2511         // Now process this portion of this one.
  2512         int lim = MIN2(next_arr_ind, len);
  2513         for (int j = arr_ind; j < lim; j++) {
  2514           do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j));
  2517       } else {
  2518         obj->oop_iterate(this);
  2520       if (abort()) return false;
  2522     return true;
  2525 public:
  2526   CSMarkOopClosure(ConcurrentMark* cm, int ms_size) :
  2527     _g1h(G1CollectedHeap::heap()),
  2528     _cm(cm),
  2529     _bm(cm->nextMarkBitMap()),
  2530     _ms_size(ms_size), _ms_ind(0),
  2531     _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
  2532     _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
  2533     _array_increment(MAX2(ms_size/8, 16))
  2534   {}
  2536   ~CSMarkOopClosure() {
  2537     FREE_C_HEAP_ARRAY(oop, _ms);
  2538     FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
  2541   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  2542   virtual void do_oop(      oop* p) { do_oop_work(p); }
  2544   template <class T> void do_oop_work(T* p) {
  2545     T heap_oop = oopDesc::load_heap_oop(p);
  2546     if (oopDesc::is_null(heap_oop)) return;
  2547     oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
  2548     if (obj->is_forwarded()) {
  2549       // If the object has already been forwarded, we have to make sure
  2550       // that it's marked.  So follow the forwarding pointer.  Note that
  2551       // this does the right thing for self-forwarding pointers in the
  2552       // evacuation failure case.
  2553       obj = obj->forwardee();
  2555     HeapRegion* hr = _g1h->heap_region_containing(obj);
  2556     if (hr != NULL) {
  2557       if (hr->in_collection_set()) {
  2558         if (_g1h->is_obj_ill(obj)) {
  2559           _bm->mark((HeapWord*)obj);
  2560           if (!push(obj)) {
  2561             gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
  2562             set_abort();
  2565       } else {
  2566         // Outside the collection set; we need to gray it
  2567         _cm->deal_with_reference(obj);
  2571 };
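       // drain() above scans object arrays in chunks of _array_increment
       // elements: before processing [arr_ind, lim) it re-pushes the array with
       // the index of the next chunk, so a single huge array cannot monopolize
       // the local mark stack. For instance, with _array_increment == 16 an
       // array of length 40 is covered as [0,16), [16,32), [32,40).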
  2573 class CSMarkBitMapClosure: public BitMapClosure {
  2574   G1CollectedHeap* _g1h;
  2575   CMBitMap*        _bitMap;
  2576   ConcurrentMark*  _cm;
  2577   CSMarkOopClosure _oop_cl;
  2578 public:
  2579   CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :
  2580     _g1h(G1CollectedHeap::heap()),
  2581     _bitMap(cm->nextMarkBitMap()),
  2582     _oop_cl(cm, ms_size)
  2583   {}
  2585   ~CSMarkBitMapClosure() {}
  2587   bool do_bit(size_t offset) {
  2588     // convert offset into a HeapWord*
  2589     HeapWord* addr = _bitMap->offsetToHeapWord(offset);
  2590     assert(addr >= _bitMap->startWord() && addr < _bitMap->endWord(),
  2591            "address out of range");
  2592     assert(_bitMap->isMarked(addr), "tautology");
  2593     oop obj = oop(addr);
  2594     if (!obj->is_forwarded()) {
  2595       if (!_oop_cl.push(obj)) return false;
  2596       if (UseCompressedOops) {
  2597         if (!_oop_cl.drain<narrowOop>()) return false;
  2598       } else {
  2599         if (!_oop_cl.drain<oop>()) return false;
  2602     // Otherwise...
  2603     return true;
  2605 };
  2608 class CompleteMarkingInCSHRClosure: public HeapRegionClosure {
  2609   CMBitMap* _bm;
  2610   CSMarkBitMapClosure _bit_cl;
  2611   enum SomePrivateConstants {
  2612     MSSize = 1000
  2613   };
  2614   bool _completed;
  2615 public:
  2616   CompleteMarkingInCSHRClosure(ConcurrentMark* cm) :
  2617     _bm(cm->nextMarkBitMap()),
  2618     _bit_cl(cm, MSSize),
  2619     _completed(true)
  2620   {}
  2622   ~CompleteMarkingInCSHRClosure() {}
  2624   bool doHeapRegion(HeapRegion* r) {
  2625     if (!r->evacuation_failed()) {
  2626       MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
  2627       if (!mr.is_empty()) {
  2628         if (!_bm->iterate(&_bit_cl, mr)) {
  2629           _completed = false;
  2630           return true;
  2634     return false;
  2637   bool completed() { return _completed; }
  2638 };
  2640 class ClearMarksInHRClosure: public HeapRegionClosure {
  2641   CMBitMap* _bm;
  2642 public:
  2643   ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { }
  2645   bool doHeapRegion(HeapRegion* r) {
  2646     if (!r->used_region().is_empty() && !r->evacuation_failed()) {
  2647       MemRegion usedMR = r->used_region();
  2648       _bm->clearRange(usedMR);
  2650     return false;
  2652 };
  2654 void ConcurrentMark::complete_marking_in_collection_set() {
  2655   G1CollectedHeap* g1h =  G1CollectedHeap::heap();
  2657   if (!g1h->mark_in_progress()) {
  2658     g1h->g1_policy()->record_mark_closure_time(0.0);
  2659     return;
  2662   int i = 1;
  2663   double start = os::elapsedTime();
  2664   while (true) {
  2665     i++;
  2666     CompleteMarkingInCSHRClosure cmplt(this);
  2667     g1h->collection_set_iterate(&cmplt);
  2668     if (cmplt.completed()) break;
  2670   double end_time = os::elapsedTime();
  2671   double elapsed_time_ms = (end_time - start) * 1000.0;
  2672   g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
  2674   ClearMarksInHRClosure clr(nextMarkBitMap());
  2675   g1h->collection_set_iterate(&clr);
  2678 // The next two methods deal with the following optimisation. Some
  2679 // objects are gray by being marked and located above the finger. If
  2680 // they are copied, during an evacuation pause, below the finger then
  2681 // they need to be pushed on the stack. The observation is that, if
  2682 // there are no regions in the collection set located above the
  2683 // finger, then the above cannot happen, hence we do not need to
  2684 // explicitly gray any objects when copying them to below the
  2685 // finger. The global stack will be scanned to ensure that, if it
  2686 // points to objects being copied, it will update their
  2687 // location. There is a tricky situation with the gray objects in
  2688 // the region stack that are being copied, however. See the comment in
  2689 // newCSet().
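       // In other words, letting F be the lowest of the global and per-task
       // fingers (the _min_finger computed in newCSet() below), a pause only
       // has to gray copied objects if some collection set region ends above F
       // (registerCSetRegion() checks region_end > _min_finger), or if the
       // region stack / aborted regions are non-empty.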
  2691 void ConcurrentMark::newCSet() {
  2692   if (!concurrent_marking_in_progress())
  2693     // nothing to do if marking is not in progress
  2694     return;
  2696   // find what the lowest finger is among the global and local fingers
  2697   _min_finger = _finger;
  2698   for (int i = 0; i < (int)_max_task_num; ++i) {
  2699     CMTask* task = _tasks[i];
  2700     HeapWord* task_finger = task->finger();
  2701     if (task_finger != NULL && task_finger < _min_finger)
  2702       _min_finger = task_finger;
  2705   _should_gray_objects = false;
  2707   // This fixes a very subtle and frustrating bug. It might be the case
  2708   // that, during an evacuation pause, heap regions that contain
  2709   // objects that are gray (by being in regions contained in the
  2710   // region stack) are included in the collection set. Since such gray
  2711   // objects will be moved, and because it's not easy to redirect
  2712   // region stack entries to point to a new location (because objects
  2713   // in one region might be scattered to multiple regions after they
  2714   // are copied), one option is to ensure that all marked objects
  2715   // copied during a pause are pushed on the stack. Notice, however,
  2716   // that this problem can only happen when the region stack is not
  2717   // empty during an evacuation pause. So, we make the fix a bit less
  2718   // conservative and ensure that regions are pushed on the stack,
  2719   // irrespective of whether all collection set regions are below the
  2720   // finger, if the region stack is not empty. This is expected to be
  2721   // a rare case, so I don't think it's necessary to be smarter about it.
  2722   if (!region_stack_empty() || has_aborted_regions())
  2723     _should_gray_objects = true;
  2726 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
  2727   if (!concurrent_marking_in_progress())
  2728     return;
  2730   HeapWord* region_end = hr->end();
  2731   if (region_end > _min_finger)
  2732     _should_gray_objects = true;
  2735 // abandon current marking iteration due to a Full GC
  2736 void ConcurrentMark::abort() {
  2737   // Clear all marks to force marking thread to do nothing
  2738   _nextMarkBitMap->clearAll();
  2739   // Empty mark stack
  2740   clear_marking_state();
  2741   for (int i = 0; i < (int)_max_task_num; ++i) {
  2742     _tasks[i]->clear_region_fields();
  2744   _has_aborted = true;
  2746   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  2747   satb_mq_set.abandon_partial_marking();
  2748   // This can be called either during or outside marking, we'll read
  2749   // the expected_active value from the SATB queue set.
  2750   satb_mq_set.set_active_all_threads(
  2751                                  false, /* new active value */
  2752                                  satb_mq_set.is_active() /* expected_active */);
  2755 static void print_ms_time_info(const char* prefix, const char* name,
  2756                                NumberSeq& ns) {
  2757   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
  2758                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
  2759   if (ns.num() > 0) {
  2760     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
  2761                            prefix, ns.sd(), ns.maximum());
  2765 void ConcurrentMark::print_summary_info() {
  2766   gclog_or_tty->print_cr(" Concurrent marking:");
  2767   print_ms_time_info("  ", "init marks", _init_times);
  2768   print_ms_time_info("  ", "remarks", _remark_times);
  2770     print_ms_time_info("     ", "final marks", _remark_mark_times);
  2771     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
  2774   print_ms_time_info("  ", "cleanups", _cleanup_times);
  2775   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
  2776                          _total_counting_time,
  2777                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
  2778                           (double)_cleanup_times.num()
  2779                          : 0.0));
  2780   if (G1ScrubRemSets) {
  2781     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
  2782                            _total_rs_scrub_time,
  2783                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
  2784                             (double)_cleanup_times.num()
  2785                            : 0.0));
  2787   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
  2788                          (_init_times.sum() + _remark_times.sum() +
  2789                           _cleanup_times.sum())/1000.0);
  2790   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
  2791                 "(%8.2f s marking, %8.2f s counting).",
  2792                 cmThread()->vtime_accum(),
  2793                 cmThread()->vtime_mark_accum(),
  2794                 cmThread()->vtime_count_accum());
  2797 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
  2798   _parallel_workers->print_worker_threads_on(st);
  2801 // Closures
  2802 // XXX: there seems to be a lot of code duplication here;
  2803 // should refactor and consolidate the shared code.
  2808 // We take a break if someone is trying to stop the world.
  2809 bool ConcurrentMark::do_yield_check(int worker_i) {
  2810   if (should_yield()) {
  2811     if (worker_i == 0)
  2812       _g1h->g1_policy()->record_concurrent_pause();
  2813     cmThread()->yield();
  2814     if (worker_i == 0)
  2815       _g1h->g1_policy()->record_concurrent_pause_end();
  2816     return true;
  2817   } else {
  2818     return false;
  2822 bool ConcurrentMark::should_yield() {
  2823   return cmThread()->should_yield();
  2826 bool ConcurrentMark::containing_card_is_marked(void* p) {
  2827   size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
  2828   return _card_bm.at(offset >> CardTableModRefBS::card_shift);
  2831 bool ConcurrentMark::containing_cards_are_marked(void* start,
  2832                                                  void* last) {
  2833   return
  2834     containing_card_is_marked(start) &&
  2835     containing_card_is_marked(last);
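       // For illustration: with the usual 512-byte cards (card_shift == 9), a
       // pointer 0x1234 bytes past the start of the reserved region falls on
       // card 0x1234 >> 9 == 9, so containing_card_is_marked() tests bit 9 of
       // _card_bm.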
  2838 #ifndef PRODUCT
  2839 // for debugging purposes
  2840 void ConcurrentMark::print_finger() {
  2841   gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
  2842                          _heap_start, _heap_end, _finger);
  2843   for (int i = 0; i < (int) _max_task_num; ++i) {
  2844     gclog_or_tty->print("   %d: "PTR_FORMAT, i, _tasks[i]->finger());
  2846   gclog_or_tty->print_cr("");
  2848 #endif
  2850 // Closure for iteration over bitmaps
  2851 class CMBitMapClosure : public BitMapClosure {
  2852 private:
  2853   // the bitmap that is being iterated over
  2854   CMBitMap*                   _nextMarkBitMap;
  2855   ConcurrentMark*             _cm;
  2856   CMTask*                     _task;
  2857   // true if we're scanning a heap region claimed by the task (so that
  2858   // we move the finger along), false if we're not, i.e. currently when
  2859   // scanning a heap region popped from the region stack (so that we
  2860   // do not move the task finger along; it'd be a mistake if we did so).
  2861   bool                        _scanning_heap_region;
  2863 public:
  2864   CMBitMapClosure(CMTask *task,
  2865                   ConcurrentMark* cm,
  2866                   CMBitMap* nextMarkBitMap)
  2867     :  _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
  2869   void set_scanning_heap_region(bool scanning_heap_region) {
  2870     _scanning_heap_region = scanning_heap_region;
  2873   bool do_bit(size_t offset) {
  2874     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
  2875     assert(_nextMarkBitMap->isMarked(addr), "invariant");
  2876     assert( addr < _cm->finger(), "invariant");
  2878     if (_scanning_heap_region) {
  2879       statsOnly( _task->increase_objs_found_on_bitmap() );
  2880       assert(addr >= _task->finger(), "invariant");
  2881       // We move that task's local finger along.
  2882       _task->move_finger_to(addr);
  2883     } else {
  2884       // We move the task's region finger along.
  2885       _task->move_region_finger_to(addr);
  2888     _task->scan_object(oop(addr));
  2889     // we only partially drain the local queue and global stack
  2890     _task->drain_local_queue(true);
  2891     _task->drain_global_stack(true);
  2893     // if the has_aborted flag has been raised, we need to bail out of
  2894     // the iteration
  2895     return !_task->has_aborted();
  2897 };
  2899 // Closure for iterating over objects, currently only used for
  2900 // processing SATB buffers.
  2901 class CMObjectClosure : public ObjectClosure {
  2902 private:
  2903   CMTask* _task;
  2905 public:
  2906   void do_object(oop obj) {
  2907     _task->deal_with_reference(obj);
  2910   CMObjectClosure(CMTask* task) : _task(task) { }
  2911 };
  2913 // Closure for iterating over object fields
  2914 class CMOopClosure : public OopClosure {
  2915 private:
  2916   G1CollectedHeap*   _g1h;
  2917   ConcurrentMark*    _cm;
  2918   CMTask*            _task;
  2920 public:
  2921   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  2922   virtual void do_oop(      oop* p) { do_oop_work(p); }
  2924   template <class T> void do_oop_work(T* p) {
  2925     assert( _g1h->is_in_g1_reserved((HeapWord*) p), "invariant");
  2926     assert(!_g1h->is_on_free_list(
  2927                     _g1h->heap_region_containing((HeapWord*) p)), "invariant");
  2929     oop obj = oopDesc::load_decode_heap_oop(p);
  2930     if (_cm->verbose_high())
  2931       gclog_or_tty->print_cr("[%d] we're looking at location "
  2932                              "*"PTR_FORMAT" = "PTR_FORMAT,
  2933                              _task->task_id(), p, (void*) obj);
  2934     _task->deal_with_reference(obj);
  2937   CMOopClosure(G1CollectedHeap* g1h,
  2938                ConcurrentMark* cm,
  2939                CMTask* task)
  2940     : _g1h(g1h), _cm(cm), _task(task)
  2942     _ref_processor = g1h->ref_processor();
  2943     assert(_ref_processor != NULL, "should not be NULL");
  2945 };
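// --- Editor's note (a hedged sketch, not part of the original source) ---
// For context on how the closures above are used: scanning an object means
// applying the oop closure to each of its reference fields, which feeds every
// field back into CMTask::deal_with_reference(). The actual definition of
// scan_object() lives in the inline header and may differ in detail; the
// outline below is an assumption:
//
//   inline void CMTask::scan_object(oop obj) {
//     // visit every reference field; do_oop() ends up in deal_with_reference()
//     obj->oop_iterate(_oop_closure);
//     // bookkeeping (words scanned, etc.) followed by a reached_limit() check
//   }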
  2947 void CMTask::setup_for_region(HeapRegion* hr) {
  2948   // Separated the asserts so that we know which one fires.
  2949   assert(hr != NULL,
  2950         "hr should not be NULL");
  2951   assert(!hr->continuesHumongous(),
  2952         "claim_region() should have filtered out continues humongous regions");
  2954   if (_cm->verbose_low())
  2955     gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
  2956                            _task_id, hr);
  2958   _curr_region  = hr;
  2959   _finger       = hr->bottom();
  2960   update_region_limit();
  2963 void CMTask::update_region_limit() {
  2964   HeapRegion* hr            = _curr_region;
  2965   HeapWord* bottom          = hr->bottom();
  2966   HeapWord* limit           = hr->next_top_at_mark_start();
  2968   if (limit == bottom) {
  2969     if (_cm->verbose_low())
  2970       gclog_or_tty->print_cr("[%d] found an empty region "
  2971                              "["PTR_FORMAT", "PTR_FORMAT")",
  2972                              _task_id, bottom, limit);
  2973     // The region was collected underneath our feet.
  2974     // We set the finger to bottom to ensure that the bitmap
  2975     // iteration that will follow this will not do anything.
  2976     // (this is not a condition that holds when we set the region up,
  2977     // as the region is not supposed to be empty in the first place)
  2978     _finger = bottom;
  2979   } else if (limit >= _region_limit) {
  2980     assert(limit >= _finger, "peace of mind");
  2981   } else {
  2982     assert(limit < _region_limit, "only way to get here");
  2983     // This can happen under some pretty unusual circumstances.  An
  2984     // evacuation pause empties the region underneath our feet (NTAMS
  2985     // at bottom). We then do some allocation in the region (NTAMS
  2986     // stays at bottom), followed by the region being used as a GC
  2987     // alloc region (NTAMS will move to top() and the objects
  2988     // originally below it will be grayed). All objects now marked in
  2989     // the region are explicitly grayed, if below the global finger,
  2990     // and, in fact, we do not need to scan anything else. So, we simply
  2991     // set _finger to be limit to ensure that the bitmap iteration
  2992     // doesn't do anything.
  2993     _finger = limit;
  2996   _region_limit = limit;
  2999 void CMTask::giveup_current_region() {
  3000   assert(_curr_region != NULL, "invariant");
  3001   if (_cm->verbose_low())
  3002     gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
  3003                            _task_id, _curr_region);
  3004   clear_region_fields();
  3007 void CMTask::clear_region_fields() {
  3008   // Values for these three fields that indicate that we're not
  3009   // holding on to a region.
  3010   _curr_region   = NULL;
  3011   _finger        = NULL;
  3012   _region_limit  = NULL;
  3014   _region_finger = NULL;
  3017 void CMTask::reset(CMBitMap* nextMarkBitMap) {
  3018   guarantee(nextMarkBitMap != NULL, "invariant");
  3020   if (_cm->verbose_low())
  3021     gclog_or_tty->print_cr("[%d] resetting", _task_id);
  3023   _nextMarkBitMap                = nextMarkBitMap;
  3024   clear_region_fields();
  3025   assert(_aborted_region.is_empty(), "should have been cleared");
  3027   _calls                         = 0;
  3028   _elapsed_time_ms               = 0.0;
  3029   _termination_time_ms           = 0.0;
  3030   _termination_start_time_ms     = 0.0;
  3032 #if _MARKING_STATS_
  3033   _local_pushes                  = 0;
  3034   _local_pops                    = 0;
  3035   _local_max_size                = 0;
  3036   _objs_scanned                  = 0;
  3037   _global_pushes                 = 0;
  3038   _global_pops                   = 0;
  3039   _global_max_size               = 0;
  3040   _global_transfers_to           = 0;
  3041   _global_transfers_from         = 0;
  3042   _region_stack_pops             = 0;
  3043   _regions_claimed               = 0;
  3044   _objs_found_on_bitmap          = 0;
  3045   _satb_buffers_processed        = 0;
  3046   _steal_attempts                = 0;
  3047   _steals                        = 0;
  3048   _aborted                       = 0;
  3049   _aborted_overflow              = 0;
  3050   _aborted_cm_aborted            = 0;
  3051   _aborted_yield                 = 0;
  3052   _aborted_timed_out             = 0;
  3053   _aborted_satb                  = 0;
  3054   _aborted_termination           = 0;
  3055 #endif // _MARKING_STATS_
  3058 bool CMTask::should_exit_termination() {
  3059   regular_clock_call();
  3060   // This is called when we are in the termination protocol. We should
  3061   // quit if, for some reason, this task wants to abort or the global
  3062   // stack is not empty (this means that we can get work from it).
  3063   return !_cm->mark_stack_empty() || has_aborted();
  3066 // This determines whether the method below will check both the local
  3067 // and global fingers when determining whether to push on the stack a
  3068 // gray object (value 1) or whether it will only check the global one
  3069 // (value 0). The tradeoffs are that the former will be a bit more
  3070 // accurate and possibly push less on the stack, but it might also be
  3071 // a little bit slower.
  3073 #define _CHECK_BOTH_FINGERS_      1
  3075 void CMTask::deal_with_reference(oop obj) {
  3076   if (_cm->verbose_high())
  3077     gclog_or_tty->print_cr("[%d] we're dealing with reference = "PTR_FORMAT,
  3078                            _task_id, (void*) obj);
  3080   ++_refs_reached;
  3082   HeapWord* objAddr = (HeapWord*) obj;
  3083   assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
  3084   if (_g1h->is_in_g1_reserved(objAddr)) {
  3085     assert(obj != NULL, "is_in_g1_reserved should ensure this");
  3086     HeapRegion* hr =  _g1h->heap_region_containing(obj);
  3087     if (_g1h->is_obj_ill(obj, hr)) {
  3088       if (_cm->verbose_high())
  3089         gclog_or_tty->print_cr("[%d] "PTR_FORMAT" is not considered marked",
  3090                                _task_id, (void*) obj);
  3092       // we need to mark it first
  3093       if (_nextMarkBitMap->parMark(objAddr)) {
  3094         // No OrderAccess::store_load() is needed. It is implicit in the
  3095         // CAS done in parMark(objAddr) above
  3096         HeapWord* global_finger = _cm->finger();
  3098 #if _CHECK_BOTH_FINGERS_
  3099         // we will check both the local and global fingers
  3101         if (_finger != NULL && objAddr < _finger) {
  3102           if (_cm->verbose_high())
  3103             gclog_or_tty->print_cr("[%d] below the local finger ("PTR_FORMAT"), "
  3104                                    "pushing it", _task_id, _finger);
  3105           push(obj);
  3106         } else if (_curr_region != NULL && objAddr < _region_limit) {
  3107           // do nothing
  3108         } else if (objAddr < global_finger) {
  3109           // Notice that the global finger might be moving forward
  3110           // concurrently. This is not a problem. In the worst case, we
  3111           // mark the object while it is above the global finger and, by
  3112           // the time we read the global finger, it has moved forward
  3113           // past this object. In this case, the object will probably
  3114           // be visited when a task is scanning the region and will also
  3115           // be pushed on the stack. So, some duplicate work, but no
  3116           // correctness problems.
  3118           if (_cm->verbose_high())
  3119             gclog_or_tty->print_cr("[%d] below the global finger "
  3120                                    "("PTR_FORMAT"), pushing it",
  3121                                    _task_id, global_finger);
  3122           push(obj);
  3123         } else {
  3124           // do nothing
  3126 #else // _CHECK_BOTH_FINGERS_
  3127       // we will only check the global finger
  3129         if (objAddr < global_finger) {
  3130           // see long comment above
  3132           if (_cm->verbose_high())
  3133             gclog_or_tty->print_cr("[%d] below the global finger "
  3134                                    "("PTR_FORMAT"), pushing it",
  3135                                    _task_id, global_finger);
  3136           push(obj);
  3138 #endif // _CHECK_BOTH_FINGERS_
  3144 void CMTask::push(oop obj) {
  3145   HeapWord* objAddr = (HeapWord*) obj;
  3146   assert(_g1h->is_in_g1_reserved(objAddr), "invariant");
  3147   assert(!_g1h->is_on_free_list(
  3148               _g1h->heap_region_containing((HeapWord*) objAddr)), "invariant");
  3149   assert(!_g1h->is_obj_ill(obj), "invariant");
  3150   assert(_nextMarkBitMap->isMarked(objAddr), "invariant");
  3152   if (_cm->verbose_high())
  3153     gclog_or_tty->print_cr("[%d] pushing "PTR_FORMAT, _task_id, (void*) obj);
  3155   if (!_task_queue->push(obj)) {
  3156     // The local task queue looks full. We need to push some entries
  3157     // to the global stack.
  3159     if (_cm->verbose_medium())
  3160       gclog_or_tty->print_cr("[%d] task queue overflow, "
  3161                              "moving entries to the global stack",
  3162                              _task_id);
  3163     move_entries_to_global_stack();
  3165     // this should succeed since, even if we overflow the global
  3166     // stack, we should have definitely removed some entries from the
  3167     // local queue. So, there must be space on it.
  3168     bool success = _task_queue->push(obj);
  3169     assert(success, "invariant");
  3172   statsOnly( int tmp_size = _task_queue->size();
  3173              if (tmp_size > _local_max_size)
  3174                _local_max_size = tmp_size;
  3175              ++_local_pushes );
  3178 void CMTask::reached_limit() {
  3179   assert(_words_scanned >= _words_scanned_limit ||
  3180          _refs_reached >= _refs_reached_limit ,
  3181          "shouldn't have been called otherwise");
  3182   regular_clock_call();
  3185 void CMTask::regular_clock_call() {
  3186   if (has_aborted())
  3187     return;
  3189   // First, we need to recalculate the words scanned and refs reached
  3190   // limits for the next clock call.
  3191   recalculate_limits();
  3193   // During the regular clock call we do the following
  3195   // (1) If an overflow has been flagged, then we abort.
  3196   if (_cm->has_overflown()) {
  3197     set_has_aborted();
  3198     return;
  3201   // If we are not concurrent (i.e. we're doing remark) we don't need
  3202   // to check anything else. The other steps are only needed during
  3203   // the concurrent marking phase.
  3204   if (!concurrent())
  3205     return;
  3207   // (2) If marking has been aborted for Full GC, then we also abort.
  3208   if (_cm->has_aborted()) {
  3209     set_has_aborted();
  3210     statsOnly( ++_aborted_cm_aborted );
  3211     return;
  3214   double curr_time_ms = os::elapsedVTime() * 1000.0;
  3216   // (3) If marking stats are enabled, then we update the clock statistics.
  3217 #if _MARKING_STATS_
  3218   if (_words_scanned >= _words_scanned_limit)
  3219     ++_clock_due_to_scanning;
  3220   if (_refs_reached >= _refs_reached_limit)
  3221     ++_clock_due_to_marking;
  3223   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
  3224   _interval_start_time_ms = curr_time_ms;
  3225   _all_clock_intervals_ms.add(last_interval_ms);
  3227   if (_cm->verbose_medium()) {
  3228     gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
  3229                            "scanned = %d%s, refs reached = %d%s",
  3230                            _task_id, last_interval_ms,
  3231                            _words_scanned,
  3232                            (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
  3233                            _refs_reached,
  3234                            (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
  3236 #endif // _MARKING_STATS_
  3238   // (4) We check whether we should yield. If we have to, then we abort.
  3239   if (_cm->should_yield()) {
  3240     // We should yield. To do this we abort the task. The caller is
  3241     // responsible for yielding.
  3242     set_has_aborted();
  3243     statsOnly( ++_aborted_yield );
  3244     return;
  3247   // (5) We check whether we've reached our time quota. If we have,
  3248   // then we abort.
  3249   double elapsed_time_ms = curr_time_ms - _start_time_ms;
  3250   if (elapsed_time_ms > _time_target_ms) {
  3251     set_has_aborted();
  3252     _has_aborted_timed_out = true;
  3253     statsOnly( ++_aborted_timed_out );
  3254     return;
  3257   // (6) Finally, we check whether there are enough completed SATB
  3258   // buffers available for processing. If there are, we abort.
  3259   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  3260   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
  3261     if (_cm->verbose_low())
  3262       gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
  3263                              _task_id);
  3264     // we do need to process SATB buffers, so we'll abort and restart
  3265     // the marking task to do so
  3266     set_has_aborted();
  3267     statsOnly( ++_aborted_satb );
  3268     return;
  3272 void CMTask::recalculate_limits() {
  3273   _real_words_scanned_limit = _words_scanned + words_scanned_period;
  3274   _words_scanned_limit      = _real_words_scanned_limit;
  3276   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
  3277   _refs_reached_limit       = _real_refs_reached_limit;
  3280 void CMTask::decrease_limits() {
  3281   // This is called when we believe that we're going to do an infrequent
  3282   // operation which will increase the per byte scanned cost (i.e. move
  3283   // entries to/from the global stack). It basically tries to decrease the
  3284   // scanning limit so that the clock is called earlier.
  3286   if (_cm->verbose_medium())
  3287     gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
  3289   _words_scanned_limit = _real_words_scanned_limit -
  3290     3 * words_scanned_period / 4;
  3291   _refs_reached_limit  = _real_refs_reached_limit -
  3292     3 * refs_reached_period / 4;
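// --- Editor's note (illustrative arithmetic, not part of the original source) ---
// Assuming, purely for illustration, words_scanned_period == 12*1024 words
// (the real value is defined elsewhere in the concurrent mark header):
//
//   recalculate_limits():  _words_scanned_limit = _words_scanned + 12*1024
//   decrease_limits():     _words_scanned_limit = _real_words_scanned_limit
//                                                 - 3 * 12*1024 / 4
//                                               = _words_scanned_at_recalc + 3*1024
//
// so after an expensive operation the next regular_clock_call() fires after
// scanning at most roughly a quarter of the usual period. The same reasoning
// applies to _refs_reached_limit.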
  3295 void CMTask::move_entries_to_global_stack() {
  3296   // local array where we'll store the entries that will be popped
  3297   // from the local queue
  3298   oop buffer[global_stack_transfer_size];
  3300   int n = 0;
  3301   oop obj;
  3302   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
  3303     buffer[n] = obj;
  3304     ++n;
  3307   if (n > 0) {
  3308     // we popped at least one entry from the local queue
  3310     statsOnly( ++_global_transfers_to; _local_pops += n );
  3312     if (!_cm->mark_stack_push(buffer, n)) {
  3313       if (_cm->verbose_low())
  3314         gclog_or_tty->print_cr("[%d] aborting due to global stack overflow", _task_id);
  3315       set_has_aborted();
  3316     } else {
  3317       // the transfer was successful
  3319       if (_cm->verbose_medium())
  3320         gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
  3321                                _task_id, n);
  3322       statsOnly( int tmp_size = _cm->mark_stack_size();
  3323                  if (tmp_size > _global_max_size)
  3324                    _global_max_size = tmp_size;
  3325                  _global_pushes += n );
  3329   // this operation was quite expensive, so decrease the limits
  3330   decrease_limits();
  3333 void CMTask::get_entries_from_global_stack() {
  3334   // local array where we'll store the entries that will be popped
  3335   // from the global stack.
  3336   oop buffer[global_stack_transfer_size];
  3337   int n;
  3338   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
  3339   assert(n <= global_stack_transfer_size,
  3340          "we should not pop more than the given limit");
  3341   if (n > 0) {
  3342     // yes, we did actually pop at least one entry
  3344     statsOnly( ++_global_transfers_from; _global_pops += n );
  3345     if (_cm->verbose_medium())
  3346       gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
  3347                              _task_id, n);
  3348     for (int i = 0; i < n; ++i) {
  3349       bool success = _task_queue->push(buffer[i]);
  3350       // We only call this when the local queue is empty or under a
  3351       // given target limit. So, we do not expect this push to fail.
  3352       assert(success, "invariant");
  3355     statsOnly( int tmp_size = _task_queue->size();
  3356                if (tmp_size > _local_max_size)
  3357                  _local_max_size = tmp_size;
  3358                _local_pushes += n );
  3361   // this operation was quite expensive, so decrease the limits
  3362   decrease_limits();
  3365 void CMTask::drain_local_queue(bool partially) {
  3366   if (has_aborted())
  3367     return;
  3369   // Decide what the target size is, depending whether we're going to
  3370   // drain it partially (so that other tasks can steal if they run out
  3371   // of things to do) or totally (at the very end).
  3372   size_t target_size;
  3373   if (partially)
  3374     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
  3375   else
  3376     target_size = 0;
  3378   if (_task_queue->size() > target_size) {
  3379     if (_cm->verbose_high())
  3380       gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
  3381                              _task_id, target_size);
  3383     oop obj;
  3384     bool ret = _task_queue->pop_local(obj);
  3385     while (ret) {
  3386       statsOnly( ++_local_pops );
  3388       if (_cm->verbose_high())
  3389         gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
  3390                                (void*) obj);
  3392       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
  3393       assert(!_g1h->is_on_free_list(
  3394                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
  3396       scan_object(obj);
  3398       if (_task_queue->size() <= target_size || has_aborted())
  3399         ret = false;
  3400       else
  3401         ret = _task_queue->pop_local(obj);
  3404     if (_cm->verbose_high())
  3405       gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
  3406                              _task_id, _task_queue->size());
  3410 void CMTask::drain_global_stack(bool partially) {
  3411   if (has_aborted())
  3412     return;
  3414   // We have a policy to drain the local queue before we attempt to
  3415   // drain the global stack.
  3416   assert(partially || _task_queue->size() == 0, "invariant");
  3418   // Decide what the target size is, depending whether we're going to
  3419   // drain it partially (so that other tasks can steal if they run out
  3420   // of things to do) or totally (at the very end).  Notice that,
  3421   // because we move entries from the global stack in chunks or
  3422   // because another task might be doing the same, we might in fact
  3423   // drop below the target. But, this is not a problem.
  3424   size_t target_size;
  3425   if (partially)
  3426     target_size = _cm->partial_mark_stack_size_target();
  3427   else
  3428     target_size = 0;
  3430   if (_cm->mark_stack_size() > target_size) {
  3431     if (_cm->verbose_low())
  3432       gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
  3433                              _task_id, target_size);
  3435     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
  3436       get_entries_from_global_stack();
  3437       drain_local_queue(partially);
  3440     if (_cm->verbose_low())
  3441       gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
  3442                              _task_id, _cm->mark_stack_size());
  3446 // SATB Queue has several assumptions on whether to call the par or
  3447 // non-par versions of the methods. This is why some of the code is
  3448 // replicated. We should really get rid of the single-threaded version
  3449 // of the code to simplify things.
  3450 void CMTask::drain_satb_buffers() {
  3451   if (has_aborted())
  3452     return;
  3454   // We set this so that the regular clock knows that we're in the
  3455   // middle of draining buffers and doesn't set the abort flag when it
  3456   // notices that SATB buffers are available for draining. It'd be
  3457   // very counterproductive if it did that. :-)
  3458   _draining_satb_buffers = true;
  3460   CMObjectClosure oc(this);
  3461   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  3462   if (G1CollectedHeap::use_parallel_gc_threads())
  3463     satb_mq_set.set_par_closure(_task_id, &oc);
  3464   else
  3465     satb_mq_set.set_closure(&oc);
  3467   // This keeps claiming and applying the closure to completed buffers
  3468   // until we run out of buffers or we need to abort.
  3469   if (G1CollectedHeap::use_parallel_gc_threads()) {
  3470     while (!has_aborted() &&
  3471            satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
  3472       if (_cm->verbose_medium())
  3473         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
  3474       statsOnly( ++_satb_buffers_processed );
  3475       regular_clock_call();
  3477   } else {
  3478     while (!has_aborted() &&
  3479            satb_mq_set.apply_closure_to_completed_buffer()) {
  3480       if (_cm->verbose_medium())
  3481         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
  3482       statsOnly( ++_satb_buffers_processed );
  3483       regular_clock_call();
  3487   if (!concurrent() && !has_aborted()) {
  3488     // We should only do this during remark.
  3489     if (G1CollectedHeap::use_parallel_gc_threads())
  3490       satb_mq_set.par_iterate_closure_all_threads(_task_id);
  3491     else
  3492       satb_mq_set.iterate_closure_all_threads();
  3495   _draining_satb_buffers = false;
  3497   assert(has_aborted() ||
  3498          concurrent() ||
  3499          satb_mq_set.completed_buffers_num() == 0, "invariant");
  3501   if (G1CollectedHeap::use_parallel_gc_threads())
  3502     satb_mq_set.set_par_closure(_task_id, NULL);
  3503   else
  3504     satb_mq_set.set_closure(NULL);
  3506   // again, this was a potentially expensive operation, decrease the
  3507   // limits to get the regular clock call early
  3508   decrease_limits();
  3511 void CMTask::drain_region_stack(BitMapClosure* bc) {
  3512   if (has_aborted())
  3513     return;
  3515   assert(_region_finger == NULL,
  3516          "it should be NULL when we're not scanning a region");
  3518   if (!_cm->region_stack_empty() || !_aborted_region.is_empty()) {
  3519     if (_cm->verbose_low())
  3520       gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
  3521                              _task_id, _cm->region_stack_size());
  3523     MemRegion mr;
  3525     if (!_aborted_region.is_empty()) {
  3526       mr = _aborted_region;
  3527       _aborted_region = MemRegion();
  3529       if (_cm->verbose_low())
  3530         gclog_or_tty->print_cr("[%d] scanning aborted region [ " PTR_FORMAT ", " PTR_FORMAT " )",
  3531                              _task_id, mr.start(), mr.end());
  3532     } else {
  3533       mr = _cm->region_stack_pop_lock_free();
  3534       // it returns MemRegion() if the pop fails
  3535       statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
  3538     while (mr.start() != NULL) {
  3539       if (_cm->verbose_medium())
  3540         gclog_or_tty->print_cr("[%d] we are scanning region "
  3541                                "["PTR_FORMAT", "PTR_FORMAT")",
  3542                                _task_id, mr.start(), mr.end());
  3544       assert(mr.end() <= _cm->finger(),
  3545              "otherwise the region shouldn't be on the stack");
  3546       assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
  3547       if (_nextMarkBitMap->iterate(bc, mr)) {
  3548         assert(!has_aborted(),
  3549                "cannot abort the task without aborting the bitmap iteration");
  3551         // We finished iterating over the region without aborting.
  3552         regular_clock_call();
  3553         if (has_aborted())
  3554           mr = MemRegion();
  3555         else {
  3556           mr = _cm->region_stack_pop_lock_free();
  3557           // it returns MemRegion() if the pop fails
  3558           statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
  3560       } else {
  3561         assert(has_aborted(), "currently the only way to do so");
  3563         // The only way to abort the bitmap iteration is to return
  3564         // false from the do_bit() method. However, inside the
  3565         // do_bit() method we move the _region_finger to point to the
  3566         // object currently being looked at. So, if we bail out, we
  3567         // have definitely set _region_finger to something non-null.
  3568         assert(_region_finger != NULL, "invariant");
  3570         // Make sure that any previously aborted region has been
  3571         // cleared.
  3572         assert(_aborted_region.is_empty(), "aborted region not cleared");
  3574         // The iteration was actually aborted. So now _region_finger
  3575         // points to the address of the object we last scanned. If we
  3576         // leave it there, when we restart this task, we will rescan
  3577         // the object. It is easy to avoid this. We move the finger by
  3578         // enough to point to the next possible object header (the
  3579         // bitmap knows by how much we need to move it as it knows its
  3580         // granularity).
  3581         MemRegion newRegion =
  3582           MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());
  3584         if (!newRegion.is_empty()) {
  3585           if (_cm->verbose_low()) {
  3586             gclog_or_tty->print_cr("[%d] recording unscanned region "
  3587                                    "[" PTR_FORMAT "," PTR_FORMAT ") in CMTask",
  3588                                    _task_id,
  3589                                    newRegion.start(), newRegion.end());
  3591           // Now record the part of the region we didn't scan to
  3592           // make sure this task scans it later.
  3593           _aborted_region = newRegion;
  3595         // break from while
  3596         mr = MemRegion();
  3598       _region_finger = NULL;
  3601     if (_cm->verbose_low())
  3602       gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
  3603                              _task_id, _cm->region_stack_size());
  3607 void CMTask::print_stats() {
  3608   gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
  3609                          _task_id, _calls);
  3610   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
  3611                          _elapsed_time_ms, _termination_time_ms);
  3612   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
  3613                          _step_times_ms.num(), _step_times_ms.avg(),
  3614                          _step_times_ms.sd());
  3615   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
  3616                          _step_times_ms.maximum(), _step_times_ms.sum());
  3618 #if _MARKING_STATS_
  3619   gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
  3620                          _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
  3621                          _all_clock_intervals_ms.sd());
  3622   gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
  3623                          _all_clock_intervals_ms.maximum(),
  3624                          _all_clock_intervals_ms.sum());
  3625   gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
  3626                          _clock_due_to_scanning, _clock_due_to_marking);
  3627   gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
  3628                          _objs_scanned, _objs_found_on_bitmap);
  3629   gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
  3630                          _local_pushes, _local_pops, _local_max_size);
  3631   gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
  3632                          _global_pushes, _global_pops, _global_max_size);
  3633   gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
  3634                          _global_transfers_to,_global_transfers_from);
  3635   gclog_or_tty->print_cr("  Regions: claimed = %d, Region Stack: pops = %d",
  3636                          _regions_claimed, _region_stack_pops);
  3637   gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
  3638   gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
  3639                          _steal_attempts, _steals);
  3640   gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
  3641   gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
  3642                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
  3643   gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
  3644                          _aborted_timed_out, _aborted_satb, _aborted_termination);
  3645 #endif // _MARKING_STATS_
  3648 /*****************************************************************************
  3650     The do_marking_step(time_target_ms) method is the building block
  3651     of the parallel marking framework. It can be called in parallel
  3652     with other invocations of do_marking_step() on different tasks
  3653     (but only one per task, obviously) and concurrently with the
  3654     mutator threads, or during remark, hence it eliminates the need
  3655     for two versions of the code. When called during remark, it will
  3656     pick up from where the task left off during the concurrent marking
  3657     phase. Interestingly, tasks are also claimable during evacuation
  3658     pauses, since do_marking_step() ensures that it aborts before
  3659     it needs to yield.
  3661     The data structures that it uses to do marking work are the
  3662     following:
  3664       (1) Marking Bitmap. If there are gray objects that appear only
  3665       on the bitmap (this happens either when dealing with an overflow
  3666       or when the initial marking phase has simply marked the roots
  3667       and didn't push them on the stack), then tasks claim heap
  3668       regions whose bitmap they then scan to find gray objects. A
  3669       global finger indicates where the end of the last claimed region
  3670       is. A local finger indicates how far into the region a task has
  3671       scanned. The two fingers are used to determine how to gray an
  3672       object (i.e. whether simply marking it is OK, as it will be
  3673       visited by a task in the future, or whether it needs to be also
  3674       pushed on a stack).
  3676       (2) Local Queue. The local queue of the task which is accessed
  3677       reasonably efficiently by the task. Other tasks can steal from
  3678       it when they run out of work. Throughout the marking phase, a
  3679       task attempts to keep its local queue short but not totally
  3680       empty, so that entries are available for stealing by other
  3681       tasks. Only when there is no more work, a task will totally
  3682       drain its local queue.
  3684       (3) Global Mark Stack. This handles local queue overflow. During
  3685       marking only sets of entries are moved between it and the local
  3686     queues, as access to it requires a mutex, and more fine-grained
  3687     interaction with it might cause contention. If it
  3688       overflows, then the marking phase should restart and iterate
  3689       over the bitmap to identify gray objects. Throughout the marking
  3690       phase, tasks attempt to keep the global mark stack at a small
  3691       length but not totally empty, so that entries are available for
  3692     popping by other tasks. Only when there is no more work will tasks
  3693     totally drain the global mark stack.
  3695       (4) Global Region Stack. Entries on it correspond to areas of
  3696       the bitmap that need to be scanned since they contain gray
  3697       objects. Pushes on the region stack only happen during
  3698       evacuation pauses and typically correspond to areas covered by
  3699     GC LABs. If it overflows, then the marking phase should restart
  3700       and iterate over the bitmap to identify gray objects. Tasks will
  3701       try to totally drain the region stack as soon as possible.
  3703       (5) SATB Buffer Queue. This is where completed SATB buffers are
  3704       made available. Buffers are regularly removed from this queue
  3705       and scanned for roots, so that the queue doesn't get too
  3706       long. During remark, all completed buffers are processed, as
  3707       well as the filled in parts of any uncompleted buffers.
  3709     The do_marking_step() method tries to abort when the time target
  3710     has been reached. There are a few other cases when the
  3711     do_marking_step() method also aborts:
  3713       (1) When the marking phase has been aborted (after a Full GC).
  3715       (2) When a global overflow (either on the global stack or the
  3716       region stack) has been triggered. Before the task aborts, it
  3717       will actually sync up with the other tasks to ensure that all
  3718       the marking data structures (local queues, stacks, fingers etc.)
  3719       are re-initialised so that when do_marking_step() completes,
  3720       the marking phase can immediately restart.
  3722       (3) When enough completed SATB buffers are available. The
  3723       do_marking_step() method only tries to drain SATB buffers right
  3724       at the beginning. So, if enough buffers are available, the
  3725       marking step aborts and the SATB buffers are processed at
  3726       the beginning of the next invocation.
  3728       (4) To yield. When we have to yield, we abort and yield
  3729       right at the end of do_marking_step(). This saves us a lot
  3730       of hassle as, by yielding, we might allow a Full GC. If this
  3731       happens then objects will be compacted underneath our feet, the
  3732       heap might shrink, etc. We save checking for this by just
  3733       aborting and doing the yield right at the end.
  3735     From the above it follows that the do_marking_step() method should
  3736     be called in a loop (or, otherwise, regularly) until it completes.
  3738     If a marking step completes without its has_aborted() flag being
  3739     true, it means it has completed the current marking phase (and
  3740     also all other marking tasks have done so and have all synced up).
  3742     A method called regular_clock_call() is invoked "regularly" (in
  3743     sub-ms intervals) throughout marking. It is this clock method that
  3744     checks all the abort conditions which were mentioned above and
  3745     decides when the task should abort. A work-based scheme is used to
  3746     trigger this clock method: when the number of object words the
  3747     marking phase has scanned or the number of references the marking
  3748     phase has visited reaches a given limit. Additional invocations of
  3749     the clock method have been planted in a few other strategic places
  3750     too. The initial reason for the clock method was to avoid calling
  3751     vtime too regularly, as it is quite expensive. So, once it was in
  3752     place, it was natural to piggy-back all the other conditions on it
  3753     too and not constantly check them throughout the code.
  3755  *****************************************************************************/
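// --- Editor's note (a minimal sketch, not part of the original source) ---
// The comment above says do_marking_step() should be driven in a loop until
// it completes without aborting. The real drivers are the concurrent marking
// worker and the remark code; the time target and yield handling below are
// simplified assumptions, using only names that appear in this file:
//
//   do {
//     task->do_marking_step(10.0 /* illustrative ms time target */);
//     // do_marking_step() aborts rather than yields; the caller yields.
//     if (cm->should_yield()) {
//       cm->cmThread()->yield();
//     }
//   } while (task->has_aborted() && !cm->has_aborted());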
  3757 void CMTask::do_marking_step(double time_target_ms) {
  3758   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
  3759   assert(concurrent() == _cm->concurrent(), "they should be the same");
  3761   assert(concurrent() || _cm->region_stack_empty(),
  3762          "the region stack should have been cleared before remark");
  3763   assert(concurrent() || !_cm->has_aborted_regions(),
  3764          "aborted regions should have been cleared before remark");
  3765   assert(_region_finger == NULL,
  3766          "this should be non-null only when a region is being scanned");
  3768   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
  3769   assert(_task_queues != NULL, "invariant");
  3770   assert(_task_queue != NULL, "invariant");
  3771   assert(_task_queues->queue(_task_id) == _task_queue, "invariant");
  3773   assert(!_claimed,
  3774          "only one thread should claim this task at any one time");
  3776   // OK, this doesn't safeguard against all possible scenarios, as it is
  3777   // possible for two threads to set the _claimed flag at the same
  3778   // time. But it is only for debugging purposes anyway and it will
  3779   // catch most problems.
  3780   _claimed = true;
  3782   _start_time_ms = os::elapsedVTime() * 1000.0;
  3783   statsOnly( _interval_start_time_ms = _start_time_ms );
  3785   double diff_prediction_ms =
  3786     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
  3787   _time_target_ms = time_target_ms - diff_prediction_ms;
  3789   // set up the variables that are used in the work-based scheme to
  3790   // call the regular clock method
  3791   _words_scanned = 0;
  3792   _refs_reached  = 0;
  3793   recalculate_limits();
  3795   // clear all flags
  3796   clear_has_aborted();
  3797   _has_aborted_timed_out = false;
  3798   _draining_satb_buffers = false;
  3800   ++_calls;
  3802   if (_cm->verbose_low())
  3803     gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
  3804                            "target = %1.2lfms >>>>>>>>>>",
  3805                            _task_id, _calls, _time_target_ms);
  3807   // Set up the bitmap and oop closures. Anything that uses them is
  3808   // eventually called from this method, so it is OK to allocate these
  3809   // statically.
  3810   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
  3811   CMOopClosure    oop_closure(_g1h, _cm, this);
  3812   set_oop_closure(&oop_closure);
  3814   if (_cm->has_overflown()) {
  3815     // This can happen if the region stack or the mark stack overflows
  3816     // during a GC pause and this task, after a yield point,
  3817     // restarts. We have to abort as we need to get into the overflow
  3818     // protocol which happens right at the end of this task.
  3819     set_has_aborted();
  3822   // First drain any available SATB buffers. After this, we will not
  3823   // look at SATB buffers before the next invocation of this method.
  3824   // If enough completed SATB buffers are queued up, the regular clock
  3825   // will abort this task so that it restarts.
  3826   drain_satb_buffers();
  3827   // ...then partially drain the local queue and the global stack
  3828   drain_local_queue(true);
  3829   drain_global_stack(true);
  3831   // Then totally drain the region stack.  We will not look at
  3832   // it again before the next invocation of this method. Entries on
  3833   // the region stack are only added during evacuation pauses, for
  3834   // which we have to yield. When we do, we abort the task anyway so
  3835   // it will look at the region stack again when it restarts.
  3836   bitmap_closure.set_scanning_heap_region(false);
  3837   drain_region_stack(&bitmap_closure);
  3838   // ...then partially drain the local queue and the global stack
  3839   drain_local_queue(true);
  3840   drain_global_stack(true);
  3842   do {
  3843     if (!has_aborted() && _curr_region != NULL) {
  3844       // This means that we're already holding on to a region.
  3845       assert(_finger != NULL, "if region is not NULL, then the finger "
  3846              "should not be NULL either");
  3848       // We might have restarted this task after an evacuation pause
  3849       // which might have evacuated the region we're holding on to
  3850       // underneath our feet. Let's read its limit again to make sure
  3851       // that we do not iterate over a region of the heap that
  3852       // contains garbage (update_region_limit() will also move
  3853       // _finger to the start of the region if it is found empty).
  3854       update_region_limit();
  3855       // We will start from _finger not from the start of the region,
  3856       // as we might be restarting this task after aborting half-way
  3857       // through scanning this region. In this case, _finger points to
  3858       // the address where we last found a marked object. If this is a
  3859       // fresh region, _finger points to start().
  3860       MemRegion mr = MemRegion(_finger, _region_limit);
  3862       if (_cm->verbose_low())
  3863         gclog_or_tty->print_cr("[%d] we're scanning part "
  3864                                "["PTR_FORMAT", "PTR_FORMAT") "
  3865                                "of region "PTR_FORMAT,
  3866                                _task_id, _finger, _region_limit, _curr_region);
  3868       // Let's iterate over the bitmap of the part of the
  3869       // region that is left.
  3870       bitmap_closure.set_scanning_heap_region(true);
  3871       if (mr.is_empty() ||
  3872           _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
  3873         // We successfully completed iterating over the region. Now,
  3874         // let's give up the region.
  3875         giveup_current_region();
  3876         regular_clock_call();
  3877       } else {
  3878         assert(has_aborted(), "currently the only way to do so");
  3879         // The only way to abort the bitmap iteration is to return
  3880         // false from the do_bit() method. However, inside the
  3881         // do_bit() method we move the _finger to point to the
  3882         // object currently being looked at. So, if we bail out, we
  3883         // have definitely set _finger to something non-null.
  3884         assert(_finger != NULL, "invariant");
  3886         // Region iteration was actually aborted. So now _finger
  3887         // points to the address of the object we last scanned. If we
  3888         // leave it there, when we restart this task, we will rescan
  3889         // the object. It is easy to avoid this. We move the finger by
  3890         // enough to point to the next possible object header (the
  3891         // bitmap knows by how much we need to move it as it knows its
  3892         // granularity).
  3893         assert(_finger < _region_limit, "invariant");
  3894         HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
  3895         // Check if bitmap iteration was aborted while scanning the last object
  3896         if (new_finger >= _region_limit) {
  3897             giveup_current_region();
  3898         } else {
  3899             move_finger_to(new_finger);
  3903     // At this point we have either completed iterating over the
  3904     // region we were holding on to, or we have aborted.
  3906     // We then partially drain the local queue and the global stack.
  3907     // (Do we really need this?)
  3908     drain_local_queue(true);
  3909     drain_global_stack(true);
  3911     // Read the note on the claim_region() method on why it might
  3912     // return NULL with potentially more regions available for
  3913     // claiming and why we have to check out_of_regions() to determine
  3914     // whether we're done or not.
  3915     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
  3916       // We are going to try to claim a new region. We should have
  3917       // given up on the previous one.
  3918       // Separated the asserts so that we know which one fires.
  3919       assert(_curr_region  == NULL, "invariant");
  3920       assert(_finger       == NULL, "invariant");
  3921       assert(_region_limit == NULL, "invariant");
  3922       if (_cm->verbose_low())
  3923         gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
  3924       HeapRegion* claimed_region = _cm->claim_region(_task_id);
  3925       if (claimed_region != NULL) {
  3926         // Yes, we managed to claim one
  3927         statsOnly( ++_regions_claimed );
  3929         if (_cm->verbose_low())
  3930           gclog_or_tty->print_cr("[%d] we successfully claimed "
  3931                                  "region "PTR_FORMAT,
  3932                                  _task_id, claimed_region);
  3934         setup_for_region(claimed_region);
  3935         assert(_curr_region == claimed_region, "invariant");
  3937       // It is important to call the regular clock here. It might take
  3938       // a while to claim a region if, for example, we hit a large
  3939       // block of empty regions. So we need to call the regular clock
  3940       // method once round the loop to make sure it's called
  3941       // frequently enough.
  3942       regular_clock_call();
  3945     if (!has_aborted() && _curr_region == NULL) {
  3946       assert(_cm->out_of_regions(),
  3947              "at this point we should be out of regions");
  3949   } while ( _curr_region != NULL && !has_aborted());
  3951   if (!has_aborted()) {
  3952     // We cannot check whether the global stack is empty, since other
  3953     // tasks might be pushing objects to it concurrently. We also cannot
  3954     // check if the region stack is empty because if a thread is aborting
  3955     // it can push a partially done region back.
  3956     assert(_cm->out_of_regions(),
  3957            "at this point we should be out of regions");
  3959     if (_cm->verbose_low())
  3960       gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
  3962     // Try to reduce the number of available SATB buffers so that
  3963     // remark has less work to do.
  3964     drain_satb_buffers();
  3967   // Since we've done everything else, we can now totally drain the
  3968   // local queue and global stack.
  3969   drain_local_queue(false);
  3970   drain_global_stack(false);
  3972   // Attempt at work stealing from other task's queues.
  3973   if (!has_aborted()) {
  3974     // We have not aborted. This means that we have finished all that
  3975     // we could. Let's try to do some stealing...
  3977     // We cannot check whether the global stack is empty, since other
  3978     // tasks might be pushing objects to it concurrently. We also cannot
  3979     // check if the region stack is empty because if a thread is aborting
  3980     // it can push a partially done region back.
  3981     assert(_cm->out_of_regions() && _task_queue->size() == 0,
  3982            "only way to reach here");
  3984     if (_cm->verbose_low())
  3985       gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
  3987     while (!has_aborted()) {
  3988       oop obj;
  3989       statsOnly( ++_steal_attempts );
  3991       if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
  3992         if (_cm->verbose_medium())
  3993           gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
  3994                                  _task_id, (void*) obj);
  3996         statsOnly( ++_steals );
  3998         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
  3999                "any stolen object should be marked");
  4000         scan_object(obj);
  4002         // And since we're towards the end, let's totally drain the
  4003         // local queue and global stack.
  4004         drain_local_queue(false);
  4005         drain_global_stack(false);
  4006       } else {
  4007         break;
  4012   // We still haven't aborted. Now, let's try to get into the
  4013   // termination protocol.
  4014   if (!has_aborted()) {
  4015     // We cannot check whether the global stack is empty, since other
  4016     // tasks might be concurrently pushing objects on it. We also cannot
  4017     // check if the region stack is empty because if a thread is aborting
  4018     // it can push a partially done region back.
  4019     // Separated the asserts so that we know which one fires.
  4020     assert(_cm->out_of_regions(), "only way to reach here");
  4021     assert(_task_queue->size() == 0, "only way to reach here");
  4023     if (_cm->verbose_low())
  4024       gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
  4026     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
  4027     // The CMTask class also extends the TerminatorTerminator class,
  4028     // hence its should_exit_termination() method will also decide
  4029     // whether to exit the termination protocol or not.
  4030     bool finished = _cm->terminator()->offer_termination(this);
  4031     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
  4032     _termination_time_ms +=
  4033       termination_end_time_ms - _termination_start_time_ms;
  4035     if (finished) {
  4036       // We're all done.
  4038       if (_task_id == 0) {
  4039         // let's allow task 0 to do this
  4040         if (concurrent()) {
  4041           assert(_cm->concurrent_marking_in_progress(), "invariant");
  4042           // we need to set this to false before the next
  4043           // safepoint. This way we ensure that the marking phase
  4044           // doesn't observe any more heap expansions.
  4045           _cm->clear_concurrent_marking_in_progress();
  4049       // We can now guarantee that the global stack is empty, since
  4050       // all other tasks have finished. We separated the guarantees so
  4051       // that, if a condition is false, we can immediately find out
  4052       // which one.
  4053       guarantee(_cm->out_of_regions(), "only way to reach here");
  4054       guarantee(_aborted_region.is_empty(), "only way to reach here");
  4055       guarantee(_cm->region_stack_empty(), "only way to reach here");
  4056       guarantee(_cm->mark_stack_empty(), "only way to reach here");
  4057       guarantee(_task_queue->size() == 0, "only way to reach here");
  4058       guarantee(!_cm->has_overflown(), "only way to reach here");
  4059       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
  4060       guarantee(!_cm->region_stack_overflow(), "only way to reach here");
  4062       if (_cm->verbose_low())
  4063         gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
  4064     } else {
  4065       // Apparently there's more work to do. Let's abort this task. The
  4066       // caller will restart it and we can hopefully find more things to do.
  4068       if (_cm->verbose_low())
  4069         gclog_or_tty->print_cr("[%d] apparently there is more work to do", _task_id);
  4071       set_has_aborted();
  4072       statsOnly( ++_aborted_termination );
  4076   // Mainly for debugging purposes to make sure that a pointer to the
  4077   // closure which was statically allocated in this frame doesn't
  4078   // escape it by accident.
  4079   set_oop_closure(NULL);
  4080   double end_time_ms = os::elapsedVTime() * 1000.0;
  4081   double elapsed_time_ms = end_time_ms - _start_time_ms;
  4082   // Update the step history.
  4083   _step_times_ms.add(elapsed_time_ms);
  4085   if (has_aborted()) {
  4086     // The task was aborted for some reason.
  4088     statsOnly( ++_aborted );
  4090     if (_has_aborted_timed_out) {
  4091       double diff_ms = elapsed_time_ms - _time_target_ms;
  4092       // Keep statistics of how well we did with respect to hitting
  4093       // our target only if we actually timed out (if we aborted for
  4094       // other reasons, then the results might get skewed).
  4095       _marking_step_diffs_ms.add(diff_ms);
  4098     if (_cm->has_overflown()) {
  4099       // This is the interesting one. We aborted because a global
  4100       // overflow was raised. This means we have to restart the
  4101       // marking phase and start iterating over regions. However, in
  4102       // order to do this we have to make sure that all tasks stop
  4103       // what they are doing and re-initialise in a safe manner. We
  4104       // will achieve this with the use of two barrier sync points.
  4106       if (_cm->verbose_low())
  4107         gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
  4109       _cm->enter_first_sync_barrier(_task_id);
  4110       // When we exit this sync barrier we know that all tasks have
  4111       // stopped doing marking work. So, it's now safe to
  4112       // re-initialise our data structures. At the end of this method,
  4113       // task 0 will clear the global data structures.
  4115       statsOnly( ++_aborted_overflow );
  4117       // We clear the local state of this task...
  4118       clear_region_fields();
  4120       // ...and enter the second barrier.
  4121       _cm->enter_second_sync_barrier(_task_id);
  4122       // At this point everything has been re-initialised and we're
  4123       // ready to restart.
  4126     if (_cm->verbose_low()) {
  4127       gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
  4128                              "elapsed = %1.2lfms <<<<<<<<<<",
  4129                              _task_id, _time_target_ms, elapsed_time_ms);
  4130       if (_cm->has_aborted())
  4131         gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
  4132                                _task_id);
  4134   } else {
  4135     if (_cm->verbose_low())
  4136       gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
  4137                              "elapsed = %1.2lfms <<<<<<<<<<",
  4138                              _task_id, _time_target_ms, elapsed_time_ms);
  4141   _claimed = false;
  4144 CMTask::CMTask(int task_id,
  4145                ConcurrentMark* cm,
  4146                CMTaskQueue* task_queue,
  4147                CMTaskQueueSet* task_queues)
  4148   : _g1h(G1CollectedHeap::heap()),
  4149     _task_id(task_id), _cm(cm),
  4150     _claimed(false),
  4151     _nextMarkBitMap(NULL), _hash_seed(17),
  4152     _task_queue(task_queue),
  4153     _task_queues(task_queues),
  4154     _oop_closure(NULL),
  4155     _aborted_region(MemRegion()) {
  4156   guarantee(task_queue != NULL, "invariant");
  4157   guarantee(task_queues != NULL, "invariant");
  4159   statsOnly( _clock_due_to_scanning = 0;
  4160              _clock_due_to_marking  = 0 );
  4162   _marking_step_diffs_ms.add(0.5);
