src/share/vm/gc_implementation/g1/concurrentMark.cpp

author:      johnc
date:        Mon, 02 Aug 2010 12:51:43 -0700
changeset:   2060:2d160770d2e5
parent:      1907:c18cbe5936b8
child:       2074:b63010841f78
permissions: -rw-r--r--

6814437: G1: remove the _new_refs array
Summary: The per-worker _new_refs array is used to hold references that point into the collection set. It is populated during RSet updating and subsequently processed. In the event of an evacuation failure it is processed again to recreate the RSets of regions in the collection set. Remove the per-worker _new_refs array by processing the references directly. Use a DirtyCardQueue to hold the cards containing the references so that the RSets of regions in the collection set can be recreated when handling an evacuation failure.
Reviewed-by: iveresov, jmasa, tonyp
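In rough outline the change works as sketched below. This is a standalone, illustrative C++ sketch only: the type and function names are invented for the example and the remembered-set update itself is elided; it is not the patched HotSpot code, which follows in the listing.

// Illustrative sketch (not HotSpot code): references that point into the
// collection set are handled as soon as they are found, and only the card
// containing each such reference is queued. On an evacuation failure the
// queued cards are replayed to recreate the RSets of the collection-set
// regions, so no per-worker array of references is needed.
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

static const std::size_t card_size = 512;   // bytes covered by one card (G1 default)

struct DirtyCardQueueModel {                 // stand-in for a DirtyCardQueue
  std::vector<const void*> cards;
  void enqueue(const void* card) { cards.push_back(card); }
};

// Card covering an address: the address rounded down to a card boundary.
static const void* card_for(const void* addr) {
  return reinterpret_cast<const void*>(
      reinterpret_cast<std::uintptr_t>(addr) & ~(std::uintptr_t)(card_size - 1));
}

// Called for every reference encountered while updating/scanning RSets.
// The reference is processed immediately; only its card is remembered.
void process_ref(void** ref_location, bool points_into_cset,
                 DirtyCardQueueModel& into_cset_dcq) {
  if (points_into_cset) {
    // ... update the remembered set / evacuate the referenced object here ...
    into_cset_dcq.enqueue(card_for(ref_location));
  }
}

// Only consumed if an evacuation failure has to be handled: replay the
// queued cards to recreate the RSets of the regions in the collection set.
void handle_evacuation_failure(const DirtyCardQueueModel& into_cset_dcq) {
  for (std::size_t i = 0; i < into_cset_dcq.cards.size(); ++i) {
    std::printf("re-scan card at %p\n", into_cset_dcq.cards[i]);
  }
}

Usage-wise, the queue is filled while references are processed during the pause and is only consumed if an evacuation failure has to be handled.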

     1 /*
     2  * Copyright (c) 2001, 2009, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.
     8  *
     9  * This code is distributed in the hope that it will be useful, but WITHOUT
    10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    12  * version 2 for more details (a copy is included in the LICENSE file that
    13  * accompanied this code).
    14  *
    15  * You should have received a copy of the GNU General Public License version
    16  * 2 along with this work; if not, write to the Free Software Foundation,
    17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    18  *
    19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    20  * or visit www.oracle.com if you need additional information or have any
    21  * questions.
    22  *
    23  */
    25 #include "incls/_precompiled.incl"
    26 #include "incls/_concurrentMark.cpp.incl"
    28 //
    29 // CMS Bit Map Wrapper
    31 CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter):
    32   _bm((uintptr_t*)NULL,0),
    33   _shifter(shifter) {
    34   _bmStartWord = (HeapWord*)(rs.base());
    35   _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
    36   ReservedSpace brs(ReservedSpace::allocation_align_size_up(
    37                      (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
    39   guarantee(brs.is_reserved(), "couldn't allocate CMS bit map");
    40   // For now we'll just commit all of the bit map up front.
    41   // Later on we'll try to be more parsimonious with swap.
    42   guarantee(_virtual_space.initialize(brs, brs.size()),
    43             "couldn't reserve backing store for CMS bit map");
    44   assert(_virtual_space.committed_size() == brs.size(),
    45          "didn't reserve backing store for all of CMS bit map?");
    46   _bm.set_map((uintptr_t*)_virtual_space.low());
    47   assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
    48          _bmWordSize, "inconsistency in bit map sizing");
    49   _bm.set_size(_bmWordSize >> _shifter);
    50 }
    52 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
    53                                                HeapWord* limit) const {
    54   // First we must round addr *up* to a possible object boundary.
    55   addr = (HeapWord*)align_size_up((intptr_t)addr,
    56                                   HeapWordSize << _shifter);
    57   size_t addrOffset = heapWordToOffset(addr);
    58   if (limit == NULL) limit = _bmStartWord + _bmWordSize;
    59   size_t limitOffset = heapWordToOffset(limit);
    60   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
    61   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
    62   assert(nextAddr >= addr, "get_next_one postcondition");
    63   assert(nextAddr == limit || isMarked(nextAddr),
    64          "get_next_one postcondition");
    65   return nextAddr;
    66 }
    68 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
    69                                                  HeapWord* limit) const {
    70   size_t addrOffset = heapWordToOffset(addr);
    71   if (limit == NULL) limit = _bmStartWord + _bmWordSize;
    72   size_t limitOffset = heapWordToOffset(limit);
    73   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
    74   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
    75   assert(nextAddr >= addr, "get_next_one postcondition");
    76   assert(nextAddr == limit || !isMarked(nextAddr),
    77          "get_next_one postcondition");
    78   return nextAddr;
    79 }
    81 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
    82   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
    83   return (int) (diff >> _shifter);
    84 }
    86 bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
    87   HeapWord* left  = MAX2(_bmStartWord, mr.start());
    88   HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
    89   if (right > left) {
    90     // Right-open interval [leftOffset, rightOffset).
    91     return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
    92   } else {
    93     return true;
    94   }
    95 }
    97 void CMBitMapRO::mostly_disjoint_range_union(BitMap*   from_bitmap,
    98                                              size_t    from_start_index,
    99                                              HeapWord* to_start_word,
   100                                              size_t    word_num) {
   101   _bm.mostly_disjoint_range_union(from_bitmap,
   102                                   from_start_index,
   103                                   heapWordToOffset(to_start_word),
   104                                   word_num);
   105 }
   107 #ifndef PRODUCT
   108 bool CMBitMapRO::covers(ReservedSpace rs) const {
   109   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
   110   assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
   111          "size inconsistency");
   112   return _bmStartWord == (HeapWord*)(rs.base()) &&
   113          _bmWordSize  == rs.size()>>LogHeapWordSize;
   114 }
   115 #endif
   117 void CMBitMap::clearAll() {
   118   _bm.clear();
   119   return;
   120 }
   122 void CMBitMap::markRange(MemRegion mr) {
   123   mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
   124   assert(!mr.is_empty(), "unexpected empty region");
   125   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
   126           ((HeapWord *) mr.end())),
   127          "markRange memory region end is not card aligned");
   128   // convert address range into offset range
   129   _bm.at_put_range(heapWordToOffset(mr.start()),
   130                    heapWordToOffset(mr.end()), true);
   131 }
   133 void CMBitMap::clearRange(MemRegion mr) {
   134   mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
   135   assert(!mr.is_empty(), "unexpected empty region");
   136   // convert address range into offset range
   137   _bm.at_put_range(heapWordToOffset(mr.start()),
   138                    heapWordToOffset(mr.end()), false);
   139 }
   141 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
   142                                             HeapWord* end_addr) {
   143   HeapWord* start = getNextMarkedWordAddress(addr);
   144   start = MIN2(start, end_addr);
   145   HeapWord* end   = getNextUnmarkedWordAddress(start);
   146   end = MIN2(end, end_addr);
   147   assert(start <= end, "Consistency check");
   148   MemRegion mr(start, end);
   149   if (!mr.is_empty()) {
   150     clearRange(mr);
   151   }
   152   return mr;
   153 }
   155 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
   156   _base(NULL), _cm(cm)
   157 #ifdef ASSERT
   158   , _drain_in_progress(false)
   159   , _drain_in_progress_yields(false)
   160 #endif
   161 {}
   163 void CMMarkStack::allocate(size_t size) {
   164   _base = NEW_C_HEAP_ARRAY(oop, size);
   165   if (_base == NULL)
   166     vm_exit_during_initialization("Failed to allocate "
   167                                   "CM region mark stack");
   168   _index = 0;
   169   // QQQQ cast ...
   170   _capacity = (jint) size;
   171   _oops_do_bound = -1;
   172   NOT_PRODUCT(_max_depth = 0);
   173 }
   175 CMMarkStack::~CMMarkStack() {
   176   if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base);
   177 }
   179 void CMMarkStack::par_push(oop ptr) {
   180   while (true) {
   181     if (isFull()) {
   182       _overflow = true;
   183       return;
   184     }
   185     // Otherwise...
   186     jint index = _index;
   187     jint next_index = index+1;
   188     jint res = Atomic::cmpxchg(next_index, &_index, index);
   189     if (res == index) {
   190       _base[index] = ptr;
   191       // Note that we don't maintain this atomically.  We could, but it
   192       // doesn't seem necessary.
   193       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
   194       return;
   195     }
   196     // Otherwise, we need to try again.
   197   }
   198 }
   200 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
   201   while (true) {
   202     if (isFull()) {
   203       _overflow = true;
   204       return;
   205     }
   206     // Otherwise...
   207     jint index = _index;
   208     jint next_index = index + n;
   209     if (next_index > _capacity) {
   210       _overflow = true;
   211       return;
   212     }
   213     jint res = Atomic::cmpxchg(next_index, &_index, index);
   214     if (res == index) {
   215       for (int i = 0; i < n; i++) {
   216         int ind = index + i;
   217         assert(ind < _capacity, "By overflow test above.");
   218         _base[ind] = ptr_arr[i];
   219       }
   220       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
   221       return;
   222     }
   223     // Otherwise, we need to try again.
   224   }
   225 }
   228 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
   229   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
   230   jint start = _index;
   231   jint next_index = start + n;
   232   if (next_index > _capacity) {
   233     _overflow = true;
   234     return;
   235   }
   236   // Otherwise.
   237   _index = next_index;
   238   for (int i = 0; i < n; i++) {
   239     int ind = start + i;
   240     assert(ind < _capacity, "By overflow test above.");
   241     _base[ind] = ptr_arr[i];
   242   }
   243 }
   246 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
   247   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
   248   jint index = _index;
   249   if (index == 0) {
   250     *n = 0;
   251     return false;
   252   } else {
   253     int k = MIN2(max, index);
   254     jint new_ind = index - k;
   255     for (int j = 0; j < k; j++) {
   256       ptr_arr[j] = _base[new_ind + j];
   257     }
   258     _index = new_ind;
   259     *n = k;
   260     return true;
   261   }
   262 }
   265 CMRegionStack::CMRegionStack() : _base(NULL) {}
   267 void CMRegionStack::allocate(size_t size) {
   268   _base = NEW_C_HEAP_ARRAY(MemRegion, size);
   269   if (_base == NULL)
   270     vm_exit_during_initialization("Failed to allocate "
   271                                   "CM region mark stack");
   272   _index = 0;
   273   // QQQQ cast ...
   274   _capacity = (jint) size;
   275 }
   277 CMRegionStack::~CMRegionStack() {
   278   if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base);
   279 }
   281 void CMRegionStack::push(MemRegion mr) {
   282   assert(mr.word_size() > 0, "Precondition");
   283   while (true) {
   284     if (isFull()) {
   285       _overflow = true;
   286       return;
   287     }
   288     // Otherwise...
   289     jint index = _index;
   290     jint next_index = index+1;
   291     jint res = Atomic::cmpxchg(next_index, &_index, index);
   292     if (res == index) {
   293       _base[index] = mr;
   294       return;
   295     }
   296     // Otherwise, we need to try again.
   297   }
   298 }
   300 // Currently we do not call this at all. Normally we would call it
   301 // during the concurrent marking / remark phases but we now call
   302 // the lock-based version instead. But we might want to resurrect this
   303 // code in the future. So, we'll leave it here commented out.
   304 #if 0
   305 MemRegion CMRegionStack::pop() {
   306   while (true) {
   307     // Otherwise...
   308     jint index = _index;
   310     if (index == 0) {
   311       return MemRegion();
   312     }
   313     jint next_index = index-1;
   314     jint res = Atomic::cmpxchg(next_index, &_index, index);
   315     if (res == index) {
   316       MemRegion mr = _base[next_index];
   317       if (mr.start() != NULL) {
   318         assert(mr.end() != NULL, "invariant");
   319         assert(mr.word_size() > 0, "invariant");
   320         return mr;
   321       } else {
   322         // that entry was invalidated... let's skip it
   323         assert(mr.end() == NULL, "invariant");
   324       }
   325     }
   326     // Otherwise, we need to try again.
   327   }
   328 }
   329 #endif // 0
   331 void CMRegionStack::push_with_lock(MemRegion mr) {
   332   assert(mr.word_size() > 0, "Precondition");
   333   MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
   335   if (isFull()) {
   336     _overflow = true;
   337     return;
   338   }
   340   _base[_index] = mr;
   341   _index += 1;
   342 }
   344 MemRegion CMRegionStack::pop_with_lock() {
   345   MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
   347   while (true) {
   348     if (_index == 0) {
   349       return MemRegion();
   350     }
   351     _index -= 1;
   353     MemRegion mr = _base[_index];
   354     if (mr.start() != NULL) {
   355       assert(mr.end() != NULL, "invariant");
   356       assert(mr.word_size() > 0, "invariant");
   357       return mr;
   358     } else {
   359       // that entry was invalidated... let's skip it
   360       assert(mr.end() == NULL, "invariant");
   361     }
   362   }
   363 }
   365 bool CMRegionStack::invalidate_entries_into_cset() {
   366   bool result = false;
   367   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   368   for (int i = 0; i < _oops_do_bound; ++i) {
   369     MemRegion mr = _base[i];
   370     if (mr.start() != NULL) {
   371       assert(mr.end() != NULL, "invariant");
   372       assert(mr.word_size() > 0, "invariant");
   373       HeapRegion* hr = g1h->heap_region_containing(mr.start());
   374       assert(hr != NULL, "invariant");
   375       if (hr->in_collection_set()) {
   376         // The region points into the collection set
   377         _base[i] = MemRegion();
   378         result = true;
   379       }
   380     } else {
   381       // that entry was invalidated... let's skip it
   382       assert(mr.end() == NULL, "invariant");
   383     }
   384   }
   385   return result;
   386 }
   388 template<class OopClosureClass>
   389 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
   390   assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
   391          || SafepointSynchronize::is_at_safepoint(),
   392          "Drain recursion must be yield-safe.");
   393   bool res = true;
   394   debug_only(_drain_in_progress = true);
   395   debug_only(_drain_in_progress_yields = yield_after);
   396   while (!isEmpty()) {
   397     oop newOop = pop();
   398     assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
   399     assert(newOop->is_oop(), "Expected an oop");
   400     assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
   401            "only grey objects on this stack");
   402     // iterate over the oops in this oop, marking and pushing
   403     // the ones in CMS generation.
   404     newOop->oop_iterate(cl);
   405     if (yield_after && _cm->do_yield_check()) {
   406       res = false; break;
   407     }
   408   }
   409   debug_only(_drain_in_progress = false);
   410   return res;
   411 }
   413 void CMMarkStack::oops_do(OopClosure* f) {
   414   if (_index == 0) return;
   415   assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
   416          "Bound must be set.");
   417   for (int i = 0; i < _oops_do_bound; i++) {
   418     f->do_oop(&_base[i]);
   419   }
   420   _oops_do_bound = -1;
   421 }
   423 bool ConcurrentMark::not_yet_marked(oop obj) const {
   424   return (_g1h->is_obj_ill(obj)
   425           || (_g1h->is_in_permanent(obj)
   426               && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
   427 }
   429 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
   430 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
   431 #endif // _MSC_VER
   433 ConcurrentMark::ConcurrentMark(ReservedSpace rs,
   434                                int max_regions) :
   435   _markBitMap1(rs, MinObjAlignment - 1),
   436   _markBitMap2(rs, MinObjAlignment - 1),
   438   _parallel_marking_threads(0),
   439   _sleep_factor(0.0),
   440   _marking_task_overhead(1.0),
   441   _cleanup_sleep_factor(0.0),
   442   _cleanup_task_overhead(1.0),
   443   _region_bm(max_regions, false /* in_resource_area*/),
   444   _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
   445            CardTableModRefBS::card_shift,
   446            false /* in_resource_area*/),
   447   _prevMarkBitMap(&_markBitMap1),
   448   _nextMarkBitMap(&_markBitMap2),
   449   _at_least_one_mark_complete(false),
   451   _markStack(this),
   452   _regionStack(),
   453   // _finger set in set_non_marking_state
   455   _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
   456   // _active_tasks set in set_non_marking_state
   457   // _tasks set inside the constructor
   458   _task_queues(new CMTaskQueueSet((int) _max_task_num)),
   459   _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),
   461   _has_overflown(false),
   462   _concurrent(false),
   463   _has_aborted(false),
   464   _restart_for_overflow(false),
   465   _concurrent_marking_in_progress(false),
   466   _should_gray_objects(false),
   468   // _verbose_level set below
   470   _init_times(),
   471   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
   472   _cleanup_times(),
   473   _total_counting_time(0.0),
   474   _total_rs_scrub_time(0.0),
   476   _parallel_workers(NULL)
   477 {
   478   CMVerboseLevel verbose_level =
   479     (CMVerboseLevel) G1MarkingVerboseLevel;
   480   if (verbose_level < no_verbose)
   481     verbose_level = no_verbose;
   482   if (verbose_level > high_verbose)
   483     verbose_level = high_verbose;
   484   _verbose_level = verbose_level;
   486   if (verbose_low())
   487     gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
   488                            "heap end = "PTR_FORMAT, _heap_start, _heap_end);
   490   _markStack.allocate(MarkStackSize);
   491   _regionStack.allocate(G1MarkRegionStackSize);
   493   // Create & start a ConcurrentMark thread.
   494   _cmThread = new ConcurrentMarkThread(this);
   495   assert(cmThread() != NULL, "CM Thread should have been created");
   496   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
   498   _g1h = G1CollectedHeap::heap();
   499   assert(CGC_lock != NULL, "Where's the CGC_lock?");
   500   assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
   501   assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
   503   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
   504   satb_qs.set_buffer_size(G1SATBBufferSize);
   506   int size = (int) MAX2(ParallelGCThreads, (size_t)1);
   507   _par_cleanup_thread_state = NEW_C_HEAP_ARRAY(ParCleanupThreadState*, size);
   508   for (int i = 0 ; i < size; i++) {
   509     _par_cleanup_thread_state[i] = new ParCleanupThreadState;
   510   }
   512   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
   513   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);
   515   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
   516   _active_tasks = _max_task_num;
   517   for (int i = 0; i < (int) _max_task_num; ++i) {
   518     CMTaskQueue* task_queue = new CMTaskQueue();
   519     task_queue->initialize();
   520     _task_queues->register_queue(i, task_queue);
   522     _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
   523     _accum_task_vtime[i] = 0.0;
   524   }
   526   if (ConcGCThreads > ParallelGCThreads) {
   527     vm_exit_during_initialization("Can't have more ConcGCThreads "
   528                                   "than ParallelGCThreads.");
   529   }
   530   if (ParallelGCThreads == 0) {
   531     // if we are not running with any parallel GC threads we will not
   532     // spawn any marking threads either
   533     _parallel_marking_threads =   0;
   534     _sleep_factor             = 0.0;
   535     _marking_task_overhead    = 1.0;
   536   } else {
   537     if (ConcGCThreads > 0) {
   538       // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
   539       // if both are set
   541       _parallel_marking_threads = ConcGCThreads;
   542       _sleep_factor             = 0.0;
   543       _marking_task_overhead    = 1.0;
   544     } else if (G1MarkingOverheadPercent > 0) {
   545       // we will calculate the number of parallel marking threads
   546       // based on a target overhead with respect to the soft real-time
   547       // goal
   549       double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
   550       double overall_cm_overhead =
   551         (double) MaxGCPauseMillis * marking_overhead /
   552         (double) GCPauseIntervalMillis;
   553       double cpu_ratio = 1.0 / (double) os::processor_count();
   554       double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
   555       double marking_task_overhead =
   556         overall_cm_overhead / marking_thread_num *
   557                                                 (double) os::processor_count();
   558       double sleep_factor =
   559                          (1.0 - marking_task_overhead) / marking_task_overhead;
   561       _parallel_marking_threads = (size_t) marking_thread_num;
   562       _sleep_factor             = sleep_factor;
   563       _marking_task_overhead    = marking_task_overhead;
   564     } else {
   565       _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
   566       _sleep_factor             = 0.0;
   567       _marking_task_overhead    = 1.0;
   568     }
   570     if (parallel_marking_threads() > 1)
   571       _cleanup_task_overhead = 1.0;
   572     else
   573       _cleanup_task_overhead = marking_task_overhead();
   574     _cleanup_sleep_factor =
   575                      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
   577 #if 0
   578     gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
   579     gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
   580     gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
   581     gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
   582     gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
   583 #endif
   585     guarantee(parallel_marking_threads() > 0, "peace of mind");
   586     _parallel_workers = new WorkGang("G1 Parallel Marking Threads",
   587                                      (int) parallel_marking_threads(), false, true);
   588     if (_parallel_workers == NULL)
   589       vm_exit_during_initialization("Failed necessary allocation.");
   590   }
   592   // so that the call below can read a sensible value
   593   _heap_start = (HeapWord*) rs.base();
   594   set_non_marking_state();
   595 }
   597 void ConcurrentMark::update_g1_committed(bool force) {
   598   // If concurrent marking is not in progress, then we do not need to
   599   // update _heap_end. This has a subtle and important
   600   // side-effect. Imagine that two evacuation pauses happen between
   601   // marking completion and remark. The first one can grow the
   602   // heap (hence now the finger is below the heap end). Then, the
   603   // second one could unnecessarily push regions on the region
   604   // stack. This causes the invariant that the region stack is empty
   605   // at the beginning of remark to be false. By ensuring that we do
   606   // not observe heap expansions after marking is complete, then we do
   607   // not have this problem.
   608   if (!concurrent_marking_in_progress() && !force)
   609     return;
   611   MemRegion committed = _g1h->g1_committed();
   612   assert(committed.start() == _heap_start, "start shouldn't change");
   613   HeapWord* new_end = committed.end();
   614   if (new_end > _heap_end) {
   615     // The heap has been expanded.
   617     _heap_end = new_end;
   618   }
   619   // Notice that the heap can also shrink. However, this only happens
   620   // during a Full GC (at least currently) and the entire marking
   621   // phase will bail out and the task will not be restarted. So, let's
   622   // do nothing.
   623 }
   625 void ConcurrentMark::reset() {
   626   // Starting values for these two. This should be called in a STW
    627   // phase. CM will be notified of any future g1_committed expansions;
    628   // these will happen at the end of evacuation pauses, when tasks are
    629   // inactive.
   630   MemRegion committed = _g1h->g1_committed();
   631   _heap_start = committed.start();
   632   _heap_end   = committed.end();
   634   // Separated the asserts so that we know which one fires.
   635   assert(_heap_start != NULL, "heap bounds should look ok");
   636   assert(_heap_end != NULL, "heap bounds should look ok");
   637   assert(_heap_start < _heap_end, "heap bounds should look ok");
   639   // reset all the marking data structures and any necessary flags
   640   clear_marking_state();
   642   if (verbose_low())
   643     gclog_or_tty->print_cr("[global] resetting");
   645   // We do reset all of them, since different phases will use
   646   // different number of active threads. So, it's easiest to have all
   647   // of them ready.
   648   for (int i = 0; i < (int) _max_task_num; ++i)
   649     _tasks[i]->reset(_nextMarkBitMap);
   651   // we need this to make sure that the flag is on during the evac
   652   // pause with initial mark piggy-backed
   653   set_concurrent_marking_in_progress();
   654 }
   656 void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
   657   assert(active_tasks <= _max_task_num, "we should not have more");
   659   _active_tasks = active_tasks;
   660   // Need to update the three data structures below according to the
   661   // number of active threads for this phase.
   662   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
   663   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
   664   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
   666   _concurrent = concurrent;
   667   // We propagate this to all tasks, not just the active ones.
   668   for (int i = 0; i < (int) _max_task_num; ++i)
   669     _tasks[i]->set_concurrent(concurrent);
   671   if (concurrent) {
   672     set_concurrent_marking_in_progress();
   673   } else {
   674     // We currently assume that the concurrent flag has been set to
   675     // false before we start remark. At this point we should also be
   676     // in a STW phase.
   677     assert(!concurrent_marking_in_progress(), "invariant");
   678     assert(_finger == _heap_end, "only way to get here");
   679     update_g1_committed(true);
   680   }
   681 }
   683 void ConcurrentMark::set_non_marking_state() {
   684   // We set the global marking state to some default values when we're
   685   // not doing marking.
   686   clear_marking_state();
   687   _active_tasks = 0;
   688   clear_concurrent_marking_in_progress();
   689 }
   691 ConcurrentMark::~ConcurrentMark() {
   692   int size = (int) MAX2(ParallelGCThreads, (size_t)1);
   693   for (int i = 0; i < size; i++) delete _par_cleanup_thread_state[i];
   694   FREE_C_HEAP_ARRAY(ParCleanupThreadState*,
   695                     _par_cleanup_thread_state);
   697   for (int i = 0; i < (int) _max_task_num; ++i) {
   698     delete _task_queues->queue(i);
   699     delete _tasks[i];
   700   }
   701   delete _task_queues;
   702   FREE_C_HEAP_ARRAY(CMTask*, _max_task_num);
   703 }
   705 // This closure is used to mark refs into the g1 generation
   706 // from external roots in the CMS bit map.
   707 // Called at the first checkpoint.
   708 //
   710 void ConcurrentMark::clearNextBitmap() {
   711   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   712   G1CollectorPolicy* g1p = g1h->g1_policy();
    714   // Make sure that the concurrent mark thread still looks to be in
   715   // the current cycle.
   716   guarantee(cmThread()->during_cycle(), "invariant");
   718   // We are finishing up the current cycle by clearing the next
   719   // marking bitmap and getting it ready for the next cycle. During
   720   // this time no other cycle can start. So, let's make sure that this
   721   // is the case.
   722   guarantee(!g1h->mark_in_progress(), "invariant");
   724   // clear the mark bitmap (no grey objects to start with).
   725   // We need to do this in chunks and offer to yield in between
   726   // each chunk.
   727   HeapWord* start  = _nextMarkBitMap->startWord();
   728   HeapWord* end    = _nextMarkBitMap->endWord();
   729   HeapWord* cur    = start;
   730   size_t chunkSize = M;
   731   while (cur < end) {
   732     HeapWord* next = cur + chunkSize;
   733     if (next > end)
   734       next = end;
   735     MemRegion mr(cur,next);
   736     _nextMarkBitMap->clearRange(mr);
   737     cur = next;
   738     do_yield_check();
   740     // Repeat the asserts from above. We'll do them as asserts here to
   741     // minimize their overhead on the product. However, we'll have
   742     // them as guarantees at the beginning / end of the bitmap
   743     // clearing to get some checking in the product.
   744     assert(cmThread()->during_cycle(), "invariant");
   745     assert(!g1h->mark_in_progress(), "invariant");
   746   }
   748   // Repeat the asserts from above.
   749   guarantee(cmThread()->during_cycle(), "invariant");
   750   guarantee(!g1h->mark_in_progress(), "invariant");
   751 }
   753 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
   754 public:
   755   bool doHeapRegion(HeapRegion* r) {
   756     if (!r->continuesHumongous()) {
   757       r->note_start_of_marking(true);
   758     }
   759     return false;
   760   }
   761 };
   763 void ConcurrentMark::checkpointRootsInitialPre() {
   764   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
   765   G1CollectorPolicy* g1p = g1h->g1_policy();
   767   _has_aborted = false;
   769 #ifndef PRODUCT
   770   if (G1PrintReachableAtInitialMark) {
   771     print_reachable("at-cycle-start",
   772                     true /* use_prev_marking */, true /* all */);
   773   }
   774 #endif
   776   // Initialise marking structures. This has to be done in a STW phase.
   777   reset();
   778 }
   780 class CMMarkRootsClosure: public OopsInGenClosure {
   781 private:
   782   ConcurrentMark*  _cm;
   783   G1CollectedHeap* _g1h;
   784   bool             _do_barrier;
   786 public:
   787   CMMarkRootsClosure(ConcurrentMark* cm,
   788                      G1CollectedHeap* g1h,
   789                      bool do_barrier) : _cm(cm), _g1h(g1h),
   790                                         _do_barrier(do_barrier) { }
   792   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
   793   virtual void do_oop(      oop* p) { do_oop_work(p); }
   795   template <class T> void do_oop_work(T* p) {
   796     T heap_oop = oopDesc::load_heap_oop(p);
   797     if (!oopDesc::is_null(heap_oop)) {
   798       oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
   799       assert(obj->is_oop() || obj->mark() == NULL,
   800              "expected an oop, possibly with mark word displaced");
   801       HeapWord* addr = (HeapWord*)obj;
   802       if (_g1h->is_in_g1_reserved(addr)) {
   803         _cm->grayRoot(obj);
   804       }
   805     }
   806     if (_do_barrier) {
   807       assert(!_g1h->is_in_g1_reserved(p),
   808              "Should be called on external roots");
   809       do_barrier(p);
   810     }
   811   }
   812 };
   814 void ConcurrentMark::checkpointRootsInitialPost() {
   815   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
   817   // For each region note start of marking.
   818   NoteStartOfMarkHRClosure startcl;
   819   g1h->heap_region_iterate(&startcl);
   821   // Start weak-reference discovery.
   822   ReferenceProcessor* rp = g1h->ref_processor();
   823   rp->verify_no_references_recorded();
   824   rp->enable_discovery(); // enable ("weak") refs discovery
   825   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
   827   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    828   // This is the start of the marking cycle; we expect all
   829   // threads to have SATB queues with active set to false.
   830   satb_mq_set.set_active_all_threads(true, /* new active value */
   831                                      false /* expected_active */);
   833   // update_g1_committed() will be called at the end of an evac pause
   834   // when marking is on. So, it's also called at the end of the
   835   // initial-mark pause to update the heap end, if the heap expands
   836   // during it. No need to call it here.
   837 }
   839 // Checkpoint the roots into this generation from outside
   840 // this generation. [Note this initial checkpoint need only
   841 // be approximate -- we'll do a catch up phase subsequently.]
   842 void ConcurrentMark::checkpointRootsInitial() {
   843   assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped");
   844   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   846   double start = os::elapsedTime();
   848   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
   849   g1p->record_concurrent_mark_init_start();
   850   checkpointRootsInitialPre();
   852   // YSR: when concurrent precleaning is in place, we'll
   853   // need to clear the cached card table here
   855   ResourceMark rm;
   856   HandleMark  hm;
   858   g1h->ensure_parsability(false);
   859   g1h->perm_gen()->save_marks();
   861   CMMarkRootsClosure notOlder(this, g1h, false);
   862   CMMarkRootsClosure older(this, g1h, true);
   864   g1h->set_marking_started();
   865   g1h->rem_set()->prepare_for_younger_refs_iterate(false);
   867   g1h->process_strong_roots(true,    // activate StrongRootsScope
   868                             false,   // fake perm gen collection
   869                             SharedHeap::SO_AllClasses,
   870                             &notOlder, // Regular roots
   871                             NULL,     // do not visit active blobs
   872                             &older    // Perm Gen Roots
   873                             );
   874   checkpointRootsInitialPost();
   876   // Statistics.
   877   double end = os::elapsedTime();
   878   _init_times.add((end - start) * 1000.0);
   880   g1p->record_concurrent_mark_init_end();
   881 }
   883 /*
   884    Notice that in the next two methods, we actually leave the STS
   885    during the barrier sync and join it immediately afterwards. If we
    886    do not do this, then the following deadlock can occur: one
   887    thread could be in the barrier sync code, waiting for the other
   888    thread to also sync up, whereas another one could be trying to
   889    yield, while also waiting for the other threads to sync up too.
    891    Because the thread that does the sync barrier has left the STS, it
    892    is possible for it to be suspended while a Full GC or an evacuation
    893    pause occurs. This is actually safe, since entering the sync
    894    barrier is one of the last things do_marking_step() does, and it
   895    doesn't manipulate any data structures afterwards.
   896 */
   898 void ConcurrentMark::enter_first_sync_barrier(int task_num) {
   899   if (verbose_low())
   900     gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
   902   ConcurrentGCThread::stsLeave();
   903   _first_overflow_barrier_sync.enter();
   904   ConcurrentGCThread::stsJoin();
   905   // at this point everyone should have synced up and not be doing any
   906   // more work
   908   if (verbose_low())
   909     gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
   911   // let task 0 do this
   912   if (task_num == 0) {
   913     // task 0 is responsible for clearing the global data structures
   914     clear_marking_state();
   916     if (PrintGC) {
   917       gclog_or_tty->date_stamp(PrintGCDateStamps);
   918       gclog_or_tty->stamp(PrintGCTimeStamps);
   919       gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
   920     }
   921   }
    923   // after this, each task should reset its own data structures and
    924   // then go into the second barrier
   925 }
   927 void ConcurrentMark::enter_second_sync_barrier(int task_num) {
   928   if (verbose_low())
   929     gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
   931   ConcurrentGCThread::stsLeave();
   932   _second_overflow_barrier_sync.enter();
   933   ConcurrentGCThread::stsJoin();
   934   // at this point everything should be re-initialised and ready to go
   936   if (verbose_low())
   937     gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
   938 }
   940 void ConcurrentMark::grayRoot(oop p) {
   941   HeapWord* addr = (HeapWord*) p;
   942   // We can't really check against _heap_start and _heap_end, since it
   943   // is possible during an evacuation pause with piggy-backed
   944   // initial-mark that the committed space is expanded during the
    945   // pause without CM observing this change. So the assertion below
    946   // is a bit conservative, but better than nothing.
   947   assert(_g1h->g1_committed().contains(addr),
   948          "address should be within the heap bounds");
   950   if (!_nextMarkBitMap->isMarked(addr))
   951     _nextMarkBitMap->parMark(addr);
   952 }
   954 void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
   955   // The objects on the region have already been marked "in bulk" by
   956   // the caller. We only need to decide whether to push the region on
   957   // the region stack or not.
   959   if (!concurrent_marking_in_progress() || !_should_gray_objects)
   960     // We're done with marking and waiting for remark. We do not need to
   961     // push anything else on the region stack.
   962     return;
   964   HeapWord* finger = _finger;
   966   if (verbose_low())
   967     gclog_or_tty->print_cr("[global] attempting to push "
   968                            "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
   969                            PTR_FORMAT, mr.start(), mr.end(), finger);
   971   if (mr.start() < finger) {
   972     // The finger is always heap region aligned and it is not possible
   973     // for mr to span heap regions.
   974     assert(mr.end() <= finger, "invariant");
   976     // Separated the asserts so that we know which one fires.
   977     assert(mr.start() <= mr.end(),
   978            "region boundaries should fall within the committed space");
   979     assert(_heap_start <= mr.start(),
   980            "region boundaries should fall within the committed space");
   981     assert(mr.end() <= _heap_end,
   982            "region boundaries should fall within the committed space");
   983     if (verbose_low())
   984       gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
   985                              "below the finger, pushing it",
   986                              mr.start(), mr.end());
   988     if (!region_stack_push(mr)) {
   989       if (verbose_low())
   990         gclog_or_tty->print_cr("[global] region stack has overflown.");
   991     }
   992   }
   993 }
   995 void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
   996   // The object is not marked by the caller. We need to at least mark
    997   // it and maybe push it on the stack.
   999   HeapWord* addr = (HeapWord*)p;
  1000   if (!_nextMarkBitMap->isMarked(addr)) {
  1001     // We definitely need to mark it, irrespective whether we bail out
  1002     // because we're done with marking.
  1003     if (_nextMarkBitMap->parMark(addr)) {
  1004       if (!concurrent_marking_in_progress() || !_should_gray_objects)
  1005         // If we're done with concurrent marking and we're waiting for
  1006         // remark, then we're not pushing anything on the stack.
  1007         return;
  1009       // No OrderAccess:store_load() is needed. It is implicit in the
  1010       // CAS done in parMark(addr) above
  1011       HeapWord* finger = _finger;
  1013       if (addr < finger) {
  1014         if (!mark_stack_push(oop(addr))) {
  1015           if (verbose_low())
  1016             gclog_or_tty->print_cr("[global] global stack overflow "
  1017                                    "during parMark");
   1018         }
   1019       }
   1020     }
   1021   }
   1022 }
   1024 class CMConcurrentMarkingTask: public AbstractGangTask {
  1025 private:
  1026   ConcurrentMark*       _cm;
  1027   ConcurrentMarkThread* _cmt;
  1029 public:
  1030   void work(int worker_i) {
  1031     assert(Thread::current()->is_ConcurrentGC_thread(),
  1032            "this should only be done by a conc GC thread");
  1034     double start_vtime = os::elapsedVTime();
  1036     ConcurrentGCThread::stsJoin();
  1038     assert((size_t) worker_i < _cm->active_tasks(), "invariant");
  1039     CMTask* the_task = _cm->task(worker_i);
  1040     the_task->record_start_time();
  1041     if (!_cm->has_aborted()) {
  1042       do {
  1043         double start_vtime_sec = os::elapsedVTime();
  1044         double start_time_sec = os::elapsedTime();
  1045         the_task->do_marking_step(10.0);
  1046         double end_time_sec = os::elapsedTime();
  1047         double end_vtime_sec = os::elapsedVTime();
  1048         double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
  1049         double elapsed_time_sec = end_time_sec - start_time_sec;
  1050         _cm->clear_has_overflown();
  1052         bool ret = _cm->do_yield_check(worker_i);
  1054         jlong sleep_time_ms;
  1055         if (!_cm->has_aborted() && the_task->has_aborted()) {
  1056           sleep_time_ms =
  1057             (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
  1058           ConcurrentGCThread::stsLeave();
  1059           os::sleep(Thread::current(), sleep_time_ms, false);
  1060           ConcurrentGCThread::stsJoin();
   1061         }
   1062         double end_time2_sec = os::elapsedTime();
   1063         double elapsed_time2_sec = end_time2_sec - start_time_sec;
  1065 #if 0
  1066           gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
  1067                                  "overhead %1.4lf",
  1068                                  elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
  1069                                  the_task->conc_overhead(os::elapsedTime()) * 8.0);
  1070           gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
  1071                                  elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
  1072 #endif
  1073       } while (!_cm->has_aborted() && the_task->has_aborted());
   1074     }
   1075     the_task->record_end_time();
  1076     guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
  1078     ConcurrentGCThread::stsLeave();
  1080     double end_vtime = os::elapsedVTime();
  1081     _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
   1082   }
   1084   CMConcurrentMarkingTask(ConcurrentMark* cm,
  1085                           ConcurrentMarkThread* cmt) :
  1086       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
  1088   ~CMConcurrentMarkingTask() { }
  1089 };
  1091 void ConcurrentMark::markFromRoots() {
  1092   // we might be tempted to assert that:
  1093   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  1094   //        "inconsistent argument?");
  1095   // However that wouldn't be right, because it's possible that
  1096   // a safepoint is indeed in progress as a younger generation
  1097   // stop-the-world GC happens even as we mark in this generation.
  1099   _restart_for_overflow = false;
  1101   set_phase(MAX2((size_t) 1, parallel_marking_threads()), true);
  1103   CMConcurrentMarkingTask markingTask(this, cmThread());
  1104   if (parallel_marking_threads() > 0)
  1105     _parallel_workers->run_task(&markingTask);
  1106   else
  1107     markingTask.work(0);
  1108   print_stats();
   1109 }
   1111 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  1112   // world is stopped at this checkpoint
  1113   assert(SafepointSynchronize::is_at_safepoint(),
  1114          "world should be stopped");
  1115   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  1117   // If a full collection has happened, we shouldn't do this.
  1118   if (has_aborted()) {
  1119     g1h->set_marking_complete(); // So bitmap clearing isn't confused
  1120     return;
   1121   }
   1123   if (VerifyDuringGC) {
  1124     HandleMark hm;  // handle scope
  1125     gclog_or_tty->print(" VerifyDuringGC:(before)");
  1126     Universe::heap()->prepare_for_verify();
  1127     Universe::verify(true, false, true);
   1128   }
   1130   G1CollectorPolicy* g1p = g1h->g1_policy();
  1131   g1p->record_concurrent_mark_remark_start();
  1133   double start = os::elapsedTime();
  1135   checkpointRootsFinalWork();
  1137   double mark_work_end = os::elapsedTime();
  1139   weakRefsWork(clear_all_soft_refs);
  1141   if (has_overflown()) {
  1142     // Oops.  We overflowed.  Restart concurrent marking.
  1143     _restart_for_overflow = true;
  1144     // Clear the flag. We do not need it any more.
  1145     clear_has_overflown();
  1146     if (G1TraceMarkStackOverflow)
  1147       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
  1148   } else {
  1149     // We're done with marking.
   1150     // This is the end of the marking cycle; we expect all
  1151     // threads to have SATB queues with active set to true.
  1152     JavaThread::satb_mark_queue_set().set_active_all_threads(
  1153                                                   false, /* new active value */
  1154                                                   true /* expected_active */);
  1156     if (VerifyDuringGC) {
  1157       HandleMark hm;  // handle scope
  1158       gclog_or_tty->print(" VerifyDuringGC:(after)");
  1159       Universe::heap()->prepare_for_verify();
  1160       Universe::heap()->verify(/* allow_dirty */      true,
  1161                                /* silent */           false,
  1162                                /* use_prev_marking */ false);
   1163     }
   1164   }
   1166 #if VERIFY_OBJS_PROCESSED
  1167   _scan_obj_cl.objs_processed = 0;
  1168   ThreadLocalObjQueue::objs_enqueued = 0;
  1169 #endif
  1171   // Statistics
  1172   double now = os::elapsedTime();
  1173   _remark_mark_times.add((mark_work_end - start) * 1000.0);
  1174   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  1175   _remark_times.add((now - start) * 1000.0);
  1177   g1p->record_concurrent_mark_remark_end();
   1178 }
   1181 #define CARD_BM_TEST_MODE 0
  1183 class CalcLiveObjectsClosure: public HeapRegionClosure {
  1185   CMBitMapRO* _bm;
  1186   ConcurrentMark* _cm;
  1187   bool _changed;
  1188   bool _yield;
  1189   size_t _words_done;
  1190   size_t _tot_live;
  1191   size_t _tot_used;
  1192   size_t _regions_done;
  1193   double _start_vtime_sec;
  1195   BitMap* _region_bm;
  1196   BitMap* _card_bm;
  1197   intptr_t _bottom_card_num;
  1198   bool _final;
  1200   void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
  1201     for (intptr_t i = start_card_num; i <= last_card_num; i++) {
  1202 #if CARD_BM_TEST_MODE
  1203       guarantee(_card_bm->at(i - _bottom_card_num), "Should already be set.");
  1204 #else
  1205       _card_bm->par_at_put(i - _bottom_card_num, 1);
  1206 #endif
   1207     }
   1208   }
   1210 public:
  1211   CalcLiveObjectsClosure(bool final,
  1212                          CMBitMapRO *bm, ConcurrentMark *cm,
  1213                          BitMap* region_bm, BitMap* card_bm) :
  1214     _bm(bm), _cm(cm), _changed(false), _yield(true),
  1215     _words_done(0), _tot_live(0), _tot_used(0),
  1216     _region_bm(region_bm), _card_bm(card_bm),_final(final),
  1217     _regions_done(0), _start_vtime_sec(0.0)
   1218   {
   1219     _bottom_card_num =
  1220       intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
  1221                CardTableModRefBS::card_shift);
   1222   }
   1224   // It takes a region that's not empty (i.e., it has at least one
   1225   // live object in it) and sets its corresponding bit on the region
  1226   // bitmap to 1. If the region is "starts humongous" it will also set
  1227   // to 1 the bits on the region bitmap that correspond to its
  1228   // associated "continues humongous" regions.
  1229   void set_bit_for_region(HeapRegion* hr) {
  1230     assert(!hr->continuesHumongous(), "should have filtered those out");
  1232     size_t index = hr->hrs_index();
  1233     if (!hr->startsHumongous()) {
  1234       // Normal (non-humongous) case: just set the bit.
  1235       _region_bm->par_at_put((BitMap::idx_t) index, true);
  1236     } else {
  1237       // Starts humongous case: calculate how many regions are part of
  1238       // this humongous region and then set the bit range. It might
  1239       // have been a bit more efficient to look at the object that
  1240       // spans these humongous regions to calculate their number from
  1241       // the object's size. However, it's a good idea to calculate
  1242       // this based on the metadata itself, and not the region
  1243       // contents, so that this code is not aware of what goes into
  1244       // the humongous regions (in case this changes in the future).
  1245       G1CollectedHeap* g1h = G1CollectedHeap::heap();
  1246       size_t end_index = index + 1;
  1247       while (end_index < g1h->n_regions()) {
  1248         HeapRegion* chr = g1h->region_at(end_index);
  1249         if (!chr->continuesHumongous()) {
  1250           break;
   1251         }
   1252         end_index += 1;
   1253       }
  1254       _region_bm->par_at_put_range((BitMap::idx_t) index,
  1255                                    (BitMap::idx_t) end_index, true);
   1256     }
   1257   }
   1259   bool doHeapRegion(HeapRegion* hr) {
  1260     if (!_final && _regions_done == 0)
  1261       _start_vtime_sec = os::elapsedVTime();
  1263     if (hr->continuesHumongous()) {
  1264       // We will ignore these here and process them when their
  1265       // associated "starts humongous" region is processed (see
  1266       // set_bit_for_heap_region()). Note that we cannot rely on their
  1267       // associated "starts humongous" region to have their bit set to
  1268       // 1 since, due to the region chunking in the parallel region
  1269       // iteration, a "continues humongous" region might be visited
  1270       // before its associated "starts humongous".
  1271       return false;
   1272     }
   1274     HeapWord* nextTop = hr->next_top_at_mark_start();
  1275     HeapWord* start   = hr->top_at_conc_mark_count();
  1276     assert(hr->bottom() <= start && start <= hr->end() &&
  1277            hr->bottom() <= nextTop && nextTop <= hr->end() &&
  1278            start <= nextTop,
  1279            "Preconditions.");
   1280     // Otherwise, record the number of words we'll examine.
  1281     size_t words_done = (nextTop - start);
  1282     // Find the first marked object at or after "start".
  1283     start = _bm->getNextMarkedWordAddress(start, nextTop);
  1284     size_t marked_bytes = 0;
  1286     // Below, the term "card num" means the result of shifting an address
  1287     // by the card shift -- address 0 corresponds to card number 0.  One
  1288     // must subtract the card num of the bottom of the heap to obtain a
  1289     // card table index.
  1290     // The first card num of the sequence of live cards currently being
  1291     // constructed.  -1 ==> no sequence.
  1292     intptr_t start_card_num = -1;
  1293     // The last card num of the sequence of live cards currently being
  1294     // constructed.  -1 ==> no sequence.
  1295     intptr_t last_card_num = -1;
  1297     while (start < nextTop) {
  1298       if (_yield && _cm->do_yield_check()) {
  1299         // We yielded.  It might be for a full collection, in which case
  1300         // all bets are off; terminate the traversal.
  1301         if (_cm->has_aborted()) {
  1302           _changed = false;
  1303           return true;
  1304         } else {
  1305           // Otherwise, it might be a collection pause, and the region
  1306           // we're looking at might be in the collection set.  We'll
  1307           // abandon this region.
  1308           return false;
   1309         }
   1310       }
   1311       oop obj = oop(start);
  1312       int obj_sz = obj->size();
  1313       // The card num of the start of the current object.
  1314       intptr_t obj_card_num =
  1315         intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
  1317       HeapWord* obj_last = start + obj_sz - 1;
  1318       intptr_t obj_last_card_num =
  1319         intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);
  1321       if (obj_card_num != last_card_num) {
  1322         if (start_card_num == -1) {
  1323           assert(last_card_num == -1, "Both or neither.");
  1324           start_card_num = obj_card_num;
  1325         } else {
  1326           assert(last_card_num != -1, "Both or neither.");
  1327           assert(obj_card_num >= last_card_num, "Inv");
  1328           if ((obj_card_num - last_card_num) > 1) {
  1329             // Mark the last run, and start a new one.
  1330             mark_card_num_range(start_card_num, last_card_num);
  1331             start_card_num = obj_card_num;
   1332           }
   1333         }
   1334 #if CARD_BM_TEST_MODE
  1335         /*
  1336         gclog_or_tty->print_cr("Setting bits from %d/%d.",
  1337                                obj_card_num - _bottom_card_num,
  1338                                obj_last_card_num - _bottom_card_num);
  1339         */
  1340         for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
  1341           _card_bm->par_at_put(j - _bottom_card_num, 1);
   1342         }
   1343 #endif
   1344       }
  1345       // In any case, we set the last card num.
  1346       last_card_num = obj_last_card_num;
  1348       marked_bytes += (size_t)obj_sz * HeapWordSize;
  1349       // Find the next marked object after this one.
  1350       start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
  1351       _changed = true;
   1352     }
   1353     // Handle the last range, if any.
  1354     if (start_card_num != -1)
  1355       mark_card_num_range(start_card_num, last_card_num);
  1356     if (_final) {
  1357       // Mark the allocated-since-marking portion...
  1358       HeapWord* tp = hr->top();
  1359       if (nextTop < tp) {
  1360         start_card_num =
  1361           intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
  1362         last_card_num =
  1363           intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
  1364         mark_card_num_range(start_card_num, last_card_num);
  1365         // This definitely means the region has live objects.
  1366         set_bit_for_region(hr);
   1367       }
   1368     }
   1370     hr->add_to_marked_bytes(marked_bytes);
  1371     // Update the live region bitmap.
  1372     if (marked_bytes > 0) {
  1373       set_bit_for_region(hr);
   1374     }
   1375     hr->set_top_at_conc_mark_count(nextTop);
  1376     _tot_live += hr->next_live_bytes();
  1377     _tot_used += hr->used();
  1378     _words_done = words_done;
  1380     if (!_final) {
  1381       ++_regions_done;
  1382       if (_regions_done % 10 == 0) {
  1383         double end_vtime_sec = os::elapsedVTime();
  1384         double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
  1385         if (elapsed_vtime_sec > (10.0 / 1000.0)) {
  1386           jlong sleep_time_ms =
  1387             (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
  1388           os::sleep(Thread::current(), sleep_time_ms, false);
  1389           _start_vtime_sec = end_vtime_sec;
  1394     return false;
  1397   bool changed() { return _changed;  }
  1398   void reset()   { _changed = false; _words_done = 0; }
  1399   void no_yield() { _yield = false; }
  1400   size_t words_done() { return _words_done; }
  1401   size_t tot_live() { return _tot_live; }
  1402   size_t tot_used() { return _tot_used; }
  1403 };
  1406 void ConcurrentMark::calcDesiredRegions() {
  1407   _region_bm.clear();
  1408   _card_bm.clear();
  1409   CalcLiveObjectsClosure calccl(false /*final*/,
  1410                                 nextMarkBitMap(), this,
  1411                                 &_region_bm, &_card_bm);
  1412   G1CollectedHeap *g1h = G1CollectedHeap::heap();
  1413   g1h->heap_region_iterate(&calccl);
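         // The closure may yield during the iteration and abandon regions
         // before processing them, so repeat complete passes until one
         // finishes without making any further changes.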
  1415   do {
  1416     calccl.reset();
  1417     g1h->heap_region_iterate(&calccl);
  1418   } while (calccl.changed());
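       // Stop-the-world counterpart of the counting above: each worker runs the
       // closure in final mode (no yielding) over the regions it claims and
       // records per-worker live/used byte totals, which are summed by
       // live_bytes() and used_bytes().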
  1421 class G1ParFinalCountTask: public AbstractGangTask {
  1422 protected:
  1423   G1CollectedHeap* _g1h;
  1424   CMBitMap* _bm;
  1425   size_t _n_workers;
  1426   size_t *_live_bytes;
  1427   size_t *_used_bytes;
  1428   BitMap* _region_bm;
  1429   BitMap* _card_bm;
  1430 public:
  1431   G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
  1432                       BitMap* region_bm, BitMap* card_bm) :
  1433     AbstractGangTask("G1 final counting"), _g1h(g1h),
  1434     _bm(bm), _region_bm(region_bm), _card_bm(card_bm)
  1436     if (ParallelGCThreads > 0)
  1437       _n_workers = _g1h->workers()->total_workers();
  1438     else
  1439       _n_workers = 1;
  1440     _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
  1441     _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
  1444   ~G1ParFinalCountTask() {
  1445     FREE_C_HEAP_ARRAY(size_t, _live_bytes);
  1446     FREE_C_HEAP_ARRAY(size_t, _used_bytes);
  1449   void work(int i) {
  1450     CalcLiveObjectsClosure calccl(true /*final*/,
  1451                                   _bm, _g1h->concurrent_mark(),
  1452                                   _region_bm, _card_bm);
  1453     calccl.no_yield();
  1454     if (ParallelGCThreads > 0) {
  1455       _g1h->heap_region_par_iterate_chunked(&calccl, i,
  1456                                             HeapRegion::FinalCountClaimValue);
  1457     } else {
  1458       _g1h->heap_region_iterate(&calccl);
  1460     assert(calccl.complete(), "Shouldn't have yielded!");
  1462     assert((size_t) i < _n_workers, "invariant");
  1463     _live_bytes[i] = calccl.tot_live();
  1464     _used_bytes[i] = calccl.tot_used();
  1466   size_t live_bytes()  {
  1467     size_t live_bytes = 0;
  1468     for (size_t i = 0; i < _n_workers; ++i)
  1469       live_bytes += _live_bytes[i];
  1470     return live_bytes;
  1472   size_t used_bytes()  {
  1473     size_t used_bytes = 0;
  1474     for (size_t i = 0; i < _n_workers; ++i)
  1475       used_bytes += _used_bytes[i];
  1476     return used_bytes;
  1478 };
  1480 class G1ParNoteEndTask;
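       // Invoked after marking: each worker visits its claimed regions, records
       // end-of-marking information, and frees regions found to be completely
       // empty, accumulating freed bytes and the maximum live bytes seen.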
  1482 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  1483   G1CollectedHeap* _g1;
  1484   int _worker_num;
  1485   size_t _max_live_bytes;
  1486   size_t _regions_claimed;
  1487   size_t _freed_bytes;
  1488   size_t _cleared_h_regions;
  1489   size_t _freed_regions;
  1490   UncleanRegionList* _unclean_region_list;
  1491   double _claimed_region_time;
  1492   double _max_region_time;
  1494 public:
  1495   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
  1496                              UncleanRegionList* list,
  1497                              int worker_num);
  1498   size_t freed_bytes() { return _freed_bytes; }
  1499   size_t cleared_h_regions() { return _cleared_h_regions; }
  1500   size_t freed_regions() { return  _freed_regions; }
  1501   UncleanRegionList* unclean_region_list() {
  1502     return _unclean_region_list;
  1505   bool doHeapRegion(HeapRegion *r);
  1507   size_t max_live_bytes() { return _max_live_bytes; }
  1508   size_t regions_claimed() { return _regions_claimed; }
  1509   double claimed_region_time_sec() { return _claimed_region_time; }
  1510   double max_region_time_sec() { return _max_region_time; }
  1511 };
  1513 class G1ParNoteEndTask: public AbstractGangTask {
  1514   friend class G1NoteEndOfConcMarkClosure;
  1515 protected:
  1516   G1CollectedHeap* _g1h;
  1517   size_t _max_live_bytes;
  1518   size_t _freed_bytes;
  1519   ConcurrentMark::ParCleanupThreadState** _par_cleanup_thread_state;
  1520 public:
  1521   G1ParNoteEndTask(G1CollectedHeap* g1h,
  1522                    ConcurrentMark::ParCleanupThreadState**
  1523                    par_cleanup_thread_state) :
  1524     AbstractGangTask("G1 note end"), _g1h(g1h),
  1525     _max_live_bytes(0), _freed_bytes(0),
  1526     _par_cleanup_thread_state(par_cleanup_thread_state)
  1527   {}
  1529   void work(int i) {
  1530     double start = os::elapsedTime();
  1531     G1NoteEndOfConcMarkClosure g1_note_end(_g1h,
  1532                                            &_par_cleanup_thread_state[i]->list,
  1533                                            i);
  1534     if (ParallelGCThreads > 0) {
  1535       _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
  1536                                             HeapRegion::NoteEndClaimValue);
  1537     } else {
  1538       _g1h->heap_region_iterate(&g1_note_end);
  1540     assert(g1_note_end.complete(), "Shouldn't have yielded!");
  1542     // Now finish up freeing the current thread's regions.
  1543     _g1h->finish_free_region_work(g1_note_end.freed_bytes(),
  1544                                   g1_note_end.cleared_h_regions(),
  1545                                   0, NULL);
  1547       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  1548       _max_live_bytes += g1_note_end.max_live_bytes();
  1549       _freed_bytes += g1_note_end.freed_bytes();
  1551     double end = os::elapsedTime();
  1552     if (G1PrintParCleanupStats) {
  1553       gclog_or_tty->print("     Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
  1554                           "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
  1555                           i, start, end, (end-start)*1000.0,
  1556                           g1_note_end.regions_claimed(),
  1557                           g1_note_end.claimed_region_time_sec()*1000.0,
  1558                           g1_note_end.max_region_time_sec()*1000.0);
  1561   size_t max_live_bytes() { return _max_live_bytes; }
  1562   size_t freed_bytes() { return _freed_bytes; }
  1563 };
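       // Scrubs the remembered sets in parallel, using the region and card
       // liveness bitmaps produced by the counting phase above.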
  1565 class G1ParScrubRemSetTask: public AbstractGangTask {
  1566 protected:
  1567   G1RemSet* _g1rs;
  1568   BitMap* _region_bm;
  1569   BitMap* _card_bm;
  1570 public:
  1571   G1ParScrubRemSetTask(G1CollectedHeap* g1h,
  1572                        BitMap* region_bm, BitMap* card_bm) :
  1573     AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
  1574     _region_bm(region_bm), _card_bm(card_bm)
  1575   {}
  1577   void work(int i) {
  1578     if (ParallelGCThreads > 0) {
  1579       _g1rs->scrub_par(_region_bm, _card_bm, i,
  1580                        HeapRegion::ScrubRemSetClaimValue);
  1581     } else {
  1582       _g1rs->scrub(_region_bm, _card_bm);
  1586 };
  1588 G1NoteEndOfConcMarkClosure::
  1589 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
  1590                            UncleanRegionList* list,
  1591                            int worker_num)
  1592   : _g1(g1), _worker_num(worker_num),
  1593     _max_live_bytes(0), _regions_claimed(0),
  1594     _freed_bytes(0), _cleared_h_regions(0), _freed_regions(0),
  1595     _claimed_region_time(0.0), _max_region_time(0.0),
  1596     _unclean_region_list(list)
  1597 {}
  1599 bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *r) {
  1600   // We use a claim value of zero here because all regions
  1601   // were claimed with value 1 in the FinalCount task.
  1602   r->reset_gc_time_stamp();
  1603   if (!r->continuesHumongous()) {
  1604     double start = os::elapsedTime();
  1605     _regions_claimed++;
  1606     r->note_end_of_marking();
  1607     _max_live_bytes += r->max_live_bytes();
  1608     _g1->free_region_if_totally_empty_work(r,
  1609                                            _freed_bytes,
  1610                                            _cleared_h_regions,
  1611                                            _freed_regions,
  1612                                            _unclean_region_list,
  1613                                            true /*par*/);
  1614     double region_time = (os::elapsedTime() - start);
  1615     _claimed_region_time += region_time;
  1616     if (region_time > _max_region_time) _max_region_time = region_time;
  1618   return false;
  1621 void ConcurrentMark::cleanup() {
  1622   // world is stopped at this checkpoint
  1623   assert(SafepointSynchronize::is_at_safepoint(),
  1624          "world should be stopped");
  1625   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  1627   // If a full collection has happened, we shouldn't do this.
  1628   if (has_aborted()) {
  1629     g1h->set_marking_complete(); // So bitmap clearing isn't confused
  1630     return;
  1633   if (VerifyDuringGC) {
  1634     HandleMark hm;  // handle scope
  1635     gclog_or_tty->print(" VerifyDuringGC:(before)");
  1636     Universe::heap()->prepare_for_verify();
  1637     Universe::verify(/* allow dirty  */ true,
  1638                      /* silent       */ false,
  1639                      /* prev marking */ true);
  1642   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
  1643   g1p->record_concurrent_mark_cleanup_start();
  1645   double start = os::elapsedTime();
  1647   // Do counting once more with the world stopped for good measure.
  1648   G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
  1649                                         &_region_bm, &_card_bm);
  1650   if (ParallelGCThreads > 0) {
  1651     assert(g1h->check_heap_region_claim_values(
  1652                                                HeapRegion::InitialClaimValue),
  1653            "sanity check");
  1655     int n_workers = g1h->workers()->total_workers();
  1656     g1h->set_par_threads(n_workers);
  1657     g1h->workers()->run_task(&g1_par_count_task);
  1658     g1h->set_par_threads(0);
  1660     assert(g1h->check_heap_region_claim_values(
  1661                                              HeapRegion::FinalCountClaimValue),
  1662            "sanity check");
  1663   } else {
  1664     g1_par_count_task.work(0);
  1667   size_t known_garbage_bytes =
  1668     g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
  1669 #if 0
  1670   gclog_or_tty->print_cr("used %1.2lf, live %1.2lf, garbage %1.2lf",
  1671                          (double) g1_par_count_task.used_bytes() / (double) (1024 * 1024),
  1672                          (double) g1_par_count_task.live_bytes() / (double) (1024 * 1024),
  1673                          (double) known_garbage_bytes / (double) (1024 * 1024));
  1674 #endif // 0
  1675   g1p->set_known_garbage_bytes(known_garbage_bytes);
  1677   size_t start_used_bytes = g1h->used();
  1678   _at_least_one_mark_complete = true;
  1679   g1h->set_marking_complete();
  1681   double count_end = os::elapsedTime();
  1682   double this_final_counting_time = (count_end - start);
  1683   if (G1PrintParCleanupStats) {
  1684     gclog_or_tty->print_cr("Cleanup:");
  1685     gclog_or_tty->print_cr("  Finalize counting: %8.3f ms",
  1686                            this_final_counting_time*1000.0);
  1688   _total_counting_time += this_final_counting_time;
  1690   // Install newly created mark bitMap as "prev".
  1691   swapMarkBitMaps();
  1693   g1h->reset_gc_time_stamp();
  1695   // Note end of marking in all heap regions.
  1696   double note_end_start = os::elapsedTime();
  1697   G1ParNoteEndTask g1_par_note_end_task(g1h, _par_cleanup_thread_state);
  1698   if (ParallelGCThreads > 0) {
  1699     int n_workers = g1h->workers()->total_workers();
  1700     g1h->set_par_threads(n_workers);
  1701     g1h->workers()->run_task(&g1_par_note_end_task);
  1702     g1h->set_par_threads(0);
  1704     assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
  1705            "sanity check");
  1706   } else {
  1707     g1_par_note_end_task.work(0);
  1709   // Tell the mutators that there might be unclean regions coming...
  1710   g1h->set_unclean_regions_coming(true);
  1711   double note_end_end = os::elapsedTime();
  1712   if (G1PrintParCleanupStats) {
  1713     gclog_or_tty->print_cr("  note end of marking: %8.3f ms.",
  1714                            (note_end_end - note_end_start)*1000.0);
  1718   // The remembered set scrubbing must be done before the cleanup-end call
  1719   // below, since it affects the metric by which we sort the heap regions.
  1720   if (G1ScrubRemSets) {
  1721     double rs_scrub_start = os::elapsedTime();
  1722     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
  1723     if (ParallelGCThreads > 0) {
  1724       int n_workers = g1h->workers()->total_workers();
  1725       g1h->set_par_threads(n_workers);
  1726       g1h->workers()->run_task(&g1_par_scrub_rs_task);
  1727       g1h->set_par_threads(0);
  1729       assert(g1h->check_heap_region_claim_values(
  1730                                             HeapRegion::ScrubRemSetClaimValue),
  1731              "sanity check");
  1732     } else {
  1733       g1_par_scrub_rs_task.work(0);
  1736     double rs_scrub_end = os::elapsedTime();
  1737     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
  1738     _total_rs_scrub_time += this_rs_scrub_time;
  1741   // this will also free any regions totally full of garbage objects,
  1742   // and sort the regions.
  1743   g1h->g1_policy()->record_concurrent_mark_cleanup_end(
  1744                         g1_par_note_end_task.freed_bytes(),
  1745                         g1_par_note_end_task.max_live_bytes());
  1747   // Statistics.
  1748   double end = os::elapsedTime();
  1749   _cleanup_times.add((end - start) * 1000.0);
  1751   // G1CollectedHeap::heap()->print();
  1752   // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
  1753   // G1CollectedHeap::heap()->get_gc_time_stamp());
  1755   if (PrintGC || PrintGCDetails) {
  1756     g1h->print_size_transition(gclog_or_tty,
  1757                                start_used_bytes,
  1758                                g1h->used(),
  1759                                g1h->capacity());
  1762   size_t cleaned_up_bytes = start_used_bytes - g1h->used();
  1763   g1p->decrease_known_garbage_bytes(cleaned_up_bytes);
  1765   // We need to make this be a "collection" so any collection pause that
  1766   // races with it goes around and waits for completeCleanup to finish.
  1767   g1h->increment_total_collections();
  1769   if (VerifyDuringGC) {
  1770     HandleMark hm;  // handle scope
  1771     gclog_or_tty->print(" VerifyDuringGC:(after)");
  1772     Universe::heap()->prepare_for_verify();
  1773     Universe::verify(/* allow dirty  */ true,
  1774                      /* silent       */ false,
  1775                      /* prev marking */ true);
  1779 void ConcurrentMark::completeCleanup() {
  1780   // A full collection intervened.
  1781   if (has_aborted()) return;
  1783   int first = 0;
  1784   int last = (int)MAX2(ParallelGCThreads, (size_t)1);
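         // Walk each worker's cleanup list: clear every region's remembered set
         // and hand the region back to the heap's global unclean list.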
  1785   for (int t = 0; t < last; t++) {
  1786     UncleanRegionList* list = &_par_cleanup_thread_state[t]->list;
  1787     assert(list->well_formed(), "Inv");
  1788     HeapRegion* hd = list->hd();
  1789     while (hd != NULL) {
  1790       // Clear this region's remembered set and move it to the unclean list.
  1791       hd->rem_set()->clear();
  1792       HeapRegion* next_hd = hd->next_from_unclean_list();
  1793       (void)list->pop();
  1794       assert(list->hd() == next_hd, "how not?");
  1795       _g1h->put_region_on_unclean_list(hd);
  1796       if (!hd->isHumongous()) {
  1797         // Bump the _free_regions count by 1.
  1798         _g1h->finish_free_region_work(0, 0, 1, NULL);
  1800       hd = list->hd();
  1801       assert(hd == next_hd, "how not?");
  1807 class G1CMIsAliveClosure: public BoolObjectClosure {
  1808   G1CollectedHeap* _g1;
  1809  public:
  1810   G1CMIsAliveClosure(G1CollectedHeap* g1) :
  1811     _g1(g1)
  1812   {}
  1814   void do_object(oop obj) {
  1815     assert(false, "not to be invoked");
  1817   bool do_object_b(oop obj) {
  1818     HeapWord* addr = (HeapWord*)obj;
  1819     return addr != NULL &&
  1820            (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
  1822 };
  1824 class G1CMKeepAliveClosure: public OopClosure {
  1825   G1CollectedHeap* _g1;
  1826   ConcurrentMark*  _cm;
  1827   CMBitMap*        _bitMap;
  1828  public:
  1829   G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
  1830                        CMBitMap* bitMap) :
  1831     _g1(g1), _cm(cm),
  1832     _bitMap(bitMap) {}
  1834   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  1835   virtual void do_oop(      oop* p) { do_oop_work(p); }
  1837   template <class T> void do_oop_work(T* p) {
  1838     oop thisOop = oopDesc::load_decode_heap_oop(p);
  1839     HeapWord* addr = (HeapWord*)thisOop;
  1840     if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(thisOop)) {
  1841       _bitMap->mark(addr);
  1842       _cm->mark_stack_push(thisOop);
  1845 };
  1847 class G1CMDrainMarkingStackClosure: public VoidClosure {
  1848   CMMarkStack*                  _markStack;
  1849   CMBitMap*                     _bitMap;
  1850   G1CMKeepAliveClosure*         _oopClosure;
  1851  public:
  1852   G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
  1853                                G1CMKeepAliveClosure* oopClosure) :
  1854     _bitMap(bitMap),
  1855     _markStack(markStack),
  1856     _oopClosure(oopClosure)
  1857   {}
  1859   void do_void() {
  1860     _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
  1862 };
  1864 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
  1865   ResourceMark rm;
  1866   HandleMark   hm;
  1867   G1CollectedHeap* g1h   = G1CollectedHeap::heap();
  1868   ReferenceProcessor* rp = g1h->ref_processor();
  1870   // Process weak references.
  1871   rp->setup_policy(clear_all_soft_refs);
  1872   assert(_markStack.isEmpty(), "mark stack should be empty");
  1874   G1CMIsAliveClosure   g1IsAliveClosure  (g1h);
  1875   G1CMKeepAliveClosure g1KeepAliveClosure(g1h, this, nextMarkBitMap());
  1876   G1CMDrainMarkingStackClosure
  1877     g1DrainMarkingStackClosure(nextMarkBitMap(), &_markStack,
  1878                                &g1KeepAliveClosure);
  1880   // XXXYYY  Also: copy the parallel ref processing code from CMS.
  1881   rp->process_discovered_references(&g1IsAliveClosure,
  1882                                     &g1KeepAliveClosure,
  1883                                     &g1DrainMarkingStackClosure,
  1884                                     NULL);
  1885   assert(_markStack.overflow() || _markStack.isEmpty(),
  1886          "mark stack should be empty (unless it overflowed)");
  1887   if (_markStack.overflow()) {
  1888     set_has_overflown();
  1891   rp->enqueue_discovered_references();
  1892   rp->verify_no_references_recorded();
  1893   assert(!rp->discovery_enabled(), "should have been disabled");
  1895   // Now clean up stale oops in SymbolTable and StringTable
  1896   SymbolTable::unlink(&g1IsAliveClosure);
  1897   StringTable::unlink(&g1IsAliveClosure);
  1900 void ConcurrentMark::swapMarkBitMaps() {
  1901   CMBitMapRO* temp = _prevMarkBitMap;
  1902   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
  1903   _nextMarkBitMap  = (CMBitMap*)  temp;
  1906 class CMRemarkTask: public AbstractGangTask {
  1907 private:
  1908   ConcurrentMark *_cm;
  1910 public:
  1911   void work(int worker_i) {
  1912     // Since all available tasks are actually started, we should
  1913     // only proceed if we're supposed to be active.
  1914     if ((size_t)worker_i < _cm->active_tasks()) {
  1915       CMTask* task = _cm->task(worker_i);
  1916       task->record_start_time();
  1917       do {
  1918         task->do_marking_step(1000000000.0 /* something very large */);
  1919       } while (task->has_aborted() && !_cm->has_overflown());
  1920       // If we overflow, then we do not want to restart. We instead
  1921       // want to abort remark and do concurrent marking again.
  1922       task->record_end_time();
  1926   CMRemarkTask(ConcurrentMark* cm) :
  1927     AbstractGangTask("Par Remark"), _cm(cm) { }
  1928 };
  1930 void ConcurrentMark::checkpointRootsFinalWork() {
  1931   ResourceMark rm;
  1932   HandleMark   hm;
  1933   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  1935   g1h->ensure_parsability(false);
  1937   if (ParallelGCThreads > 0) {
  1938     G1CollectedHeap::StrongRootsScope srs(g1h);
  1939     // this is remark, so we'll use up all available threads
  1940     int active_workers = ParallelGCThreads;
  1941     set_phase(active_workers, false);
  1943     CMRemarkTask remarkTask(this);
  1944     // We will start all available threads, even if we decide that the
  1945     // active_workers will be fewer. The extra ones will just bail out
  1946     // immediately.
  1947     int n_workers = g1h->workers()->total_workers();
  1948     g1h->set_par_threads(n_workers);
  1949     g1h->workers()->run_task(&remarkTask);
  1950     g1h->set_par_threads(0);
  1951   } else {
  1952     G1CollectedHeap::StrongRootsScope srs(g1h);
  1953     // this is remark, so we'll use up all available threads
  1954     int active_workers = 1;
  1955     set_phase(active_workers, false);
  1957     CMRemarkTask remarkTask(this);
  1958     // We will start all available threads, even if we decide that the
  1959     // active_workers will be fewer. The extra ones will just bail out
  1960     // immediately.
  1961     remarkTask.work(0);
  1963   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  1964   guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");
  1966   print_stats();
  1968   if (!restart_for_overflow())
  1969     set_non_marking_state();
  1971 #if VERIFY_OBJS_PROCESSED
  1972   if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
  1973     gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
  1974                            _scan_obj_cl.objs_processed,
  1975                            ThreadLocalObjQueue::objs_enqueued);
  1976     guarantee(_scan_obj_cl.objs_processed ==
  1977               ThreadLocalObjQueue::objs_enqueued,
  1978               "Different number of objs processed and enqueued.");
  1980 #endif
  1983 #ifndef PRODUCT
  1985 class PrintReachableOopClosure: public OopClosure {
  1986 private:
  1987   G1CollectedHeap* _g1h;
  1988   CMBitMapRO*      _bitmap;
  1989   outputStream*    _out;
  1990   bool             _use_prev_marking;
  1991   bool             _all;
  1993 public:
  1994   PrintReachableOopClosure(CMBitMapRO*   bitmap,
  1995                            outputStream* out,
  1996                            bool          use_prev_marking,
  1997                            bool          all) :
  1998     _g1h(G1CollectedHeap::heap()),
  1999     _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking), _all(all) { }
  2001   void do_oop(narrowOop* p) { do_oop_work(p); }
  2002   void do_oop(      oop* p) { do_oop_work(p); }
  2004   template <class T> void do_oop_work(T* p) {
  2005     oop         obj = oopDesc::load_decode_heap_oop(p);
  2006     const char* str = NULL;
  2007     const char* str2 = "";
  2009     if (obj == NULL) {
  2010       str = "";
  2011     } else if (!_g1h->is_in_g1_reserved(obj)) {
  2012       str = " O";
  2013     } else {
  2014       HeapRegion* hr  = _g1h->heap_region_containing(obj);
  2015       guarantee(hr != NULL, "invariant");
  2016       bool over_tams = false;
  2017       if (_use_prev_marking) {
  2018         over_tams = hr->obj_allocated_since_prev_marking(obj);
  2019       } else {
  2020         over_tams = hr->obj_allocated_since_next_marking(obj);
  2022       bool marked = _bitmap->isMarked((HeapWord*) obj);
  2024       if (over_tams) {
  2025         str = " >";
  2026         if (marked) {
  2027           str2 = " AND MARKED";
  2029       } else if (marked) {
  2030         str = " M";
  2031       } else {
  2032         str = " NOT";
  2036     _out->print_cr("  "PTR_FORMAT": "PTR_FORMAT"%s%s",
  2037                    p, (void*) obj, str, str2);
  2039 };
  2041 class PrintReachableObjectClosure : public ObjectClosure {
  2042 private:
  2043   CMBitMapRO*   _bitmap;
  2044   outputStream* _out;
  2045   bool          _use_prev_marking;
  2046   bool          _all;
  2047   HeapRegion*   _hr;
  2049 public:
  2050   PrintReachableObjectClosure(CMBitMapRO*   bitmap,
  2051                               outputStream* out,
  2052                               bool          use_prev_marking,
  2053                               bool          all,
  2054                               HeapRegion*   hr) :
  2055     _bitmap(bitmap), _out(out),
  2056     _use_prev_marking(use_prev_marking), _all(all), _hr(hr) { }
  2058   void do_object(oop o) {
  2059     bool over_tams;
  2060     if (_use_prev_marking) {
  2061       over_tams = _hr->obj_allocated_since_prev_marking(o);
  2062     } else {
  2063       over_tams = _hr->obj_allocated_since_next_marking(o);
  2065     bool marked = _bitmap->isMarked((HeapWord*) o);
  2066     bool print_it = _all || over_tams || marked;
  2068     if (print_it) {
  2069       _out->print_cr(" "PTR_FORMAT"%s",
  2070                      o, (over_tams) ? " >" : (marked) ? " M" : "");
  2071       PrintReachableOopClosure oopCl(_bitmap, _out, _use_prev_marking, _all);
  2072       o->oop_iterate(&oopCl);
  2075 };
  2077 class PrintReachableRegionClosure : public HeapRegionClosure {
  2078 private:
  2079   CMBitMapRO*   _bitmap;
  2080   outputStream* _out;
  2081   bool          _use_prev_marking;
  2082   bool          _all;
  2084 public:
  2085   bool doHeapRegion(HeapRegion* hr) {
  2086     HeapWord* b = hr->bottom();
  2087     HeapWord* e = hr->end();
  2088     HeapWord* t = hr->top();
  2089     HeapWord* p = NULL;
  2090     if (_use_prev_marking) {
  2091       p = hr->prev_top_at_mark_start();
  2092     } else {
  2093       p = hr->next_top_at_mark_start();
  2095     _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
  2096                    "TAMS: "PTR_FORMAT, b, e, t, p);
  2097     _out->cr();
  2099     HeapWord* from = b;
  2100     HeapWord* to   = t;
  2102     if (to > from) {
  2103       _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
  2104       _out->cr();
  2105       PrintReachableObjectClosure ocl(_bitmap, _out,
  2106                                       _use_prev_marking, _all, hr);
  2107       hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
  2108       _out->cr();
  2111     return false;
  2114   PrintReachableRegionClosure(CMBitMapRO*   bitmap,
  2115                               outputStream* out,
  2116                               bool          use_prev_marking,
  2117                               bool          all) :
  2118     _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking), _all(all) { }
  2119 };
  2121 void ConcurrentMark::print_reachable(const char* str,
  2122                                      bool use_prev_marking,
  2123                                      bool all) {
  2124   gclog_or_tty->cr();
  2125   gclog_or_tty->print_cr("== Doing heap dump... ");
  2127   if (G1PrintReachableBaseFile == NULL) {
  2128     gclog_or_tty->print_cr("  #### error: no base file defined");
  2129     return;
  2132   if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
  2133       (JVM_MAXPATHLEN - 1)) {
  2134     gclog_or_tty->print_cr("  #### error: file name too long");
  2135     return;
  2138   char file_name[JVM_MAXPATHLEN];
  2139   sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
  2140   gclog_or_tty->print_cr("  dumping to file %s", file_name);
  2142   fileStream fout(file_name);
  2143   if (!fout.is_open()) {
  2144     gclog_or_tty->print_cr("  #### error: could not open file");
  2145     return;
  2148   outputStream* out = &fout;
  2150   CMBitMapRO* bitmap = NULL;
  2151   if (use_prev_marking) {
  2152     bitmap = _prevMarkBitMap;
  2153   } else {
  2154     bitmap = _nextMarkBitMap;
  2157   out->print_cr("-- USING %s", (use_prev_marking) ? "PTAMS" : "NTAMS");
  2158   out->cr();
  2160   out->print_cr("--- ITERATING OVER REGIONS");
  2161   out->cr();
  2162   PrintReachableRegionClosure rcl(bitmap, out, use_prev_marking, all);
  2163   _g1h->heap_region_iterate(&rcl);
  2164   out->cr();
  2166   gclog_or_tty->print_cr("  done");
  2167   gclog_or_tty->flush();
  2170 #endif // PRODUCT
  2172 // This note is for drainAllSATBBuffers and the code in between.
  2173 // In the future we could reuse a task to do this work during an
  2174 // evacuation pause (since now tasks are not active and can be claimed
  2175 // during an evacuation pause). This was a late change to the code and
  2176 // is currently not being taken advantage of.
  2178 class CMGlobalObjectClosure : public ObjectClosure {
  2179 private:
  2180   ConcurrentMark* _cm;
  2182 public:
  2183   void do_object(oop obj) {
  2184     _cm->deal_with_reference(obj);
  2187   CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
  2188 };
  2190 void ConcurrentMark::deal_with_reference(oop obj) {
  2191   if (verbose_high())
  2192     gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
  2193                            (void*) obj);
  2196   HeapWord* objAddr = (HeapWord*) obj;
  2197   assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
  2198   if (_g1h->is_in_g1_reserved(objAddr)) {
  2199     assert(obj != NULL, "is_in_g1_reserved should ensure this");
  2200     HeapRegion* hr = _g1h->heap_region_containing(obj);
  2201     if (_g1h->is_obj_ill(obj, hr)) {
  2202       if (verbose_high())
  2203         gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
  2204                                "marked", (void*) obj);
  2206       // we need to mark it first
  2207       if (_nextMarkBitMap->parMark(objAddr)) {
  2208         // No OrderAccess::store_load() is needed. It is implicit in the
  2209         // CAS done in parMark(objAddr) above.
  2210         HeapWord* finger = _finger;
  2211         if (objAddr < finger) {
  2212           if (verbose_high())
  2213             gclog_or_tty->print_cr("[global] below the global finger "
  2214                                    "("PTR_FORMAT"), pushing it", finger);
  2215           if (!mark_stack_push(obj)) {
  2216             if (verbose_low())
  2217               gclog_or_tty->print_cr("[global] global stack overflow during "
  2218                                      "deal_with_reference");
  2226 void ConcurrentMark::drainAllSATBBuffers() {
  2227   CMGlobalObjectClosure oc(this);
  2228   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  2229   satb_mq_set.set_closure(&oc);
  2231   while (satb_mq_set.apply_closure_to_completed_buffer()) {
  2232     if (verbose_medium())
  2233       gclog_or_tty->print_cr("[global] processed an SATB buffer");
  2236   // no need to check whether we should do this, as this is only
  2237   // called during an evacuation pause
  2238   satb_mq_set.iterate_closure_all_threads();
  2240   satb_mq_set.set_closure(NULL);
  2241   assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
  2244 void ConcurrentMark::markPrev(oop p) {
  2245   // Note we are overriding the read-only view of the prev map here, via
  2246   // the cast.
  2247   ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
  2250 void ConcurrentMark::clear(oop p) {
  2251   assert(p != NULL && p->is_oop(), "expected an oop");
  2252   HeapWord* addr = (HeapWord*)p;
  2253   assert(addr >= _nextMarkBitMap->startWord() &&
  2254          addr < _nextMarkBitMap->endWord(), "in a region");
  2256   _nextMarkBitMap->clear(addr);
  2259 void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
  2260   // Note we are overriding the read-only view of the prev map here, via
  2261   // the cast.
  2262   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
  2263   _nextMarkBitMap->clearRange(mr);
  2266 HeapRegion*
  2267 ConcurrentMark::claim_region(int task_num) {
  2268   // "checkpoint" the finger
  2269   HeapWord* finger = _finger;
  2271   // _heap_end will not change underneath our feet; it only changes at
  2272   // yield points.
  2273   while (finger < _heap_end) {
  2274     assert(_g1h->is_in_g1_reserved(finger), "invariant");
  2276     // is the gap between reading the finger and doing the CAS too long?
  2278     HeapRegion* curr_region   = _g1h->heap_region_containing(finger);
  2279     HeapWord*   bottom        = curr_region->bottom();
  2280     HeapWord*   end           = curr_region->end();
  2281     HeapWord*   limit         = curr_region->next_top_at_mark_start();
  2283     if (verbose_low())
  2284       gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
  2285                              "["PTR_FORMAT", "PTR_FORMAT"), "
  2286                              "limit = "PTR_FORMAT,
  2287                              task_num, curr_region, bottom, end, limit);
  2289     HeapWord* res =
  2290       (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
  2291     if (res == finger) {
  2292       // we succeeded
  2294       // notice that _finger == end cannot be guaranteed here since
  2295       // someone else might have moved the finger even further.
  2296       assert(_finger >= end, "the finger should have moved forward");
  2298       if (verbose_low())
  2299         gclog_or_tty->print_cr("[%d] we were successful with region = "
  2300                                PTR_FORMAT, task_num, curr_region);
  2302       if (limit > bottom) {
  2303         if (verbose_low())
  2304           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
  2305                                  "returning it ", task_num, curr_region);
  2306         return curr_region;
  2307       } else {
  2308         assert(limit == bottom,
  2309                "the region limit should be at bottom");
  2310         if (verbose_low())
  2311           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
  2312                                  "returning NULL", task_num, curr_region);
  2313         // we return NULL and the caller should try calling
  2314         // claim_region() again.
  2315         return NULL;
  2317     } else {
  2318       assert(_finger > finger, "the finger should have moved forward");
  2319       if (verbose_low())
  2320         gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
  2321                                "global finger = "PTR_FORMAT", "
  2322                                "our finger = "PTR_FORMAT,
  2323                                task_num, _finger, finger);
  2325       // read it again
  2326       finger = _finger;
  2330   return NULL;
  2333 void ConcurrentMark::oops_do(OopClosure* cl) {
  2334   if (_markStack.size() > 0 && verbose_low())
  2335     gclog_or_tty->print_cr("[global] scanning the global marking stack, "
  2336                            "size = %d", _markStack.size());
  2337   // we first iterate over the contents of the mark stack...
  2338   _markStack.oops_do(cl);
  2340   for (int i = 0; i < (int)_max_task_num; ++i) {
  2341     OopTaskQueue* queue = _task_queues->queue((int)i);
  2343     if (queue->size() > 0 && verbose_low())
  2344       gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
  2345                              "size = %d", i, queue->size());
  2347     // ...then over the contents of all the task queues.
  2348     queue->oops_do(cl);
  2351   // finally, invalidate any entries in the region stack that
  2352   // point into the collection set
  2353   if (_regionStack.invalidate_entries_into_cset()) {
  2354     // otherwise, any gray objects copied during the evacuation pause
  2355     // might not be visited.
  2356     assert(_should_gray_objects, "invariant");
  2360 void ConcurrentMark::clear_marking_state() {
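         // Reset the global marking state: empty the mark and region stacks,
         // clear their overflow flags, move the global finger back to the start
         // of the heap, and empty every task's local queue.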
  2361   _markStack.setEmpty();
  2362   _markStack.clear_overflow();
  2363   _regionStack.setEmpty();
  2364   _regionStack.clear_overflow();
  2365   clear_has_overflown();
  2366   _finger = _heap_start;
  2368   for (int i = 0; i < (int)_max_task_num; ++i) {
  2369     OopTaskQueue* queue = _task_queues->queue(i);
  2370     queue->set_empty();
  2374 void ConcurrentMark::print_stats() {
  2375   if (verbose_stats()) {
  2376     gclog_or_tty->print_cr("---------------------------------------------------------------------");
  2377     for (size_t i = 0; i < _active_tasks; ++i) {
  2378       _tasks[i]->print_stats();
  2379       gclog_or_tty->print_cr("---------------------------------------------------------------------");
  2384 class CSMarkOopClosure: public OopClosure {
  2385   friend class CSMarkBitMapClosure;
  2387   G1CollectedHeap* _g1h;
  2388   CMBitMap*        _bm;
  2389   ConcurrentMark*  _cm;
  2390   oop*             _ms;
  2391   jint*            _array_ind_stack;
  2392   int              _ms_size;
  2393   int              _ms_ind;
  2394   int              _array_increment;
  2396   bool push(oop obj, int arr_ind = 0) {
  2397     if (_ms_ind == _ms_size) {
  2398       gclog_or_tty->print_cr("Mark stack is full.");
  2399       return false;
  2401     _ms[_ms_ind] = obj;
  2402     if (obj->is_objArray()) _array_ind_stack[_ms_ind] = arr_ind;
  2403     _ms_ind++;
  2404     return true;
  2407   oop pop() {
  2408     if (_ms_ind == 0) return NULL;
  2409     else {
  2410       _ms_ind--;
  2411       return _ms[_ms_ind];
  2415   template <class T> bool drain() {
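           // Drain the local mark stack. Object arrays are scanned in chunks of
           // _array_increment elements; if elements remain, the array is pushed
           // back with the index of the next chunk before the current chunk is
           // processed.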
  2416     while (_ms_ind > 0) {
  2417       oop obj = pop();
  2418       assert(obj != NULL, "Since index was non-zero.");
  2419       if (obj->is_objArray()) {
  2420         jint arr_ind = _array_ind_stack[_ms_ind];
  2421         objArrayOop aobj = objArrayOop(obj);
  2422         jint len = aobj->length();
  2423         jint next_arr_ind = arr_ind + _array_increment;
  2424         if (next_arr_ind < len) {
  2425           push(obj, next_arr_ind);
  2427         // Now process this portion of this one.
  2428         int lim = MIN2(next_arr_ind, len);
  2429         for (int j = arr_ind; j < lim; j++) {
  2430           do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j));
  2433       } else {
  2434         obj->oop_iterate(this);
  2436       if (abort()) return false;
  2438     return true;
  2441 public:
  2442   CSMarkOopClosure(ConcurrentMark* cm, int ms_size) :
  2443     _g1h(G1CollectedHeap::heap()),
  2444     _cm(cm),
  2445     _bm(cm->nextMarkBitMap()),
  2446     _ms_size(ms_size), _ms_ind(0),
  2447     _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
  2448     _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
  2449     _array_increment(MAX2(ms_size/8, 16))
  2450   {}
  2452   ~CSMarkOopClosure() {
  2453     FREE_C_HEAP_ARRAY(oop, _ms);
  2454     FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
  2457   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  2458   virtual void do_oop(      oop* p) { do_oop_work(p); }
  2460   template <class T> void do_oop_work(T* p) {
  2461     T heap_oop = oopDesc::load_heap_oop(p);
  2462     if (oopDesc::is_null(heap_oop)) return;
  2463     oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
  2464     if (obj->is_forwarded()) {
  2465       // If the object has already been forwarded, we have to make sure
  2466       // that it's marked.  So follow the forwarding pointer.  Note that
  2467       // this does the right thing for self-forwarding pointers in the
  2468       // evacuation failure case.
  2469       obj = obj->forwardee();
  2471     HeapRegion* hr = _g1h->heap_region_containing(obj);
  2472     if (hr != NULL) {
  2473       if (hr->in_collection_set()) {
  2474         if (_g1h->is_obj_ill(obj)) {
  2475           _bm->mark((HeapWord*)obj);
  2476           if (!push(obj)) {
  2477             gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
  2478             set_abort();
  2481       } else {
  2482         // Outside the collection set; we need to gray it
  2483         _cm->deal_with_reference(obj);
  2487 };
  2489 class CSMarkBitMapClosure: public BitMapClosure {
  2490   G1CollectedHeap* _g1h;
  2491   CMBitMap*        _bitMap;
  2492   ConcurrentMark*  _cm;
  2493   CSMarkOopClosure _oop_cl;
  2494 public:
  2495   CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :
  2496     _g1h(G1CollectedHeap::heap()),
  2497     _bitMap(cm->nextMarkBitMap()),
  2498     _oop_cl(cm, ms_size)
  2499   {}
  2501   ~CSMarkBitMapClosure() {}
  2503   bool do_bit(size_t offset) {
  2504     // convert offset into a HeapWord*
  2505     HeapWord* addr = _bitMap->offsetToHeapWord(offset);
  2506     assert(addr >= _bitMap->startWord() && addr < _bitMap->endWord(),
  2507            "address out of range");
  2508     assert(_bitMap->isMarked(addr), "tautology");
  2509     oop obj = oop(addr);
  2510     if (!obj->is_forwarded()) {
  2511       if (!_oop_cl.push(obj)) return false;
  2512       if (UseCompressedOops) {
  2513         if (!_oop_cl.drain<narrowOop>()) return false;
  2514       } else {
  2515         if (!_oop_cl.drain<oop>()) return false;
  2518     // Otherwise...
  2519     return true;
  2521 };
  2524 class CompleteMarkingInCSHRClosure: public HeapRegionClosure {
  2525   CMBitMap* _bm;
  2526   CSMarkBitMapClosure _bit_cl;
  2527   enum SomePrivateConstants {
  2528     MSSize = 1000
  2529   };
  2530   bool _completed;
  2531 public:
  2532   CompleteMarkingInCSHRClosure(ConcurrentMark* cm) :
  2533     _bm(cm->nextMarkBitMap()),
  2534     _bit_cl(cm, MSSize),
  2535     _completed(true)
  2536   {}
  2538   ~CompleteMarkingInCSHRClosure() {}
  2540   bool doHeapRegion(HeapRegion* r) {
  2541     if (!r->evacuation_failed()) {
  2542       MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
  2543       if (!mr.is_empty()) {
  2544         if (!_bm->iterate(&_bit_cl, mr)) {
  2545           _completed = false;
  2546           return true;
  2550     return false;
  2553   bool completed() { return _completed; }
  2554 };
  2556 class ClearMarksInHRClosure: public HeapRegionClosure {
  2557   CMBitMap* _bm;
  2558 public:
  2559   ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { }
  2561   bool doHeapRegion(HeapRegion* r) {
  2562     if (!r->used_region().is_empty() && !r->evacuation_failed()) {
  2563       MemRegion usedMR = r->used_region();
  2564       _bm->clearRange(usedMR);
  2566     return false;
  2568 };
  2570 void ConcurrentMark::complete_marking_in_collection_set() {
  2571   G1CollectedHeap* g1h =  G1CollectedHeap::heap();
  2573   if (!g1h->mark_in_progress()) {
  2574     g1h->g1_policy()->record_mark_closure_time(0.0);
  2575     return;
  2578   int i = 1;
  2579   double start = os::elapsedTime();
  2580   while (true) {
  2581     i++;
  2582     CompleteMarkingInCSHRClosure cmplt(this);
  2583     g1h->collection_set_iterate(&cmplt);
  2584     if (cmplt.completed()) break;
  2586   double end_time = os::elapsedTime();
  2587   double elapsed_time_ms = (end_time - start) * 1000.0;
  2588   g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
  2589   if (PrintGCDetails) {
  2590     gclog_or_tty->print_cr("Mark closure took %5.2f ms.", elapsed_time_ms);
  2593   ClearMarksInHRClosure clr(nextMarkBitMap());
  2594   g1h->collection_set_iterate(&clr);
  2597 // The next two methods deal with the following optimisation. Some
  2598 // objects are gray by being marked and located above the finger. If
  2599 // they are copied, during an evacuation pause, below the finger then
  2600 // they need to be pushed on the stack. The observation is that, if
  2601 // there are no regions in the collection set located above the
  2602 // finger, then the above cannot happen, hence we do not need to
  2603 // explicitly gray any objects when copying them to below the
  2604 // finger. The global stack will be scanned to ensure that, if it
  2605 // points to objects being copied, it will update their
  2606 // location. There is a tricky situation with the gray objects in the
  2607 // region stack that are being copied, however. See the comment in
  2608 // newCSet().
  2610 void ConcurrentMark::newCSet() {
  2611   if (!concurrent_marking_in_progress())
  2612     // nothing to do if marking is not in progress
  2613     return;
  2615   // find what the lowest finger is among the global and local fingers
  2616   _min_finger = _finger;
  2617   for (int i = 0; i < (int)_max_task_num; ++i) {
  2618     CMTask* task = _tasks[i];
  2619     HeapWord* task_finger = task->finger();
  2620     if (task_finger != NULL && task_finger < _min_finger)
  2621       _min_finger = task_finger;
  2624   _should_gray_objects = false;
  2626   // This fixes a very subtle and frustrating bug. It might be the case
  2627   // that, during an evacuation pause, heap regions that contain
  2628   // objects that are gray (by being in regions contained in the
  2629   // region stack) are included in the collection set. Since such gray
  2630   // objects will be moved, and because it's not easy to redirect
  2631   // region stack entries to point to a new location (because objects
  2632   // in one region might be scattered to multiple regions after they
  2633   // are copied), one option is to ensure that all marked objects
  2634   // copied during a pause are pushed on the stack. Notice, however,
  2635   // that this problem can only happen when the region stack is not
  2636   // empty during an evacuation pause. So, we make the fix a bit less
  2637   // conservative and ensure that regions are pushed on the stack,
  2638   // irrespective of whether all collection set regions are below the
  2639   // finger, if the region stack is not empty. This is expected to be
  2640   // a rare case, so I don't think it's necessary to be smarter about it.
  2641   if (!region_stack_empty())
  2642     _should_gray_objects = true;
  2645 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
  2646   if (!concurrent_marking_in_progress())
  2647     return;
  2649   HeapWord* region_end = hr->end();
  2650   if (region_end > _min_finger)
  2651     _should_gray_objects = true;
  2654 // abandon current marking iteration due to a Full GC
  2655 void ConcurrentMark::abort() {
  2656   // Clear all marks to force marking thread to do nothing
  2657   _nextMarkBitMap->clearAll();
  2658   // Empty mark stack
  2659   clear_marking_state();
  2660   for (int i = 0; i < (int)_max_task_num; ++i)
  2661     _tasks[i]->clear_region_fields();
  2662   _has_aborted = true;
  2664   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  2665   satb_mq_set.abandon_partial_marking();
  2666   // This can be called either during or outside marking; we'll read
  2667   // the expected_active value from the SATB queue set.
  2668   satb_mq_set.set_active_all_threads(
  2669                                  false, /* new active value */
  2670                                  satb_mq_set.is_active() /* expected_active */);
  2673 static void print_ms_time_info(const char* prefix, const char* name,
  2674                                NumberSeq& ns) {
  2675   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
  2676                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
  2677   if (ns.num() > 0) {
  2678     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
  2679                            prefix, ns.sd(), ns.maximum());
  2683 void ConcurrentMark::print_summary_info() {
  2684   gclog_or_tty->print_cr(" Concurrent marking:");
  2685   print_ms_time_info("  ", "init marks", _init_times);
  2686   print_ms_time_info("  ", "remarks", _remark_times);
  2688     print_ms_time_info("     ", "final marks", _remark_mark_times);
  2689     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
  2692   print_ms_time_info("  ", "cleanups", _cleanup_times);
  2693   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
  2694                          _total_counting_time,
  2695                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
  2696                           (double)_cleanup_times.num()
  2697                          : 0.0));
  2698   if (G1ScrubRemSets) {
  2699     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
  2700                            _total_rs_scrub_time,
  2701                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
  2702                             (double)_cleanup_times.num()
  2703                            : 0.0));
  2705   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
  2706                          (_init_times.sum() + _remark_times.sum() +
  2707                           _cleanup_times.sum())/1000.0);
  2708   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
  2709                 "(%8.2f s marking, %8.2f s counting).",
  2710                 cmThread()->vtime_accum(),
  2711                 cmThread()->vtime_mark_accum(),
  2712                 cmThread()->vtime_count_accum());
  2715 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
  2716   _parallel_workers->print_worker_threads_on(st);
  2719 // Closures
  2720 // XXX: there seems to be a lot of code duplication here;
  2721 // should refactor and consolidate the shared code.
  2726 // We take a break if someone is trying to stop the world.
  2727 bool ConcurrentMark::do_yield_check(int worker_i) {
  2728   if (should_yield()) {
  2729     if (worker_i == 0)
  2730       _g1h->g1_policy()->record_concurrent_pause();
  2731     cmThread()->yield();
  2732     if (worker_i == 0)
  2733       _g1h->g1_policy()->record_concurrent_pause_end();
  2734     return true;
  2735   } else {
  2736     return false;
  2740 bool ConcurrentMark::should_yield() {
  2741   return cmThread()->should_yield();
  2744 bool ConcurrentMark::containing_card_is_marked(void* p) {
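         // The card bitmap is indexed by card number relative to the bottom of
         // the reserved heap; a card is considered marked iff its bit is set.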
  2745   size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
  2746   return _card_bm.at(offset >> CardTableModRefBS::card_shift);
  2749 bool ConcurrentMark::containing_cards_are_marked(void* start,
  2750                                                  void* last) {
  2751   return
  2752     containing_card_is_marked(start) &&
  2753     containing_card_is_marked(last);
  2756 #ifndef PRODUCT
  2757 // for debugging purposes
  2758 void ConcurrentMark::print_finger() {
  2759   gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
  2760                          _heap_start, _heap_end, _finger);
  2761   for (int i = 0; i < (int) _max_task_num; ++i) {
  2762     gclog_or_tty->print("   %d: "PTR_FORMAT, i, _tasks[i]->finger());
  2764   gclog_or_tty->print_cr("");
  2766 #endif
  2768 // Closure for iteration over bitmaps
  2769 class CMBitMapClosure : public BitMapClosure {
  2770 private:
  2771   // the bitmap that is being iterated over
  2772   CMBitMap*                   _nextMarkBitMap;
  2773   ConcurrentMark*             _cm;
  2774   CMTask*                     _task;
  2775   // true if we're scanning a heap region claimed by the task (so that
  2776   // we move the finger along), false if we're not, i.e. currently when
  2777   // scanning a heap region popped from the region stack (so that we
  2778   // do not move the task finger along; it'd be a mistake if we did so).
  2779   bool                        _scanning_heap_region;
  2781 public:
  2782   CMBitMapClosure(CMTask *task,
  2783                   ConcurrentMark* cm,
  2784                   CMBitMap* nextMarkBitMap)
  2785     :  _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
  2787   void set_scanning_heap_region(bool scanning_heap_region) {
  2788     _scanning_heap_region = scanning_heap_region;
  2791   bool do_bit(size_t offset) {
  2792     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
  2793     assert(_nextMarkBitMap->isMarked(addr), "invariant");
  2794     assert( addr < _cm->finger(), "invariant");
  2796     if (_scanning_heap_region) {
  2797       statsOnly( _task->increase_objs_found_on_bitmap() );
  2798       assert(addr >= _task->finger(), "invariant");
  2799       // We move that task's local finger along.
  2800       _task->move_finger_to(addr);
  2801     } else {
  2802       // We move the task's region finger along.
  2803       _task->move_region_finger_to(addr);
  2806     _task->scan_object(oop(addr));
  2807     // we only partially drain the local queue and global stack
  2808     _task->drain_local_queue(true);
  2809     _task->drain_global_stack(true);
  2811     // if the has_aborted flag has been raised, we need to bail out of
  2812     // the iteration
  2813     return !_task->has_aborted();
  2815 };
  2817 // Closure for iterating over objects, currently only used for
  2818 // processing SATB buffers.
  2819 class CMObjectClosure : public ObjectClosure {
  2820 private:
  2821   CMTask* _task;
  2823 public:
  2824   void do_object(oop obj) {
  2825     _task->deal_with_reference(obj);
  2828   CMObjectClosure(CMTask* task) : _task(task) { }
  2829 };
  2831 // Closure for iterating over object fields
  2832 class CMOopClosure : public OopClosure {
  2833 private:
  2834   G1CollectedHeap*   _g1h;
  2835   ConcurrentMark*    _cm;
  2836   CMTask*            _task;
  2838 public:
  2839   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  2840   virtual void do_oop(      oop* p) { do_oop_work(p); }
  2842   template <class T> void do_oop_work(T* p) {
  2843     assert(_g1h->is_in_g1_reserved((HeapWord*) p), "invariant");
  2844     assert(!_g1h->heap_region_containing((HeapWord*) p)->is_on_free_list(),
  2845            "invariant");
  2847     oop obj = oopDesc::load_decode_heap_oop(p);
  2848     if (_cm->verbose_high())
  2849       gclog_or_tty->print_cr("[%d] we're looking at location "
  2850                              "*"PTR_FORMAT" = "PTR_FORMAT,
  2851                              _task->task_id(), p, (void*) obj);
  2852     _task->deal_with_reference(obj);
  2855   CMOopClosure(G1CollectedHeap* g1h,
  2856                ConcurrentMark* cm,
  2857                CMTask* task)
  2858     : _g1h(g1h), _cm(cm), _task(task) { }
  2859 };
  2861 void CMTask::setup_for_region(HeapRegion* hr) {
  2862   // Separated the asserts so that we know which one fires.
  2863   assert(hr != NULL,
  2864         "claim_region() should have filtered out continues humongous regions");
  2865   assert(!hr->continuesHumongous(),
  2866         "claim_region() should have filtered out continues humongous regions");
  2868   if (_cm->verbose_low())
  2869     gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
  2870                            _task_id, hr);
  2872   _curr_region  = hr;
  2873   _finger       = hr->bottom();
  2874   update_region_limit();
  2877 void CMTask::update_region_limit() {
  2878   HeapRegion* hr            = _curr_region;
  2879   HeapWord* bottom          = hr->bottom();
  2880   HeapWord* limit           = hr->next_top_at_mark_start();
  2882   if (limit == bottom) {
  2883     if (_cm->verbose_low())
  2884       gclog_or_tty->print_cr("[%d] found an empty region "
  2885                              "["PTR_FORMAT", "PTR_FORMAT")",
  2886                              _task_id, bottom, limit);
  2887     // The region was collected underneath our feet.
  2888     // We set the finger to bottom to ensure that the bitmap
  2889     // iteration that will follow this will not do anything.
  2890     // (this is not a condition that holds when we set the region up,
  2891     // as the region is not supposed to be empty in the first place)
  2892     _finger = bottom;
  2893   } else if (limit >= _region_limit) {
  2894     assert(limit >= _finger, "peace of mind");
  2895   } else {
  2896     assert(limit < _region_limit, "only way to get here");
  2897     // This can happen under some pretty unusual circumstances.  An
  2898     // evacuation pause empties the region underneath our feet (NTAMS
  2899     // at bottom). We then do some allocation in the region (NTAMS
  2900     // stays at bottom), followed by the region being used as a GC
  2901     // alloc region (NTAMS will move to top() and the objects
  2902     // originally below it will be grayed). All objects now marked in
  2903     // the region are explicitly grayed, if below the global finger,
  2904     // and in fact we do not need to scan anything else. So, we simply
  2905     // set _finger to be limit to ensure that the bitmap iteration
  2906     // doesn't do anything.
  2907     _finger = limit;
  2910   _region_limit = limit;
  2913 void CMTask::giveup_current_region() {
  2914   assert(_curr_region != NULL, "invariant");
  2915   if (_cm->verbose_low())
  2916     gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
  2917                            _task_id, _curr_region);
  2918   clear_region_fields();
  2921 void CMTask::clear_region_fields() {
  2922   // Values for these three fields that indicate that we're not
  2923   // holding on to a region.
  2924   _curr_region   = NULL;
  2925   _finger        = NULL;
  2926   _region_limit  = NULL;
  2928   _region_finger = NULL;
  2931 void CMTask::reset(CMBitMap* nextMarkBitMap) {
  2932   guarantee(nextMarkBitMap != NULL, "invariant");
  2934   if (_cm->verbose_low())
  2935     gclog_or_tty->print_cr("[%d] resetting", _task_id);
  2937   _nextMarkBitMap                = nextMarkBitMap;
  2938   clear_region_fields();
  2940   _calls                         = 0;
  2941   _elapsed_time_ms               = 0.0;
  2942   _termination_time_ms           = 0.0;
  2943   _termination_start_time_ms     = 0.0;
  2945 #if _MARKING_STATS_
  2946   _local_pushes                  = 0;
  2947   _local_pops                    = 0;
  2948   _local_max_size                = 0;
  2949   _objs_scanned                  = 0;
  2950   _global_pushes                 = 0;
  2951   _global_pops                   = 0;
  2952   _global_max_size               = 0;
  2953   _global_transfers_to           = 0;
  2954   _global_transfers_from         = 0;
  2955   _region_stack_pops             = 0;
  2956   _regions_claimed               = 0;
  2957   _objs_found_on_bitmap          = 0;
  2958   _satb_buffers_processed        = 0;
  2959   _steal_attempts                = 0;
  2960   _steals                        = 0;
  2961   _aborted                       = 0;
  2962   _aborted_overflow              = 0;
  2963   _aborted_cm_aborted            = 0;
  2964   _aborted_yield                 = 0;
  2965   _aborted_timed_out             = 0;
  2966   _aborted_satb                  = 0;
  2967   _aborted_termination           = 0;
  2968 #endif // _MARKING_STATS_
  2971 bool CMTask::should_exit_termination() {
  2972   regular_clock_call();
  2973   // This is called when we are in the termination protocol. We should
  2974   // quit if, for some reason, this task wants to abort or the global
  2975   // stack is not empty (this means that we can get work from it).
  2976   return !_cm->mark_stack_empty() || has_aborted();
  2979 // This determines whether the method below will check both the local
  2980 // and global fingers when determining whether to push a gray object
  2981 // on the stack (value 1) or whether it will only check the global one
  2982 // (value 0). The tradeoffs are that the former will be a bit more
  2983 // accurate and possibly push less on the stack, but it might also be
  2984 // a little bit slower.
  2986 #define _CHECK_BOTH_FINGERS_      1
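// For example, switching to the cheaper, global-finger-only variant
// amounts to changing the line above to:
//
//   #define _CHECK_BOTH_FINGERS_      0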
  2988 void CMTask::deal_with_reference(oop obj) {
  2989   if (_cm->verbose_high())
  2990     gclog_or_tty->print_cr("[%d] we're dealing with reference = "PTR_FORMAT,
  2991                            _task_id, (void*) obj);
  2993   ++_refs_reached;
  2995   HeapWord* objAddr = (HeapWord*) obj;
  2996   assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
  2997   if (_g1h->is_in_g1_reserved(objAddr)) {
  2998     assert(obj != NULL, "is_in_g1_reserved should ensure this");
  2999     HeapRegion* hr =  _g1h->heap_region_containing(obj);
  3000     if (_g1h->is_obj_ill(obj, hr)) {
  3001       if (_cm->verbose_high())
  3002         gclog_or_tty->print_cr("[%d] "PTR_FORMAT" is not considered marked",
  3003                                _task_id, (void*) obj);
  3005       // we need to mark it first
  3006       if (_nextMarkBitMap->parMark(objAddr)) {
  3007         // No OrderAccess::store_load() is needed. It is implicit in the
  3008         // CAS done in parMark(objAddr) above
  3009         HeapWord* global_finger = _cm->finger();
  3011 #if _CHECK_BOTH_FINGERS_
  3012         // we will check both the local and global fingers
  3014         if (_finger != NULL && objAddr < _finger) {
  3015           if (_cm->verbose_high())
  3016             gclog_or_tty->print_cr("[%d] below the local finger ("PTR_FORMAT"), "
  3017                                    "pushing it", _task_id, _finger);
  3018           push(obj);
  3019         } else if (_curr_region != NULL && objAddr < _region_limit) {
  3020           // do nothing
  3021         } else if (objAddr < global_finger) {
  3022           // Notice that the global finger might be moving forward
  3023           // concurrently. This is not a problem. In the worst case, we
  3024           // mark the object while it is above the global finger and, by
  3025           // the time we read the global finger, it has moved forward
  3026           // past this object. In this case, the object will probably
  3027           // be visited when a task is scanning the region and will also
  3028           // be pushed on the stack. So, some duplicate work, but no
  3029           // correctness problems.
  3031           if (_cm->verbose_high())
  3032             gclog_or_tty->print_cr("[%d] below the global finger "
  3033                                    "("PTR_FORMAT"), pushing it",
  3034                                    _task_id, global_finger);
  3035           push(obj);
  3036         } else {
  3037           // do nothing
  3039 #else // _CHECK_BOTH_FINGERS_
  3040       // we will only check the global finger
  3042         if (objAddr < global_finger) {
  3043           // see long comment above
  3045           if (_cm->verbose_high())
  3046             gclog_or_tty->print_cr("[%d] below the global finger "
  3047                                    "("PTR_FORMAT"), pushing it",
  3048                                    _task_id, global_finger);
  3049           push(obj);
  3051 #endif // _CHECK_BOTH_FINGERS_
  3057 void CMTask::push(oop obj) {
  3058   HeapWord* objAddr = (HeapWord*) obj;
  3059   assert(_g1h->is_in_g1_reserved(objAddr), "invariant");
  3060   assert(!_g1h->heap_region_containing(objAddr)->is_on_free_list(),
  3061          "invariant");
  3062   assert(!_g1h->is_obj_ill(obj), "invariant");
  3063   assert(_nextMarkBitMap->isMarked(objAddr), "invariant");
  3065   if (_cm->verbose_high())
  3066     gclog_or_tty->print_cr("[%d] pushing "PTR_FORMAT, _task_id, (void*) obj);
  3068   if (!_task_queue->push(obj)) {
  3069     // The local task queue looks full. We need to push some entries
  3070     // to the global stack.
  3072     if (_cm->verbose_medium())
  3073       gclog_or_tty->print_cr("[%d] task queue overflow, "
  3074                              "moving entries to the global stack",
  3075                              _task_id);
  3076     move_entries_to_global_stack();
  3078     // this should succeed since, even if we overflow the global
  3079     // stack, we should have definitely removed some entries from the
  3080     // local queue. So, there must be space on it.
  3081     bool success = _task_queue->push(obj);
  3082     assert(success, "invariant");
  3085   statsOnly( int tmp_size = _task_queue->size();
  3086              if (tmp_size > _local_max_size)
  3087                _local_max_size = tmp_size;
  3088              ++_local_pushes );
  3091 void CMTask::reached_limit() {
  3092   assert(_words_scanned >= _words_scanned_limit ||
  3093          _refs_reached >= _refs_reached_limit ,
  3094          "shouldn't have been called otherwise");
  3095   regular_clock_call();
  3098 void CMTask::regular_clock_call() {
  3099   if (has_aborted())
  3100     return;
  3102   // First, we need to recalculate the words scanned and refs reached
  3103   // limits for the next clock call.
  3104   recalculate_limits();
  3106   // During the regular clock call we do the following
  3108   // (1) If an overflow has been flagged, then we abort.
  3109   if (_cm->has_overflown()) {
  3110     set_has_aborted();
  3111     return;
  3114   // If we are not concurrent (i.e. we're doing remark) we don't need
  3115   // to check anything else. The other steps are only needed during
  3116   // the concurrent marking phase.
  3117   if (!concurrent())
  3118     return;
  3120   // (2) If marking has been aborted for Full GC, then we also abort.
  3121   if (_cm->has_aborted()) {
  3122     set_has_aborted();
  3123     statsOnly( ++_aborted_cm_aborted );
  3124     return;
  3127   double curr_time_ms = os::elapsedVTime() * 1000.0;
  3129   // (3) If marking stats are enabled, then we update the step history.
  3130 #if _MARKING_STATS_
  3131   if (_words_scanned >= _words_scanned_limit)
  3132     ++_clock_due_to_scanning;
  3133   if (_refs_reached >= _refs_reached_limit)
  3134     ++_clock_due_to_marking;
  3136   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
  3137   _interval_start_time_ms = curr_time_ms;
  3138   _all_clock_intervals_ms.add(last_interval_ms);
  3140   if (_cm->verbose_medium()) {
  3141     gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
  3142                            "scanned = %d%s, refs reached = %d%s",
  3143                            _task_id, last_interval_ms,
  3144                            _words_scanned,
  3145                            (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
  3146                            _refs_reached,
  3147                            (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
  3149 #endif // _MARKING_STATS_
  3151   // (4) We check whether we should yield. If we have to, then we abort.
  3152   if (_cm->should_yield()) {
  3153     // We should yield. To do this we abort the task. The caller is
  3154     // responsible for yielding.
  3155     set_has_aborted();
  3156     statsOnly( ++_aborted_yield );
  3157     return;
  3160   // (5) We check whether we've reached our time quota. If we have,
  3161   // then we abort.
  3162   double elapsed_time_ms = curr_time_ms - _start_time_ms;
  3163   if (elapsed_time_ms > _time_target_ms) {
  3164     set_has_aborted();
  3165     _has_aborted_timed_out = true;
  3166     statsOnly( ++_aborted_timed_out );
  3167     return;
  3170   // (6) Finally, we check whether there are enough completed SATB
  3171   // buffers available for processing. If there are, we abort.
  3172   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  3173   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
  3174     if (_cm->verbose_low())
  3175       gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
  3176                              _task_id);
  3177     // we do need to process SATB buffers, so we'll abort and restart
  3178     // the marking task to do so
  3179     set_has_aborted();
  3180     statsOnly( ++_aborted_satb );
  3181     return;
  3185 void CMTask::recalculate_limits() {
  3186   _real_words_scanned_limit = _words_scanned + words_scanned_period;
  3187   _words_scanned_limit      = _real_words_scanned_limit;
  3189   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
  3190   _refs_reached_limit       = _real_refs_reached_limit;
  3193 void CMTask::decrease_limits() {
  3194   // This is called when we believe that we're going to do an infrequent
  3195   // operation which will increase the per byte scanned cost (i.e. move
  3196   // entries to/from the global stack). It basically tries to decrease the
  3197   // scanning limit so that the clock is called earlier.
  3199   if (_cm->verbose_medium())
  3200     gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
  3202   _words_scanned_limit = _real_words_scanned_limit -
  3203     3 * words_scanned_period / 4;
  3204   _refs_reached_limit  = _real_refs_reached_limit -
  3205     3 * refs_reached_period / 4;
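  // Net effect: the limits are pulled back by three quarters of a
  // period, so roughly a quarter of the usual scanning/marking work
  // remains before regular_clock_call() is triggered again.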
  3208 void CMTask::move_entries_to_global_stack() {
  3209   // local array where we'll store the entries that will be popped
  3210   // from the local queue
  3211   oop buffer[global_stack_transfer_size];
  3213   int n = 0;
  3214   oop obj;
  3215   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
  3216     buffer[n] = obj;
  3217     ++n;
  3220   if (n > 0) {
  3221     // we popped at least one entry from the local queue
  3223     statsOnly( ++_global_transfers_to; _local_pops += n );
  3225     if (!_cm->mark_stack_push(buffer, n)) {
  3226       if (_cm->verbose_low())
  3227         gclog_or_tty->print_cr("[%d] aborting due to global stack overflow", _task_id);
  3228       set_has_aborted();
  3229     } else {
  3230       // the transfer was successful
  3232       if (_cm->verbose_medium())
  3233         gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
  3234                                _task_id, n);
  3235       statsOnly( int tmp_size = _cm->mark_stack_size();
  3236                  if (tmp_size > _global_max_size)
  3237                    _global_max_size = tmp_size;
  3238                  _global_pushes += n );
  3242   // this operation was quite expensive, so decrease the limits
  3243   decrease_limits();
  3246 void CMTask::get_entries_from_global_stack() {
  3247   // local array where we'll store the entries that will be popped
  3248   // from the global stack.
  3249   oop buffer[global_stack_transfer_size];
  3250   int n;
  3251   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
  3252   assert(n <= global_stack_transfer_size,
  3253          "we should not pop more than the given limit");
  3254   if (n > 0) {
  3255     // yes, we did actually pop at least one entry
  3257     statsOnly( ++_global_transfers_from; _global_pops += n );
  3258     if (_cm->verbose_medium())
  3259       gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
  3260                              _task_id, n);
  3261     for (int i = 0; i < n; ++i) {
  3262       bool success = _task_queue->push(buffer[i]);
  3263       // We only call this when the local queue is empty or under a
  3264       // given target limit. So, we do not expect this push to fail.
  3265       assert(success, "invariant");
  3268     statsOnly( int tmp_size = _task_queue->size();
  3269                if (tmp_size > _local_max_size)
  3270                  _local_max_size = tmp_size;
  3271                _local_pushes += n );
  3274   // this operation was quite expensive, so decrease the limits
  3275   decrease_limits();
  3278 void CMTask::drain_local_queue(bool partially) {
  3279   if (has_aborted())
  3280     return;
  3282   // Decide what the target size is, depending whether we're going to
  3283   // drain it partially (so that other tasks can steal if they run out
  3284   // of things to do) or totally (at the very end).
  3285   size_t target_size;
  3286   if (partially)
  3287     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
  3288   else
  3289     target_size = 0;
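  // For instance, if GCDrainStackTargetSize is, say, 64 and the local
  // queue can hold far more than 3 * 64 entries, a partial drain stops
  // once about 64 entries remain, leaving them available for other
  // tasks to steal.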
  3291   if (_task_queue->size() > target_size) {
  3292     if (_cm->verbose_high())
  3293       gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
  3294                              _task_id, target_size);
  3296     oop obj;
  3297     bool ret = _task_queue->pop_local(obj);
  3298     while (ret) {
  3299       statsOnly( ++_local_pops );
  3301       if (_cm->verbose_high())
  3302         gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
  3303                                (void*) obj);
  3305       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
  3306       assert(!_g1h->heap_region_containing(obj)->is_on_free_list(),
  3307              "invariant");
  3309       scan_object(obj);
  3311       if (_task_queue->size() <= target_size || has_aborted())
  3312         ret = false;
  3313       else
  3314         ret = _task_queue->pop_local(obj);
  3317     if (_cm->verbose_high())
  3318       gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
  3319                              _task_id, _task_queue->size());
  3323 void CMTask::drain_global_stack(bool partially) {
  3324   if (has_aborted())
  3325     return;
  3327   // We have a policy to drain the local queue before we attempt to
  3328   // drain the global stack.
  3329   assert(partially || _task_queue->size() == 0, "invariant");
  3331   // Decide what the target size is, depending whether we're going to
  3332   // drain it partially (so that other tasks can steal if they run out
  3333   // of things to do) or totally (at the very end).  Notice that,
  3334   // because we move entries from the global stack in chunks or
  3335   // because another task might be doing the same, we might in fact
  3336   // drop below the target. But, this is not a problem.
  3337   size_t target_size;
  3338   if (partially)
  3339     target_size = _cm->partial_mark_stack_size_target();
  3340   else
  3341     target_size = 0;
  3343   if (_cm->mark_stack_size() > target_size) {
  3344     if (_cm->verbose_low())
  3345       gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
  3346                              _task_id, target_size);
  3348     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
  3349       get_entries_from_global_stack();
  3350       drain_local_queue(partially);
  3353     if (_cm->verbose_low())
  3354       gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
  3355                              _task_id, _cm->mark_stack_size());
  3359 // SATB Queue has several assumptions on whether to call the par or
  3360 // non-par versions of the methods. This is why some of the code is
  3361 // replicated. We should really get rid of the single-threaded version
  3362 // of the code to simplify things.
  3363 void CMTask::drain_satb_buffers() {
  3364   if (has_aborted())
  3365     return;
  3367   // We set this so that the regular clock knows that we're in the
  3368   // middle of draining buffers and doesn't set the abort flag when it
  3369   // notices that SATB buffers are available for draining. It'd be
  3370   // very counterproductive if it did that. :-)
  3371   _draining_satb_buffers = true;
  3373   CMObjectClosure oc(this);
  3374   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  3375   if (ParallelGCThreads > 0)
  3376     satb_mq_set.set_par_closure(_task_id, &oc);
  3377   else
  3378     satb_mq_set.set_closure(&oc);
  3380   // This keeps claiming and applying the closure to completed buffers
  3381   // until we run out of buffers or we need to abort.
  3382   if (ParallelGCThreads > 0) {
  3383     while (!has_aborted() &&
  3384            satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
  3385       if (_cm->verbose_medium())
  3386         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
  3387       statsOnly( ++_satb_buffers_processed );
  3388       regular_clock_call();
  3390   } else {
  3391     while (!has_aborted() &&
  3392            satb_mq_set.apply_closure_to_completed_buffer()) {
  3393       if (_cm->verbose_medium())
  3394         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
  3395       statsOnly( ++_satb_buffers_processed );
  3396       regular_clock_call();
  3400   if (!concurrent() && !has_aborted()) {
  3401     // We should only do this during remark.
  3402     if (ParallelGCThreads > 0)
  3403       satb_mq_set.par_iterate_closure_all_threads(_task_id);
  3404     else
  3405       satb_mq_set.iterate_closure_all_threads();
  3408   _draining_satb_buffers = false;
  3410   assert(has_aborted() ||
  3411          concurrent() ||
  3412          satb_mq_set.completed_buffers_num() == 0, "invariant");
  3414   if (ParallelGCThreads > 0)
  3415     satb_mq_set.set_par_closure(_task_id, NULL);
  3416   else
  3417     satb_mq_set.set_closure(NULL);
  3419   // again, this was a potentially expensive operation, decrease the
  3420   // limits to get the regular clock call early
  3421   decrease_limits();
  3424 void CMTask::drain_region_stack(BitMapClosure* bc) {
  3425   if (has_aborted())
  3426     return;
  3428   assert(_region_finger == NULL,
  3429          "it should be NULL when we're not scanning a region");
  3431   if (!_cm->region_stack_empty()) {
  3432     if (_cm->verbose_low())
  3433       gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
  3434                              _task_id, _cm->region_stack_size());
  3436     MemRegion mr = _cm->region_stack_pop_with_lock();
  3437     // it returns MemRegion() if the pop fails
  3438     statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
  3440     while (mr.start() != NULL) {
  3441       if (_cm->verbose_medium())
  3442         gclog_or_tty->print_cr("[%d] we are scanning region "
  3443                                "["PTR_FORMAT", "PTR_FORMAT")",
  3444                                _task_id, mr.start(), mr.end());
  3445       assert(mr.end() <= _cm->finger(),
  3446              "otherwise the region shouldn't be on the stack");
  3447       assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
  3448       if (_nextMarkBitMap->iterate(bc, mr)) {
  3449         assert(!has_aborted(),
  3450                "cannot abort the task without aborting the bitmap iteration");
  3452         // We finished iterating over the region without aborting.
  3453         regular_clock_call();
  3454         if (has_aborted())
  3455           mr = MemRegion();
  3456         else {
  3457           mr = _cm->region_stack_pop_with_lock();
  3458           // it returns MemRegion() if the pop fails
  3459           statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
  3461       } else {
  3462         assert(has_aborted(), "currently the only way to do so");
  3464         // The only way to abort the bitmap iteration is to return
  3465         // false from the do_bit() method. However, inside the
  3466         // do_bit() method we move the _region_finger to point to the
  3467         // object currently being looked at. So, if we bail out, we
  3468         // have definitely set _region_finger to something non-null.
  3469         assert(_region_finger != NULL, "invariant");
  3471         // The iteration was actually aborted. So now _region_finger
  3472         // points to the address of the object we last scanned. If we
  3473         // leave it there, when we restart this task, we will rescan
  3474         // the object. It is easy to avoid this. We move the finger by
  3475         // enough to point to the next possible object header (the
  3476         // bitmap knows by how much we need to move it as it knows its
  3477         // granularity).
  3478         MemRegion newRegion =
  3479           MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());
  3481         if (!newRegion.is_empty()) {
  3482           if (_cm->verbose_low()) {
  3483             gclog_or_tty->print_cr("[%d] pushing unscanned region "
  3484                                    "[" PTR_FORMAT "," PTR_FORMAT ") on region stack",
  3485                                    _task_id,
  3486                                    newRegion.start(), newRegion.end());
  3488           // Now push the part of the region we didn't scan on the
  3489           // region stack to make sure a task scans it later.
  3490           _cm->region_stack_push_with_lock(newRegion);
  3492         // break from while
  3493         mr = MemRegion();
  3495       _region_finger = NULL;
  3498     if (_cm->verbose_low())
  3499       gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
  3500                              _task_id, _cm->region_stack_size());
  3504 void CMTask::print_stats() {
  3505   gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
  3506                          _task_id, _calls);
  3507   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
  3508                          _elapsed_time_ms, _termination_time_ms);
  3509   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
  3510                          _step_times_ms.num(), _step_times_ms.avg(),
  3511                          _step_times_ms.sd());
  3512   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
  3513                          _step_times_ms.maximum(), _step_times_ms.sum());
  3515 #if _MARKING_STATS_
  3516   gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
  3517                          _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
  3518                          _all_clock_intervals_ms.sd());
  3519   gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
  3520                          _all_clock_intervals_ms.maximum(),
  3521                          _all_clock_intervals_ms.sum());
  3522   gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
  3523                          _clock_due_to_scanning, _clock_due_to_marking);
  3524   gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
  3525                          _objs_scanned, _objs_found_on_bitmap);
  3526   gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
  3527                          _local_pushes, _local_pops, _local_max_size);
  3528   gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
  3529                          _global_pushes, _global_pops, _global_max_size);
  3530   gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
  3531                          _global_transfers_to,_global_transfers_from);
  3532   gclog_or_tty->print_cr("  Regions: claimed = %d, Region Stack: pops = %d",
  3533                          _regions_claimed, _region_stack_pops);
  3534   gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
  3535   gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
  3536                          _steal_attempts, _steals);
  3537   gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
  3538   gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
  3539                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
  3540   gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
  3541                          _aborted_timed_out, _aborted_satb, _aborted_termination);
  3542 #endif // _MARKING_STATS_
  3545 /*****************************************************************************
  3547     The do_marking_step(time_target_ms) method is the building block
  3548     of the parallel marking framework. It can be called in parallel
  3549     with other invocations of do_marking_step() on different tasks
  3550     (but only one per task, obviously) and concurrently with the
  3551     mutator threads, or during remark, hence it eliminates the need
  3552     for two versions of the code. When called during remark, it will
  3553     pick up from where the task left off during the concurrent marking
  3554     phase. Interestingly, tasks are also claimable during evacuation
  3555     pauses, since do_marking_step() ensures that it aborts before
  3556     it needs to yield.
  3558     The data structures that it uses to do marking work are the
  3559     following:
  3561       (1) Marking Bitmap. If there are gray objects that appear only
  3562       on the bitmap (this happens either when dealing with an overflow
  3563       or when the initial marking phase has simply marked the roots
  3564       and didn't push them on the stack), then tasks claim heap
  3565       regions whose bitmap they then scan to find gray objects. A
  3566       global finger indicates where the end of the last claimed region
  3567       is. A local finger indicates how far into the region a task has
  3568       scanned. The two fingers are used to determine how to gray an
  3569       object (i.e. whether simply marking it is OK, as it will be
  3570       visited by a task in the future, or whether it needs to be also
  3571       pushed on a stack).
  3573       (2) Local Queue. The local queue of the task which is accessed
  3574       reasonably efficiently by the task. Other tasks can steal from
  3575       it when they run out of work. Throughout the marking phase, a
  3576       task attempts to keep its local queue short but not totally
  3577       empty, so that entries are available for stealing by other
  3578       tasks. Only when there is no more work will a task totally
  3579       drain its local queue.
  3581       (3) Global Mark Stack. This handles local queue overflow. During
  3582       marking only sets of entries are moved between it and the local
  3583       queues, as access to it requires a mutex and finer-grained
  3584       interaction with it might cause contention. If it
  3585       overflows, then the marking phase should restart and iterate
  3586       over the bitmap to identify gray objects. Throughout the marking
  3587       phase, tasks attempt to keep the global mark stack at a small
  3588       length but not totally empty, so that entries are available for
  3589       popping by other tasks. Only when there is no more work will
  3590       tasks totally drain the global mark stack.
  3592       (4) Global Region Stack. Entries on it correspond to areas of
  3593       the bitmap that need to be scanned since they contain gray
  3594       objects. Pushes on the region stack only happen during
  3595       evacuation pauses and typically correspond to areas covered by
  3596       GC LABS. If it overflows, then the marking phase should restart
  3597       and iterate over the bitmap to identify gray objects. Tasks will
  3598       try to totally drain the region stack as soon as possible.
  3600       (5) SATB Buffer Queue. This is where completed SATB buffers are
  3601       made available. Buffers are regularly removed from this queue
  3602       and scanned for roots, so that the queue doesn't get too
  3603       long. During remark, all completed buffers are processed, as
  3604       well as the filled in parts of any uncompleted buffers.
  3606     The do_marking_step() method tries to abort when the time target
  3607     has been reached. There are a few other cases when the
  3608     do_marking_step() method also aborts:
  3610       (1) When the marking phase has been aborted (after a Full GC).
  3612       (2) When a global overflow (either on the global stack or the
  3613       region stack) has been triggered. Before the task aborts, it
  3614       will actually sync up with the other tasks to ensure that all
  3615       the marking data structures (local queues, stacks, fingers etc.)
  3616       are re-initialised so that when do_marking_step() completes,
  3617       the marking phase can immediately restart.
  3619       (3) When enough completed SATB buffers are available. The
  3620       do_marking_step() method only tries to drain SATB buffers right
  3621       at the beginning. So, if enough buffers are available, the
  3622       marking step aborts and the SATB buffers are processed at
  3623       the beginning of the next invocation.
  3625       (4) To yield. When we have to yield, we abort and yield
  3626       right at the end of do_marking_step(). This saves us from a lot
  3627       of hassle as, by yielding we might allow a Full GC. If this
  3628       happens then objects will be compacted underneath our feet, the
  3629       heap might shrink, etc. We save checking for this by just
  3630       aborting and doing the yield right at the end.
  3632     From the above it follows that the do_marking_step() method should
  3633     be called in a loop (or, otherwise, regularly) until it completes.
  3635     If a marking step completes without its has_aborted() flag being
  3636     true, it means it has completed the current marking phase (and
  3637     also all other marking tasks have done so and have all synced up).
  3639     A method called regular_clock_call() is invoked "regularly" (in
  3640     sub ms intervals) throughout marking. It is this clock method that
  3641     checks all the abort conditions which were mentioned above and
  3642     decides when the task should abort. A work-based scheme is used to
  3643     trigger this clock method: when the number of object words the
  3644     marking phase has scanned or the number of references the marking
  3645     phase has visited reach a given limit. Additional invocations to
  3646     the clock method have been planted in a few other strategic places
  3647     too. The initial reason for the clock method was to avoid calling
  3648     vtime too regularly, as it is quite expensive. So, once it was in
  3649     place, it was natural to piggy-back all the other conditions on it
  3650     too and not constantly check them throughout the code.
  3652  *****************************************************************************/
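// In outline, a caller is expected to drive marking with a loop along
// these lines (a simplified sketch with illustrative names, not the
// actual caller code):
//
//   do {
//     the_task->do_marking_step(target_ms);
//     // ...yield and/or handle overflow/full-GC aborts between steps...
//   } while (the_task->has_aborted() && !_cm->has_aborted());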
  3654 void CMTask::do_marking_step(double time_target_ms) {
  3655   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
  3656   assert(concurrent() == _cm->concurrent(), "they should be the same");
  3658   assert(concurrent() || _cm->region_stack_empty(),
  3659          "the region stack should have been cleared before remark");
  3660   assert(_region_finger == NULL,
  3661          "this should be non-null only when a region is being scanned");
  3663   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
  3664   assert(_task_queues != NULL, "invariant");
  3665   assert(_task_queue != NULL, "invariant");
  3666   assert(_task_queues->queue(_task_id) == _task_queue, "invariant");
  3668   assert(!_claimed,
  3669          "only one thread should claim this task at any one time");
  3671   // OK, this doesn't safeguard against all possible scenarios, as it is
  3672   // possible for two threads to set the _claimed flag at the same
  3673   // time. But it is only for debugging purposes anyway and it will
  3674   // catch most problems.
  3675   _claimed = true;
  3677   _start_time_ms = os::elapsedVTime() * 1000.0;
  3678   statsOnly( _interval_start_time_ms = _start_time_ms );
  3680   double diff_prediction_ms =
  3681     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
  3682   _time_target_ms = time_target_ms - diff_prediction_ms;
  3684   // set up the variables that are used in the work-based scheme to
  3685   // call the regular clock method
  3686   _words_scanned = 0;
  3687   _refs_reached  = 0;
  3688   recalculate_limits();
  3690   // clear all flags
  3691   clear_has_aborted();
  3692   _has_aborted_timed_out = false;
  3693   _draining_satb_buffers = false;
  3695   ++_calls;
  3697   if (_cm->verbose_low())
  3698     gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
  3699                            "target = %1.2lfms >>>>>>>>>>",
  3700                            _task_id, _calls, _time_target_ms);
  3702   // Set up the bitmap and oop closures. Anything that uses them is
  3703   // eventually called from this method, so it is OK to allocate these
  3704   // statically.
  3705   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
  3706   CMOopClosure    oop_closure(_g1h, _cm, this);
  3707   set_oop_closure(&oop_closure);
  3709   if (_cm->has_overflown()) {
  3710     // This can happen if the region stack or the mark stack overflows
  3711     // during a GC pause and this task, after a yield point,
  3712     // restarts. We have to abort as we need to get into the overflow
  3713     // protocol which happens right at the end of this task.
  3714     set_has_aborted();
  3717   // First drain any available SATB buffers. After this, we will not
  3718   // look at SATB buffers before the next invocation of this method.
  3719   // If enough completed SATB buffers are queued up, the regular clock
  3720   // will abort this task so that it restarts.
  3721   drain_satb_buffers();
  3722   // ...then partially drain the local queue and the global stack
  3723   drain_local_queue(true);
  3724   drain_global_stack(true);
  3726   // Then totally drain the region stack.  We will not look at
  3727   // it again before the next invocation of this method. Entries on
  3728   // the region stack are only added during evacuation pauses, for
  3729   // which we have to yield. When we do, we abort the task anyway so
  3730   // it will look at the region stack again when it restarts.
  3731   bitmap_closure.set_scanning_heap_region(false);
  3732   drain_region_stack(&bitmap_closure);
  3733   // ...then partially drain the local queue and the global stack
  3734   drain_local_queue(true);
  3735   drain_global_stack(true);
  3737   do {
  3738     if (!has_aborted() && _curr_region != NULL) {
  3739       // This means that we're already holding on to a region.
  3740       assert(_finger != NULL, "if region is not NULL, then the finger "
  3741              "should not be NULL either");
  3743       // We might have restarted this task after an evacuation pause
  3744       // which might have evacuated the region we're holding on to
  3745       // underneath our feet. Let's read its limit again to make sure
  3746       // that we do not iterate over a region of the heap that
  3747       // contains garbage (update_region_limit() will also move
  3748       // _finger to the start of the region if it is found empty).
  3749       update_region_limit();
  3750       // We will start from _finger not from the start of the region,
  3751       // as we might be restarting this task after aborting half-way
  3752       // through scanning this region. In this case, _finger points to
  3753       // the address where we last found a marked object. If this is a
  3754       // fresh region, _finger points to start().
  3755       MemRegion mr = MemRegion(_finger, _region_limit);
  3757       if (_cm->verbose_low())
  3758         gclog_or_tty->print_cr("[%d] we're scanning part "
  3759                                "["PTR_FORMAT", "PTR_FORMAT") "
  3760                                "of region "PTR_FORMAT,
  3761                                _task_id, _finger, _region_limit, _curr_region);
  3763       // Let's iterate over the bitmap of the part of the
  3764       // region that is left.
  3765       bitmap_closure.set_scanning_heap_region(true);
  3766       if (mr.is_empty() ||
  3767           _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
  3768         // We successfully completed iterating over the region. Now,
  3769         // let's give up the region.
  3770         giveup_current_region();
  3771         regular_clock_call();
  3772       } else {
  3773         assert(has_aborted(), "currently the only way to do so");
  3774         // The only way to abort the bitmap iteration is to return
  3775         // false from the do_bit() method. However, inside the
  3776         // do_bit() method we move the _finger to point to the
  3777         // object currently being looked at. So, if we bail out, we
  3778         // have definitely set _finger to something non-null.
  3779         assert(_finger != NULL, "invariant");
  3781         // Region iteration was actually aborted. So now _finger
  3782         // points to the address of the object we last scanned. If we
  3783         // leave it there, when we restart this task, we will rescan
  3784         // the object. It is easy to avoid this. We move the finger by
  3785         // enough to point to the next possible object header (the
  3786         // bitmap knows by how much we need to move it as it knows its
  3787         // granularity).
  3788         assert(_finger < _region_limit, "invariant");
  3789         HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
  3790         // Check if bitmap iteration was aborted while scanning the last object
  3791         if (new_finger >= _region_limit) {
  3792             giveup_current_region();
  3793         } else {
  3794             move_finger_to(new_finger);
  3798     // At this point we have either completed iterating over the
  3799     // region we were holding on to, or we have aborted.
  3801     // We then partially drain the local queue and the global stack.
  3802     // (Do we really need this?)
  3803     drain_local_queue(true);
  3804     drain_global_stack(true);
  3806     // Read the note on the claim_region() method on why it might
  3807     // return NULL with potentially more regions available for
  3808     // claiming and why we have to check out_of_regions() to determine
  3809     // whether we're done or not.
  3810     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
  3811       // We are going to try to claim a new region. We should have
  3812       // given up on the previous one.
  3813       // Separated the asserts so that we know which one fires.
  3814       assert(_curr_region  == NULL, "invariant");
  3815       assert(_finger       == NULL, "invariant");
  3816       assert(_region_limit == NULL, "invariant");
  3817       if (_cm->verbose_low())
  3818         gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
  3819       HeapRegion* claimed_region = _cm->claim_region(_task_id);
  3820       if (claimed_region != NULL) {
  3821         // Yes, we managed to claim one
  3822         statsOnly( ++_regions_claimed );
  3824         if (_cm->verbose_low())
  3825           gclog_or_tty->print_cr("[%d] we successfully claimed "
  3826                                  "region "PTR_FORMAT,
  3827                                  _task_id, claimed_region);
  3829         setup_for_region(claimed_region);
  3830         assert(_curr_region == claimed_region, "invariant");
  3832       // It is important to call the regular clock here. It might take
  3833       // a while to claim a region if, for example, we hit a large
  3834       // block of empty regions. So we need to call the regular clock
  3835       // method once round the loop to make sure it's called
  3836       // frequently enough.
  3837       regular_clock_call();
  3840     if (!has_aborted() && _curr_region == NULL) {
  3841       assert(_cm->out_of_regions(),
  3842              "at this point we should be out of regions");
  3844   } while ( _curr_region != NULL && !has_aborted());
  3846   if (!has_aborted()) {
  3847     // We cannot check whether the global stack is empty, since other
  3848     // tasks might be pushing objects to it concurrently. We also cannot
  3849     // check if the region stack is empty because if a thread is aborting
  3850     // it can push a partially done region back.
  3851     assert(_cm->out_of_regions(),
  3852            "at this point we should be out of regions");
  3854     if (_cm->verbose_low())
  3855       gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
  3857     // Try to reduce the number of available SATB buffers so that
  3858     // remark has less work to do.
  3859     drain_satb_buffers();
  3862   // Since we've done everything else, we can now totally drain the
  3863   // local queue and global stack.
  3864   drain_local_queue(false);
  3865   drain_global_stack(false);
  3867   // Attempt at work stealing from other task's queues.
  3868   if (!has_aborted()) {
  3869     // We have not aborted. This means that we have finished all that
  3870     // we could. Let's try to do some stealing...
  3872     // We cannot check whether the global stack is empty, since other
  3873     // tasks might be pushing objects to it concurrently. We also cannot
  3874     // check if the region stack is empty because if a thread is aborting
  3875     // it can push a partially done region back.
  3876     assert(_cm->out_of_regions() && _task_queue->size() == 0,
  3877            "only way to reach here");
  3879     if (_cm->verbose_low())
  3880       gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
  3882     while (!has_aborted()) {
  3883       oop obj;
  3884       statsOnly( ++_steal_attempts );
  3886       if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
  3887         if (_cm->verbose_medium())
  3888           gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
  3889                                  _task_id, (void*) obj);
  3891         statsOnly( ++_steals );
  3893         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
  3894                "any stolen object should be marked");
  3895         scan_object(obj);
  3897         // And since we're towards the end, let's totally drain the
  3898         // local queue and global stack.
  3899         drain_local_queue(false);
  3900         drain_global_stack(false);
  3901       } else {
  3902         break;
  3907   // We still haven't aborted. Now, let's try to get into the
  3908   // termination protocol.
  3909   if (!has_aborted()) {
  3910     // We cannot check whether the global stack is empty, since other
  3911     // tasks might be concurrently pushing objects on it. We also cannot
  3912     // check if the region stack is empty because if a thread is aborting
  3913     // it can push a partially done region back.
  3914     // Separated the asserts so that we know which one fires.
  3915     assert(_cm->out_of_regions(), "only way to reach here");
  3916     assert(_task_queue->size() == 0, "only way to reach here");
  3918     if (_cm->verbose_low())
  3919       gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
  3921     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
  3922     // The CMTask class also extends the TerminatorTerminator class,
  3923     // hence its should_exit_termination() method will also decide
  3924     // whether to exit the termination protocol or not.
  3925     bool finished = _cm->terminator()->offer_termination(this);
  3926     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
  3927     _termination_time_ms +=
  3928       termination_end_time_ms - _termination_start_time_ms;
  3930     if (finished) {
  3931       // We're all done.
  3933       if (_task_id == 0) {
  3934         // let's allow task 0 to do this
  3935         if (concurrent()) {
  3936           assert(_cm->concurrent_marking_in_progress(), "invariant");
  3937           // we need to set this to false before the next
  3938           // safepoint. This way we ensure that the marking phase
  3939           // doesn't observe any more heap expansions.
  3940           _cm->clear_concurrent_marking_in_progress();
  3944       // We can now guarantee that the global stack is empty, since
  3945       // all other tasks have finished. We separated the guarantees so
  3946       // that, if a condition is false, we can immediately find out
  3947       // which one.
  3948       guarantee(_cm->out_of_regions(), "only way to reach here");
  3949       guarantee(_cm->region_stack_empty(), "only way to reach here");
  3950       guarantee(_cm->mark_stack_empty(), "only way to reach here");
  3951       guarantee(_task_queue->size() == 0, "only way to reach here");
  3952       guarantee(!_cm->has_overflown(), "only way to reach here");
  3953       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
  3954       guarantee(!_cm->region_stack_overflow(), "only way to reach here");
  3956       if (_cm->verbose_low())
  3957         gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
  3958     } else {
  3959       // Apparently there's more work to do. Let's abort this task. It
  3960       // will restart it and we can hopefully find more things to do.
  3962       if (_cm->verbose_low())
  3963         gclog_or_tty->print_cr("[%d] apparently there is more work to do", _task_id);
  3965       set_has_aborted();
  3966       statsOnly( ++_aborted_termination );
  3970   // Mainly for debugging purposes to make sure that a pointer to the
  3971   // closure which was statically allocated in this frame doesn't
  3972   // escape it by accident.
  3973   set_oop_closure(NULL);
  3974   double end_time_ms = os::elapsedVTime() * 1000.0;
  3975   double elapsed_time_ms = end_time_ms - _start_time_ms;
  3976   // Update the step history.
  3977   _step_times_ms.add(elapsed_time_ms);
  3979   if (has_aborted()) {
  3980     // The task was aborted for some reason.
  3982     statsOnly( ++_aborted );
  3984     if (_has_aborted_timed_out) {
  3985       double diff_ms = elapsed_time_ms - _time_target_ms;
  3986       // Keep statistics of how well we did with respect to hitting
  3987       // our target only if we actually timed out (if we aborted for
  3988       // other reasons, then the results might get skewed).
  3989       _marking_step_diffs_ms.add(diff_ms);
  3992     if (_cm->has_overflown()) {
  3993       // This is the interesting one. We aborted because a global
  3994       // overflow was raised. This means we have to restart the
  3995       // marking phase and start iterating over regions. However, in
  3996       // order to do this we have to make sure that all tasks stop
  3997       // what they are doing and re-initialise in a safe manner. We
  3998       // will achieve this with the use of two barrier sync points.
  4000       if (_cm->verbose_low())
  4001         gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
  4003       _cm->enter_first_sync_barrier(_task_id);
  4004       // When we exit this sync barrier we know that all tasks have
  4005       // stopped doing marking work. So, it's now safe to
  4006       // re-initialise our data structures. At the end of this method,
  4007       // task 0 will clear the global data structures.
  4009       statsOnly( ++_aborted_overflow );
  4011       // We clear the local state of this task...
  4012       clear_region_fields();
  4014       // ...and enter the second barrier.
  4015       _cm->enter_second_sync_barrier(_task_id);
  4016       // At this point everything has been re-initialised and we're
  4017       // ready to restart.
  4020     if (_cm->verbose_low()) {
  4021       gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
  4022                              "elapsed = %1.2lfms <<<<<<<<<<",
  4023                              _task_id, _time_target_ms, elapsed_time_ms);
  4024       if (_cm->has_aborted())
  4025         gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
  4026                                _task_id);
  4028   } else {
  4029     if (_cm->verbose_low())
  4030       gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
  4031                              "elapsed = %1.2lfms <<<<<<<<<<",
  4032                              _task_id, _time_target_ms, elapsed_time_ms);
  4035   _claimed = false;
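// CMTask constructor. The task is handed its own queue plus the shared
// set of all task queues (used for work stealing); the mark bitmap is
// not set here but later, via reset().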
  4038 CMTask::CMTask(int task_id,
  4039                ConcurrentMark* cm,
  4040                CMTaskQueue* task_queue,
  4041                CMTaskQueueSet* task_queues)
  4042   : _g1h(G1CollectedHeap::heap()),
  4043     _task_id(task_id), _cm(cm),
  4044     _claimed(false),
  4045     _nextMarkBitMap(NULL), _hash_seed(17),
  4046     _task_queue(task_queue),
  4047     _task_queues(task_queues),
  4048     _oop_closure(NULL) {
  4049   guarantee(task_queue != NULL, "invariant");
  4050   guarantee(task_queues != NULL, "invariant");
  4052   statsOnly( _clock_due_to_scanning = 0;
  4053              _clock_due_to_marking  = 0 );
  4055   _marking_step_diffs_ms.add(0.5);
