src/share/vm/gc_implementation/g1/concurrentMark.cpp

author      tonyp
date        Thu, 15 Apr 2010 18:45:30 -0400
changeset   1825:f9ec1e4bbb44
parent      1823:7666957bc44d
child       1902:fb1a39993f69
permissions -rw-r--r--

6939027: G1: assertion failure during the concurrent phase of cleanup
Summary: The outgoing region map is not maintained properly and it's causing an assert failure. Given that we don't actually use it, I'm removing it. I'm piggy-backing a small change on this which removes a message that is printed before a Full GC when DisableExplicitGC is set.
Reviewed-by: apetrusenko, ysr

     1 /*
     2  * Copyright 2001-2009 Sun Microsystems, Inc.  All Rights Reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.
     8  *
     9  * This code is distributed in the hope that it will be useful, but WITHOUT
    10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    12  * version 2 for more details (a copy is included in the LICENSE file that
    13  * accompanied this code).
    14  *
    15  * You should have received a copy of the GNU General Public License version
    16  * 2 along with this work; if not, write to the Free Software Foundation,
    17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    18  *
    19  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
    20  * CA 95054 USA or visit www.sun.com if you need additional information or
    21  * have any questions.
    22  *
    23  */
    25 #include "incls/_precompiled.incl"
    26 #include "incls/_concurrentMark.cpp.incl"
    28 //
    29 // CMS Bit Map Wrapper
    31 CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter):
    32   _bm((uintptr_t*)NULL,0),
    33   _shifter(shifter) {
    34   _bmStartWord = (HeapWord*)(rs.base());
    35   _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
    36   ReservedSpace brs(ReservedSpace::allocation_align_size_up(
    37                      (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
    39   guarantee(brs.is_reserved(), "couldn't allocate CMS bit map");
    40   // For now we'll just commit all of the bit map up front.
    41   // Later on we'll try to be more parsimonious with swap.
    42   guarantee(_virtual_space.initialize(brs, brs.size()),
    43             "couldn't reseve backing store for CMS bit map");
    44   assert(_virtual_space.committed_size() == brs.size(),
    45          "didn't reserve backing store for all of CMS bit map?");
    46   _bm.set_map((uintptr_t*)_virtual_space.low());
    47   assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
    48          _bmWordSize, "inconsistency in bit map sizing");
    49   _bm.set_size(_bmWordSize >> _shifter);
    50 }
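// A rough worked example of the sizing above (a sketch, assuming 64-bit
// HeapWords and _shifter == 0, i.e. one bitmap bit per 8-byte heap word):
//
//   bitmap bits  = _bmWordSize >> _shifter  = H / 8   (H = heap size in bytes)
//   bitmap bytes = bits / BitsPerByte       = H / 64
//
// so a 1 GB heap needs ~16 MB of backing store per marking bitmap, or
// ~32 MB for the prev/next pair that ConcurrentMark constructs below.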
    52 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
    53                                                HeapWord* limit) const {
    54   // First we must round addr *up* to a possible object boundary.
    55   addr = (HeapWord*)align_size_up((intptr_t)addr,
    56                                   HeapWordSize << _shifter);
    57   size_t addrOffset = heapWordToOffset(addr);
    58   if (limit == NULL) limit = _bmStartWord + _bmWordSize;
    59   size_t limitOffset = heapWordToOffset(limit);
    60   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
    61   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
    62   assert(nextAddr >= addr, "get_next_one postcondition");
    63   assert(nextAddr == limit || isMarked(nextAddr),
    64          "get_next_one postcondition");
    65   return nextAddr;
    66 }
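// Typical traversal built on top of this (a sketch; process_obj() is a
// hypothetical callback, not part of this file):
//
//   HeapWord* cur = bottom;
//   while (cur < top) {
//     cur = bm->getNextMarkedWordAddress(cur, top);
//     if (cur == top) break;   // no more marked words in [cur, top)
//     oop obj = oop(cur);
//     process_obj(obj);
//     cur += obj->size();      // step past the object just visited
//   }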
    68 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
    69                                                  HeapWord* limit) const {
    70   size_t addrOffset = heapWordToOffset(addr);
    71   if (limit == NULL) limit = _bmStartWord + _bmWordSize;
    72   size_t limitOffset = heapWordToOffset(limit);
    73   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
    74   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
    75   assert(nextAddr >= addr, "get_next_one postcondition");
    76   assert(nextAddr == limit || !isMarked(nextAddr),
    77          "get_next_one postcondition");
    78   return nextAddr;
    79 }
    81 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
    82   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
    83   return (int) (diff >> _shifter);
    84 }
    86 bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
    87   HeapWord* left  = MAX2(_bmStartWord, mr.start());
    88   HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
    89   if (right > left) {
    90     // Right-open interval [leftOffset, rightOffset).
    91     return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
    92   } else {
    93     return true;
    94   }
    95 }
    97 void CMBitMapRO::mostly_disjoint_range_union(BitMap*   from_bitmap,
    98                                              size_t    from_start_index,
    99                                              HeapWord* to_start_word,
   100                                              size_t    word_num) {
   101   _bm.mostly_disjoint_range_union(from_bitmap,
   102                                   from_start_index,
   103                                   heapWordToOffset(to_start_word),
   104                                   word_num);
   105 }
   107 #ifndef PRODUCT
   108 bool CMBitMapRO::covers(ReservedSpace rs) const {
   109   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
   110   assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
   111          "size inconsistency");
   112   return _bmStartWord == (HeapWord*)(rs.base()) &&
   113          _bmWordSize  == rs.size()>>LogHeapWordSize;
   114 }
   115 #endif
   117 void CMBitMap::clearAll() {
   118   _bm.clear();
   119   return;
   120 }
   122 void CMBitMap::markRange(MemRegion mr) {
   123   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
   124   assert(!mr.is_empty(), "unexpected empty region");
   125   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
   126           ((HeapWord *) mr.end())),
   127          "markRange memory region end is not card aligned");
   128   // convert address range into offset range
   129   _bm.at_put_range(heapWordToOffset(mr.start()),
   130                    heapWordToOffset(mr.end()), true);
   131 }
   133 void CMBitMap::clearRange(MemRegion mr) {
   134   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
   135   assert(!mr.is_empty(), "unexpected empty region");
   136   // convert address range into offset range
   137   _bm.at_put_range(heapWordToOffset(mr.start()),
   138                    heapWordToOffset(mr.end()), false);
   139 }
   141 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
   142                                             HeapWord* end_addr) {
   143   HeapWord* start = getNextMarkedWordAddress(addr);
   144   start = MIN2(start, end_addr);
   145   HeapWord* end   = getNextUnmarkedWordAddress(start);
   146   end = MIN2(end, end_addr);
   147   assert(start <= end, "Consistency check");
   148   MemRegion mr(start, end);
   149   if (!mr.is_empty()) {
   150     clearRange(mr);
   151   }
   152   return mr;
   153 }
   155 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
   156   _base(NULL), _cm(cm)
   157 #ifdef ASSERT
   158   , _drain_in_progress(false)
   159   , _drain_in_progress_yields(false)
   160 #endif
   161 {}
   163 void CMMarkStack::allocate(size_t size) {
   164   _base = NEW_C_HEAP_ARRAY(oop, size);
   165   if (_base == NULL)
   166     vm_exit_during_initialization("Failed to allocate "
   167                                   "CM mark stack");
   168   _index = 0;
   169   // QQQQ cast ...
   170   _capacity = (jint) size;
   171   _oops_do_bound = -1;
   172   NOT_PRODUCT(_max_depth = 0);
   173 }
   175 CMMarkStack::~CMMarkStack() {
   176   if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base);
   177 }
   179 void CMMarkStack::par_push(oop ptr) {
   180   while (true) {
   181     if (isFull()) {
   182       _overflow = true;
   183       return;
   184     }
   185     // Otherwise...
   186     jint index = _index;
   187     jint next_index = index+1;
   188     jint res = Atomic::cmpxchg(next_index, &_index, index);
   189     if (res == index) {
   190       _base[index] = ptr;
   191       // Note that we don't maintain this atomically.  We could, but it
   192       // doesn't seem necessary.
   193       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
   194       return;
   195     }
   196     // Otherwise, we need to try again.
   197   }
   198 }
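// par_push() uses the claim-then-write idiom: a pusher first claims slot
// 'index' by CAS-advancing _index, and only the winner of the cmpxchg
// writes _base[index], so concurrent pushers never store to the same slot.
// The same idiom in compact form (a sketch, using the jint overload of
// Atomic::cmpxchg seen above):
//
//   jint idx;
//   do {
//     idx = _index;
//     if (idx == _capacity) { _overflow = true; return; }   // full
//   } while (Atomic::cmpxchg(idx + 1, &_index, idx) != idx);
//   _base[idx] = ptr;   // slot idx is now exclusively ours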
   200 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
   201   while (true) {
   202     if (isFull()) {
   203       _overflow = true;
   204       return;
   205     }
   206     // Otherwise...
   207     jint index = _index;
   208     jint next_index = index + n;
   209     if (next_index > _capacity) {
   210       _overflow = true;
   211       return;
   212     }
   213     jint res = Atomic::cmpxchg(next_index, &_index, index);
   214     if (res == index) {
   215       for (int i = 0; i < n; i++) {
   216         int ind = index + i;
   217         assert(ind < _capacity, "By overflow test above.");
   218         _base[ind] = ptr_arr[i];
   219       }
   220       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
   221       return;
   222     }
   223     // Otherwise, we need to try again.
   224   }
   225 }
   228 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
   229   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
   230   jint start = _index;
   231   jint next_index = start + n;
   232   if (next_index > _capacity) {
   233     _overflow = true;
   234     return;
   235   }
   236   // Otherwise.
   237   _index = next_index;
   238   for (int i = 0; i < n; i++) {
   239     int ind = start + i;
   240     assert(ind < _capacity, "By overflow test above.");
   241     _base[ind] = ptr_arr[i];
   242   }
   243 }
   246 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
   247   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
   248   jint index = _index;
   249   if (index == 0) {
   250     *n = 0;
   251     return false;
   252   } else {
   253     int k = MIN2(max, index);
   254     jint new_ind = index - k;
   255     for (int j = 0; j < k; j++) {
   256       ptr_arr[j] = _base[new_ind + j];
   257     }
   258     _index = new_ind;
   259     *n = k;
   260     return true;
   261   }
   262 }
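// Draining sketch (hypothetical caller; process() is not part of this file):
//
//   oop buffer[64];
//   int n;
//   while (stack->par_pop_arr(buffer, 64, &n)) {
//     for (int i = 0; i < n; i++) {
//       process(buffer[i]);
//     }
//   }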
   265 CMRegionStack::CMRegionStack() : _base(NULL) {}
   267 void CMRegionStack::allocate(size_t size) {
   268   _base = NEW_C_HEAP_ARRAY(MemRegion, size);
   269   if (_base == NULL)
   270     vm_exit_during_initialization("Failed to allocate "
   271                                   "CM region mark stack");
   272   _index = 0;
   273   // QQQQ cast ...
   274   _capacity = (jint) size;
   275 }
   277 CMRegionStack::~CMRegionStack() {
   278   if (_base != NULL) FREE_C_HEAP_ARRAY(MemRegion, _base);
   279 }
   281 void CMRegionStack::push(MemRegion mr) {
   282   assert(mr.word_size() > 0, "Precondition");
   283   while (true) {
   284     if (isFull()) {
   285       _overflow = true;
   286       return;
   287     }
   288     // Otherwise...
   289     jint index = _index;
   290     jint next_index = index+1;
   291     jint res = Atomic::cmpxchg(next_index, &_index, index);
   292     if (res == index) {
   293       _base[index] = mr;
   294       return;
   295     }
   296     // Otherwise, we need to try again.
   297   }
   298 }
   300 // Currently we do not call this at all. Normally we would call it
   301 // during the concurrent marking / remark phases but we now call
   302 // the lock-based version instead. But we might want to resurrect this
   303 // code in the future. So, we'll leave it here commented out.
   304 #if 0
   305 MemRegion CMRegionStack::pop() {
   306   while (true) {
   307     // Otherwise...
   308     jint index = _index;
   310     if (index == 0) {
   311       return MemRegion();
   312     }
   313     jint next_index = index-1;
   314     jint res = Atomic::cmpxchg(next_index, &_index, index);
   315     if (res == index) {
   316       MemRegion mr = _base[next_index];
   317       if (mr.start() != NULL) {
   318         assert(mr.end() != NULL, "invariant");
   319         assert(mr.word_size() > 0, "invariant");
   320         return mr;
   321       } else {
   322         // that entry was invalidated... let's skip it
   323         assert(mr.end() == NULL, "invariant");
   324       }
   325     }
   326     // Otherwise, we need to try again.
   327   }
   328 }
   329 #endif // 0
   331 void CMRegionStack::push_with_lock(MemRegion mr) {
   332   assert(mr.word_size() > 0, "Precondition");
   333   MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
   335   if (isFull()) {
   336     _overflow = true;
   337     return;
   338   }
   340   _base[_index] = mr;
   341   _index += 1;
   342 }
   344 MemRegion CMRegionStack::pop_with_lock() {
   345   MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
   347   while (true) {
   348     if (_index == 0) {
   349       return MemRegion();
   350     }
   351     _index -= 1;
   353     MemRegion mr = _base[_index];
   354     if (mr.start() != NULL) {
   355       assert(mr.end() != NULL, "invariant");
   356       assert(mr.word_size() > 0, "invariant");
   357       return mr;
   358     } else {
   359       // that entry was invalidated... let's skip it
   360       assert(mr.end() == NULL, "invariant");
   361     }
   362   }
   363 }
   365 bool CMRegionStack::invalidate_entries_into_cset() {
   366   bool result = false;
   367   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   368   for (int i = 0; i < _oops_do_bound; ++i) {
   369     MemRegion mr = _base[i];
   370     if (mr.start() != NULL) {
   371       assert(mr.end() != NULL, "invariant");
   372       assert(mr.word_size() > 0, "invariant");
   373       HeapRegion* hr = g1h->heap_region_containing(mr.start());
   374       assert(hr != NULL, "invariant");
   375       if (hr->in_collection_set()) {
   376         // The region points into the collection set
   377         _base[i] = MemRegion();
   378         result = true;
   379       }
   380     } else {
   381       // that entry was invalidated... let's skip it
   382       assert(mr.end() == NULL, "invariant");
   383     }
   384   }
   385   return result;
   386 }
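// Entries are invalidated in place rather than compacted: the
// default-constructed MemRegion() (NULL start, NULL end, zero words) acts
// as a tombstone, and pop_with_lock() above skips over such entries. A
// consumer therefore only needs the check already used there:
//
//   MemRegion mr = _base[i];
//   if (mr.start() == NULL) {
//     // tombstone: this entry was invalidated, skip it
//   }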
   388 template<class OopClosureClass>
   389 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
   390   assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
   391          || SafepointSynchronize::is_at_safepoint(),
   392          "Drain recursion must be yield-safe.");
   393   bool res = true;
   394   debug_only(_drain_in_progress = true);
   395   debug_only(_drain_in_progress_yields = yield_after);
   396   while (!isEmpty()) {
   397     oop newOop = pop();
   398     assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
   399     assert(newOop->is_oop(), "Expected an oop");
   400     assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
   401            "only grey objects on this stack");
   402     // iterate over the oops in this oop, marking and pushing
   403     // the ones in CMS generation.
   404     newOop->oop_iterate(cl);
   405     if (yield_after && _cm->do_yield_check()) {
   406       res = false; break;
   407     }
   408   }
   409   debug_only(_drain_in_progress = false);
   410   return res;
   411 }
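// Typical use (a sketch; CMOopClosure stands in for whatever OopClosure
// subclass the caller supplies, it is not defined here):
//
//   CMOopClosure cl;
//   bool finished = _markStack.drain(&cl, _nextMarkBitMap,
//                                    true /* yield_after */);
//   if (!finished) {
//     // the drain yielded part-way; the caller must drain again later
//   }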
   413 void CMMarkStack::oops_do(OopClosure* f) {
   414   if (_index == 0) return;
   415   assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
   416          "Bound must be set.");
   417   for (int i = 0; i < _oops_do_bound; i++) {
   418     f->do_oop(&_base[i]);
   419   }
   420   _oops_do_bound = -1;
   421 }
   423 bool ConcurrentMark::not_yet_marked(oop obj) const {
   424   return (_g1h->is_obj_ill(obj)
   425           || (_g1h->is_in_permanent(obj)
   426               && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
   427 }
   429 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
   430 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
   431 #endif // _MSC_VER
   433 ConcurrentMark::ConcurrentMark(ReservedSpace rs,
   434                                int max_regions) :
   435   _markBitMap1(rs, MinObjAlignment - 1),
   436   _markBitMap2(rs, MinObjAlignment - 1),
   438   _parallel_marking_threads(0),
   439   _sleep_factor(0.0),
   440   _marking_task_overhead(1.0),
   441   _cleanup_sleep_factor(0.0),
   442   _cleanup_task_overhead(1.0),
   443   _region_bm(max_regions, false /* in_resource_area*/),
   444   _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
   445            CardTableModRefBS::card_shift,
   446            false /* in_resource_area*/),
   447   _prevMarkBitMap(&_markBitMap1),
   448   _nextMarkBitMap(&_markBitMap2),
   449   _at_least_one_mark_complete(false),
   451   _markStack(this),
   452   _regionStack(),
   453   // _finger set in set_non_marking_state
   455   _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
   456   // _active_tasks set in set_non_marking_state
   457   // _tasks set inside the constructor
   458   _task_queues(new CMTaskQueueSet((int) _max_task_num)),
   459   _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),
   461   _has_overflown(false),
   462   _concurrent(false),
   463   _has_aborted(false),
   464   _restart_for_overflow(false),
   465   _concurrent_marking_in_progress(false),
   466   _should_gray_objects(false),
   468   // _verbose_level set below
   470   _init_times(),
   471   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
   472   _cleanup_times(),
   473   _total_counting_time(0.0),
   474   _total_rs_scrub_time(0.0),
   476   _parallel_workers(NULL)
   477 {
   478   CMVerboseLevel verbose_level =
   479     (CMVerboseLevel) G1MarkingVerboseLevel;
   480   if (verbose_level < no_verbose)
   481     verbose_level = no_verbose;
   482   if (verbose_level > high_verbose)
   483     verbose_level = high_verbose;
   484   _verbose_level = verbose_level;
   486   if (verbose_low())
   487     gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
   488                            "heap end = "PTR_FORMAT, _heap_start, _heap_end);
   490   _markStack.allocate(MarkStackSize);
   491   _regionStack.allocate(G1MarkRegionStackSize);
   493   // Create & start a ConcurrentMark thread.
   494   _cmThread = new ConcurrentMarkThread(this);
   495   assert(cmThread() != NULL, "CM Thread should have been created");
   496   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
   498   _g1h = G1CollectedHeap::heap();
   499   assert(CGC_lock != NULL, "Where's the CGC_lock?");
   500   assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
   501   assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
   503   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
   504   satb_qs.set_buffer_size(G1SATBBufferSize);
   506   int size = (int) MAX2(ParallelGCThreads, (size_t)1);
   507   _par_cleanup_thread_state = NEW_C_HEAP_ARRAY(ParCleanupThreadState*, size);
   508   for (int i = 0 ; i < size; i++) {
   509     _par_cleanup_thread_state[i] = new ParCleanupThreadState;
   510   }
   512   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
   513   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);
   515   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
   516   _active_tasks = _max_task_num;
   517   for (int i = 0; i < (int) _max_task_num; ++i) {
   518     CMTaskQueue* task_queue = new CMTaskQueue();
   519     task_queue->initialize();
   520     _task_queues->register_queue(i, task_queue);
   522     _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
   523     _accum_task_vtime[i] = 0.0;
   524   }
   526   if (ConcGCThreads > ParallelGCThreads) {
   527     vm_exit_during_initialization("Can't have more ConcGCThreads "
   528                                   "than ParallelGCThreads.");
   529   }
   530   if (ParallelGCThreads == 0) {
   531     // if we are not running with any parallel GC threads we will not
   532     // spawn any marking threads either
   533     _parallel_marking_threads =   0;
   534     _sleep_factor             = 0.0;
   535     _marking_task_overhead    = 1.0;
   536   } else {
   537     if (ConcGCThreads > 0) {
   538       // notice that ConcGCThreads takes precedence over G1MarkingOverheadPercent
   539       // if both are set
   541       _parallel_marking_threads = ConcGCThreads;
   542       _sleep_factor             = 0.0;
   543       _marking_task_overhead    = 1.0;
   544     } else if (G1MarkingOverheadPercent > 0) {
   545       // we will calculate the number of parallel marking threads
   546       // based on a target overhead with respect to the soft real-time
   547       // goal
   549       double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
   550       double overall_cm_overhead =
   551         (double) MaxGCPauseMillis * marking_overhead /
   552         (double) GCPauseIntervalMillis;
   553       double cpu_ratio = 1.0 / (double) os::processor_count();
   554       double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
   555       double marking_task_overhead =
   556         overall_cm_overhead / marking_thread_num *
   557                                                 (double) os::processor_count();
   558       double sleep_factor =
   559                          (1.0 - marking_task_overhead) / marking_task_overhead;
   561       _parallel_marking_threads = (size_t) marking_thread_num;
   562       _sleep_factor             = sleep_factor;
   563       _marking_task_overhead    = marking_task_overhead;
   564     } else {
   565       _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
   566       _sleep_factor             = 0.0;
   567       _marking_task_overhead    = 1.0;
   568     }
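    // A worked example of the G1MarkingOverheadPercent arithmetic above
    // (illustrative numbers only): with G1MarkingOverheadPercent = 50,
    // MaxGCPauseMillis = 200, GCPauseIntervalMillis = 1000 and 8 processors:
    //
    //   marking_overhead      = 0.50
    //   overall_cm_overhead   = 200 * 0.50 / 1000    = 0.10
    //   cpu_ratio             = 1 / 8                = 0.125
    //   marking_thread_num    = ceil(0.10 / 0.125)   = 1
    //   marking_task_overhead = 0.10 / 1 * 8         = 0.80
    //   sleep_factor          = (1 - 0.80) / 0.80    = 0.25
    //
    // i.e. one marking thread that sleeps for 0.25 s per 1 s of work.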
   570     if (parallel_marking_threads() > 1)
   571       _cleanup_task_overhead = 1.0;
   572     else
   573       _cleanup_task_overhead = marking_task_overhead();
   574     _cleanup_sleep_factor =
   575                      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
   577 #if 0
   578     gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
   579     gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
   580     gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
   581     gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
   582     gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
   583 #endif
   585     guarantee(parallel_marking_threads() > 0, "peace of mind");
   586     _parallel_workers = new WorkGang("G1 Parallel Marking Threads",
   587                                      (int) parallel_marking_threads(), false, true);
   588     if (_parallel_workers == NULL)
   589       vm_exit_during_initialization("Failed necessary allocation.");
   590   }
   592   // so that the call below can read a sensible value
   593   _heap_start = (HeapWord*) rs.base();
   594   set_non_marking_state();
   595 }
   597 void ConcurrentMark::update_g1_committed(bool force) {
   598   // If concurrent marking is not in progress, then we do not need to
   599   // update _heap_end. This has a subtle and important
   600   // side-effect. Imagine that two evacuation pauses happen between
   601   // marking completion and remark. The first one can grow the
   602   // heap (hence now the finger is below the heap end). Then, the
   603   // second one could unnecessarily push regions on the region
   604   // stack. This causes the invariant that the region stack is empty
   605   // at the beginning of remark to be false. By ensuring that we do
   606   // not observe heap expansions after marking is complete, we do
   607   // not have this problem.
   608   if (!concurrent_marking_in_progress() && !force)
   609     return;
   611   MemRegion committed = _g1h->g1_committed();
   612   assert(committed.start() == _heap_start, "start shouldn't change");
   613   HeapWord* new_end = committed.end();
   614   if (new_end > _heap_end) {
   615     // The heap has been expanded.
   617     _heap_end = new_end;
   618   }
   619   // Notice that the heap can also shrink. However, this only happens
   620   // during a Full GC (at least currently) and the entire marking
   621   // phase will bail out and the task will not be restarted. So, let's
   622   // do nothing.
   623 }
   625 void ConcurrentMark::reset() {
   626   // Starting values for these two. This should be called in a STW
   627   // phase. CM will be notified of any future g1_committed
   628   // expansions at the end of evacuation pauses, when tasks are
   629   // inactive.
   630   MemRegion committed = _g1h->g1_committed();
   631   _heap_start = committed.start();
   632   _heap_end   = committed.end();
   634   // Separated the asserts so that we know which one fires.
   635   assert(_heap_start != NULL, "heap bounds should look ok");
   636   assert(_heap_end != NULL, "heap bounds should look ok");
   637   assert(_heap_start < _heap_end, "heap bounds should look ok");
   639   // reset all the marking data structures and any necessary flags
   640   clear_marking_state();
   642   if (verbose_low())
   643     gclog_or_tty->print_cr("[global] resetting");
   645   // We do reset all of them, since different phases will use
   646   // different number of active threads. So, it's easiest to have all
   647   // of them ready.
   648   for (int i = 0; i < (int) _max_task_num; ++i)
   649     _tasks[i]->reset(_nextMarkBitMap);
   651   // we need this to make sure that the flag is on during the evac
   652   // pause with initial mark piggy-backed
   653   set_concurrent_marking_in_progress();
   654 }
   656 void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
   657   assert(active_tasks <= _max_task_num, "we should not have more");
   659   _active_tasks = active_tasks;
   660   // Need to update the three data structures below according to the
   661   // number of active threads for this phase.
   662   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
   663   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
   664   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
   666   _concurrent = concurrent;
   667   // We propagate this to all tasks, not just the active ones.
   668   for (int i = 0; i < (int) _max_task_num; ++i)
   669     _tasks[i]->set_concurrent(concurrent);
   671   if (concurrent) {
   672     set_concurrent_marking_in_progress();
   673   } else {
   674     // We currently assume that the concurrent flag has been set to
   675     // false before we start remark. At this point we should also be
   676     // in a STW phase.
   677     assert(!concurrent_marking_in_progress(), "invariant");
   678     assert(_finger == _heap_end, "only way to get here");
   679     update_g1_committed(true);
   680   }
   681 }
   683 void ConcurrentMark::set_non_marking_state() {
   684   // We set the global marking state to some default values when we're
   685   // not doing marking.
   686   clear_marking_state();
   687   _active_tasks = 0;
   688   clear_concurrent_marking_in_progress();
   689 }
   691 ConcurrentMark::~ConcurrentMark() {
   692   int size = (int) MAX2(ParallelGCThreads, (size_t)1);
   693   for (int i = 0; i < size; i++) delete _par_cleanup_thread_state[i];
   694   FREE_C_HEAP_ARRAY(ParCleanupThreadState*,
   695                     _par_cleanup_thread_state);
   697   for (int i = 0; i < (int) _max_task_num; ++i) {
   698     delete _task_queues->queue(i);
   699     delete _tasks[i];
   700   }
   701   delete _task_queues;
   702   FREE_C_HEAP_ARRAY(CMTask*, _tasks);
   703 }
   705 // This closure is used to mark refs into the g1 generation
   706 // from external roots in the CMS bit map.
   707 // Called at the first checkpoint.
   708 //
   710 void ConcurrentMark::clearNextBitmap() {
   711   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   712   G1CollectorPolicy* g1p = g1h->g1_policy();
   714   // Make sure that the concurrent mark thread still appears to be
   715   // in the current cycle.
   716   guarantee(cmThread()->during_cycle(), "invariant");
   718   // We are finishing up the current cycle by clearing the next
   719   // marking bitmap and getting it ready for the next cycle. During
   720   // this time no other cycle can start. So, let's make sure that this
   721   // is the case.
   722   guarantee(!g1h->mark_in_progress(), "invariant");
   724   // clear the mark bitmap (no grey objects to start with).
   725   // We need to do this in chunks and offer to yield in between
   726   // each chunk.
   727   HeapWord* start  = _nextMarkBitMap->startWord();
   728   HeapWord* end    = _nextMarkBitMap->endWord();
   729   HeapWord* cur    = start;
   730   size_t chunkSize = M;
   731   while (cur < end) {
   732     HeapWord* next = cur + chunkSize;
   733     if (next > end)
   734       next = end;
   735     MemRegion mr(cur,next);
   736     _nextMarkBitMap->clearRange(mr);
   737     cur = next;
   738     do_yield_check();
   740     // Repeat the asserts from above. We'll do them as asserts here to
   741     // minimize their overhead on the product. However, we'll have
   742     // them as guarantees at the beginning / end of the bitmap
   743     // clearing to get some checking in the product.
   744     assert(cmThread()->during_cycle(), "invariant");
   745     assert(!g1h->mark_in_progress(), "invariant");
   746   }
   748   // Repeat the asserts from above.
   749   guarantee(cmThread()->during_cycle(), "invariant");
   750   guarantee(!g1h->mark_in_progress(), "invariant");
   751 }
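// The loop above is the standard concurrent-phase pattern in this file:
// do a bounded unit of work (here one megaword of heap range per chunk),
// then do_yield_check(), so that a pending safepoint (evacuation pause or
// Full GC) is never delayed by more than one chunk of clearing.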
   753 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
   754 public:
   755   bool doHeapRegion(HeapRegion* r) {
   756     if (!r->continuesHumongous()) {
   757       r->note_start_of_marking(true);
   758     }
   759     return false;
   760   }
   761 };
   763 void ConcurrentMark::checkpointRootsInitialPre() {
   764   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
   765   G1CollectorPolicy* g1p = g1h->g1_policy();
   767   _has_aborted = false;
   769   if (G1PrintReachableAtInitialMark) {
   770     print_reachable("at-cycle-start",
   771                     true /* use_prev_marking */, true /* all */);
   772   }
   774   // Initialise marking structures. This has to be done in a STW phase.
   775   reset();
   776 }
   778 class CMMarkRootsClosure: public OopsInGenClosure {
   779 private:
   780   ConcurrentMark*  _cm;
   781   G1CollectedHeap* _g1h;
   782   bool             _do_barrier;
   784 public:
   785   CMMarkRootsClosure(ConcurrentMark* cm,
   786                      G1CollectedHeap* g1h,
   787                      bool do_barrier) : _cm(cm), _g1h(g1h),
   788                                         _do_barrier(do_barrier) { }
   790   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
   791   virtual void do_oop(      oop* p) { do_oop_work(p); }
   793   template <class T> void do_oop_work(T* p) {
   794     T heap_oop = oopDesc::load_heap_oop(p);
   795     if (!oopDesc::is_null(heap_oop)) {
   796       oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
   797       assert(obj->is_oop() || obj->mark() == NULL,
   798              "expected an oop, possibly with mark word displaced");
   799       HeapWord* addr = (HeapWord*)obj;
   800       if (_g1h->is_in_g1_reserved(addr)) {
   801         _cm->grayRoot(obj);
   802       }
   803     }
   804     if (_do_barrier) {
   805       assert(!_g1h->is_in_g1_reserved(p),
   806              "Should be called on external roots");
   807       do_barrier(p);
   808     }
   809   }
   810 };
   812 void ConcurrentMark::checkpointRootsInitialPost() {
   813   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
   815   // For each region note start of marking.
   816   NoteStartOfMarkHRClosure startcl;
   817   g1h->heap_region_iterate(&startcl);
   819   // Start weak-reference discovery.
   820   ReferenceProcessor* rp = g1h->ref_processor();
   821   rp->verify_no_references_recorded();
   822   rp->enable_discovery(); // enable ("weak") refs discovery
   823   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
   825   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
   826   // This is the start of the marking cycle; we expect all
   827   // threads to have SATB queues with active set to false.
   828   satb_mq_set.set_active_all_threads(true, /* new active value */
   829                                      false /* expected_active */);
   831   // update_g1_committed() will be called at the end of an evac pause
   832   // when marking is on. So, it's also called at the end of the
   833   // initial-mark pause to update the heap end, if the heap expands
   834   // during it. No need to call it here.
   835 }
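// SATB queue activeness over a marking cycle, as coordinated here and in
// checkpointRootsFinal() below:
//
//   initial-mark pause (STW): set_active_all_threads(true,  false /* expected */)
//   remark pause (STW):       set_active_all_threads(false, true  /* expected */)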
   837 // Checkpoint the roots into this generation from outside
   838 // this generation. [Note this initial checkpoint need only
   839 // be approximate -- we'll do a catch up phase subsequently.]
   840 void ConcurrentMark::checkpointRootsInitial() {
   841   assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped");
   842   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   844   double start = os::elapsedTime();
   846   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
   847   g1p->record_concurrent_mark_init_start();
   848   checkpointRootsInitialPre();
   850   // YSR: when concurrent precleaning is in place, we'll
   851   // need to clear the cached card table here
   853   ResourceMark rm;
   854   HandleMark  hm;
   856   g1h->ensure_parsability(false);
   857   g1h->perm_gen()->save_marks();
   859   CMMarkRootsClosure notOlder(this, g1h, false);
   860   CMMarkRootsClosure older(this, g1h, true);
   862   g1h->set_marking_started();
   863   g1h->rem_set()->prepare_for_younger_refs_iterate(false);
   865   g1h->process_strong_roots(true,    // activate StrongRootsScope
   866                             false,   // fake perm gen collection
   867                             SharedHeap::SO_AllClasses,
   868                             &notOlder, // Regular roots
   869                             NULL,     // do not visit active blobs
   870                             &older    // Perm Gen Roots
   871                             );
   872   checkpointRootsInitialPost();
   874   // Statistics.
   875   double end = os::elapsedTime();
   876   _init_times.add((end - start) * 1000.0);
   878   g1p->record_concurrent_mark_init_end();
   879 }
   881 /*
   882    Notice that in the next two methods, we actually leave the STS
   883    during the barrier sync and join it immediately afterwards. If we
   884    do not do this, then the following deadlock can occur: one
   885    thread could be in the barrier sync code, waiting for the other
   886    thread to also sync up, whereas another one could be trying to
   887    yield, while also waiting for the other threads to sync up too.
   889    Because the thread that does the sync barrier has left the STS, it
   890    is possible for a Full GC or an evacuation pause to occur while it
   891    waits. This is actually safe, since entering the sync
   892    barrier is one of the last things do_marking_step() does, and it
   893    doesn't manipulate any data structures afterwards.
   894 */
   896 void ConcurrentMark::enter_first_sync_barrier(int task_num) {
   897   if (verbose_low())
   898     gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
   900   ConcurrentGCThread::stsLeave();
   901   _first_overflow_barrier_sync.enter();
   902   ConcurrentGCThread::stsJoin();
   903   // at this point everyone should have synced up and not be doing any
   904   // more work
   906   if (verbose_low())
   907     gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
   909   // let task 0 do this
   910   if (task_num == 0) {
   911     // task 0 is responsible for clearing the global data structures
   912     clear_marking_state();
   914     if (PrintGC) {
   915       gclog_or_tty->date_stamp(PrintGCDateStamps);
   916       gclog_or_tty->stamp(PrintGCTimeStamps);
   917       gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
   918     }
   919   }
   921   // after this, each task should reset its own data structures and
   922   // then go into the second barrier
   923 }
   925 void ConcurrentMark::enter_second_sync_barrier(int task_num) {
   926   if (verbose_low())
   927     gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
   929   ConcurrentGCThread::stsLeave();
   930   _second_overflow_barrier_sync.enter();
   931   ConcurrentGCThread::stsJoin();
   932   // at this point everything should be re-initialised and ready to go
   934   if (verbose_low())
   935     gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
   936 }
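// The two barriers form a two-phase reset protocol: the first barrier
// guarantees that every task has stopped working before task 0 clears the
// global marking state; the second guarantees that no task resumes until
// all tasks have also reset their local state. With only one barrier, a
// fast task could start repopulating the global structures while another
// task was still clearing them.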
   938 void ConcurrentMark::grayRoot(oop p) {
   939   HeapWord* addr = (HeapWord*) p;
   940   // We can't really check against _heap_start and _heap_end, since it
   941   // is possible during an evacuation pause with piggy-backed
   942   // initial-mark that the committed space is expanded during the
   943   // pause without CM observing this change. So the assertion below
   944   // is a bit conservative; but better than nothing.
   945   assert(_g1h->g1_committed().contains(addr),
   946          "address should be within the heap bounds");
   948   if (!_nextMarkBitMap->isMarked(addr))
   949     _nextMarkBitMap->parMark(addr);
   950 }
   952 void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
   953   // The objects on the region have already been marked "in bulk" by
   954   // the caller. We only need to decide whether to push the region on
   955   // the region stack or not.
   957   if (!concurrent_marking_in_progress() || !_should_gray_objects)
   958     // We're done with marking and waiting for remark. We do not need to
   959     // push anything else on the region stack.
   960     return;
   962   HeapWord* finger = _finger;
   964   if (verbose_low())
   965     gclog_or_tty->print_cr("[global] attempting to push "
   966                            "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
   967                            PTR_FORMAT, mr.start(), mr.end(), finger);
   969   if (mr.start() < finger) {
   970     // The finger is always heap region aligned and it is not possible
   971     // for mr to span heap regions.
   972     assert(mr.end() <= finger, "invariant");
   974     // Separated the asserts so that we know which one fires.
   975     assert(mr.start() <= mr.end(),
   976            "region boundaries should fall within the committed space");
   977     assert(_heap_start <= mr.start(),
   978            "region boundaries should fall within the committed space");
   979     assert(mr.end() <= _heap_end,
   980            "region boundaries should fall within the committed space");
   981     if (verbose_low())
   982       gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
   983                              "below the finger, pushing it",
   984                              mr.start(), mr.end());
   986     if (!region_stack_push(mr)) {
   987       if (verbose_low())
   988         gclog_or_tty->print_cr("[global] region stack has overflown.");
   989     }
   990   }
   991 }
   993 void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
   994   // The object is not marked by the caller. We need to at least mark
   995   // it and maybe push it on the stack.
   997   HeapWord* addr = (HeapWord*)p;
   998   if (!_nextMarkBitMap->isMarked(addr)) {
   999     // We definitely need to mark it, irrespective whether we bail out
  1000     // because we're done with marking.
  1001     if (_nextMarkBitMap->parMark(addr)) {
  1002       if (!concurrent_marking_in_progress() || !_should_gray_objects)
  1003         // If we're done with concurrent marking and we're waiting for
  1004         // remark, then we're not pushing anything on the stack.
  1005         return;
  1007       // No OrderAccess::store_load() is needed. It is implicit in the
  1008       // CAS done in parMark(addr) above
  1009       HeapWord* finger = _finger;
  1011       if (addr < finger) {
  1012         if (!mark_stack_push(oop(addr))) {
  1013           if (verbose_low())
  1014             gclog_or_tty->print_cr("[global] global stack overflow "
  1015                                    "during parMark");
  1016         }
  1017       }
  1018     }
  1019   }
  1020 }
  1022 class CMConcurrentMarkingTask: public AbstractGangTask {
  1023 private:
  1024   ConcurrentMark*       _cm;
  1025   ConcurrentMarkThread* _cmt;
  1027 public:
  1028   void work(int worker_i) {
  1029     assert(Thread::current()->is_ConcurrentGC_thread(),
  1030            "this should only be done by a conc GC thread");
  1032     double start_vtime = os::elapsedVTime();
  1034     ConcurrentGCThread::stsJoin();
  1036     assert((size_t) worker_i < _cm->active_tasks(), "invariant");
  1037     CMTask* the_task = _cm->task(worker_i);
  1038     the_task->record_start_time();
  1039     if (!_cm->has_aborted()) {
  1040       do {
  1041         double start_vtime_sec = os::elapsedVTime();
  1042         double start_time_sec = os::elapsedTime();
  1043         the_task->do_marking_step(10.0);
  1044         double end_time_sec = os::elapsedTime();
  1045         double end_vtime_sec = os::elapsedVTime();
  1046         double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
  1047         double elapsed_time_sec = end_time_sec - start_time_sec;
  1048         _cm->clear_has_overflown();
  1050         bool ret = _cm->do_yield_check(worker_i);
  1052         jlong sleep_time_ms;
  1053         if (!_cm->has_aborted() && the_task->has_aborted()) {
  1054           sleep_time_ms =
  1055             (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
  1056           ConcurrentGCThread::stsLeave();
  1057           os::sleep(Thread::current(), sleep_time_ms, false);
  1058           ConcurrentGCThread::stsJoin();
  1059         }
  1060         double end_time2_sec = os::elapsedTime();
  1061         double elapsed_time2_sec = end_time2_sec - start_time_sec;
  1063 #if 0
  1064           gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
  1065                                  "overhead %1.4lf",
  1066                                  elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
  1067                                  the_task->conc_overhead(os::elapsedTime()) * 8.0);
  1068           gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
  1069                                  elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
  1070 #endif
  1071       } while (!_cm->has_aborted() && the_task->has_aborted());
  1072     }
  1073     the_task->record_end_time();
  1074     guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
  1076     ConcurrentGCThread::stsLeave();
  1078     double end_vtime = os::elapsedVTime();
  1079     _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
  1080   }
  1082   CMConcurrentMarkingTask(ConcurrentMark* cm,
  1083                           ConcurrentMarkThread* cmt) :
  1084       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
  1086   ~CMConcurrentMarkingTask() { }
  1087 };
  1089 void ConcurrentMark::markFromRoots() {
  1090   // we might be tempted to assert that:
  1091   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  1092   //        "inconsistent argument?");
  1093   // However that wouldn't be right, because it's possible that
  1094   // a safepoint is indeed in progress as a younger generation
  1095   // stop-the-world GC happens even as we mark in this generation.
  1097   _restart_for_overflow = false;
  1099   set_phase(MAX2((size_t) 1, parallel_marking_threads()), true);
  1101   CMConcurrentMarkingTask markingTask(this, cmThread());
  1102   if (parallel_marking_threads() > 0)
  1103     _parallel_workers->run_task(&markingTask);
  1104   else
  1105     markingTask.work(0);
  1106   print_stats();
  1107 }
  1109 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  1110   // world is stopped at this checkpoint
  1111   assert(SafepointSynchronize::is_at_safepoint(),
  1112          "world should be stopped");
  1113   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  1115   // If a full collection has happened, we shouldn't do this.
  1116   if (has_aborted()) {
  1117     g1h->set_marking_complete(); // So bitmap clearing isn't confused
  1118     return;
  1119   }
  1121   if (VerifyDuringGC) {
  1122     HandleMark hm;  // handle scope
  1123     gclog_or_tty->print(" VerifyDuringGC:(before)");
  1124     Universe::heap()->prepare_for_verify();
  1125     Universe::verify(true, false, true);
  1126   }
  1128   G1CollectorPolicy* g1p = g1h->g1_policy();
  1129   g1p->record_concurrent_mark_remark_start();
  1131   double start = os::elapsedTime();
  1133   checkpointRootsFinalWork();
  1135   double mark_work_end = os::elapsedTime();
  1137   weakRefsWork(clear_all_soft_refs);
  1139   if (has_overflown()) {
  1140     // Oops.  We overflowed.  Restart concurrent marking.
  1141     _restart_for_overflow = true;
  1142     // Clear the flag. We do not need it any more.
  1143     clear_has_overflown();
  1144     if (G1TraceMarkStackOverflow)
  1145       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
  1146   } else {
  1147     // We're done with marking.
  1148     // This is the end of the marking cycle; we expect all
  1149     // threads to have SATB queues with active set to true.
  1150     JavaThread::satb_mark_queue_set().set_active_all_threads(
  1151                                                   false, /* new active value */
  1152                                                   true /* expected_active */);
  1154     if (VerifyDuringGC) {
  1155       HandleMark hm;  // handle scope
  1156       gclog_or_tty->print(" VerifyDuringGC:(after)");
  1157       Universe::heap()->prepare_for_verify();
  1158       Universe::heap()->verify(/* allow_dirty */      true,
  1159                                /* silent */           false,
  1160                                /* use_prev_marking */ false);
  1161     }
  1162   }
  1164 #if VERIFY_OBJS_PROCESSED
  1165   _scan_obj_cl.objs_processed = 0;
  1166   ThreadLocalObjQueue::objs_enqueued = 0;
  1167 #endif
  1169   // Statistics
  1170   double now = os::elapsedTime();
  1171   _remark_mark_times.add((mark_work_end - start) * 1000.0);
  1172   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  1173   _remark_times.add((now - start) * 1000.0);
  1175   g1p->record_concurrent_mark_remark_end();
  1176 }
  1179 #define CARD_BM_TEST_MODE 0
  1181 class CalcLiveObjectsClosure: public HeapRegionClosure {
  1183   CMBitMapRO* _bm;
  1184   ConcurrentMark* _cm;
  1185   bool _changed;
  1186   bool _yield;
  1187   size_t _words_done;
  1188   size_t _tot_live;
  1189   size_t _tot_used;
  1190   size_t _regions_done;
  1191   double _start_vtime_sec;
  1193   BitMap* _region_bm;
  1194   BitMap* _card_bm;
  1195   intptr_t _bottom_card_num;
  1196   bool _final;
  1198   void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
  1199     for (intptr_t i = start_card_num; i <= last_card_num; i++) {
  1200 #if CARD_BM_TEST_MODE
  1201       guarantee(_card_bm->at(i - _bottom_card_num), "Should already be set.");
  1202 #else
  1203       _card_bm->par_at_put(i - _bottom_card_num, 1);
  1204 #endif
  1205     }
  1206   }
  1208 public:
  1209   CalcLiveObjectsClosure(bool final,
  1210                          CMBitMapRO *bm, ConcurrentMark *cm,
  1211                          BitMap* region_bm, BitMap* card_bm) :
  1212     _bm(bm), _cm(cm), _changed(false), _yield(true),
  1213     _words_done(0), _tot_live(0), _tot_used(0),
  1214     _region_bm(region_bm), _card_bm(card_bm),_final(final),
  1215     _regions_done(0), _start_vtime_sec(0.0)
  1216   {
  1217     _bottom_card_num =
  1218       intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
  1219                CardTableModRefBS::card_shift);
  1220   }
  1222   // It takes a region that's not empty (i.e., it has at least one
  1223   // live object in it) and sets its corresponding bit on the region
  1224   // bitmap to 1. If the region is "starts humongous" it will also set
  1225   // to 1 the bits on the region bitmap that correspond to its
  1226   // associated "continues humongous" regions.
  1227   void set_bit_for_region(HeapRegion* hr) {
  1228     assert(!hr->continuesHumongous(), "should have filtered those out");
  1230     size_t index = hr->hrs_index();
  1231     if (!hr->startsHumongous()) {
  1232       // Normal (non-humongous) case: just set the bit.
  1233       _region_bm->par_at_put((BitMap::idx_t) index, true);
  1234     } else {
  1235       // Starts humongous case: calculate how many regions are part of
  1236       // this humongous region and then set the bit range. It might
  1237       // have been a bit more efficient to look at the object that
  1238       // spans these humongous regions to calculate their number from
  1239       // the object's size. However, it's a good idea to calculate
  1240       // this based on the metadata itself, and not the region
  1241       // contents, so that this code is not aware of what goes into
  1242       // the humongous regions (in case this changes in the future).
  1243       G1CollectedHeap* g1h = G1CollectedHeap::heap();
  1244       size_t end_index = index + 1;
  1245       while (end_index < g1h->n_regions()) {
  1246         HeapRegion* chr = g1h->region_at(end_index);
  1247         if (!chr->continuesHumongous()) {
  1248           break;
  1249         }
  1250         end_index += 1;
  1251       }
  1252       _region_bm->par_at_put_range((BitMap::idx_t) index,
  1253                                    (BitMap::idx_t) end_index, true);
  1254     }
  1255   }
  1257   bool doHeapRegion(HeapRegion* hr) {
  1258     if (!_final && _regions_done == 0)
  1259       _start_vtime_sec = os::elapsedVTime();
  1261     if (hr->continuesHumongous()) {
  1262       // We will ignore these here and process them when their
  1263       // associated "starts humongous" region is processed (see
  1264       // set_bit_for_heap_region()). Note that we cannot rely on their
  1265       // associated "starts humongous" region to have their bit set to
  1266       // 1 since, due to the region chunking in the parallel region
  1267       // iteration, a "continues humongous" region might be visited
  1268       // before its associated "starts humongous".
  1269       return false;
  1270     }
  1272     HeapWord* nextTop = hr->next_top_at_mark_start();
  1273     HeapWord* start   = hr->top_at_conc_mark_count();
  1274     assert(hr->bottom() <= start && start <= hr->end() &&
  1275            hr->bottom() <= nextTop && nextTop <= hr->end() &&
  1276            start <= nextTop,
  1277            "Preconditions.");
  1278     // Otherwise, record the number of words we'll examine.
  1279     size_t words_done = (nextTop - start);
  1280     // Find the first marked object at or after "start".
  1281     start = _bm->getNextMarkedWordAddress(start, nextTop);
  1282     size_t marked_bytes = 0;
  1284     // Below, the term "card num" means the result of shifting an address
  1285     // by the card shift -- address 0 corresponds to card number 0.  One
  1286     // must subtract the card num of the bottom of the heap to obtain a
  1287     // card table index.
  1288     // The first card num of the sequence of live cards currently being
  1289     // constructed.  -1 ==> no sequence.
  1290     intptr_t start_card_num = -1;
  1291     // The last card num of the sequence of live cards currently being
  1292     // constructed.  -1 ==> no sequence.
  1293     intptr_t last_card_num = -1;
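    //     A small worked example (assuming the usual 512-byte cards,
    //     card_shift == 9): an object at address 0x2400 has card num
    //     0x2400 >> 9 = 0x12; with heap bottom at 0x2000,
    //     _bottom_card_num = 0x2000 >> 9 = 0x10, so the object's bit in
    //     _card_bm is at index 0x12 - 0x10 = 2.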
  1295     while (start < nextTop) {
  1296       if (_yield && _cm->do_yield_check()) {
  1297         // We yielded.  It might be for a full collection, in which case
  1298         // all bets are off; terminate the traversal.
  1299         if (_cm->has_aborted()) {
  1300           _changed = false;
  1301           return true;
  1302         } else {
  1303           // Otherwise, it might be a collection pause, and the region
  1304           // we're looking at might be in the collection set.  We'll
  1305           // abandon this region.
  1306           return false;
  1307         }
  1308       }
  1309       oop obj = oop(start);
  1310       int obj_sz = obj->size();
  1311       // The card num of the start of the current object.
  1312       intptr_t obj_card_num =
  1313         intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
  1315       HeapWord* obj_last = start + obj_sz - 1;
  1316       intptr_t obj_last_card_num =
  1317         intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);
  1319       if (obj_card_num != last_card_num) {
  1320         if (start_card_num == -1) {
  1321           assert(last_card_num == -1, "Both or neither.");
  1322           start_card_num = obj_card_num;
  1323         } else {
  1324           assert(last_card_num != -1, "Both or neither.");
  1325           assert(obj_card_num >= last_card_num, "Inv");
  1326           if ((obj_card_num - last_card_num) > 1) {
  1327             // Mark the last run, and start a new one.
  1328             mark_card_num_range(start_card_num, last_card_num);
  1329             start_card_num = obj_card_num;
  1330           }
  1331         }
  1332 #if CARD_BM_TEST_MODE
  1333         /*
  1334         gclog_or_tty->print_cr("Setting bits from %d/%d.",
  1335                                obj_card_num - _bottom_card_num,
  1336                                obj_last_card_num - _bottom_card_num);
  1337         */
  1338         for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
  1339           _card_bm->par_at_put(j - _bottom_card_num, 1);
  1340         }
  1341 #endif
  1342       }
  1343       // In any case, we set the last card num.
  1344       last_card_num = obj_last_card_num;
  1346       marked_bytes += (size_t)obj_sz * HeapWordSize;
  1347       // Find the next marked object after this one.
  1348       start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
  1349       _changed = true;
  1350     }
  1351     // Handle the last range, if any.
  1352     if (start_card_num != -1)
  1353       mark_card_num_range(start_card_num, last_card_num);
  1354     if (_final) {
  1355       // Mark the allocated-since-marking portion...
  1356       HeapWord* tp = hr->top();
  1357       if (nextTop < tp) {
  1358         start_card_num =
  1359           intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
  1360         last_card_num =
  1361           intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
  1362         mark_card_num_range(start_card_num, last_card_num);
  1363         // This definitely means the region has live objects.
  1364         set_bit_for_region(hr);
  1365       }
  1366     }
  1368     hr->add_to_marked_bytes(marked_bytes);
  1369     // Update the live region bitmap.
  1370     if (marked_bytes > 0) {
  1371       set_bit_for_region(hr);
  1372     }
  1373     hr->set_top_at_conc_mark_count(nextTop);
  1374     _tot_live += hr->next_live_bytes();
  1375     _tot_used += hr->used();
  1376     _words_done = words_done;
  1378     if (!_final) {
  1379       ++_regions_done;
  1380       if (_regions_done % 10 == 0) {
  1381         double end_vtime_sec = os::elapsedVTime();
  1382         double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
  1383         if (elapsed_vtime_sec > (10.0 / 1000.0)) {
  1384           jlong sleep_time_ms =
  1385             (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
  1386           os::sleep(Thread::current(), sleep_time_ms, false);
  1387           _start_vtime_sec = end_vtime_sec;
  1388         }
  1389       }
  1390     }
  1392     return false;
  1393   }
  1395   bool changed() { return _changed;  }
  1396   void reset()   { _changed = false; _words_done = 0; }
  1397   void no_yield() { _yield = false; }
  1398   size_t words_done() { return _words_done; }
  1399   size_t tot_live() { return _tot_live; }
  1400   size_t tot_used() { return _tot_used; }
  1401 };
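       // Runs the liveness calculation while marking is still in progress
       // (non-final mode), so the result is only an approximation; an exact
       // count is redone with the world stopped in ConcurrentMark::cleanup().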
  1404 void ConcurrentMark::calcDesiredRegions() {
  1405   _region_bm.clear();
  1406   _card_bm.clear();
  1407   CalcLiveObjectsClosure calccl(false /*final*/,
  1408                                 nextMarkBitMap(), this,
  1409                                 &_region_bm, &_card_bm);
  1410   G1CollectedHeap *g1h = G1CollectedHeap::heap();
  1411   g1h->heap_region_iterate(&calccl);
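         // The non-final closure may yield partway through a pass (and
         // report itself "changed"), so iterate to a fixed point: repeat
         // until a full pass over the regions completes with no changes.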
  1413   do {
  1414     calccl.reset();
  1415     g1h->heap_region_iterate(&calccl);
  1416   } while (calccl.changed());
  1417 }
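       // Final counting: re-runs the liveness calculation with the world
       // stopped (yielding disabled), one worker per claimed chunk of
       // regions, accumulating per-worker live/used totals summed below.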
  1419 class G1ParFinalCountTask: public AbstractGangTask {
  1420 protected:
  1421   G1CollectedHeap* _g1h;
  1422   CMBitMap* _bm;
  1423   size_t _n_workers;
  1424   size_t *_live_bytes;
  1425   size_t *_used_bytes;
  1426   BitMap* _region_bm;
  1427   BitMap* _card_bm;
  1428 public:
  1429   G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
  1430                       BitMap* region_bm, BitMap* card_bm) :
  1431     AbstractGangTask("G1 final counting"), _g1h(g1h),
  1432     _bm(bm), _region_bm(region_bm), _card_bm(card_bm)
  1434     if (ParallelGCThreads > 0)
  1435       _n_workers = _g1h->workers()->total_workers();
  1436     else
  1437       _n_workers = 1;
  1438     _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
  1439     _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
  1440   }
  1442   ~G1ParFinalCountTask() {
  1443     FREE_C_HEAP_ARRAY(size_t, _live_bytes);
  1444     FREE_C_HEAP_ARRAY(size_t, _used_bytes);
  1445   }
  1447   void work(int i) {
  1448     CalcLiveObjectsClosure calccl(true /*final*/,
  1449                                   _bm, _g1h->concurrent_mark(),
  1450                                   _region_bm, _card_bm);
  1451     calccl.no_yield();
  1452     if (ParallelGCThreads > 0) {
  1453       _g1h->heap_region_par_iterate_chunked(&calccl, i,
  1454                                             HeapRegion::FinalCountClaimValue);
  1455     } else {
  1456       _g1h->heap_region_iterate(&calccl);
  1457     }
  1458     assert(calccl.complete(), "Shouldn't have yielded!");
  1460     assert((size_t) i < _n_workers, "invariant");
  1461     _live_bytes[i] = calccl.tot_live();
  1462     _used_bytes[i] = calccl.tot_used();
  1463   }
  1464   size_t live_bytes()  {
  1465     size_t live_bytes = 0;
  1466     for (size_t i = 0; i < _n_workers; ++i)
  1467       live_bytes += _live_bytes[i];
  1468     return live_bytes;
  1470   size_t used_bytes()  {
  1471     size_t used_bytes = 0;
  1472     for (size_t i = 0; i < _n_workers; ++i)
  1473       used_bytes += _used_bytes[i];
  1474     return used_bytes;
  1475   }
  1476 };
  1478 class G1ParNoteEndTask;
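       // Invoked at the end of marking (in parallel, one instance per
       // worker): notes the marking information in each claimed region,
       // frees regions found completely empty, and gathers timing stats.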
  1480 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  1481   G1CollectedHeap* _g1;
  1482   int _worker_num;
  1483   size_t _max_live_bytes;
  1484   size_t _regions_claimed;
  1485   size_t _freed_bytes;
  1486   size_t _cleared_h_regions;
  1487   size_t _freed_regions;
  1488   UncleanRegionList* _unclean_region_list;
  1489   double _claimed_region_time;
  1490   double _max_region_time;
  1492 public:
  1493   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
  1494                              UncleanRegionList* list,
  1495                              int worker_num);
  1496   size_t freed_bytes() { return _freed_bytes; }
  1497   size_t cleared_h_regions() { return _cleared_h_regions; }
  1498   size_t freed_regions() { return  _freed_regions; }
  1499   UncleanRegionList* unclean_region_list() {
  1500     return _unclean_region_list;
  1501   }
  1503   bool doHeapRegion(HeapRegion *r);
  1505   size_t max_live_bytes() { return _max_live_bytes; }
  1506   size_t regions_claimed() { return _regions_claimed; }
  1507   double claimed_region_time_sec() { return _claimed_region_time; }
  1508   double max_region_time_sec() { return _max_region_time; }
  1509 };
  1511 class G1ParNoteEndTask: public AbstractGangTask {
  1512   friend class G1NoteEndOfConcMarkClosure;
  1513 protected:
  1514   G1CollectedHeap* _g1h;
  1515   size_t _max_live_bytes;
  1516   size_t _freed_bytes;
  1517   ConcurrentMark::ParCleanupThreadState** _par_cleanup_thread_state;
  1518 public:
  1519   G1ParNoteEndTask(G1CollectedHeap* g1h,
  1520                    ConcurrentMark::ParCleanupThreadState**
  1521                    par_cleanup_thread_state) :
  1522     AbstractGangTask("G1 note end"), _g1h(g1h),
  1523     _max_live_bytes(0), _freed_bytes(0),
  1524     _par_cleanup_thread_state(par_cleanup_thread_state)
  1525   {}
  1527   void work(int i) {
  1528     double start = os::elapsedTime();
  1529     G1NoteEndOfConcMarkClosure g1_note_end(_g1h,
  1530                                            &_par_cleanup_thread_state[i]->list,
  1531                                            i);
  1532     if (ParallelGCThreads > 0) {
  1533       _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
  1534                                             HeapRegion::NoteEndClaimValue);
  1535     } else {
  1536       _g1h->heap_region_iterate(&g1_note_end);
  1537     }
  1538     assert(g1_note_end.complete(), "Shouldn't have yielded!");
  1540     // Now finish up freeing the current thread's regions.
  1541     _g1h->finish_free_region_work(g1_note_end.freed_bytes(),
  1542                                   g1_note_end.cleared_h_regions(),
  1543                                   0, NULL);
  1544     {
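           // Aggregate this worker's totals into the shared fields under a
           // lock; each worker takes it only once per cleanup, so using the
           // rare-event lock here is presumably cheap.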
  1545       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  1546       _max_live_bytes += g1_note_end.max_live_bytes();
  1547       _freed_bytes += g1_note_end.freed_bytes();
  1548     }
  1549     double end = os::elapsedTime();
  1550     if (G1PrintParCleanupStats) {
  1551       gclog_or_tty->print("     Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
  1552                           "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
  1553                           i, start, end, (end-start)*1000.0,
  1554                           g1_note_end.regions_claimed(),
  1555                           g1_note_end.claimed_region_time_sec()*1000.0,
  1556                           g1_note_end.max_region_time_sec()*1000.0);
  1557     }
  1558   }
  1559   size_t max_live_bytes() { return _max_live_bytes; }
  1560   size_t freed_bytes() { return _freed_bytes; }
  1561 };
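       // Scrubs the remembered sets, in parallel when possible, using the
       // region and card liveness bitmaps computed during counting.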
  1563 class G1ParScrubRemSetTask: public AbstractGangTask {
  1564 protected:
  1565   G1RemSet* _g1rs;
  1566   BitMap* _region_bm;
  1567   BitMap* _card_bm;
  1568 public:
  1569   G1ParScrubRemSetTask(G1CollectedHeap* g1h,
  1570                        BitMap* region_bm, BitMap* card_bm) :
  1571     AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
  1572     _region_bm(region_bm), _card_bm(card_bm)
  1573   {}
  1575   void work(int i) {
  1576     if (ParallelGCThreads > 0) {
  1577       _g1rs->scrub_par(_region_bm, _card_bm, i,
  1578                        HeapRegion::ScrubRemSetClaimValue);
  1579     } else {
  1580       _g1rs->scrub(_region_bm, _card_bm);
  1581     }
  1582   }
  1584 };
  1586 G1NoteEndOfConcMarkClosure::
  1587 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
  1588                            UncleanRegionList* list,
  1589                            int worker_num)
  1590   : _g1(g1), _worker_num(worker_num),
  1591     _max_live_bytes(0), _regions_claimed(0),
  1592     _freed_bytes(0), _cleared_h_regions(0), _freed_regions(0),
  1593     _claimed_region_time(0.0), _max_region_time(0.0),
  1594     _unclean_region_list(list)
  1595 {}
  1597 bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *r) {
  1598   // We use a claim value of zero here because all regions
  1599   // were claimed with value 1 in the FinalCount task.
  1600   r->reset_gc_time_stamp();
  1601   if (!r->continuesHumongous()) {
  1602     double start = os::elapsedTime();
  1603     _regions_claimed++;
  1604     r->note_end_of_marking();
  1605     _max_live_bytes += r->max_live_bytes();
  1606     _g1->free_region_if_totally_empty_work(r,
  1607                                            _freed_bytes,
  1608                                            _cleared_h_regions,
  1609                                            _freed_regions,
  1610                                            _unclean_region_list,
  1611                                            true /*par*/);
  1612     double region_time = (os::elapsedTime() - start);
  1613     _claimed_region_time += region_time;
  1614     if (region_time > _max_region_time) _max_region_time = region_time;
  1615   }
  1616   return false;
  1617 }
  1619 void ConcurrentMark::cleanup() {
  1620   // world is stopped at this checkpoint
  1621   assert(SafepointSynchronize::is_at_safepoint(),
  1622          "world should be stopped");
  1623   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  1625   // If a full collection has happened, we shouldn't do this.
  1626   if (has_aborted()) {
  1627     g1h->set_marking_complete(); // So bitmap clearing isn't confused
  1628     return;
  1629   }
  1631   if (VerifyDuringGC) {
  1632     HandleMark hm;  // handle scope
  1633     gclog_or_tty->print(" VerifyDuringGC:(before)");
  1634     Universe::heap()->prepare_for_verify();
  1635     Universe::verify(/* allow dirty  */ true,
  1636                      /* silent       */ false,
  1637                      /* prev marking */ true);
  1638   }
  1640   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
  1641   g1p->record_concurrent_mark_cleanup_start();
  1643   double start = os::elapsedTime();
  1645   // Do counting once more with the world stopped for good measure.
  1646   G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
  1647                                         &_region_bm, &_card_bm);
  1648   if (ParallelGCThreads > 0) {
  1649     assert(g1h->check_heap_region_claim_values(
  1650                                                HeapRegion::InitialClaimValue),
  1651            "sanity check");
  1653     int n_workers = g1h->workers()->total_workers();
  1654     g1h->set_par_threads(n_workers);
  1655     g1h->workers()->run_task(&g1_par_count_task);
  1656     g1h->set_par_threads(0);
  1658     assert(g1h->check_heap_region_claim_values(
  1659                                              HeapRegion::FinalCountClaimValue),
  1660            "sanity check");
  1661   } else {
  1662     g1_par_count_task.work(0);
  1663   }
  1665   size_t known_garbage_bytes =
  1666     g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
  1667 #if 0
  1668   gclog_or_tty->print_cr("used %1.2lf, live %1.2lf, garbage %1.2lf",
  1669                          (double) g1_par_count_task.used_bytes() / (double) (1024 * 1024),
  1670                          (double) g1_par_count_task.live_bytes() / (double) (1024 * 1024),
  1671                          (double) known_garbage_bytes / (double) (1024 * 1024));
  1672 #endif // 0
  1673   g1p->set_known_garbage_bytes(known_garbage_bytes);
  1675   size_t start_used_bytes = g1h->used();
  1676   _at_least_one_mark_complete = true;
  1677   g1h->set_marking_complete();
  1679   double count_end = os::elapsedTime();
  1680   double this_final_counting_time = (count_end - start);
  1681   if (G1PrintParCleanupStats) {
  1682     gclog_or_tty->print_cr("Cleanup:");
  1683     gclog_or_tty->print_cr("  Finalize counting: %8.3f ms",
  1684                            this_final_counting_time*1000.0);
  1685   }
  1686   _total_counting_time += this_final_counting_time;
  1688   // Install newly created mark bitMap as "prev".
  1689   swapMarkBitMaps();
  1691   g1h->reset_gc_time_stamp();
  1693   // Note end of marking in all heap regions.
  1694   double note_end_start = os::elapsedTime();
  1695   G1ParNoteEndTask g1_par_note_end_task(g1h, _par_cleanup_thread_state);
  1696   if (ParallelGCThreads > 0) {
  1697     int n_workers = g1h->workers()->total_workers();
  1698     g1h->set_par_threads(n_workers);
  1699     g1h->workers()->run_task(&g1_par_note_end_task);
  1700     g1h->set_par_threads(0);
  1702     assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
  1703            "sanity check");
  1704   } else {
  1705     g1_par_note_end_task.work(0);
  1706   }
  1707   g1h->set_unclean_regions_coming(true);
  1708   double note_end_end = os::elapsedTime();
  1709   // Tell the mutators that there might be unclean regions coming...
  1710   if (G1PrintParCleanupStats) {
  1711     gclog_or_tty->print_cr("  note end of marking: %8.3f ms.",
  1712                            (note_end_end - note_end_start)*1000.0);
  1713   }
  1715   // Now we "scrub" remembered sets. Note that we must do this before the
  1716   // call below, since it affects the metric by which we sort the heap
  1717   // regions.
  1718   if (G1ScrubRemSets) {
  1719     double rs_scrub_start = os::elapsedTime();
  1720     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
  1721     if (ParallelGCThreads > 0) {
  1722       int n_workers = g1h->workers()->total_workers();
  1723       g1h->set_par_threads(n_workers);
  1724       g1h->workers()->run_task(&g1_par_scrub_rs_task);
  1725       g1h->set_par_threads(0);
  1727       assert(g1h->check_heap_region_claim_values(
  1728                                             HeapRegion::ScrubRemSetClaimValue),
  1729              "sanity check");
  1730     } else {
  1731       g1_par_scrub_rs_task.work(0);
  1732     }
  1734     double rs_scrub_end = os::elapsedTime();
  1735     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
  1736     _total_rs_scrub_time += this_rs_scrub_time;
  1737   }
  1739   // this will also free any regions totally full of garbage objects,
  1740   // and sort the regions.
  1741   g1h->g1_policy()->record_concurrent_mark_cleanup_end(
  1742                         g1_par_note_end_task.freed_bytes(),
  1743                         g1_par_note_end_task.max_live_bytes());
  1745   // Statistics.
  1746   double end = os::elapsedTime();
  1747   _cleanup_times.add((end - start) * 1000.0);
  1749   // G1CollectedHeap::heap()->print();
  1750   // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
  1751   // G1CollectedHeap::heap()->get_gc_time_stamp());
  1753   if (PrintGC || PrintGCDetails) {
  1754     g1h->print_size_transition(gclog_or_tty,
  1755                                start_used_bytes,
  1756                                g1h->used(),
  1757                                g1h->capacity());
  1758   }
  1760   size_t cleaned_up_bytes = start_used_bytes - g1h->used();
  1761   g1p->decrease_known_garbage_bytes(cleaned_up_bytes);
  1763   // We need to make this be a "collection" so any collection pause that
  1764   // races with it goes around and waits for completeCleanup to finish.
  1765   g1h->increment_total_collections();
  1767   if (VerifyDuringGC) {
  1768     HandleMark hm;  // handle scope
  1769     gclog_or_tty->print(" VerifyDuringGC:(after)");
  1770     Universe::heap()->prepare_for_verify();
  1771     Universe::verify(/* allow dirty  */ true,
  1772                      /* silent       */ false,
  1773                      /* prev marking */ true);
  1774   }
  1775 }
  1777 void ConcurrentMark::completeCleanup() {
  1778   // A full collection intervened.
  1779   if (has_aborted()) return;
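         // Drain each per-worker unclean region list: clear each region's
         // remembered set and move the region onto the global unclean list.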
  1781   int first = 0;
  1782   int last = (int)MAX2(ParallelGCThreads, (size_t)1);
  1783   for (int t = 0; t < last; t++) {
  1784     UncleanRegionList* list = &_par_cleanup_thread_state[t]->list;
  1785     assert(list->well_formed(), "Inv");
  1786     HeapRegion* hd = list->hd();
  1787     while (hd != NULL) {
  1788       // Now finish up the other stuff.
  1789       hd->rem_set()->clear();
  1790       HeapRegion* next_hd = hd->next_from_unclean_list();
  1791       (void)list->pop();
  1792       assert(list->hd() == next_hd, "how not?");
  1793       _g1h->put_region_on_unclean_list(hd);
  1794       if (!hd->isHumongous()) {
  1795         // This adds one to the _free_regions count.
  1796         _g1h->finish_free_region_work(0, 0, 1, NULL);
  1797       }
  1798       hd = list->hd();
  1799       assert(hd == next_hd, "how not?");
  1800     }
  1801   }
  1802 }
  1805 class G1CMIsAliveClosure: public BoolObjectClosure {
  1806   G1CollectedHeap* _g1;
  1807  public:
  1808   G1CMIsAliveClosure(G1CollectedHeap* g1) :
  1809     _g1(g1)
  1810   {}
  1812   void do_object(oop obj) {
  1813     assert(false, "not to be invoked");
  1814   }
  1815   bool do_object_b(oop obj) {
  1816     HeapWord* addr = (HeapWord*)obj;
  1817     return addr != NULL &&
  1818            (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
  1819   }
  1820 };
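       // "Keep alive" closure for reference processing: an object found
       // reachable but not yet marked is marked in the given bitmap and
       // pushed on the global mark stack for later scanning.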
  1822 class G1CMKeepAliveClosure: public OopClosure {
  1823   G1CollectedHeap* _g1;
  1824   ConcurrentMark*  _cm;
  1825   CMBitMap*        _bitMap;
  1826  public:
  1827   G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
  1828                        CMBitMap* bitMap) :
  1829     _g1(g1), _cm(cm),
  1830     _bitMap(bitMap) {}
  1832   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  1833   virtual void do_oop(      oop* p) { do_oop_work(p); }
  1835   template <class T> void do_oop_work(T* p) {
  1836     oop thisOop = oopDesc::load_decode_heap_oop(p);
  1837     HeapWord* addr = (HeapWord*)thisOop;
  1838     if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(thisOop)) {
  1839       _bitMap->mark(addr);
  1840       _cm->mark_stack_push(thisOop);
  1841     }
  1842   }
  1843 };
  1845 class G1CMDrainMarkingStackClosure: public VoidClosure {
  1846   CMMarkStack*                  _markStack;
  1847   CMBitMap*                     _bitMap;
  1848   G1CMKeepAliveClosure*         _oopClosure;
  1849  public:
  1850   G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
  1851                                G1CMKeepAliveClosure* oopClosure) :
  1852     _bitMap(bitMap),
  1853     _markStack(markStack),
  1854     _oopClosure(oopClosure)
  1855   {}
  1857   void do_void() {
  1858     _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
  1859   }
  1860 };
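       // Weak reference processing wires together the three closures above:
       // is-alive tests liveness against the current marking, keep-alive
       // marks and pushes newly-reachable referents, and the drain closure
       // empties the mark stack between phases.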
  1862 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
  1863   ResourceMark rm;
  1864   HandleMark   hm;
  1865   G1CollectedHeap* g1h   = G1CollectedHeap::heap();
  1866   ReferenceProcessor* rp = g1h->ref_processor();
  1868   // Process weak references.
  1869   rp->setup_policy(clear_all_soft_refs);
  1870   assert(_markStack.isEmpty(), "mark stack should be empty");
  1872   G1CMIsAliveClosure   g1IsAliveClosure  (g1h);
  1873   G1CMKeepAliveClosure g1KeepAliveClosure(g1h, this, nextMarkBitMap());
  1874   G1CMDrainMarkingStackClosure
  1875     g1DrainMarkingStackClosure(nextMarkBitMap(), &_markStack,
  1876                                &g1KeepAliveClosure);
  1878   // XXXYYY  Also: copy the parallel ref processing code from CMS.
  1879   rp->process_discovered_references(&g1IsAliveClosure,
  1880                                     &g1KeepAliveClosure,
  1881                                     &g1DrainMarkingStackClosure,
  1882                                     NULL);
  1883   assert(_markStack.overflow() || _markStack.isEmpty(),
  1884          "mark stack should be empty (unless it overflowed)");
  1885   if (_markStack.overflow()) {
  1886     set_has_overflown();
  1887   }
  1889   rp->enqueue_discovered_references();
  1890   rp->verify_no_references_recorded();
  1891   assert(!rp->discovery_enabled(), "should have been disabled");
  1893   // Now clean up stale oops in SymbolTable and StringTable
  1894   SymbolTable::unlink(&g1IsAliveClosure);
  1895   StringTable::unlink(&g1IsAliveClosure);
  1896 }
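       // After marking and counting complete, the "next" bitmap holds the
       // up-to-date liveness information, so the two bitmaps trade roles.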
  1898 void ConcurrentMark::swapMarkBitMaps() {
  1899   CMBitMapRO* temp = _prevMarkBitMap;
  1900   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
  1901   _nextMarkBitMap  = (CMBitMap*)  temp;
  1902 }
  1904 class CMRemarkTask: public AbstractGangTask {
  1905 private:
  1906   ConcurrentMark *_cm;
  1908 public:
  1909   void work(int worker_i) {
  1910     // Since all available tasks are actually started, we should
  1911     // only proceed if we're supposed to be active.
  1912     if ((size_t)worker_i < _cm->active_tasks()) {
  1913       CMTask* task = _cm->task(worker_i);
  1914       task->record_start_time();
  1915       do {
  1916         task->do_marking_step(1000000000.0 /* something very large */);
  1917       } while (task->has_aborted() && !_cm->has_overflown());
  1918       // If we overflow, then we do not want to restart. We instead
  1919       // want to abort remark and do concurrent marking again.
  1920       task->record_end_time();
  1921     }
  1922   }
  1924   CMRemarkTask(ConcurrentMark* cm) :
  1925     AbstractGangTask("Par Remark"), _cm(cm) { }
  1926 };
  1928 void ConcurrentMark::checkpointRootsFinalWork() {
  1929   ResourceMark rm;
  1930   HandleMark   hm;
  1931   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  1933   g1h->ensure_parsability(false);
  1935   if (ParallelGCThreads > 0) {
  1936     G1CollectedHeap::StrongRootsScope srs(g1h);
  1937     // this is remark, so we'll use up all available threads
  1938     int active_workers = ParallelGCThreads;
  1939     set_phase(active_workers, false);
  1941     CMRemarkTask remarkTask(this);
  1942     // We will start all available threads, even if we decide that the
  1943     // active_workers will be fewer. The extra ones will just bail out
  1944     // immediately.
  1945     int n_workers = g1h->workers()->total_workers();
  1946     g1h->set_par_threads(n_workers);
  1947     g1h->workers()->run_task(&remarkTask);
  1948     g1h->set_par_threads(0);
  1949   } else {
  1950     G1CollectedHeap::StrongRootsScope srs(g1h);
  1951     // this is remark, so we'll use up all available threads
  1952     int active_workers = 1;
  1953     set_phase(active_workers, false);
  1955     CMRemarkTask remarkTask(this);
  1956     // We will start all available threads, even if we decide that the
  1957     // active_workers will be fewer. The extra ones will just bail out
  1958     // immediately.
  1959     remarkTask.work(0);
  1960   }
  1961   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  1962   guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");
  1964   print_stats();
  1966   if (!restart_for_overflow())
  1967     set_non_marking_state();
  1969 #if VERIFY_OBJS_PROCESSED
  1970   if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
  1971     gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
  1972                            _scan_obj_cl.objs_processed,
  1973                            ThreadLocalObjQueue::objs_enqueued);
  1974     guarantee(_scan_obj_cl.objs_processed ==
  1975               ThreadLocalObjQueue::objs_enqueued,
  1976               "Different number of objs processed and enqueued.");
  1977   }
  1978 #endif
  1979 }
  1981 #ifndef PRODUCT
  1983 class PrintReachableOopClosure: public OopClosure {
  1984 private:
  1985   G1CollectedHeap* _g1h;
  1986   CMBitMapRO*      _bitmap;
  1987   outputStream*    _out;
  1988   bool             _use_prev_marking;
  1989   bool             _all;
  1991 public:
  1992   PrintReachableOopClosure(CMBitMapRO*   bitmap,
  1993                            outputStream* out,
  1994                            bool          use_prev_marking,
  1995                            bool          all) :
  1996     _g1h(G1CollectedHeap::heap()),
  1997     _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking), _all(all) { }
  1999   void do_oop(narrowOop* p) { do_oop_work(p); }
  2000   void do_oop(      oop* p) { do_oop_work(p); }
  2002   template <class T> void do_oop_work(T* p) {
  2003     oop         obj = oopDesc::load_decode_heap_oop(p);
  2004     const char* str = NULL;
  2005     const char* str2 = "";
  2007     if (obj == NULL) {
  2008       str = "";
  2009     } else if (!_g1h->is_in_g1_reserved(obj)) {
  2010       str = " O";
  2011     } else {
  2012       HeapRegion* hr  = _g1h->heap_region_containing(obj);
  2013       guarantee(hr != NULL, "invariant");
  2014       bool over_tams = false;
  2015       if (_use_prev_marking) {
  2016         over_tams = hr->obj_allocated_since_prev_marking(obj);
  2017       } else {
  2018         over_tams = hr->obj_allocated_since_next_marking(obj);
  2019       }
  2020       bool marked = _bitmap->isMarked((HeapWord*) obj);
  2022       if (over_tams) {
  2023         str = " >";
  2024         if (marked) {
  2025           str2 = " AND MARKED";
  2026         }
  2027       } else if (marked) {
  2028         str = " M";
  2029       } else {
  2030         str = " NOT";
  2031       }
  2032     }
  2034     _out->print_cr("  "PTR_FORMAT": "PTR_FORMAT"%s%s",
  2035                    p, (void*) obj, str, str2);
  2036   }
  2037 };
  2039 class PrintReachableObjectClosure : public ObjectClosure {
  2040 private:
  2041   CMBitMapRO*   _bitmap;
  2042   outputStream* _out;
  2043   bool          _use_prev_marking;
  2044   bool          _all;
  2045   HeapRegion*   _hr;
  2047 public:
  2048   PrintReachableObjectClosure(CMBitMapRO*   bitmap,
  2049                               outputStream* out,
  2050                               bool          use_prev_marking,
  2051                               bool          all,
  2052                               HeapRegion*   hr) :
  2053     _bitmap(bitmap), _out(out),
  2054     _use_prev_marking(use_prev_marking), _all(all), _hr(hr) { }
  2056   void do_object(oop o) {
  2057     bool over_tams;
  2058     if (_use_prev_marking) {
  2059       over_tams = _hr->obj_allocated_since_prev_marking(o);
  2060     } else {
  2061       over_tams = _hr->obj_allocated_since_next_marking(o);
  2062     }
  2063     bool marked = _bitmap->isMarked((HeapWord*) o);
  2064     bool print_it = _all || over_tams || marked;
  2066     if (print_it) {
  2067       _out->print_cr(" "PTR_FORMAT"%s",
  2068                      o, (over_tams) ? " >" : (marked) ? " M" : "");
  2069       PrintReachableOopClosure oopCl(_bitmap, _out, _use_prev_marking, _all);
  2070       o->oop_iterate(&oopCl);
  2071     }
  2072   }
  2073 };
  2075 class PrintReachableRegionClosure : public HeapRegionClosure {
  2076 private:
  2077   CMBitMapRO*   _bitmap;
  2078   outputStream* _out;
  2079   bool          _use_prev_marking;
  2080   bool          _all;
  2082 public:
  2083   bool doHeapRegion(HeapRegion* hr) {
  2084     HeapWord* b = hr->bottom();
  2085     HeapWord* e = hr->end();
  2086     HeapWord* t = hr->top();
  2087     HeapWord* p = NULL;
  2088     if (_use_prev_marking) {
  2089       p = hr->prev_top_at_mark_start();
  2090     } else {
  2091       p = hr->next_top_at_mark_start();
  2092     }
  2093     _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
  2094                    "TAMS: "PTR_FORMAT, b, e, t, p);
  2095     _out->cr();
  2097     HeapWord* from = b;
  2098     HeapWord* to   = t;
  2100     if (to > from) {
  2101       _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
  2102       _out->cr();
  2103       PrintReachableObjectClosure ocl(_bitmap, _out,
  2104                                       _use_prev_marking, _all, hr);
  2105       hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
  2106       _out->cr();
  2107     }
  2109     return false;
  2110   }
  2112   PrintReachableRegionClosure(CMBitMapRO*   bitmap,
  2113                               outputStream* out,
  2114                               bool          use_prev_marking,
  2115                               bool          all) :
  2116     _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking), _all(all) { }
  2117 };
  2119 void ConcurrentMark::print_reachable(const char* str,
  2120                                      bool use_prev_marking,
  2121                                      bool all) {
  2122   gclog_or_tty->cr();
  2123   gclog_or_tty->print_cr("== Doing heap dump... ");
  2125   if (G1PrintReachableBaseFile == NULL) {
  2126     gclog_or_tty->print_cr("  #### error: no base file defined");
  2127     return;
  2128   }
  2130   if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
  2131       (JVM_MAXPATHLEN - 1)) {
  2132     gclog_or_tty->print_cr("  #### error: file name too long");
  2133     return;
  2134   }
  2136   char file_name[JVM_MAXPATHLEN];
  2137   sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
  2138   gclog_or_tty->print_cr("  dumping to file %s", file_name);
  2140   fileStream fout(file_name);
  2141   if (!fout.is_open()) {
  2142     gclog_or_tty->print_cr("  #### error: could not open file");
  2143     return;
  2144   }
  2146   outputStream* out = &fout;
  2148   CMBitMapRO* bitmap = NULL;
  2149   if (use_prev_marking) {
  2150     bitmap = _prevMarkBitMap;
  2151   } else {
  2152     bitmap = _nextMarkBitMap;
  2153   }
  2155   out->print_cr("-- USING %s", (use_prev_marking) ? "PTAMS" : "NTAMS");
  2156   out->cr();
  2158   out->print_cr("--- ITERATING OVER REGIONS");
  2159   out->cr();
  2160   PrintReachableRegionClosure rcl(bitmap, out, use_prev_marking, all);
  2161   _g1h->heap_region_iterate(&rcl);
  2162   out->cr();
  2164   gclog_or_tty->print_cr("  done");
  2165   gclog_or_tty->flush();
  2166 }
  2168 #endif // PRODUCT
  2170 // This note is for drainAllSATBBuffers and the code in between.
  2171 // In the future we could reuse a task to do this work during an
  2172 // evacuation pause (since now tasks are not active and can be claimed
  2173 // during an evacuation pause). This was a late change to the code and
  2174 // is currently not being taken advantage of.
  2176 class CMGlobalObjectClosure : public ObjectClosure {
  2177 private:
  2178   ConcurrentMark* _cm;
  2180 public:
  2181   void do_object(oop obj) {
  2182     _cm->deal_with_reference(obj);
  2183   }
  2185   CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
  2186 };
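       // Grays an object discovered during an evacuation pause: if it lies
       // in the G1 heap and is not yet marked, mark it in the next bitmap;
       // if it is below the global finger it must also be pushed on the
       // mark stack, since the bitmap iteration will not revisit it.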
  2188 void ConcurrentMark::deal_with_reference(oop obj) {
  2189   if (verbose_high())
  2190     gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
  2191                            (void*) obj);
  2194   HeapWord* objAddr = (HeapWord*) obj;
  2195   assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
  2196   if (_g1h->is_in_g1_reserved(objAddr)) {
  2197     assert(obj != NULL, "is_in_g1_reserved should ensure this");
  2198     HeapRegion* hr = _g1h->heap_region_containing(obj);
  2199     if (_g1h->is_obj_ill(obj, hr)) {
  2200       if (verbose_high())
  2201         gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
  2202                                "marked", (void*) obj);
  2204       // we need to mark it first
  2205       if (_nextMarkBitMap->parMark(objAddr)) {
  2206         // No OrderAccess::store_load() is needed. It is implicit in the
  2207         // CAS done in parMark(objAddr) above
  2208         HeapWord* finger = _finger;
  2209         if (objAddr < finger) {
  2210           if (verbose_high())
  2211             gclog_or_tty->print_cr("[global] below the global finger "
  2212                                    "("PTR_FORMAT"), pushing it", finger);
  2213           if (!mark_stack_push(obj)) {
  2214             if (verbose_low())
  2215               gclog_or_tty->print_cr("[global] global stack overflow during "
  2216                                      "deal_with_reference");
  2217           }
  2218         }
  2219       }
  2220     }
  2221   }
  2222 }
  2224 void ConcurrentMark::drainAllSATBBuffers() {
  2225   CMGlobalObjectClosure oc(this);
  2226   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  2227   satb_mq_set.set_closure(&oc);
  2229   while (satb_mq_set.apply_closure_to_completed_buffer()) {
  2230     if (verbose_medium())
  2231       gclog_or_tty->print_cr("[global] processed an SATB buffer");
  2232   }
  2234   // no need to check whether we should do this, as this is only
  2235   // called during an evacuation pause
  2236   satb_mq_set.iterate_closure_all_threads();
  2238   satb_mq_set.set_closure(NULL);
  2239   assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
  2240 }
  2242 void ConcurrentMark::markPrev(oop p) {
  2243   // Note we are overriding the read-only view of the prev map here, via
  2244   // the cast.
  2245   ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
  2246 }
  2248 void ConcurrentMark::clear(oop p) {
  2249   assert(p != NULL && p->is_oop(), "expected an oop");
  2250   HeapWord* addr = (HeapWord*)p;
  2251   assert(addr >= _nextMarkBitMap->startWord() &&
  2252          addr < _nextMarkBitMap->endWord(), "in a region");
  2254   _nextMarkBitMap->clear(addr);
  2255 }
  2257 void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
  2258   // Note we are overriding the read-only view of the prev map here, via
  2259   // the cast.
  2260   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
  2261   _nextMarkBitMap->clearRange(mr);
  2262 }
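       // Claims the region containing the global finger, lock-free: read
       // the finger, find the containing region, and CAS the finger to the
       // region's end. On failure another task moved the finger, so re-read
       // and retry. NULL is returned either for an empty region (the caller
       // should retry) or when the finger has reached the end of the heap.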
  2264 HeapRegion*
  2265 ConcurrentMark::claim_region(int task_num) {
  2266   // "checkpoint" the finger
  2267   HeapWord* finger = _finger;
  2269   // _heap_end will not change underneath our feet; it only changes at
  2270   // yield points.
  2271   while (finger < _heap_end) {
  2272     assert(_g1h->is_in_g1_reserved(finger), "invariant");
  2274     // is the gap between reading the finger and doing the CAS too long?
  2276     HeapRegion* curr_region   = _g1h->heap_region_containing(finger);
  2277     HeapWord*   bottom        = curr_region->bottom();
  2278     HeapWord*   end           = curr_region->end();
  2279     HeapWord*   limit         = curr_region->next_top_at_mark_start();
  2281     if (verbose_low())
  2282       gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
  2283                              "["PTR_FORMAT", "PTR_FORMAT"), "
  2284                              "limit = "PTR_FORMAT,
  2285                              task_num, curr_region, bottom, end, limit);
  2287     HeapWord* res =
  2288       (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
  2289     if (res == finger) {
  2290       // we succeeded
  2292       // notice that _finger == end cannot be guaranteed here since,
  2293       // someone else might have moved the finger even further
  2294       assert(_finger >= end, "the finger should have moved forward");
  2296       if (verbose_low())
  2297         gclog_or_tty->print_cr("[%d] we were successful with region = "
  2298                                PTR_FORMAT, task_num, curr_region);
  2300       if (limit > bottom) {
  2301         if (verbose_low())
  2302           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
  2303                                  "returning it ", task_num, curr_region);
  2304         return curr_region;
  2305       } else {
  2306         assert(limit == bottom,
  2307                "the region limit should be at bottom");
  2308         if (verbose_low())
  2309           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
  2310                                  "returning NULL", task_num, curr_region);
  2311         // we return NULL and the caller should try calling
  2312         // claim_region() again.
  2313         return NULL;
  2314       }
  2315     } else {
  2316       assert(_finger > finger, "the finger should have moved forward");
  2317       if (verbose_low())
  2318         gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
  2319                                "global finger = "PTR_FORMAT", "
  2320                                "our finger = "PTR_FORMAT,
  2321                                task_num, _finger, finger);
  2323       // read it again
  2324       finger = _finger;
  2325     }
  2326   }
  2328   return NULL;
  2329 }
  2331 void ConcurrentMark::oops_do(OopClosure* cl) {
  2332   if (_markStack.size() > 0 && verbose_low())
  2333     gclog_or_tty->print_cr("[global] scanning the global marking stack, "
  2334                            "size = %d", _markStack.size());
  2335   // we first iterate over the contents of the mark stack...
  2336   _markStack.oops_do(cl);
  2338   for (int i = 0; i < (int)_max_task_num; ++i) {
  2339     OopTaskQueue* queue = _task_queues->queue((int)i);
  2341     if (queue->size() > 0 && verbose_low())
  2342       gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
  2343                              "size = %d", i, queue->size());
  2345     // ...then over the contents of all the task queues.
  2346     queue->oops_do(cl);
  2347   }
  2349   // finally, invalidate any entries in the region stack that
  2350   // point into the collection set
  2351   if (_regionStack.invalidate_entries_into_cset()) {
  2352     // otherwise, any gray objects copied during the evacuation pause
  2353     // might not be visited.
  2354     assert(_should_gray_objects, "invariant");
  2355   }
  2356 }
  2358 void ConcurrentMark::clear_marking_state() {
  2359   _markStack.setEmpty();
  2360   _markStack.clear_overflow();
  2361   _regionStack.setEmpty();
  2362   _regionStack.clear_overflow();
  2363   clear_has_overflown();
  2364   _finger = _heap_start;
  2366   for (int i = 0; i < (int)_max_task_num; ++i) {
  2367     OopTaskQueue* queue = _task_queues->queue(i);
  2368     queue->set_empty();
  2369   }
  2370 }
  2372 void ConcurrentMark::print_stats() {
  2373   if (verbose_stats()) {
  2374     gclog_or_tty->print_cr("---------------------------------------------------------------------");
  2375     for (size_t i = 0; i < _active_tasks; ++i) {
  2376       _tasks[i]->print_stats();
  2377       gclog_or_tty->print_cr("---------------------------------------------------------------------");
  2378     }
  2379   }
  2380 }
  2382 class CSMarkOopClosure: public OopClosure {
  2383   friend class CSMarkBitMapClosure;
  2385   G1CollectedHeap* _g1h;
  2386   CMBitMap*        _bm;
  2387   ConcurrentMark*  _cm;
  2388   oop*             _ms;
  2389   jint*            _array_ind_stack;
  2390   int              _ms_size;
  2391   int              _ms_ind;
  2392   int              _array_increment;
  2394   bool push(oop obj, int arr_ind = 0) {
  2395     if (_ms_ind == _ms_size) {
  2396       gclog_or_tty->print_cr("Mark stack is full.");
  2397       return false;
  2398     }
  2399     _ms[_ms_ind] = obj;
  2400     if (obj->is_objArray()) _array_ind_stack[_ms_ind] = arr_ind;
  2401     _ms_ind++;
  2402     return true;
  2403   }
  2405   oop pop() {
  2406     if (_ms_ind == 0) return NULL;
  2407     else {
  2408       _ms_ind--;
  2409       return _ms[_ms_ind];
  2410     }
  2411   }
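         // Drains the local mark stack. Object arrays are processed in
         // chunks of _array_increment references: the array is re-pushed
         // with the index of the next chunk, bounding the stack space any
         // single array can consume.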
  2413   template <class T> bool drain() {
  2414     while (_ms_ind > 0) {
  2415       oop obj = pop();
  2416       assert(obj != NULL, "Since index was non-zero.");
  2417       if (obj->is_objArray()) {
  2418         jint arr_ind = _array_ind_stack[_ms_ind];
  2419         objArrayOop aobj = objArrayOop(obj);
  2420         jint len = aobj->length();
  2421         jint next_arr_ind = arr_ind + _array_increment;
  2422         if (next_arr_ind < len) {
  2423           push(obj, next_arr_ind);
  2424         }
  2425         // Now process this portion of this one.
  2426         int lim = MIN2(next_arr_ind, len);
  2427         for (int j = arr_ind; j < lim; j++) {
  2428           do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j));
  2429         }
  2431       } else {
  2432         obj->oop_iterate(this);
  2433       }
  2434       if (abort()) return false;
  2435     }
  2436     return true;
  2437   }
  2439 public:
  2440   CSMarkOopClosure(ConcurrentMark* cm, int ms_size) :
  2441     _g1h(G1CollectedHeap::heap()),
  2442     _cm(cm),
  2443     _bm(cm->nextMarkBitMap()),
  2444     _ms_size(ms_size), _ms_ind(0),
  2445     _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
  2446     _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
  2447     _array_increment(MAX2(ms_size/8, 16))
  2448   {}
  2450   ~CSMarkOopClosure() {
  2451     FREE_C_HEAP_ARRAY(oop, _ms);
  2452     FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
  2453   }
  2455   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  2456   virtual void do_oop(      oop* p) { do_oop_work(p); }
  2458   template <class T> void do_oop_work(T* p) {
  2459     T heap_oop = oopDesc::load_heap_oop(p);
  2460     if (oopDesc::is_null(heap_oop)) return;
  2461     oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
  2462     if (obj->is_forwarded()) {
  2463       // If the object has already been forwarded, we have to make sure
  2464       // that it's marked.  So follow the forwarding pointer.  Note that
  2465       // this does the right thing for self-forwarding pointers in the
  2466       // evacuation failure case.
  2467       obj = obj->forwardee();
  2469     HeapRegion* hr = _g1h->heap_region_containing(obj);
  2470     if (hr != NULL) {
  2471       if (hr->in_collection_set()) {
  2472         if (_g1h->is_obj_ill(obj)) {
  2473           _bm->mark((HeapWord*)obj);
  2474           if (!push(obj)) {
  2475             gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
  2476             set_abort();
  2477           }
  2478         }
  2479       } else {
  2480         // Outside the collection set; we need to gray it
  2481         _cm->deal_with_reference(obj);
  2482       }
  2483     }
  2484   }
  2485 };
  2487 class CSMarkBitMapClosure: public BitMapClosure {
  2488   G1CollectedHeap* _g1h;
  2489   CMBitMap*        _bitMap;
  2490   ConcurrentMark*  _cm;
  2491   CSMarkOopClosure _oop_cl;
  2492 public:
  2493   CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :
  2494     _g1h(G1CollectedHeap::heap()),
  2495     _bitMap(cm->nextMarkBitMap()),
  2496     _oop_cl(cm, ms_size)
  2497   {}
  2499   ~CSMarkBitMapClosure() {}
  2501   bool do_bit(size_t offset) {
  2502     // convert offset into a HeapWord*
  2503     HeapWord* addr = _bitMap->offsetToHeapWord(offset);
  2504     assert(_bitMap->startWord() <= addr && addr < _bitMap->endWord(),
  2505            "address out of range");
  2506     assert(_bitMap->isMarked(addr), "tautology");
  2507     oop obj = oop(addr);
  2508     if (!obj->is_forwarded()) {
  2509       if (!_oop_cl.push(obj)) return false;
  2510       if (UseCompressedOops) {
  2511         if (!_oop_cl.drain<narrowOop>()) return false;
  2512       } else {
  2513         if (!_oop_cl.drain<oop>()) return false;
  2514       }
  2515     }
  2516     // Otherwise...
  2517     return true;
  2518   }
  2519 };
  2522 class CompleteMarkingInCSHRClosure: public HeapRegionClosure {
  2523   CMBitMap* _bm;
  2524   CSMarkBitMapClosure _bit_cl;
  2525   enum SomePrivateConstants {
  2526     MSSize = 1000
  2527   };
  2528   bool _completed;
  2529 public:
  2530   CompleteMarkingInCSHRClosure(ConcurrentMark* cm) :
  2531     _bm(cm->nextMarkBitMap()),
  2532     _bit_cl(cm, MSSize),
  2533     _completed(true)
  2534   {}
  2536   ~CompleteMarkingInCSHRClosure() {}
  2538   bool doHeapRegion(HeapRegion* r) {
  2539     if (!r->evacuation_failed()) {
  2540       MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
  2541       if (!mr.is_empty()) {
  2542         if (!_bm->iterate(&_bit_cl, mr)) {
  2543           _completed = false;
  2544           return true;
  2545         }
  2546       }
  2547     }
  2548     return false;
  2549   }
  2551   bool completed() { return _completed; }
  2552 };
  2554 class ClearMarksInHRClosure: public HeapRegionClosure {
  2555   CMBitMap* _bm;
  2556 public:
  2557   ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { }
  2559   bool doHeapRegion(HeapRegion* r) {
  2560     if (!r->used_region().is_empty() && !r->evacuation_failed()) {
  2561       MemRegion usedMR = r->used_region();
  2562       _bm->clearRange(usedMR);
  2563     }
  2564     return false;
  2565   }
  2566 };
  2568 void ConcurrentMark::complete_marking_in_collection_set() {
  2569   G1CollectedHeap* g1h =  G1CollectedHeap::heap();
  2571   if (!g1h->mark_in_progress()) {
  2572     g1h->g1_policy()->record_mark_closure_time(0.0);
  2573     return;
  2574   }
  2576   int i = 1;
  2577   double start = os::elapsedTime();
  2578   while (true) {
  2579     i++;
  2580     CompleteMarkingInCSHRClosure cmplt(this);
  2581     g1h->collection_set_iterate(&cmplt);
  2582     if (cmplt.completed()) break;
  2583   }
  2584   double end_time = os::elapsedTime();
  2585   double elapsed_time_ms = (end_time - start) * 1000.0;
  2586   g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
  2587   if (PrintGCDetails) {
  2588     gclog_or_tty->print_cr("Mark closure took %5.2f ms.", elapsed_time_ms);
  2589   }
  2591   ClearMarksInHRClosure clr(nextMarkBitMap());
  2592   g1h->collection_set_iterate(&clr);
  2593 }
  2595 // The next two methods deal with the following optimisation. Some
  2596 // objects are gray by being marked and located above the finger. If
  2597 // they are copied, during an evacuation pause, below the finger then
  2598 // they need to be pushed on the stack. The observation is that, if
  2599 // there are no regions in the collection set located above the
  2600 // finger, then the above cannot happen, hence we do not need to
  2601 // explicitly gray any objects when copying them to below the
  2602 // finger. The global stack will be scanned to ensure that, if it
  2603 // points to objects being copied, it will update their
  2604 // location. There is a tricky situation with the gray objects in
  2605 // the region stack that are being copied, however. See the comment in
  2606 // newCSet().
  2608 void ConcurrentMark::newCSet() {
  2609   if (!concurrent_marking_in_progress())
  2610     // nothing to do if marking is not in progress
  2611     return;
  2613   // find what the lowest finger is among the global and local fingers
  2614   _min_finger = _finger;
  2615   for (int i = 0; i < (int)_max_task_num; ++i) {
  2616     CMTask* task = _tasks[i];
  2617     HeapWord* task_finger = task->finger();
  2618     if (task_finger != NULL && task_finger < _min_finger)
  2619       _min_finger = task_finger;
  2620   }
  2622   _should_gray_objects = false;
  2624   // This fixes a very subtle and frustrating bug. It might be the case
  2625   // that, during an evacuation pause, heap regions that contain
  2626   // objects that are gray (by being in regions contained in the
  2627   // region stack) are included in the collection set. Since such gray
  2628   // objects will be moved, and because it's not easy to redirect
  2629   // region stack entries to point to a new location (because objects
  2630   // in one region might be scattered to multiple regions after they
  2631   // are copied), one option is to ensure that all marked objects
  2632   // copied during a pause are pushed on the stack. Notice, however,
  2633   // that this problem can only happen when the region stack is not
  2634   // empty during an evacuation pause. So, we make the fix a bit less
  2635   // conservative and ensure that regions are pushed on the stack,
  2636   // irrespective of whether all collection set regions are below the
  2637   // finger, if the region stack is not empty. This is expected to be
  2638   // a rare case, so I don't think it's necessary to be smarter about it.
  2639   if (!region_stack_empty())
  2640     _should_gray_objects = true;
  2641 }
  2643 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
  2644   if (!concurrent_marking_in_progress())
  2645     return;
  2647   HeapWord* region_end = hr->end();
  2648   if (region_end > _min_finger)
  2649     _should_gray_objects = true;
  2650 }
  2652 // abandon current marking iteration due to a Full GC
  2653 void ConcurrentMark::abort() {
  2654   // Clear all marks to force marking thread to do nothing
  2655   _nextMarkBitMap->clearAll();
  2656   // Empty mark stack
  2657   clear_marking_state();
  2658   for (int i = 0; i < (int)_max_task_num; ++i)
  2659     _tasks[i]->clear_region_fields();
  2660   _has_aborted = true;
  2662   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  2663   satb_mq_set.abandon_partial_marking();
  2664   // This can be called either during or outside marking, we'll read
  2665   // the expected_active value from the SATB queue set.
  2666   satb_mq_set.set_active_all_threads(
  2667                                  false, /* new active value */
  2668                                  satb_mq_set.is_active() /* expected_active */);
  2669 }
  2671 static void print_ms_time_info(const char* prefix, const char* name,
  2672                                NumberSeq& ns) {
  2673   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
  2674                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
  2675   if (ns.num() > 0) {
  2676     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
  2677                            prefix, ns.sd(), ns.maximum());
  2678   }
  2679 }
  2681 void ConcurrentMark::print_summary_info() {
  2682   gclog_or_tty->print_cr(" Concurrent marking:");
  2683   print_ms_time_info("  ", "init marks", _init_times);
  2684   print_ms_time_info("  ", "remarks", _remark_times);
  2685   {
  2686     print_ms_time_info("     ", "final marks", _remark_mark_times);
  2687     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
  2688   }
  2690   print_ms_time_info("  ", "cleanups", _cleanup_times);
  2691   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
  2692                          _total_counting_time,
  2693                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
  2694                           (double)_cleanup_times.num()
  2695                          : 0.0));
  2696   if (G1ScrubRemSets) {
  2697     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
  2698                            _total_rs_scrub_time,
  2699                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
  2700                             (double)_cleanup_times.num()
  2701                            : 0.0));
  2702   }
  2703   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
  2704                          (_init_times.sum() + _remark_times.sum() +
  2705                           _cleanup_times.sum())/1000.0);
  2706   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
  2707                 "(%8.2f s marking, %8.2f s counting).",
  2708                 cmThread()->vtime_accum(),
  2709                 cmThread()->vtime_mark_accum(),
  2710                 cmThread()->vtime_count_accum());
  2711 }
  2713 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
  2714   _parallel_workers->print_worker_threads_on(st);
  2715 }
  2717 // Closures
  2718 // XXX: there seems to be a lot of code duplication here;
  2719 // should refactor and consolidate the shared code.
  2721 // This closure is used to mark refs into the CMS generation in
  2722 // the CMS bit map. Called at the first checkpoint.
  2724 // We take a break if someone is trying to stop the world.
  2725 bool ConcurrentMark::do_yield_check(int worker_i) {
  2726   if (should_yield()) {
  2727     if (worker_i == 0)
  2728       _g1h->g1_policy()->record_concurrent_pause();
  2729     cmThread()->yield();
  2730     if (worker_i == 0)
  2731       _g1h->g1_policy()->record_concurrent_pause_end();
  2732     return true;
  2733   } else {
  2734     return false;
  2735   }
  2736 }
  2738 bool ConcurrentMark::should_yield() {
  2739   return cmThread()->should_yield();
  2740 }
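       // Returns whether the card spanning p was recorded live in the card
       // bitmap: the byte offset of p from the start of the reserved heap,
       // shifted right by card_shift, is the card's index in the bitmap.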
  2742 bool ConcurrentMark::containing_card_is_marked(void* p) {
  2743   size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
  2744   return _card_bm.at(offset >> CardTableModRefBS::card_shift);
  2745 }
  2747 bool ConcurrentMark::containing_cards_are_marked(void* start,
  2748                                                  void* last) {
  2749   return
  2750     containing_card_is_marked(start) &&
  2751     containing_card_is_marked(last);
  2752 }
  2754 #ifndef PRODUCT
  2755 // for debugging purposes
  2756 void ConcurrentMark::print_finger() {
  2757   gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
  2758                          _heap_start, _heap_end, _finger);
  2759   for (int i = 0; i < (int) _max_task_num; ++i) {
  2760     gclog_or_tty->print("   %d: "PTR_FORMAT, i, _tasks[i]->finger());
  2761   }
  2762   gclog_or_tty->print_cr("");
  2763 }
  2764 #endif
  2766 // Closure for iteration over bitmaps
  2767 class CMBitMapClosure : public BitMapClosure {
  2768 private:
  2769   // the bitmap that is being iterated over
  2770   CMBitMap*                   _nextMarkBitMap;
  2771   ConcurrentMark*             _cm;
  2772   CMTask*                     _task;
  2773   // true if we're scanning a heap region claimed by the task (so that
  2774   // we move the finger along), false if we're not, i.e. currently when
  2775   // scanning a heap region popped from the region stack (so that we
  2776   // do not move the task finger along; it'd be a mistake if we did so).
  2777   bool                        _scanning_heap_region;
  2779 public:
  2780   CMBitMapClosure(CMTask *task,
  2781                   ConcurrentMark* cm,
  2782                   CMBitMap* nextMarkBitMap)
  2783     :  _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
  2785   void set_scanning_heap_region(bool scanning_heap_region) {
  2786     _scanning_heap_region = scanning_heap_region;
  2787   }
  2789   bool do_bit(size_t offset) {
  2790     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
  2791     assert(_nextMarkBitMap->isMarked(addr), "invariant");
  2792     assert( addr < _cm->finger(), "invariant");
  2794     if (_scanning_heap_region) {
  2795       statsOnly( _task->increase_objs_found_on_bitmap() );
  2796       assert(addr >= _task->finger(), "invariant");
  2797       // We move the task's local finger along.
  2798       _task->move_finger_to(addr);
  2799     } else {
  2800       // We move the task's region finger along.
  2801       _task->move_region_finger_to(addr);
  2802     }
  2804     _task->scan_object(oop(addr));
  2805     // we only partially drain the local queue and global stack
  2806     _task->drain_local_queue(true);
  2807     _task->drain_global_stack(true);
  2809     // if the has_aborted flag has been raised, we need to bail out of
  2810     // the iteration
  2811     return !_task->has_aborted();
  2812   }
  2813 };
  2815 // Closure for iterating over objects, currently only used for
  2816 // processing SATB buffers.
  2817 class CMObjectClosure : public ObjectClosure {
  2818 private:
  2819   CMTask* _task;
  2821 public:
  2822   void do_object(oop obj) {
  2823     _task->deal_with_reference(obj);
  2824   }
  2826   CMObjectClosure(CMTask* task) : _task(task) { }
  2827 };
  2829 // Closure for iterating over object fields
  2830 class CMOopClosure : public OopClosure {
  2831 private:
  2832   G1CollectedHeap*   _g1h;
  2833   ConcurrentMark*    _cm;
  2834   CMTask*            _task;
  2836 public:
  2837   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  2838   virtual void do_oop(      oop* p) { do_oop_work(p); }
  2840   template <class T> void do_oop_work(T* p) {
  2841     assert(_g1h->is_in_g1_reserved((HeapWord*) p), "invariant");
  2842     assert(!_g1h->heap_region_containing((HeapWord*) p)->is_on_free_list(),
  2843            "invariant");
  2845     oop obj = oopDesc::load_decode_heap_oop(p);
  2846     if (_cm->verbose_high())
  2847       gclog_or_tty->print_cr("[%d] we're looking at location "
  2848                              "*"PTR_FORMAT" = "PTR_FORMAT,
  2849                              _task->task_id(), p, (void*) obj);
  2850     _task->deal_with_reference(obj);
  2851   }
  2853   CMOopClosure(G1CollectedHeap* g1h,
  2854                ConcurrentMark* cm,
  2855                CMTask* task)
  2856     : _g1h(g1h), _cm(cm), _task(task) { }
  2857 };
  2859 void CMTask::setup_for_region(HeapRegion* hr) {
  2860   // Separated the asserts so that we know which one fires.
  2861   assert(hr != NULL,
  2862         "claim_region() should have filtered out continues humongous regions");
  2863   assert(!hr->continuesHumongous(),
  2864         "claim_region() should have filtered out continues humongous regions");
  2866   if (_cm->verbose_low())
  2867     gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
  2868                            _task_id, hr);
  2870   _curr_region  = hr;
  2871   _finger       = hr->bottom();
  2872   update_region_limit();
  2873 }
  2875 void CMTask::update_region_limit() {
  2876   HeapRegion* hr            = _curr_region;
  2877   HeapWord* bottom          = hr->bottom();
  2878   HeapWord* limit           = hr->next_top_at_mark_start();
  2880   if (limit == bottom) {
  2881     if (_cm->verbose_low())
  2882       gclog_or_tty->print_cr("[%d] found an empty region "
  2883                              "["PTR_FORMAT", "PTR_FORMAT")",
  2884                              _task_id, bottom, limit);
  2885     // The region was collected underneath our feet.
  2886     // We set the finger to bottom to ensure that the bitmap
  2887     // iteration that will follow this will not do anything.
  2888     // (this is not a condition that holds when we set the region up,
  2889     // as the region is not supposed to be empty in the first place)
  2890     _finger = bottom;
  2891   } else if (limit >= _region_limit) {
  2892     assert(limit >= _finger, "peace of mind");
  2893   } else {
  2894     assert(limit < _region_limit, "only way to get here");
  2895     // This can happen under some pretty unusual circumstances.  An
  2896     // evacuation pause empties the region underneath our feet (NTAMS
  2897     // at bottom). We then do some allocation in the region (NTAMS
  2898     // stays at bottom), followed by the region being used as a GC
  2899     // alloc region (NTAMS will move to top() and the objects
  2900     // originally below it will be grayed). All objects now marked in
  2901     // the region are explicitly grayed, if below the global finger,
  2902     // and in fact we do not need to scan anything else. So, we simply
  2903     // set _finger to be limit to ensure that the bitmap iteration
  2904     // doesn't do anything.
  2905     _finger = limit;
  2906   }
  2908   _region_limit = limit;
  2909 }
  2911 void CMTask::giveup_current_region() {
  2912   assert(_curr_region != NULL, "invariant");
  2913   if (_cm->verbose_low())
  2914     gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
  2915                            _task_id, _curr_region);
  2916   clear_region_fields();
  2917 }
  2919 void CMTask::clear_region_fields() {
  2920   // Values for these three fields that indicate that we're not
  2921   // holding on to a region.
  2922   _curr_region   = NULL;
  2923   _finger        = NULL;
  2924   _region_limit  = NULL;
  2926   _region_finger = NULL;
  2927 }
  2929 void CMTask::reset(CMBitMap* nextMarkBitMap) {
  2930   guarantee(nextMarkBitMap != NULL, "invariant");
  2932   if (_cm->verbose_low())
  2933     gclog_or_tty->print_cr("[%d] resetting", _task_id);
  2935   _nextMarkBitMap                = nextMarkBitMap;
  2936   clear_region_fields();
  2938   _calls                         = 0;
  2939   _elapsed_time_ms               = 0.0;
  2940   _termination_time_ms           = 0.0;
  2941   _termination_start_time_ms     = 0.0;
  2943 #if _MARKING_STATS_
  2944   _local_pushes                  = 0;
  2945   _local_pops                    = 0;
  2946   _local_max_size                = 0;
  2947   _objs_scanned                  = 0;
  2948   _global_pushes                 = 0;
  2949   _global_pops                   = 0;
  2950   _global_max_size               = 0;
  2951   _global_transfers_to           = 0;
  2952   _global_transfers_from         = 0;
  2953   _region_stack_pops             = 0;
  2954   _regions_claimed               = 0;
  2955   _objs_found_on_bitmap          = 0;
  2956   _satb_buffers_processed        = 0;
  2957   _steal_attempts                = 0;
  2958   _steals                        = 0;
  2959   _aborted                       = 0;
  2960   _aborted_overflow              = 0;
  2961   _aborted_cm_aborted            = 0;
  2962   _aborted_yield                 = 0;
  2963   _aborted_timed_out             = 0;
  2964   _aborted_satb                  = 0;
  2965   _aborted_termination           = 0;
  2966 #endif // _MARKING_STATS_
  2967 }
  2969 bool CMTask::should_exit_termination() {
  2970   regular_clock_call();
  2971   // This is called when we are in the termination protocol. We should
  2972   // quit if, for some reason, this task wants to abort or the global
  2973   // stack is not empty (this means that we can get work from it).
  2974   return !_cm->mark_stack_empty() || has_aborted();
  2975 }
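// For context, a sketch (with simplified, hypothetical names; see
// ParallelTaskTerminator for the real code) of how offer_termination()
// polls this hook while the task waits for its peers:
//
//   while (!all_tasks_have_offered_termination()) {
//     if (task->should_exit_termination())
//       return false;  // leave the protocol; there may be work again
//   }
//   return true;       // everyone has offered: marking is finished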
  2977 // This determines whether the method below will check both the local
  2978 // and global fingers when deciding whether to push a gray object on
  2979 // the stack (value 1) or whether it will only check the global one
  2980 // (value 0). The tradeoff is that the former will be a bit more
  2981 // accurate and possibly push fewer entries on the stack, but it might
  2982 // also be a little bit slower.
  2984 #define _CHECK_BOTH_FINGERS_      1
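// In sketch form, for a freshly marked object at objAddr the two
// policies behave as follows (same names as in deal_with_reference()
// below):
//
//   // _CHECK_BOTH_FINGERS_ == 1
//   if (objAddr < _finger)             push(obj);  // below local finger
//   else if (objAddr < _region_limit)  ;           // bitmap scan will reach it
//   else if (objAddr < global_finger)  push(obj);  // below global finger
//
//   // _CHECK_BOTH_FINGERS_ == 0
//   if (objAddr < global_finger)       push(obj);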
  2986 void CMTask::deal_with_reference(oop obj) {
  2987   if (_cm->verbose_high())
  2988     gclog_or_tty->print_cr("[%d] we're dealing with reference = "PTR_FORMAT,
  2989                            _task_id, (void*) obj);
  2991   ++_refs_reached;
  2993   HeapWord* objAddr = (HeapWord*) obj;
  2994   assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
  2995   if (_g1h->is_in_g1_reserved(objAddr)) {
  2996     assert(obj != NULL, "is_in_g1_reserved should ensure this");
  2997     HeapRegion* hr =  _g1h->heap_region_containing(obj);
  2998     if (_g1h->is_obj_ill(obj, hr)) {
  2999       if (_cm->verbose_high())
  3000         gclog_or_tty->print_cr("[%d] "PTR_FORMAT" is not considered marked",
  3001                                _task_id, (void*) obj);
  3003       // we need to mark it first
  3004       if (_nextMarkBitMap->parMark(objAddr)) {
  3005         // No OrderAccess:store_load() is needed. It is implicit in the
  3006         // CAS done in parMark(objAddr) above
  3007         HeapWord* global_finger = _cm->finger();
  3009 #if _CHECK_BOTH_FINGERS_
  3010         // we will check both the local and global fingers
  3012         if (_finger != NULL && objAddr < _finger) {
  3013           if (_cm->verbose_high())
  3014             gclog_or_tty->print_cr("[%d] below the local finger ("PTR_FORMAT"), "
  3015                                    "pushing it", _task_id, _finger);
  3016           push(obj);
  3017         } else if (_curr_region != NULL && objAddr < _region_limit) {
  3018           // do nothing
  3019         } else if (objAddr < global_finger) {
  3020           // Notice that the global finger might be moving forward
  3021           // concurrently. This is not a problem. In the worst case, we
  3022           // mark the object while it is above the global finger and, by
  3023           // the time we read the global finger, it has moved forward
  3024           // past this object. In this case, the object will probably
  3025           // be visited when a task is scanning the region and will also
  3026           // be pushed on the stack. So, some duplicate work, but no
  3027           // correctness problems.
  3029           if (_cm->verbose_high())
  3030             gclog_or_tty->print_cr("[%d] below the global finger "
  3031                                    "("PTR_FORMAT"), pushing it",
  3032                                    _task_id, global_finger);
  3033           push(obj);
  3034         } else {
  3035           // do nothing
  3036         }
  3037 #else // _CHECK_BOTH_FINGERS_
  3038       // we will only check the global finger
  3040         if (objAddr < global_finger) {
  3041           // see long comment above
  3043           if (_cm->verbose_high())
  3044             gclog_or_tty->print_cr("[%d] below the global finger "
  3045                                    "("PTR_FORMAT"), pushing it",
  3046                                    _task_id, global_finger);
  3047           push(obj);
  3048         }
  3049 #endif // _CHECK_BOTH_FINGERS_
  3050       }
  3051     }
  3052   }
  3053 }
  3055 void CMTask::push(oop obj) {
  3056   HeapWord* objAddr = (HeapWord*) obj;
  3057   assert(_g1h->is_in_g1_reserved(objAddr), "invariant");
  3058   assert(!_g1h->heap_region_containing(objAddr)->is_on_free_list(),
  3059          "invariant");
  3060   assert(!_g1h->is_obj_ill(obj), "invariant");
  3061   assert(_nextMarkBitMap->isMarked(objAddr), "invariant");
  3063   if (_cm->verbose_high())
  3064     gclog_or_tty->print_cr("[%d] pushing "PTR_FORMAT, _task_id, (void*) obj);
  3066   if (!_task_queue->push(obj)) {
  3067     // The local task queue looks full. We need to push some entries
  3068     // to the global stack.
  3070     if (_cm->verbose_medium())
  3071       gclog_or_tty->print_cr("[%d] task queue overflow, "
  3072                              "moving entries to the global stack",
  3073                              _task_id);
  3074     move_entries_to_global_stack();
  3076     // this should succeed since, even if we overflow the global
  3077     // stack, we should have definitely removed some entries from the
  3078     // local queue. So, there must be space on it.
  3079     bool success = _task_queue->push(obj);
  3080     assert(success, "invariant");
  3081   }
  3083   statsOnly( int tmp_size = _task_queue->size();
  3084              if (tmp_size > _local_max_size)
  3085                _local_max_size = tmp_size;
  3086              ++_local_pushes );
  3087 }
  3089 void CMTask::reached_limit() {
  3090   assert(_words_scanned >= _words_scanned_limit ||
  3091          _refs_reached >= _refs_reached_limit,
  3092          "shouldn't have been called otherwise");
  3093   regular_clock_call();
  3094 }
  3096 void CMTask::regular_clock_call() {
  3097   if (has_aborted())
  3098     return;
  3100   // First, we need to recalculate the words scanned and refs reached
  3101   // limits for the next clock call.
  3102   recalculate_limits();
  3104   // During the regular clock call we do the following
  3106   // (1) If an overflow has been flagged, then we abort.
  3107   if (_cm->has_overflown()) {
  3108     set_has_aborted();
  3109     return;
  3110   }
  3112   // If we are not concurrent (i.e. we're doing remark) we don't need
  3113   // to check anything else. The other steps are only needed during
  3114   // the concurrent marking phase.
  3115   if (!concurrent())
  3116     return;
  3118   // (2) If marking has been aborted for Full GC, then we also abort.
  3119   if (_cm->has_aborted()) {
  3120     set_has_aborted();
  3121     statsOnly( ++_aborted_cm_aborted );
  3122     return;
  3123   }
  3125   double curr_time_ms = os::elapsedVTime() * 1000.0;
  3127   // (3) If marking stats are enabled, then we update the step history.
  3128 #if _MARKING_STATS_
  3129   if (_words_scanned >= _words_scanned_limit)
  3130     ++_clock_due_to_scanning;
  3131   if (_refs_reached >= _refs_reached_limit)
  3132     ++_clock_due_to_marking;
  3134   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
  3135   _interval_start_time_ms = curr_time_ms;
  3136   _all_clock_intervals_ms.add(last_interval_ms);
  3138   if (_cm->verbose_medium()) {
  3139     gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
  3140                            "scanned = %d%s, refs reached = %d%s",
  3141                            _task_id, last_interval_ms,
  3142                            _words_scanned,
  3143                            (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
  3144                            _refs_reached,
  3145                            (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
  3146   }
  3147 #endif // _MARKING_STATS_
  3149   // (4) We check whether we should yield. If we have to, then we abort.
  3150   if (_cm->should_yield()) {
  3151     // We should yield. To do this we abort the task. The caller is
  3152     // responsible for yielding.
  3153     set_has_aborted();
  3154     statsOnly( ++_aborted_yield );
  3155     return;
  3156   }
  3158   // (5) We check whether we've reached our time quota. If we have,
  3159   // then we abort.
  3160   double elapsed_time_ms = curr_time_ms - _start_time_ms;
  3161   if (elapsed_time_ms > _time_target_ms) {
  3162     set_has_aborted();
  3163     _has_aborted_timed_out = true;
  3164     statsOnly( ++_aborted_timed_out );
  3165     return;
  3166   }
  3168   // (6) Finally, we check whether there are enough completed SATB
  3169   // buffers available for processing. If there are, we abort.
  3170   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  3171   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
  3172     if (_cm->verbose_low())
  3173       gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
  3174                              _task_id);
  3175     // we do need to process SATB buffers, we'll abort and restart
  3176     // the marking task to do so
  3177     set_has_aborted();
  3178     statsOnly( ++_aborted_satb );
  3179     return;
  3180   }
  3181 }
  3183 void CMTask::recalculate_limits() {
  3184   _real_words_scanned_limit = _words_scanned + words_scanned_period;
  3185   _words_scanned_limit      = _real_words_scanned_limit;
  3187   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
  3188   _refs_reached_limit       = _real_refs_reached_limit;
  3189 }
  3191 void CMTask::decrease_limits() {
  3192   // This is called when we believe that we're going to do an infrequent
  3193   // operation which will increase the per byte scanned cost (i.e. move
  3194   // entries to/from the global stack). It basically tries to decrease the
  3195   // scanning limit so that the clock is called earlier.
  3197   if (_cm->verbose_medium())
  3198     gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
  3200   _words_scanned_limit = _real_words_scanned_limit -
  3201     3 * words_scanned_period / 4;
  3202   _refs_reached_limit  = _real_refs_reached_limit -
  3203     3 * refs_reached_period / 4;
  3204 }
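// A worked example: assuming the period constants declared in
// concurrentMark.hpp (words_scanned_period = 12*1024 and
// refs_reached_period = 384 at the time of writing), decrease_limits()
// pulls each limit back by three quarters of a period:
//
//   _words_scanned_limit = _real_words_scanned_limit - 9216
//   _refs_reached_limit  = _real_refs_reached_limit  -  288
//
// so the next regular_clock_call() triggers after only a quarter of
// the usual amount of scanning work.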
  3206 void CMTask::move_entries_to_global_stack() {
  3207   // local array where we'll store the entries that will be popped
  3208   // from the local queue
  3209   oop buffer[global_stack_transfer_size];
  3211   int n = 0;
  3212   oop obj;
  3213   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
  3214     buffer[n] = obj;
  3215     ++n;
  3216   }
  3218   if (n > 0) {
  3219     // we popped at least one entry from the local queue
  3221     statsOnly( ++_global_transfers_to; _local_pops += n );
  3223     if (!_cm->mark_stack_push(buffer, n)) {
  3224       if (_cm->verbose_low())
  3225         gclog_or_tty->print_cr("[%d] aborting due to global stack overflow", _task_id);
  3226       set_has_aborted();
  3227     } else {
  3228       // the transfer was successful
  3230       if (_cm->verbose_medium())
  3231         gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
  3232                                _task_id, n);
  3233       statsOnly( int tmp_size = _cm->mark_stack_size();
  3234                  if (tmp_size > _global_max_size)
  3235                    _global_max_size = tmp_size;
  3236                  _global_pushes += n );
  3237     }
  3238   }
  3240   // this operation was quite expensive, so decrease the limits
  3241   decrease_limits();
  3242 }
  3244 void CMTask::get_entries_from_global_stack() {
  3245   // local array where we'll store the entries that will be popped
  3246   // from the global stack.
  3247   oop buffer[global_stack_transfer_size];
  3248   int n;
  3249   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
  3250   assert(n <= global_stack_transfer_size,
  3251          "we should not pop more than the given limit");
  3252   if (n > 0) {
  3253     // yes, we did actually pop at least one entry
  3255     statsOnly( ++_global_transfers_from; _global_pops += n );
  3256     if (_cm->verbose_medium())
  3257       gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
  3258                              _task_id, n);
  3259     for (int i = 0; i < n; ++i) {
  3260       bool success = _task_queue->push(buffer[i]);
  3261       // We only call this when the local queue is empty or under a
  3262       // given target limit. So, we do not expect this push to fail.
  3263       assert(success, "invariant");
  3264     }
  3266     statsOnly( int tmp_size = _task_queue->size();
  3267                if (tmp_size > _local_max_size)
  3268                  _local_max_size = tmp_size;
  3269                _local_pushes += n );
  3270   }
  3272   // this operation was quite expensive, so decrease the limits
  3273   decrease_limits();
  3274 }
  3276 void CMTask::drain_local_queue(bool partially) {
  3277   if (has_aborted())
  3278     return;
  3280   // Decide what the target size is, depending whether we're going to
  3281   // drain it partially (so that other tasks can steal if they run out
  3282   // of things to do) or totally (at the very end).
  3283   size_t target_size;
  3284   if (partially)
  3285     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
  3286   else
  3287     target_size = 0;
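  // For example (hypothetical values): with max_elems() == 16*1024 and
  // GCDrainStackTargetSize == 64, a partial drain stops once the queue
  // is down to MIN2(16384/3, 64) == 64 entries, leaving entries around
  // for other tasks to steal, whereas a total drain (target_size == 0)
  // empties the queue completely.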
  3289   if (_task_queue->size() > target_size) {
  3290     if (_cm->verbose_high())
  3291       gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
  3292                              _task_id, target_size);
  3294     oop obj;
  3295     bool ret = _task_queue->pop_local(obj);
  3296     while (ret) {
  3297       statsOnly( ++_local_pops );
  3299       if (_cm->verbose_high())
  3300         gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
  3301                                (void*) obj);
  3303       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
  3304       assert(!_g1h->heap_region_containing(obj)->is_on_free_list(),
  3305              "invariant");
  3307       scan_object(obj);
  3309       if (_task_queue->size() <= target_size || has_aborted())
  3310         ret = false;
  3311       else
  3312         ret = _task_queue->pop_local(obj);
  3313     }
  3315     if (_cm->verbose_high())
  3316       gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
  3317                              _task_id, _task_queue->size());
  3318   }
  3319 }
  3321 void CMTask::drain_global_stack(bool partially) {
  3322   if (has_aborted())
  3323     return;
  3325   // We have a policy to drain the local queue before we attempt to
  3326   // drain the global stack.
  3327   assert(partially || _task_queue->size() == 0, "invariant");
  3329   // Decide what the target size is, depending whether we're going to
  3330   // drain it partially (so that other tasks can steal if they run out
  3331   // of things to do) or totally (at the very end).  Notice that,
  3332   // because we move entries from the global stack in chunks or
  3333   // because another task might be doing the same, we might in fact
  3334   // drop below the target. But, this is not a problem.
  3335   size_t target_size;
  3336   if (partially)
  3337     target_size = _cm->partial_mark_stack_size_target();
  3338   else
  3339     target_size = 0;
  3341   if (_cm->mark_stack_size() > target_size) {
  3342     if (_cm->verbose_low())
  3343       gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
  3344                              _task_id, target_size);
  3346     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
  3347       get_entries_from_global_stack();
  3348       drain_local_queue(partially);
  3349     }
  3351     if (_cm->verbose_low())
  3352       gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
  3353                              _task_id, _cm->mark_stack_size());
  3354   }
  3355 }
  3357 // The SATB queue code makes several assumptions about whether its par
  3358 // or non-par methods get called. This is why some of the code is
  3359 // replicated. We should really get rid of the single-threaded version
  3360 // of the code to simplify things.
  3361 void CMTask::drain_satb_buffers() {
  3362   if (has_aborted())
  3363     return;
  3365   // We set this so that the regular clock knows that we're in the
  3366   // middle of draining buffers and doesn't set the abort flag when it
  3367   // notices that SATB buffers are available for draining. It'd be
  3368   // very counterproductive if it did that. :-)
  3369   _draining_satb_buffers = true;
  3371   CMObjectClosure oc(this);
  3372   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  3373   if (ParallelGCThreads > 0)
  3374     satb_mq_set.set_par_closure(_task_id, &oc);
  3375   else
  3376     satb_mq_set.set_closure(&oc);
  3378   // This keeps claiming and applying the closure to completed buffers
  3379   // until we run out of buffers or we need to abort.
  3380   if (ParallelGCThreads > 0) {
  3381     while (!has_aborted() &&
  3382            satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
  3383       if (_cm->verbose_medium())
  3384         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
  3385       statsOnly( ++_satb_buffers_processed );
  3386       regular_clock_call();
  3387     }
  3388   } else {
  3389     while (!has_aborted() &&
  3390            satb_mq_set.apply_closure_to_completed_buffer()) {
  3391       if (_cm->verbose_medium())
  3392         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
  3393       statsOnly( ++_satb_buffers_processed );
  3394       regular_clock_call();
  3395     }
  3396   }
  3398   if (!concurrent() && !has_aborted()) {
  3399     // We should only do this during remark.
  3400     if (ParallelGCThreads > 0)
  3401       satb_mq_set.par_iterate_closure_all_threads(_task_id);
  3402     else
  3403       satb_mq_set.iterate_closure_all_threads();
  3404   }
  3406   _draining_satb_buffers = false;
  3408   assert(has_aborted() ||
  3409          concurrent() ||
  3410          satb_mq_set.completed_buffers_num() == 0, "invariant");
  3412   if (ParallelGCThreads > 0)
  3413     satb_mq_set.set_par_closure(_task_id, NULL);
  3414   else
  3415     satb_mq_set.set_closure(NULL);
  3417   // Again, this was a potentially expensive operation, so decrease the
  3418   // limits to get the regular clock call early.
  3419   decrease_limits();
  3420 }
  3422 void CMTask::drain_region_stack(BitMapClosure* bc) {
  3423   if (has_aborted())
  3424     return;
  3426   assert(_region_finger == NULL,
  3427          "it should be NULL when we're not scanning a region");
  3429   if (!_cm->region_stack_empty()) {
  3430     if (_cm->verbose_low())
  3431       gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
  3432                              _task_id, _cm->region_stack_size());
  3434     MemRegion mr = _cm->region_stack_pop_with_lock();
  3435     // it returns MemRegion() if the pop fails
  3436     statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
  3438     while (mr.start() != NULL) {
  3439       if (_cm->verbose_medium())
  3440         gclog_or_tty->print_cr("[%d] we are scanning region "
  3441                                "["PTR_FORMAT", "PTR_FORMAT")",
  3442                                _task_id, mr.start(), mr.end());
  3443       assert(mr.end() <= _cm->finger(),
  3444              "otherwise the region shouldn't be on the stack");
  3445       assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
  3446       if (_nextMarkBitMap->iterate(bc, mr)) {
  3447         assert(!has_aborted(),
  3448                "cannot abort the task without aborting the bitmap iteration");
  3450         // We finished iterating over the region without aborting.
  3451         regular_clock_call();
  3452         if (has_aborted())
  3453           mr = MemRegion();
  3454         else {
  3455           mr = _cm->region_stack_pop_with_lock();
  3456           // it returns MemRegion() if the pop fails
  3457           statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
  3458         }
  3459       } else {
  3460         assert(has_aborted(), "currently the only way to do so");
  3462         // The only way to abort the bitmap iteration is to return
  3463         // false from the do_bit() method. However, inside the
  3464         // do_bit() method we move the _region_finger to point to the
  3465         // object currently being looked at. So, if we bail out, we
  3466         // have definitely set _region_finger to something non-null.
  3467         assert(_region_finger != NULL, "invariant");
  3469         // The iteration was actually aborted. So now _region_finger
  3470         // points to the address of the object we last scanned. If we
  3471         // leave it there, when we restart this task, we will rescan
  3472         // the object. It is easy to avoid this. We move the finger by
  3473         // enough to point to the next possible object header (the
  3474         // bitmap knows by how much we need to move it as it knows its
  3475         // granularity).
  3476         MemRegion newRegion =
  3477           MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());
  3479         if (!newRegion.is_empty()) {
  3480           if (_cm->verbose_low()) {
  3481             gclog_or_tty->print_cr("[%d] pushing unscanned region"
  3482                                    "[" PTR_FORMAT "," PTR_FORMAT ") on region stack",
  3483                                    _task_id,
  3484                                    newRegion.start(), newRegion.end());
  3485           }
  3486           // Now push the part of the region we didn't scan on the
  3487           // region stack to make sure a task scans it later.
  3488           _cm->region_stack_push_with_lock(newRegion);
  3489         }
  3490         // break from while
  3491         mr = MemRegion();
  3492       }
  3493       _region_finger = NULL;
  3494     }
  3496     if (_cm->verbose_low())
  3497       gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
  3498                              _task_id, _cm->region_stack_size());
  3499   }
  3500 }
  3502 void CMTask::print_stats() {
  3503   gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
  3504                          _task_id, _calls);
  3505   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
  3506                          _elapsed_time_ms, _termination_time_ms);
  3507   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
  3508                          _step_times_ms.num(), _step_times_ms.avg(),
  3509                          _step_times_ms.sd());
  3510   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
  3511                          _step_times_ms.maximum(), _step_times_ms.sum());
  3513 #if _MARKING_STATS_
  3514   gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
  3515                          _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
  3516                          _all_clock_intervals_ms.sd());
  3517   gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
  3518                          _all_clock_intervals_ms.maximum(),
  3519                          _all_clock_intervals_ms.sum());
  3520   gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
  3521                          _clock_due_to_scanning, _clock_due_to_marking);
  3522   gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
  3523                          _objs_scanned, _objs_found_on_bitmap);
  3524   gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
  3525                          _local_pushes, _local_pops, _local_max_size);
  3526   gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
  3527                          _global_pushes, _global_pops, _global_max_size);
  3528   gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
  3529                          _global_transfers_to,_global_transfers_from);
  3530   gclog_or_tty->print_cr("  Regions: claimed = %d, Region Stack: pops = %d",
  3531                          _regions_claimed, _region_stack_pops);
  3532   gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
  3533   gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
  3534                          _steal_attempts, _steals);
  3535   gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
  3536   gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
  3537                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
  3538   gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
  3539                          _aborted_timed_out, _aborted_satb, _aborted_termination);
  3540 #endif // _MARKING_STATS_
  3541 }
  3543 /*****************************************************************************
  3545     The do_marking_step(time_target_ms) method is the building block
  3546     of the parallel marking framework. It can be called in parallel
  3547     with other invocations of do_marking_step() on different tasks
  3548     (but only one per task, obviously) and concurrently with the
  3549     mutator threads, or during remark, hence it eliminates the need
  3550     for two versions of the code. When called during remark, it will
  3551     pick up from where the task left off during the concurrent marking
  3552     phase. Interestingly, tasks are also claimable during evacuation
  3553     pauses, since do_marking_step() ensures that it aborts before
  3554     it needs to yield.
  3556     The data structures that it uses to do marking work are the
  3557     following:
  3559       (1) Marking Bitmap. If there are gray objects that appear only
  3560       on the bitmap (this happens either when dealing with an overflow
  3561       or when the initial marking phase has simply marked the roots
  3562       and didn't push them on the stack), then tasks claim heap
  3563       regions whose bitmap they then scan to find gray objects. A
  3564       global finger indicates where the end of the last claimed region
  3565       is. A local finger indicates how far into the region a task has
  3566       scanned. The two fingers are used to determine how to gray an
  3567       object (i.e. whether simply marking it is OK, as it will be
  3568       visited by a task in the future, or whether it needs to be also
  3569       pushed on a stack).
  3571       (2) Local Queue. The local queue of the task which is accessed
  3572       reasonably efficiently by the task. Other tasks can steal from
  3573       it when they run out of work. Throughout the marking phase, a
  3574       task attempts to keep its local queue short but not totally
  3575       empty, so that entries are available for stealing by other
  3576       tasks. Only when there is no more work will a task totally
  3577       drain its local queue.
  3579       (3) Global Mark Stack. This handles local queue overflow. During
  3580       marking only sets of entries are moved between it and the local
  3581       queues, as access to it requires a mutex, and more fine-grained
  3582       interaction with it might cause contention. If it
  3583       overflows, then the marking phase should restart and iterate
  3584       over the bitmap to identify gray objects. Throughout the marking
  3585       phase, tasks attempt to keep the global mark stack at a small
  3586       length but not totally empty, so that entries are available for
  3587       popping by other tasks. Only when there is no more work will
  3588       tasks totally drain the global mark stack.
  3590       (4) Global Region Stack. Entries on it correspond to areas of
  3591       the bitmap that need to be scanned since they contain gray
  3592       objects. Pushes on the region stack only happen during
  3593       evacuation pauses and typically correspond to areas covered by
  3594       GC LABs. If it overflows, then the marking phase should restart
  3595       and iterate over the bitmap to identify gray objects. Tasks will
  3596       try to totally drain the region stack as soon as possible.
  3598       (5) SATB Buffer Queue. This is where completed SATB buffers are
  3599       made available. Buffers are regularly removed from this queue
  3600       and scanned for roots, so that the queue doesn't get too
  3601       long. During remark, all completed buffers are processed, as
  3602       well as the filled-in parts of any uncompleted buffers.
  3604     The do_marking_step() method tries to abort when the time target
  3605     has been reached. There are a few other cases when the
  3606     do_marking_step() method also aborts:
  3608       (1) When the marking phase has been aborted (after a Full GC).
  3610       (2) When a global overflow (either on the global stack or the
  3611       region stack) has been triggered. Before the task aborts, it
  3612       will actually sync up with the other tasks to ensure that all
  3613       the marking data structures (local queues, stacks, fingers etc.)
  3614       are re-initialised so that when do_marking_step() completes,
  3615       the marking phase can immediately restart.
  3617       (3) When enough completed SATB buffers are available. The
  3618       do_marking_step() method only tries to drain SATB buffers right
  3619       at the beginning. So, if enough buffers are available, the
  3620       marking step aborts and the SATB buffers are processed at
  3621       the beginning of the next invocation.
  3623       (4) To yield. When we have to yield, we abort and do the yield
  3624       right at the end of do_marking_step(). This saves us from a lot
  3625       of hassle as, by yielding, we might allow a Full GC. If this
  3626       happens then objects will be compacted underneath our feet, the
  3627       heap might shrink, etc. We save checking for this by just
  3628       aborting and doing the yield right at the end.
  3630     From the above it follows that the do_marking_step() method should
  3631     be called in a loop (or, otherwise, regularly) until it completes.
  3633     If a marking step completes without its has_aborted() flag being
  3634     true, it means it has completed the current marking phase (and
  3635     also all other marking tasks have done so and have all synced up).
  3637     A method called regular_clock_call() is invoked "regularly" (in
  3638     sub-millisecond intervals) throughout marking. It is this clock method that
  3639     checks all the abort conditions which were mentioned above and
  3640     decides when the task should abort. A work-based scheme is used to
  3641     trigger this clock method: it is called when the number of object
  3642     words the marking phase has scanned or the number of references it
  3643     has visited reaches a given limit. Additional invocations of the
  3644     clock method have been planted in a few other strategic places
  3645     too. The initial reason for the clock method was to avoid calling
  3646     vtime too regularly, as it is quite expensive. So, once it was in
  3647     place, it was natural to piggy-back all the other conditions on it
  3648     too and not constantly check them throughout the code.
  3650  *****************************************************************************/
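// In sketch form, the calling convention described above (the actual
// drivers are CMConcurrentMarkingTask and CMRemarkTask; the 10ms time
// target is purely illustrative):
#if 0
  do {
    task->do_marking_step(10.0);
    if (task->has_aborted()) {
      // yield, sync up on overflow, or let SATB processing happen,
      // as appropriate, before retrying
    }
  } while (task->has_aborted() && !cm->has_aborted());
#endif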
  3652 void CMTask::do_marking_step(double time_target_ms) {
  3653   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
  3654   assert(concurrent() == _cm->concurrent(), "they should be the same");
  3656   assert(concurrent() || _cm->region_stack_empty(),
  3657          "the region stack should have been cleared before remark");
  3658   assert(_region_finger == NULL,
  3659          "this should be non-null only when a region is being scanned");
  3661   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
  3662   assert(_task_queues != NULL, "invariant");
  3663   assert(_task_queue != NULL, "invariant");
  3664   assert(_task_queues->queue(_task_id) == _task_queue, "invariant");
  3666   assert(!_claimed,
  3667          "only one thread should claim this task at any one time");
  3669   // OK, this doesn't safeguard against all possible scenarios, as it is
  3670   // possible for two threads to set the _claimed flag at the same
  3671   // time. But it is only for debugging purposes anyway and it will
  3672   // catch most problems.
  3673   _claimed = true;
  3675   _start_time_ms = os::elapsedVTime() * 1000.0;
  3676   statsOnly( _interval_start_time_ms = _start_time_ms );
  3678   double diff_prediction_ms =
  3679     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
  3680   _time_target_ms = time_target_ms - diff_prediction_ms;
  3682   // set up the variables that are used in the work-based scheme to
  3683   // call the regular clock method
  3684   _words_scanned = 0;
  3685   _refs_reached  = 0;
  3686   recalculate_limits();
  3688   // clear all flags
  3689   clear_has_aborted();
  3690   _has_aborted_timed_out = false;
  3691   _draining_satb_buffers = false;
  3693   ++_calls;
  3695   if (_cm->verbose_low())
  3696     gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
  3697                            "target = %1.2lfms >>>>>>>>>>",
  3698                            _task_id, _calls, _time_target_ms);
  3700   // Set up the bitmap and oop closures. Anything that uses them is
  3701   // eventually called from this method, so it is OK to allocate them
  3702   // on this method's stack.
  3703   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
  3704   CMOopClosure    oop_closure(_g1h, _cm, this);
  3705   set_oop_closure(&oop_closure);
  3707   if (_cm->has_overflown()) {
  3708     // This can happen if the region stack or the mark stack overflows
  3709     // during a GC pause and this task, after a yield point,
  3710     // restarts. We have to abort as we need to get into the overflow
  3711     // protocol which happens right at the end of this task.
  3712     set_has_aborted();
  3713   }
  3715   // First drain any available SATB buffers. After this, we will not
  3716   // look at SATB buffers before the next invocation of this method.
  3717   // If enough completed SATB buffers are queued up, the regular clock
  3718   // will abort this task so that it restarts.
  3719   drain_satb_buffers();
  3720   // ...then partially drain the local queue and the global stack
  3721   drain_local_queue(true);
  3722   drain_global_stack(true);
  3724   // Then totally drain the region stack.  We will not look at
  3725   // it again before the next invocation of this method. Entries on
  3726   // the region stack are only added during evacuation pauses, for
  3727   // which we have to yield. When we do, we abort the task anyway so
  3728   // it will look at the region stack again when it restarts.
  3729   bitmap_closure.set_scanning_heap_region(false);
  3730   drain_region_stack(&bitmap_closure);
  3731   // ...then partially drain the local queue and the global stack
  3732   drain_local_queue(true);
  3733   drain_global_stack(true);
  3735   do {
  3736     if (!has_aborted() && _curr_region != NULL) {
  3737       // This means that we're already holding on to a region.
  3738       assert(_finger != NULL, "if region is not NULL, then the finger "
  3739              "should not be NULL either");
  3741       // We might have restarted this task after an evacuation pause
  3742       // which might have evacuated the region we're holding on to
  3743       // underneath our feet. Let's read its limit again to make sure
  3744       // that we do not iterate over a region of the heap that
  3745       // contains garbage (update_region_limit() will also move
  3746       // _finger to the start of the region if it is found empty).
  3747       update_region_limit();
  3748       // We will start from _finger not from the start of the region,
  3749       // as we might be restarting this task after aborting half-way
  3750       // through scanning this region. In this case, _finger points to
  3751       // the address where we last found a marked object. If this is a
  3752       // fresh region, _finger points to start().
  3753       MemRegion mr = MemRegion(_finger, _region_limit);
  3755       if (_cm->verbose_low())
  3756         gclog_or_tty->print_cr("[%d] we're scanning part "
  3757                                "["PTR_FORMAT", "PTR_FORMAT") "
  3758                                "of region "PTR_FORMAT,
  3759                                _task_id, _finger, _region_limit, _curr_region);
  3761       // Let's iterate over the bitmap of the part of the
  3762       // region that is left.
  3763       bitmap_closure.set_scanning_heap_region(true);
  3764       if (mr.is_empty() ||
  3765           _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
  3766         // We successfully completed iterating over the region. Now,
  3767         // let's give up the region.
  3768         giveup_current_region();
  3769         regular_clock_call();
  3770       } else {
  3771         assert(has_aborted(), "currently the only way to do so");
  3772         // The only way to abort the bitmap iteration is to return
  3773         // false from the do_bit() method. However, inside the
  3774         // do_bit() method we move the _finger to point to the
  3775         // object currently being looked at. So, if we bail out, we
  3776         // have definitely set _finger to something non-null.
  3777         assert(_finger != NULL, "invariant");
  3779         // Region iteration was actually aborted. So now _finger
  3780         // points to the address of the object we last scanned. If we
  3781         // leave it there, when we restart this task, we will rescan
  3782         // the object. It is easy to avoid this. We move the finger by
  3783         // enough to point to the next possible object header (the
  3784         // bitmap knows by how much we need to move it as it knows its
  3785         // granularity).
  3786         assert(_finger < _region_limit, "invariant");
  3787         HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
  3788         // Check if bitmap iteration was aborted while scanning the last object
  3789         if (new_finger >= _region_limit) {
  3790             giveup_current_region();
  3791         } else {
  3792             move_finger_to(new_finger);
  3793         }
  3794       }
  3795     }
  3796     // At this point we have either completed iterating over the
  3797     // region we were holding on to, or we have aborted.
  3799     // We then partially drain the local queue and the global stack.
  3800     // (Do we really need this?)
  3801     drain_local_queue(true);
  3802     drain_global_stack(true);
  3804     // Read the note on the claim_region() method on why it might
  3805     // return NULL with potentially more regions available for
  3806     // claiming and why we have to check out_of_regions() to determine
  3807     // whether we're done or not.
  3808     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
  3809       // We are going to try to claim a new region. We should have
  3810       // given up on the previous one.
  3811       // Separated the asserts so that we know which one fires.
  3812       assert(_curr_region  == NULL, "invariant");
  3813       assert(_finger       == NULL, "invariant");
  3814       assert(_region_limit == NULL, "invariant");
  3815       if (_cm->verbose_low())
  3816         gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
  3817       HeapRegion* claimed_region = _cm->claim_region(_task_id);
  3818       if (claimed_region != NULL) {
  3819         // Yes, we managed to claim one
  3820         statsOnly( ++_regions_claimed );
  3822         if (_cm->verbose_low())
  3823           gclog_or_tty->print_cr("[%d] we successfully claimed "
  3824                                  "region "PTR_FORMAT,
  3825                                  _task_id, claimed_region);
  3827         setup_for_region(claimed_region);
  3828         assert(_curr_region == claimed_region, "invariant");
  3829       }
  3830       // It is important to call the regular clock here. It might take
  3831       // a while to claim a region if, for example, we hit a large
  3832       // block of empty regions. So we need to call the regular clock
  3833       // method once round the loop to make sure it's called
  3834       // frequently enough.
  3835       regular_clock_call();
  3836     }
  3838     if (!has_aborted() && _curr_region == NULL) {
  3839       assert(_cm->out_of_regions(),
  3840              "at this point we should be out of regions");
  3841     }
  3842   } while ( _curr_region != NULL && !has_aborted());
  3844   if (!has_aborted()) {
  3845     // We cannot check whether the global stack is empty, since other
  3846     // tasks might be pushing objects to it concurrently. We also cannot
  3847     // check if the region stack is empty because if a thread is aborting
  3848     // it can push a partially done region back.
  3849     assert(_cm->out_of_regions(),
  3850            "at this point we should be out of regions");
  3852     if (_cm->verbose_low())
  3853       gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
  3855     // Try to reduce the number of available SATB buffers so that
  3856     // remark has less work to do.
  3857     drain_satb_buffers();
  3858   }
  3860   // Since we've done everything else, we can now totally drain the
  3861   // local queue and global stack.
  3862   drain_local_queue(false);
  3863   drain_global_stack(false);
  3865   // Attempt at work stealing from other tasks' queues.
  3866   if (!has_aborted()) {
  3867     // We have not aborted. This means that we have finished all that
  3868     // we could. Let's try to do some stealing...
  3870     // We cannot check whether the global stack is empty, since other
  3871     // tasks might be pushing objects to it concurrently. We also cannot
  3872     // check if the region stack is empty because if a thread is aborting
  3873     // it can push a partially done region back.
  3874     assert(_cm->out_of_regions() && _task_queue->size() == 0,
  3875            "only way to reach here");
  3877     if (_cm->verbose_low())
  3878       gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
  3880     while (!has_aborted()) {
  3881       oop obj;
  3882       statsOnly( ++_steal_attempts );
  3884       if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
  3885         if (_cm->verbose_medium())
  3886           gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
  3887                                  _task_id, (void*) obj);
  3889         statsOnly( ++_steals );
  3891         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
  3892                "any stolen object should be marked");
  3893         scan_object(obj);
  3895         // And since we're towards the end, let's totally drain the
  3896         // local queue and global stack.
  3897         drain_local_queue(false);
  3898         drain_global_stack(false);
  3899       } else {
  3900         break;
  3901       }
  3902     }
  3903   }
  3905   // We still haven't aborted. Now, let's try to get into the
  3906   // termination protocol.
  3907   if (!has_aborted()) {
  3908     // We cannot check whether the global stack is empty, since other
  3909     // tasks might be concurrently pushing objects on it. We also cannot
  3910     // check if the region stack is empty because if a thread is aborting
  3911     // it can push a partially done region back.
  3912     // Separated the asserts so that we know which one fires.
  3913     assert(_cm->out_of_regions(), "only way to reach here");
  3914     assert(_task_queue->size() == 0, "only way to reach here");
  3916     if (_cm->verbose_low())
  3917       gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
  3919     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
  3920     // The CMTask class also extends the TerminatorTerminator class,
  3921     // hence its should_exit_termination() method will also decide
  3922     // whether to exit the termination protocol or not.
  3923     bool finished = _cm->terminator()->offer_termination(this);
  3924     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
  3925     _termination_time_ms +=
  3926       termination_end_time_ms - _termination_start_time_ms;
  3928     if (finished) {
  3929       // We're all done.
  3931       if (_task_id == 0) {
  3932         // let's allow task 0 to do this
  3933         if (concurrent()) {
  3934           assert(_cm->concurrent_marking_in_progress(), "invariant");
  3935           // we need to set this to false before the next
  3936           // safepoint. This way we ensure that the marking phase
  3937           // doesn't observe any more heap expansions.
  3938           _cm->clear_concurrent_marking_in_progress();
  3939         }
  3940       }
  3942       // We can now guarantee that the global stack is empty, since
  3943       // all other tasks have finished. We separated the guarantees so
  3944       // that, if a condition is false, we can immediately find out
  3945       // which one.
  3946       guarantee(_cm->out_of_regions(), "only way to reach here");
  3947       guarantee(_cm->region_stack_empty(), "only way to reach here");
  3948       guarantee(_cm->mark_stack_empty(), "only way to reach here");
  3949       guarantee(_task_queue->size() == 0, "only way to reach here");
  3950       guarantee(!_cm->has_overflown(), "only way to reach here");
  3951       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
  3952       guarantee(!_cm->region_stack_overflow(), "only way to reach here");
  3954       if (_cm->verbose_low())
  3955         gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
  3956     } else {
  3957       // Apparently there's more work to do. Let's abort this task. The
  3958       // caller will restart it and we can hopefully find more things to do.
  3960       if (_cm->verbose_low())
  3961         gclog_or_tty->print_cr("[%d] apparently there is more work to do", _task_id);
  3963       set_has_aborted();
  3964       statsOnly( ++_aborted_termination );
  3965     }
  3966   }
  3968   // This is mainly for debugging purposes: it makes sure that the
  3969   // pointer to the closure, which was allocated in this frame, doesn't
  3970   // escape the frame by accident.
  3971   set_oop_closure(NULL);
  3972   double end_time_ms = os::elapsedVTime() * 1000.0;
  3973   double elapsed_time_ms = end_time_ms - _start_time_ms;
  3974   // Update the step history.
  3975   _step_times_ms.add(elapsed_time_ms);
  3977   if (has_aborted()) {
  3978     // The task was aborted for some reason.
  3980     statsOnly( ++_aborted );
  3982     if (_has_aborted_timed_out) {
  3983       double diff_ms = elapsed_time_ms - _time_target_ms;
  3984       // Keep statistics of how well we did with respect to hitting
  3985       // our target only if we actually timed out (if we aborted for
  3986       // other reasons, then the results might get skewed).
  3987       _marking_step_diffs_ms.add(diff_ms);
  3988     }
  3990     if (_cm->has_overflown()) {
  3991       // This is the interesting one. We aborted because a global
  3992       // overflow was raised. This means we have to restart the
  3993       // marking phase and start iterating over regions. However, in
  3994       // order to do this we have to make sure that all tasks stop
  3995       // what they are doing and re-initialise in a safe manner. We
  3996       // will achieve this with the use of two barrier sync points.
  3998       if (_cm->verbose_low())
  3999         gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
  4001       _cm->enter_first_sync_barrier(_task_id);
  4002       // When we exit this sync barrier we know that all tasks have
  4003       // stopped doing marking work. So, it's now safe to
  4004       // re-initialise our data structures. At the end of this method,
  4005       // task 0 will clear the global data structures.
  4007       statsOnly( ++_aborted_overflow );
  4009       // We clear the local state of this task...
  4010       clear_region_fields();
  4012       // ...and enter the second barrier.
  4013       _cm->enter_second_sync_barrier(_task_id);
  4014       // At this point everything has been re-initialised and we're
  4015       // ready to restart.
  4016     }
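    // Schematically, the overflow hand-shake above is:
    //
    //   enter_first_sync_barrier(id);   // all tasks have stopped marking;
    //                                   // task 0 resets the global state
    //   clear_region_fields();          // each task resets its local state
    //   enter_second_sync_barrier(id);  // everyone is re-initialised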
  4018     if (_cm->verbose_low()) {
  4019       gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
  4020                              "elapsed = %1.2lfms <<<<<<<<<<",
  4021                              _task_id, _time_target_ms, elapsed_time_ms);
  4022       if (_cm->has_aborted())
  4023         gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
  4024                                _task_id);
  4025     }
  4026   } else {
  4027     if (_cm->verbose_low())
  4028       gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
  4029                              "elapsed = %1.2lfms <<<<<<<<<<",
  4030                              _task_id, _time_target_ms, elapsed_time_ms);
  4031   }
  4033   _claimed = false;
  4034 }
  4036 CMTask::CMTask(int task_id,
  4037                ConcurrentMark* cm,
  4038                CMTaskQueue* task_queue,
  4039                CMTaskQueueSet* task_queues)
  4040   : _g1h(G1CollectedHeap::heap()),
  4041     _task_id(task_id), _cm(cm),
  4042     _claimed(false),
  4043     _nextMarkBitMap(NULL), _hash_seed(17),
  4044     _task_queue(task_queue),
  4045     _task_queues(task_queues),
  4046     _oop_closure(NULL) {
  4047   guarantee(task_queue != NULL, "invariant");
  4048   guarantee(task_queues != NULL, "invariant");
  4050   statsOnly( _clock_due_to_scanning = 0;
  4051              _clock_due_to_marking  = 0 );
  4053   _marking_step_diffs_ms.add(0.5);
  4054 }
