src/share/vm/gc_implementation/g1/concurrentMark.cpp

author:      tonyp
date:        Tue, 06 Apr 2010 10:59:45 -0400
changeset:   1794 (23b1b27ac76c)
parent:      1793 (72f725c5a7be)
child:       1823 (7666957bc44d)
permissions: -rw-r--r--

6909756: G1: guarantee(G1CollectedHeap::heap()->mark_in_progress(),"Precondition.")
Summary: Make sure that two marking cycles do not overlap, i.e., a new cycle can only start after the concurrent marking thread has finished all its work. The fix piggy-backs a couple of minor extras: some general code reformatting for consistency (only around the modified code), the removal of the G1CollectorPolicy::_should_initiate_conc_mark field, which is only ever set and never read, and moving the "is GC locker active" test earlier into the G1 pause / Full GC, using a more appropriate method for it.
Reviewed-by: johnc, jmasa, jcoomes, ysr
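
The heart of the fix is the rule that a new marking cycle may only begin once the concurrent mark thread has finished all the work of the previous one, including clearing the next marking bitmap (see clearNextBitmap() below, which guards exactly this with cmThread()->during_cycle()). The following is a minimal, self-contained C++ sketch of that invariant only; it is not HotSpot code, and the names CycleGate, start_cycle() and finish_cycle() are made up for illustration:

#include <cassert>
#include <atomic>

// Toy model of the "no overlapping marking cycles" rule: the gate only
// admits a new cycle after the previous one has fully completed.
class CycleGate {
  std::atomic<bool> _during_cycle{false};
public:
  bool during_cycle() const { return _during_cycle.load(); }

  // Called at the initial-mark pause. A new cycle must not start while
  // the concurrent mark thread is still winding down the previous one.
  void start_cycle() {
    assert(!during_cycle() && "previous marking cycle still in progress");
    _during_cycle.store(true);
  }

  // Called by the marking thread only after *all* of its work is done,
  // including clearing the next marking bitmap.
  void finish_cycle() { _during_cycle.store(false); }
};

int main() {
  CycleGate gate;
  gate.start_cycle();   // initial mark
  gate.finish_cycle();  // concurrent mark thread completes everything
  gate.start_cycle();   // only now may the next cycle begin
  return 0;
}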

/*
 * Copyright 2001-2009 Sun Microsystems, Inc.  All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 */

#include "incls/_precompiled.incl"
#include "incls/_concurrentMark.cpp.incl"
//
// CMS Bit Map Wrapper

CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter):
  _bm((uintptr_t*)NULL,0),
  _shifter(shifter) {
  _bmStartWord = (HeapWord*)(rs.base());
  _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
  ReservedSpace brs(ReservedSpace::allocation_align_size_up(
                     (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));

  guarantee(brs.is_reserved(), "couldn't allocate CMS bit map");
  // For now we'll just commit all of the bit map up front.
  // Later on we'll try to be more parsimonious with swap.
  guarantee(_virtual_space.initialize(brs, brs.size()),
            "couldn't reserve backing store for CMS bit map");
  assert(_virtual_space.committed_size() == brs.size(),
         "didn't reserve backing store for all of CMS bit map?");
  _bm.set_map((uintptr_t*)_virtual_space.low());
  assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
         _bmWordSize, "inconsistency in bit map sizing");
  _bm.set_size(_bmWordSize >> _shifter);
}
HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
                                               HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) limit = _bmStartWord + _bmWordSize;
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
                                                 HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) limit = _bmStartWord + _bmWordSize;
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}
bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
  HeapWord* left  = MAX2(_bmStartWord, mr.start());
  HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
  if (right > left) {
    // Right-open interval [leftOffset, rightOffset).
    return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
  } else {
    return true;
  }
}

void CMBitMapRO::mostly_disjoint_range_union(BitMap*   from_bitmap,
                                             size_t    from_start_index,
                                             HeapWord* to_start_word,
                                             size_t    word_num) {
  _bm.mostly_disjoint_range_union(from_bitmap,
                                  from_start_index,
                                  heapWordToOffset(to_start_word),
                                  word_num);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(ReservedSpace rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(rs.base()) &&
         _bmWordSize  == rs.size()>>LogHeapWordSize;
}
#endif

void CMBitMap::clearAll() {
  _bm.clear();
  return;
}

void CMBitMap::markRange(MemRegion mr) {
  mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end   = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}
CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
#ifdef ASSERT
  , _drain_in_progress(false)
  , _drain_in_progress_yields(false)
#endif
{}

void CMMarkStack::allocate(size_t size) {
  _base = NEW_C_HEAP_ARRAY(oop, size);
  if (_base == NULL)
    vm_exit_during_initialization("Failed to allocate "
                                  "CM region mark stack");
  _index = 0;
  // QQQQ cast ...
  _capacity = (jint) size;
  _oops_do_bound = -1;
  NOT_PRODUCT(_max_depth = 0);
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base);
}
void CMMarkStack::par_push(oop ptr) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = ptr;
      // Note that we don't maintain this atomically.  We could, but it
      // doesn't seem necessary.
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index + n;
    if (next_index > _capacity) {
      _overflow = true;
      return;
    }
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      for (int i = 0; i < n; i++) {
        int ind = index + i;
        assert(ind < _capacity, "By overflow test above.");
        _base[ind] = ptr_arr[i];
      }
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}
void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
}
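
// Pops up to "max" entries into ptr_arr under the lock, recording the
// number actually popped in *n. Returns false iff the stack was empty.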
bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}
CMRegionStack::CMRegionStack() : _base(NULL) {}

void CMRegionStack::allocate(size_t size) {
  _base = NEW_C_HEAP_ARRAY(MemRegion, size);
  if (_base == NULL)
    vm_exit_during_initialization("Failed to allocate "
                                  "CM region mark stack");
  _index = 0;
  // QQQQ cast ...
  _capacity = (jint) size;
}

CMRegionStack::~CMRegionStack() {
  if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base);
}

void CMRegionStack::push(MemRegion mr) {
  assert(mr.word_size() > 0, "Precondition");
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = mr;
      return;
    }
    // Otherwise, we need to try again.
  }
}
// Currently we do not call this at all. Normally we would call it
// during the concurrent marking / remark phases but we now call
// the lock-based version instead. But we might want to resurrect this
// code in the future. So, we'll leave it here commented out.
#if 0
MemRegion CMRegionStack::pop() {
  while (true) {
    // Otherwise...
    jint index = _index;

    if (index == 0) {
      return MemRegion();
    }
    jint next_index = index-1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      MemRegion mr = _base[next_index];
      if (mr.start() != NULL) {
        assert(mr.end() != NULL, "invariant");
        assert(mr.word_size() > 0, "invariant");
        return mr;
      } else {
        // that entry was invalidated... let's skip it
        assert(mr.end() == NULL, "invariant");
      }
    }
    // Otherwise, we need to try again.
  }
}
#endif // 0

void CMRegionStack::push_with_lock(MemRegion mr) {
  assert(mr.word_size() > 0, "Precondition");
  MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);

  if (isFull()) {
    _overflow = true;
    return;
  }

  _base[_index] = mr;
  _index += 1;
}

MemRegion CMRegionStack::pop_with_lock() {
  MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);

  while (true) {
    if (_index == 0) {
      return MemRegion();
    }
    _index -= 1;

    MemRegion mr = _base[_index];
    if (mr.start() != NULL) {
      assert(mr.end() != NULL, "invariant");
      assert(mr.word_size() > 0, "invariant");
      return mr;
    } else {
      // that entry was invalidated... let's skip it
      assert(mr.end() == NULL, "invariant");
    }
  }
}
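
// Invalidates (clears to an empty MemRegion) every stack entry below
// _oops_do_bound that points into the collection set, so that stale
// entries are not processed after the corresponding regions have been
// evacuated. Returns true if at least one entry was invalidated.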
bool CMRegionStack::invalidate_entries_into_cset() {
  bool result = false;
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  for (int i = 0; i < _oops_do_bound; ++i) {
    MemRegion mr = _base[i];
    if (mr.start() != NULL) {
      assert(mr.end() != NULL, "invariant");
      assert(mr.word_size() > 0, "invariant");
      HeapRegion* hr = g1h->heap_region_containing(mr.start());
      assert(hr != NULL, "invariant");
      if (hr->in_collection_set()) {
        // The region points into the collection set
        _base[i] = MemRegion();
        result = true;
      }
    } else {
      // that entry was invalidated... let's skip it
      assert(mr.end() == NULL, "invariant");
    }
  }
  return result;
}
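
// Drains the stack, applying cl to each oop popped; all entries are
// expected to be grey objects (marked on bm, if bm is non-NULL). If
// yield_after is true, we offer to yield after each oop and return
// false if the drain was cut short by a yield; we return true once
// the stack has been fully drained.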
template<class OopClosureClass>
bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
         || SafepointSynchronize::is_at_safepoint(),
         "Drain recursion must be yield-safe.");
  bool res = true;
  debug_only(_drain_in_progress = true);
  debug_only(_drain_in_progress_yields = yield_after);
  while (!isEmpty()) {
    oop newOop = pop();
    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
    assert(newOop->is_oop(), "Expected an oop");
    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
           "only grey objects on this stack");
    // iterate over the oops in this oop, marking and pushing
    // the ones in CMS generation.
    newOop->oop_iterate(cl);
    if (yield_after && _cm->do_yield_check()) {
      res = false; break;
    }
  }
  debug_only(_drain_in_progress = false);
  return res;
}
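
// Applies f to the first _oops_do_bound entries on the stack. The
// bound must have been set by the caller beforehand; it is reset to
// -1 on the way out.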
void CMMarkStack::oops_do(OopClosure* f) {
  if (_index == 0) return;
  assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
         "Bound must be set.");
  for (int i = 0; i < _oops_do_bound; i++) {
    f->do_oop(&_base[i]);
  }
  _oops_do_bound = -1;
}

bool ConcurrentMark::not_yet_marked(oop obj) const {
  return (_g1h->is_obj_ill(obj)
          || (_g1h->is_in_permanent(obj)
              && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER
ConcurrentMark::ConcurrentMark(ReservedSpace rs,
                               int max_regions) :
  _markBitMap1(rs, MinObjAlignment - 1),
  _markBitMap2(rs, MinObjAlignment - 1),

  _parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _region_bm(max_regions, false /* in_resource_area*/),
  _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
           CardTableModRefBS::card_shift,
           false /* in_resource_area*/),
  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),
  _at_least_one_mark_complete(false),

  _markStack(this),
  _regionStack(),
  // _finger set in set_non_marking_state

  _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_task_num)),
  _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),
  _should_gray_objects(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL)
{
  CMVerboseLevel verbose_level =
    (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose)
    verbose_level = no_verbose;
  if (verbose_level > high_verbose)
    verbose_level = high_verbose;
  _verbose_level = verbose_level;

  if (verbose_low())
    gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
                           "heap end = "PTR_FORMAT, _heap_start, _heap_end);

  _markStack.allocate(MarkStackSize);
  _regionStack.allocate(G1MarkRegionStackSize);

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");

  _g1h = G1CollectedHeap::heap();
  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  int size = (int) MAX2(ParallelGCThreads, (size_t)1);
  _par_cleanup_thread_state = NEW_C_HEAP_ARRAY(ParCleanupThreadState*, size);
  for (int i = 0 ; i < size; i++) {
    _par_cleanup_thread_state[i] = new ParCleanupThreadState;
  }

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_task_num;
  for (int i = 0; i < (int) _max_task_num; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
    _accum_task_vtime[i] = 0.0;
  }

  if (ConcGCThreads > ParallelGCThreads) {
    vm_exit_during_initialization("Can't have more ConcGCThreads "
                                  "than ParallelGCThreads.");
  }
  if (ParallelGCThreads == 0) {
    // if we are not running with any parallel GC threads we will not
    // spawn any marking threads either
    _parallel_marking_threads =   0;
    _sleep_factor             = 0.0;
    _marking_task_overhead    = 1.0;
  } else {
    if (ConcGCThreads > 0) {
      // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
      // if both are set

      _parallel_marking_threads = ConcGCThreads;
      _sleep_factor             = 0.0;
      _marking_task_overhead    = 1.0;
    } else if (G1MarkingOverheadPercent > 0) {
      // we will calculate the number of parallel marking threads
      // based on a target overhead with respect to the soft real-time
      // goal

      double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
      double overall_cm_overhead =
        (double) MaxGCPauseMillis * marking_overhead /
        (double) GCPauseIntervalMillis;
      double cpu_ratio = 1.0 / (double) os::processor_count();
      double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
      double marking_task_overhead =
        overall_cm_overhead / marking_thread_num *
                                                (double) os::processor_count();
      double sleep_factor =
                         (1.0 - marking_task_overhead) / marking_task_overhead;
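      // Worked example with hypothetical settings: with
      // G1MarkingOverheadPercent=5, MaxGCPauseMillis=200,
      // GCPauseIntervalMillis=1000 and 8 processors, we get
      // overall_cm_overhead = 200 * 0.05 / 1000 = 0.01 and
      // cpu_ratio = 0.125, hence marking_thread_num = ceil(0.08) = 1,
      // marking_task_overhead = 0.01 * 8 = 0.08 and
      // sleep_factor = 0.92 / 0.08 = 11.5, i.e. one marking thread
      // that sleeps roughly 11.5 times as long as it works.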
      _parallel_marking_threads = (size_t) marking_thread_num;
      _sleep_factor             = sleep_factor;
      _marking_task_overhead    = marking_task_overhead;
    } else {
      _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
      _sleep_factor             = 0.0;
      _marking_task_overhead    = 1.0;
    }

    if (parallel_marking_threads() > 1)
      _cleanup_task_overhead = 1.0;
    else
      _cleanup_task_overhead = marking_task_overhead();
    _cleanup_sleep_factor =
                     (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
    gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
    gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
    gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
    gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
    gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
#endif

    guarantee(parallel_marking_threads() > 0, "peace of mind");
    _parallel_workers = new WorkGang("G1 Parallel Marking Threads",
                                     (int) parallel_marking_threads(), false, true);
    if (_parallel_workers == NULL)
      vm_exit_during_initialization("Failed necessary allocation.");
  }

  // so that the call below can read a sensible value
  _heap_start = (HeapWord*) rs.base();
  set_non_marking_state();
}
void ConcurrentMark::update_g1_committed(bool force) {
  // If concurrent marking is not in progress, then we do not need to
  // update _heap_end. This has a subtle and important
  // side-effect. Imagine that two evacuation pauses happen between
  // marking completion and remark. The first one can grow the
  // heap (hence now the finger is below the heap end). Then, the
  // second one could unnecessarily push regions on the region
  // stack. This causes the invariant that the region stack is empty
  // at the beginning of remark to be false. By ensuring that we do
  // not observe heap expansions after marking is complete, we do
  // not have this problem.
  if (!concurrent_marking_in_progress() && !force)
    return;

  MemRegion committed = _g1h->g1_committed();
  assert(committed.start() == _heap_start, "start shouldn't change");
  HeapWord* new_end = committed.end();
  if (new_end > _heap_end) {
    // The heap has been expanded.
    _heap_end = new_end;
  }
  // Notice that the heap can also shrink. However, this only happens
  // during a Full GC (at least currently) and the entire marking
  // phase will bail out and the task will not be restarted. So, let's
  // do nothing.
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase. CM will be notified of any future g1_committed expansions
  // at the end of evacuation pauses, when tasks are inactive.
  MemRegion committed = _g1h->g1_committed();
  _heap_start = committed.start();
  _heap_end   = committed.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // reset all the marking data structures and any necessary flags
  clear_marking_state();

  if (verbose_low())
    gclog_or_tty->print_cr("[global] resetting");

  // We do reset all of them, since different phases will use
  // different numbers of active threads. So, it's easiest to have all
  // of them ready.
  for (int i = 0; i < (int) _max_task_num; ++i)
    _tasks[i]->reset(_nextMarkBitMap);

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}
void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
  assert(active_tasks <= _max_task_num, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (int i = 0; i < (int) _max_task_num; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(_finger == _heap_end, "only way to get here");
    update_g1_committed(true);
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  clear_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  int size = (int) MAX2(ParallelGCThreads, (size_t)1);
  for (int i = 0; i < size; i++) delete _par_cleanup_thread_state[i];
  FREE_C_HEAP_ARRAY(ParCleanupThreadState*,
                    _par_cleanup_thread_state);

  for (int i = 0; i < (int) _max_task_num; ++i) {
    delete _task_queues->queue(i);
    delete _tasks[i];
  }
  delete _task_queues;
  FREE_C_HEAP_ARRAY(CMTask*, _max_task_num);
}
// This closure is used to mark refs into the g1 generation
// from external roots in the CMS bit map.
// Called at the first checkpoint.
//

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->mark_in_progress(), "invariant");

  // clear the mark bitmap (no grey objects to start with).
  // We need to do this in chunks and offer to yield in between
  // each chunk.
  HeapWord* start  = _nextMarkBitMap->startWord();
  HeapWord* end    = _nextMarkBitMap->endWord();
  HeapWord* cur    = start;
  size_t chunkSize = M;
  while (cur < end) {
    HeapWord* next = cur + chunkSize;
    if (next > end)
      next = end;
    MemRegion mr(cur,next);
    _nextMarkBitMap->clearRange(mr);
    cur = next;
    do_yield_check();

    // Repeat the asserts from above. We'll do them as asserts here to
    // minimize their overhead on the product. However, we'll have
    // them as guarantees at the beginning / end of the bitmap
    // clearing to get some checking in the product.
    assert(cmThread()->during_cycle(), "invariant");
    assert(!g1h->mark_in_progress(), "invariant");
  }

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->mark_in_progress(), "invariant");
}
class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->continuesHumongous()) {
      r->note_start_of_marking(true);
    }
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap*   g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

  if (G1PrintReachableAtInitialMark) {
    print_reachable(true, "before");
  }

  // Initialise marking structures. This has to be done in a STW phase.
  reset();
}
class CMMarkRootsClosure: public OopsInGenClosure {
private:
  ConcurrentMark*  _cm;
  G1CollectedHeap* _g1h;
  bool             _do_barrier;

public:
  CMMarkRootsClosure(ConcurrentMark* cm,
                     G1CollectedHeap* g1h,
                     bool do_barrier) : _cm(cm), _g1h(g1h),
                                        _do_barrier(do_barrier) { }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    T heap_oop = oopDesc::load_heap_oop(p);
    if (!oopDesc::is_null(heap_oop)) {
      oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
      assert(obj->is_oop() || obj->mark() == NULL,
             "expected an oop, possibly with mark word displaced");
      HeapWord* addr = (HeapWord*)obj;
      if (_g1h->is_in_g1_reserved(addr)) {
        _cm->grayRoot(obj);
      }
    }
    if (_do_barrier) {
      assert(!_g1h->is_in_g1_reserved(p),
             "Should be called on external roots");
      do_barrier(p);
    }
  }
};
void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap*   g1h = G1CollectedHeap::heap();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);

  // Start weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor();
  rp->verify_no_references_recorded();
  rp->enable_discovery(); // enable ("weak") refs discovery
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle, so we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}
// Checkpoint the roots into this generation from outside
// this generation. [Note this initial checkpoint need only
// be approximate -- we'll do a catch up phase subsequently.]
void ConcurrentMark::checkpointRootsInitial() {
  assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped");
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  double start = os::elapsedTime();

  G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
  g1p->record_concurrent_mark_init_start();
  checkpointRootsInitialPre();

  // YSR: when concurrent precleaning is in place, we'll
  // need to clear the cached card table here

  ResourceMark rm;
  HandleMark  hm;

  g1h->ensure_parsability(false);
  g1h->perm_gen()->save_marks();

  CMMarkRootsClosure notOlder(this, g1h, false);
  CMMarkRootsClosure older(this, g1h, true);

  g1h->set_marking_started();
  g1h->rem_set()->prepare_for_younger_refs_iterate(false);

  g1h->process_strong_roots(true,    // activate StrongRootsScope
                            false,   // fake perm gen collection
                            SharedHeap::SO_AllClasses,
                            &notOlder, // Regular roots
                            NULL,     // do not visit active blobs
                            &older    // Perm Gen Roots
                            );
  checkpointRootsInitialPost();

  // Statistics.
  double end = os::elapsedTime();
  _init_times.add((end - start) * 1000.0);

  g1p->record_concurrent_mark_init_end();
}
/*
   Notice that in the next two methods, we actually leave the STS
   during the barrier sync and join it immediately afterwards. If we
   do not do this, then the following deadlock can occur: one thread
   could be in the barrier sync code, waiting for the other thread
   to also sync up, whereas another one could be trying to yield,
   while also waiting for the other threads to sync up too.

   Because the thread that does the sync barrier has left the STS, it
   is possible for it to be suspended while a Full GC or an evacuation
   pause occurs. This is actually safe, since entering the sync
   barrier is one of the last things do_marking_step() does, and it
   doesn't manipulate any data structures afterwards.
*/
void ConcurrentMark::enter_first_sync_barrier(int task_num) {
  if (verbose_low())
    gclog_or_tty->print_cr("[%d] entering first barrier", task_num);

  ConcurrentGCThread::stsLeave();
  _first_overflow_barrier_sync.enter();
  ConcurrentGCThread::stsJoin();
  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low())
    gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);

  // let task 0 do this
  if (task_num == 0) {
    // task 0 is responsible for clearing the global data structures
    clear_marking_state();

    if (PrintGC) {
      gclog_or_tty->date_stamp(PrintGCDateStamps);
      gclog_or_tty->stamp(PrintGCTimeStamps);
      gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
    }
  }

  // after this, each task should reset its own data structures and
  // then go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(int task_num) {
  if (verbose_low())
    gclog_or_tty->print_cr("[%d] entering second barrier", task_num);

  ConcurrentGCThread::stsLeave();
  _second_overflow_barrier_sync.enter();
  ConcurrentGCThread::stsJoin();
  // at this point everything should be re-initialised and ready to go

  if (verbose_low())
    gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
}
void ConcurrentMark::grayRoot(oop p) {
  HeapWord* addr = (HeapWord*) p;
  // We can't really check against _heap_start and _heap_end, since it
  // is possible during an evacuation pause with piggy-backed
  // initial-mark that the committed space is expanded during the
  // pause without CM observing this change. So the assertion below
  // is a bit conservative; but better than nothing.
  assert(_g1h->g1_committed().contains(addr),
         "address should be within the heap bounds");

  if (!_nextMarkBitMap->isMarked(addr))
    _nextMarkBitMap->parMark(addr);
}
void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
  // The objects on the region have already been marked "in bulk" by
  // the caller. We only need to decide whether to push the region on
  // the region stack or not.

  if (!concurrent_marking_in_progress() || !_should_gray_objects)
    // We're done with marking and waiting for remark. We do not need to
    // push anything else on the region stack.
    return;

  HeapWord* finger = _finger;

  if (verbose_low())
    gclog_or_tty->print_cr("[global] attempting to push "
                           "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
                           PTR_FORMAT, mr.start(), mr.end(), finger);

  if (mr.start() < finger) {
    // The finger is always heap region aligned and it is not possible
    // for mr to span heap regions.
    assert(mr.end() <= finger, "invariant");

    // Separated the asserts so that we know which one fires.
    assert(mr.start() <= mr.end(),
           "region boundaries should fall within the committed space");
    assert(_heap_start <= mr.start(),
           "region boundaries should fall within the committed space");
    assert(mr.end() <= _heap_end,
           "region boundaries should fall within the committed space");
    if (verbose_low())
      gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
                             "below the finger, pushing it",
                             mr.start(), mr.end());

    if (!region_stack_push(mr)) {
      if (verbose_low())
        gclog_or_tty->print_cr("[global] region stack has overflown.");
    }
  }
}
void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
  // The object is not marked by the caller. We need to at least mark
  // it and maybe push it on the stack.

  HeapWord* addr = (HeapWord*)p;
  if (!_nextMarkBitMap->isMarked(addr)) {
    // We definitely need to mark it, irrespective of whether we bail
    // out because we're done with marking.
    if (_nextMarkBitMap->parMark(addr)) {
      if (!concurrent_marking_in_progress() || !_should_gray_objects)
        // If we're done with concurrent marking and we're waiting for
        // remark, then we're not pushing anything on the stack.
        return;

      // No OrderAccess::store_load() is needed. It is implicit in the
      // CAS done in parMark(addr) above
      HeapWord* finger = _finger;

      if (addr < finger) {
        if (!mark_stack_push(oop(addr))) {
          if (verbose_low())
            gclog_or_tty->print_cr("[global] global stack overflow "
                                   "during parMark");
        }
      }
    }
  }
}
class CMConcurrentMarkingTask: public AbstractGangTask {
private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(int worker_i) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    double start_vtime = os::elapsedVTime();

    ConcurrentGCThread::stsJoin();

    assert((size_t) worker_i < _cm->active_tasks(), "invariant");
    CMTask* the_task = _cm->task(worker_i);
    the_task->record_start_time();
    if (!_cm->has_aborted()) {
      do {
        double start_vtime_sec = os::elapsedVTime();
        double start_time_sec = os::elapsedTime();
        the_task->do_marking_step(10.0);
        double end_time_sec = os::elapsedTime();
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
        double elapsed_time_sec = end_time_sec - start_time_sec;
        _cm->clear_has_overflown();

        bool ret = _cm->do_yield_check(worker_i);

        jlong sleep_time_ms;
        if (!_cm->has_aborted() && the_task->has_aborted()) {
          sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
          ConcurrentGCThread::stsLeave();
          os::sleep(Thread::current(), sleep_time_ms, false);
          ConcurrentGCThread::stsJoin();
        }
        double end_time2_sec = os::elapsedTime();
        double elapsed_time2_sec = end_time2_sec - start_time_sec;

#if 0
          gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
                                 "overhead %1.4lf",
                                 elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
                                 the_task->conc_overhead(os::elapsedTime()) * 8.0);
          gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
                                 elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
#endif
      } while (!_cm->has_aborted() && the_task->has_aborted());
    }
    the_task->record_end_time();
    guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");

    ConcurrentGCThread::stsLeave();

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};
void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;

  set_phase(MAX2((size_t) 1, parallel_marking_threads()), true);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  if (parallel_marking_threads() > 0)
    _parallel_workers->run_task(&markingTask);
  else
    markingTask.work(0);
  print_stats();
}
void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(before)");
    Universe::heap()->prepare_for_verify();
    Universe::verify(true, false, true);
  }

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops.  We overflowed.  Restart concurrent marking.
    _restart_for_overflow = true;
    // Clear the flag. We do not need it any more.
    clear_has_overflown();
    if (G1TraceMarkStackOverflow)
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
  } else {
    // We're done with marking.
    // This is the end of the marking cycle, so we expect all
    // threads to have SATB queues with active set to true.
    JavaThread::satb_mark_queue_set().set_active_all_threads(
                                                  false, /* new active value */
                                                  true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      gclog_or_tty->print(" VerifyDuringGC:(after)");
      Universe::heap()->prepare_for_verify();
      Universe::heap()->verify(/* allow_dirty */      true,
                               /* silent */           false,
                               /* use_prev_marking */ false);
    }
  }

#if VERIFY_OBJS_PROCESSED
  _scan_obj_cl.objs_processed = 0;
  ThreadLocalObjQueue::objs_enqueued = 0;
#endif

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();
}
#define CARD_BM_TEST_MODE 0
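
// When CARD_BM_TEST_MODE is non-zero, CalcLiveObjectsClosure below
// only verifies that the card bitmap bits it would set are already
// set, instead of setting them.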
class CalcLiveObjectsClosure: public HeapRegionClosure {

  CMBitMapRO* _bm;
  ConcurrentMark* _cm;
  bool _changed;
  bool _yield;
  size_t _words_done;
  size_t _tot_live;
  size_t _tot_used;
  size_t _regions_done;
  double _start_vtime_sec;

  BitMap* _region_bm;
  BitMap* _card_bm;
  intptr_t _bottom_card_num;
  bool _final;

  void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
    for (intptr_t i = start_card_num; i <= last_card_num; i++) {
#if CARD_BM_TEST_MODE
      guarantee(_card_bm->at(i - _bottom_card_num), "Should already be set.");
#else
      _card_bm->par_at_put(i - _bottom_card_num, 1);
#endif
    }
  }

public:
  CalcLiveObjectsClosure(bool final,
                         CMBitMapRO *bm, ConcurrentMark *cm,
                         BitMap* region_bm, BitMap* card_bm) :
    _bm(bm), _cm(cm), _changed(false), _yield(true),
    _words_done(0), _tot_live(0), _tot_used(0),
    _region_bm(region_bm), _card_bm(card_bm),_final(final),
    _regions_done(0), _start_vtime_sec(0.0)
  {
    _bottom_card_num =
      intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
               CardTableModRefBS::card_shift);
  }

  // It takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->continuesHumongous(), "should have filtered those out");

    size_t index = hr->hrs_index();
    if (!hr->startsHumongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_at_put((BitMap::idx_t) index, true);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range. It might
      // have been a bit more efficient to look at the object that
      // spans these humongous regions to calculate their number from
      // the object's size. However, it's a good idea to calculate
      // this based on the metadata itself, and not the region
      // contents, so that this code is not aware of what goes into
      // the humongous regions (in case this changes in the future).
      G1CollectedHeap* g1h = G1CollectedHeap::heap();
      size_t end_index = index + 1;
      while (end_index < g1h->n_regions()) {
        HeapRegion* chr = g1h->region_at(end_index);
        if (!chr->continuesHumongous()) {
          break;
        }
        end_index += 1;
      }
      _region_bm->par_at_put_range((BitMap::idx_t) index,
                                   (BitMap::idx_t) end_index, true);
    }
  }

  bool doHeapRegion(HeapRegion* hr) {
    if (!_final && _regions_done == 0)
      _start_vtime_sec = os::elapsedVTime();

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* nextTop = hr->next_top_at_mark_start();
    HeapWord* start   = hr->top_at_conc_mark_count();
    assert(hr->bottom() <= start && start <= hr->end() &&
           hr->bottom() <= nextTop && nextTop <= hr->end() &&
           start <= nextTop,
           "Preconditions.");
    // Record the number of words we'll examine.
    size_t words_done = (nextTop - start);
    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, nextTop);
    size_t marked_bytes = 0;

    // Below, the term "card num" means the result of shifting an address
    // by the card shift -- address 0 corresponds to card number 0.  One
    // must subtract the card num of the bottom of the heap to obtain a
    // card table index.
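    // For example, with 512-byte cards (a card shift of 9), an object
    // at address 0x1200 is on card num 0x9; if the heap starts at
    // address 0x1000 (card num 0x8), its card table index is 1.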
  1287     // The first card num of the sequence of live cards currently being
  1288     // constructed.  -1 ==> no sequence.
  1289     intptr_t start_card_num = -1;
  1290     // The last card num of the sequence of live cards currently being
  1291     // constructed.  -1 ==> no sequence.
  1292     intptr_t last_card_num = -1;
  1294     while (start < nextTop) {
  1295       if (_yield && _cm->do_yield_check()) {
  1296         // We yielded.  It might be for a full collection, in which case
  1297         // all bets are off; terminate the traversal.
  1298         if (_cm->has_aborted()) {
  1299           _changed = false;
  1300           return true;
  1301         } else {
  1302           // Otherwise, it might be a collection pause, and the region
  1303           // we're looking at might be in the collection set.  We'll
  1304           // abandon this region.
  1305           return false;
  1308       oop obj = oop(start);
  1309       int obj_sz = obj->size();
  1310       // The card num of the start of the current object.
  1311       intptr_t obj_card_num =
  1312         intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
  1314       HeapWord* obj_last = start + obj_sz - 1;
  1315       intptr_t obj_last_card_num =
  1316         intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);
  1318       if (obj_card_num != last_card_num) {
  1319         if (start_card_num == -1) {
  1320           assert(last_card_num == -1, "Both or neither.");
  1321           start_card_num = obj_card_num;
  1322         } else {
  1323           assert(last_card_num != -1, "Both or neither.");
  1324           assert(obj_card_num >= last_card_num, "Inv");
  1325           if ((obj_card_num - last_card_num) > 1) {
  1326             // Mark the last run, and start a new one.
  1327             mark_card_num_range(start_card_num, last_card_num);
  1328             start_card_num = obj_card_num;
  1331 #if CARD_BM_TEST_MODE
  1332         /*
  1333         gclog_or_tty->print_cr("Setting bits from %d/%d.",
  1334                                obj_card_num - _bottom_card_num,
  1335                                obj_last_card_num - _bottom_card_num);
  1336         */
  1337         for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
  1338           _card_bm->par_at_put(j - _bottom_card_num, 1);
  1340 #endif
  1342       // In any case, we set the last card num.
  1343       last_card_num = obj_last_card_num;
  1345       marked_bytes += (size_t)obj_sz * HeapWordSize;
  1346       // Find the next marked object after this one.
  1347       start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
  1348       _changed = true;
  1350     // Handle the last range, if any.
  1351     if (start_card_num != -1)
  1352       mark_card_num_range(start_card_num, last_card_num);
  1353     if (_final) {
  1354       // Mark the allocated-since-marking portion...
  1355       HeapWord* tp = hr->top();
  1356       if (nextTop < tp) {
  1357         start_card_num =
  1358           intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
  1359         last_card_num =
  1360           intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
  1361         mark_card_num_range(start_card_num, last_card_num);
  1362         // This definitely means the region has live objects.
  1363         set_bit_for_region(hr);
  1367     hr->add_to_marked_bytes(marked_bytes);
  1368     // Update the live region bitmap.
  1369     if (marked_bytes > 0) {
  1370       set_bit_for_region(hr);
  1372     hr->set_top_at_conc_mark_count(nextTop);
  1373     _tot_live += hr->next_live_bytes();
  1374     _tot_used += hr->used();
  1375     _words_done = words_done;
  1377     if (!_final) {
  1378       ++_regions_done;
  1379       if (_regions_done % 10 == 0) {
  1380         double end_vtime_sec = os::elapsedVTime();
  1381         double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
  1382         if (elapsed_vtime_sec > (10.0 / 1000.0)) {
  1383           jlong sleep_time_ms =
  1384             (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
  1385           os::sleep(Thread::current(), sleep_time_ms, false);
  1386           _start_vtime_sec = end_vtime_sec;
  1387         }
  1388       }
  1389     }
  1391     return false;
  1392   }
  1394   bool changed() { return _changed;  }
  1395   void reset()   { _changed = false; _words_done = 0; }
  1396   void no_yield() { _yield = false; }
  1397   size_t words_done() { return _words_done; }
  1398   size_t tot_live() { return _tot_live; }
  1399   size_t tot_used() { return _tot_used; }
  1400 };
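// A minimal sketch of the card arithmetic used by the closure above,
// assuming the same CardTableModRefBS::card_shift: a heap address maps to
// a card index by discarding its low-order bits, so two addresses lie on
// the same card iff their shifted values are equal. That is why the loop
// only calls mark_card_num_range() when obj_card_num moves past
// last_card_num.
//
//   intptr_t card_num_for(HeapWord* addr) {        // hypothetical helper
//     return intptr_t(uintptr_t(addr) >> CardTableModRefBS::card_shift);
//   }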
  1403 void ConcurrentMark::calcDesiredRegions() {
  1404   _region_bm.clear();
  1405   _card_bm.clear();
  1406   CalcLiveObjectsClosure calccl(false /*final*/,
  1407                                 nextMarkBitMap(), this,
  1408                                 &_region_bm, &_card_bm);
  1409   G1CollectedHeap *g1h = G1CollectedHeap::heap();
  1410   g1h->heap_region_iterate(&calccl);
  1412   do {
  1413     calccl.reset();
  1414     g1h->heap_region_iterate(&calccl);
  1415   } while (calccl.changed());
  1416 }
  1418 class G1ParFinalCountTask: public AbstractGangTask {
  1419 protected:
  1420   G1CollectedHeap* _g1h;
  1421   CMBitMap* _bm;
  1422   size_t _n_workers;
  1423   size_t *_live_bytes;
  1424   size_t *_used_bytes;
  1425   BitMap* _region_bm;
  1426   BitMap* _card_bm;
  1427 public:
  1428   G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
  1429                       BitMap* region_bm, BitMap* card_bm) :
  1430     AbstractGangTask("G1 final counting"), _g1h(g1h),
  1431     _bm(bm), _region_bm(region_bm), _card_bm(card_bm)
  1432   {
  1433     if (ParallelGCThreads > 0)
  1434       _n_workers = _g1h->workers()->total_workers();
  1435     else
  1436       _n_workers = 1;
  1437     _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
  1438     _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
  1439   }
  1441   ~G1ParFinalCountTask() {
  1442     FREE_C_HEAP_ARRAY(size_t, _live_bytes);
  1443     FREE_C_HEAP_ARRAY(size_t, _used_bytes);
  1444   }
  1446   void work(int i) {
  1447     CalcLiveObjectsClosure calccl(true /*final*/,
  1448                                   _bm, _g1h->concurrent_mark(),
  1449                                   _region_bm, _card_bm);
  1450     calccl.no_yield();
  1451     if (ParallelGCThreads > 0) {
  1452       _g1h->heap_region_par_iterate_chunked(&calccl, i,
  1453                                             HeapRegion::FinalCountClaimValue);
  1454     } else {
  1455       _g1h->heap_region_iterate(&calccl);
  1456     }
  1457     assert(calccl.complete(), "Shouldn't have yielded!");
  1459     assert((size_t) i < _n_workers, "invariant");
  1460     _live_bytes[i] = calccl.tot_live();
  1461     _used_bytes[i] = calccl.tot_used();
  1462   }
  1463   size_t live_bytes()  {
  1464     size_t live_bytes = 0;
  1465     for (size_t i = 0; i < _n_workers; ++i)
  1466       live_bytes += _live_bytes[i];
  1467     return live_bytes;
  1468   }
  1469   size_t used_bytes()  {
  1470     size_t used_bytes = 0;
  1471     for (size_t i = 0; i < _n_workers; ++i)
  1472       used_bytes += _used_bytes[i];
  1473     return used_bytes;
  1474   }
  1475 };
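// The task above uses a common parallel-reduction shape: every worker owns
// one slot of the _live_bytes/_used_bytes arrays (indexed by its worker
// id), so work() needs no synchronization, and the single-threaded
// accessors sum the slots afterwards. A minimal sketch of the same
// pattern, with hypothetical names:
//
//   size_t* partial = NEW_C_HEAP_ARRAY(size_t, n_workers);
//   // worker i (in parallel):  partial[i] = local_total;
//   size_t total = 0;
//   for (size_t i = 0; i < n_workers; ++i) total += partial[i];  // serial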
  1477 class G1ParNoteEndTask;
  1479 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  1480   G1CollectedHeap* _g1;
  1481   int _worker_num;
  1482   size_t _max_live_bytes;
  1483   size_t _regions_claimed;
  1484   size_t _freed_bytes;
  1485   size_t _cleared_h_regions;
  1486   size_t _freed_regions;
  1487   UncleanRegionList* _unclean_region_list;
  1488   double _claimed_region_time;
  1489   double _max_region_time;
  1491 public:
  1492   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
  1493                              UncleanRegionList* list,
  1494                              int worker_num);
  1495   size_t freed_bytes() { return _freed_bytes; }
  1496   size_t cleared_h_regions() { return _cleared_h_regions; }
  1497   size_t freed_regions() { return  _freed_regions; }
  1498   UncleanRegionList* unclean_region_list() {
  1499     return _unclean_region_list;
  1500   }
  1502   bool doHeapRegion(HeapRegion *r);
  1504   size_t max_live_bytes() { return _max_live_bytes; }
  1505   size_t regions_claimed() { return _regions_claimed; }
  1506   double claimed_region_time_sec() { return _claimed_region_time; }
  1507   double max_region_time_sec() { return _max_region_time; }
  1508 };
  1510 class G1ParNoteEndTask: public AbstractGangTask {
  1511   friend class G1NoteEndOfConcMarkClosure;
  1512 protected:
  1513   G1CollectedHeap* _g1h;
  1514   size_t _max_live_bytes;
  1515   size_t _freed_bytes;
  1516   ConcurrentMark::ParCleanupThreadState** _par_cleanup_thread_state;
  1517 public:
  1518   G1ParNoteEndTask(G1CollectedHeap* g1h,
  1519                    ConcurrentMark::ParCleanupThreadState**
  1520                    par_cleanup_thread_state) :
  1521     AbstractGangTask("G1 note end"), _g1h(g1h),
  1522     _max_live_bytes(0), _freed_bytes(0),
  1523     _par_cleanup_thread_state(par_cleanup_thread_state)
  1524   {}
  1526   void work(int i) {
  1527     double start = os::elapsedTime();
  1528     G1NoteEndOfConcMarkClosure g1_note_end(_g1h,
  1529                                            &_par_cleanup_thread_state[i]->list,
  1530                                            i);
  1531     if (ParallelGCThreads > 0) {
  1532       _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
  1533                                             HeapRegion::NoteEndClaimValue);
  1534     } else {
  1535       _g1h->heap_region_iterate(&g1_note_end);
  1536     }
  1537     assert(g1_note_end.complete(), "Shouldn't have yielded!");
  1539     // Now finish up freeing the current thread's regions.
  1540     _g1h->finish_free_region_work(g1_note_end.freed_bytes(),
  1541                                   g1_note_end.cleared_h_regions(),
  1542                                   0, NULL);
  1543     {
  1544       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  1545       _max_live_bytes += g1_note_end.max_live_bytes();
  1546       _freed_bytes += g1_note_end.freed_bytes();
  1547     }
  1548     double end = os::elapsedTime();
  1549     if (G1PrintParCleanupStats) {
  1550       gclog_or_tty->print("     Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
  1551                           "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
  1552                           i, start, end, (end-start)*1000.0,
  1553                           g1_note_end.regions_claimed(),
  1554                           g1_note_end.claimed_region_time_sec()*1000.0,
  1555                           g1_note_end.max_region_time_sec()*1000.0);
  1556     }
  1557   }
  1558   size_t max_live_bytes() { return _max_live_bytes; }
  1559   size_t freed_bytes() { return _freed_bytes; }
  1560 };
  1562 class G1ParScrubRemSetTask: public AbstractGangTask {
  1563 protected:
  1564   G1RemSet* _g1rs;
  1565   BitMap* _region_bm;
  1566   BitMap* _card_bm;
  1567 public:
  1568   G1ParScrubRemSetTask(G1CollectedHeap* g1h,
  1569                        BitMap* region_bm, BitMap* card_bm) :
  1570     AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
  1571     _region_bm(region_bm), _card_bm(card_bm)
  1572   {}
  1574   void work(int i) {
  1575     if (ParallelGCThreads > 0) {
  1576       _g1rs->scrub_par(_region_bm, _card_bm, i,
  1577                        HeapRegion::ScrubRemSetClaimValue);
  1578     } else {
  1579       _g1rs->scrub(_region_bm, _card_bm);
  1583 };
  1585 G1NoteEndOfConcMarkClosure::
  1586 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
  1587                            UncleanRegionList* list,
  1588                            int worker_num)
  1589   : _g1(g1), _worker_num(worker_num),
  1590     _max_live_bytes(0), _regions_claimed(0),
  1591     _freed_bytes(0), _cleared_h_regions(0), _freed_regions(0),
  1592     _claimed_region_time(0.0), _max_region_time(0.0),
  1593     _unclean_region_list(list)
  1594 {}
  1596 bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *r) {
  1597   // We use a claim value of zero here because all regions
  1598   // were claimed with value 1 in the FinalCount task.
  1599   r->reset_gc_time_stamp();
  1600   if (!r->continuesHumongous()) {
  1601     double start = os::elapsedTime();
  1602     _regions_claimed++;
  1603     r->note_end_of_marking();
  1604     _max_live_bytes += r->max_live_bytes();
  1605     _g1->free_region_if_totally_empty_work(r,
  1606                                            _freed_bytes,
  1607                                            _cleared_h_regions,
  1608                                            _freed_regions,
  1609                                            _unclean_region_list,
  1610                                            true /*par*/);
  1611     double region_time = (os::elapsedTime() - start);
  1612     _claimed_region_time += region_time;
  1613     if (region_time > _max_region_time) _max_region_time = region_time;
  1614   }
  1615   return false;
  1616 }
  1618 void ConcurrentMark::cleanup() {
  1619   // world is stopped at this checkpoint
  1620   assert(SafepointSynchronize::is_at_safepoint(),
  1621          "world should be stopped");
  1622   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  1624   // If a full collection has happened, we shouldn't do this.
  1625   if (has_aborted()) {
  1626     g1h->set_marking_complete(); // So bitmap clearing isn't confused
  1627     return;
  1628   }
  1630   if (VerifyDuringGC) {
  1631     HandleMark hm;  // handle scope
  1632     gclog_or_tty->print(" VerifyDuringGC:(before)");
  1633     Universe::heap()->prepare_for_verify();
  1634     Universe::verify(/* allow dirty  */ true,
  1635                      /* silent       */ false,
  1636                      /* prev marking */ true);
  1637   }
  1639   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
  1640   g1p->record_concurrent_mark_cleanup_start();
  1642   double start = os::elapsedTime();
  1644   // Do counting once more with the world stopped for good measure.
  1645   G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
  1646                                         &_region_bm, &_card_bm);
  1647   if (ParallelGCThreads > 0) {
  1648     assert(g1h->check_heap_region_claim_values(
  1649                                                HeapRegion::InitialClaimValue),
  1650            "sanity check");
  1652     int n_workers = g1h->workers()->total_workers();
  1653     g1h->set_par_threads(n_workers);
  1654     g1h->workers()->run_task(&g1_par_count_task);
  1655     g1h->set_par_threads(0);
  1657     assert(g1h->check_heap_region_claim_values(
  1658                                              HeapRegion::FinalCountClaimValue),
  1659            "sanity check");
  1660   } else {
  1661     g1_par_count_task.work(0);
  1662   }
  1664   size_t known_garbage_bytes =
  1665     g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
  1666 #if 0
  1667   gclog_or_tty->print_cr("used %1.2lf, live %1.2lf, garbage %1.2lf",
  1668                          (double) g1_par_count_task.used_bytes() / (double) (1024 * 1024),
  1669                          (double) g1_par_count_task.live_bytes() / (double) (1024 * 1024),
  1670                          (double) known_garbage_bytes / (double) (1024 * 1024));
  1671 #endif // 0
  1672   g1p->set_known_garbage_bytes(known_garbage_bytes);
  1674   size_t start_used_bytes = g1h->used();
  1675   _at_least_one_mark_complete = true;
  1676   g1h->set_marking_complete();
  1678   double count_end = os::elapsedTime();
  1679   double this_final_counting_time = (count_end - start);
  1680   if (G1PrintParCleanupStats) {
  1681     gclog_or_tty->print_cr("Cleanup:");
  1682     gclog_or_tty->print_cr("  Finalize counting: %8.3f ms",
  1683                            this_final_counting_time*1000.0);
  1684   }
  1685   _total_counting_time += this_final_counting_time;
  1687   // Install newly created mark bitMap as "prev".
  1688   swapMarkBitMaps();
  1690   g1h->reset_gc_time_stamp();
  1692   // Note end of marking in all heap regions.
  1693   double note_end_start = os::elapsedTime();
  1694   G1ParNoteEndTask g1_par_note_end_task(g1h, _par_cleanup_thread_state);
  1695   if (ParallelGCThreads > 0) {
  1696     int n_workers = g1h->workers()->total_workers();
  1697     g1h->set_par_threads(n_workers);
  1698     g1h->workers()->run_task(&g1_par_note_end_task);
  1699     g1h->set_par_threads(0);
  1701     assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
  1702            "sanity check");
  1703   } else {
  1704     g1_par_note_end_task.work(0);
  1705   }
  1706   g1h->set_unclean_regions_coming(true);
  1707   double note_end_end = os::elapsedTime();
  1708   // Tell the mutators that there might be unclean regions coming...
  1709   if (G1PrintParCleanupStats) {
  1710     gclog_or_tty->print_cr("  note end of marking: %8.3f ms.",
  1711                            (note_end_end - note_end_start)*1000.0);
  1714   // Now we scrub the remembered sets. Note that we must do this before the
  1715   // call below, since it affects the metric by which we sort the heap
  1716   // regions.
  1717   if (G1ScrubRemSets) {
  1718     double rs_scrub_start = os::elapsedTime();
  1719     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
  1720     if (ParallelGCThreads > 0) {
  1721       int n_workers = g1h->workers()->total_workers();
  1722       g1h->set_par_threads(n_workers);
  1723       g1h->workers()->run_task(&g1_par_scrub_rs_task);
  1724       g1h->set_par_threads(0);
  1726       assert(g1h->check_heap_region_claim_values(
  1727                                             HeapRegion::ScrubRemSetClaimValue),
  1728              "sanity check");
  1729     } else {
  1730       g1_par_scrub_rs_task.work(0);
  1731     }
  1733     double rs_scrub_end = os::elapsedTime();
  1734     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
  1735     _total_rs_scrub_time += this_rs_scrub_time;
  1736   }
  1738   // This will also free any regions totally full of garbage objects,
  1739   // and sort the regions.
  1740   g1h->g1_policy()->record_concurrent_mark_cleanup_end(
  1741                         g1_par_note_end_task.freed_bytes(),
  1742                         g1_par_note_end_task.max_live_bytes());
  1744   // Statistics.
  1745   double end = os::elapsedTime();
  1746   _cleanup_times.add((end - start) * 1000.0);
  1748   // G1CollectedHeap::heap()->print();
  1749   // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
  1750   // G1CollectedHeap::heap()->get_gc_time_stamp());
  1752   if (PrintGC || PrintGCDetails) {
  1753     g1h->print_size_transition(gclog_or_tty,
  1754                                start_used_bytes,
  1755                                g1h->used(),
  1756                                g1h->capacity());
  1757   }
  1759   size_t cleaned_up_bytes = start_used_bytes - g1h->used();
  1760   g1p->decrease_known_garbage_bytes(cleaned_up_bytes);
  1762   // We need to make this a "collection" so any collection pause that
  1763   // races with it goes around and waits for completeCleanup to finish.
  1764   g1h->increment_total_collections();
  1766   if (VerifyDuringGC) {
  1767     HandleMark hm;  // handle scope
  1768     gclog_or_tty->print(" VerifyDuringGC:(after)");
  1769     Universe::heap()->prepare_for_verify();
  1770     Universe::verify(/* allow dirty  */ true,
  1771                      /* silent       */ false,
  1772                      /* prev marking */ true);
  1773   }
  1774 }
  1776 void ConcurrentMark::completeCleanup() {
  1777   // A full collection intervened.
  1778   if (has_aborted()) return;
  1780   int first = 0;
  1781   int last = (int)MAX2(ParallelGCThreads, (size_t)1);
  1782   for (int t = 0; t < last; t++) {
  1783     UncleanRegionList* list = &_par_cleanup_thread_state[t]->list;
  1784     assert(list->well_formed(), "Inv");
  1785     HeapRegion* hd = list->hd();
  1786     while (hd != NULL) {
  1787       // Now finish up the other stuff.
  1788       hd->rem_set()->clear();
  1789       HeapRegion* next_hd = hd->next_from_unclean_list();
  1790       (void)list->pop();
  1791       assert(list->hd() == next_hd, "how not?");
  1792       _g1h->put_region_on_unclean_list(hd);
  1793       if (!hd->isHumongous()) {
  1794         // This adds one to the _free_regions count.
  1795         _g1h->finish_free_region_work(0, 0, 1, NULL);
  1796       }
  1797       hd = list->hd();
  1798       assert(hd == next_hd, "how not?");
  1799     }
  1800   }
  1801 }
  1804 class G1CMIsAliveClosure: public BoolObjectClosure {
  1805   G1CollectedHeap* _g1;
  1806  public:
  1807   G1CMIsAliveClosure(G1CollectedHeap* g1) :
  1808     _g1(g1)
  1809   {}
  1811   void do_object(oop obj) {
  1812     assert(false, "not to be invoked");
  1813   }
  1814   bool do_object_b(oop obj) {
  1815     HeapWord* addr = (HeapWord*)obj;
  1816     return addr != NULL &&
  1817            (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
  1818   }
  1819 };
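// The predicate above treats an oop as alive if it is non-NULL and either
// lies outside the G1 reserved space (such objects are not covered by the
// mark bitmaps) or is not "ill", i.e. not dead according to the marking
// information. Roughly, as a sketch with hypothetical names:
//
//   bool is_alive(oop obj) {
//     return obj != NULL &&
//            (!in_g1_reserved(obj) || !is_obj_ill(obj));
//   }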
  1821 class G1CMKeepAliveClosure: public OopClosure {
  1822   G1CollectedHeap* _g1;
  1823   ConcurrentMark*  _cm;
  1824   CMBitMap*        _bitMap;
  1825  public:
  1826   G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
  1827                        CMBitMap* bitMap) :
  1828     _g1(g1), _cm(cm),
  1829     _bitMap(bitMap) {}
  1831   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  1832   virtual void do_oop(      oop* p) { do_oop_work(p); }
  1834   template <class T> void do_oop_work(T* p) {
  1835     oop thisOop = oopDesc::load_decode_heap_oop(p);
  1836     HeapWord* addr = (HeapWord*)thisOop;
  1837     if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(thisOop)) {
  1838       _bitMap->mark(addr);
  1839       _cm->mark_stack_push(thisOop);
  1840     }
  1841   }
  1842 };
  1844 class G1CMDrainMarkingStackClosure: public VoidClosure {
  1845   CMMarkStack*                  _markStack;
  1846   CMBitMap*                     _bitMap;
  1847   G1CMKeepAliveClosure*         _oopClosure;
  1848  public:
  1849   G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
  1850                                G1CMKeepAliveClosure* oopClosure) :
  1851     _bitMap(bitMap),
  1852     _markStack(markStack),
  1853     _oopClosure(oopClosure)
  1854   {}
  1856   void do_void() {
  1857     _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
  1858   }
  1859 };
  1861 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
  1862   ResourceMark rm;
  1863   HandleMark   hm;
  1864   G1CollectedHeap* g1h   = G1CollectedHeap::heap();
  1865   ReferenceProcessor* rp = g1h->ref_processor();
  1867   // Process weak references.
  1868   rp->setup_policy(clear_all_soft_refs);
  1869   assert(_markStack.isEmpty(), "mark stack should be empty");
  1871   G1CMIsAliveClosure   g1IsAliveClosure  (g1h);
  1872   G1CMKeepAliveClosure g1KeepAliveClosure(g1h, this, nextMarkBitMap());
  1873   G1CMDrainMarkingStackClosure
  1874     g1DrainMarkingStackClosure(nextMarkBitMap(), &_markStack,
  1875                                &g1KeepAliveClosure);
  1877   // XXXYYY  Also: copy the parallel ref processing code from CMS.
  1878   rp->process_discovered_references(&g1IsAliveClosure,
  1879                                     &g1KeepAliveClosure,
  1880                                     &g1DrainMarkingStackClosure,
  1881                                     NULL);
  1882   assert(_markStack.overflow() || _markStack.isEmpty(),
  1883          "mark stack should be empty (unless it overflowed)");
  1884   if (_markStack.overflow()) {
  1885     set_has_overflown();
  1886   }
  1888   rp->enqueue_discovered_references();
  1889   rp->verify_no_references_recorded();
  1890   assert(!rp->discovery_enabled(), "should have been disabled");
  1892   // Now clean up stale oops in SymbolTable and StringTable
  1893   SymbolTable::unlink(&g1IsAliveClosure);
  1894   StringTable::unlink(&g1IsAliveClosure);
  1895 }
  1897 void ConcurrentMark::swapMarkBitMaps() {
  1898   CMBitMapRO* temp = _prevMarkBitMap;
  1899   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
  1900   _nextMarkBitMap  = (CMBitMap*)  temp;
  1901 }
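// After cleanup the two bitmaps swap roles: the bitmap just completed by
// concurrent marking ("next") becomes the new read-only "prev" bitmap,
// while the old prev bitmap will be cleared and reused as "next" in the
// following cycle. The casts are only needed because _prevMarkBitMap is
// declared with the read-only CMBitMapRO type.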
  1903 class CMRemarkTask: public AbstractGangTask {
  1904 private:
  1905   ConcurrentMark *_cm;
  1907 public:
  1908   void work(int worker_i) {
  1909     // Since all available tasks are actually started, we should
  1910     // only proceed if we're supposed to be active.
  1911     if ((size_t)worker_i < _cm->active_tasks()) {
  1912       CMTask* task = _cm->task(worker_i);
  1913       task->record_start_time();
  1914       do {
  1915         task->do_marking_step(1000000000.0 /* something very large */);
  1916       } while (task->has_aborted() && !_cm->has_overflown());
  1917       // If we overflow, then we do not want to restart. We instead
  1918       // want to abort remark and do concurrent marking again.
  1919       task->record_end_time();
  1920     }
  1921   }
  1923   CMRemarkTask(ConcurrentMark* cm) :
  1924     AbstractGangTask("Par Remark"), _cm(cm) { }
  1925 };
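// The do/while above keeps re-invoking do_marking_step() while the task
// aborts for transient reasons, but gives up as soon as the global mark
// stack has overflown; in that case remark is abandoned and concurrent
// marking is restarted, as the comment above notes.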
  1927 void ConcurrentMark::checkpointRootsFinalWork() {
  1928   ResourceMark rm;
  1929   HandleMark   hm;
  1930   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  1932   g1h->ensure_parsability(false);
  1934   if (ParallelGCThreads > 0) {
  1935     G1CollectedHeap::StrongRootsScope srs(g1h);
  1936     // this is remark, so we'll use up all available threads
  1937     int active_workers = ParallelGCThreads;
  1938     set_phase(active_workers, false);
  1940     CMRemarkTask remarkTask(this);
  1941     // We will start all available threads, even if we decide that the
  1942     // active_workers will be fewer. The extra ones will just bail out
  1943     // immediately.
  1944     int n_workers = g1h->workers()->total_workers();
  1945     g1h->set_par_threads(n_workers);
  1946     g1h->workers()->run_task(&remarkTask);
  1947     g1h->set_par_threads(0);
  1948   } else {
  1949     G1CollectedHeap::StrongRootsScope srs(g1h);
  1950     // this is remark, so we'll use up all available threads
  1951     int active_workers = 1;
  1952     set_phase(active_workers, false);
  1954     CMRemarkTask remarkTask(this);
  1955     // We will start all available threads, even if we decide that the
  1956     // active_workers will be fewer. The extra ones will just bail out
  1957     // immediately.
  1958     remarkTask.work(0);
  1959   }
  1960   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  1961   guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");
  1963   print_stats();
  1965   if (!restart_for_overflow())
  1966     set_non_marking_state();
  1968 #if VERIFY_OBJS_PROCESSED
  1969   if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
  1970     gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
  1971                            _scan_obj_cl.objs_processed,
  1972                            ThreadLocalObjQueue::objs_enqueued);
  1973     guarantee(_scan_obj_cl.objs_processed ==
  1974               ThreadLocalObjQueue::objs_enqueued,
  1975               "Different number of objs processed and enqueued.");
  1976   }
  1977 #endif
  1978 }
  1980 #ifndef PRODUCT
  1982 class ReachablePrinterOopClosure: public OopClosure {
  1983 private:
  1984   G1CollectedHeap* _g1h;
  1985   CMBitMapRO*      _bitmap;
  1986   outputStream*    _out;
  1987   bool             _use_prev_marking;
  1989 public:
  1990   ReachablePrinterOopClosure(CMBitMapRO*   bitmap,
  1991                              outputStream* out,
  1992                              bool          use_prev_marking) :
  1993     _g1h(G1CollectedHeap::heap()),
  1994     _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking) { }
  1996   void do_oop(narrowOop* p) { do_oop_work(p); }
  1997   void do_oop(      oop* p) { do_oop_work(p); }
  1999   template <class T> void do_oop_work(T* p) {
  2000     oop         obj = oopDesc::load_decode_heap_oop(p);
  2001     const char* str = NULL;
  2002     const char* str2 = "";
  2004     if (!_g1h->is_in_g1_reserved(obj))
  2005       str = "outside G1 reserved";
  2006     else {
  2007       HeapRegion* hr  = _g1h->heap_region_containing(obj);
  2008       guarantee(hr != NULL, "invariant");
  2009       bool over_tams = false;
  2010       if (_use_prev_marking) {
  2011         over_tams = hr->obj_allocated_since_prev_marking(obj);
  2012       } else {
  2013         over_tams = hr->obj_allocated_since_next_marking(obj);
  2014       }
  2016       if (over_tams) {
  2017         str = "over TAMS";
  2018         if (_bitmap->isMarked((HeapWord*) obj)) {
  2019           str2 = " AND MARKED";
  2020         }
  2021       } else if (_bitmap->isMarked((HeapWord*) obj)) {
  2022         str = "marked";
  2023       } else {
  2024         str = "#### NOT MARKED ####";
  2025       }
  2026     }
  2028     _out->print_cr("    "PTR_FORMAT" contains "PTR_FORMAT" %s%s",
  2029                    p, (void*) obj, str, str2);
  2030   }
  2031 };
  2033 class ReachablePrinterClosure: public BitMapClosure {
  2034 private:
  2035   CMBitMapRO*   _bitmap;
  2036   outputStream* _out;
  2037   bool          _use_prev_marking;
  2039 public:
  2040   ReachablePrinterClosure(CMBitMapRO*   bitmap,
  2041                           outputStream* out,
  2042                           bool          use_prev_marking) :
  2043     _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking) { }
  2045   bool do_bit(size_t offset) {
  2046     HeapWord* addr = _bitmap->offsetToHeapWord(offset);
  2047     ReachablePrinterOopClosure oopCl(_bitmap, _out, _use_prev_marking);
  2049     _out->print_cr("  obj "PTR_FORMAT", offset %10d (marked)", addr, offset);
  2050     oop(addr)->oop_iterate(&oopCl);
  2051     _out->print_cr("");
  2053     return true;
  2054   }
  2055 };
  2057 class ObjInRegionReachablePrinterClosure : public ObjectClosure {
  2058 private:
  2059   CMBitMapRO*   _bitmap;
  2060   outputStream* _out;
  2061   bool          _use_prev_marking;
  2063 public:
  2064   ObjInRegionReachablePrinterClosure(CMBitMapRO*   bitmap,
  2065                                      outputStream* out,
  2066                                      bool          use_prev_marking) :
  2067     _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking) { }
  2069   void do_object(oop o) {
  2070     ReachablePrinterOopClosure oopCl(_bitmap, _out, _use_prev_marking);
  2072     _out->print_cr("  obj "PTR_FORMAT" (over TAMS)", (void*) o);
  2073     o->oop_iterate(&oopCl);
  2074     _out->print_cr("");
  2075   }
  2076 };
  2078 class RegionReachablePrinterClosure : public HeapRegionClosure {
  2079 private:
  2080   CMBitMapRO*   _bitmap;
  2081   outputStream* _out;
  2082   bool          _use_prev_marking;
  2084 public:
  2085   bool doHeapRegion(HeapRegion* hr) {
  2086     HeapWord* b = hr->bottom();
  2087     HeapWord* e = hr->end();
  2088     HeapWord* t = hr->top();
  2089     HeapWord* p = NULL;
  2090     if (_use_prev_marking) {
  2091       p = hr->prev_top_at_mark_start();
  2092     } else {
  2093       p = hr->next_top_at_mark_start();
  2094     }
  2095     _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
  2096                    "TAMS: "PTR_FORMAT, b, e, t, p);
  2097     _out->print_cr("");
  2099     ObjInRegionReachablePrinterClosure ocl(_bitmap, _out, _use_prev_marking);
  2100     hr->object_iterate_mem_careful(MemRegion(p, t), &ocl);
  2102     return false;
  2103   }
  2105   RegionReachablePrinterClosure(CMBitMapRO*   bitmap,
  2106                                 outputStream* out,
  2107                                 bool          use_prev_marking) :
  2108     _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking) { }
  2109 };
  2111 void ConcurrentMark::print_reachable(bool use_prev_marking, const char* str) {
  2112   gclog_or_tty->print_cr("== Doing reachable object dump... ");
  2114   if (G1PrintReachableBaseFile == NULL) {
  2115     gclog_or_tty->print_cr("  #### error: no base file defined");
  2116     return;
  2117   }
  2119   if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
  2120       (JVM_MAXPATHLEN - 1)) {
  2121     gclog_or_tty->print_cr("  #### error: file name too long");
  2122     return;
  2123   }
  2125   char file_name[JVM_MAXPATHLEN];
  2126   sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
  2127   gclog_or_tty->print_cr("  dumping to file %s", file_name);
  2129   fileStream fout(file_name);
  2130   if (!fout.is_open()) {
  2131     gclog_or_tty->print_cr("  #### error: could not open file");
  2132     return;
  2133   }
  2135   outputStream* out = &fout;
  2137   CMBitMapRO* bitmap = NULL;
  2138   if (use_prev_marking) {
  2139     bitmap = _prevMarkBitMap;
  2140   } else {
  2141     bitmap = _nextMarkBitMap;
  2142   }
  2144   out->print_cr("-- USING %s", (use_prev_marking) ? "PTAMS" : "NTAMS");
  2145   out->cr();
  2147   RegionReachablePrinterClosure rcl(bitmap, out, use_prev_marking);
  2148   out->print_cr("--- ITERATING OVER REGIONS WITH TAMS < TOP");
  2149   out->cr();
  2150   _g1h->heap_region_iterate(&rcl);
  2151   out->cr();
  2153   ReachablePrinterClosure cl(bitmap, out, use_prev_marking);
  2154   out->print_cr("--- ITERATING OVER MARKED OBJECTS ON THE BITMAP");
  2155   out->cr();
  2156   bitmap->iterate(&cl);
  2157   out->cr();
  2159   gclog_or_tty->print_cr("  done");
  2160 }
  2162 #endif // PRODUCT
  2164 // This note is for drainAllSATBBuffers and the code in between.
  2165 // In the future we could reuse a task to do this work during an
  2166 // evacuation pause (since now tasks are not active and can be claimed
  2167 // during an evacuation pause). This was a late change to the code and
  2168 // is currently not being taken advantage of.
  2170 class CMGlobalObjectClosure : public ObjectClosure {
  2171 private:
  2172   ConcurrentMark* _cm;
  2174 public:
  2175   void do_object(oop obj) {
  2176     _cm->deal_with_reference(obj);
  2177   }
  2179   CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
  2180 };
  2182 void ConcurrentMark::deal_with_reference(oop obj) {
  2183   if (verbose_high())
  2184     gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
  2185                            (void*) obj);
  2188   HeapWord* objAddr = (HeapWord*) obj;
  2189   assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
  2190   if (_g1h->is_in_g1_reserved(objAddr)) {
  2191     assert(obj != NULL, "is_in_g1_reserved should ensure this");
  2192     HeapRegion* hr = _g1h->heap_region_containing(obj);
  2193     if (_g1h->is_obj_ill(obj, hr)) {
  2194       if (verbose_high())
  2195         gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
  2196                                "marked", (void*) obj);
  2198       // we need to mark it first
  2199       if (_nextMarkBitMap->parMark(objAddr)) {
  2200         // No OrderAccess::store_load() is needed. It is implicit in the
  2201         // CAS done in parMark(objAddr) above
  2202         HeapWord* finger = _finger;
  2203         if (objAddr < finger) {
  2204           if (verbose_high())
  2205             gclog_or_tty->print_cr("[global] below the global finger "
  2206                                    "("PTR_FORMAT"), pushing it", finger);
  2207           if (!mark_stack_push(obj)) {
  2208             if (verbose_low())
  2209               gclog_or_tty->print_cr("[global] global stack overflow during "
  2210                                      "deal_with_reference");
  2211           }
  2212         }
  2213       }
  2214     }
  2215   }
  2216 }
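// Only objects below the global finger need an explicit push: anything at
// or above the finger will still be reached when the bitmap iteration
// gets there. A minimal sketch of the rule applied above:
//
//   if (_nextMarkBitMap->parMark(addr)) {  // we won the race to mark it
//     if (addr < _finger) {                // the scan already passed it...
//       mark_stack_push(obj);              // ...so it must be queued
//     }
//   }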
  2218 void ConcurrentMark::drainAllSATBBuffers() {
  2219   CMGlobalObjectClosure oc(this);
  2220   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  2221   satb_mq_set.set_closure(&oc);
  2223   while (satb_mq_set.apply_closure_to_completed_buffer()) {
  2224     if (verbose_medium())
  2225       gclog_or_tty->print_cr("[global] processed an SATB buffer");
  2226   }
  2228   // no need to check whether we should do this, as this is only
  2229   // called during an evacuation pause
  2230   satb_mq_set.iterate_closure_all_threads();
  2232   satb_mq_set.set_closure(NULL);
  2233   assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
  2234 }
  2236 void ConcurrentMark::markPrev(oop p) {
  2237   // Note we are overriding the read-only view of the prev map here, via
  2238   // the cast.
  2239   ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
  2240 }
  2242 void ConcurrentMark::clear(oop p) {
  2243   assert(p != NULL && p->is_oop(), "expected an oop");
  2244   HeapWord* addr = (HeapWord*)p;
  2245   assert(addr >= _nextMarkBitMap->startWord() &&
  2246          addr < _nextMarkBitMap->endWord(), "in a region");
  2248   _nextMarkBitMap->clear(addr);
  2249 }
  2251 void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
  2252   // Note we are overriding the read-only view of the prev map here, via
  2253   // the cast.
  2254   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
  2255   _nextMarkBitMap->clearRange(mr);
  2256 }
  2258 HeapRegion*
  2259 ConcurrentMark::claim_region(int task_num) {
  2260   // "checkpoint" the finger
  2261   HeapWord* finger = _finger;
  2263   // _heap_end will not change underneath our feet; it only changes at
  2264   // yield points.
  2265   while (finger < _heap_end) {
  2266     assert(_g1h->is_in_g1_reserved(finger), "invariant");
  2268     // is the gap between reading the finger and doing the CAS too long?
  2270     HeapRegion* curr_region   = _g1h->heap_region_containing(finger);
  2271     HeapWord*   bottom        = curr_region->bottom();
  2272     HeapWord*   end           = curr_region->end();
  2273     HeapWord*   limit         = curr_region->next_top_at_mark_start();
  2275     if (verbose_low())
  2276       gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
  2277                              "["PTR_FORMAT", "PTR_FORMAT"), "
  2278                              "limit = "PTR_FORMAT,
  2279                              task_num, curr_region, bottom, end, limit);
  2281     HeapWord* res =
  2282       (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
  2283     if (res == finger) {
  2284       // we succeeded
  2286       // notice that _finger == end cannot be guaranteed here since,
  2287       // someone else might have moved the finger even further
  2288       assert(_finger >= end, "the finger should have moved forward");
  2290       if (verbose_low())
  2291         gclog_or_tty->print_cr("[%d] we were successful with region = "
  2292                                PTR_FORMAT, task_num, curr_region);
  2294       if (limit > bottom) {
  2295         if (verbose_low())
  2296           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
  2297                                  "returning it ", task_num, curr_region);
  2298         return curr_region;
  2299       } else {
  2300         assert(limit == bottom,
  2301                "the region limit should be at bottom");
  2302         if (verbose_low())
  2303           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
  2304                                  "returning NULL", task_num, curr_region);
  2305         // we return NULL and the caller should try calling
  2306         // claim_region() again.
  2307         return NULL;
  2308       }
  2309     } else {
  2310       assert(_finger > finger, "the finger should have moved forward");
  2311       if (verbose_low())
  2312         gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
  2313                                "global finger = "PTR_FORMAT", "
  2314                                "our finger = "PTR_FORMAT,
  2315                                task_num, _finger, finger);
  2317       // read it again
  2318       finger = _finger;
  2319     }
  2320   }
  2322   return NULL;
  2323 }
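// Region claiming above is a lock-free protocol: read the shared finger,
// compute the end of the region it points into, and try to CAS the finger
// forward to that end. A minimal sketch of the loop's skeleton, assuming
// Atomic::cmpxchg_ptr returns the value it found in memory, and using a
// hypothetical region_for() helper:
//
//   HeapWord* finger = _finger;
//   while (finger < _heap_end) {
//     HeapWord* end = region_for(finger)->end();
//     if (Atomic::cmpxchg_ptr(end, &_finger, finger) == finger) {
//       return region_for(finger);   // we own [finger, end) now
//     }
//     finger = _finger;              // somebody else advanced it; retry
//   }
//   return NULL;                     // ran past the end of the heap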
  2325 void ConcurrentMark::oops_do(OopClosure* cl) {
  2326   if (_markStack.size() > 0 && verbose_low())
  2327     gclog_or_tty->print_cr("[global] scanning the global marking stack, "
  2328                            "size = %d", _markStack.size());
  2329   // we first iterate over the contents of the mark stack...
  2330   _markStack.oops_do(cl);
  2332   for (int i = 0; i < (int)_max_task_num; ++i) {
  2333     OopTaskQueue* queue = _task_queues->queue((int)i);
  2335     if (queue->size() > 0 && verbose_low())
  2336       gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
  2337                              "size = %d", i, queue->size());
  2339     // ...then over the contents of the all the task queues.
  2340     queue->oops_do(cl);
  2341   }
  2343   // finally, invalidate any entries that in the region stack that
  2344   // point into the collection set
  2345   if (_regionStack.invalidate_entries_into_cset()) {
  2346     // otherwise, any gray objects copied during the evacuation pause
  2347     // might not be visited.
  2348     assert(_should_gray_objects, "invariant");
  2349   }
  2350 }
  2352 void ConcurrentMark::clear_marking_state() {
  2353   _markStack.setEmpty();
  2354   _markStack.clear_overflow();
  2355   _regionStack.setEmpty();
  2356   _regionStack.clear_overflow();
  2357   clear_has_overflown();
  2358   _finger = _heap_start;
  2360   for (int i = 0; i < (int)_max_task_num; ++i) {
  2361     OopTaskQueue* queue = _task_queues->queue(i);
  2362     queue->set_empty();
  2363   }
  2364 }
  2366 void ConcurrentMark::print_stats() {
  2367   if (verbose_stats()) {
  2368     gclog_or_tty->print_cr("---------------------------------------------------------------------");
  2369     for (size_t i = 0; i < _active_tasks; ++i) {
  2370       _tasks[i]->print_stats();
  2371       gclog_or_tty->print_cr("---------------------------------------------------------------------");
  2372     }
  2373   }
  2374 }
  2376 class CSMarkOopClosure: public OopClosure {
  2377   friend class CSMarkBitMapClosure;
  2379   G1CollectedHeap* _g1h;
  2380   CMBitMap*        _bm;
  2381   ConcurrentMark*  _cm;
  2382   oop*             _ms;
  2383   jint*            _array_ind_stack;
  2384   int              _ms_size;
  2385   int              _ms_ind;
  2386   int              _array_increment;
  2388   bool push(oop obj, int arr_ind = 0) {
  2389     if (_ms_ind == _ms_size) {
  2390       gclog_or_tty->print_cr("Mark stack is full.");
  2391       return false;
  2392     }
  2393     _ms[_ms_ind] = obj;
  2394     if (obj->is_objArray()) _array_ind_stack[_ms_ind] = arr_ind;
  2395     _ms_ind++;
  2396     return true;
  2397   }
  2399   oop pop() {
  2400     if (_ms_ind == 0) return NULL;
  2401     else {
  2402       _ms_ind--;
  2403       return _ms[_ms_ind];
  2404     }
  2405   }
  2407   template <class T> bool drain() {
  2408     while (_ms_ind > 0) {
  2409       oop obj = pop();
  2410       assert(obj != NULL, "Since index was non-zero.");
  2411       if (obj->is_objArray()) {
  2412         jint arr_ind = _array_ind_stack[_ms_ind];
  2413         objArrayOop aobj = objArrayOop(obj);
  2414         jint len = aobj->length();
  2415         jint next_arr_ind = arr_ind + _array_increment;
  2416         if (next_arr_ind < len) {
  2417           push(obj, next_arr_ind);
  2418         }
  2419         // Now process this portion of this one.
  2420         int lim = MIN2(next_arr_ind, len);
  2421         for (int j = arr_ind; j < lim; j++) {
  2422           do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j));
  2423         }
  2425       } else {
  2426         obj->oop_iterate(this);
  2427       }
  2428       if (abort()) return false;
  2429     }
  2430     return true;
  2431   }
  2433 public:
  2434   CSMarkOopClosure(ConcurrentMark* cm, int ms_size) :
  2435     _g1h(G1CollectedHeap::heap()),
  2436     _cm(cm),
  2437     _bm(cm->nextMarkBitMap()),
  2438     _ms_size(ms_size), _ms_ind(0),
  2439     _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
  2440     _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
  2441     _array_increment(MAX2(ms_size/8, 16))
  2442   {}
  2444   ~CSMarkOopClosure() {
  2445     FREE_C_HEAP_ARRAY(oop, _ms);
  2446     FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
  2447   }
  2449   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  2450   virtual void do_oop(      oop* p) { do_oop_work(p); }
  2452   template <class T> void do_oop_work(T* p) {
  2453     T heap_oop = oopDesc::load_heap_oop(p);
  2454     if (oopDesc::is_null(heap_oop)) return;
  2455     oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
  2456     if (obj->is_forwarded()) {
  2457       // If the object has already been forwarded, we have to make sure
  2458       // that it's marked.  So follow the forwarding pointer.  Note that
  2459       // this does the right thing for self-forwarding pointers in the
  2460       // evacuation failure case.
  2461       obj = obj->forwardee();
  2463     HeapRegion* hr = _g1h->heap_region_containing(obj);
  2464     if (hr != NULL) {
  2465       if (hr->in_collection_set()) {
  2466         if (_g1h->is_obj_ill(obj)) {
  2467           _bm->mark((HeapWord*)obj);
  2468           if (!push(obj)) {
  2469             gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
  2470             set_abort();
  2471           }
  2472         }
  2473       } else {
  2474         // Outside the collection set; we need to gray it
  2475         _cm->deal_with_reference(obj);
  2476       }
  2477     }
  2478   }
  2479 };
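// Object arrays are drained in chunks above: when an objArray is popped,
// only _array_increment elements starting at the saved index are scanned,
// and if elements remain the array is pushed back with the next start
// index. This bounds the work per pop, so one huge array cannot starve
// the rest of the mark stack. A minimal sketch, with hypothetical names:
//
//   jint lim = MIN2(idx + increment, len);
//   if (lim < len) push(array, lim);          // re-queue the remainder
//   for (jint j = idx; j < lim; j++) {
//     visit(array->obj_at_addr<T>(j));        // scan just this chunk
//   }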
  2481 class CSMarkBitMapClosure: public BitMapClosure {
  2482   G1CollectedHeap* _g1h;
  2483   CMBitMap*        _bitMap;
  2484   ConcurrentMark*  _cm;
  2485   CSMarkOopClosure _oop_cl;
  2486 public:
  2487   CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :
  2488     _g1h(G1CollectedHeap::heap()),
  2489     _bitMap(cm->nextMarkBitMap()),
  2490     _oop_cl(cm, ms_size)
  2491   {}
  2493   ~CSMarkBitMapClosure() {}
  2495   bool do_bit(size_t offset) {
  2496     // convert offset into a HeapWord*
  2497     HeapWord* addr = _bitMap->offsetToHeapWord(offset);
  2498     assert(addr >= _bitMap->startWord() && addr < _bitMap->endWord(),
  2499            "address out of range");
  2500     assert(_bitMap->isMarked(addr), "tautology");
  2501     oop obj = oop(addr);
  2502     if (!obj->is_forwarded()) {
  2503       if (!_oop_cl.push(obj)) return false;
  2504       if (UseCompressedOops) {
  2505         if (!_oop_cl.drain<narrowOop>()) return false;
  2506       } else {
  2507         if (!_oop_cl.drain<oop>()) return false;
  2508       }
  2509     }
  2510     // Otherwise...
  2511     return true;
  2512   }
  2513 };
  2516 class CompleteMarkingInCSHRClosure: public HeapRegionClosure {
  2517   CMBitMap* _bm;
  2518   CSMarkBitMapClosure _bit_cl;
  2519   enum SomePrivateConstants {
  2520     MSSize = 1000
  2521   };
  2522   bool _completed;
  2523 public:
  2524   CompleteMarkingInCSHRClosure(ConcurrentMark* cm) :
  2525     _bm(cm->nextMarkBitMap()),
  2526     _bit_cl(cm, MSSize),
  2527     _completed(true)
  2528   {}
  2530   ~CompleteMarkingInCSHRClosure() {}
  2532   bool doHeapRegion(HeapRegion* r) {
  2533     if (!r->evacuation_failed()) {
  2534       MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
  2535       if (!mr.is_empty()) {
  2536         if (!_bm->iterate(&_bit_cl, mr)) {
  2537           _completed = false;
  2538           return true;
  2539         }
  2540       }
  2541     }
  2542     return false;
  2543   }
  2545   bool completed() { return _completed; }
  2546 };
  2548 class ClearMarksInHRClosure: public HeapRegionClosure {
  2549   CMBitMap* _bm;
  2550 public:
  2551   ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { }
  2553   bool doHeapRegion(HeapRegion* r) {
  2554     if (!r->used_region().is_empty() && !r->evacuation_failed()) {
  2555       MemRegion usedMR = r->used_region();
  2556       _bm->clearRange(usedMR);
  2557     }
  2558     return false;
  2559   }
  2560 };
  2562 void ConcurrentMark::complete_marking_in_collection_set() {
  2563   G1CollectedHeap* g1h =  G1CollectedHeap::heap();
  2565   if (!g1h->mark_in_progress()) {
  2566     g1h->g1_policy()->record_mark_closure_time(0.0);
  2567     return;
  2568   }
  2570   int i = 1;
  2571   double start = os::elapsedTime();
  2572   while (true) {
  2573     i++;
  2574     CompleteMarkingInCSHRClosure cmplt(this);
  2575     g1h->collection_set_iterate(&cmplt);
  2576     if (cmplt.completed()) break;
  2577   }
  2578   double end_time = os::elapsedTime();
  2579   double elapsed_time_ms = (end_time - start) * 1000.0;
  2580   g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
  2581   if (PrintGCDetails) {
  2582     gclog_or_tty->print_cr("Mark closure took %5.2f ms.", elapsed_time_ms);
  2583   }
  2585   ClearMarksInHRClosure clr(nextMarkBitMap());
  2586   g1h->collection_set_iterate(&clr);
  2587 }
  2589 // The next two methods deal with the following optimisation. Some
  2590 // objects are gray by being marked and located above the finger. If
  2591 // they are copied below the finger during an evacuation pause, then
  2592 // they need to be pushed on the stack. The observation is that, if
  2593 // there are no regions in the collection set located above the
  2594 // finger, then the above cannot happen, hence we do not need to
  2595 // explicitly gray any objects when copying them to below the
  2596 // finger. The global stack will be scanned to ensure that, if it
  2597 // points to objects being copied, it will update their
  2598 // location. There is a tricky situation with the gray objects in
  2599 // the region stack that are being copied, however. See the comment in
  2600 // newCSet().
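// Stated as an invariant: explicit graying during a pause is only needed
// when a gray (marked but not yet scanned) object can end up below the
// lowest finger. Roughly, as computed by the two methods below:
//
//   _should_gray_objects = !region_stack_empty()
//       || (some CSet region r satisfies r->end() > _min_finger);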
  2602 void ConcurrentMark::newCSet() {
  2603   if (!concurrent_marking_in_progress())
  2604     // nothing to do if marking is not in progress
  2605     return;
  2607   // find what the lowest finger is among the global and local fingers
  2608   _min_finger = _finger;
  2609   for (int i = 0; i < (int)_max_task_num; ++i) {
  2610     CMTask* task = _tasks[i];
  2611     HeapWord* task_finger = task->finger();
  2612     if (task_finger != NULL && task_finger < _min_finger)
  2613       _min_finger = task_finger;
  2614   }
  2616   _should_gray_objects = false;
  2618   // This fixes a very subtle and frustrating bug. It might be the case
  2619   // that, during an evacuation pause, heap regions that contain
  2620   // objects that are gray (by being in regions contained in the
  2621   // region stack) are included in the collection set. Since such gray
  2622   // objects will be moved, and because it's not easy to redirect
  2623   // region stack entries to point to a new location (because objects
  2624   // in one region might be scattered to multiple regions after they
  2625   // are copied), one option is to ensure that all marked objects
  2626   // copied during a pause are pushed on the stack. Notice, however,
  2627   // that this problem can only happen when the region stack is not
  2628   // empty during an evacuation pause. So, we make the fix a bit less
  2629   // conservative and ensure that regions are pushed on the stack,
  2630   // irrespective of whether all collection set regions are below the
  2631   // finger, if the region stack is not empty. This is expected to be
  2632   // a rare case, so I don't think it's necessary to be smarter about it.
  2633   if (!region_stack_empty())
  2634     _should_gray_objects = true;
  2635 }
  2637 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
  2638   if (!concurrent_marking_in_progress())
  2639     return;
  2641   HeapWord* region_end = hr->end();
  2642   if (region_end > _min_finger)
  2643     _should_gray_objects = true;
  2644 }
  2646 // abandon current marking iteration due to a Full GC
  2647 void ConcurrentMark::abort() {
  2648   // Clear all marks to force marking thread to do nothing
  2649   _nextMarkBitMap->clearAll();
  2650   // Empty mark stack
  2651   clear_marking_state();
  2652   for (int i = 0; i < (int)_max_task_num; ++i)
  2653     _tasks[i]->clear_region_fields();
  2654   _has_aborted = true;
  2656   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  2657   satb_mq_set.abandon_partial_marking();
  2658   // This can be called either during or outside marking, we'll read
  2659   // the expected_active value from the SATB queue set.
  2660   satb_mq_set.set_active_all_threads(
  2661                                  false, /* new active value */
  2662                                  satb_mq_set.is_active() /* expected_active */);
  2663 }
  2665 static void print_ms_time_info(const char* prefix, const char* name,
  2666                                NumberSeq& ns) {
  2667   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
  2668                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
  2669   if (ns.num() > 0) {
  2670     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
  2671                            prefix, ns.sd(), ns.maximum());
  2672   }
  2673 }
  2675 void ConcurrentMark::print_summary_info() {
  2676   gclog_or_tty->print_cr(" Concurrent marking:");
  2677   print_ms_time_info("  ", "init marks", _init_times);
  2678   print_ms_time_info("  ", "remarks", _remark_times);
  2679   {
  2680     print_ms_time_info("     ", "final marks", _remark_mark_times);
  2681     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
  2682   }
  2684   print_ms_time_info("  ", "cleanups", _cleanup_times);
  2685   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
  2686                          _total_counting_time,
  2687                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
  2688                           (double)_cleanup_times.num()
  2689                          : 0.0));
  2690   if (G1ScrubRemSets) {
  2691     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
  2692                            _total_rs_scrub_time,
  2693                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
  2694                             (double)_cleanup_times.num()
  2695                            : 0.0));
  2697   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
  2698                          (_init_times.sum() + _remark_times.sum() +
  2699                           _cleanup_times.sum())/1000.0);
  2700   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
  2701                 "(%8.2f s marking, %8.2f s counting).",
  2702                 cmThread()->vtime_accum(),
  2703                 cmThread()->vtime_mark_accum(),
  2704                 cmThread()->vtime_count_accum());
  2705 }
  2707 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
  2708   _parallel_workers->print_worker_threads_on(st);
  2709 }
  2711 // Closures
  2712 // XXX: there seems to be a lot of code  duplication here;
  2713 // should refactor and consolidate the shared code.
  2715 // The two methods below implement the marking threads' yield protocol:
  2716 // they periodically check whether a safepoint has been requested.
  2718 // We take a break if someone is trying to stop the world.
  2719 bool ConcurrentMark::do_yield_check(int worker_i) {
  2720   if (should_yield()) {
  2721     if (worker_i == 0)
  2722       _g1h->g1_policy()->record_concurrent_pause();
  2723     cmThread()->yield();
  2724     if (worker_i == 0)
  2725       _g1h->g1_policy()->record_concurrent_pause_end();
  2726     return true;
  2727   } else {
  2728     return false;
  2729   }
  2730 }
  2732 bool ConcurrentMark::should_yield() {
  2733   return cmThread()->should_yield();
  2734 }
  2736 bool ConcurrentMark::containing_card_is_marked(void* p) {
  2737   size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
  2738   return _card_bm.at(offset >> CardTableModRefBS::card_shift);
  2739 }
  2741 bool ConcurrentMark::containing_cards_are_marked(void* start,
  2742                                                  void* last) {
  2743   return
  2744     containing_card_is_marked(start) &&
  2745     containing_card_is_marked(last);
  2746 }
  2748 #ifndef PRODUCT
  2749 // for debugging purposes
  2750 void ConcurrentMark::print_finger() {
  2751   gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
  2752                          _heap_start, _heap_end, _finger);
  2753   for (int i = 0; i < (int) _max_task_num; ++i) {
  2754     gclog_or_tty->print("   %d: "PTR_FORMAT, i, _tasks[i]->finger());
  2755   }
  2756   gclog_or_tty->print_cr("");
  2757 }
  2758 #endif
  2760 // Closure for iteration over bitmaps
  2761 class CMBitMapClosure : public BitMapClosure {
  2762 private:
  2763   // the bitmap that is being iterated over
  2764   CMBitMap*                   _nextMarkBitMap;
  2765   ConcurrentMark*             _cm;
  2766   CMTask*                     _task;
  2767   // true if we're scanning a heap region claimed by the task (so that
  2768   // we move the finger along), false if we're not, i.e. currently when
  2769   // scanning a heap region popped from the region stack (so that we
  2770   // do not move the task finger along; it'd be a mistake if we did so).
  2771   bool                        _scanning_heap_region;
  2773 public:
  2774   CMBitMapClosure(CMTask *task,
  2775                   ConcurrentMark* cm,
  2776                   CMBitMap* nextMarkBitMap)
  2777     :  _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
  2779   void set_scanning_heap_region(bool scanning_heap_region) {
  2780     _scanning_heap_region = scanning_heap_region;
  2781   }
  2783   bool do_bit(size_t offset) {
  2784     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
  2785     assert(_nextMarkBitMap->isMarked(addr), "invariant");
  2786     assert( addr < _cm->finger(), "invariant");
  2788     if (_scanning_heap_region) {
  2789       statsOnly( _task->increase_objs_found_on_bitmap() );
  2790       assert(addr >= _task->finger(), "invariant");
  2791       // We move that task's local finger along.
  2792       _task->move_finger_to(addr);
  2793     } else {
  2794       // We move the task's region finger along.
  2795       _task->move_region_finger_to(addr);
  2796     }
  2798     _task->scan_object(oop(addr));
  2799     // we only partially drain the local queue and global stack
  2800     _task->drain_local_queue(true);
  2801     _task->drain_global_stack(true);
  2803     // if the has_aborted flag has been raised, we need to bail out of
  2804     // the iteration
  2805     return !_task->has_aborted();
  2806   }
  2807 };
  2809 // Closure for iterating over objects, currently only used for
  2810 // processing SATB buffers.
  2811 class CMObjectClosure : public ObjectClosure {
  2812 private:
  2813   CMTask* _task;
  2815 public:
  2816   void do_object(oop obj) {
  2817     _task->deal_with_reference(obj);
  2818   }
  2820   CMObjectClosure(CMTask* task) : _task(task) { }
  2821 };
  2823 // Closure for iterating over object fields
  2824 class CMOopClosure : public OopClosure {
  2825 private:
  2826   G1CollectedHeap*   _g1h;
  2827   ConcurrentMark*    _cm;
  2828   CMTask*            _task;
  2830 public:
  2831   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  2832   virtual void do_oop(      oop* p) { do_oop_work(p); }
  2834   template <class T> void do_oop_work(T* p) {
  2835     assert(_g1h->is_in_g1_reserved((HeapWord*) p), "invariant");
  2836     assert(!_g1h->heap_region_containing((HeapWord*) p)->is_on_free_list(),
  2837            "invariant");
  2839     oop obj = oopDesc::load_decode_heap_oop(p);
  2840     if (_cm->verbose_high())
  2841       gclog_or_tty->print_cr("[%d] we're looking at location "
  2842                              "*"PTR_FORMAT" = "PTR_FORMAT,
  2843                              _task->task_id(), p, (void*) obj);
  2844     _task->deal_with_reference(obj);
  2845   }
  2847   CMOopClosure(G1CollectedHeap* g1h,
  2848                ConcurrentMark* cm,
  2849                CMTask* task)
  2850     : _g1h(g1h), _cm(cm), _task(task) { }
  2851 };
  2853 void CMTask::setup_for_region(HeapRegion* hr) {
  2854   // Separated the asserts so that we know which one fires.
  2855   assert(hr != NULL,
  2856         "claim_region() should have returned a non-NULL region");
  2857   assert(!hr->continuesHumongous(),
  2858         "claim_region() should have filtered out continues humongous regions");
  2860   if (_cm->verbose_low())
  2861     gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
  2862                            _task_id, hr);
  2864   _curr_region  = hr;
  2865   _finger       = hr->bottom();
  2866   update_region_limit();
  2867 }
void CMTask::update_region_limit() {
  HeapRegion* hr            = _curr_region;
  HeapWord* bottom          = hr->bottom();
  HeapWord* limit           = hr->next_top_at_mark_start();

  if (limit == bottom) {
    if (_cm->verbose_low())
      gclog_or_tty->print_cr("[%d] found an empty region "
                             "["PTR_FORMAT", "PTR_FORMAT")",
                             _task_id, bottom, limit);
    // The region was collected underneath our feet.
    // We set the finger to bottom to ensure that the bitmap
    // iteration that will follow this will not do anything.
    // (this is not a condition that holds when we set the region up,
    // as the region is not supposed to be empty in the first place)
    _finger = bottom;
  } else if (limit >= _region_limit) {
    assert(limit >= _finger, "peace of mind");
  } else {
    assert(limit < _region_limit, "only way to get here");
    // This can happen under some pretty unusual circumstances.  An
    // evacuation pause empties the region underneath our feet (NTAMS
    // at bottom). We then do some allocation in the region (NTAMS
    // stays at bottom), followed by the region being used as a GC
    // alloc region (NTAMS will move to top() and the objects
    // originally below it will be grayed). All objects now marked in
    // the region are explicitly grayed, if below the global finger,
    // and in fact we do not need to scan anything else. So, we simply
    // set _finger to be limit to ensure that the bitmap iteration
    // doesn't do anything.
    _finger = limit;
  }

  _region_limit = limit;
}
void CMTask::giveup_current_region() {
  assert(_curr_region != NULL, "invariant");
  if (_cm->verbose_low())
    gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
                           _task_id, _curr_region);
  clear_region_fields();
}
void CMTask::clear_region_fields() {
  // Values for these three fields that indicate that we're not
  // holding on to a region.
  _curr_region   = NULL;
  _finger        = NULL;
  _region_limit  = NULL;

  _region_finger = NULL;
}
void CMTask::reset(CMBitMap* nextMarkBitMap) {
  guarantee(nextMarkBitMap != NULL, "invariant");

  if (_cm->verbose_low())
    gclog_or_tty->print_cr("[%d] resetting", _task_id);

  _nextMarkBitMap                = nextMarkBitMap;
  clear_region_fields();

  _calls                         = 0;
  _elapsed_time_ms               = 0.0;
  _termination_time_ms           = 0.0;
  _termination_start_time_ms     = 0.0;

#if _MARKING_STATS_
  _local_pushes                  = 0;
  _local_pops                    = 0;
  _local_max_size                = 0;
  _objs_scanned                  = 0;
  _global_pushes                 = 0;
  _global_pops                   = 0;
  _global_max_size               = 0;
  _global_transfers_to           = 0;
  _global_transfers_from         = 0;
  _region_stack_pops             = 0;
  _regions_claimed               = 0;
  _objs_found_on_bitmap          = 0;
  _satb_buffers_processed        = 0;
  _steal_attempts                = 0;
  _steals                        = 0;
  _aborted                       = 0;
  _aborted_overflow              = 0;
  _aborted_cm_aborted            = 0;
  _aborted_yield                 = 0;
  _aborted_timed_out             = 0;
  _aborted_satb                  = 0;
  _aborted_termination           = 0;
#endif // _MARKING_STATS_
}
bool CMTask::should_exit_termination() {
  regular_clock_call();
  // This is called when we are in the termination protocol. We should
  // quit if, for some reason, this task wants to abort or the global
  // stack is not empty (this means that we can get work from it).
  return !_cm->mark_stack_empty() || has_aborted();
}
// This determines whether the method below will check both the local
// and global fingers when determining whether to push on the stack a
// gray object (value 1) or whether it will only check the global one
// (value 0). The tradeoff is that the former will be a bit more
// accurate and possibly push less on the stack, but it might also be
// a little bit slower.

#define _CHECK_BOTH_FINGERS_      1
void CMTask::deal_with_reference(oop obj) {
  if (_cm->verbose_high())
    gclog_or_tty->print_cr("[%d] we're dealing with reference = "PTR_FORMAT,
                           _task_id, (void*) obj);

  ++_refs_reached;

  HeapWord* objAddr = (HeapWord*) obj;
  assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
  if (_g1h->is_in_g1_reserved(objAddr)) {
    assert(obj != NULL, "is_in_g1_reserved should ensure this");
    HeapRegion* hr =  _g1h->heap_region_containing(obj);
    if (_g1h->is_obj_ill(obj, hr)) {
      if (_cm->verbose_high())
        gclog_or_tty->print_cr("[%d] "PTR_FORMAT" is not considered marked",
                               _task_id, (void*) obj);

      // we need to mark it first
      if (_nextMarkBitMap->parMark(objAddr)) {
        // No OrderAccess::store_load() is needed. It is implicit in the
        // CAS done in parMark(objAddr) above
        HeapWord* global_finger = _cm->finger();

#if _CHECK_BOTH_FINGERS_
        // we will check both the local and global fingers

        if (_finger != NULL && objAddr < _finger) {
          if (_cm->verbose_high())
            gclog_or_tty->print_cr("[%d] below the local finger ("PTR_FORMAT"), "
                                   "pushing it", _task_id, _finger);
          push(obj);
        } else if (_curr_region != NULL && objAddr < _region_limit) {
          // do nothing
        } else if (objAddr < global_finger) {
          // Notice that the global finger might be moving forward
          // concurrently. This is not a problem. In the worst case, we
          // mark the object while it is above the global finger and, by
          // the time we read the global finger, it has moved forward
          // past this object. In this case, the object will probably
          // be visited when a task is scanning the region and will also
          // be pushed on the stack. So, some duplicate work, but no
          // correctness problems.

          if (_cm->verbose_high())
            gclog_or_tty->print_cr("[%d] below the global finger "
                                   "("PTR_FORMAT"), pushing it",
                                   _task_id, global_finger);
          push(obj);
        } else {
          // do nothing
        }
#else // _CHECK_BOTH_FINGERS_
        // we will only check the global finger
        if (objAddr < global_finger) {
          // see long comment above

          if (_cm->verbose_high())
            gclog_or_tty->print_cr("[%d] below the global finger "
                                   "("PTR_FORMAT"), pushing it",
                                   _task_id, global_finger);
          push(obj);
        }
#endif // _CHECK_BOTH_FINGERS_
      }
    }
  }
}
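
// A summary of the decision above (a sketch of the cases, not new logic):
// after a successful parMark(objAddr),
//   - objAddr < _finger (this task's local finger)  -> push: we have already
//     scanned past this address, so marking alone is not enough;
//   - within _curr_region, below _region_limit      -> do nothing: the
//     ongoing bitmap iteration over this region will still reach it;
//   - objAddr < global finger                       -> push: the object lies
//     in a region that has already been claimed;
//   - otherwise                                     -> do nothing: some task
//     will eventually claim that region and visit the mark.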
void CMTask::push(oop obj) {
  HeapWord* objAddr = (HeapWord*) obj;
  assert(_g1h->is_in_g1_reserved(objAddr), "invariant");
  assert(!_g1h->heap_region_containing(objAddr)->is_on_free_list(),
         "invariant");
  assert(!_g1h->is_obj_ill(obj), "invariant");
  assert(_nextMarkBitMap->isMarked(objAddr), "invariant");

  if (_cm->verbose_high())
    gclog_or_tty->print_cr("[%d] pushing "PTR_FORMAT, _task_id, (void*) obj);

  if (!_task_queue->push(obj)) {
    // The local task queue looks full. We need to push some entries
    // to the global stack.

    if (_cm->verbose_medium())
      gclog_or_tty->print_cr("[%d] task queue overflow, "
                             "moving entries to the global stack",
                             _task_id);
    move_entries_to_global_stack();

    // this should succeed since, even if we overflow the global
    // stack, we should have definitely removed some entries from the
    // local queue. So, there must be space on it.
    bool success = _task_queue->push(obj);
    assert(success, "invariant");
  }

  statsOnly( int tmp_size = _task_queue->size();
             if (tmp_size > _local_max_size)
               _local_max_size = tmp_size;
             ++_local_pushes );
}
void CMTask::reached_limit() {
  assert(_words_scanned >= _words_scanned_limit ||
         _refs_reached >= _refs_reached_limit,
         "shouldn't have been called otherwise");
  regular_clock_call();
}
void CMTask::regular_clock_call() {
  if (has_aborted())
    return;

  // First, we need to recalculate the words scanned and refs reached
  // limits for the next clock call.
  recalculate_limits();

  // During the regular clock call we do the following

  // (1) If an overflow has been flagged, then we abort.
  if (_cm->has_overflown()) {
    set_has_aborted();
    return;
  }

  // If we are not concurrent (i.e. we're doing remark) we don't need
  // to check anything else. The other steps are only needed during
  // the concurrent marking phase.
  if (!concurrent())
    return;

  // (2) If marking has been aborted for Full GC, then we also abort.
  if (_cm->has_aborted()) {
    set_has_aborted();
    statsOnly( ++_aborted_cm_aborted );
    return;
  }

  double curr_time_ms = os::elapsedVTime() * 1000.0;

  // (3) If marking stats are enabled, then we update the step history.
#if _MARKING_STATS_
  if (_words_scanned >= _words_scanned_limit)
    ++_clock_due_to_scanning;
  if (_refs_reached >= _refs_reached_limit)
    ++_clock_due_to_marking;

  double last_interval_ms = curr_time_ms - _interval_start_time_ms;
  _interval_start_time_ms = curr_time_ms;
  _all_clock_intervals_ms.add(last_interval_ms);

  if (_cm->verbose_medium()) {
    gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
                           "scanned = %d%s, refs reached = %d%s",
                           _task_id, last_interval_ms,
                           _words_scanned,
                           (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
                           _refs_reached,
                           (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
  }
#endif // _MARKING_STATS_

  // (4) We check whether we should yield. If we have to, then we abort.
  if (_cm->should_yield()) {
    // We should yield. To do this we abort the task. The caller is
    // responsible for yielding.
    set_has_aborted();
    statsOnly( ++_aborted_yield );
    return;
  }

  // (5) We check whether we've reached our time quota. If we have,
  // then we abort.
  double elapsed_time_ms = curr_time_ms - _start_time_ms;
  if (elapsed_time_ms > _time_target_ms) {
    set_has_aborted();
    _has_aborted_timed_out = true;
    statsOnly( ++_aborted_timed_out );
    return;
  }

  // (6) Finally, we check whether there are enough completed SATB
  // buffers available for processing. If there are, we abort.
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
    if (_cm->verbose_low())
      gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
                             _task_id);
    // we do need to process SATB buffers, we'll abort and restart
    // the marking task to do so
    set_has_aborted();
    statsOnly( ++_aborted_satb );
    return;
  }
}
void CMTask::recalculate_limits() {
  _real_words_scanned_limit = _words_scanned + words_scanned_period;
  _words_scanned_limit      = _real_words_scanned_limit;

  _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
  _refs_reached_limit       = _real_refs_reached_limit;
}
void CMTask::decrease_limits() {
  // This is called when we believe that we're going to do an infrequent
  // operation which will increase the per byte scanned cost (i.e. move
  // entries to/from the global stack). It basically tries to decrease the
  // scanning limit so that the clock is called earlier.

  if (_cm->verbose_medium())
    gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);

  _words_scanned_limit = _real_words_scanned_limit -
    3 * words_scanned_period / 4;
  _refs_reached_limit  = _real_refs_reached_limit -
    3 * refs_reached_period / 4;
}
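
// For example (a sketch, assuming no further scanning happens between the
// two calls): right after recalculate_limits(), the next clock call is due
// after words_scanned_period more words; after decrease_limits() it is due
// after only words_scanned_period / 4 more words, i.e. three quarters of
// the period earlier (and analogously for refs_reached_period).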
void CMTask::move_entries_to_global_stack() {
  // local array where we'll store the entries that will be popped
  // from the local queue
  oop buffer[global_stack_transfer_size];

  int n = 0;
  oop obj;
  while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
    buffer[n] = obj;
    ++n;
  }

  if (n > 0) {
    // we popped at least one entry from the local queue

    statsOnly( ++_global_transfers_to; _local_pops += n );

    if (!_cm->mark_stack_push(buffer, n)) {
      if (_cm->verbose_low())
        gclog_or_tty->print_cr("[%d] aborting due to global stack overflow",
                               _task_id);
      set_has_aborted();
    } else {
      // the transfer was successful

      if (_cm->verbose_medium())
        gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
                               _task_id, n);
      statsOnly( int tmp_size = _cm->mark_stack_size();
                 if (tmp_size > _global_max_size)
                   _global_max_size = tmp_size;
                 _global_pushes += n );
    }
  }

  // this operation was quite expensive, so decrease the limits
  decrease_limits();
}
void CMTask::get_entries_from_global_stack() {
  // local array where we'll store the entries that will be popped
  // from the global stack.
  oop buffer[global_stack_transfer_size];
  int n;
  _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
  assert(n <= global_stack_transfer_size,
         "we should not pop more than the given limit");
  if (n > 0) {
    // yes, we did actually pop at least one entry

    statsOnly( ++_global_transfers_from; _global_pops += n );
    if (_cm->verbose_medium())
      gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
                             _task_id, n);
    for (int i = 0; i < n; ++i) {
      bool success = _task_queue->push(buffer[i]);
      // We only call this when the local queue is empty or under a
      // given target limit. So, we do not expect this push to fail.
      assert(success, "invariant");
    }

    statsOnly( int tmp_size = _task_queue->size();
               if (tmp_size > _local_max_size)
                 _local_max_size = tmp_size;
               _local_pushes += n );
  }

  // this operation was quite expensive, so decrease the limits
  decrease_limits();
}
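
// Both transfer routines above move work in chunks of
// global_stack_transfer_size entries, so the global mark stack (which, per
// the design notes before do_marking_step() below, is mutex-protected) is
// touched once per chunk rather than once per entry. Since even that is
// comparatively expensive, both routines end by calling decrease_limits().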
void CMTask::drain_local_queue(bool partially) {
  if (has_aborted())
    return;

  // Decide what the target size is, depending on whether we're going
  // to drain it partially (so that other tasks can steal if they run
  // out of things to do) or totally (at the very end).
  size_t target_size;
  if (partially)
    target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
  else
    target_size = 0;

  if (_task_queue->size() > target_size) {
    if (_cm->verbose_high())
      gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
                             _task_id, target_size);

    oop obj;
    bool ret = _task_queue->pop_local(obj);
    while (ret) {
      statsOnly( ++_local_pops );

      if (_cm->verbose_high())
        gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
                               (void*) obj);

      assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
      assert(!_g1h->heap_region_containing(obj)->is_on_free_list(),
             "invariant");

      scan_object(obj);

      if (_task_queue->size() <= target_size || has_aborted())
        ret = false;
      else
        ret = _task_queue->pop_local(obj);
    }

    if (_cm->verbose_high())
      gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
                             _task_id, _task_queue->size());
  }
}
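
// Target-size sketch: with partially == true and, say, a local queue
// capacity of N entries, draining stops once the queue is down to
// MIN2(N / 3, GCDrainStackTargetSize) entries, deliberately leaving some
// work behind for other tasks to steal; with partially == false the queue
// is drained until it is completely empty.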
void CMTask::drain_global_stack(bool partially) {
  if (has_aborted())
    return;

  // We have a policy to drain the local queue before we attempt to
  // drain the global stack.
  assert(partially || _task_queue->size() == 0, "invariant");

  // Decide what the target size is, depending on whether we're going
  // to drain it partially (so that other tasks can steal if they run
  // out of things to do) or totally (at the very end).  Notice that,
  // because we move entries from the global stack in chunks, or
  // because another task might be doing the same, we might in fact
  // drop below the target. But, this is not a problem.
  size_t target_size;
  if (partially)
    target_size = _cm->partial_mark_stack_size_target();
  else
    target_size = 0;

  if (_cm->mark_stack_size() > target_size) {
    if (_cm->verbose_low())
      gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
                             _task_id, target_size);

    while (!has_aborted() && _cm->mark_stack_size() > target_size) {
      get_entries_from_global_stack();
      drain_local_queue(partially);
    }

    if (_cm->verbose_low())
      gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
                             _task_id, _cm->mark_stack_size());
  }
}
// The SATB queue set makes several assumptions about whether to call
// the par or non-par versions of its methods. This is why some of the
// code is replicated. We should really get rid of the single-threaded
// version of the code to simplify things.
void CMTask::drain_satb_buffers() {
  if (has_aborted())
    return;

  // We set this so that the regular clock knows that we're in the
  // middle of draining buffers and doesn't set the abort flag when it
  // notices that SATB buffers are available for draining. It'd be
  // very counterproductive if it did that. :-)
  _draining_satb_buffers = true;

  CMObjectClosure oc(this);
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  if (ParallelGCThreads > 0)
    satb_mq_set.set_par_closure(_task_id, &oc);
  else
    satb_mq_set.set_closure(&oc);

  // This keeps claiming and applying the closure to completed buffers
  // until we run out of buffers or we need to abort.
  if (ParallelGCThreads > 0) {
    while (!has_aborted() &&
           satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
      if (_cm->verbose_medium())
        gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
      statsOnly( ++_satb_buffers_processed );
      regular_clock_call();
    }
  } else {
    while (!has_aborted() &&
           satb_mq_set.apply_closure_to_completed_buffer()) {
      if (_cm->verbose_medium())
        gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
      statsOnly( ++_satb_buffers_processed );
      regular_clock_call();
    }
  }

  if (!concurrent() && !has_aborted()) {
    // We should only do this during remark.
    if (ParallelGCThreads > 0)
      satb_mq_set.par_iterate_closure_all_threads(_task_id);
    else
      satb_mq_set.iterate_closure_all_threads();
  }

  _draining_satb_buffers = false;

  assert(has_aborted() ||
         concurrent() ||
         satb_mq_set.completed_buffers_num() == 0, "invariant");

  if (ParallelGCThreads > 0)
    satb_mq_set.set_par_closure(_task_id, NULL);
  else
    satb_mq_set.set_closure(NULL);

  // again, this was a potentially expensive operation, decrease the
  // limits to get the regular clock call early
  decrease_limits();
}
void CMTask::drain_region_stack(BitMapClosure* bc) {
  if (has_aborted())
    return;

  assert(_region_finger == NULL,
         "it should be NULL when we're not scanning a region");

  if (!_cm->region_stack_empty()) {
    if (_cm->verbose_low())
      gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
                             _task_id, _cm->region_stack_size());

    MemRegion mr = _cm->region_stack_pop_with_lock();
    // it returns MemRegion() if the pop fails
    statsOnly(if (mr.start() != NULL) ++_region_stack_pops );

    while (mr.start() != NULL) {
      if (_cm->verbose_medium())
        gclog_or_tty->print_cr("[%d] we are scanning region "
                               "["PTR_FORMAT", "PTR_FORMAT")",
                               _task_id, mr.start(), mr.end());
      assert(mr.end() <= _cm->finger(),
             "otherwise the region shouldn't be on the stack");
      assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
      if (_nextMarkBitMap->iterate(bc, mr)) {
        assert(!has_aborted(),
               "cannot abort the task without aborting the bitmap iteration");

        // We finished iterating over the region without aborting.
        regular_clock_call();
        if (has_aborted())
          mr = MemRegion();
        else {
          mr = _cm->region_stack_pop_with_lock();
          // it returns MemRegion() if the pop fails
          statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
        }
      } else {
        assert(has_aborted(), "currently the only way to do so");

        // The only way to abort the bitmap iteration is to return
        // false from the do_bit() method. However, inside the
        // do_bit() method we move the _region_finger to point to the
        // object currently being looked at. So, if we bail out, we
        // have definitely set _region_finger to something non-null.
        assert(_region_finger != NULL, "invariant");

        // The iteration was actually aborted. So now _region_finger
        // points to the address of the object we last scanned. If we
        // leave it there, when we restart this task, we will rescan
        // the object. It is easy to avoid this. We move the finger by
        // enough to point to the next possible object header (the
        // bitmap knows by how much we need to move it as it knows its
        // granularity).
        MemRegion newRegion =
          MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());

        if (!newRegion.is_empty()) {
          if (_cm->verbose_low()) {
            gclog_or_tty->print_cr("[%d] pushing unscanned region "
                                   "["PTR_FORMAT", "PTR_FORMAT") on region stack",
                                   _task_id,
                                   newRegion.start(), newRegion.end());
          }
          // Now push the part of the region we didn't scan on the
          // region stack to make sure a task scans it later.
          _cm->region_stack_push_with_lock(newRegion);
        }
        // break from while
        mr = MemRegion();
      }
      _region_finger = NULL;
    }

    if (_cm->verbose_low())
      gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
                             _task_id, _cm->region_stack_size());
  }
}
void CMTask::print_stats() {
  gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
                         _task_id, _calls);
  gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
                         _elapsed_time_ms, _termination_time_ms);
  gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
                         _step_times_ms.num(), _step_times_ms.avg(),
                         _step_times_ms.sd());
  gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
                         _step_times_ms.maximum(), _step_times_ms.sum());

#if _MARKING_STATS_
  gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
                         _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
                         _all_clock_intervals_ms.sd());
  gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
                         _all_clock_intervals_ms.maximum(),
                         _all_clock_intervals_ms.sum());
  gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
                         _clock_due_to_scanning, _clock_due_to_marking);
  gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
                         _objs_scanned, _objs_found_on_bitmap);
  gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
                         _local_pushes, _local_pops, _local_max_size);
  gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
                         _global_pushes, _global_pops, _global_max_size);
  gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
                         _global_transfers_to, _global_transfers_from);
  gclog_or_tty->print_cr("  Regions: claimed = %d, Region Stack: pops = %d",
                         _regions_claimed, _region_stack_pops);
  gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
  gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
                         _steal_attempts, _steals);
  gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
  gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
                         _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
  gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
                         _aborted_timed_out, _aborted_satb, _aborted_termination);
#endif // _MARKING_STATS_
}
/*****************************************************************************

    The do_marking_step(time_target_ms) method is the building block
    of the parallel marking framework. It can be called in parallel
    with other invocations of do_marking_step() on different tasks
    (but only one per task, obviously) and concurrently with the
    mutator threads, or during remark, hence it eliminates the need
    for two versions of the code. When called during remark, it will
    pick up from where the task left off during the concurrent marking
    phase. Interestingly, tasks are also claimable during evacuation
    pauses, since do_marking_step() ensures that it aborts before it
    needs to yield.

    The data structures that it uses to do marking work are the
    following:

      (1) Marking Bitmap. If there are gray objects that appear only
      on the bitmap (this happens either when dealing with an overflow
      or when the initial marking phase has simply marked the roots
      and didn't push them on the stack), then tasks claim heap
      regions whose bitmap they then scan to find gray objects. A
      global finger indicates where the end of the last claimed region
      is. A local finger indicates how far into the region a task has
      scanned. The two fingers are used to determine how to gray an
      object (i.e. whether simply marking it is OK, as it will be
      visited by a task in the future, or whether it also needs to be
      pushed on a stack).

      (2) Local Queue. The local queue of the task which is accessed
      reasonably efficiently by the task. Other tasks can steal from
      it when they run out of work. Throughout the marking phase, a
      task attempts to keep its local queue short but not totally
      empty, so that entries are available for stealing by other
      tasks. Only when there is no more work does a task totally
      drain its local queue.

      (3) Global Mark Stack. This handles local queue overflow. During
      marking only sets of entries are moved between it and the local
      queues, as access to it requires a mutex and finer-grained
      interaction with it might cause contention. If it overflows,
      then the marking phase should restart and iterate over the
      bitmap to identify gray objects. Throughout the marking phase,
      tasks attempt to keep the global mark stack at a small length
      but not totally empty, so that entries are available for popping
      by other tasks. Only when there is no more work do tasks totally
      drain the global mark stack.

      (4) Global Region Stack. Entries on it correspond to areas of
      the bitmap that need to be scanned since they contain gray
      objects. Pushes on the region stack only happen during
      evacuation pauses and typically correspond to areas covered by
      GC LABs. If it overflows, then the marking phase should restart
      and iterate over the bitmap to identify gray objects. Tasks will
      try to totally drain the region stack as soon as possible.

      (5) SATB Buffer Queue. This is where completed SATB buffers are
      made available. Buffers are regularly removed from this queue
      and scanned for roots, so that the queue doesn't get too
      long. During remark, all completed buffers are processed, as
      well as the filled-in parts of any uncompleted buffers.

    The do_marking_step() method tries to abort when the time target
    has been reached. There are a few other cases when the
    do_marking_step() method also aborts:

      (1) When the marking phase has been aborted (after a Full GC).

      (2) When a global overflow (on either the global stack or the
      region stack) has been triggered. Before the task aborts, it
      will actually sync up with the other tasks to ensure that all
      the marking data structures (local queues, stacks, fingers etc.)
      are re-initialised so that when do_marking_step() completes,
      the marking phase can immediately restart.

      (3) When enough completed SATB buffers are available. The
      do_marking_step() method only tries to drain SATB buffers right
      at the beginning. So, if enough buffers are available, the
      marking step aborts and the SATB buffers are processed at
      the beginning of the next invocation.

      (4) To yield. When we have to yield then we abort and yield
      right at the end of do_marking_step(). This saves us from a lot
      of hassle as, by yielding, we might allow a Full GC. If this
      happens then objects will be compacted underneath our feet, the
      heap might shrink, etc. We save checking for this by just
      aborting and doing the yield right at the end.

    From the above it follows that the do_marking_step() method should
    be called in a loop (or, otherwise, regularly) until it completes.

    If a marking step completes without its has_aborted() flag being
    true, it means it has completed the current marking phase (and
    also all other marking tasks have done so and have all synced up).

    A method called regular_clock_call() is invoked "regularly" (in
    sub-millisecond intervals) throughout marking. It is this clock
    method that checks all the abort conditions which were mentioned
    above and decides when the task should abort. A work-based scheme
    is used to trigger this clock method: when the number of object
    words the marking phase has scanned or the number of references
    the marking phase has visited reach a given limit. Additional
    invocations of the clock method have been planted in a few other
    strategic places too. The initial reason for the clock method was
    to avoid calling vtime too regularly, as it is quite expensive. So,
    once it was in place, it was natural to piggy-back all the other
    conditions on it too and not constantly check them throughout the
    code.

 *****************************************************************************/
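
// A minimal sketch of the calling protocol described above. The real
// drivers live elsewhere (e.g. the concurrent marking thread and the
// parallel remark task); the loop shape below is an illustration of the
// "call in a loop until it completes" rule, not their exact code:
//
//   do {
//     task->do_marking_step(target_ms);
//     // yield / sync with other workers here, as required
//   } while (task->has_aborted() && !cm->has_aborted());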
void CMTask::do_marking_step(double time_target_ms) {
  assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
  assert(concurrent() == _cm->concurrent(), "they should be the same");

  assert(concurrent() || _cm->region_stack_empty(),
         "the region stack should have been cleared before remark");
  assert(_region_finger == NULL,
         "this should be non-null only when a region is being scanned");

  G1CollectorPolicy* g1_policy = _g1h->g1_policy();
  assert(_task_queues != NULL, "invariant");
  assert(_task_queue != NULL, "invariant");
  assert(_task_queues->queue(_task_id) == _task_queue, "invariant");

  assert(!_claimed,
         "only one thread should claim this task at any one time");

  // OK, this doesn't safeguard against all possible scenarios, as it
  // is possible for two threads to set the _claimed flag at the same
  // time. But it is only for debugging purposes anyway and it will
  // catch most problems.
  _claimed = true;

  _start_time_ms = os::elapsedVTime() * 1000.0;
  statsOnly( _interval_start_time_ms = _start_time_ms );

  double diff_prediction_ms =
    g1_policy->get_new_prediction(&_marking_step_diffs_ms);
  _time_target_ms = time_target_ms - diff_prediction_ms;

  // set up the variables that are used in the work-based scheme to
  // call the regular clock method
  _words_scanned = 0;
  _refs_reached  = 0;
  recalculate_limits();

  // clear all flags
  clear_has_aborted();
  _has_aborted_timed_out = false;
  _draining_satb_buffers = false;

  ++_calls;

  if (_cm->verbose_low())
    gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
                           "target = %1.2lfms >>>>>>>>>>",
                           _task_id, _calls, _time_target_ms);
  // Set up the bitmap and oop closures. Anything that uses them is
  // eventually called from this method, so it is OK to allocate these
  // statically.
  CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
  CMOopClosure    oop_closure(_g1h, _cm, this);
  set_oop_closure(&oop_closure);

  if (_cm->has_overflown()) {
    // This can happen if the region stack or the mark stack overflows
    // during a GC pause and this task, after a yield point,
    // restarts. We have to abort as we need to get into the overflow
    // protocol which happens right at the end of this task.
    set_has_aborted();
  }

  // First drain any available SATB buffers. After this, we will not
  // look at SATB buffers before the next invocation of this method.
  // If enough completed SATB buffers are queued up, the regular clock
  // will abort this task so that it restarts.
  drain_satb_buffers();
  // ...then partially drain the local queue and the global stack
  drain_local_queue(true);
  drain_global_stack(true);

  // Then totally drain the region stack.  We will not look at
  // it again before the next invocation of this method. Entries on
  // the region stack are only added during evacuation pauses, for
  // which we have to yield. When we do, we abort the task anyway so
  // it will look at the region stack again when it restarts.
  bitmap_closure.set_scanning_heap_region(false);
  drain_region_stack(&bitmap_closure);
  // ...then partially drain the local queue and the global stack
  drain_local_queue(true);
  drain_global_stack(true);
  do {
    if (!has_aborted() && _curr_region != NULL) {
      // This means that we're already holding on to a region.
      assert(_finger != NULL, "if region is not NULL, then the finger "
             "should not be NULL either");

      // We might have restarted this task after an evacuation pause
      // which might have evacuated the region we're holding on to
      // underneath our feet. Let's read its limit again to make sure
      // that we do not iterate over a region of the heap that
      // contains garbage (update_region_limit() will also move
      // _finger to the start of the region if it is found empty).
      update_region_limit();
      // We will start from _finger not from the start of the region,
      // as we might be restarting this task after aborting half-way
      // through scanning this region. In this case, _finger points to
      // the address where we last found a marked object. If this is a
      // fresh region, _finger points to start().
      MemRegion mr = MemRegion(_finger, _region_limit);

      if (_cm->verbose_low())
        gclog_or_tty->print_cr("[%d] we're scanning part "
                               "["PTR_FORMAT", "PTR_FORMAT") "
                               "of region "PTR_FORMAT,
                               _task_id, _finger, _region_limit, _curr_region);

      // Let's iterate over the bitmap of the part of the
      // region that is left.
      bitmap_closure.set_scanning_heap_region(true);
      if (mr.is_empty() ||
          _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
        // We successfully completed iterating over the region. Now,
        // let's give up the region.
        giveup_current_region();
        regular_clock_call();
      } else {
        assert(has_aborted(), "currently the only way to do so");
        // The only way to abort the bitmap iteration is to return
        // false from the do_bit() method. However, inside the
        // do_bit() method we move the _finger to point to the
        // object currently being looked at. So, if we bail out, we
        // have definitely set _finger to something non-null.
        assert(_finger != NULL, "invariant");

        // Region iteration was actually aborted. So now _finger
        // points to the address of the object we last scanned. If we
        // leave it there, when we restart this task, we will rescan
        // the object. It is easy to avoid this. We move the finger by
        // enough to point to the next possible object header (the
        // bitmap knows by how much we need to move it as it knows its
        // granularity).
        assert(_finger < _region_limit, "invariant");
        HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
        // Check if bitmap iteration was aborted while scanning the last object
        if (new_finger >= _region_limit) {
          giveup_current_region();
        } else {
          move_finger_to(new_finger);
        }
      }
    }
    // At this point we have either completed iterating over the
    // region we were holding on to, or we have aborted.

    // We then partially drain the local queue and the global stack.
    // (Do we really need this?)
    drain_local_queue(true);
    drain_global_stack(true);

    // Read the note on the claim_region() method on why it might
    // return NULL with potentially more regions available for
    // claiming and why we have to check out_of_regions() to determine
    // whether we're done or not.
    while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
      // We are going to try to claim a new region. We should have
      // given up on the previous one.
      // Separated the asserts so that we know which one fires.
      assert(_curr_region  == NULL, "invariant");
      assert(_finger       == NULL, "invariant");
      assert(_region_limit == NULL, "invariant");
      if (_cm->verbose_low())
        gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
      HeapRegion* claimed_region = _cm->claim_region(_task_id);
      if (claimed_region != NULL) {
        // Yes, we managed to claim one
        statsOnly( ++_regions_claimed );

        if (_cm->verbose_low())
          gclog_or_tty->print_cr("[%d] we successfully claimed "
                                 "region "PTR_FORMAT,
                                 _task_id, claimed_region);

        setup_for_region(claimed_region);
        assert(_curr_region == claimed_region, "invariant");
      }
      // It is important to call the regular clock here. It might take
      // a while to claim a region if, for example, we hit a large
      // block of empty regions. So we need to call the regular clock
      // method once round the loop to make sure it's called
      // frequently enough.
      regular_clock_call();
    }

    if (!has_aborted() && _curr_region == NULL) {
      assert(_cm->out_of_regions(),
             "at this point we should be out of regions");
    }
  } while ( _curr_region != NULL && !has_aborted());
  if (!has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently. We also cannot
    // check if the region stack is empty because if a thread is aborting
    // it can push a partially done region back.
    assert(_cm->out_of_regions(),
           "at this point we should be out of regions");

    if (_cm->verbose_low())
      gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);

    // Try to reduce the number of available SATB buffers so that
    // remark has less work to do.
    drain_satb_buffers();
  }

  // Since we've done everything else, we can now totally drain the
  // local queue and global stack.
  drain_local_queue(false);
  drain_global_stack(false);
  // Attempt work stealing from other tasks' queues.
  if (!has_aborted()) {
    // We have not aborted. This means that we have finished all that
    // we could. Let's try to do some stealing...

    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently. We also cannot
    // check if the region stack is empty because if a thread is aborting
    // it can push a partially done region back.
    assert(_cm->out_of_regions() && _task_queue->size() == 0,
           "only way to reach here");

    if (_cm->verbose_low())
      gclog_or_tty->print_cr("[%d] starting to steal", _task_id);

    while (!has_aborted()) {
      oop obj;
      statsOnly( ++_steal_attempts );

      if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
        if (_cm->verbose_medium())
          gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
                                 _task_id, (void*) obj);

        statsOnly( ++_steals );

        assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
               "any stolen object should be marked");
        scan_object(obj);

        // And since we're towards the end, let's totally drain the
        // local queue and global stack.
        drain_local_queue(false);
        drain_global_stack(false);
      } else {
        break;
      }
    }
  }
  // We still haven't aborted. Now, let's try to get into the
  // termination protocol.
  if (!has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be concurrently pushing objects on it. We also cannot
    // check if the region stack is empty because if a thread is aborting
    // it can push a partially done region back.
    // Separated the asserts so that we know which one fires.
    assert(_cm->out_of_regions(), "only way to reach here");
    assert(_task_queue->size() == 0, "only way to reach here");

    if (_cm->verbose_low())
      gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);

    _termination_start_time_ms = os::elapsedVTime() * 1000.0;
    // The CMTask class also extends the TerminatorTerminator class,
    // hence its should_exit_termination() method will also decide
    // whether to exit the termination protocol or not.
    bool finished = _cm->terminator()->offer_termination(this);
    double termination_end_time_ms = os::elapsedVTime() * 1000.0;
    _termination_time_ms +=
      termination_end_time_ms - _termination_start_time_ms;

    if (finished) {
      // We're all done.

      if (_task_id == 0) {
        // let's allow task 0 to do this
        if (concurrent()) {
          assert(_cm->concurrent_marking_in_progress(), "invariant");
          // we need to set this to false before the next
          // safepoint. This way we ensure that the marking phase
          // doesn't observe any more heap expansions.
          _cm->clear_concurrent_marking_in_progress();
        }
      }

      // We can now guarantee that the global stack is empty, since
      // all other tasks have finished. We separated the guarantees so
      // that, if a condition is false, we can immediately find out
      // which one.
      guarantee(_cm->out_of_regions(), "only way to reach here");
      guarantee(_cm->region_stack_empty(), "only way to reach here");
      guarantee(_cm->mark_stack_empty(), "only way to reach here");
      guarantee(_task_queue->size() == 0, "only way to reach here");
      guarantee(!_cm->has_overflown(), "only way to reach here");
      guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
      guarantee(!_cm->region_stack_overflow(), "only way to reach here");

      if (_cm->verbose_low())
        gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
    } else {
      // Apparently there's more work to do. Let's abort this task. Its
      // caller will restart it and we can hopefully find more things
      // to do.

      if (_cm->verbose_low())
        gclog_or_tty->print_cr("[%d] apparently there is more work to do",
                               _task_id);

      set_has_aborted();
      statsOnly( ++_aborted_termination );
    }
  }
  // Mainly for debugging purposes to make sure that a pointer to the
  // closure which was statically allocated in this frame doesn't
  // escape it by accident.
  set_oop_closure(NULL);
  double end_time_ms = os::elapsedVTime() * 1000.0;
  double elapsed_time_ms = end_time_ms - _start_time_ms;
  // Update the step history.
  _step_times_ms.add(elapsed_time_ms);

  if (has_aborted()) {
    // The task was aborted for some reason.

    statsOnly( ++_aborted );

    if (_has_aborted_timed_out) {
      double diff_ms = elapsed_time_ms - _time_target_ms;
      // Keep statistics of how well we did with respect to hitting
      // our target only if we actually timed out (if we aborted for
      // other reasons, then the results might get skewed).
      _marking_step_diffs_ms.add(diff_ms);
    }

    if (_cm->has_overflown()) {
      // This is the interesting one. We aborted because a global
      // overflow was raised. This means we have to restart the
      // marking phase and start iterating over regions. However, in
      // order to do this we have to make sure that all tasks stop
      // what they are doing and re-initialise in a safe manner. We
      // will achieve this with the use of two barrier sync points.

      if (_cm->verbose_low())
        gclog_or_tty->print_cr("[%d] detected overflow", _task_id);

      _cm->enter_first_sync_barrier(_task_id);
      // When we exit this sync barrier we know that all tasks have
      // stopped doing marking work. So, it's now safe to
      // re-initialise our data structures. At the end of this method,
      // task 0 will clear the global data structures.

      statsOnly( ++_aborted_overflow );

      // We clear the local state of this task...
      clear_region_fields();

      // ...and enter the second barrier.
      _cm->enter_second_sync_barrier(_task_id);
      // At this point everything has been re-initialised and we're
      // ready to restart.
    }

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
                             "elapsed = %1.2lfms <<<<<<<<<<",
                             _task_id, _time_target_ms, elapsed_time_ms);
      if (_cm->has_aborted())
        gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
                               _task_id);
    }
  } else {
    if (_cm->verbose_low())
      gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
                             "elapsed = %1.2lfms <<<<<<<<<<",
                             _task_id, _time_target_ms, elapsed_time_ms);
  }

  _claimed = false;
}
CMTask::CMTask(int task_id,
               ConcurrentMark* cm,
               CMTaskQueue* task_queue,
               CMTaskQueueSet* task_queues)
  : _g1h(G1CollectedHeap::heap()),
    _task_id(task_id), _cm(cm),
    _claimed(false),
    _nextMarkBitMap(NULL), _hash_seed(17),
    _task_queue(task_queue),
    _task_queues(task_queues),
    _oop_closure(NULL) {
  guarantee(task_queue != NULL, "invariant");
  guarantee(task_queues != NULL, "invariant");

  statsOnly( _clock_due_to_scanning = 0;
             _clock_due_to_marking  = 0 );

  _marking_step_diffs_ms.add(0.5);
}
