src/share/vm/gc_implementation/g1/concurrentMark.cpp

author      johnc
date        Tue, 04 Aug 2009 16:00:17 -0700
changeset   1325:6cb8e9df7174
parent      1301:18f526145aea
child       1347:308762b2bf14
permissions -rw-r--r--

6819077: G1: first GC thread coming late into the GC.
Summary: The first worker thread is delayed when entering the GC because it clears the card count table that is used in identifying hot cards. Replace the card count table with a dynamically sized evicting hash table that includes an epoch-based counter.
Reviewed-by: iveresov, tonyp
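
To make the mechanism described in the summary concrete, below is a minimal, self-contained C++ sketch of the general technique: a fixed-capacity, open-addressed card-count table whose entries record the epoch in which they were last touched, so that advancing the epoch invalidates every entry at once and no worker thread has to clear the table when it enters a GC. All names here (CardCountTable, CardCountEntry, add_card_count, new_epoch) are illustrative assumptions for exposition only; they are not the data structure actually added by this changeset.

#include <cstdint>
#include <cstdlib>

// Sketch only: per-card counts live in a power-of-two-sized table. Each slot
// carries the epoch (GC number) in which it was last updated; a slot with a
// stale epoch is treated as empty and evicted on reuse, so the table never
// needs a bulk clear on the GC entry path.
struct CardCountEntry {
  std::uintptr_t card;   // card index being counted (illustrative field)
  std::uint32_t  epoch;  // epoch in which this slot was last updated
  std::uint32_t  count;  // refinements seen for this card during this epoch
};

class CardCountTable {
  CardCountEntry* _entries;
  std::size_t     _size;   // number of slots, a power of two
  std::uint32_t   _epoch;  // current epoch

public:
  explicit CardCountTable(std::size_t size) : _size(size), _epoch(0) {
    _entries = static_cast<CardCountEntry*>(
        std::calloc(_size, sizeof(CardCountEntry)));
  }
  ~CardCountTable() { std::free(_entries); }

  // Called once per GC instead of clearing the table: advancing the epoch
  // implicitly invalidates every existing entry in O(1).
  void new_epoch() { _epoch++; }

  // Bump and return the count for a card. A slot holding a different card
  // or a stale epoch is simply evicted and reused, keeping the table bounded.
  std::uint32_t add_card_count(std::uintptr_t card) {
    std::size_t idx = card & (_size - 1);   // trivial hash, adequate for a sketch
    CardCountEntry* e = &_entries[idx];
    if (e->epoch != _epoch || e->card != card) {
      e->card  = card;                      // evict whatever was here before
      e->epoch = _epoch;
      e->count = 0;
    }
    return ++e->count;
  }
};

A caller would treat a card as "hot" once add_card_count() crosses some threshold; the point of the epoch field is that new_epoch() replaces the table-wide clear that, per the summary, was delaying the first worker thread.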

     1 /*
     2  * Copyright 2001-2009 Sun Microsystems, Inc.  All Rights Reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.
     8  *
     9  * This code is distributed in the hope that it will be useful, but WITHOUT
    10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    12  * version 2 for more details (a copy is included in the LICENSE file that
    13  * accompanied this code).
    14  *
    15  * You should have received a copy of the GNU General Public License version
    16  * 2 along with this work; if not, write to the Free Software Foundation,
    17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    18  *
    19  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
    20  * CA 95054 USA or visit www.sun.com if you need additional information or
    21  * have any questions.
    22  *
    23  */
    25 #include "incls/_precompiled.incl"
    26 #include "incls/_concurrentMark.cpp.incl"
    28 //
    29 // CMS Bit Map Wrapper
    31 CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter):
    32   _bm((uintptr_t*)NULL,0),
    33   _shifter(shifter) {
    34   _bmStartWord = (HeapWord*)(rs.base());
    35   _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
    36   ReservedSpace brs(ReservedSpace::allocation_align_size_up(
    37                      (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
    39   guarantee(brs.is_reserved(), "couldn't allocate CMS bit map");
    40 // For now we'll just commit all of the bit map up front.
    41   // Later on we'll try to be more parsimonious with swap.
    42   guarantee(_virtual_space.initialize(brs, brs.size()),
    43             "couldn't reserve backing store for CMS bit map");
    44   assert(_virtual_space.committed_size() == brs.size(),
    45          "didn't reserve backing store for all of CMS bit map?");
    46   _bm.set_map((uintptr_t*)_virtual_space.low());
    47   assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
    48          _bmWordSize, "inconsistency in bit map sizing");
    49   _bm.set_size(_bmWordSize >> _shifter);
    50 }
    52 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
    53                                                HeapWord* limit) const {
    54   // First we must round addr *up* to a possible object boundary.
    55   addr = (HeapWord*)align_size_up((intptr_t)addr,
    56                                   HeapWordSize << _shifter);
    57   size_t addrOffset = heapWordToOffset(addr);
    58   if (limit == NULL) limit = _bmStartWord + _bmWordSize;
    59   size_t limitOffset = heapWordToOffset(limit);
    60   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
    61   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
    62   assert(nextAddr >= addr, "get_next_one postcondition");
    63   assert(nextAddr == limit || isMarked(nextAddr),
    64          "get_next_one postcondition");
    65   return nextAddr;
    66 }
    68 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
    69                                                  HeapWord* limit) const {
    70   size_t addrOffset = heapWordToOffset(addr);
    71   if (limit == NULL) limit = _bmStartWord + _bmWordSize;
    72   size_t limitOffset = heapWordToOffset(limit);
    73   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
    74   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
    75   assert(nextAddr >= addr, "get_next_one postcondition");
    76   assert(nextAddr == limit || !isMarked(nextAddr),
    77          "get_next_one postcondition");
    78   return nextAddr;
    79 }
    81 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
    82   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
    83   return (int) (diff >> _shifter);
    84 }
    86 bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
    87   HeapWord* left  = MAX2(_bmStartWord, mr.start());
    88   HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
    89   if (right > left) {
    90     // Right-open interval [leftOffset, rightOffset).
    91     return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
    92   } else {
    93     return true;
    94   }
    95 }
    97 void CMBitMapRO::mostly_disjoint_range_union(BitMap*   from_bitmap,
    98                                              size_t    from_start_index,
    99                                              HeapWord* to_start_word,
   100                                              size_t    word_num) {
   101   _bm.mostly_disjoint_range_union(from_bitmap,
   102                                   from_start_index,
   103                                   heapWordToOffset(to_start_word),
   104                                   word_num);
   105 }
   107 #ifndef PRODUCT
   108 bool CMBitMapRO::covers(ReservedSpace rs) const {
   109   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
   110   assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
   111          "size inconsistency");
   112   return _bmStartWord == (HeapWord*)(rs.base()) &&
   113          _bmWordSize  == rs.size()>>LogHeapWordSize;
   114 }
   115 #endif
   117 void CMBitMap::clearAll() {
   118   _bm.clear();
   119   return;
   120 }
   122 void CMBitMap::markRange(MemRegion mr) {
   123   mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
   124   assert(!mr.is_empty(), "unexpected empty region");
   125   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
   126           ((HeapWord *) mr.end())),
   127          "markRange memory region end is not card aligned");
   128   // convert address range into offset range
   129   _bm.at_put_range(heapWordToOffset(mr.start()),
   130                    heapWordToOffset(mr.end()), true);
   131 }
   133 void CMBitMap::clearRange(MemRegion mr) {
   134   mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
   135   assert(!mr.is_empty(), "unexpected empty region");
   136   // convert address range into offset range
   137   _bm.at_put_range(heapWordToOffset(mr.start()),
   138                    heapWordToOffset(mr.end()), false);
   139 }
   141 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
   142                                             HeapWord* end_addr) {
   143   HeapWord* start = getNextMarkedWordAddress(addr);
   144   start = MIN2(start, end_addr);
   145   HeapWord* end   = getNextUnmarkedWordAddress(start);
   146   end = MIN2(end, end_addr);
   147   assert(start <= end, "Consistency check");
   148   MemRegion mr(start, end);
   149   if (!mr.is_empty()) {
   150     clearRange(mr);
   151   }
   152   return mr;
   153 }
   155 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
   156   _base(NULL), _cm(cm)
   157 #ifdef ASSERT
   158   , _drain_in_progress(false)
   159   , _drain_in_progress_yields(false)
   160 #endif
   161 {}
   163 void CMMarkStack::allocate(size_t size) {
   164   _base = NEW_C_HEAP_ARRAY(oop, size);
   165   if (_base == NULL)
   166     vm_exit_during_initialization("Failed to allocate "
   167                                   "CM region mark stack");
   168   _index = 0;
   169   // QQQQ cast ...
   170   _capacity = (jint) size;
   171   _oops_do_bound = -1;
   172   NOT_PRODUCT(_max_depth = 0);
   173 }
   175 CMMarkStack::~CMMarkStack() {
   176   if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base);
   177 }
   179 void CMMarkStack::par_push(oop ptr) {
   180   while (true) {
   181     if (isFull()) {
   182       _overflow = true;
   183       return;
   184     }
   185     // Otherwise...
   186     jint index = _index;
   187     jint next_index = index+1;
   188     jint res = Atomic::cmpxchg(next_index, &_index, index);
   189     if (res == index) {
   190       _base[index] = ptr;
   191       // Note that we don't maintain this atomically.  We could, but it
   192       // doesn't seem necessary.
   193       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
   194       return;
   195     }
   196     // Otherwise, we need to try again.
   197   }
   198 }
   200 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
   201   while (true) {
   202     if (isFull()) {
   203       _overflow = true;
   204       return;
   205     }
   206     // Otherwise...
   207     jint index = _index;
   208     jint next_index = index + n;
   209     if (next_index > _capacity) {
   210       _overflow = true;
   211       return;
   212     }
   213     jint res = Atomic::cmpxchg(next_index, &_index, index);
   214     if (res == index) {
   215       for (int i = 0; i < n; i++) {
   216         int ind = index + i;
   217         assert(ind < _capacity, "By overflow test above.");
   218         _base[ind] = ptr_arr[i];
   219       }
   220       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
   221       return;
   222     }
   223     // Otherwise, we need to try again.
   224   }
   225 }
   228 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
   229   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
   230   jint start = _index;
   231   jint next_index = start + n;
   232   if (next_index > _capacity) {
   233     _overflow = true;
   234     return;
   235   }
   236   // Otherwise.
   237   _index = next_index;
   238   for (int i = 0; i < n; i++) {
   239     int ind = start + i;
   240     guarantee(ind < _capacity, "By overflow test above.");
   241     _base[ind] = ptr_arr[i];
   242   }
   243 }
   246 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
   247   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
   248   jint index = _index;
   249   if (index == 0) {
   250     *n = 0;
   251     return false;
   252   } else {
   253     int k = MIN2(max, index);
   254     jint new_ind = index - k;
   255     for (int j = 0; j < k; j++) {
   256       ptr_arr[j] = _base[new_ind + j];
   257     }
   258     _index = new_ind;
   259     *n = k;
   260     return true;
   261   }
   262 }
   265 CMRegionStack::CMRegionStack() : _base(NULL) {}
   267 void CMRegionStack::allocate(size_t size) {
   268   _base = NEW_C_HEAP_ARRAY(MemRegion, size);
   269   if (_base == NULL)
   270     vm_exit_during_initialization("Failed to allocate "
   271                                   "CM region mark stack");
   272   _index = 0;
   273   // QQQQ cast ...
   274   _capacity = (jint) size;
   275 }
   277 CMRegionStack::~CMRegionStack() {
   278   if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base);
   279 }
   281 void CMRegionStack::push(MemRegion mr) {
   282   assert(mr.word_size() > 0, "Precondition");
   283   while (true) {
   284     if (isFull()) {
   285       _overflow = true;
   286       return;
   287     }
   288     // Otherwise...
   289     jint index = _index;
   290     jint next_index = index+1;
   291     jint res = Atomic::cmpxchg(next_index, &_index, index);
   292     if (res == index) {
   293       _base[index] = mr;
   294       return;
   295     }
   296     // Otherwise, we need to try again.
   297   }
   298 }
   300 MemRegion CMRegionStack::pop() {
   301   while (true) {
   302     // Otherwise...
   303     jint index = _index;
   305     if (index == 0) {
   306       return MemRegion();
   307     }
   308     jint next_index = index-1;
   309     jint res = Atomic::cmpxchg(next_index, &_index, index);
   310     if (res == index) {
   311       MemRegion mr = _base[next_index];
   312       if (mr.start() != NULL) {
   313         tmp_guarantee_CM( mr.end() != NULL, "invariant" );
   314         tmp_guarantee_CM( mr.word_size() > 0, "invariant" );
   315         return mr;
   316       } else {
   317         // that entry was invalidated... let's skip it
   318         tmp_guarantee_CM( mr.end() == NULL, "invariant" );
   319       }
   320     }
   321     // Otherwise, we need to try again.
   322   }
   323 }
   325 bool CMRegionStack::invalidate_entries_into_cset() {
   326   bool result = false;
   327   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   328   for (int i = 0; i < _oops_do_bound; ++i) {
   329     MemRegion mr = _base[i];
   330     if (mr.start() != NULL) {
   331       tmp_guarantee_CM( mr.end() != NULL, "invariant");
   332       tmp_guarantee_CM( mr.word_size() > 0, "invariant" );
   333       HeapRegion* hr = g1h->heap_region_containing(mr.start());
   334       tmp_guarantee_CM( hr != NULL, "invariant" );
   335       if (hr->in_collection_set()) {
   336         // The region points into the collection set
   337         _base[i] = MemRegion();
   338         result = true;
   339       }
   340     } else {
   341       // that entry was invalidated... let's skip it
   342       tmp_guarantee_CM( mr.end() == NULL, "invariant" );
   343     }
   344   }
   345   return result;
   346 }
   348 template<class OopClosureClass>
   349 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
   350   assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
   351          || SafepointSynchronize::is_at_safepoint(),
   352          "Drain recursion must be yield-safe.");
   353   bool res = true;
   354   debug_only(_drain_in_progress = true);
   355   debug_only(_drain_in_progress_yields = yield_after);
   356   while (!isEmpty()) {
   357     oop newOop = pop();
   358     assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
   359     assert(newOop->is_oop(), "Expected an oop");
   360     assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
   361            "only grey objects on this stack");
   362     // iterate over the oops in this oop, marking and pushing
   363     // the ones in CMS generation.
   364     newOop->oop_iterate(cl);
   365     if (yield_after && _cm->do_yield_check()) {
   366       res = false; break;
   367     }
   368   }
   369   debug_only(_drain_in_progress = false);
   370   return res;
   371 }
   373 void CMMarkStack::oops_do(OopClosure* f) {
   374   if (_index == 0) return;
   375   assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
   376          "Bound must be set.");
   377   for (int i = 0; i < _oops_do_bound; i++) {
   378     f->do_oop(&_base[i]);
   379   }
   380   _oops_do_bound = -1;
   381 }
   383 bool ConcurrentMark::not_yet_marked(oop obj) const {
   384   return (_g1h->is_obj_ill(obj)
   385           || (_g1h->is_in_permanent(obj)
   386               && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
   387 }
   389 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
   390 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
   391 #endif // _MSC_VER
   393 ConcurrentMark::ConcurrentMark(ReservedSpace rs,
   394                                int max_regions) :
   395   _markBitMap1(rs, MinObjAlignment - 1),
   396   _markBitMap2(rs, MinObjAlignment - 1),
   398   _parallel_marking_threads(0),
   399   _sleep_factor(0.0),
   400   _marking_task_overhead(1.0),
   401   _cleanup_sleep_factor(0.0),
   402   _cleanup_task_overhead(1.0),
   403   _region_bm(max_regions, false /* in_resource_area*/),
   404   _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
   405            CardTableModRefBS::card_shift,
   406            false /* in_resource_area*/),
   407   _prevMarkBitMap(&_markBitMap1),
   408   _nextMarkBitMap(&_markBitMap2),
   409   _at_least_one_mark_complete(false),
   411   _markStack(this),
   412   _regionStack(),
   413   // _finger set in set_non_marking_state
   415   _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
   416   // _active_tasks set in set_non_marking_state
   417   // _tasks set inside the constructor
   418   _task_queues(new CMTaskQueueSet((int) _max_task_num)),
   419   _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),
   421   _has_overflown(false),
   422   _concurrent(false),
   423   _has_aborted(false),
   424   _restart_for_overflow(false),
   425   _concurrent_marking_in_progress(false),
   426   _should_gray_objects(false),
   428   // _verbose_level set below
   430   _init_times(),
   431   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
   432   _cleanup_times(),
   433   _total_counting_time(0.0),
   434   _total_rs_scrub_time(0.0),
   436   _parallel_workers(NULL),
   437   _cleanup_co_tracker(G1CLGroup)
   438 {
   439   CMVerboseLevel verbose_level =
   440     (CMVerboseLevel) G1MarkingVerboseLevel;
   441   if (verbose_level < no_verbose)
   442     verbose_level = no_verbose;
   443   if (verbose_level > high_verbose)
   444     verbose_level = high_verbose;
   445   _verbose_level = verbose_level;
   447   if (verbose_low())
   448     gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
   449                            "heap end = "PTR_FORMAT, _heap_start, _heap_end);
   451   _markStack.allocate(G1MarkStackSize);
   452   _regionStack.allocate(G1MarkRegionStackSize);
   454   // Create & start a ConcurrentMark thread.
   455   _cmThread = new ConcurrentMarkThread(this);
   456   assert(cmThread() != NULL, "CM Thread should have been created");
   457   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
   459   _g1h = G1CollectedHeap::heap();
   460   assert(CGC_lock != NULL, "Where's the CGC_lock?");
   461   assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
   462   assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
   464   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
   465   satb_qs.set_buffer_size(G1SATBLogBufferSize);
   467   int size = (int) MAX2(ParallelGCThreads, (size_t)1);
   468   _par_cleanup_thread_state = NEW_C_HEAP_ARRAY(ParCleanupThreadState*, size);
   469   for (int i = 0 ; i < size; i++) {
   470     _par_cleanup_thread_state[i] = new ParCleanupThreadState;
   471   }
   473   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
   474   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);
   476   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
   477   _active_tasks = _max_task_num;
   478   for (int i = 0; i < (int) _max_task_num; ++i) {
   479     CMTaskQueue* task_queue = new CMTaskQueue();
   480     task_queue->initialize();
   481     _task_queues->register_queue(i, task_queue);
   483     _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
   484     _accum_task_vtime[i] = 0.0;
   485   }
   487   if (ParallelMarkingThreads > ParallelGCThreads) {
   488     vm_exit_during_initialization("Can't have more ParallelMarkingThreads "
   489                                   "than ParallelGCThreads.");
   490   }
   491   if (ParallelGCThreads == 0) {
   492     // if we are not running with any parallel GC threads we will not
   493     // spawn any marking threads either
   494     _parallel_marking_threads =   0;
   495     _sleep_factor             = 0.0;
   496     _marking_task_overhead    = 1.0;
   497   } else {
   498     if (ParallelMarkingThreads > 0) {
   499       // notice that ParallelMarkingThreads overwrites G1MarkingOverheadPercent
   500       // if both are set
   502       _parallel_marking_threads = ParallelMarkingThreads;
   503       _sleep_factor             = 0.0;
   504       _marking_task_overhead    = 1.0;
   505     } else if (G1MarkingOverheadPercent > 0) {
   506       // we will calculate the number of parallel marking threads
   507       // based on a target overhead with respect to the soft real-time
   508       // goal
   510       double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
   511       double overall_cm_overhead =
   512         (double) MaxGCPauseMillis * marking_overhead /
   513         (double) GCPauseIntervalMillis;
   514       double cpu_ratio = 1.0 / (double) os::processor_count();
   515       double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
   516       double marking_task_overhead =
   517         overall_cm_overhead / marking_thread_num *
   518                                                 (double) os::processor_count();
   519       double sleep_factor =
   520                          (1.0 - marking_task_overhead) / marking_task_overhead;
   522       _parallel_marking_threads = (size_t) marking_thread_num;
   523       _sleep_factor             = sleep_factor;
   524       _marking_task_overhead    = marking_task_overhead;
   525     } else {
   526       _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
   527       _sleep_factor             = 0.0;
   528       _marking_task_overhead    = 1.0;
   529     }
   531     if (parallel_marking_threads() > 1)
   532       _cleanup_task_overhead = 1.0;
   533     else
   534       _cleanup_task_overhead = marking_task_overhead();
   535     _cleanup_sleep_factor =
   536                      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
   538 #if 0
   539     gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
   540     gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
   541     gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
   542     gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
   543     gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
   544 #endif
   546     guarantee( parallel_marking_threads() > 0, "peace of mind" );
   547     _parallel_workers = new WorkGang("Parallel Marking Threads",
   548                                      (int) parallel_marking_threads(), false, true);
   549     if (_parallel_workers == NULL)
   550       vm_exit_during_initialization("Failed necessary allocation.");
   551   }
   553   // so that the call below can read a sensible value
   554   _heap_start = (HeapWord*) rs.base();
   555   set_non_marking_state();
   556 }
   558 void ConcurrentMark::update_g1_committed(bool force) {
   559   // If concurrent marking is not in progress, then we do not need to
   560   // update _heap_end. This has a subtle and important
   561   // side-effect. Imagine that two evacuation pauses happen between
   562   // marking completion and remark. The first one can grow the
   563   // heap (hence now the finger is below the heap end). Then, the
   564   // second one could unnecessarily push regions on the region
   565   // stack. This causes the invariant that the region stack is empty
   566   // at the beginning of remark to be false. By ensuring that we do
   567 // not observe heap expansions after marking is complete, we avoid
   568 // this problem.
   569   if (!concurrent_marking_in_progress() && !force)
   570     return;
   572   MemRegion committed = _g1h->g1_committed();
   573   tmp_guarantee_CM( committed.start() == _heap_start,
   574                     "start shouldn't change" );
   575   HeapWord* new_end = committed.end();
   576   if (new_end > _heap_end) {
   577     // The heap has been expanded.
   579     _heap_end = new_end;
   580   }
   581   // Notice that the heap can also shrink. However, this only happens
   582   // during a Full GC (at least currently) and the entire marking
   583   // phase will bail out and the task will not be restarted. So, let's
   584   // do nothing.
   585 }
   587 void ConcurrentMark::reset() {
   588   // Starting values for these two. This should be called in a STW
   589   // phase. CM will be notified of any future g1_committed expansions
   590 // at the end of evacuation pauses, when tasks are
   591   // inactive.
   592   MemRegion committed = _g1h->g1_committed();
   593   _heap_start = committed.start();
   594   _heap_end   = committed.end();
   596   guarantee( _heap_start != NULL &&
   597              _heap_end != NULL   &&
   598              _heap_start < _heap_end, "heap bounds should look ok" );
   600   // reset all the marking data structures and any necessary flags
   601   clear_marking_state();
   603   if (verbose_low())
   604     gclog_or_tty->print_cr("[global] resetting");
   606   // We do reset all of them, since different phases will use
   607   // different numbers of active threads. So, it's easiest to have all
   608   // of them ready.
   609   for (int i = 0; i < (int) _max_task_num; ++i)
   610     _tasks[i]->reset(_nextMarkBitMap);
   612   // we need this to make sure that the flag is on during the evac
   613   // pause with initial mark piggy-backed
   614   set_concurrent_marking_in_progress();
   615 }
   617 void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
   618   guarantee( active_tasks <= _max_task_num, "we should not have more" );
   620   _active_tasks = active_tasks;
   621   // Need to update the three data structures below according to the
   622   // number of active threads for this phase.
   623   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
   624   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
   625   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
   627   _concurrent = concurrent;
   628   // We propagate this to all tasks, not just the active ones.
   629   for (int i = 0; i < (int) _max_task_num; ++i)
   630     _tasks[i]->set_concurrent(concurrent);
   632   if (concurrent) {
   633     set_concurrent_marking_in_progress();
   634   } else {
   635     // We currently assume that the concurrent flag has been set to
   636     // false before we start remark. At this point we should also be
   637     // in a STW phase.
   638     guarantee( !concurrent_marking_in_progress(), "invariant" );
   639     guarantee( _finger == _heap_end, "only way to get here" );
   640     update_g1_committed(true);
   641   }
   642 }
   644 void ConcurrentMark::set_non_marking_state() {
   645   // We set the global marking state to some default values when we're
   646   // not doing marking.
   647   clear_marking_state();
   648   _active_tasks = 0;
   649   clear_concurrent_marking_in_progress();
   650 }
   652 ConcurrentMark::~ConcurrentMark() {
   653   int size = (int) MAX2(ParallelGCThreads, (size_t)1);
   654   for (int i = 0; i < size; i++) delete _par_cleanup_thread_state[i];
   655   FREE_C_HEAP_ARRAY(ParCleanupThreadState*,
   656                     _par_cleanup_thread_state);
   658   for (int i = 0; i < (int) _max_task_num; ++i) {
   659     delete _task_queues->queue(i);
   660     delete _tasks[i];
   661   }
   662   delete _task_queues;
   663   FREE_C_HEAP_ARRAY(CMTask*, _max_task_num);
   664 }
   666 // This closure is used to mark refs into the g1 generation
   667 // from external roots in the CMS bit map.
   668 // Called at the first checkpoint.
   669 //
   671 #define PRINT_REACHABLE_AT_INITIAL_MARK 0
   672 #if PRINT_REACHABLE_AT_INITIAL_MARK
   673 static FILE* reachable_file = NULL;
   675 class PrintReachableClosure: public OopsInGenClosure {
   676   CMBitMap* _bm;
   677   int _level;
   678 public:
   679   PrintReachableClosure(CMBitMap* bm) :
   680     _bm(bm), _level(0) {
   681     guarantee(reachable_file != NULL, "pre-condition");
   682   }
   683   void do_oop(oop* p) {
   684     oop obj = *p;
   685     HeapWord* obj_addr = (HeapWord*)obj;
   686     if (obj == NULL) return;
   687     fprintf(reachable_file, "%d: "PTR_FORMAT" -> "PTR_FORMAT" (%d)\n",
   688             _level, p, (void*) obj, _bm->isMarked(obj_addr));
   689     if (!_bm->isMarked(obj_addr)) {
   690       _bm->mark(obj_addr);
   691       _level++;
   692       obj->oop_iterate(this);
   693       _level--;
   694     }
   695   }
   696 };
   697 #endif // PRINT_REACHABLE_AT_INITIAL_MARK
   699 #define SEND_HEAP_DUMP_TO_FILE 0
   700 #if SEND_HEAP_DUMP_TO_FILE
   701 static FILE* heap_dump_file = NULL;
   702 #endif // SEND_HEAP_DUMP_TO_FILE
   704 void ConcurrentMark::clearNextBitmap() {
   705    guarantee(!G1CollectedHeap::heap()->mark_in_progress(), "Precondition.");
   707    // clear the mark bitmap (no grey objects to start with).
   708    // We need to do this in chunks and offer to yield in between
   709    // each chunk.
   710    HeapWord* start  = _nextMarkBitMap->startWord();
   711    HeapWord* end    = _nextMarkBitMap->endWord();
   712    HeapWord* cur    = start;
   713    size_t chunkSize = M;
   714    while (cur < end) {
   715      HeapWord* next = cur + chunkSize;
   716      if (next > end)
   717        next = end;
   718      MemRegion mr(cur,next);
   719      _nextMarkBitMap->clearRange(mr);
   720      cur = next;
   721      do_yield_check();
   722    }
   723 }
   725 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
   726 public:
   727   bool doHeapRegion(HeapRegion* r) {
   728     if (!r->continuesHumongous()) {
   729       r->note_start_of_marking(true);
   730     }
   731     return false;
   732   }
   733 };
   735 void ConcurrentMark::checkpointRootsInitialPre() {
   736   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
   737   G1CollectorPolicy* g1p = g1h->g1_policy();
   739   _has_aborted = false;
   741   // Find all the reachable objects...
   742 #if PRINT_REACHABLE_AT_INITIAL_MARK
   743   guarantee(reachable_file == NULL, "Protocol");
   744   char fn_buf[100];
   745   sprintf(fn_buf, "/tmp/reachable.txt.%d", os::current_process_id());
   746   reachable_file = fopen(fn_buf, "w");
   747   // clear the mark bitmap (no grey objects to start with)
   748   _nextMarkBitMap->clearAll();
   749   PrintReachableClosure prcl(_nextMarkBitMap);
   750   g1h->process_strong_roots(
   751                             false,   // fake perm gen collection
   752                             SharedHeap::SO_AllClasses,
   753                             &prcl, // Regular roots
   754                             &prcl    // Perm Gen Roots
   755                             );
   756   // The root iteration above "consumed" dirty cards in the perm gen.
   757   // Therefore, as a shortcut, we dirty all such cards.
   758   g1h->rem_set()->invalidate(g1h->perm_gen()->used_region(), false);
   759   fclose(reachable_file);
   760   reachable_file = NULL;
   761   // clear the mark bitmap again.
   762   _nextMarkBitMap->clearAll();
   763   COMPILER2_PRESENT(DerivedPointerTable::update_pointers());
   764   COMPILER2_PRESENT(DerivedPointerTable::clear());
   765 #endif // PRINT_REACHABLE_AT_INITIAL_MARK
   767   // Initialise marking structures. This has to be done in a STW phase.
   768   reset();
   769 }
   771 class CMMarkRootsClosure: public OopsInGenClosure {
   772 private:
   773   ConcurrentMark*  _cm;
   774   G1CollectedHeap* _g1h;
   775   bool             _do_barrier;
   777 public:
   778   CMMarkRootsClosure(ConcurrentMark* cm,
   779                      G1CollectedHeap* g1h,
   780                      bool do_barrier) : _cm(cm), _g1h(g1h),
   781                                         _do_barrier(do_barrier) { }
   783   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
   784   virtual void do_oop(      oop* p) { do_oop_work(p); }
   786   template <class T> void do_oop_work(T* p) {
   787     T heap_oop = oopDesc::load_heap_oop(p);
   788     if (!oopDesc::is_null(heap_oop)) {
   789       oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
   790       assert(obj->is_oop() || obj->mark() == NULL,
   791              "expected an oop, possibly with mark word displaced");
   792       HeapWord* addr = (HeapWord*)obj;
   793       if (_g1h->is_in_g1_reserved(addr)) {
   794         _cm->grayRoot(obj);
   795       }
   796     }
   797     if (_do_barrier) {
   798       assert(!_g1h->is_in_g1_reserved(p),
   799              "Should be called on external roots");
   800       do_barrier(p);
   801     }
   802   }
   803 };
   805 void ConcurrentMark::checkpointRootsInitialPost() {
   806   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
   808   // For each region note start of marking.
   809   NoteStartOfMarkHRClosure startcl;
   810   g1h->heap_region_iterate(&startcl);
   812   // Start weak-reference discovery.
   813   ReferenceProcessor* rp = g1h->ref_processor();
   814   rp->verify_no_references_recorded();
   815   rp->enable_discovery(); // enable ("weak") refs discovery
   816   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
   818   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
   819   satb_mq_set.set_process_completed_threshold(G1SATBProcessCompletedThreshold);
   820   satb_mq_set.set_active_all_threads(true);
   822   // update_g1_committed() will be called at the end of an evac pause
   823   // when marking is on. So, it's also called at the end of the
   824   // initial-mark pause to update the heap end, if the heap expands
   825   // during it. No need to call it here.
   827   guarantee( !_cleanup_co_tracker.enabled(), "invariant" );
   829   size_t max_marking_threads =
   830     MAX2((size_t) 1, parallel_marking_threads());
   831   for (int i = 0; i < (int)_max_task_num; ++i) {
   832     _tasks[i]->enable_co_tracker();
   833     if (i < (int) max_marking_threads)
   834       _tasks[i]->reset_co_tracker(marking_task_overhead());
   835     else
   836       _tasks[i]->reset_co_tracker(0.0);
   837   }
   838 }
   840 // Checkpoint the roots into this generation from outside
   841 // this generation. [Note this initial checkpoint need only
   842 // be approximate -- we'll do a catch up phase subsequently.]
   843 void ConcurrentMark::checkpointRootsInitial() {
   844   assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped");
   845   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   847   double start = os::elapsedTime();
   848   GCOverheadReporter::recordSTWStart(start);
   850   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
   851   g1p->record_concurrent_mark_init_start();
   852   checkpointRootsInitialPre();
   854   // YSR: when concurrent precleaning is in place, we'll
   855   // need to clear the cached card table here
   857   ResourceMark rm;
   858   HandleMark  hm;
   860   g1h->ensure_parsability(false);
   861   g1h->perm_gen()->save_marks();
   863   CMMarkRootsClosure notOlder(this, g1h, false);
   864   CMMarkRootsClosure older(this, g1h, true);
   866   g1h->set_marking_started();
   867   g1h->rem_set()->prepare_for_younger_refs_iterate(false);
   869   g1h->process_strong_roots(false,   // fake perm gen collection
   870                             SharedHeap::SO_AllClasses,
   871                             &notOlder, // Regular roots
   872                             &older    // Perm Gen Roots
   873                             );
   874   checkpointRootsInitialPost();
   876   // Statistics.
   877   double end = os::elapsedTime();
   878   _init_times.add((end - start) * 1000.0);
   879   GCOverheadReporter::recordSTWEnd(end);
   881   g1p->record_concurrent_mark_init_end();
   882 }
   884 /*
   885    Notice that in the next two methods, we actually leave the STS
   886    during the barrier sync and join it immediately afterwards. If we
   887    do not do this, then the following deadlock can occur: one
   888    thread could be in the barrier sync code, waiting for the other
   889    thread to also sync up, whereas another one could be trying to
   890    yield, while also waiting for the other threads to sync up too.
   892    Because the thread that does the sync barrier has left the STS, it
   893    is possible for it to be suspended while a Full GC or an evacuation
   894    pause occurs. This is actually safe, since entering the sync
   895    barrier is one of the last things do_marking_step() does, and it
   896    doesn't manipulate any data structures afterwards.
   897 */
   899 void ConcurrentMark::enter_first_sync_barrier(int task_num) {
   900   if (verbose_low())
   901     gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
   903   ConcurrentGCThread::stsLeave();
   904   _first_overflow_barrier_sync.enter();
   905   ConcurrentGCThread::stsJoin();
   906   // at this point everyone should have synced up and not be doing any
   907   // more work
   909   if (verbose_low())
   910     gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
   912   // let task 0 do this
   913   if (task_num == 0) {
   914     // task 0 is responsible for clearing the global data structures
   915     clear_marking_state();
   917     if (PrintGC) {
   918       gclog_or_tty->date_stamp(PrintGCDateStamps);
   919       gclog_or_tty->stamp(PrintGCTimeStamps);
   920       gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
   921     }
   922   }
   924   // after this, each task should reset its own data structures and
   925   // then go into the second barrier
   926 }
   928 void ConcurrentMark::enter_second_sync_barrier(int task_num) {
   929   if (verbose_low())
   930     gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
   932   ConcurrentGCThread::stsLeave();
   933   _second_overflow_barrier_sync.enter();
   934   ConcurrentGCThread::stsJoin();
   935   // at this point everything should be re-initialised and ready to go
   937   if (verbose_low())
   938     gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
   939 }
   941 void ConcurrentMark::grayRoot(oop p) {
   942   HeapWord* addr = (HeapWord*) p;
   943   // We can't really check against _heap_start and _heap_end, since it
   944   // is possible during an evacuation pause with piggy-backed
   945   // initial-mark that the committed space is expanded during the
   946   // pause without CM observing this change. So the assertion below
   947   // is a bit conservative, but better than nothing.
   948   tmp_guarantee_CM( _g1h->g1_committed().contains(addr),
   949                     "address should be within the heap bounds" );
   951   if (!_nextMarkBitMap->isMarked(addr))
   952     _nextMarkBitMap->parMark(addr);
   953 }
   955 void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
   956   // The objects on the region have already been marked "in bulk" by
   957   // the caller. We only need to decide whether to push the region on
   958   // the region stack or not.
   960   if (!concurrent_marking_in_progress() || !_should_gray_objects)
   961     // We're done with marking and waiting for remark. We do not need to
   962     // push anything else on the region stack.
   963     return;
   965   HeapWord* finger = _finger;
   967   if (verbose_low())
   968     gclog_or_tty->print_cr("[global] attempting to push "
   969                            "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
   970                            PTR_FORMAT, mr.start(), mr.end(), finger);
   972   if (mr.start() < finger) {
   973     // The finger is always heap region aligned and it is not possible
   974     // for mr to span heap regions.
   975     tmp_guarantee_CM( mr.end() <= finger, "invariant" );
   977     tmp_guarantee_CM( mr.start() <= mr.end() &&
   978                       _heap_start <= mr.start() &&
   979                       mr.end() <= _heap_end,
   980                   "region boundaries should fall within the committed space" );
   981     if (verbose_low())
   982       gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
   983                              "below the finger, pushing it",
   984                              mr.start(), mr.end());
   986     if (!region_stack_push(mr)) {
   987       if (verbose_low())
   988         gclog_or_tty->print_cr("[global] region stack has overflown.");
   989     }
   990   }
   991 }
   993 void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
   994   // The object is not marked by the caller. We need to at least mark
   995   // it and maybe push it on the stack.
   997   HeapWord* addr = (HeapWord*)p;
   998   if (!_nextMarkBitMap->isMarked(addr)) {
   999     // We definitely need to mark it, irrespective of whether we bail out
  1000     // because we're done with marking.
  1001     if (_nextMarkBitMap->parMark(addr)) {
  1002       if (!concurrent_marking_in_progress() || !_should_gray_objects)
  1003         // If we're done with concurrent marking and we're waiting for
  1004         // remark, then we're not pushing anything on the stack.
  1005         return;
  1007       // No OrderAccess::store_load() is needed. It is implicit in the
  1008       // CAS done in parMark(addr) above
  1009       HeapWord* finger = _finger;
  1011       if (addr < finger) {
  1012         if (!mark_stack_push(oop(addr))) {
  1013           if (verbose_low())
  1014             gclog_or_tty->print_cr("[global] global stack overflow "
  1015                                    "during parMark");
  1022 class CMConcurrentMarkingTask: public AbstractGangTask {
  1023 private:
  1024   ConcurrentMark*       _cm;
  1025   ConcurrentMarkThread* _cmt;
  1027 public:
  1028   void work(int worker_i) {
  1029     guarantee( Thread::current()->is_ConcurrentGC_thread(),
  1030                "this should only be done by a conc GC thread" );
  1032     double start_vtime = os::elapsedVTime();
  1034     ConcurrentGCThread::stsJoin();
  1036     guarantee( (size_t)worker_i < _cm->active_tasks(), "invariant" );
  1037     CMTask* the_task = _cm->task(worker_i);
  1038     the_task->start_co_tracker();
  1039     the_task->record_start_time();
  1040     if (!_cm->has_aborted()) {
  1041       do {
  1042         double start_vtime_sec = os::elapsedVTime();
  1043         double start_time_sec = os::elapsedTime();
  1044         the_task->do_marking_step(10.0);
  1045         double end_time_sec = os::elapsedTime();
  1046         double end_vtime_sec = os::elapsedVTime();
  1047         double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
  1048         double elapsed_time_sec = end_time_sec - start_time_sec;
  1049         _cm->clear_has_overflown();
  1051         bool ret = _cm->do_yield_check(worker_i);
  1053         jlong sleep_time_ms;
  1054         if (!_cm->has_aborted() && the_task->has_aborted()) {
  1055           sleep_time_ms =
  1056             (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
  1057           ConcurrentGCThread::stsLeave();
  1058           os::sleep(Thread::current(), sleep_time_ms, false);
  1059           ConcurrentGCThread::stsJoin();
  1061         double end_time2_sec = os::elapsedTime();
  1062         double elapsed_time2_sec = end_time2_sec - start_time_sec;
  1064         the_task->update_co_tracker();
  1066 #if 0
  1067           gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
  1068                                  "overhead %1.4lf",
  1069                                  elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
  1070                                  the_task->conc_overhead(os::elapsedTime()) * 8.0);
  1071           gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
  1072                                  elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
  1073 #endif
  1074       } while (!_cm->has_aborted() && the_task->has_aborted());
  1076     the_task->record_end_time();
  1077     guarantee( !the_task->has_aborted() || _cm->has_aborted(), "invariant" );
  1079     ConcurrentGCThread::stsLeave();
  1081     double end_vtime = os::elapsedVTime();
  1082     the_task->update_co_tracker(true);
  1083     _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
  1086   CMConcurrentMarkingTask(ConcurrentMark* cm,
  1087                           ConcurrentMarkThread* cmt) :
  1088       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
  1090   ~CMConcurrentMarkingTask() { }
  1091 };
  1093 void ConcurrentMark::markFromRoots() {
  1094   // we might be tempted to assert that:
  1095   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  1096   //        "inconsistent argument?");
  1097   // However that wouldn't be right, because it's possible that
  1098   // a safepoint is indeed in progress as a younger generation
  1099   // stop-the-world GC happens even as we mark in this generation.
  1101   _restart_for_overflow = false;
  1103   set_phase(MAX2((size_t) 1, parallel_marking_threads()), true);
  1105   CMConcurrentMarkingTask markingTask(this, cmThread());
  1106   if (parallel_marking_threads() > 0)
  1107     _parallel_workers->run_task(&markingTask);
  1108   else
  1109     markingTask.work(0);
  1110   print_stats();
  1113 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  1114   // world is stopped at this checkpoint
  1115   assert(SafepointSynchronize::is_at_safepoint(),
  1116          "world should be stopped");
  1117   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  1119   // If a full collection has happened, we shouldn't do this.
  1120   if (has_aborted()) {
  1121     g1h->set_marking_complete(); // So bitmap clearing isn't confused
  1122     return;
  1125   if (VerifyDuringGC) {
  1126     HandleMark hm;  // handle scope
  1127     gclog_or_tty->print(" VerifyDuringGC:(before)");
  1128     Universe::heap()->prepare_for_verify();
  1129     Universe::verify(true, false, true);
  1132   G1CollectorPolicy* g1p = g1h->g1_policy();
  1133   g1p->record_concurrent_mark_remark_start();
  1135   double start = os::elapsedTime();
  1136   GCOverheadReporter::recordSTWStart(start);
  1138   checkpointRootsFinalWork();
  1140   double mark_work_end = os::elapsedTime();
  1142   weakRefsWork(clear_all_soft_refs);
  1144   if (has_overflown()) {
  1145     // Oops.  We overflowed.  Restart concurrent marking.
  1146     _restart_for_overflow = true;
  1147     // Clear the flag. We do not need it any more.
  1148     clear_has_overflown();
  1149     if (G1TraceMarkStackOverflow)
  1150       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
  1151   } else {
  1152     // We're done with marking.
  1153     JavaThread::satb_mark_queue_set().set_active_all_threads(false);
  1155     if (VerifyDuringGC) {
  1156       HandleMark hm;  // handle scope
  1157       gclog_or_tty->print(" VerifyDuringGC:(after)");
  1158       Universe::heap()->prepare_for_verify();
  1159       Universe::heap()->verify(/* allow_dirty */      true,
  1160                                /* silent */           false,
  1161                                /* use_prev_marking */ false);
  1165 #if VERIFY_OBJS_PROCESSED
  1166   _scan_obj_cl.objs_processed = 0;
  1167   ThreadLocalObjQueue::objs_enqueued = 0;
  1168 #endif
  1170   // Statistics
  1171   double now = os::elapsedTime();
  1172   _remark_mark_times.add((mark_work_end - start) * 1000.0);
  1173   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  1174   _remark_times.add((now - start) * 1000.0);
  1176   GCOverheadReporter::recordSTWEnd(now);
  1177   for (int i = 0; i < (int)_max_task_num; ++i)
  1178     _tasks[i]->disable_co_tracker();
  1179   _cleanup_co_tracker.enable();
  1180   _cleanup_co_tracker.reset(cleanup_task_overhead());
  1181   g1p->record_concurrent_mark_remark_end();
  1185 #define CARD_BM_TEST_MODE 0
  1187 class CalcLiveObjectsClosure: public HeapRegionClosure {
  1189   CMBitMapRO* _bm;
  1190   ConcurrentMark* _cm;
  1191   COTracker* _co_tracker;
  1192   bool _changed;
  1193   bool _yield;
  1194   size_t _words_done;
  1195   size_t _tot_live;
  1196   size_t _tot_used;
  1197   size_t _regions_done;
  1198   double _start_vtime_sec;
  1200   BitMap* _region_bm;
  1201   BitMap* _card_bm;
  1202   intptr_t _bottom_card_num;
  1203   bool _final;
  1205   void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
  1206     for (intptr_t i = start_card_num; i <= last_card_num; i++) {
  1207 #if CARD_BM_TEST_MODE
  1208       guarantee(_card_bm->at(i - _bottom_card_num),
  1209                 "Should already be set.");
  1210 #else
  1211       _card_bm->par_at_put(i - _bottom_card_num, 1);
  1212 #endif
  1216 public:
  1217   CalcLiveObjectsClosure(bool final,
  1218                          CMBitMapRO *bm, ConcurrentMark *cm,
  1219                          BitMap* region_bm, BitMap* card_bm,
  1220                          COTracker* co_tracker) :
  1221     _bm(bm), _cm(cm), _changed(false), _yield(true),
  1222     _words_done(0), _tot_live(0), _tot_used(0),
  1223     _region_bm(region_bm), _card_bm(card_bm),
  1224     _final(final), _co_tracker(co_tracker),
  1225     _regions_done(0), _start_vtime_sec(0.0)
  1227     _bottom_card_num =
  1228       intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
  1229                CardTableModRefBS::card_shift);
  1232   // It takes a region that's not empty (i.e., it has at least one
  1233   // live object in it) and sets its corresponding bit on the region
  1234   // bitmap to 1. If the region is "starts humongous" it will also set
  1235   // to 1 the bits on the region bitmap that correspond to its
  1236   // associated "continues humongous" regions.
  1237   void set_bit_for_region(HeapRegion* hr) {
  1238     assert(!hr->continuesHumongous(), "should have filtered those out");
  1240     size_t index = hr->hrs_index();
  1241     if (!hr->startsHumongous()) {
  1242       // Normal (non-humongous) case: just set the bit.
  1243       _region_bm->par_at_put((BitMap::idx_t) index, true);
  1244     } else {
  1245       // Starts humongous case: calculate how many regions are part of
  1246       // this humongous region and then set the bit range. It might
  1247       // have been a bit more efficient to look at the object that
  1248       // spans these humongous regions to calculate their number from
  1249       // the object's size. However, it's a good idea to calculate
  1250       // this based on the metadata itself, and not the region
  1251       // contents, so that this code is not aware of what goes into
  1252       // the humongous regions (in case this changes in the future).
  1253       G1CollectedHeap* g1h = G1CollectedHeap::heap();
  1254       size_t end_index = index + 1;
  1255       while (end_index < g1h->n_regions()) {
  1256         HeapRegion* chr = g1h->region_at(end_index);
  1257         if (!chr->continuesHumongous()) {
  1258           break;
  1260         end_index += 1;
  1262       _region_bm->par_at_put_range((BitMap::idx_t) index,
  1263                                    (BitMap::idx_t) end_index, true);
  1267   bool doHeapRegion(HeapRegion* hr) {
  1268     if (_co_tracker != NULL)
  1269       _co_tracker->update();
  1271     if (!_final && _regions_done == 0)
  1272       _start_vtime_sec = os::elapsedVTime();
  1274     if (hr->continuesHumongous()) {
  1275       // We will ignore these here and process them when their
  1276       // associated "starts humongous" region is processed (see
  1277       // set_bit_for_heap_region()). Note that we cannot rely on their
  1278       // associated "starts humongous" region to have their bit set to
  1279       // 1 since, due to the region chunking in the parallel region
  1280       // iteration, a "continues humongous" region might be visited
  1281       // before its associated "starts humongous".
  1282       return false;
  1285     HeapWord* nextTop = hr->next_top_at_mark_start();
  1286     HeapWord* start   = hr->top_at_conc_mark_count();
  1287     assert(hr->bottom() <= start && start <= hr->end() &&
  1288            hr->bottom() <= nextTop && nextTop <= hr->end() &&
  1289            start <= nextTop,
  1290            "Preconditions.");
  1291     // Otherwise, record the number of words we'll examine.
  1292     size_t words_done = (nextTop - start);
  1293     // Find the first marked object at or after "start".
  1294     start = _bm->getNextMarkedWordAddress(start, nextTop);
  1295     size_t marked_bytes = 0;
  1297     // Below, the term "card num" means the result of shifting an address
  1298     // by the card shift -- address 0 corresponds to card number 0.  One
  1299     // must subtract the card num of the bottom of the heap to obtain a
  1300     // card table index.
  1301     // The first card num of the sequence of live cards currently being
  1302     // constructed.  -1 ==> no sequence.
  1303     intptr_t start_card_num = -1;
  1304     // The last card num of the sequence of live cards currently being
  1305     // constructed.  -1 ==> no sequence.
  1306     intptr_t last_card_num = -1;
  1308     while (start < nextTop) {
  1309       if (_yield && _cm->do_yield_check()) {
  1310         // We yielded.  It might be for a full collection, in which case
  1311         // all bets are off; terminate the traversal.
  1312         if (_cm->has_aborted()) {
  1313           _changed = false;
  1314           return true;
  1315         } else {
  1316           // Otherwise, it might be a collection pause, and the region
  1317           // we're looking at might be in the collection set.  We'll
  1318           // abandon this region.
  1319           return false;
  1322       oop obj = oop(start);
  1323       int obj_sz = obj->size();
  1324       // The card num of the start of the current object.
  1325       intptr_t obj_card_num =
  1326         intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
  1328       HeapWord* obj_last = start + obj_sz - 1;
  1329       intptr_t obj_last_card_num =
  1330         intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);
  1332       if (obj_card_num != last_card_num) {
  1333         if (start_card_num == -1) {
  1334           assert(last_card_num == -1, "Both or neither.");
  1335           start_card_num = obj_card_num;
  1336         } else {
  1337           assert(last_card_num != -1, "Both or neither.");
  1338           assert(obj_card_num >= last_card_num, "Inv");
  1339           if ((obj_card_num - last_card_num) > 1) {
  1340             // Mark the last run, and start a new one.
  1341             mark_card_num_range(start_card_num, last_card_num);
  1342             start_card_num = obj_card_num;
  1345 #if CARD_BM_TEST_MODE
  1346         /*
  1347         gclog_or_tty->print_cr("Setting bits from %d/%d.",
  1348                                obj_card_num - _bottom_card_num,
  1349                                obj_last_card_num - _bottom_card_num);
  1350         */
  1351         for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
  1352           _card_bm->par_at_put(j - _bottom_card_num, 1);
  1354 #endif
  1356       // In any case, we set the last card num.
  1357       last_card_num = obj_last_card_num;
  1359       marked_bytes += obj_sz * HeapWordSize;
  1360       // Find the next marked object after this one.
  1361       start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
  1362       _changed = true;
  1364     // Handle the last range, if any.
  1365     if (start_card_num != -1)
  1366       mark_card_num_range(start_card_num, last_card_num);
  1367     if (_final) {
  1368       // Mark the allocated-since-marking portion...
  1369       HeapWord* tp = hr->top();
  1370       if (nextTop < tp) {
  1371         start_card_num =
  1372           intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
  1373         last_card_num =
  1374           intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
  1375         mark_card_num_range(start_card_num, last_card_num);
  1376         // This definitely means the region has live objects.
  1377         set_bit_for_region(hr);
  1381     hr->add_to_marked_bytes(marked_bytes);
  1382     // Update the live region bitmap.
  1383     if (marked_bytes > 0) {
  1384       set_bit_for_region(hr);
  1386     hr->set_top_at_conc_mark_count(nextTop);
  1387     _tot_live += hr->next_live_bytes();
  1388     _tot_used += hr->used();
  1389     _words_done = words_done;
  1391     if (!_final) {
  1392       ++_regions_done;
  1393       if (_regions_done % 10 == 0) {
  1394         double end_vtime_sec = os::elapsedVTime();
  1395         double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
  1396         if (elapsed_vtime_sec > (10.0 / 1000.0)) {
  1397           jlong sleep_time_ms =
  1398             (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
  1399 #if 0
  1400           gclog_or_tty->print_cr("CL: elapsed %1.4lf ms, sleep %1.4lf ms, "
  1401                                  "overhead %1.4lf",
  1402                                  elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
  1403                                  _co_tracker->concOverhead(os::elapsedTime()));
  1404 #endif
  1405           os::sleep(Thread::current(), sleep_time_ms, false);
  1406           _start_vtime_sec = end_vtime_sec;
  1411     return false;
  1414   bool changed() { return _changed;  }
  1415   void reset()   { _changed = false; _words_done = 0; }
  1416   void no_yield() { _yield = false; }
  1417   size_t words_done() { return _words_done; }
  1418   size_t tot_live() { return _tot_live; }
  1419   size_t tot_used() { return _tot_used; }
  1420 };
  1423 void ConcurrentMark::calcDesiredRegions() {
  1424   guarantee( _cleanup_co_tracker.enabled(), "invariant" );
  1425   _cleanup_co_tracker.start();
  1427   _region_bm.clear();
  1428   _card_bm.clear();
  1429   CalcLiveObjectsClosure calccl(false /*final*/,
  1430                                 nextMarkBitMap(), this,
  1431                                 &_region_bm, &_card_bm,
  1432                                 &_cleanup_co_tracker);
  1433   G1CollectedHeap *g1h = G1CollectedHeap::heap();
  1434   g1h->heap_region_iterate(&calccl);
  1436   do {
  1437     calccl.reset();
  1438     g1h->heap_region_iterate(&calccl);
  1439   } while (calccl.changed());
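          // The closure may yield part-way through a pass, so we re-run it
          // until a complete pass reports no further changes to the counting
          // data.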
  1441   _cleanup_co_tracker.update(true);
  1444 class G1ParFinalCountTask: public AbstractGangTask {
  1445 protected:
  1446   G1CollectedHeap* _g1h;
  1447   CMBitMap* _bm;
  1448   size_t _n_workers;
  1449   size_t *_live_bytes;
  1450   size_t *_used_bytes;
  1451   BitMap* _region_bm;
  1452   BitMap* _card_bm;
  1453 public:
  1454   G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
  1455                       BitMap* region_bm, BitMap* card_bm) :
  1456     AbstractGangTask("G1 final counting"), _g1h(g1h),
  1457     _bm(bm), _region_bm(region_bm), _card_bm(card_bm)
  1459     if (ParallelGCThreads > 0)
  1460       _n_workers = _g1h->workers()->total_workers();
  1461     else
  1462       _n_workers = 1;
  1463     _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
  1464     _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
  1467   ~G1ParFinalCountTask() {
  1468     FREE_C_HEAP_ARRAY(size_t, _live_bytes);
  1469     FREE_C_HEAP_ARRAY(size_t, _used_bytes);
  1472   void work(int i) {
  1473     CalcLiveObjectsClosure calccl(true /*final*/,
  1474                                   _bm, _g1h->concurrent_mark(),
  1475                                   _region_bm, _card_bm,
  1476                                   NULL /* CO tracker */);
  1477     calccl.no_yield();
  1478     if (ParallelGCThreads > 0) {
  1479       _g1h->heap_region_par_iterate_chunked(&calccl, i,
  1480                                             HeapRegion::FinalCountClaimValue);
  1481     } else {
  1482       _g1h->heap_region_iterate(&calccl);
  1484     assert(calccl.complete(), "Shouldn't have yielded!");
  1486     guarantee( (size_t)i < _n_workers, "invariant" );
  1487     _live_bytes[i] = calccl.tot_live();
  1488     _used_bytes[i] = calccl.tot_used();
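           // Each worker writes only into its own slot, so no synchronization
           // is needed during the parallel count; the accessors below sum the
           // per-worker slots once the gang task has completed.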
  1490   size_t live_bytes()  {
  1491     size_t live_bytes = 0;
  1492     for (size_t i = 0; i < _n_workers; ++i)
  1493       live_bytes += _live_bytes[i];
  1494     return live_bytes;
  1496   size_t used_bytes()  {
  1497     size_t used_bytes = 0;
  1498     for (size_t i = 0; i < _n_workers; ++i)
  1499       used_bytes += _used_bytes[i];
  1500     return used_bytes;
  1502 };
  1504 class G1ParNoteEndTask;
  1506 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  1507   G1CollectedHeap* _g1;
  1508   int _worker_num;
  1509   size_t _max_live_bytes;
  1510   size_t _regions_claimed;
  1511   size_t _freed_bytes;
  1512   size_t _cleared_h_regions;
  1513   size_t _freed_regions;
  1514   UncleanRegionList* _unclean_region_list;
  1515   double _claimed_region_time;
  1516   double _max_region_time;
  1518 public:
  1519   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
  1520                              UncleanRegionList* list,
  1521                              int worker_num);
  1522   size_t freed_bytes() { return _freed_bytes; }
  1523   size_t cleared_h_regions() { return _cleared_h_regions; }
  1524   size_t freed_regions() { return  _freed_regions; }
  1525   UncleanRegionList* unclean_region_list() {
  1526     return _unclean_region_list;
  1529   bool doHeapRegion(HeapRegion *r);
  1531   size_t max_live_bytes() { return _max_live_bytes; }
  1532   size_t regions_claimed() { return _regions_claimed; }
  1533   double claimed_region_time_sec() { return _claimed_region_time; }
  1534   double max_region_time_sec() { return _max_region_time; }
  1535 };
  1537 class G1ParNoteEndTask: public AbstractGangTask {
  1538   friend class G1NoteEndOfConcMarkClosure;
  1539 protected:
  1540   G1CollectedHeap* _g1h;
  1541   size_t _max_live_bytes;
  1542   size_t _freed_bytes;
  1543   ConcurrentMark::ParCleanupThreadState** _par_cleanup_thread_state;
  1544 public:
  1545   G1ParNoteEndTask(G1CollectedHeap* g1h,
  1546                    ConcurrentMark::ParCleanupThreadState**
  1547                    par_cleanup_thread_state) :
  1548     AbstractGangTask("G1 note end"), _g1h(g1h),
  1549     _max_live_bytes(0), _freed_bytes(0),
  1550     _par_cleanup_thread_state(par_cleanup_thread_state)
  1551   {}
  1553   void work(int i) {
  1554     double start = os::elapsedTime();
  1555     G1NoteEndOfConcMarkClosure g1_note_end(_g1h,
  1556                                            &_par_cleanup_thread_state[i]->list,
  1557                                            i);
  1558     if (ParallelGCThreads > 0) {
  1559       _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
  1560                                             HeapRegion::NoteEndClaimValue);
  1561     } else {
  1562       _g1h->heap_region_iterate(&g1_note_end);
  1564     assert(g1_note_end.complete(), "Shouldn't have yielded!");
  1566     // Now finish up freeing the current thread's regions.
  1567     _g1h->finish_free_region_work(g1_note_end.freed_bytes(),
  1568                                   g1_note_end.cleared_h_regions(),
  1569                                   0, NULL);
  1571       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  1572       _max_live_bytes += g1_note_end.max_live_bytes();
  1573       _freed_bytes += g1_note_end.freed_bytes();
  1575     double end = os::elapsedTime();
  1576     if (G1PrintParCleanupStats) {
  1577       gclog_or_tty->print("     Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
  1578                           "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
  1579                           i, start, end, (end-start)*1000.0,
  1580                           g1_note_end.regions_claimed(),
  1581                           g1_note_end.claimed_region_time_sec()*1000.0,
  1582                           g1_note_end.max_region_time_sec()*1000.0);
  1585   size_t max_live_bytes() { return _max_live_bytes; }
  1586   size_t freed_bytes() { return _freed_bytes; }
  1587 };
  1589 class G1ParScrubRemSetTask: public AbstractGangTask {
  1590 protected:
  1591   G1RemSet* _g1rs;
  1592   BitMap* _region_bm;
  1593   BitMap* _card_bm;
  1594 public:
  1595   G1ParScrubRemSetTask(G1CollectedHeap* g1h,
  1596                        BitMap* region_bm, BitMap* card_bm) :
  1597     AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
  1598     _region_bm(region_bm), _card_bm(card_bm)
  1599   {}
  1601   void work(int i) {
  1602     if (ParallelGCThreads > 0) {
  1603       _g1rs->scrub_par(_region_bm, _card_bm, i,
  1604                        HeapRegion::ScrubRemSetClaimValue);
  1605     } else {
  1606       _g1rs->scrub(_region_bm, _card_bm);
  1610 };
  1612 G1NoteEndOfConcMarkClosure::
  1613 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
  1614                            UncleanRegionList* list,
  1615                            int worker_num)
  1616   : _g1(g1), _worker_num(worker_num),
  1617     _max_live_bytes(0), _regions_claimed(0),
  1618     _freed_bytes(0), _cleared_h_regions(0), _freed_regions(0),
  1619     _claimed_region_time(0.0), _max_region_time(0.0),
  1620     _unclean_region_list(list)
  1621 {}
  1623 bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *r) {
  1624   // We use a claim value of zero here because all regions
  1625   // were claimed with value 1 in the FinalCount task.
  1626   r->reset_gc_time_stamp();
  1627   if (!r->continuesHumongous()) {
  1628     double start = os::elapsedTime();
  1629     _regions_claimed++;
  1630     r->note_end_of_marking();
  1631     _max_live_bytes += r->max_live_bytes();
  1632     _g1->free_region_if_totally_empty_work(r,
  1633                                            _freed_bytes,
  1634                                            _cleared_h_regions,
  1635                                            _freed_regions,
  1636                                            _unclean_region_list,
  1637                                            true /*par*/);
  1638     double region_time = (os::elapsedTime() - start);
  1639     _claimed_region_time += region_time;
  1640     if (region_time > _max_region_time) _max_region_time = region_time;
  1642   return false;
  1645 void ConcurrentMark::cleanup() {
  1646   // world is stopped at this checkpoint
  1647   assert(SafepointSynchronize::is_at_safepoint(),
  1648          "world should be stopped");
  1649   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  1651   // If a full collection has happened, we shouldn't do this.
  1652   if (has_aborted()) {
  1653     g1h->set_marking_complete(); // So bitmap clearing isn't confused
  1654     return;
  1657   if (VerifyDuringGC) {
  1658     HandleMark hm;  // handle scope
  1659     gclog_or_tty->print(" VerifyDuringGC:(before)");
  1660     Universe::heap()->prepare_for_verify();
  1661     Universe::verify(/* allow dirty  */ true,
  1662                      /* silent       */ false,
  1663                      /* prev marking */ true);
  1666   _cleanup_co_tracker.disable();
  1668   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
  1669   g1p->record_concurrent_mark_cleanup_start();
  1671   double start = os::elapsedTime();
  1672   GCOverheadReporter::recordSTWStart(start);
  1674   // Do counting once more with the world stopped for good measure.
  1675   G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
  1676                                         &_region_bm, &_card_bm);
  1677   if (ParallelGCThreads > 0) {
  1678     assert(g1h->check_heap_region_claim_values(
  1679                                                HeapRegion::InitialClaimValue),
  1680            "sanity check");
  1682     int n_workers = g1h->workers()->total_workers();
  1683     g1h->set_par_threads(n_workers);
  1684     g1h->workers()->run_task(&g1_par_count_task);
  1685     g1h->set_par_threads(0);
  1687     assert(g1h->check_heap_region_claim_values(
  1688                                              HeapRegion::FinalCountClaimValue),
  1689            "sanity check");
  1690   } else {
  1691     g1_par_count_task.work(0);
  1694   size_t known_garbage_bytes =
  1695     g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
  1696 #if 0
  1697   gclog_or_tty->print_cr("used %1.2lf, live %1.2lf, garbage %1.2lf",
  1698                          (double) g1_par_count_task.used_bytes() / (double) (1024 * 1024),
  1699                          (double) g1_par_count_task.live_bytes() / (double) (1024 * 1024),
  1700                          (double) known_garbage_bytes / (double) (1024 * 1024));
  1701 #endif // 0
  1702   g1p->set_known_garbage_bytes(known_garbage_bytes);
  1704   size_t start_used_bytes = g1h->used();
  1705   _at_least_one_mark_complete = true;
  1706   g1h->set_marking_complete();
  1708   double count_end = os::elapsedTime();
  1709   double this_final_counting_time = (count_end - start);
  1710   if (G1PrintParCleanupStats) {
  1711     gclog_or_tty->print_cr("Cleanup:");
  1712     gclog_or_tty->print_cr("  Finalize counting: %8.3f ms",
  1713                            this_final_counting_time*1000.0);
  1715   _total_counting_time += this_final_counting_time;
  1717   // Install newly created mark bitMap as "prev".
  1718   swapMarkBitMaps();
  1720   g1h->reset_gc_time_stamp();
  1722   // Note end of marking in all heap regions.
  1723   double note_end_start = os::elapsedTime();
  1724   G1ParNoteEndTask g1_par_note_end_task(g1h, _par_cleanup_thread_state);
  1725   if (ParallelGCThreads > 0) {
  1726     int n_workers = g1h->workers()->total_workers();
  1727     g1h->set_par_threads(n_workers);
  1728     g1h->workers()->run_task(&g1_par_note_end_task);
  1729     g1h->set_par_threads(0);
  1731     assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
  1732            "sanity check");
  1733   } else {
  1734     g1_par_note_end_task.work(0);
  1736   g1h->set_unclean_regions_coming(true);
  1737   double note_end_end = os::elapsedTime();
  1738   // Tell the mutators that there might be unclean regions coming...
  1739   if (G1PrintParCleanupStats) {
  1740     gclog_or_tty->print_cr("  note end of marking: %8.3f ms.",
  1741                            (note_end_end - note_end_start)*1000.0);
  1745   // Any remembered set scrubbing must come before the record_concurrent_mark_cleanup_end()
  1746   // call below, since it affects the metric by which we sort the heap regions.
  1747   if (G1ScrubRemSets) {
  1748     double rs_scrub_start = os::elapsedTime();
  1749     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
  1750     if (ParallelGCThreads > 0) {
  1751       int n_workers = g1h->workers()->total_workers();
  1752       g1h->set_par_threads(n_workers);
  1753       g1h->workers()->run_task(&g1_par_scrub_rs_task);
  1754       g1h->set_par_threads(0);
  1756       assert(g1h->check_heap_region_claim_values(
  1757                                             HeapRegion::ScrubRemSetClaimValue),
  1758              "sanity check");
  1759     } else {
  1760       g1_par_scrub_rs_task.work(0);
  1763     double rs_scrub_end = os::elapsedTime();
  1764     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
  1765     _total_rs_scrub_time += this_rs_scrub_time;
  1768   // this will also free any regions totally full of garbage objects,
  1769   // and sort the regions.
  1770   g1h->g1_policy()->record_concurrent_mark_cleanup_end(
  1771                         g1_par_note_end_task.freed_bytes(),
  1772                         g1_par_note_end_task.max_live_bytes());
  1774   // Statistics.
  1775   double end = os::elapsedTime();
  1776   _cleanup_times.add((end - start) * 1000.0);
  1777   GCOverheadReporter::recordSTWEnd(end);
  1779   // G1CollectedHeap::heap()->print();
  1780   // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
  1781   // G1CollectedHeap::heap()->get_gc_time_stamp());
  1783   if (PrintGC || PrintGCDetails) {
  1784     g1h->print_size_transition(gclog_or_tty,
  1785                                start_used_bytes,
  1786                                g1h->used(),
  1787                                g1h->capacity());
  1790   size_t cleaned_up_bytes = start_used_bytes - g1h->used();
  1791   g1p->decrease_known_garbage_bytes(cleaned_up_bytes);
  1793   // We need to make this be a "collection" so any collection pause that
  1794   // races with it goes around and waits for completeCleanup to finish.
  1795   g1h->increment_total_collections();
  1797   if (VerifyDuringGC) {
  1798     HandleMark hm;  // handle scope
  1799     gclog_or_tty->print(" VerifyDuringGC:(after)");
  1800     Universe::heap()->prepare_for_verify();
  1801     Universe::verify(/* allow dirty  */ true,
  1802                      /* silent       */ false,
  1803                      /* prev marking */ true);
  1807 void ConcurrentMark::completeCleanup() {
  1808   // A full collection intervened.
  1809   if (has_aborted()) return;
  1811   int first = 0;
  1812   int last = (int)MAX2(ParallelGCThreads, (size_t)1);
  1813   for (int t = 0; t < last; t++) {
  1814     UncleanRegionList* list = &_par_cleanup_thread_state[t]->list;
  1815     assert(list->well_formed(), "Inv");
  1816     HeapRegion* hd = list->hd();
  1817     while (hd != NULL) {
  1818       // Clear the remembered set and move the region onto the unclean list.
  1819       hd->rem_set()->clear();
  1820       HeapRegion* next_hd = hd->next_from_unclean_list();
  1821       (void)list->pop();
  1822       guarantee(list->hd() == next_hd, "how not?");
  1823       _g1h->put_region_on_unclean_list(hd);
  1824       if (!hd->isHumongous()) {
  1825         // Increment the _free_regions count by one for this region.
  1826         _g1h->finish_free_region_work(0, 0, 1, NULL);
  1828       hd = list->hd();
  1829       guarantee(hd == next_hd, "how not?");
  1835 class G1CMIsAliveClosure: public BoolObjectClosure {
  1836   G1CollectedHeap* _g1;
  1837  public:
  1838   G1CMIsAliveClosure(G1CollectedHeap* g1) :
  1839     _g1(g1)
  1840   {}
  1842   void do_object(oop obj) {
  1843     assert(false, "not to be invoked");
  1845   bool do_object_b(oop obj) {
  1846     HeapWord* addr = (HeapWord*)obj;
  1847     return addr != NULL &&
  1848            (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
  1850 };
  1852 class G1CMKeepAliveClosure: public OopClosure {
  1853   G1CollectedHeap* _g1;
  1854   ConcurrentMark*  _cm;
  1855   CMBitMap*        _bitMap;
  1856  public:
  1857   G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
  1858                        CMBitMap* bitMap) :
  1859     _g1(g1), _cm(cm),
  1860     _bitMap(bitMap) {}
  1862   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  1863   virtual void do_oop(      oop* p) { do_oop_work(p); }
  1865   template <class T> void do_oop_work(T* p) {
  1866     oop thisOop = oopDesc::load_decode_heap_oop(p);
  1867     HeapWord* addr = (HeapWord*)thisOop;
  1868     if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(thisOop)) {
  1869       _bitMap->mark(addr);
  1870       _cm->mark_stack_push(thisOop);
  1873 };
  1875 class G1CMDrainMarkingStackClosure: public VoidClosure {
  1876   CMMarkStack*                  _markStack;
  1877   CMBitMap*                     _bitMap;
  1878   G1CMKeepAliveClosure*         _oopClosure;
  1879  public:
  1880   G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
  1881                                G1CMKeepAliveClosure* oopClosure) :
  1882     _bitMap(bitMap),
  1883     _markStack(markStack),
  1884     _oopClosure(oopClosure)
  1885   {}
  1887   void do_void() {
  1888     _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
  1890 };
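       // The three closures above cooperate during weak reference processing
       // below: G1CMIsAliveClosure decides liveness, G1CMKeepAliveClosure marks
       // and pushes referents that must be kept alive, and
       // G1CMDrainMarkingStackClosure drains the mark stack so that everything
       // newly kept alive is traced transitively.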
  1892 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
  1893   ResourceMark rm;
  1894   HandleMark   hm;
  1895   G1CollectedHeap* g1h   = G1CollectedHeap::heap();
  1896   ReferenceProcessor* rp = g1h->ref_processor();
  1898   // Process weak references.
  1899   rp->setup_policy(clear_all_soft_refs);
  1900   assert(_markStack.isEmpty(), "mark stack should be empty");
  1902   G1CMIsAliveClosure   g1IsAliveClosure  (g1h);
  1903   G1CMKeepAliveClosure g1KeepAliveClosure(g1h, this, nextMarkBitMap());
  1904   G1CMDrainMarkingStackClosure
  1905     g1DrainMarkingStackClosure(nextMarkBitMap(), &_markStack,
  1906                                &g1KeepAliveClosure);
  1908   // XXXYYY  Also: copy the parallel ref processing code from CMS.
  1909   rp->process_discovered_references(&g1IsAliveClosure,
  1910                                     &g1KeepAliveClosure,
  1911                                     &g1DrainMarkingStackClosure,
  1912                                     NULL);
  1913   assert(_markStack.overflow() || _markStack.isEmpty(),
  1914          "mark stack should be empty (unless it overflowed)");
  1915   if (_markStack.overflow()) {
  1916     set_has_overflown();
  1919   rp->enqueue_discovered_references();
  1920   rp->verify_no_references_recorded();
  1921   assert(!rp->discovery_enabled(), "should have been disabled");
  1923   // Now clean up stale oops in SymbolTable and StringTable
  1924   SymbolTable::unlink(&g1IsAliveClosure);
  1925   StringTable::unlink(&g1IsAliveClosure);
  1928 void ConcurrentMark::swapMarkBitMaps() {
  1929   CMBitMapRO* temp = _prevMarkBitMap;
  1930   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
  1931   _nextMarkBitMap  = (CMBitMap*)  temp;
  1934 class CMRemarkTask: public AbstractGangTask {
  1935 private:
  1936   ConcurrentMark *_cm;
  1938 public:
  1939   void work(int worker_i) {
  1940     // Since all available tasks are actually started, we should
  1941     // only proceed if we're supposed to be active.
  1942     if ((size_t)worker_i < _cm->active_tasks()) {
  1943       CMTask* task = _cm->task(worker_i);
  1944       task->record_start_time();
  1945       do {
  1946         task->do_marking_step(1000000000.0 /* something very large */);
  1947       } while (task->has_aborted() && !_cm->has_overflown());
  1948       // If we overflow, then we do not want to restart. We instead
  1949       // want to abort remark and do concurrent marking again.
  1950       task->record_end_time();
  1954   CMRemarkTask(ConcurrentMark* cm) :
  1955     AbstractGangTask("Par Remark"), _cm(cm) { }
  1956 };
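       // Each remark worker keeps calling do_marking_step() until its task
       // finishes without aborting; if the global mark stack has overflown we
       // stop retrying, remark is abandoned and concurrent marking restarts.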
  1958 void ConcurrentMark::checkpointRootsFinalWork() {
  1959   ResourceMark rm;
  1960   HandleMark   hm;
  1961   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  1963   g1h->ensure_parsability(false);
  1965   if (ParallelGCThreads > 0) {
  1966     g1h->change_strong_roots_parity();
  1967     // this is remark, so we'll use up all available threads
  1968     int active_workers = ParallelGCThreads;
  1969     set_phase(active_workers, false);
  1971     CMRemarkTask remarkTask(this);
  1972     // We will start all available threads, even if we decide that the
  1973     // active_workers will be fewer. The extra ones will just bail out
  1974     // immediately.
  1975     int n_workers = g1h->workers()->total_workers();
  1976     g1h->set_par_threads(n_workers);
  1977     g1h->workers()->run_task(&remarkTask);
  1978     g1h->set_par_threads(0);
  1980     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  1981     guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" );
  1982   } else {
  1983     g1h->change_strong_roots_parity();
  1984     // this is remark, so we'll use up all available threads
  1985     int active_workers = 1;
  1986     set_phase(active_workers, false);
  1988     CMRemarkTask remarkTask(this);
  1989     // We will start all available threads, even if we decide that the
  1990     // active_workers will be fewer. The extra ones will just bail out
  1991     // immediately.
  1992     remarkTask.work(0);
  1994     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  1995     guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" );
  1998   print_stats();
  2000   if (!restart_for_overflow())
  2001     set_non_marking_state();
  2003 #if VERIFY_OBJS_PROCESSED
  2004   if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
  2005     gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
  2006                            _scan_obj_cl.objs_processed,
  2007                            ThreadLocalObjQueue::objs_enqueued);
  2008     guarantee(_scan_obj_cl.objs_processed ==
  2009               ThreadLocalObjQueue::objs_enqueued,
  2010               "Different number of objs processed and enqueued.");
  2012 #endif
  2015 class ReachablePrinterOopClosure: public OopClosure {
  2016 private:
  2017   G1CollectedHeap* _g1h;
  2018   CMBitMapRO*      _bitmap;
  2019   outputStream*    _out;
  2021 public:
  2022   ReachablePrinterOopClosure(CMBitMapRO* bitmap, outputStream* out) :
  2023     _bitmap(bitmap), _g1h(G1CollectedHeap::heap()), _out(out) { }
  2025   void do_oop(narrowOop* p) { do_oop_work(p); }
  2026   void do_oop(      oop* p) { do_oop_work(p); }
  2028   template <class T> void do_oop_work(T* p) {
  2029     oop         obj = oopDesc::load_decode_heap_oop(p);
  2030     const char* str = NULL;
  2031     const char* str2 = "";
  2033     if (!_g1h->is_in_g1_reserved(obj))
  2034       str = "outside G1 reserved";
  2035     else {
  2036       HeapRegion* hr  = _g1h->heap_region_containing(obj);
  2037       guarantee( hr != NULL, "invariant" );
  2038       if (hr->obj_allocated_since_prev_marking(obj)) {
  2039         str = "over TAMS";
  2040         if (_bitmap->isMarked((HeapWord*) obj))
  2041           str2 = " AND MARKED";
  2042       } else if (_bitmap->isMarked((HeapWord*) obj))
  2043         str = "marked";
  2044       else
  2045         str = "#### NOT MARKED ####";
  2048     _out->print_cr("    "PTR_FORMAT" contains "PTR_FORMAT" %s%s",
  2049                    p, (void*) obj, str, str2);
  2051 };
  2053 class ReachablePrinterClosure: public BitMapClosure {
  2054 private:
  2055   CMBitMapRO* _bitmap;
  2056   outputStream* _out;
  2058 public:
  2059   ReachablePrinterClosure(CMBitMapRO* bitmap, outputStream* out) :
  2060     _bitmap(bitmap), _out(out) { }
  2062   bool do_bit(size_t offset) {
  2063     HeapWord* addr = _bitmap->offsetToHeapWord(offset);
  2064     ReachablePrinterOopClosure oopCl(_bitmap, _out);
  2066     _out->print_cr("  obj "PTR_FORMAT", offset %10d (marked)", addr, offset);
  2067     oop(addr)->oop_iterate(&oopCl);
  2068     _out->print_cr("");
  2070     return true;
  2072 };
  2074 class ObjInRegionReachablePrinterClosure : public ObjectClosure {
  2075 private:
  2076   CMBitMapRO* _bitmap;
  2077   outputStream* _out;
  2079 public:
  2080   void do_object(oop o) {
  2081     ReachablePrinterOopClosure oopCl(_bitmap, _out);
  2083     _out->print_cr("  obj "PTR_FORMAT" (over TAMS)", (void*) o);
  2084     o->oop_iterate(&oopCl);
  2085     _out->print_cr("");
  2088   ObjInRegionReachablePrinterClosure(CMBitMapRO* bitmap, outputStream* out) :
  2089     _bitmap(bitmap), _out(out) { }
  2090 };
  2092 class RegionReachablePrinterClosure : public HeapRegionClosure {
  2093 private:
  2094   CMBitMapRO* _bitmap;
  2095   outputStream* _out;
  2097 public:
  2098   bool doHeapRegion(HeapRegion* hr) {
  2099     HeapWord* b = hr->bottom();
  2100     HeapWord* e = hr->end();
  2101     HeapWord* t = hr->top();
  2102     HeapWord* p = hr->prev_top_at_mark_start();
  2103     _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
  2104                    "PTAMS: "PTR_FORMAT, b, e, t, p);
  2105     _out->print_cr("");
  2107     ObjInRegionReachablePrinterClosure ocl(_bitmap, _out);
  2108     hr->object_iterate_mem_careful(MemRegion(p, t), &ocl);
  2110     return false;
  2113   RegionReachablePrinterClosure(CMBitMapRO* bitmap,
  2114                                 outputStream* out) :
  2115     _bitmap(bitmap), _out(out) { }
  2116 };
  2118 void ConcurrentMark::print_prev_bitmap_reachable() {
  2119   outputStream* out = gclog_or_tty;
  2121 #if SEND_HEAP_DUMP_TO_FILE
  2122   guarantee(heap_dump_file == NULL, "Protocol");
  2123   char fn_buf[100];
  2124   sprintf(fn_buf, "/tmp/dump.txt.%d", os::current_process_id());
  2125   heap_dump_file = fopen(fn_buf, "w");
  2126   fileStream fstream(heap_dump_file);
  2127   out = &fstream;
  2128 #endif // SEND_HEAP_DUMP_TO_FILE
  2130   RegionReachablePrinterClosure rcl(_prevMarkBitMap, out);
  2131   out->print_cr("--- ITERATING OVER REGIONS WITH PTAMS < TOP");
  2132   _g1h->heap_region_iterate(&rcl);
  2133   out->print_cr("");
  2135   ReachablePrinterClosure cl(_prevMarkBitMap, out);
  2136   out->print_cr("--- REACHABLE OBJECTS ON THE BITMAP");
  2137   _prevMarkBitMap->iterate(&cl);
  2138   out->print_cr("");
  2140 #if SEND_HEAP_DUMP_TO_FILE
  2141   fclose(heap_dump_file);
  2142   heap_dump_file = NULL;
  2143 #endif // SEND_HEAP_DUMP_TO_FILE
  2146 // This note is for drainAllSATBBuffers and the code in between.
  2147 // In the future we could reuse a task to do this work during an
  2148 // evacuation pause (since now tasks are not active and can be claimed
  2149 // during an evacuation pause). This was a late change to the code and
  2150 // is currently not being taken advantage of.
  2152 class CMGlobalObjectClosure : public ObjectClosure {
  2153 private:
  2154   ConcurrentMark* _cm;
  2156 public:
  2157   void do_object(oop obj) {
  2158     _cm->deal_with_reference(obj);
  2161   CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
  2162 };
  2164 void ConcurrentMark::deal_with_reference(oop obj) {
  2165   if (verbose_high())
  2166     gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
  2167                            (void*) obj);
  2170   HeapWord* objAddr = (HeapWord*) obj;
  2171   assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
  2172   if (_g1h->is_in_g1_reserved(objAddr)) {
  2173     tmp_guarantee_CM( obj != NULL, "is_in_g1_reserved should ensure this" );
  2174     HeapRegion* hr = _g1h->heap_region_containing(obj);
  2175     if (_g1h->is_obj_ill(obj, hr)) {
  2176       if (verbose_high())
  2177         gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
  2178                                "marked", (void*) obj);
  2180       // we need to mark it first
  2181       if (_nextMarkBitMap->parMark(objAddr)) {
  2182         // No OrderAccess::store_load() is needed. It is implicit in the
  2183         // CAS done in parMark(objAddr) above
  2184         HeapWord* finger = _finger;
  2185         if (objAddr < finger) {
  2186           if (verbose_high())
  2187             gclog_or_tty->print_cr("[global] below the global finger "
  2188                                    "("PTR_FORMAT"), pushing it", finger);
  2189           if (!mark_stack_push(obj)) {
  2190             if (verbose_low())
  2191               gclog_or_tty->print_cr("[global] global stack overflow during "
  2192                                      "deal_with_reference");
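       // Note that deal_with_reference() above only pushes an object on the
       // global mark stack when it lies below the global finger; objects at or
       // above the finger will still be visited by the bitmap iteration, so
       // pushing them would be redundant.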
  2200 void ConcurrentMark::drainAllSATBBuffers() {
  2201   CMGlobalObjectClosure oc(this);
  2202   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  2203   satb_mq_set.set_closure(&oc);
  2205   while (satb_mq_set.apply_closure_to_completed_buffer()) {
  2206     if (verbose_medium())
  2207       gclog_or_tty->print_cr("[global] processed an SATB buffer");
  2210   // no need to check whether we should do this, as this is only
  2211   // called during an evacuation pause
  2212   satb_mq_set.iterate_closure_all_threads();
  2214   satb_mq_set.set_closure(NULL);
  2215   guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" );
  2218 void ConcurrentMark::markPrev(oop p) {
  2219   // Note we are overriding the read-only view of the prev map here, via
  2220   // the cast.
  2221   ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
  2224 void ConcurrentMark::clear(oop p) {
  2225   assert(p != NULL && p->is_oop(), "expected an oop");
  2226   HeapWord* addr = (HeapWord*)p;
  2227   assert(addr >= _nextMarkBitMap->startWord() &&
  2228          addr < _nextMarkBitMap->endWord(), "in a region");
  2230   _nextMarkBitMap->clear(addr);
  2233 void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
  2234   // Note we are overriding the read-only view of the prev map here, via
  2235   // the cast.
  2236   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
  2237   _nextMarkBitMap->clearRange(mr);
  2240 HeapRegion*
  2241 ConcurrentMark::claim_region(int task_num) {
  2242   // "checkpoint" the finger
  2243   HeapWord* finger = _finger;
  2245   // _heap_end will not change underneath our feet; it only changes at
  2246   // yield points.
  2247   while (finger < _heap_end) {
  2248     tmp_guarantee_CM( _g1h->is_in_g1_reserved(finger), "invariant" );
  2250     // is the gap between reading the finger and doing the CAS too long?
  2252     HeapRegion* curr_region   = _g1h->heap_region_containing(finger);
  2253     HeapWord*   bottom        = curr_region->bottom();
  2254     HeapWord*   end           = curr_region->end();
  2255     HeapWord*   limit         = curr_region->next_top_at_mark_start();
  2257     if (verbose_low())
  2258       gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
  2259                              "["PTR_FORMAT", "PTR_FORMAT"), "
  2260                              "limit = "PTR_FORMAT,
  2261                              task_num, curr_region, bottom, end, limit);
  2263     HeapWord* res =
  2264       (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
  2265     if (res == finger) {
  2266       // we succeeded
  2268       // notice that _finger == end cannot be guaranteed here since
  2269       // someone else might have moved the finger even further.
  2270       guarantee( _finger >= end, "the finger should have moved forward" );
  2272       if (verbose_low())
  2273         gclog_or_tty->print_cr("[%d] we were successful with region = "
  2274                                PTR_FORMAT, task_num, curr_region);
  2276       if (limit > bottom) {
  2277         if (verbose_low())
  2278           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
  2279                                  "returning it ", task_num, curr_region);
  2280         return curr_region;
  2281       } else {
  2282         tmp_guarantee_CM( limit == bottom,
  2283                           "the region limit should be at bottom" );
  2284         if (verbose_low())
  2285           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
  2286                                  "returning NULL", task_num, curr_region);
  2287         // we return NULL and the caller should try calling
  2288         // claim_region() again.
  2289         return NULL;
  2291     } else {
  2292       guarantee( _finger > finger, "the finger should have moved forward" );
  2293       if (verbose_low())
  2294         gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
  2295                                "global finger = "PTR_FORMAT", "
  2296                                "our finger = "PTR_FORMAT,
  2297                                task_num, _finger, finger);
  2299       // read it again
  2300       finger = _finger;
  2304   return NULL;
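       // claim_region() serializes region claiming on the global finger: a task
       // reads _finger, computes the end of the region containing it, and tries
       // to CAS the finger forward to that end. The task whose CAS succeeds
       // owns the region; the others re-read the finger and retry.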
  2307 void ConcurrentMark::oops_do(OopClosure* cl) {
  2308   if (_markStack.size() > 0 && verbose_low())
  2309     gclog_or_tty->print_cr("[global] scanning the global marking stack, "
  2310                            "size = %d", _markStack.size());
  2311   // we first iterate over the contents of the mark stack...
  2312   _markStack.oops_do(cl);
  2314   for (int i = 0; i < (int)_max_task_num; ++i) {
  2315     OopTaskQueue* queue = _task_queues->queue((int)i);
  2317     if (queue->size() > 0 && verbose_low())
  2318       gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
  2319                              "size = %d", i, queue->size());
  2321     // ...then over the contents of all the task queues.
  2322     queue->oops_do(cl);
  2325   // finally, invalidate any entries in the region stack that
  2326   // point into the collection set
  2327   if (_regionStack.invalidate_entries_into_cset()) {
  2328     // otherwise, any gray objects copied during the evacuation pause
  2329     // might not be visited.
  2330     guarantee( _should_gray_objects, "invariant" );
  2334 void ConcurrentMark::clear_marking_state() {
  2335   _markStack.setEmpty();
  2336   _markStack.clear_overflow();
  2337   _regionStack.setEmpty();
  2338   _regionStack.clear_overflow();
  2339   clear_has_overflown();
  2340   _finger = _heap_start;
  2342   for (int i = 0; i < (int)_max_task_num; ++i) {
  2343     OopTaskQueue* queue = _task_queues->queue(i);
  2344     queue->set_empty();
  2348 void ConcurrentMark::print_stats() {
  2349   if (verbose_stats()) {
  2350     gclog_or_tty->print_cr("---------------------------------------------------------------------");
  2351     for (size_t i = 0; i < _active_tasks; ++i) {
  2352       _tasks[i]->print_stats();
  2353       gclog_or_tty->print_cr("---------------------------------------------------------------------");
  2358 class CSMarkOopClosure: public OopClosure {
  2359   friend class CSMarkBitMapClosure;
  2361   G1CollectedHeap* _g1h;
  2362   CMBitMap*        _bm;
  2363   ConcurrentMark*  _cm;
  2364   oop*             _ms;
  2365   jint*            _array_ind_stack;
  2366   int              _ms_size;
  2367   int              _ms_ind;
  2368   int              _array_increment;
  2370   bool push(oop obj, int arr_ind = 0) {
  2371     if (_ms_ind == _ms_size) {
  2372       gclog_or_tty->print_cr("Mark stack is full.");
  2373       return false;
  2375     _ms[_ms_ind] = obj;
  2376     if (obj->is_objArray()) _array_ind_stack[_ms_ind] = arr_ind;
  2377     _ms_ind++;
  2378     return true;
  2381   oop pop() {
  2382     if (_ms_ind == 0) return NULL;
  2383     else {
  2384       _ms_ind--;
  2385       return _ms[_ms_ind];
  2389   template <class T> bool drain() {
  2390     while (_ms_ind > 0) {
  2391       oop obj = pop();
  2392       assert(obj != NULL, "Since index was non-zero.");
  2393       if (obj->is_objArray()) {
  2394         jint arr_ind = _array_ind_stack[_ms_ind];
  2395         objArrayOop aobj = objArrayOop(obj);
  2396         jint len = aobj->length();
  2397         jint next_arr_ind = arr_ind + _array_increment;
  2398         if (next_arr_ind < len) {
  2399           push(obj, next_arr_ind);
  2401         // Now process this portion of this one.
  2402         int lim = MIN2(next_arr_ind, len);
  2403         for (int j = arr_ind; j < lim; j++) {
  2404           do_oop(aobj->obj_at_addr<T>(j));
  2407       } else {
  2408         obj->oop_iterate(this);
  2410       if (abort()) return false;
  2412     return true;
  2415 public:
  2416   CSMarkOopClosure(ConcurrentMark* cm, int ms_size) :
  2417     _g1h(G1CollectedHeap::heap()),
  2418     _cm(cm),
  2419     _bm(cm->nextMarkBitMap()),
  2420     _ms_size(ms_size), _ms_ind(0),
  2421     _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
  2422     _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
  2423     _array_increment(MAX2(ms_size/8, 16))
  2424   {}
  2426   ~CSMarkOopClosure() {
  2427     FREE_C_HEAP_ARRAY(oop, _ms);
  2428     FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
  2431   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  2432   virtual void do_oop(      oop* p) { do_oop_work(p); }
  2434   template <class T> void do_oop_work(T* p) {
  2435     T heap_oop = oopDesc::load_heap_oop(p);
  2436     if (oopDesc::is_null(heap_oop)) return;
  2437     oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
  2438     if (obj->is_forwarded()) {
  2439       // If the object has already been forwarded, we have to make sure
  2440       // that it's marked.  So follow the forwarding pointer.  Note that
  2441       // this does the right thing for self-forwarding pointers in the
  2442       // evacuation failure case.
  2443       obj = obj->forwardee();
  2445     HeapRegion* hr = _g1h->heap_region_containing(obj);
  2446     if (hr != NULL) {
  2447       if (hr->in_collection_set()) {
  2448         if (_g1h->is_obj_ill(obj)) {
  2449           _bm->mark((HeapWord*)obj);
  2450           if (!push(obj)) {
  2451             gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
  2452             set_abort();
  2455       } else {
  2456         // Outside the collection set; we need to gray it
  2457         _cm->deal_with_reference(obj);
  2461 };
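       // Object arrays are scanned in chunks of _array_increment elements:
       // after a chunk is processed the array is pushed back with the index of
       // the next chunk, so a very large array is traced incrementally rather
       // than all at once.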
  2463 class CSMarkBitMapClosure: public BitMapClosure {
  2464   G1CollectedHeap* _g1h;
  2465   CMBitMap*        _bitMap;
  2466   ConcurrentMark*  _cm;
  2467   CSMarkOopClosure _oop_cl;
  2468 public:
  2469   CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :
  2470     _g1h(G1CollectedHeap::heap()),
  2471     _bitMap(cm->nextMarkBitMap()),
  2472     _oop_cl(cm, ms_size)
  2473   {}
  2475   ~CSMarkBitMapClosure() {}
  2477   bool do_bit(size_t offset) {
  2478     // convert offset into a HeapWord*
  2479     HeapWord* addr = _bitMap->offsetToHeapWord(offset);
  2480     assert(addr >= _bitMap->startWord() && addr < _bitMap->endWord(),
  2481            "address out of range");
  2482     assert(_bitMap->isMarked(addr), "tautology");
  2483     oop obj = oop(addr);
  2484     if (!obj->is_forwarded()) {
  2485       if (!_oop_cl.push(obj)) return false;
  2486       if (UseCompressedOops) {
  2487         if (!_oop_cl.drain<narrowOop>()) return false;
  2488       } else {
  2489         if (!_oop_cl.drain<oop>()) return false;
  2492     // Otherwise...
  2493     return true;
  2495 };
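       // drain() is a template so that array elements are read with the correct
       // reference width: narrowOop when compressed oops are enabled, regular
       // oop otherwise.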
  2498 class CompleteMarkingInCSHRClosure: public HeapRegionClosure {
  2499   CMBitMap* _bm;
  2500   CSMarkBitMapClosure _bit_cl;
  2501   enum SomePrivateConstants {
  2502     MSSize = 1000
  2503   };
  2504   bool _completed;
  2505 public:
  2506   CompleteMarkingInCSHRClosure(ConcurrentMark* cm) :
  2507     _bm(cm->nextMarkBitMap()),
  2508     _bit_cl(cm, MSSize),
  2509     _completed(true)
  2510   {}
  2512   ~CompleteMarkingInCSHRClosure() {}
  2514   bool doHeapRegion(HeapRegion* r) {
  2515     if (!r->evacuation_failed()) {
  2516       MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
  2517       if (!mr.is_empty()) {
  2518         if (!_bm->iterate(&_bit_cl, mr)) {
  2519           _completed = false;
  2520           return true;
  2524     return false;
  2527   bool completed() { return _completed; }
  2528 };
  2530 class ClearMarksInHRClosure: public HeapRegionClosure {
  2531   CMBitMap* _bm;
  2532 public:
  2533   ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { }
  2535   bool doHeapRegion(HeapRegion* r) {
  2536     if (!r->used_region().is_empty() && !r->evacuation_failed()) {
  2537       MemRegion usedMR = r->used_region();
  2538       _bm->clearRange(usedMR);
  2540     return false;
  2542 };
  2544 void ConcurrentMark::complete_marking_in_collection_set() {
  2545   G1CollectedHeap* g1h =  G1CollectedHeap::heap();
  2547   if (!g1h->mark_in_progress()) {
  2548     g1h->g1_policy()->record_mark_closure_time(0.0);
  2549     return;
  2552   int i = 1;
  2553   double start = os::elapsedTime();
  2554   while (true) {
  2555     i++;
  2556     CompleteMarkingInCSHRClosure cmplt(this);
  2557     g1h->collection_set_iterate(&cmplt);
  2558     if (cmplt.completed()) break;
  2560   double end_time = os::elapsedTime();
  2561   double elapsed_time_ms = (end_time - start) * 1000.0;
  2562   g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
  2563   if (PrintGCDetails) {
  2564     gclog_or_tty->print_cr("Mark closure took %5.2f ms.", elapsed_time_ms);
  2567   ClearMarksInHRClosure clr(nextMarkBitMap());
  2568   g1h->collection_set_iterate(&clr);
  2571 // The next two methods deal with the following optimisation. Some
  2572 // objects are gray by being marked and located above the finger. If
  2573 // they are copied, during an evacuation pause, below the finger then
  2574 // they need to be pushed on the stack. The observation is that, if
  2575 // there are no regions in the collection set located above the
  2576 // finger, then the above cannot happen, hence we do not need to
  2577 // explicitly gray any objects when copying them to below the
  2578 // finger. The global stack will be scanned to ensure that, if it
  2579 // points to objects being copied, it will update their
  2580 // location. There is a tricky situation with the gray objects in
  2581 // region stack that are being copied, however. See the comment in
  2582 // newCSet().
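       // In other words: when every collection set region lies below the lowest
       // (global or per-task) finger, none of the objects being evacuated can
       // be marked-but-not-yet-scanned, so nothing needs to be explicitly
       // grayed during the pause.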
  2584 void ConcurrentMark::newCSet() {
  2585   if (!concurrent_marking_in_progress())
  2586     // nothing to do if marking is not in progress
  2587     return;
  2589   // find what the lowest finger is among the global and local fingers
  2590   _min_finger = _finger;
  2591   for (int i = 0; i < (int)_max_task_num; ++i) {
  2592     CMTask* task = _tasks[i];
  2593     HeapWord* task_finger = task->finger();
  2594     if (task_finger != NULL && task_finger < _min_finger)
  2595       _min_finger = task_finger;
  2598   _should_gray_objects = false;
  2600   // This fixes a very subtle and frustrating bug. It might be the case
  2601   // that, during an evacuation pause, heap regions that contain
  2602   // objects that are gray (by being in regions contained in the
  2603   // region stack) are included in the collection set. Since such gray
  2604   // objects will be moved, and because it's not easy to redirect
  2605   // region stack entries to point to a new location (because objects
  2606   // in one region might be scattered to multiple regions after they
  2607   // are copied), one option is to ensure that all marked objects
  2608   // copied during a pause are pushed on the stack. Notice, however,
  2609   // that this problem can only happen when the region stack is not
  2610   // empty during an evacuation pause. So, we make the fix a bit less
  2611   // conservative and ensure that regions are pushed on the stack,
  2612   // irrespective of whether all collection set regions are below the
  2613   // finger, if the region stack is not empty. This is expected to be
  2614   // a rare case, so I don't think it's necessary to be smarter about it.
  2615   if (!region_stack_empty())
  2616     _should_gray_objects = true;
  2619 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
  2620   if (!concurrent_marking_in_progress())
  2621     return;
  2623   HeapWord* region_end = hr->end();
  2624   if (region_end > _min_finger)
  2625     _should_gray_objects = true;
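         // A collection set region that extends past the lowest finger may
         // still contain marked objects that have not been scanned, so objects
         // copied out of it must be explicitly grayed.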
  2628 void ConcurrentMark::disable_co_trackers() {
  2629   if (has_aborted()) {
  2630     if (_cleanup_co_tracker.enabled())
  2631       _cleanup_co_tracker.disable();
  2632     for (int i = 0; i < (int)_max_task_num; ++i) {
  2633       CMTask* task = _tasks[i];
  2634       if (task->co_tracker_enabled())
  2635         task->disable_co_tracker();
  2637   } else {
  2638     guarantee( !_cleanup_co_tracker.enabled(), "invariant" );
  2639     for (int i = 0; i < (int)_max_task_num; ++i) {
  2640       CMTask* task = _tasks[i];
  2641       guarantee( !task->co_tracker_enabled(), "invariant" );
  2646 // abandon current marking iteration due to a Full GC
  2647 void ConcurrentMark::abort() {
  2648   // Clear all marks to force marking thread to do nothing
  2649   _nextMarkBitMap->clearAll();
  2650   // Empty mark stack
  2651   clear_marking_state();
  2652   for (int i = 0; i < (int)_max_task_num; ++i)
  2653     _tasks[i]->clear_region_fields();
  2654   _has_aborted = true;
  2656   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  2657   satb_mq_set.abandon_partial_marking();
  2658   satb_mq_set.set_active_all_threads(false);
  2661 static void print_ms_time_info(const char* prefix, const char* name,
  2662                                NumberSeq& ns) {
  2663   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
  2664                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
  2665   if (ns.num() > 0) {
  2666     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
  2667                            prefix, ns.sd(), ns.maximum());
  2671 void ConcurrentMark::print_summary_info() {
  2672   gclog_or_tty->print_cr(" Concurrent marking:");
  2673   print_ms_time_info("  ", "init marks", _init_times);
  2674   print_ms_time_info("  ", "remarks", _remark_times);
  2676     print_ms_time_info("     ", "final marks", _remark_mark_times);
  2677     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
  2680   print_ms_time_info("  ", "cleanups", _cleanup_times);
  2681   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
  2682                          _total_counting_time,
  2683                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
  2684                           (double)_cleanup_times.num()
  2685                          : 0.0));
  2686   if (G1ScrubRemSets) {
  2687     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
  2688                            _total_rs_scrub_time,
  2689                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
  2690                             (double)_cleanup_times.num()
  2691                            : 0.0));
  2693   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
  2694                          (_init_times.sum() + _remark_times.sum() +
  2695                           _cleanup_times.sum())/1000.0);
  2696   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
  2697                 "(%8.2f s marking, %8.2f s counting).",
  2698                 cmThread()->vtime_accum(),
  2699                 cmThread()->vtime_mark_accum(),
  2700                 cmThread()->vtime_count_accum());
  2703 // Closures
  2704 // XXX: there seems to be a lot of code duplication here;
  2705 // should refactor and consolidate the shared code.
  2707 // Yield support for the concurrent phases: the marking threads
  2708 // periodically check whether they should pause.
  2710 // We take a break if someone is trying to stop the world.
  2711 bool ConcurrentMark::do_yield_check(int worker_i) {
  2712   if (should_yield()) {
  2713     if (worker_i == 0)
  2714       _g1h->g1_policy()->record_concurrent_pause();
  2715     cmThread()->yield();
  2716     if (worker_i == 0)
  2717       _g1h->g1_policy()->record_concurrent_pause_end();
  2718     return true;
  2719   } else {
  2720     return false;
  2724 bool ConcurrentMark::should_yield() {
  2725   return cmThread()->should_yield();
  2728 bool ConcurrentMark::containing_card_is_marked(void* p) {
  2729   size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
  2730   return _card_bm.at(offset >> CardTableModRefBS::card_shift);
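         // This maps a pointer to its card index relative to the start of the
         // reserved heap; e.g. with the usual 512-byte cards an address 1536
         // bytes past the heap start falls on card 3.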
  2733 bool ConcurrentMark::containing_cards_are_marked(void* start,
  2734                                                  void* last) {
  2735   return
  2736     containing_card_is_marked(start) &&
  2737     containing_card_is_marked(last);
  2740 #ifndef PRODUCT
  2741 // for debugging purposes
  2742 void ConcurrentMark::print_finger() {
  2743   gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
  2744                          _heap_start, _heap_end, _finger);
  2745   for (int i = 0; i < (int) _max_task_num; ++i) {
  2746     gclog_or_tty->print("   %d: "PTR_FORMAT, i, _tasks[i]->finger());
  2748   gclog_or_tty->print_cr("");
  2750 #endif
  2752 // Closure for iteration over bitmaps
  2753 class CMBitMapClosure : public BitMapClosure {
  2754 private:
  2755   // the bitmap that is being iterated over
  2756   CMBitMap*                   _nextMarkBitMap;
  2757   ConcurrentMark*             _cm;
  2758   CMTask*                     _task;
  2759   // true if we're scanning a heap region claimed by the task (so that
  2760   // we move the finger along), false if we're not, i.e. currently when
  2761   // scanning a heap region popped from the region stack (so that we
  2762   // do not move the task finger along; it'd be a mistake if we did so).
  2763   bool                        _scanning_heap_region;
  2765 public:
  2766   CMBitMapClosure(CMTask *task,
  2767                   ConcurrentMark* cm,
  2768                   CMBitMap* nextMarkBitMap)
  2769     :  _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
  2771   void set_scanning_heap_region(bool scanning_heap_region) {
  2772     _scanning_heap_region = scanning_heap_region;
  2775   bool do_bit(size_t offset) {
  2776     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
  2777     tmp_guarantee_CM( _nextMarkBitMap->isMarked(addr), "invariant" );
  2778     tmp_guarantee_CM( addr < _cm->finger(), "invariant" );
  2780     if (_scanning_heap_region) {
  2781       statsOnly( _task->increase_objs_found_on_bitmap() );
  2782       tmp_guarantee_CM( addr >= _task->finger(), "invariant" );
  2783       // We move that task's local finger along.
  2784       _task->move_finger_to(addr);
  2785     } else {
  2786       // We move the task's region finger along.
  2787       _task->move_region_finger_to(addr);
  2790     _task->scan_object(oop(addr));
  2791     // we only partially drain the local queue and global stack
  2792     _task->drain_local_queue(true);
  2793     _task->drain_global_stack(true);
  2795     // if the has_aborted flag has been raised, we need to bail out of
  2796     // the iteration
  2797     return !_task->has_aborted();
  2799 };
  2801 // Closure for iterating over objects, currently only used for
  2802 // processing SATB buffers.
  2803 class CMObjectClosure : public ObjectClosure {
  2804 private:
  2805   CMTask* _task;
  2807 public:
  2808   void do_object(oop obj) {
  2809     _task->deal_with_reference(obj);
  2812   CMObjectClosure(CMTask* task) : _task(task) { }
  2813 };
  2815 // Closure for iterating over object fields
  2816 class CMOopClosure : public OopClosure {
  2817 private:
  2818   G1CollectedHeap*   _g1h;
  2819   ConcurrentMark*    _cm;
  2820   CMTask*            _task;
  2822 public:
  2823   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  2824   virtual void do_oop(      oop* p) { do_oop_work(p); }
  2826   template <class T> void do_oop_work(T* p) {
  2827     tmp_guarantee_CM( _g1h->is_in_g1_reserved((HeapWord*) p), "invariant" );
  2828     tmp_guarantee_CM( !_g1h->heap_region_containing((HeapWord*) p)->is_on_free_list(), "invariant" );
  2830     oop obj = oopDesc::load_decode_heap_oop(p);
  2831     if (_cm->verbose_high())
  2832       gclog_or_tty->print_cr("[%d] we're looking at location "
  2833                              "*"PTR_FORMAT" = "PTR_FORMAT,
  2834                              _task->task_id(), p, (void*) obj);
  2835     _task->deal_with_reference(obj);
  2838   CMOopClosure(G1CollectedHeap* g1h,
  2839                ConcurrentMark* cm,
  2840                CMTask* task)
  2841     : _g1h(g1h), _cm(cm), _task(task) { }
  2842 };
  2844 void CMTask::setup_for_region(HeapRegion* hr) {
  2845   tmp_guarantee_CM( hr != NULL && !hr->continuesHumongous(),
  2846       "claim_region() should have filtered out continues humongous regions" );
  2848   if (_cm->verbose_low())
  2849     gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
  2850                            _task_id, hr);
  2852   _curr_region  = hr;
  2853   _finger       = hr->bottom();
  2854   update_region_limit();
  2857 void CMTask::update_region_limit() {
  2858   HeapRegion* hr            = _curr_region;
  2859   HeapWord* bottom          = hr->bottom();
  2860   HeapWord* limit           = hr->next_top_at_mark_start();
  2862   if (limit == bottom) {
  2863     if (_cm->verbose_low())
  2864       gclog_or_tty->print_cr("[%d] found an empty region "
  2865                              "["PTR_FORMAT", "PTR_FORMAT")",
  2866                              _task_id, bottom, limit);
  2867     // The region was collected underneath our feet.
  2868     // We set the finger to bottom to ensure that the bitmap
  2869     // iteration that will follow this will not do anything.
  2870     // (this is not a condition that holds when we set the region up,
  2871     // as the region is not supposed to be empty in the first place)
  2872     _finger = bottom;
  2873   } else if (limit >= _region_limit) {
  2874     tmp_guarantee_CM( limit >= _finger, "peace of mind" );
  2875   } else {
  2876     tmp_guarantee_CM( limit < _region_limit, "only way to get here" );
  2877     // This can happen under some pretty unusual circumstances.  An
  2878     // evacuation pause empties the region underneath our feet (NTAMS
  2879     // at bottom). We then do some allocation in the region (NTAMS
  2880     // stays at bottom), followed by the region being used as a GC
  2881     // alloc region (NTAMS will move to top() and the objects
  2882     // originally below it will be grayed). All objects now marked in
  2883     // the region are explicitly grayed, if below the global finger,
  2884     // and in fact we do not need to scan anything else. So, we simply
  2885     // set _finger to be limit to ensure that the bitmap iteration
  2886     // doesn't do anything.
  2887     _finger = limit;
  2888   }
  2890   _region_limit = limit;
  2891 }
  2893 void CMTask::giveup_current_region() {
  2894   tmp_guarantee_CM( _curr_region != NULL, "invariant" );
  2895   if (_cm->verbose_low())
  2896     gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
  2897                            _task_id, _curr_region);
  2898   clear_region_fields();
  2899 }
  2901 void CMTask::clear_region_fields() {
  2902   // Values for these three fields that indicate that we're not
  2903   // holding on to a region.
  2904   _curr_region   = NULL;
  2905   _finger        = NULL;
  2906   _region_limit  = NULL;
  2908   _region_finger = NULL;
  2909 }
  2911 void CMTask::reset(CMBitMap* nextMarkBitMap) {
  2912   guarantee( nextMarkBitMap != NULL, "invariant" );
  2914   if (_cm->verbose_low())
  2915     gclog_or_tty->print_cr("[%d] resetting", _task_id);
  2917   _nextMarkBitMap                = nextMarkBitMap;
  2918   clear_region_fields();
  2920   _calls                         = 0;
  2921   _elapsed_time_ms               = 0.0;
  2922   _termination_time_ms           = 0.0;
  2923   _termination_start_time_ms     = 0.0;
  2925 #if _MARKING_STATS_
  2926   _local_pushes                  = 0;
  2927   _local_pops                    = 0;
  2928   _local_max_size                = 0;
  2929   _objs_scanned                  = 0;
  2930   _global_pushes                 = 0;
  2931   _global_pops                   = 0;
  2932   _global_max_size               = 0;
  2933   _global_transfers_to           = 0;
  2934   _global_transfers_from         = 0;
  2935   _region_stack_pops             = 0;
  2936   _regions_claimed               = 0;
  2937   _objs_found_on_bitmap          = 0;
  2938   _satb_buffers_processed        = 0;
  2939   _steal_attempts                = 0;
  2940   _steals                        = 0;
  2941   _aborted                       = 0;
  2942   _aborted_overflow              = 0;
  2943   _aborted_cm_aborted            = 0;
  2944   _aborted_yield                 = 0;
  2945   _aborted_timed_out             = 0;
  2946   _aborted_satb                  = 0;
  2947   _aborted_termination           = 0;
  2948 #endif // _MARKING_STATS_
  2949 }
  2951 bool CMTask::should_exit_termination() {
  2952   regular_clock_call();
  2953   // This is called when we are in the termination protocol. We should
  2954   // quit if, for some reason, this task wants to abort or the global
  2955   // stack is not empty (this means that we can get work from it).
  2956   return !_cm->mark_stack_empty() || has_aborted();
  2957 }
  2959 // This determines whether the method below will check both the local
  2960 // and global fingers when deciding whether to push a gray object on
  2961 // the stack (value 1) or whether it will only check the global one
  2962 // (value 0). The tradeoff is that the former will be a bit more
  2963 // accurate and possibly push fewer entries on the stack, but it
  2964 // might also be a little bit slower.
  2966 #define _CHECK_BOTH_FINGERS_      1
  2968 void CMTask::deal_with_reference(oop obj) {
  2969   if (_cm->verbose_high())
  2970     gclog_or_tty->print_cr("[%d] we're dealing with reference = "PTR_FORMAT,
  2971                            _task_id, (void*) obj);
  2973   ++_refs_reached;
  2975   HeapWord* objAddr = (HeapWord*) obj;
  2976   assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
  2977   if (_g1h->is_in_g1_reserved(objAddr)) {
  2978     tmp_guarantee_CM( obj != NULL, "is_in_g1_reserved should ensure this" );
  2979     HeapRegion* hr =  _g1h->heap_region_containing(obj);
  2980     if (_g1h->is_obj_ill(obj, hr)) {
  2981       if (_cm->verbose_high())
  2982         gclog_or_tty->print_cr("[%d] "PTR_FORMAT" is not considered marked",
  2983                                _task_id, (void*) obj);
  2985       // we need to mark it first
  2986       if (_nextMarkBitMap->parMark(objAddr)) {
  2987         // No OrderAccess:store_load() is needed. It is implicit in the
  2988         // CAS done in parMark(objAddr) above
  2989         HeapWord* global_finger = _cm->finger();
  2991 #if _CHECK_BOTH_FINGERS_
  2992         // we will check both the local and global fingers
  2994         if (_finger != NULL && objAddr < _finger) {
  2995           if (_cm->verbose_high())
  2996             gclog_or_tty->print_cr("[%d] below the local finger ("PTR_FORMAT"), "
  2997                                    "pushing it", _task_id, _finger);
  2998           push(obj);
  2999         } else if (_curr_region != NULL && objAddr < _region_limit) {
  3000           // do nothing
  3001         } else if (objAddr < global_finger) {
  3002           // Notice that the global finger might be moving forward
  3003           // concurrently. This is not a problem. In the worst case, we
  3004           // mark the object while it is above the global finger and, by
  3005           // the time we read the global finger, it has moved forward
  3006           // past this object. In this case, the object will probably
  3007           // be visited when a task is scanning the region and will also
  3008           // be pushed on the stack. So, some duplicate work, but no
  3009           // correctness problems.
  3011           if (_cm->verbose_high())
  3012             gclog_or_tty->print_cr("[%d] below the global finger "
  3013                                    "("PTR_FORMAT"), pushing it",
  3014                                    _task_id, global_finger);
  3015           push(obj);
  3016         } else {
  3017           // do nothing
  3018         }
  3019 #else // _CHECK_BOTH_FINGERS_
  3020       // we will only check the global finger
  3022         if (objAddr < global_finger) {
  3023           // see long comment above
  3025           if (_cm->verbose_high())
  3026             gclog_or_tty->print_cr("[%d] below the global finger "
  3027                                    "("PTR_FORMAT"), pushing it",
  3028                                    _task_id, global_finger);
  3029           push(obj);
  3030         }
  3031 #endif // _CHECK_BOTH_FINGERS_
  3032       }
  3033     }
  3034   }
  3035 }
  3037 void CMTask::push(oop obj) {
  3038   HeapWord* objAddr = (HeapWord*) obj;
  3039   tmp_guarantee_CM( _g1h->is_in_g1_reserved(objAddr), "invariant" );
  3040   tmp_guarantee_CM( !_g1h->heap_region_containing(objAddr)->is_on_free_list(), "invariant" );
  3041   tmp_guarantee_CM( !_g1h->is_obj_ill(obj), "invariant" );
  3042   tmp_guarantee_CM( _nextMarkBitMap->isMarked(objAddr), "invariant" );
  3044   if (_cm->verbose_high())
  3045     gclog_or_tty->print_cr("[%d] pushing "PTR_FORMAT, _task_id, (void*) obj);
  3047   if (!_task_queue->push(obj)) {
  3048     // The local task queue looks full. We need to push some entries
  3049     // to the global stack.
  3051     if (_cm->verbose_medium())
  3052       gclog_or_tty->print_cr("[%d] task queue overflow, "
  3053                              "moving entries to the global stack",
  3054                              _task_id);
  3055     move_entries_to_global_stack();
  3057     // this should succeed since, even if we overflow the global
  3058     // stack, we should have definitely removed some entries from the
  3059     // local queue. So, there must be space on it.
  3060     bool success = _task_queue->push(obj);
  3061     tmp_guarantee_CM( success, "invariant" );
  3062   }
  3064   statsOnly( int tmp_size = _task_queue->size();
  3065              if (tmp_size > _local_max_size)
  3066                _local_max_size = tmp_size;
  3067              ++_local_pushes );
  3068 }
  3070 void CMTask::reached_limit() {
  3071   tmp_guarantee_CM( _words_scanned >= _words_scanned_limit ||
  3072                     _refs_reached >= _refs_reached_limit ,
  3073                  "shouldn't have been called otherwise" );
  3074   regular_clock_call();
  3075 }
  3077 void CMTask::regular_clock_call() {
  3078   if (has_aborted())
  3079     return;
  3081   // First, we need to recalculate the words scanned and refs reached
  3082   // limits for the next clock call.
  3083   recalculate_limits();
  3085   // During the regular clock call we do the following
  3087   // (1) If an overflow has been flagged, then we abort.
  3088   if (_cm->has_overflown()) {
  3089     set_has_aborted();
  3090     return;
  3091   }
  3093   // If we are not concurrent (i.e. we're doing remark) we don't need
  3094   // to check anything else. The other steps are only needed during
  3095   // the concurrent marking phase.
  3096   if (!concurrent())
  3097     return;
  3099   // (2) If marking has been aborted for Full GC, then we also abort.
  3100   if (_cm->has_aborted()) {
  3101     set_has_aborted();
  3102     statsOnly( ++_aborted_cm_aborted );
  3103     return;
  3106   double curr_time_ms = os::elapsedVTime() * 1000.0;
  3108   // (3) If marking stats are enabled, then we update the step history.
  3109 #if _MARKING_STATS_
  3110   if (_words_scanned >= _words_scanned_limit)
  3111     ++_clock_due_to_scanning;
  3112   if (_refs_reached >= _refs_reached_limit)
  3113     ++_clock_due_to_marking;
  3115   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
  3116   _interval_start_time_ms = curr_time_ms;
  3117   _all_clock_intervals_ms.add(last_interval_ms);
  3119   if (_cm->verbose_medium()) {
  3120     gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
  3121                            "scanned = %d%s, refs reached = %d%s",
  3122                            _task_id, last_interval_ms,
  3123                            _words_scanned,
  3124                            (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
  3125                            _refs_reached,
  3126                            (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
  3127   }
  3128 #endif // _MARKING_STATS_
  3130   // (4) We check whether we should yield. If we have to, then we abort.
  3131   if (_cm->should_yield()) {
  3132     // We should yield. To do this we abort the task. The caller is
  3133     // responsible for yielding.
  3134     set_has_aborted();
  3135     statsOnly( ++_aborted_yield );
  3136     return;
  3137   }
  3139   // (5) We check whether we've reached our time quota. If we have,
  3140   // then we abort.
  3141   double elapsed_time_ms = curr_time_ms - _start_time_ms;
  3142   if (elapsed_time_ms > _time_target_ms) {
  3143     set_has_aborted();
  3144     _has_aborted_timed_out = true;
  3145     statsOnly( ++_aborted_timed_out );
  3146     return;
  3147   }
  3149   // (6) Finally, we check whether there are enough completed SATB
  3150   // buffers available for processing. If there are, we abort.
  3151   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  3152   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
  3153     if (_cm->verbose_low())
  3154       gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
  3155                              _task_id);
  3156     // we do need to process SATB buffers, we'll abort and restart
  3157     // the marking task to do so
  3158     set_has_aborted();
  3159     statsOnly( ++_aborted_satb );
  3160     return;
  3161   }
  3162 }
  3164 void CMTask::recalculate_limits() {
  3165   _real_words_scanned_limit = _words_scanned + words_scanned_period;
  3166   _words_scanned_limit      = _real_words_scanned_limit;
  3168   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
  3169   _refs_reached_limit       = _real_refs_reached_limit;
  3170 }
  3172 void CMTask::decrease_limits() {
  3173   // This is called when we believe that we're going to do an infrequent
  3174   // operation which will increase the per byte scanned cost (i.e. move
  3175   // entries to/from the global stack). It basically tries to decrease the
  3176   // scanning limit so that the clock is called earlier.
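         // In effect, at most roughly a quarter of a period of further
         // scanning (or reference visiting) remains before reached_limit()
         // triggers the next regular_clock_call().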
  3178   if (_cm->verbose_medium())
  3179     gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
  3181   _words_scanned_limit = _real_words_scanned_limit -
  3182     3 * words_scanned_period / 4;
  3183   _refs_reached_limit  = _real_refs_reached_limit -
  3184     3 * refs_reached_period / 4;
  3185 }
  3187 void CMTask::move_entries_to_global_stack() {
  3188   // local array where we'll store the entries that will be popped
  3189   // from the local queue
  3190   oop buffer[global_stack_transfer_size];
  3192   int n = 0;
  3193   oop obj;
  3194   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
  3195     buffer[n] = obj;
  3196     ++n;
  3197   }
  3199   if (n > 0) {
  3200     // we popped at least one entry from the local queue
  3202     statsOnly( ++_global_transfers_to; _local_pops += n );
  3204     if (!_cm->mark_stack_push(buffer, n)) {
  3205       if (_cm->verbose_low())
  3206         gclog_or_tty->print_cr("[%d] aborting due to global stack overflow", _task_id);
  3207       set_has_aborted();
  3208     } else {
  3209       // the transfer was successful
  3211       if (_cm->verbose_medium())
  3212         gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
  3213                                _task_id, n);
  3214       statsOnly( int tmp_size = _cm->mark_stack_size();
  3215                  if (tmp_size > _global_max_size)
  3216                    _global_max_size = tmp_size;
  3217                  _global_pushes += n );
  3218     }
  3219   }
  3221   // this operation was quite expensive, so decrease the limits
  3222   decrease_limits();
  3223 }
  3225 void CMTask::get_entries_from_global_stack() {
  3226   // local array where we'll store the entries that will be popped
  3227   // from the global stack.
  3228   oop buffer[global_stack_transfer_size];
  3229   int n;
  3230   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
  3231   tmp_guarantee_CM( n <= global_stack_transfer_size,
  3232                     "we should not pop more than the given limit" );
  3233   if (n > 0) {
  3234     // yes, we did actually pop at least one entry
  3236     statsOnly( ++_global_transfers_from; _global_pops += n );
  3237     if (_cm->verbose_medium())
  3238       gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
  3239                              _task_id, n);
  3240     for (int i = 0; i < n; ++i) {
  3241       bool success = _task_queue->push(buffer[i]);
  3242       // We only call this when the local queue is empty or under a
  3243       // given target limit. So, we do not expect this push to fail.
  3244       tmp_guarantee_CM( success, "invariant" );
  3245     }
  3247     statsOnly( int tmp_size = _task_queue->size();
  3248                if (tmp_size > _local_max_size)
  3249                  _local_max_size = tmp_size;
  3250                _local_pushes += n );
  3251   }
  3253   // this operation was quite expensive, so decrease the limits
  3254   decrease_limits();
  3255 }
  3257 void CMTask::drain_local_queue(bool partially) {
  3258   if (has_aborted())
  3259     return;
  3261   // Decide what the target size is, depending on whether we're going to
  3262   // drain it partially (so that other tasks can steal if they run out
  3263   // of things to do) or totally (at the very end).
  3264   size_t target_size;
  3265   if (partially)
  3266     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
  3267   else
  3268     target_size = 0;
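         // For illustration: a partial drain pops the queue down to
         // min(max_elems/3, GCDrainStackTargetSize) entries, leaving work
         // available for stealing, whereas a total drain (partially == false)
         // empties it completely.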
  3270   if (_task_queue->size() > target_size) {
  3271     if (_cm->verbose_high())
  3272       gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
  3273                              _task_id, target_size);
  3275     oop obj;
  3276     bool ret = _task_queue->pop_local(obj);
  3277     while (ret) {
  3278       statsOnly( ++_local_pops );
  3280       if (_cm->verbose_high())
  3281         gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
  3282                                (void*) obj);
  3284       tmp_guarantee_CM( _g1h->is_in_g1_reserved((HeapWord*) obj),
  3285                         "invariant" );
  3286       tmp_guarantee_CM( !_g1h->heap_region_containing(obj)->is_on_free_list(),
  3287                         "invariant" );
  3289       scan_object(obj);
  3291       if (_task_queue->size() <= target_size || has_aborted())
  3292         ret = false;
  3293       else
  3294         ret = _task_queue->pop_local(obj);
  3295     }
  3297     if (_cm->verbose_high())
  3298       gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
  3299                              _task_id, _task_queue->size());
  3300   }
  3301 }
  3303 void CMTask::drain_global_stack(bool partially) {
  3304   if (has_aborted())
  3305     return;
  3307   // We have a policy to drain the local queue before we attempt to
  3308   // drain the global stack.
  3309   tmp_guarantee_CM( partially || _task_queue->size() == 0, "invariant" );
  3311   // Decide what the target size is, depending on whether we're going to
  3312   // drain it partially (so that other tasks can steal if they run out
  3313   // of things to do) or totally (at the very end).  Notice that,
  3314   // because we move entries from the global stack in chunks or
  3315   // because another task might be doing the same, we might in fact
  3316   // drop below the target. But, this is not a problem.
  3317   size_t target_size;
  3318   if (partially)
  3319     target_size = _cm->partial_mark_stack_size_target();
  3320   else
  3321     target_size = 0;
  3323   if (_cm->mark_stack_size() > target_size) {
  3324     if (_cm->verbose_low())
  3325       gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
  3326                              _task_id, target_size);
  3328     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
  3329       get_entries_from_global_stack();
  3330       drain_local_queue(partially);
  3331     }
  3333     if (_cm->verbose_low())
  3334       gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
  3335                              _task_id, _cm->mark_stack_size());
  3336   }
  3337 }
  3339 // SATB Queue has several assumptions on whether to call the par or
  3340 // non-par versions of the methods. This is why some of the code is
  3341 // replicated. We should really get rid of the single-threaded version
  3342 // of the code to simplify things.
  3343 void CMTask::drain_satb_buffers() {
  3344   if (has_aborted())
  3345     return;
  3347   // We set this so that the regular clock knows that we're in the
  3348   // middle of draining buffers and doesn't set the abort flag when it
  3349   // notices that SATB buffers are available for draining. It'd be
  3350   // very counterproductive if it did that. :-)
  3351   _draining_satb_buffers = true;
  3353   CMObjectClosure oc(this);
  3354   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  3355   if (ParallelGCThreads > 0)
  3356     satb_mq_set.set_par_closure(_task_id, &oc);
  3357   else
  3358     satb_mq_set.set_closure(&oc);
  3360   // This keeps claiming and applying the closure to completed buffers
  3361   // until we run out of buffers or we need to abort.
  3362   if (ParallelGCThreads > 0) {
  3363     while (!has_aborted() &&
  3364            satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
  3365       if (_cm->verbose_medium())
  3366         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
  3367       statsOnly( ++_satb_buffers_processed );
  3368       regular_clock_call();
  3369     }
  3370   } else {
  3371     while (!has_aborted() &&
  3372            satb_mq_set.apply_closure_to_completed_buffer()) {
  3373       if (_cm->verbose_medium())
  3374         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
  3375       statsOnly( ++_satb_buffers_processed );
  3376       regular_clock_call();
  3377     }
  3378   }
  3380   if (!concurrent() && !has_aborted()) {
  3381     // We should only do this during remark.
  3382     if (ParallelGCThreads > 0)
  3383       satb_mq_set.par_iterate_closure_all_threads(_task_id);
  3384     else
  3385       satb_mq_set.iterate_closure_all_threads();
  3386   }
  3388   _draining_satb_buffers = false;
  3390   tmp_guarantee_CM( has_aborted() ||
  3391                     concurrent() ||
  3392                     satb_mq_set.completed_buffers_num() == 0, "invariant" );
  3394   if (ParallelGCThreads > 0)
  3395     satb_mq_set.set_par_closure(_task_id, NULL);
  3396   else
  3397     satb_mq_set.set_closure(NULL);
  3399   // again, this was a potentially expensive operation, decrease the
  3400   // limits to get the regular clock call early
  3401   decrease_limits();
  3402 }
  3404 void CMTask::drain_region_stack(BitMapClosure* bc) {
  3405   if (has_aborted())
  3406     return;
  3408   tmp_guarantee_CM( _region_finger == NULL,
  3409                     "it should be NULL when we're not scanning a region" );
  3411   if (!_cm->region_stack_empty()) {
  3412     if (_cm->verbose_low())
  3413       gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
  3414                              _task_id, _cm->region_stack_size());
  3416     MemRegion mr = _cm->region_stack_pop();
  3417     // it returns MemRegion() if the pop fails
  3418     statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
  3420     while (mr.start() != NULL) {
  3421       if (_cm->verbose_medium())
  3422         gclog_or_tty->print_cr("[%d] we are scanning region "
  3423                                "["PTR_FORMAT", "PTR_FORMAT")",
  3424                                _task_id, mr.start(), mr.end());
  3425       tmp_guarantee_CM( mr.end() <= _cm->finger(),
  3426                         "otherwise the region shouldn't be on the stack" );
  3427       assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
  3428       if (_nextMarkBitMap->iterate(bc, mr)) {
  3429         tmp_guarantee_CM( !has_aborted(),
  3430                "cannot abort the task without aborting the bitmap iteration" );
  3432         // We finished iterating over the region without aborting.
  3433         regular_clock_call();
  3434         if (has_aborted())
  3435           mr = MemRegion();
  3436         else {
  3437           mr = _cm->region_stack_pop();
  3438           // it returns MemRegion() if the pop fails
  3439           statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
  3440         }
  3441       } else {
  3442         guarantee( has_aborted(), "currently the only way to do so" );
  3444         // The only way to abort the bitmap iteration is to return
  3445         // false from the do_bit() method. However, inside the
  3446         // do_bit() method we move the _region_finger to point to the
  3447         // object currently being looked at. So, if we bail out, we
  3448         // have definitely set _region_finger to something non-null.
  3449         guarantee( _region_finger != NULL, "invariant" );
  3451         // The iteration was actually aborted. So now _region_finger
  3452         // points to the address of the object we last scanned. If we
  3453         // leave it there, when we restart this task, we will rescan
  3454         // the object. It is easy to avoid this. We move the finger by
  3455         // enough to point to the next possible object header (the
  3456         // bitmap knows by how much we need to move it as it knows its
  3457         // granularity).
  3458         MemRegion newRegion =
  3459           MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());
  3461         if (!newRegion.is_empty()) {
  3462           if (_cm->verbose_low()) {
  3463             gclog_or_tty->print_cr("[%d] pushing unscanned region"
  3464                                    "[" PTR_FORMAT "," PTR_FORMAT ") on region stack",
  3465                                    _task_id,
  3466                                    newRegion.start(), newRegion.end());
  3467           }
  3468           // Now push the part of the region we didn't scan on the
  3469           // region stack to make sure a task scans it later.
  3470           _cm->region_stack_push(newRegion);
  3471         }
  3472         // break from while
  3473         mr = MemRegion();
  3474       }
  3475       _region_finger = NULL;
  3476     }
  3478     // We only push regions on the region stack during evacuation
  3479     // pauses. So if we come out of the above iteration because the
  3480     // region stack is empty, it will remain empty until the next yield
  3481     // point. So, the guarantee below is safe.
  3482     guarantee( has_aborted() || _cm->region_stack_empty(),
  3483                "only way to exit the loop" );
  3485     if (_cm->verbose_low())
  3486       gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
  3487                              _task_id, _cm->region_stack_size());
  3488   }
  3489 }
  3491 void CMTask::print_stats() {
  3492   gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
  3493                          _task_id, _calls);
  3494   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
  3495                          _elapsed_time_ms, _termination_time_ms);
  3496   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
  3497                          _step_times_ms.num(), _step_times_ms.avg(),
  3498                          _step_times_ms.sd());
  3499   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
  3500                          _step_times_ms.maximum(), _step_times_ms.sum());
  3502 #if _MARKING_STATS_
  3503   gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
  3504                          _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
  3505                          _all_clock_intervals_ms.sd());
  3506   gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
  3507                          _all_clock_intervals_ms.maximum(),
  3508                          _all_clock_intervals_ms.sum());
  3509   gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
  3510                          _clock_due_to_scanning, _clock_due_to_marking);
  3511   gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
  3512                          _objs_scanned, _objs_found_on_bitmap);
  3513   gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
  3514                          _local_pushes, _local_pops, _local_max_size);
  3515   gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
  3516                          _global_pushes, _global_pops, _global_max_size);
  3517   gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
  3518                          _global_transfers_to,_global_transfers_from);
  3519   gclog_or_tty->print_cr("  Regions: claimed = %d, Region Stack: pops = %d",
  3520                          _regions_claimed, _region_stack_pops);
  3521   gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
  3522   gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
  3523                          _steal_attempts, _steals);
  3524   gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
  3525   gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
  3526                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
  3527   gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
  3528                          _aborted_timed_out, _aborted_satb, _aborted_termination);
  3529 #endif // _MARKING_STATS_
  3530 }
  3532 /*****************************************************************************
  3534     The do_marking_step(time_target_ms) method is the building block
  3535     of the parallel marking framework. It can be called in parallel
  3536     with other invocations of do_marking_step() on different tasks
  3537     (but only one per task, obviously) and concurrently with the
  3538     mutator threads, or during remark, hence it eliminates the need
  3539     for two versions of the code. When called during remark, it will
  3540     pick up from where the task left off during the concurrent marking
  3541     phase. Interestingly, tasks are also claimable during evacuation
  3542     pauses, since do_marking_step() ensures that it aborts before
  3543     it needs to yield.
  3545     The data structures that it uses to do marking work are the
  3546     following:
  3548       (1) Marking Bitmap. If there are gray objects that appear only
  3549       on the bitmap (this happens either when dealing with an overflow
  3550       or when the initial marking phase has simply marked the roots
  3551       and didn't push them on the stack), then tasks claim heap
  3552       regions whose bitmap they then scan to find gray objects. A
  3553       global finger indicates where the end of the last claimed region
  3554       is. A local finger indicates how far into the region a task has
  3555       scanned. The two fingers are used to determine how to gray an
  3556       object (i.e. whether simply marking it is OK, as it will be
  3557       visited by a task in the future, or whether it also needs to be
  3558       pushed on a stack).
  3560       (2) Local Queue. The local queue of the task which is accessed
  3561       reasonably efficiently by the task. Other tasks can steal from
  3562       it when they run out of work. Throughout the marking phase, a
  3563       task attempts to keep its local queue short but not totally
  3564       empty, so that entries are available for stealing by other
  3565       tasks. Only when there is no more work will a task totally
  3566       drain its local queue.
  3568       (3) Global Mark Stack. This handles local queue overflow. During
  3569       marking only sets of entries are moved between it and the local
  3570       queues, since access to it requires a mutex and finer-grained
  3571       interaction with it might cause contention. If it
  3572       overflows, then the marking phase should restart and iterate
  3573       over the bitmap to identify gray objects. Throughout the marking
  3574       phase, tasks attempt to keep the global mark stack at a small
  3575       length but not totally empty, so that entries are available for
  3576       popping by other tasks. Only when there is no more work will
  3577       tasks totally drain the global mark stack.
  3579       (4) Global Region Stack. Entries on it correspond to areas of
  3580       the bitmap that need to be scanned since they contain gray
  3581       objects. Pushes on the region stack only happen during
  3582       evacuation pauses and typically correspond to areas covered by
  3583       GC LABS. If it overflows, then the marking phase should restart
  3584       and iterate over the bitmap to identify gray objects. Tasks will
  3585       try to totally drain the region stack as soon as possible.
  3587       (5) SATB Buffer Queue. This is where completed SATB buffers are
  3588       made available. Buffers are regularly removed from this queue
  3589       and scanned for roots, so that the queue doesn't get too
  3590       long. During remark, all completed buffers are processed, as
  3591       well as the filled in parts of any uncompleted buffers.
  3593     The do_marking_step() method tries to abort when the time target
  3594     has been reached. There are a few other cases when the
  3595     do_marking_step() method also aborts:
  3597       (1) When the marking phase has been aborted (after a Full GC).
  3599       (2) When a global overflow (either on the global stack or the
  3600       region stack) has been triggered. Before the task aborts, it
  3601       will actually sync up with the other tasks to ensure that all
  3602       the marking data structures (local queues, stacks, fingers etc.)
  3603       are re-initialised so that when do_marking_step() completes,
  3604       the marking phase can immediately restart.
  3606       (3) When enough completed SATB buffers are available. The
  3607       do_marking_step() method only tries to drain SATB buffers right
  3608       at the beginning. So, if enough buffers are available, the
  3609       marking step aborts and the SATB buffers are processed at
  3610       the beginning of the next invocation.
  3612       (4) To yield. When we have to yield, we abort and yield
  3613       right at the end of do_marking_step(). This saves us from a lot
  3614       of hassle as, by yielding we might allow a Full GC. If this
  3615       happens then objects will be compacted underneath our feet, the
  3616       heap might shrink, etc. We save checking for this by just
  3617       aborting and doing the yield right at the end.
  3619     From the above it follows that the do_marking_step() method should
  3620     be called in a loop (or, otherwise, regularly) until it completes.
  3622     If a marking step completes without its has_aborted() flag being
  3623     true, it means it has completed the current marking phase (and
  3624     also all other marking tasks have done so and have all synced up).
  3626     A method called regular_clock_call() is invoked "regularly" (in
  3627     sub ms intervals) throughout marking. It is this clock method that
  3628     checks all the abort conditions which were mentioned above and
  3629     decides when the task should abort. A work-based scheme is used to
  3630     trigger this clock method: when the number of object words the
  3631     marking phase has scanned or the number of references the marking
  3632     phase has visited reach a given limit. Additional invocations of
  3633     the clock method have been planted in a few other strategic places
  3634     too. The initial reason for the clock method was to avoid calling
  3635     vtime too regularly, as it is quite expensive. So, once it was in
  3636     place, it was natural to piggy-back all the other conditions on it
  3637     too and not constantly check them throughout the code.
  3639  *****************************************************************************/
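       // Illustrative sketch only (the real drivers are the concurrent
       // marking and remark worker tasks elsewhere in this file); names such
       // as target_ms and marking_completed are placeholders. A caller is
       // expected to keep re-invoking do_marking_step() while it aborts:
       //
       //   double target_ms = ...;   // time slice chosen by the policy
       //   do {
       //     task->do_marking_step(target_ms);
       //     // yield for a pending safepoint / handle overflow here
       //   } while (task->has_aborted() && !marking_completed);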
  3641 void CMTask::do_marking_step(double time_target_ms) {
  3642   guarantee( time_target_ms >= 1.0, "minimum granularity is 1ms" );
  3643   guarantee( concurrent() == _cm->concurrent(), "they should be the same" );
  3645   guarantee( concurrent() || _cm->region_stack_empty(),
  3646              "the region stack should have been cleared before remark" );
  3647   guarantee( _region_finger == NULL,
  3648              "this should be non-null only when a region is being scanned" );
  3650   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
  3651   guarantee( _task_queues != NULL, "invariant" );
  3652   guarantee( _task_queue != NULL,  "invariant" );
  3653   guarantee( _task_queues->queue(_task_id) == _task_queue, "invariant" );
  3655   guarantee( !_claimed,
  3656              "only one thread should claim this task at any one time" );
  3658   // OK, this doesn't safeguard against all possible scenarios, as it is
  3659   // possible for two threads to set the _claimed flag at the same
  3660   // time. But it is only for debugging purposes anyway and it will
  3661   // catch most problems.
  3662   _claimed = true;
  3664   _start_time_ms = os::elapsedVTime() * 1000.0;
  3665   statsOnly( _interval_start_time_ms = _start_time_ms );
  3667   double diff_prediction_ms =
  3668     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
  3669   _time_target_ms = time_target_ms - diff_prediction_ms;
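         // The prediction reflects how much previous steps overshot their
         // target (recorded near the end of this method), so aiming below
         // the caller's target should make the actual elapsed time land
         // close to it.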
  3671   // set up the variables that are used in the work-based scheme to
  3672   // call the regular clock method
  3673   _words_scanned = 0;
  3674   _refs_reached  = 0;
  3675   recalculate_limits();
  3677   // clear all flags
  3678   clear_has_aborted();
  3679   _has_aborted_timed_out = false;
  3680   _draining_satb_buffers = false;
  3682   ++_calls;
  3684   if (_cm->verbose_low())
  3685     gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
  3686                            "target = %1.2lfms >>>>>>>>>>",
  3687                            _task_id, _calls, _time_target_ms);
  3689   // Set up the bitmap and oop closures. Anything that uses them is
  3690   // eventually called from this method, so it is OK to allocate these
  3691   // statically.
  3692   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
  3693   CMOopClosure    oop_closure(_g1h, _cm, this);
  3694   set_oop_closure(&oop_closure);
  3696   if (_cm->has_overflown()) {
  3697     // This can happen if the region stack or the mark stack overflows
  3698     // during a GC pause and this task, after a yield point,
  3699     // restarts. We have to abort as we need to get into the overflow
  3700     // protocol which happens right at the end of this task.
  3701     set_has_aborted();
  3702   }
  3704   // First drain any available SATB buffers. After this, we will not
  3705   // look at SATB buffers before the next invocation of this method.
  3706   // If enough completed SATB buffers are queued up, the regular clock
  3707   // will abort this task so that it restarts.
  3708   drain_satb_buffers();
  3709   // ...then partially drain the local queue and the global stack
  3710   drain_local_queue(true);
  3711   drain_global_stack(true);
  3713   // Then totally drain the region stack.  We will not look at
  3714   // it again before the next invocation of this method. Entries on
  3715   // the region stack are only added during evacuation pauses, for
  3716   // which we have to yield. When we do, we abort the task anyway so
  3717   // it will look at the region stack again when it restarts.
  3718   bitmap_closure.set_scanning_heap_region(false);
  3719   drain_region_stack(&bitmap_closure);
  3720   // ...then partially drain the local queue and the global stack
  3721   drain_local_queue(true);
  3722   drain_global_stack(true);
  3724   do {
  3725     if (!has_aborted() && _curr_region != NULL) {
  3726       // This means that we're already holding on to a region.
  3727       tmp_guarantee_CM( _finger != NULL,
  3728                         "if region is not NULL, then the finger "
  3729                         "should not be NULL either" );
  3731       // We might have restarted this task after an evacuation pause
  3732       // which might have evacuated the region we're holding on to
  3733       // underneath our feet. Let's read its limit again to make sure
  3734       // that we do not iterate over a region of the heap that
  3735       // contains garbage (update_region_limit() will also move
  3736       // _finger to the start of the region if it is found empty).
  3737       update_region_limit();
  3738       // We will start from _finger not from the start of the region,
  3739       // as we might be restarting this task after aborting half-way
  3740       // through scanning this region. In this case, _finger points to
  3741       // the address where we last found a marked object. If this is a
  3742       // fresh region, _finger points to start().
  3743       MemRegion mr = MemRegion(_finger, _region_limit);
  3745       if (_cm->verbose_low())
  3746         gclog_or_tty->print_cr("[%d] we're scanning part "
  3747                                "["PTR_FORMAT", "PTR_FORMAT") "
  3748                                "of region "PTR_FORMAT,
  3749                                _task_id, _finger, _region_limit, _curr_region);
  3751       // Let's iterate over the bitmap of the part of the
  3752       // region that is left.
  3753       bitmap_closure.set_scanning_heap_region(true);
  3754       if (mr.is_empty() ||
  3755           _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
  3756         // We successfully completed iterating over the region. Now,
  3757         // let's give up the region.
  3758         giveup_current_region();
  3759         regular_clock_call();
  3760       } else {
  3761         guarantee( has_aborted(), "currently the only way to do so" );
  3762         // The only way to abort the bitmap iteration is to return
  3763         // false from the do_bit() method. However, inside the
  3764         // do_bit() method we move the _finger to point to the
  3765         // object currently being looked at. So, if we bail out, we
  3766         // have definitely set _finger to something non-null.
  3767         guarantee( _finger != NULL, "invariant" );
  3769         // Region iteration was actually aborted. So now _finger
  3770         // points to the address of the object we last scanned. If we
  3771         // leave it there, when we restart this task, we will rescan
  3772         // the object. It is easy to avoid this. We move the finger by
  3773         // enough to point to the next possible object header (the
  3774         // bitmap knows by how much we need to move it as it knows its
  3775         // granularity).
  3776         move_finger_to(_nextMarkBitMap->nextWord(_finger));
  3777       }
  3778     }
  3779     // At this point we have either completed iterating over the
  3780     // region we were holding on to, or we have aborted.
  3782     // We then partially drain the local queue and the global stack.
  3783     // (Do we really need this?)
  3784     drain_local_queue(true);
  3785     drain_global_stack(true);
  3787     // Read the note on the claim_region() method on why it might
  3788     // return NULL with potentially more regions available for
  3789     // claiming and why we have to check out_of_regions() to determine
  3790     // whether we're done or not.
  3791     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
  3792       // We are going to try to claim a new region. We should have
  3793       // given up on the previous one.
  3794       tmp_guarantee_CM( _curr_region  == NULL &&
  3795                         _finger       == NULL &&
  3796                         _region_limit == NULL, "invariant" );
  3797       if (_cm->verbose_low())
  3798         gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
  3799       HeapRegion* claimed_region = _cm->claim_region(_task_id);
  3800       if (claimed_region != NULL) {
  3801         // Yes, we managed to claim one
  3802         statsOnly( ++_regions_claimed );
  3804         if (_cm->verbose_low())
  3805           gclog_or_tty->print_cr("[%d] we successfully claimed "
  3806                                  "region "PTR_FORMAT,
  3807                                  _task_id, claimed_region);
  3809         setup_for_region(claimed_region);
  3810         tmp_guarantee_CM( _curr_region == claimed_region, "invariant" );
  3811       }
  3812       // It is important to call the regular clock here. It might take
  3813       // a while to claim a region if, for example, we hit a large
  3814       // block of empty regions. So we need to call the regular clock
  3815       // method once round the loop to make sure it's called
  3816       // frequently enough.
  3817       regular_clock_call();
  3818     }
  3820     if (!has_aborted() && _curr_region == NULL) {
  3821       tmp_guarantee_CM( _cm->out_of_regions(),
  3822                         "at this point we should be out of regions" );
  3823     }
  3824   } while ( _curr_region != NULL && !has_aborted());
  3826   if (!has_aborted()) {
  3827     // We cannot check whether the global stack is empty, since other
  3828     // tasks might be pushing objects to it concurrently. We also cannot
  3829     // check if the region stack is empty because if a thread is aborting
  3830     // it can push a partially done region back.
  3831     tmp_guarantee_CM( _cm->out_of_regions(),
  3832                       "at this point we should be out of regions" );
  3834     if (_cm->verbose_low())
  3835       gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
  3837     // Try to reduce the number of available SATB buffers so that
  3838     // remark has less work to do.
  3839     drain_satb_buffers();
  3840   }
  3842   // Since we've done everything else, we can now totally drain the
  3843   // local queue and global stack.
  3844   drain_local_queue(false);
  3845   drain_global_stack(false);
  3847   // Attempt at work stealing from other tasks' queues.
  3848   if (!has_aborted()) {
  3849     // We have not aborted. This means that we have finished all that
  3850     // we could. Let's try to do some stealing...
  3852     // We cannot check whether the global stack is empty, since other
  3853     // tasks might be pushing objects to it concurrently. We also cannot
  3854     // check if the region stack is empty because if a thread is aborting
  3855     // it can push a partially done region back.
  3856     guarantee( _cm->out_of_regions() &&
  3857                _task_queue->size() == 0, "only way to reach here" );
  3859     if (_cm->verbose_low())
  3860       gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
  3862     while (!has_aborted()) {
  3863       oop obj;
  3864       statsOnly( ++_steal_attempts );
  3866       if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
  3867         if (_cm->verbose_medium())
  3868           gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
  3869                                  _task_id, (void*) obj);
  3871         statsOnly( ++_steals );
  3873         tmp_guarantee_CM( _nextMarkBitMap->isMarked((HeapWord*) obj),
  3874                           "any stolen object should be marked" );
  3875         scan_object(obj);
  3877         // And since we're towards the end, let's totally drain the
  3878         // local queue and global stack.
  3879         drain_local_queue(false);
  3880         drain_global_stack(false);
  3881       } else {
  3882         break;
  3883       }
  3884     }
  3885   }
  3887   // We still haven't aborted. Now, let's try to get into the
  3888   // termination protocol.
  3889   if (!has_aborted()) {
  3890     // We cannot check whether the global stack is empty, since other
  3891     // tasks might be concurrently pushing objects on it. We also cannot
  3892     // check if the region stack is empty because if a thread is aborting
  3893     // it can push a partially done region back.
  3894     guarantee( _cm->out_of_regions() &&
  3895                _task_queue->size() == 0, "only way to reach here" );
  3897     if (_cm->verbose_low())
  3898       gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
  3900     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
  3901     // The CMTask class also extends the TerminatorTerminator class,
  3902     // hence its should_exit_termination() method will also decide
  3903     // whether to exit the termination protocol or not.
  3904     bool finished = _cm->terminator()->offer_termination(this);
  3905     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
  3906     _termination_time_ms +=
  3907       termination_end_time_ms - _termination_start_time_ms;
  3909     if (finished) {
  3910       // We're all done.
  3912       if (_task_id == 0) {
  3913         // let's allow task 0 to do this
  3914         if (concurrent()) {
  3915           guarantee( _cm->concurrent_marking_in_progress(), "invariant" );
  3916           // we need to set this to false before the next
  3917           // safepoint. This way we ensure that the marking phase
  3918           // doesn't observe any more heap expansions.
  3919           _cm->clear_concurrent_marking_in_progress();
  3920         }
  3921       }
  3923       // We can now guarantee that the global stack is empty, since
  3924       // all other tasks have finished.
  3925       guarantee( _cm->out_of_regions() &&
  3926                  _cm->region_stack_empty() &&
  3927                  _cm->mark_stack_empty() &&
  3928                  _task_queue->size() == 0 &&
  3929                  !_cm->has_overflown() &&
  3930                  !_cm->mark_stack_overflow() &&
  3931                  !_cm->region_stack_overflow(),
  3932                  "only way to reach here" );
  3934       if (_cm->verbose_low())
  3935         gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
  3936     } else {
  3937       // Apparently there's more work to do. Let's abort this task. It
  3938       // will restart it and we can hopefully find more things to do.
  3940       if (_cm->verbose_low())
  3941         gclog_or_tty->print_cr("[%d] apparently there is more work to do", _task_id);
  3943       set_has_aborted();
  3944       statsOnly( ++_aborted_termination );
  3945     }
  3946   }
  3948   // Mainly for debugging purposes to make sure that a pointer to the
  3949   // closure which was statically allocated in this frame doesn't
  3950   // escape it by accident.
  3951   set_oop_closure(NULL);
  3952   double end_time_ms = os::elapsedVTime() * 1000.0;
  3953   double elapsed_time_ms = end_time_ms - _start_time_ms;
  3954   // Update the step history.
  3955   _step_times_ms.add(elapsed_time_ms);
  3957   if (has_aborted()) {
  3958     // The task was aborted for some reason.
  3960     statsOnly( ++_aborted );
  3962     if (_has_aborted_timed_out) {
  3963       double diff_ms = elapsed_time_ms - _time_target_ms;
  3964       // Keep statistics of how well we did with respect to hitting
  3965       // our target only if we actually timed out (if we aborted for
  3966       // other reasons, then the results might get skewed).
  3967       _marking_step_diffs_ms.add(diff_ms);
  3968     }
  3970     if (_cm->has_overflown()) {
  3971       // This is the interesting one. We aborted because a global
  3972       // overflow was raised. This means we have to restart the
  3973       // marking phase and start iterating over regions. However, in
  3974       // order to do this we have to make sure that all tasks stop
  3975       // what they are doing and re-initialise in a safe manner. We
  3976       // will achieve this with the use of two barrier sync points.
  3978       if (_cm->verbose_low())
  3979         gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
  3981       _cm->enter_first_sync_barrier(_task_id);
  3982       // When we exit this sync barrier we know that all tasks have
  3983       // stopped doing marking work. So, it's now safe to
  3984       // re-initialise our data structures. At the end of this method,
  3985       // task 0 will clear the global data structures.
  3987       statsOnly( ++_aborted_overflow );
  3989       // We clear the local state of this task...
  3990       clear_region_fields();
  3992       // ...and enter the second barrier.
  3993       _cm->enter_second_sync_barrier(_task_id);
  3994       // At this point everything has been re-initialised and we're
  3995       // ready to restart.
  3996     }
  3998     if (_cm->verbose_low()) {
  3999       gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
  4000                              "elapsed = %1.2lfms <<<<<<<<<<",
  4001                              _task_id, _time_target_ms, elapsed_time_ms);
  4002       if (_cm->has_aborted())
  4003         gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
  4004                                _task_id);
  4005     }
  4006   } else {
  4007     if (_cm->verbose_low())
  4008       gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
  4009                              "elapsed = %1.2lfms <<<<<<<<<<",
  4010                              _task_id, _time_target_ms, elapsed_time_ms);
  4011   }
  4013   _claimed = false;
  4014 }
  4016 CMTask::CMTask(int task_id,
  4017                ConcurrentMark* cm,
  4018                CMTaskQueue* task_queue,
  4019                CMTaskQueueSet* task_queues)
  4020   : _g1h(G1CollectedHeap::heap()),
  4021     _co_tracker(G1CMGroup),
  4022     _task_id(task_id), _cm(cm),
  4023     _claimed(false),
  4024     _nextMarkBitMap(NULL), _hash_seed(17),
  4025     _task_queue(task_queue),
  4026     _task_queues(task_queues),
  4027     _oop_closure(NULL) {
  4028   guarantee( task_queue != NULL, "invariant" );
  4029   guarantee( task_queues != NULL, "invariant" );
  4031   statsOnly( _clock_due_to_scanning = 0;
  4032              _clock_due_to_marking  = 0 );
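         // Seed the overshoot history with a small non-zero value so that
         // the first call to do_marking_step() starts from a (presumably
         // slightly conservative) prediction rather than zero.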
  4034   _marking_step_diffs_ms.add(0.5);
  4035 }
