src/share/vm/gc_implementation/g1/concurrentMark.cpp

author       apetrusenko
date         Mon, 31 Aug 2009 05:27:29 -0700
changeset    1375:8624da129f0b
parent       1371:e1fdf4fd34dc
child        1428:54b3b351d6f9
permissions  -rw-r--r--

6841313: G1: dirty cards of survivor regions in parallel
Reviewed-by: tonyp, iveresov

     1 /*
     2  * Copyright 2001-2009 Sun Microsystems, Inc.  All Rights Reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.
     8  *
     9  * This code is distributed in the hope that it will be useful, but WITHOUT
    10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    12  * version 2 for more details (a copy is included in the LICENSE file that
    13  * accompanied this code).
    14  *
    15  * You should have received a copy of the GNU General Public License version
    16  * 2 along with this work; if not, write to the Free Software Foundation,
    17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    18  *
    19  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
    20  * CA 95054 USA or visit www.sun.com if you need additional information or
    21  * have any questions.
    22  *
    23  */
    25 #include "incls/_precompiled.incl"
    26 #include "incls/_concurrentMark.cpp.incl"
    28 //
    29 // CMS Bit Map Wrapper
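        // One bit in the map covers 2^_shifter HeapWords; the two mark bitmaps
        // below are constructed with _shifter == MinObjAlignment - 1, and the
        // backing store for the bits is reserved and committed up front.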
    31 CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter):
    32   _bm((uintptr_t*)NULL,0),
    33   _shifter(shifter) {
    34   _bmStartWord = (HeapWord*)(rs.base());
    35   _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
    36   ReservedSpace brs(ReservedSpace::allocation_align_size_up(
    37                      (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
    39   guarantee(brs.is_reserved(), "couldn't allocate CMS bit map");
     40   // For now we'll just commit all of the bit map up front.
    41   // Later on we'll try to be more parsimonious with swap.
    42   guarantee(_virtual_space.initialize(brs, brs.size()),
     43             "couldn't reserve backing store for CMS bit map");
    44   assert(_virtual_space.committed_size() == brs.size(),
    45          "didn't reserve backing store for all of CMS bit map?");
    46   _bm.set_map((uintptr_t*)_virtual_space.low());
    47   assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
    48          _bmWordSize, "inconsistency in bit map sizing");
    49   _bm.set_size(_bmWordSize >> _shifter);
    50 }
    52 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
    53                                                HeapWord* limit) const {
    54   // First we must round addr *up* to a possible object boundary.
    55   addr = (HeapWord*)align_size_up((intptr_t)addr,
    56                                   HeapWordSize << _shifter);
    57   size_t addrOffset = heapWordToOffset(addr);
    58   if (limit == NULL) limit = _bmStartWord + _bmWordSize;
    59   size_t limitOffset = heapWordToOffset(limit);
    60   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
    61   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
    62   assert(nextAddr >= addr, "get_next_one postcondition");
    63   assert(nextAddr == limit || isMarked(nextAddr),
    64          "get_next_one postcondition");
    65   return nextAddr;
    66 }
    68 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
    69                                                  HeapWord* limit) const {
    70   size_t addrOffset = heapWordToOffset(addr);
    71   if (limit == NULL) limit = _bmStartWord + _bmWordSize;
    72   size_t limitOffset = heapWordToOffset(limit);
    73   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
    74   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
    75   assert(nextAddr >= addr, "get_next_one postcondition");
    76   assert(nextAddr == limit || !isMarked(nextAddr),
    77          "get_next_one postcondition");
    78   return nextAddr;
    79 }
    81 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
    82   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
    83   return (int) (diff >> _shifter);
    84 }
    86 bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
    87   HeapWord* left  = MAX2(_bmStartWord, mr.start());
    88   HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
    89   if (right > left) {
    90     // Right-open interval [leftOffset, rightOffset).
    91     return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
    92   } else {
    93     return true;
    94   }
    95 }
    97 void CMBitMapRO::mostly_disjoint_range_union(BitMap*   from_bitmap,
    98                                              size_t    from_start_index,
    99                                              HeapWord* to_start_word,
   100                                              size_t    word_num) {
   101   _bm.mostly_disjoint_range_union(from_bitmap,
   102                                   from_start_index,
   103                                   heapWordToOffset(to_start_word),
   104                                   word_num);
   105 }
   107 #ifndef PRODUCT
   108 bool CMBitMapRO::covers(ReservedSpace rs) const {
   109   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
   110   assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
   111          "size inconsistency");
   112   return _bmStartWord == (HeapWord*)(rs.base()) &&
   113          _bmWordSize  == rs.size()>>LogHeapWordSize;
   114 }
   115 #endif
   117 void CMBitMap::clearAll() {
   118   _bm.clear();
   119   return;
   120 }
   122 void CMBitMap::markRange(MemRegion mr) {
   123   mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
   124   assert(!mr.is_empty(), "unexpected empty region");
   125   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
   126           ((HeapWord *) mr.end())),
   127          "markRange memory region end is not card aligned");
   128   // convert address range into offset range
   129   _bm.at_put_range(heapWordToOffset(mr.start()),
   130                    heapWordToOffset(mr.end()), true);
   131 }
   133 void CMBitMap::clearRange(MemRegion mr) {
   134   mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
   135   assert(!mr.is_empty(), "unexpected empty region");
   136   // convert address range into offset range
   137   _bm.at_put_range(heapWordToOffset(mr.start()),
   138                    heapWordToOffset(mr.end()), false);
   139 }
   141 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
   142                                             HeapWord* end_addr) {
   143   HeapWord* start = getNextMarkedWordAddress(addr);
   144   start = MIN2(start, end_addr);
   145   HeapWord* end   = getNextUnmarkedWordAddress(start);
   146   end = MIN2(end, end_addr);
   147   assert(start <= end, "Consistency check");
   148   MemRegion mr(start, end);
   149   if (!mr.is_empty()) {
   150     clearRange(mr);
   151   }
   152   return mr;
   153 }
   155 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
   156   _base(NULL), _cm(cm)
   157 #ifdef ASSERT
   158   , _drain_in_progress(false)
   159   , _drain_in_progress_yields(false)
   160 #endif
   161 {}
   163 void CMMarkStack::allocate(size_t size) {
   164   _base = NEW_C_HEAP_ARRAY(oop, size);
   165   if (_base == NULL)
   166     vm_exit_during_initialization("Failed to allocate "
   167                                   "CM region mark stack");
   168   _index = 0;
   169   // QQQQ cast ...
   170   _capacity = (jint) size;
   171   _oops_do_bound = -1;
   172   NOT_PRODUCT(_max_depth = 0);
   173 }
   175 CMMarkStack::~CMMarkStack() {
   176   if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base);
   177 }
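        // Lock-free push: a slot is claimed by advancing _index with a CAS and
        // is then filled in. If the stack is already full we only set the
        // _overflow flag and return; the overflow is detected and handled later.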
   179 void CMMarkStack::par_push(oop ptr) {
   180   while (true) {
   181     if (isFull()) {
   182       _overflow = true;
   183       return;
   184     }
   185     // Otherwise...
   186     jint index = _index;
   187     jint next_index = index+1;
   188     jint res = Atomic::cmpxchg(next_index, &_index, index);
   189     if (res == index) {
   190       _base[index] = ptr;
   191       // Note that we don't maintain this atomically.  We could, but it
   192       // doesn't seem necessary.
   193       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
   194       return;
   195     }
   196     // Otherwise, we need to try again.
   197   }
   198 }
   200 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
   201   while (true) {
   202     if (isFull()) {
   203       _overflow = true;
   204       return;
   205     }
   206     // Otherwise...
   207     jint index = _index;
   208     jint next_index = index + n;
   209     if (next_index > _capacity) {
   210       _overflow = true;
   211       return;
   212     }
   213     jint res = Atomic::cmpxchg(next_index, &_index, index);
   214     if (res == index) {
   215       for (int i = 0; i < n; i++) {
   216         int ind = index + i;
   217         assert(ind < _capacity, "By overflow test above.");
   218         _base[ind] = ptr_arr[i];
   219       }
   220       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
   221       return;
   222     }
   223     // Otherwise, we need to try again.
   224   }
   225 }
   228 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
   229   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
   230   jint start = _index;
   231   jint next_index = start + n;
   232   if (next_index > _capacity) {
   233     _overflow = true;
   234     return;
   235   }
   236   // Otherwise.
   237   _index = next_index;
   238   for (int i = 0; i < n; i++) {
   239     int ind = start + i;
   240     guarantee(ind < _capacity, "By overflow test above.");
   241     _base[ind] = ptr_arr[i];
   242   }
   243 }
   246 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
   247   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
   248   jint index = _index;
   249   if (index == 0) {
   250     *n = 0;
   251     return false;
   252   } else {
   253     int k = MIN2(max, index);
   254     jint new_ind = index - k;
   255     for (int j = 0; j < k; j++) {
   256       ptr_arr[j] = _base[new_ind + j];
   257     }
   258     _index = new_ind;
   259     *n = k;
   260     return true;
   261   }
   262 }
   265 CMRegionStack::CMRegionStack() : _base(NULL) {}
   267 void CMRegionStack::allocate(size_t size) {
   268   _base = NEW_C_HEAP_ARRAY(MemRegion, size);
   269   if (_base == NULL)
   270     vm_exit_during_initialization("Failed to allocate "
   271                                   "CM region mark stack");
   272   _index = 0;
   273   // QQQQ cast ...
   274   _capacity = (jint) size;
   275 }
   277 CMRegionStack::~CMRegionStack() {
   278   if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base);
   279 }
   281 void CMRegionStack::push(MemRegion mr) {
   282   assert(mr.word_size() > 0, "Precondition");
   283   while (true) {
   284     if (isFull()) {
   285       _overflow = true;
   286       return;
   287     }
   288     // Otherwise...
   289     jint index = _index;
   290     jint next_index = index+1;
   291     jint res = Atomic::cmpxchg(next_index, &_index, index);
   292     if (res == index) {
   293       _base[index] = mr;
   294       return;
   295     }
   296     // Otherwise, we need to try again.
   297   }
   298 }
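        // pop() mirrors push(): the top entry is claimed by CAS-ing _index
        // downwards. Entries whose start() is NULL were invalidated by
        // invalidate_entries_into_cset() below and are silently skipped.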
   300 MemRegion CMRegionStack::pop() {
   301   while (true) {
   302     // Otherwise...
   303     jint index = _index;
   305     if (index == 0) {
   306       return MemRegion();
   307     }
   308     jint next_index = index-1;
   309     jint res = Atomic::cmpxchg(next_index, &_index, index);
   310     if (res == index) {
   311       MemRegion mr = _base[next_index];
   312       if (mr.start() != NULL) {
   313         tmp_guarantee_CM( mr.end() != NULL, "invariant" );
   314         tmp_guarantee_CM( mr.word_size() > 0, "invariant" );
   315         return mr;
   316       } else {
   317         // that entry was invalidated... let's skip it
   318         tmp_guarantee_CM( mr.end() == NULL, "invariant" );
   319       }
   320     }
   321     // Otherwise, we need to try again.
   322   }
   323 }
   325 bool CMRegionStack::invalidate_entries_into_cset() {
   326   bool result = false;
   327   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   328   for (int i = 0; i < _oops_do_bound; ++i) {
   329     MemRegion mr = _base[i];
   330     if (mr.start() != NULL) {
   331       tmp_guarantee_CM( mr.end() != NULL, "invariant");
   332       tmp_guarantee_CM( mr.word_size() > 0, "invariant" );
   333       HeapRegion* hr = g1h->heap_region_containing(mr.start());
   334       tmp_guarantee_CM( hr != NULL, "invariant" );
   335       if (hr->in_collection_set()) {
   336         // The region points into the collection set
   337         _base[i] = MemRegion();
   338         result = true;
   339       }
   340     } else {
   341       // that entry was invalidated... let's skip it
   342       tmp_guarantee_CM( mr.end() == NULL, "invariant" );
   343     }
   344   }
   345   return result;
   346 }
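        // drain() repeatedly pops grey objects and applies the closure to the
        // fields of each one (via oop_iterate), optionally yielding between
        // objects; it returns false if it had to yield before emptying the stack.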
   348 template<class OopClosureClass>
   349 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
   350   assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
   351          || SafepointSynchronize::is_at_safepoint(),
   352          "Drain recursion must be yield-safe.");
   353   bool res = true;
   354   debug_only(_drain_in_progress = true);
   355   debug_only(_drain_in_progress_yields = yield_after);
   356   while (!isEmpty()) {
   357     oop newOop = pop();
   358     assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
   359     assert(newOop->is_oop(), "Expected an oop");
   360     assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
   361            "only grey objects on this stack");
   362     // iterate over the oops in this oop, marking and pushing
   363     // the ones in CMS generation.
   364     newOop->oop_iterate(cl);
   365     if (yield_after && _cm->do_yield_check()) {
   366       res = false; break;
   367     }
   368   }
   369   debug_only(_drain_in_progress = false);
   370   return res;
   371 }
   373 void CMMarkStack::oops_do(OopClosure* f) {
   374   if (_index == 0) return;
   375   assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
   376          "Bound must be set.");
   377   for (int i = 0; i < _oops_do_bound; i++) {
   378     f->do_oop(&_base[i]);
   379   }
   380   _oops_do_bound = -1;
   381 }
   383 bool ConcurrentMark::not_yet_marked(oop obj) const {
   384   return (_g1h->is_obj_ill(obj)
   385           || (_g1h->is_in_permanent(obj)
   386               && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
   387 }
   389 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
   390 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
   391 #endif // _MSC_VER
   393 ConcurrentMark::ConcurrentMark(ReservedSpace rs,
   394                                int max_regions) :
   395   _markBitMap1(rs, MinObjAlignment - 1),
   396   _markBitMap2(rs, MinObjAlignment - 1),
   398   _parallel_marking_threads(0),
   399   _sleep_factor(0.0),
   400   _marking_task_overhead(1.0),
   401   _cleanup_sleep_factor(0.0),
   402   _cleanup_task_overhead(1.0),
   403   _region_bm(max_regions, false /* in_resource_area*/),
   404   _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
   405            CardTableModRefBS::card_shift,
   406            false /* in_resource_area*/),
   407   _prevMarkBitMap(&_markBitMap1),
   408   _nextMarkBitMap(&_markBitMap2),
   409   _at_least_one_mark_complete(false),
   411   _markStack(this),
   412   _regionStack(),
   413   // _finger set in set_non_marking_state
   415   _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
   416   // _active_tasks set in set_non_marking_state
   417   // _tasks set inside the constructor
   418   _task_queues(new CMTaskQueueSet((int) _max_task_num)),
   419   _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),
   421   _has_overflown(false),
   422   _concurrent(false),
   423   _has_aborted(false),
   424   _restart_for_overflow(false),
   425   _concurrent_marking_in_progress(false),
   426   _should_gray_objects(false),
   428   // _verbose_level set below
   430   _init_times(),
   431   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
   432   _cleanup_times(),
   433   _total_counting_time(0.0),
   434   _total_rs_scrub_time(0.0),
   436   _parallel_workers(NULL)
   437 {
   438   CMVerboseLevel verbose_level =
   439     (CMVerboseLevel) G1MarkingVerboseLevel;
   440   if (verbose_level < no_verbose)
   441     verbose_level = no_verbose;
   442   if (verbose_level > high_verbose)
   443     verbose_level = high_verbose;
   444   _verbose_level = verbose_level;
   446   if (verbose_low())
   447     gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
   448                            "heap end = "PTR_FORMAT, _heap_start, _heap_end);
   450   _markStack.allocate(G1MarkStackSize);
   451   _regionStack.allocate(G1MarkRegionStackSize);
   453   // Create & start a ConcurrentMark thread.
   454   _cmThread = new ConcurrentMarkThread(this);
   455   assert(cmThread() != NULL, "CM Thread should have been created");
   456   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
   458   _g1h = G1CollectedHeap::heap();
   459   assert(CGC_lock != NULL, "Where's the CGC_lock?");
   460   assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
   461   assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
   463   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
   464   satb_qs.set_buffer_size(G1SATBLogBufferSize);
   466   int size = (int) MAX2(ParallelGCThreads, (size_t)1);
   467   _par_cleanup_thread_state = NEW_C_HEAP_ARRAY(ParCleanupThreadState*, size);
   468   for (int i = 0 ; i < size; i++) {
   469     _par_cleanup_thread_state[i] = new ParCleanupThreadState;
   470   }
   472   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
   473   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);
   475   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
   476   _active_tasks = _max_task_num;
   477   for (int i = 0; i < (int) _max_task_num; ++i) {
   478     CMTaskQueue* task_queue = new CMTaskQueue();
   479     task_queue->initialize();
   480     _task_queues->register_queue(i, task_queue);
   482     _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
   483     _accum_task_vtime[i] = 0.0;
   484   }
   486   if (ParallelMarkingThreads > ParallelGCThreads) {
   487     vm_exit_during_initialization("Can't have more ParallelMarkingThreads "
   488                                   "than ParallelGCThreads.");
   489   }
   490   if (ParallelGCThreads == 0) {
   491     // if we are not running with any parallel GC threads we will not
   492     // spawn any marking threads either
   493     _parallel_marking_threads =   0;
   494     _sleep_factor             = 0.0;
   495     _marking_task_overhead    = 1.0;
   496   } else {
   497     if (ParallelMarkingThreads > 0) {
   498       // notice that ParallelMarkingThreads overwrites G1MarkingOverheadPercent
   499       // if both are set
   501       _parallel_marking_threads = ParallelMarkingThreads;
   502       _sleep_factor             = 0.0;
   503       _marking_task_overhead    = 1.0;
   504     } else if (G1MarkingOverheadPercent > 0) {
   505       // we will calculate the number of parallel marking threads
   506       // based on a target overhead with respect to the soft real-time
   507       // goal
   509       double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
   510       double overall_cm_overhead =
   511         (double) MaxGCPauseMillis * marking_overhead /
   512         (double) GCPauseIntervalMillis;
   513       double cpu_ratio = 1.0 / (double) os::processor_count();
   514       double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
   515       double marking_task_overhead =
   516         overall_cm_overhead / marking_thread_num *
   517                                                 (double) os::processor_count();
   518       double sleep_factor =
   519                          (1.0 - marking_task_overhead) / marking_task_overhead;
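              // Worked example (hypothetical values): G1MarkingOverheadPercent=10,
              // MaxGCPauseMillis=200, GCPauseIntervalMillis=1000, 8 processors:
              //   overall_cm_overhead   = 200 * 0.10 / 1000  = 0.02
              //   cpu_ratio             = 1 / 8              = 0.125
              //   marking_thread_num    = ceil(0.02 / 0.125) = 1
              //   marking_task_overhead = 0.02 / 1 * 8       = 0.16
              //   sleep_factor          = (1 - 0.16) / 0.16  = 5.25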
   521       _parallel_marking_threads = (size_t) marking_thread_num;
   522       _sleep_factor             = sleep_factor;
   523       _marking_task_overhead    = marking_task_overhead;
   524     } else {
   525       _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
   526       _sleep_factor             = 0.0;
   527       _marking_task_overhead    = 1.0;
   528     }
   530     if (parallel_marking_threads() > 1)
   531       _cleanup_task_overhead = 1.0;
   532     else
   533       _cleanup_task_overhead = marking_task_overhead();
   534     _cleanup_sleep_factor =
   535                      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
   537 #if 0
   538     gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
   539     gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
   540     gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
   541     gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
   542     gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
   543 #endif
   545     guarantee( parallel_marking_threads() > 0, "peace of mind" );
   546     _parallel_workers = new WorkGang("Parallel Marking Threads",
   547                                      (int) parallel_marking_threads(), false, true);
   548     if (_parallel_workers == NULL)
   549       vm_exit_during_initialization("Failed necessary allocation.");
   550   }
   552   // so that the call below can read a sensible value
   553   _heap_start = (HeapWord*) rs.base();
   554   set_non_marking_state();
   555 }
   557 void ConcurrentMark::update_g1_committed(bool force) {
   558   // If concurrent marking is not in progress, then we do not need to
   559   // update _heap_end. This has a subtle and important
   560   // side-effect. Imagine that two evacuation pauses happen between
   561   // marking completion and remark. The first one can grow the
   562   // heap (hence now the finger is below the heap end). Then, the
   563   // second one could unnecessarily push regions on the region
   564   // stack. This causes the invariant that the region stack is empty
   565   // at the beginning of remark to be false. By ensuring that we do
    566   // not observe heap expansions after marking is complete, we do
   567   // not have this problem.
   568   if (!concurrent_marking_in_progress() && !force)
   569     return;
   571   MemRegion committed = _g1h->g1_committed();
   572   tmp_guarantee_CM( committed.start() == _heap_start,
   573                     "start shouldn't change" );
   574   HeapWord* new_end = committed.end();
   575   if (new_end > _heap_end) {
   576     // The heap has been expanded.
   578     _heap_end = new_end;
   579   }
   580   // Notice that the heap can also shrink. However, this only happens
   581   // during a Full GC (at least currently) and the entire marking
   582   // phase will bail out and the task will not be restarted. So, let's
   583   // do nothing.
   584 }
   586 void ConcurrentMark::reset() {
   587   // Starting values for these two. This should be called in a STW
    588   // phase. CM will be notified of any future g1_committed expansions;
    589   // these will happen at the end of evacuation pauses, when tasks are
   590   // inactive.
   591   MemRegion committed = _g1h->g1_committed();
   592   _heap_start = committed.start();
   593   _heap_end   = committed.end();
   595   guarantee( _heap_start != NULL &&
   596              _heap_end != NULL   &&
   597              _heap_start < _heap_end, "heap bounds should look ok" );
   599   // reset all the marking data structures and any necessary flags
   600   clear_marking_state();
   602   if (verbose_low())
   603     gclog_or_tty->print_cr("[global] resetting");
   605   // We do reset all of them, since different phases will use
    606   // a different number of active threads. So, it's easiest to have all
   607   // of them ready.
   608   for (int i = 0; i < (int) _max_task_num; ++i)
   609     _tasks[i]->reset(_nextMarkBitMap);
   611   // we need this to make sure that the flag is on during the evac
   612   // pause with initial mark piggy-backed
   613   set_concurrent_marking_in_progress();
   614 }
   616 void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
   617   guarantee( active_tasks <= _max_task_num, "we should not have more" );
   619   _active_tasks = active_tasks;
   620   // Need to update the three data structures below according to the
   621   // number of active threads for this phase.
   622   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
   623   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
   624   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
   626   _concurrent = concurrent;
   627   // We propagate this to all tasks, not just the active ones.
   628   for (int i = 0; i < (int) _max_task_num; ++i)
   629     _tasks[i]->set_concurrent(concurrent);
   631   if (concurrent) {
   632     set_concurrent_marking_in_progress();
   633   } else {
   634     // We currently assume that the concurrent flag has been set to
   635     // false before we start remark. At this point we should also be
   636     // in a STW phase.
   637     guarantee( !concurrent_marking_in_progress(), "invariant" );
   638     guarantee( _finger == _heap_end, "only way to get here" );
   639     update_g1_committed(true);
   640   }
   641 }
   643 void ConcurrentMark::set_non_marking_state() {
   644   // We set the global marking state to some default values when we're
   645   // not doing marking.
   646   clear_marking_state();
   647   _active_tasks = 0;
   648   clear_concurrent_marking_in_progress();
   649 }
   651 ConcurrentMark::~ConcurrentMark() {
   652   int size = (int) MAX2(ParallelGCThreads, (size_t)1);
   653   for (int i = 0; i < size; i++) delete _par_cleanup_thread_state[i];
   654   FREE_C_HEAP_ARRAY(ParCleanupThreadState*,
   655                     _par_cleanup_thread_state);
   657   for (int i = 0; i < (int) _max_task_num; ++i) {
   658     delete _task_queues->queue(i);
   659     delete _tasks[i];
   660   }
   661   delete _task_queues;
   662   FREE_C_HEAP_ARRAY(CMTask*, _max_task_num);
   663 }
   665 // This closure is used to mark refs into the g1 generation
   666 // from external roots in the CMS bit map.
   667 // Called at the first checkpoint.
   668 //
   670 #define PRINT_REACHABLE_AT_INITIAL_MARK 0
   671 #if PRINT_REACHABLE_AT_INITIAL_MARK
   672 static FILE* reachable_file = NULL;
   674 class PrintReachableClosure: public OopsInGenClosure {
   675   CMBitMap* _bm;
   676   int _level;
   677 public:
   678   PrintReachableClosure(CMBitMap* bm) :
   679     _bm(bm), _level(0) {
   680     guarantee(reachable_file != NULL, "pre-condition");
   681   }
   682   void do_oop(oop* p) {
   683     oop obj = *p;
   684     HeapWord* obj_addr = (HeapWord*)obj;
   685     if (obj == NULL) return;
   686     fprintf(reachable_file, "%d: "PTR_FORMAT" -> "PTR_FORMAT" (%d)\n",
   687             _level, p, (void*) obj, _bm->isMarked(obj_addr));
   688     if (!_bm->isMarked(obj_addr)) {
   689       _bm->mark(obj_addr);
   690       _level++;
   691       obj->oop_iterate(this);
   692       _level--;
   693     }
   694   }
   695 };
   696 #endif // PRINT_REACHABLE_AT_INITIAL_MARK
   698 #define SEND_HEAP_DUMP_TO_FILE 0
   699 #if SEND_HEAP_DUMP_TO_FILE
   700 static FILE* heap_dump_file = NULL;
   701 #endif // SEND_HEAP_DUMP_TO_FILE
   703 void ConcurrentMark::clearNextBitmap() {
   704    guarantee(!G1CollectedHeap::heap()->mark_in_progress(), "Precondition.");
   706    // clear the mark bitmap (no grey objects to start with).
   707    // We need to do this in chunks and offer to yield in between
   708    // each chunk.
   709    HeapWord* start  = _nextMarkBitMap->startWord();
   710    HeapWord* end    = _nextMarkBitMap->endWord();
   711    HeapWord* cur    = start;
   712    size_t chunkSize = M;
   713    while (cur < end) {
   714      HeapWord* next = cur + chunkSize;
   715      if (next > end)
   716        next = end;
   717      MemRegion mr(cur,next);
   718      _nextMarkBitMap->clearRange(mr);
   719      cur = next;
   720      do_yield_check();
   721    }
   722 }
   724 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
   725 public:
   726   bool doHeapRegion(HeapRegion* r) {
   727     if (!r->continuesHumongous()) {
   728       r->note_start_of_marking(true);
   729     }
   730     return false;
   731   }
   732 };
   734 void ConcurrentMark::checkpointRootsInitialPre() {
   735   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
   736   G1CollectorPolicy* g1p = g1h->g1_policy();
   738   _has_aborted = false;
   740   // Find all the reachable objects...
   741 #if PRINT_REACHABLE_AT_INITIAL_MARK
   742   guarantee(reachable_file == NULL, "Protocol");
   743   char fn_buf[100];
   744   sprintf(fn_buf, "/tmp/reachable.txt.%d", os::current_process_id());
   745   reachable_file = fopen(fn_buf, "w");
   746   // clear the mark bitmap (no grey objects to start with)
   747   _nextMarkBitMap->clearAll();
   748   PrintReachableClosure prcl(_nextMarkBitMap);
   749   g1h->process_strong_roots(
   750                             false,   // fake perm gen collection
   751                             SharedHeap::SO_AllClasses,
   752                             &prcl, // Regular roots
   753                             &prcl    // Perm Gen Roots
   754                             );
   755   // The root iteration above "consumed" dirty cards in the perm gen.
   756   // Therefore, as a shortcut, we dirty all such cards.
   757   g1h->rem_set()->invalidate(g1h->perm_gen()->used_region(), false);
   758   fclose(reachable_file);
   759   reachable_file = NULL;
   760   // clear the mark bitmap again.
   761   _nextMarkBitMap->clearAll();
   762   COMPILER2_PRESENT(DerivedPointerTable::update_pointers());
   763   COMPILER2_PRESENT(DerivedPointerTable::clear());
   764 #endif // PRINT_REACHABLE_AT_INITIAL_MARK
   766   // Initialise marking structures. This has to be done in a STW phase.
   767   reset();
   768 }
   770 class CMMarkRootsClosure: public OopsInGenClosure {
   771 private:
   772   ConcurrentMark*  _cm;
   773   G1CollectedHeap* _g1h;
   774   bool             _do_barrier;
   776 public:
   777   CMMarkRootsClosure(ConcurrentMark* cm,
   778                      G1CollectedHeap* g1h,
   779                      bool do_barrier) : _cm(cm), _g1h(g1h),
   780                                         _do_barrier(do_barrier) { }
   782   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
   783   virtual void do_oop(      oop* p) { do_oop_work(p); }
   785   template <class T> void do_oop_work(T* p) {
   786     T heap_oop = oopDesc::load_heap_oop(p);
   787     if (!oopDesc::is_null(heap_oop)) {
   788       oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
   789       assert(obj->is_oop() || obj->mark() == NULL,
   790              "expected an oop, possibly with mark word displaced");
   791       HeapWord* addr = (HeapWord*)obj;
   792       if (_g1h->is_in_g1_reserved(addr)) {
   793         _cm->grayRoot(obj);
   794       }
   795     }
   796     if (_do_barrier) {
   797       assert(!_g1h->is_in_g1_reserved(p),
   798              "Should be called on external roots");
   799       do_barrier(p);
   800     }
   801   }
   802 };
   804 void ConcurrentMark::checkpointRootsInitialPost() {
   805   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
   807   // For each region note start of marking.
   808   NoteStartOfMarkHRClosure startcl;
   809   g1h->heap_region_iterate(&startcl);
   811   // Start weak-reference discovery.
   812   ReferenceProcessor* rp = g1h->ref_processor();
   813   rp->verify_no_references_recorded();
   814   rp->enable_discovery(); // enable ("weak") refs discovery
   815   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
   817   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
   818   satb_mq_set.set_process_completed_threshold(G1SATBProcessCompletedThreshold);
   819   satb_mq_set.set_active_all_threads(true);
   821   // update_g1_committed() will be called at the end of an evac pause
   822   // when marking is on. So, it's also called at the end of the
   823   // initial-mark pause to update the heap end, if the heap expands
   824   // during it. No need to call it here.
   825 }
   827 // Checkpoint the roots into this generation from outside
   828 // this generation. [Note this initial checkpoint need only
   829 // be approximate -- we'll do a catch up phase subsequently.]
   830 void ConcurrentMark::checkpointRootsInitial() {
   831   assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped");
   832   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   834   double start = os::elapsedTime();
   836   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
   837   g1p->record_concurrent_mark_init_start();
   838   checkpointRootsInitialPre();
   840   // YSR: when concurrent precleaning is in place, we'll
   841   // need to clear the cached card table here
   843   ResourceMark rm;
   844   HandleMark  hm;
   846   g1h->ensure_parsability(false);
   847   g1h->perm_gen()->save_marks();
   849   CMMarkRootsClosure notOlder(this, g1h, false);
   850   CMMarkRootsClosure older(this, g1h, true);
   852   g1h->set_marking_started();
   853   g1h->rem_set()->prepare_for_younger_refs_iterate(false);
   855   g1h->process_strong_roots(false,   // fake perm gen collection
   856                             SharedHeap::SO_AllClasses,
   857                             &notOlder, // Regular roots
   858                             &older    // Perm Gen Roots
   859                             );
   860   checkpointRootsInitialPost();
   862   // Statistics.
   863   double end = os::elapsedTime();
   864   _init_times.add((end - start) * 1000.0);
   866   g1p->record_concurrent_mark_init_end();
   867 }
   869 /*
   870    Notice that in the next two methods, we actually leave the STS
   871    during the barrier sync and join it immediately afterwards. If we
    872    do not do this, then the following deadlock can occur: one
   873    thread could be in the barrier sync code, waiting for the other
   874    thread to also sync up, whereas another one could be trying to
   875    yield, while also waiting for the other threads to sync up too.
   877    Because the thread that does the sync barrier has left the STS, it
    878    may be suspended for a Full GC, or an evacuation pause
    879    may occur. This is actually safe, since entering the sync
   880    barrier is one of the last things do_marking_step() does, and it
   881    doesn't manipulate any data structures afterwards.
   882 */
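        // The resulting overflow-handling sequence is: every task enters the
        // first barrier (outside the STS); task 0 alone clears the global
        // marking state; each task then resets its own local data structures;
        // finally every task enters the second barrier, after which the tasks
        // resume marking.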
   884 void ConcurrentMark::enter_first_sync_barrier(int task_num) {
   885   if (verbose_low())
   886     gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
   888   ConcurrentGCThread::stsLeave();
   889   _first_overflow_barrier_sync.enter();
   890   ConcurrentGCThread::stsJoin();
   891   // at this point everyone should have synced up and not be doing any
   892   // more work
   894   if (verbose_low())
   895     gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
   897   // let task 0 do this
   898   if (task_num == 0) {
   899     // task 0 is responsible for clearing the global data structures
   900     clear_marking_state();
   902     if (PrintGC) {
   903       gclog_or_tty->date_stamp(PrintGCDateStamps);
   904       gclog_or_tty->stamp(PrintGCTimeStamps);
   905       gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
   906     }
   907   }
    909   // after this, each task should reset its own data structures and
   910   // then go into the second barrier
   911 }
   913 void ConcurrentMark::enter_second_sync_barrier(int task_num) {
   914   if (verbose_low())
   915     gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
   917   ConcurrentGCThread::stsLeave();
   918   _second_overflow_barrier_sync.enter();
   919   ConcurrentGCThread::stsJoin();
   920   // at this point everything should be re-initialised and ready to go
   922   if (verbose_low())
   923     gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
   924 }
   926 void ConcurrentMark::grayRoot(oop p) {
   927   HeapWord* addr = (HeapWord*) p;
   928   // We can't really check against _heap_start and _heap_end, since it
   929   // is possible during an evacuation pause with piggy-backed
   930   // initial-mark that the committed space is expanded during the
    931   // pause without CM observing this change. So the assertion below
   932   // is a bit conservative; but better than nothing.
   933   tmp_guarantee_CM( _g1h->g1_committed().contains(addr),
   934                     "address should be within the heap bounds" );
   936   if (!_nextMarkBitMap->isMarked(addr))
   937     _nextMarkBitMap->parMark(addr);
   938 }
   940 void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
   941   // The objects on the region have already been marked "in bulk" by
   942   // the caller. We only need to decide whether to push the region on
   943   // the region stack or not.
   945   if (!concurrent_marking_in_progress() || !_should_gray_objects)
   946     // We're done with marking and waiting for remark. We do not need to
   947     // push anything else on the region stack.
   948     return;
   950   HeapWord* finger = _finger;
   952   if (verbose_low())
   953     gclog_or_tty->print_cr("[global] attempting to push "
   954                            "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
   955                            PTR_FORMAT, mr.start(), mr.end(), finger);
   957   if (mr.start() < finger) {
   958     // The finger is always heap region aligned and it is not possible
   959     // for mr to span heap regions.
   960     tmp_guarantee_CM( mr.end() <= finger, "invariant" );
   962     tmp_guarantee_CM( mr.start() <= mr.end() &&
   963                       _heap_start <= mr.start() &&
   964                       mr.end() <= _heap_end,
   965                   "region boundaries should fall within the committed space" );
   966     if (verbose_low())
   967       gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
   968                              "below the finger, pushing it",
   969                              mr.start(), mr.end());
   971     if (!region_stack_push(mr)) {
   972       if (verbose_low())
   973         gclog_or_tty->print_cr("[global] region stack has overflown.");
   974     }
   975   }
   976 }
   978 void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
   979   // The object is not marked by the caller. We need to at least mark
    980   // it and maybe push it on the stack.
   982   HeapWord* addr = (HeapWord*)p;
   983   if (!_nextMarkBitMap->isMarked(addr)) {
    984     // We definitely need to mark it, irrespective of whether we bail out
   985     // because we're done with marking.
   986     if (_nextMarkBitMap->parMark(addr)) {
   987       if (!concurrent_marking_in_progress() || !_should_gray_objects)
   988         // If we're done with concurrent marking and we're waiting for
   989         // remark, then we're not pushing anything on the stack.
   990         return;
    992       // No OrderAccess::store_load() is needed. It is implicit in the
   993       // CAS done in parMark(addr) above
   994       HeapWord* finger = _finger;
   996       if (addr < finger) {
   997         if (!mark_stack_push(oop(addr))) {
   998           if (verbose_low())
   999             gclog_or_tty->print_cr("[global] global stack overflow "
  1000                                    "during parMark");
  1007 class CMConcurrentMarkingTask: public AbstractGangTask {
  1008 private:
  1009   ConcurrentMark*       _cm;
  1010   ConcurrentMarkThread* _cmt;
  1012 public:
  1013   void work(int worker_i) {
  1014     guarantee( Thread::current()->is_ConcurrentGC_thread(),
  1015                "this should only be done by a conc GC thread" );
  1017     double start_vtime = os::elapsedVTime();
  1019     ConcurrentGCThread::stsJoin();
  1021     guarantee( (size_t)worker_i < _cm->active_tasks(), "invariant" );
  1022     CMTask* the_task = _cm->task(worker_i);
  1023     the_task->record_start_time();
  1024     if (!_cm->has_aborted()) {
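              // Each iteration asks the task to mark for a ~10ms slice. If the
              // task had to abort its step (while marking as a whole has not
              // aborted), the thread sleeps for elapsed_vtime * sleep_factor,
              // keeping the marking threads near the target CPU overhead.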
  1025       do {
  1026         double start_vtime_sec = os::elapsedVTime();
  1027         double start_time_sec = os::elapsedTime();
  1028         the_task->do_marking_step(10.0);
  1029         double end_time_sec = os::elapsedTime();
  1030         double end_vtime_sec = os::elapsedVTime();
  1031         double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
  1032         double elapsed_time_sec = end_time_sec - start_time_sec;
  1033         _cm->clear_has_overflown();
  1035         bool ret = _cm->do_yield_check(worker_i);
  1037         jlong sleep_time_ms;
  1038         if (!_cm->has_aborted() && the_task->has_aborted()) {
  1039           sleep_time_ms =
  1040             (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
  1041           ConcurrentGCThread::stsLeave();
  1042           os::sleep(Thread::current(), sleep_time_ms, false);
  1043           ConcurrentGCThread::stsJoin();
  1045         double end_time2_sec = os::elapsedTime();
  1046         double elapsed_time2_sec = end_time2_sec - start_time_sec;
  1048 #if 0
  1049           gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
  1050                                  "overhead %1.4lf",
  1051                                  elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
  1052                                  the_task->conc_overhead(os::elapsedTime()) * 8.0);
  1053           gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
  1054                                  elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
  1055 #endif
  1056       } while (!_cm->has_aborted() && the_task->has_aborted());
  1058     the_task->record_end_time();
  1059     guarantee( !the_task->has_aborted() || _cm->has_aborted(), "invariant" );
  1061     ConcurrentGCThread::stsLeave();
  1063     double end_vtime = os::elapsedVTime();
  1064     _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
  1067   CMConcurrentMarkingTask(ConcurrentMark* cm,
  1068                           ConcurrentMarkThread* cmt) :
  1069       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
  1071   ~CMConcurrentMarkingTask() { }
  1072 };
  1074 void ConcurrentMark::markFromRoots() {
  1075   // we might be tempted to assert that:
  1076   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  1077   //        "inconsistent argument?");
  1078   // However that wouldn't be right, because it's possible that
  1079   // a safepoint is indeed in progress as a younger generation
  1080   // stop-the-world GC happens even as we mark in this generation.
  1082   _restart_for_overflow = false;
  1084   set_phase(MAX2((size_t) 1, parallel_marking_threads()), true);
  1086   CMConcurrentMarkingTask markingTask(this, cmThread());
  1087   if (parallel_marking_threads() > 0)
  1088     _parallel_workers->run_task(&markingTask);
  1089   else
  1090     markingTask.work(0);
  1091   print_stats();
  1094 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  1095   // world is stopped at this checkpoint
  1096   assert(SafepointSynchronize::is_at_safepoint(),
  1097          "world should be stopped");
  1098   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  1100   // If a full collection has happened, we shouldn't do this.
  1101   if (has_aborted()) {
  1102     g1h->set_marking_complete(); // So bitmap clearing isn't confused
  1103     return;
  1106   if (VerifyDuringGC) {
  1107     HandleMark hm;  // handle scope
  1108     gclog_or_tty->print(" VerifyDuringGC:(before)");
  1109     Universe::heap()->prepare_for_verify();
  1110     Universe::verify(true, false, true);
  1113   G1CollectorPolicy* g1p = g1h->g1_policy();
  1114   g1p->record_concurrent_mark_remark_start();
  1116   double start = os::elapsedTime();
  1118   checkpointRootsFinalWork();
  1120   double mark_work_end = os::elapsedTime();
  1122   weakRefsWork(clear_all_soft_refs);
  1124   if (has_overflown()) {
  1125     // Oops.  We overflowed.  Restart concurrent marking.
  1126     _restart_for_overflow = true;
  1127     // Clear the flag. We do not need it any more.
  1128     clear_has_overflown();
  1129     if (G1TraceMarkStackOverflow)
  1130       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
  1131   } else {
  1132     // We're done with marking.
  1133     JavaThread::satb_mark_queue_set().set_active_all_threads(false);
  1135     if (VerifyDuringGC) {
  1136       HandleMark hm;  // handle scope
  1137       gclog_or_tty->print(" VerifyDuringGC:(after)");
  1138       Universe::heap()->prepare_for_verify();
  1139       Universe::heap()->verify(/* allow_dirty */      true,
  1140                                /* silent */           false,
  1141                                /* use_prev_marking */ false);
  1145 #if VERIFY_OBJS_PROCESSED
  1146   _scan_obj_cl.objs_processed = 0;
  1147   ThreadLocalObjQueue::objs_enqueued = 0;
  1148 #endif
  1150   // Statistics
  1151   double now = os::elapsedTime();
  1152   _remark_mark_times.add((mark_work_end - start) * 1000.0);
  1153   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  1154   _remark_times.add((now - start) * 1000.0);
  1156   g1p->record_concurrent_mark_remark_end();
  1160 #define CARD_BM_TEST_MODE 0
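        // CalcLiveObjectsClosure walks the marked objects of each region,
        // accumulating the region's live bytes and recording liveness in the
        // region and card bitmaps that are used later during cleanup.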
  1162 class CalcLiveObjectsClosure: public HeapRegionClosure {
  1164   CMBitMapRO* _bm;
  1165   ConcurrentMark* _cm;
  1166   bool _changed;
  1167   bool _yield;
  1168   size_t _words_done;
  1169   size_t _tot_live;
  1170   size_t _tot_used;
  1171   size_t _regions_done;
  1172   double _start_vtime_sec;
  1174   BitMap* _region_bm;
  1175   BitMap* _card_bm;
  1176   intptr_t _bottom_card_num;
  1177   bool _final;
  1179   void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
  1180     for (intptr_t i = start_card_num; i <= last_card_num; i++) {
  1181 #if CARD_BM_TEST_MODE
  1182       guarantee(_card_bm->at(i - _bottom_card_num),
  1183                 "Should already be set.");
  1184 #else
  1185       _card_bm->par_at_put(i - _bottom_card_num, 1);
  1186 #endif
  1190 public:
  1191   CalcLiveObjectsClosure(bool final,
  1192                          CMBitMapRO *bm, ConcurrentMark *cm,
  1193                          BitMap* region_bm, BitMap* card_bm) :
  1194     _bm(bm), _cm(cm), _changed(false), _yield(true),
  1195     _words_done(0), _tot_live(0), _tot_used(0),
  1196     _region_bm(region_bm), _card_bm(card_bm),_final(final),
  1197     _regions_done(0), _start_vtime_sec(0.0)
  1199     _bottom_card_num =
  1200       intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
  1201                CardTableModRefBS::card_shift);
  1204   // It takes a region that's not empty (i.e., it has at least one
   1205   // live object in it) and sets its corresponding bit on the region
  1206   // bitmap to 1. If the region is "starts humongous" it will also set
  1207   // to 1 the bits on the region bitmap that correspond to its
  1208   // associated "continues humongous" regions.
  1209   void set_bit_for_region(HeapRegion* hr) {
  1210     assert(!hr->continuesHumongous(), "should have filtered those out");
  1212     size_t index = hr->hrs_index();
  1213     if (!hr->startsHumongous()) {
  1214       // Normal (non-humongous) case: just set the bit.
  1215       _region_bm->par_at_put((BitMap::idx_t) index, true);
  1216     } else {
  1217       // Starts humongous case: calculate how many regions are part of
  1218       // this humongous region and then set the bit range. It might
  1219       // have been a bit more efficient to look at the object that
  1220       // spans these humongous regions to calculate their number from
  1221       // the object's size. However, it's a good idea to calculate
  1222       // this based on the metadata itself, and not the region
  1223       // contents, so that this code is not aware of what goes into
  1224       // the humongous regions (in case this changes in the future).
  1225       G1CollectedHeap* g1h = G1CollectedHeap::heap();
  1226       size_t end_index = index + 1;
  1227       while (end_index < g1h->n_regions()) {
  1228         HeapRegion* chr = g1h->region_at(end_index);
  1229         if (!chr->continuesHumongous()) {
  1230           break;
  1232         end_index += 1;
  1234       _region_bm->par_at_put_range((BitMap::idx_t) index,
  1235                                    (BitMap::idx_t) end_index, true);
  1239   bool doHeapRegion(HeapRegion* hr) {
  1240     if (!_final && _regions_done == 0)
  1241       _start_vtime_sec = os::elapsedVTime();
  1243     if (hr->continuesHumongous()) {
  1244       // We will ignore these here and process them when their
  1245       // associated "starts humongous" region is processed (see
   1246     // set_bit_for_region()). Note that we cannot rely on their
   1247     // associated "starts humongous" region to have its bit set to
  1248       // 1 since, due to the region chunking in the parallel region
  1249       // iteration, a "continues humongous" region might be visited
  1250       // before its associated "starts humongous".
  1251       return false;
  1254     HeapWord* nextTop = hr->next_top_at_mark_start();
  1255     HeapWord* start   = hr->top_at_conc_mark_count();
  1256     assert(hr->bottom() <= start && start <= hr->end() &&
  1257            hr->bottom() <= nextTop && nextTop <= hr->end() &&
  1258            start <= nextTop,
  1259            "Preconditions.");
   1260     // Otherwise, record the number of words we'll examine.
  1261     size_t words_done = (nextTop - start);
  1262     // Find the first marked object at or after "start".
  1263     start = _bm->getNextMarkedWordAddress(start, nextTop);
  1264     size_t marked_bytes = 0;
  1266     // Below, the term "card num" means the result of shifting an address
  1267     // by the card shift -- address 0 corresponds to card number 0.  One
  1268     // must subtract the card num of the bottom of the heap to obtain a
  1269     // card table index.
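            // Example (assuming the usual 512-byte cards, i.e. a card shift of 9):
            // an object at address 0x1200 is on card num 0x1200 >> 9 = 9; if the
            // heap starts at 0x0800 (card num 4), its card table index is 9 - 4 = 5.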
  1270     // The first card num of the sequence of live cards currently being
  1271     // constructed.  -1 ==> no sequence.
  1272     intptr_t start_card_num = -1;
  1273     // The last card num of the sequence of live cards currently being
  1274     // constructed.  -1 ==> no sequence.
  1275     intptr_t last_card_num = -1;
  1277     while (start < nextTop) {
  1278       if (_yield && _cm->do_yield_check()) {
  1279         // We yielded.  It might be for a full collection, in which case
  1280         // all bets are off; terminate the traversal.
  1281         if (_cm->has_aborted()) {
  1282           _changed = false;
  1283           return true;
  1284         } else {
  1285           // Otherwise, it might be a collection pause, and the region
  1286           // we're looking at might be in the collection set.  We'll
  1287           // abandon this region.
  1288           return false;
  1291       oop obj = oop(start);
  1292       int obj_sz = obj->size();
  1293       // The card num of the start of the current object.
  1294       intptr_t obj_card_num =
  1295         intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
  1297       HeapWord* obj_last = start + obj_sz - 1;
  1298       intptr_t obj_last_card_num =
  1299         intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);
  1301       if (obj_card_num != last_card_num) {
  1302         if (start_card_num == -1) {
  1303           assert(last_card_num == -1, "Both or neither.");
  1304           start_card_num = obj_card_num;
  1305         } else {
  1306           assert(last_card_num != -1, "Both or neither.");
  1307           assert(obj_card_num >= last_card_num, "Inv");
  1308           if ((obj_card_num - last_card_num) > 1) {
  1309             // Mark the last run, and start a new one.
  1310             mark_card_num_range(start_card_num, last_card_num);
  1311             start_card_num = obj_card_num;
  1314 #if CARD_BM_TEST_MODE
  1315         /*
  1316         gclog_or_tty->print_cr("Setting bits from %d/%d.",
  1317                                obj_card_num - _bottom_card_num,
  1318                                obj_last_card_num - _bottom_card_num);
  1319         */
  1320         for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
  1321           _card_bm->par_at_put(j - _bottom_card_num, 1);
  1323 #endif
  1325       // In any case, we set the last card num.
  1326       last_card_num = obj_last_card_num;
  1328       marked_bytes += obj_sz * HeapWordSize;
  1329       // Find the next marked object after this one.
  1330       start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
  1331       _changed = true;
  1333     // Handle the last range, if any.
  1334     if (start_card_num != -1)
  1335       mark_card_num_range(start_card_num, last_card_num);
  1336     if (_final) {
  1337       // Mark the allocated-since-marking portion...
  1338       HeapWord* tp = hr->top();
  1339       if (nextTop < tp) {
  1340         start_card_num =
  1341           intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
  1342         last_card_num =
  1343           intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
  1344         mark_card_num_range(start_card_num, last_card_num);
  1345         // This definitely means the region has live objects.
  1346         set_bit_for_region(hr);
  1350     hr->add_to_marked_bytes(marked_bytes);
  1351     // Update the live region bitmap.
  1352     if (marked_bytes > 0) {
  1353       set_bit_for_region(hr);
  1355     hr->set_top_at_conc_mark_count(nextTop);
  1356     _tot_live += hr->next_live_bytes();
  1357     _tot_used += hr->used();
  1358     _words_done = words_done;
  1360     if (!_final) {
  1361       ++_regions_done;
  1362       if (_regions_done % 10 == 0) {
  1363         double end_vtime_sec = os::elapsedVTime();
  1364         double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
  1365         if (elapsed_vtime_sec > (10.0 / 1000.0)) {
  1366           jlong sleep_time_ms =
  1367             (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
  1368           os::sleep(Thread::current(), sleep_time_ms, false);
  1369           _start_vtime_sec = end_vtime_sec;
  1374     return false;
  1377   bool changed() { return _changed;  }
  1378   void reset()   { _changed = false; _words_done = 0; }
  1379   void no_yield() { _yield = false; }
  1380   size_t words_done() { return _words_done; }
  1381   size_t tot_live() { return _tot_live; }
  1382   size_t tot_used() { return _tot_used; }
  1383 };
  1386 void ConcurrentMark::calcDesiredRegions() {
  1387   _region_bm.clear();
  1388   _card_bm.clear();
  1389   CalcLiveObjectsClosure calccl(false /*final*/,
  1390                                 nextMarkBitMap(), this,
  1391                                 &_region_bm, &_card_bm);
  1392   G1CollectedHeap *g1h = G1CollectedHeap::heap();
  1393   g1h->heap_region_iterate(&calccl);
  1395   do {
  1396     calccl.reset();
  1397     g1h->heap_region_iterate(&calccl);
  1398   } while (calccl.changed());
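        // Final counting: each worker walks its share of the regions with the
        // closure above (with yielding disabled) and records per-worker live
        // and used byte totals, which live_bytes()/used_bytes() then sum.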
  1401 class G1ParFinalCountTask: public AbstractGangTask {
  1402 protected:
  1403   G1CollectedHeap* _g1h;
  1404   CMBitMap* _bm;
  1405   size_t _n_workers;
  1406   size_t *_live_bytes;
  1407   size_t *_used_bytes;
  1408   BitMap* _region_bm;
  1409   BitMap* _card_bm;
  1410 public:
  1411   G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
  1412                       BitMap* region_bm, BitMap* card_bm) :
  1413     AbstractGangTask("G1 final counting"), _g1h(g1h),
  1414     _bm(bm), _region_bm(region_bm), _card_bm(card_bm)
  1416     if (ParallelGCThreads > 0)
  1417       _n_workers = _g1h->workers()->total_workers();
  1418     else
  1419       _n_workers = 1;
  1420     _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
  1421     _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
  1424   ~G1ParFinalCountTask() {
  1425     FREE_C_HEAP_ARRAY(size_t, _live_bytes);
  1426     FREE_C_HEAP_ARRAY(size_t, _used_bytes);
  1429   void work(int i) {
  1430     CalcLiveObjectsClosure calccl(true /*final*/,
  1431                                   _bm, _g1h->concurrent_mark(),
  1432                                   _region_bm, _card_bm);
  1433     calccl.no_yield();
  1434     if (ParallelGCThreads > 0) {
  1435       _g1h->heap_region_par_iterate_chunked(&calccl, i,
  1436                                             HeapRegion::FinalCountClaimValue);
  1437     } else {
  1438       _g1h->heap_region_iterate(&calccl);
  1440     assert(calccl.complete(), "Shouldn't have yielded!");
  1442     guarantee( (size_t)i < _n_workers, "invariant" );
  1443     _live_bytes[i] = calccl.tot_live();
  1444     _used_bytes[i] = calccl.tot_used();
  1446   size_t live_bytes()  {
  1447     size_t live_bytes = 0;
  1448     for (size_t i = 0; i < _n_workers; ++i)
  1449       live_bytes += _live_bytes[i];
  1450     return live_bytes;
  1452   size_t used_bytes()  {
  1453     size_t used_bytes = 0;
  1454     for (size_t i = 0; i < _n_workers; ++i)
  1455       used_bytes += _used_bytes[i];
  1456     return used_bytes;
  1458 };
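// Once counting is done, the "note end" phase visits every region: each
// worker runs a G1NoteEndOfConcMarkClosure over the regions it claims,
// records end-of-marking information for the region, and immediately frees
// regions that turned out to be completely empty, accumulating freed-bytes
// and max-live-bytes totals per worker.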
  1460 class G1ParNoteEndTask;
  1462 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  1463   G1CollectedHeap* _g1;
  1464   int _worker_num;
  1465   size_t _max_live_bytes;
  1466   size_t _regions_claimed;
  1467   size_t _freed_bytes;
  1468   size_t _cleared_h_regions;
  1469   size_t _freed_regions;
  1470   UncleanRegionList* _unclean_region_list;
  1471   double _claimed_region_time;
  1472   double _max_region_time;
  1474 public:
  1475   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
  1476                              UncleanRegionList* list,
  1477                              int worker_num);
  1478   size_t freed_bytes() { return _freed_bytes; }
  1479   size_t cleared_h_regions() { return _cleared_h_regions; }
  1480   size_t freed_regions() { return  _freed_regions; }
  1481   UncleanRegionList* unclean_region_list() {
  1482     return _unclean_region_list;
  1485   bool doHeapRegion(HeapRegion *r);
  1487   size_t max_live_bytes() { return _max_live_bytes; }
  1488   size_t regions_claimed() { return _regions_claimed; }
  1489   double claimed_region_time_sec() { return _claimed_region_time; }
  1490   double max_region_time_sec() { return _max_region_time; }
  1491 };
  1493 class G1ParNoteEndTask: public AbstractGangTask {
  1494   friend class G1NoteEndOfConcMarkClosure;
  1495 protected:
  1496   G1CollectedHeap* _g1h;
  1497   size_t _max_live_bytes;
  1498   size_t _freed_bytes;
  1499   ConcurrentMark::ParCleanupThreadState** _par_cleanup_thread_state;
  1500 public:
  1501   G1ParNoteEndTask(G1CollectedHeap* g1h,
  1502                    ConcurrentMark::ParCleanupThreadState**
  1503                    par_cleanup_thread_state) :
  1504     AbstractGangTask("G1 note end"), _g1h(g1h),
  1505     _max_live_bytes(0), _freed_bytes(0),
  1506     _par_cleanup_thread_state(par_cleanup_thread_state)
  1507   {}
  1509   void work(int i) {
  1510     double start = os::elapsedTime();
  1511     G1NoteEndOfConcMarkClosure g1_note_end(_g1h,
  1512                                            &_par_cleanup_thread_state[i]->list,
  1513                                            i);
  1514     if (ParallelGCThreads > 0) {
  1515       _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
  1516                                             HeapRegion::NoteEndClaimValue);
  1517     } else {
  1518       _g1h->heap_region_iterate(&g1_note_end);
  1520     assert(g1_note_end.complete(), "Shouldn't have yielded!");
  1522     // Now finish up freeing the current thread's regions.
  1523     _g1h->finish_free_region_work(g1_note_end.freed_bytes(),
  1524                                   g1_note_end.cleared_h_regions(),
  1525                                   0, NULL);
  1527       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  1528       _max_live_bytes += g1_note_end.max_live_bytes();
  1529       _freed_bytes += g1_note_end.freed_bytes();
  1531     double end = os::elapsedTime();
  1532     if (G1PrintParCleanupStats) {
  1533       gclog_or_tty->print("     Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
  1534                           "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
  1535                           i, start, end, (end-start)*1000.0,
  1536                           g1_note_end.regions_claimed(),
  1537                           g1_note_end.claimed_region_time_sec()*1000.0,
  1538                           g1_note_end.max_region_time_sec()*1000.0);
  1541   size_t max_live_bytes() { return _max_live_bytes; }
  1542   size_t freed_bytes() { return _freed_bytes; }
  1543 };
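// Remembered-set scrubbing removes stale entries using the region and card
// liveness bitmaps built during counting; with ParallelGCThreads > 0 the
// regions are claimed by workers via the ScrubRemSet claim value, otherwise
// a single thread scrubs the whole heap.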
  1545 class G1ParScrubRemSetTask: public AbstractGangTask {
  1546 protected:
  1547   G1RemSet* _g1rs;
  1548   BitMap* _region_bm;
  1549   BitMap* _card_bm;
  1550 public:
  1551   G1ParScrubRemSetTask(G1CollectedHeap* g1h,
  1552                        BitMap* region_bm, BitMap* card_bm) :
  1553     AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
  1554     _region_bm(region_bm), _card_bm(card_bm)
  1555   {}
  1557   void work(int i) {
  1558     if (ParallelGCThreads > 0) {
  1559       _g1rs->scrub_par(_region_bm, _card_bm, i,
  1560                        HeapRegion::ScrubRemSetClaimValue);
  1561     } else {
  1562       _g1rs->scrub(_region_bm, _card_bm);
  1566 };
  1568 G1NoteEndOfConcMarkClosure::
  1569 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
  1570                            UncleanRegionList* list,
  1571                            int worker_num)
  1572   : _g1(g1), _worker_num(worker_num),
  1573     _max_live_bytes(0), _regions_claimed(0),
  1574     _freed_bytes(0), _cleared_h_regions(0), _freed_regions(0),
  1575     _claimed_region_time(0.0), _max_region_time(0.0),
  1576     _unclean_region_list(list)
  1577 {}
  1579 bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *r) {
  1580   // We use a claim value of zero here because all regions
  1581   // were claimed with value 1 in the FinalCount task.
  1582   r->reset_gc_time_stamp();
  1583   if (!r->continuesHumongous()) {
  1584     double start = os::elapsedTime();
  1585     _regions_claimed++;
  1586     r->note_end_of_marking();
  1587     _max_live_bytes += r->max_live_bytes();
  1588     _g1->free_region_if_totally_empty_work(r,
  1589                                            _freed_bytes,
  1590                                            _cleared_h_regions,
  1591                                            _freed_regions,
  1592                                            _unclean_region_list,
  1593                                            true /*par*/);
  1594     double region_time = (os::elapsedTime() - start);
  1595     _claimed_region_time += region_time;
  1596     if (region_time > _max_region_time) _max_region_time = region_time;
  1598   return false;
  1601 void ConcurrentMark::cleanup() {
  1602   // world is stopped at this checkpoint
  1603   assert(SafepointSynchronize::is_at_safepoint(),
  1604          "world should be stopped");
  1605   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  1607   // If a full collection has happened, we shouldn't do this.
  1608   if (has_aborted()) {
  1609     g1h->set_marking_complete(); // So bitmap clearing isn't confused
  1610     return;
  1613   if (VerifyDuringGC) {
  1614     HandleMark hm;  // handle scope
  1615     gclog_or_tty->print(" VerifyDuringGC:(before)");
  1616     Universe::heap()->prepare_for_verify();
  1617     Universe::verify(/* allow dirty  */ true,
  1618                      /* silent       */ false,
  1619                      /* prev marking */ true);
  1622   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
  1623   g1p->record_concurrent_mark_cleanup_start();
  1625   double start = os::elapsedTime();
  1627   // Do counting once more with the world stopped for good measure.
  1628   G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
  1629                                         &_region_bm, &_card_bm);
  1630   if (ParallelGCThreads > 0) {
  1631     assert(g1h->check_heap_region_claim_values(
  1632                                                HeapRegion::InitialClaimValue),
  1633            "sanity check");
  1635     int n_workers = g1h->workers()->total_workers();
  1636     g1h->set_par_threads(n_workers);
  1637     g1h->workers()->run_task(&g1_par_count_task);
  1638     g1h->set_par_threads(0);
  1640     assert(g1h->check_heap_region_claim_values(
  1641                                              HeapRegion::FinalCountClaimValue),
  1642            "sanity check");
  1643   } else {
  1644     g1_par_count_task.work(0);
  1647   size_t known_garbage_bytes =
  1648     g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
  1649 #if 0
  1650   gclog_or_tty->print_cr("used %1.2lf, live %1.2lf, garbage %1.2lf",
  1651                          (double) g1_par_count_task.used_bytes() / (double) (1024 * 1024),
  1652                          (double) g1_par_count_task.live_bytes() / (double) (1024 * 1024),
  1653                          (double) known_garbage_bytes / (double) (1024 * 1024));
  1654 #endif // 0
  1655   g1p->set_known_garbage_bytes(known_garbage_bytes);
  1657   size_t start_used_bytes = g1h->used();
  1658   _at_least_one_mark_complete = true;
  1659   g1h->set_marking_complete();
  1661   double count_end = os::elapsedTime();
  1662   double this_final_counting_time = (count_end - start);
  1663   if (G1PrintParCleanupStats) {
  1664     gclog_or_tty->print_cr("Cleanup:");
  1665     gclog_or_tty->print_cr("  Finalize counting: %8.3f ms",
  1666                            this_final_counting_time*1000.0);
  1668   _total_counting_time += this_final_counting_time;
  1670   // Install newly created mark bitMap as "prev".
  1671   swapMarkBitMaps();
  1673   g1h->reset_gc_time_stamp();
  1675   // Note end of marking in all heap regions.
  1676   double note_end_start = os::elapsedTime();
  1677   G1ParNoteEndTask g1_par_note_end_task(g1h, _par_cleanup_thread_state);
  1678   if (ParallelGCThreads > 0) {
  1679     int n_workers = g1h->workers()->total_workers();
  1680     g1h->set_par_threads(n_workers);
  1681     g1h->workers()->run_task(&g1_par_note_end_task);
  1682     g1h->set_par_threads(0);
  1684     assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
  1685            "sanity check");
  1686   } else {
  1687     g1_par_note_end_task.work(0);
  1689   g1h->set_unclean_regions_coming(true);
  1690   double note_end_end = os::elapsedTime();
  1691   // Tell the mutators that there might be unclean regions coming...
  1692   if (G1PrintParCleanupStats) {
  1693     gclog_or_tty->print_cr("  note end of marking: %8.3f ms.",
  1694                            (note_end_end - note_end_start)*1000.0);
  1698   // The rem set scrubbing must be done before the cleanup-end call
  1699   // below, since it affects the metric by which we sort the heap regions.
  1700   if (G1ScrubRemSets) {
  1701     double rs_scrub_start = os::elapsedTime();
  1702     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
  1703     if (ParallelGCThreads > 0) {
  1704       int n_workers = g1h->workers()->total_workers();
  1705       g1h->set_par_threads(n_workers);
  1706       g1h->workers()->run_task(&g1_par_scrub_rs_task);
  1707       g1h->set_par_threads(0);
  1709       assert(g1h->check_heap_region_claim_values(
  1710                                             HeapRegion::ScrubRemSetClaimValue),
  1711              "sanity check");
  1712     } else {
  1713       g1_par_scrub_rs_task.work(0);
  1716     double rs_scrub_end = os::elapsedTime();
  1717     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
  1718     _total_rs_scrub_time += this_rs_scrub_time;
  1721   // this will also free any regions totally full of garbage objects,
  1722   // and sort the regions.
  1723   g1h->g1_policy()->record_concurrent_mark_cleanup_end(
  1724                         g1_par_note_end_task.freed_bytes(),
  1725                         g1_par_note_end_task.max_live_bytes());
  1727   // Statistics.
  1728   double end = os::elapsedTime();
  1729   _cleanup_times.add((end - start) * 1000.0);
  1731   // G1CollectedHeap::heap()->print();
  1732   // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
  1733   // G1CollectedHeap::heap()->get_gc_time_stamp());
  1735   if (PrintGC || PrintGCDetails) {
  1736     g1h->print_size_transition(gclog_or_tty,
  1737                                start_used_bytes,
  1738                                g1h->used(),
  1739                                g1h->capacity());
  1742   size_t cleaned_up_bytes = start_used_bytes - g1h->used();
  1743   g1p->decrease_known_garbage_bytes(cleaned_up_bytes);
  1745   // We need to make this be a "collection" so any collection pause that
  1746   // races with it goes around and waits for completeCleanup to finish.
  1747   g1h->increment_total_collections();
  1749   if (VerifyDuringGC) {
  1750     HandleMark hm;  // handle scope
  1751     gclog_or_tty->print(" VerifyDuringGC:(after)");
  1752     Universe::heap()->prepare_for_verify();
  1753     Universe::verify(/* allow dirty  */ true,
  1754                      /* silent       */ false,
  1755                      /* prev marking */ true);
  1759 void ConcurrentMark::completeCleanup() {
  1760   // A full collection intervened.
  1761   if (has_aborted()) return;
  1763   int first = 0;
  1764   int last = (int)MAX2(ParallelGCThreads, (size_t)1);
  1765   for (int t = 0; t < last; t++) {
  1766     UncleanRegionList* list = &_par_cleanup_thread_state[t]->list;
  1767     assert(list->well_formed(), "Inv");
  1768     HeapRegion* hd = list->hd();
  1769     while (hd != NULL) {
  1770       // Finish this region's cleanup: clear its rem set and move it to the unclean list.
  1771       hd->rem_set()->clear();
  1772       HeapRegion* next_hd = hd->next_from_unclean_list();
  1773       (void)list->pop();
  1774       guarantee(list->hd() == next_hd, "how not?");
  1775       _g1h->put_region_on_unclean_list(hd);
  1776       if (!hd->isHumongous()) {
  1777         // This region adds 1 to the _free_regions count.
  1778         _g1h->finish_free_region_work(0, 0, 1, NULL);
  1780       hd = list->hd();
  1781       guarantee(hd == next_hd, "how not?");
  1787 class G1CMIsAliveClosure: public BoolObjectClosure {
  1788   G1CollectedHeap* _g1;
  1789  public:
  1790   G1CMIsAliveClosure(G1CollectedHeap* g1) :
  1791     _g1(g1)
  1792   {}
  1794   void do_object(oop obj) {
  1795     assert(false, "not to be invoked");
  1797   bool do_object_b(oop obj) {
  1798     HeapWord* addr = (HeapWord*)obj;
  1799     return addr != NULL &&
  1800            (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
  1802 };
  1804 class G1CMKeepAliveClosure: public OopClosure {
  1805   G1CollectedHeap* _g1;
  1806   ConcurrentMark*  _cm;
  1807   CMBitMap*        _bitMap;
  1808  public:
  1809   G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
  1810                        CMBitMap* bitMap) :
  1811     _g1(g1), _cm(cm),
  1812     _bitMap(bitMap) {}
  1814   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  1815   virtual void do_oop(      oop* p) { do_oop_work(p); }
  1817   template <class T> void do_oop_work(T* p) {
  1818     oop thisOop = oopDesc::load_decode_heap_oop(p);
  1819     HeapWord* addr = (HeapWord*)thisOop;
  1820     if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(thisOop)) {
  1821       _bitMap->mark(addr);
  1822       _cm->mark_stack_push(thisOop);
  1825 };
  1827 class G1CMDrainMarkingStackClosure: public VoidClosure {
  1828   CMMarkStack*                  _markStack;
  1829   CMBitMap*                     _bitMap;
  1830   G1CMKeepAliveClosure*         _oopClosure;
  1831  public:
  1832   G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
  1833                                G1CMKeepAliveClosure* oopClosure) :
  1834     _bitMap(bitMap),
  1835     _markStack(markStack),
  1836     _oopClosure(oopClosure)
  1837   {}
  1839   void do_void() {
  1840     _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
  1842 };
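// weakRefsWork() drives reference processing at remark time: the three
// closures above supply the is-alive predicate, the keep-alive step (mark
// the referent in the next bitmap and push it on the mark stack) and the
// mark-stack draining step that the ReferenceProcessor needs.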
  1844 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
  1845   ResourceMark rm;
  1846   HandleMark   hm;
  1847   G1CollectedHeap* g1h   = G1CollectedHeap::heap();
  1848   ReferenceProcessor* rp = g1h->ref_processor();
  1850   // Process weak references.
  1851   rp->setup_policy(clear_all_soft_refs);
  1852   assert(_markStack.isEmpty(), "mark stack should be empty");
  1854   G1CMIsAliveClosure   g1IsAliveClosure  (g1h);
  1855   G1CMKeepAliveClosure g1KeepAliveClosure(g1h, this, nextMarkBitMap());
  1856   G1CMDrainMarkingStackClosure
  1857     g1DrainMarkingStackClosure(nextMarkBitMap(), &_markStack,
  1858                                &g1KeepAliveClosure);
  1860   // XXXYYY  Also: copy the parallel ref processing code from CMS.
  1861   rp->process_discovered_references(&g1IsAliveClosure,
  1862                                     &g1KeepAliveClosure,
  1863                                     &g1DrainMarkingStackClosure,
  1864                                     NULL);
  1865   assert(_markStack.overflow() || _markStack.isEmpty(),
  1866          "mark stack should be empty (unless it overflowed)");
  1867   if (_markStack.overflow()) {
  1868     set_has_overflown();
  1871   rp->enqueue_discovered_references();
  1872   rp->verify_no_references_recorded();
  1873   assert(!rp->discovery_enabled(), "should have been disabled");
  1875   // Now clean up stale oops in SymbolTable and StringTable
  1876   SymbolTable::unlink(&g1IsAliveClosure);
  1877   StringTable::unlink(&g1IsAliveClosure);
  1880 void ConcurrentMark::swapMarkBitMaps() {
  1881   CMBitMapRO* temp = _prevMarkBitMap;
  1882   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
  1883   _nextMarkBitMap  = (CMBitMap*)  temp;
  1886 class CMRemarkTask: public AbstractGangTask {
  1887 private:
  1888   ConcurrentMark *_cm;
  1890 public:
  1891   void work(int worker_i) {
  1892     // Since all available tasks are actually started, we should
  1893     // only proceed if we're supposed to be active.
  1894     if ((size_t)worker_i < _cm->active_tasks()) {
  1895       CMTask* task = _cm->task(worker_i);
  1896       task->record_start_time();
  1897       do {
  1898         task->do_marking_step(1000000000.0 /* something very large */);
  1899       } while (task->has_aborted() && !_cm->has_overflown());
  1900       // If we overflow, then we do not want to restart. We instead
  1901       // want to abort remark and do concurrent marking again.
  1902       task->record_end_time();
  1906   CMRemarkTask(ConcurrentMark* cm) :
  1907     AbstractGangTask("Par Remark"), _cm(cm) { }
  1908 };
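// checkpointRootsFinalWork() performs the remark pause proper: each worker
// repeatedly calls do_marking_step() on its CMTask until the task finishes
// or the global stack overflows; afterwards all completed SATB buffers must
// have been processed (see the guarantees below).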
  1910 void ConcurrentMark::checkpointRootsFinalWork() {
  1911   ResourceMark rm;
  1912   HandleMark   hm;
  1913   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  1915   g1h->ensure_parsability(false);
  1917   if (ParallelGCThreads > 0) {
  1918     g1h->change_strong_roots_parity();
  1919     // this is remark, so we'll use up all available threads
  1920     int active_workers = ParallelGCThreads;
  1921     set_phase(active_workers, false);
  1923     CMRemarkTask remarkTask(this);
  1924     // We will start all available threads, even if we decide that the
  1925     // active_workers will be fewer. The extra ones will just bail out
  1926     // immediately.
  1927     int n_workers = g1h->workers()->total_workers();
  1928     g1h->set_par_threads(n_workers);
  1929     g1h->workers()->run_task(&remarkTask);
  1930     g1h->set_par_threads(0);
  1932     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  1933     guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" );
  1934   } else {
  1935     g1h->change_strong_roots_parity();
  1936     // this is remark, so we'll use up all available threads
  1937     int active_workers = 1;
  1938     set_phase(active_workers, false);
  1940     CMRemarkTask remarkTask(this);
  1941     // In the serial case, run the single remark task in the calling thread.
  1944     remarkTask.work(0);
  1946     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  1947     guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" );
  1950   print_stats();
  1952   if (!restart_for_overflow())
  1953     set_non_marking_state();
  1955 #if VERIFY_OBJS_PROCESSED
  1956   if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
  1957     gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
  1958                            _scan_obj_cl.objs_processed,
  1959                            ThreadLocalObjQueue::objs_enqueued);
  1960     guarantee(_scan_obj_cl.objs_processed ==
  1961               ThreadLocalObjQueue::objs_enqueued,
  1962               "Different number of objs processed and enqueued.");
  1964 #endif
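// The closures below are debugging aids: they print, for each region and for
// each object reachable through the bitmap, whether the object is marked,
// unmarked, or allocated since the previous marking (over TAMS).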
  1967 class ReachablePrinterOopClosure: public OopClosure {
  1968 private:
  1969   G1CollectedHeap* _g1h;
  1970   CMBitMapRO*      _bitmap;
  1971   outputStream*    _out;
  1973 public:
  1974   ReachablePrinterOopClosure(CMBitMapRO* bitmap, outputStream* out) :
  1975     _bitmap(bitmap), _g1h(G1CollectedHeap::heap()), _out(out) { }
  1977   void do_oop(narrowOop* p) { do_oop_work(p); }
  1978   void do_oop(      oop* p) { do_oop_work(p); }
  1980   template <class T> void do_oop_work(T* p) {
  1981     oop         obj = oopDesc::load_decode_heap_oop(p);
  1982     const char* str = NULL;
  1983     const char* str2 = "";
  1985     if (!_g1h->is_in_g1_reserved(obj))
  1986       str = "outside G1 reserved";
  1987     else {
  1988       HeapRegion* hr  = _g1h->heap_region_containing(obj);
  1989       guarantee( hr != NULL, "invariant" );
  1990       if (hr->obj_allocated_since_prev_marking(obj)) {
  1991         str = "over TAMS";
  1992         if (_bitmap->isMarked((HeapWord*) obj))
  1993           str2 = " AND MARKED";
  1994       } else if (_bitmap->isMarked((HeapWord*) obj))
  1995         str = "marked";
  1996       else
  1997         str = "#### NOT MARKED ####";
  2000     _out->print_cr("    "PTR_FORMAT" contains "PTR_FORMAT" %s%s",
  2001                    p, (void*) obj, str, str2);
  2003 };
  2005 class ReachablePrinterClosure: public BitMapClosure {
  2006 private:
  2007   CMBitMapRO* _bitmap;
  2008   outputStream* _out;
  2010 public:
  2011   ReachablePrinterClosure(CMBitMapRO* bitmap, outputStream* out) :
  2012     _bitmap(bitmap), _out(out) { }
  2014   bool do_bit(size_t offset) {
  2015     HeapWord* addr = _bitmap->offsetToHeapWord(offset);
  2016     ReachablePrinterOopClosure oopCl(_bitmap, _out);
  2018     _out->print_cr("  obj "PTR_FORMAT", offset %10d (marked)", addr, offset);
  2019     oop(addr)->oop_iterate(&oopCl);
  2020     _out->print_cr("");
  2022     return true;
  2024 };
  2026 class ObjInRegionReachablePrinterClosure : public ObjectClosure {
  2027 private:
  2028   CMBitMapRO* _bitmap;
  2029   outputStream* _out;
  2031 public:
  2032   void do_object(oop o) {
  2033     ReachablePrinterOopClosure oopCl(_bitmap, _out);
  2035     _out->print_cr("  obj "PTR_FORMAT" (over TAMS)", (void*) o);
  2036     o->oop_iterate(&oopCl);
  2037     _out->print_cr("");
  2040   ObjInRegionReachablePrinterClosure(CMBitMapRO* bitmap, outputStream* out) :
  2041     _bitmap(bitmap), _out(out) { }
  2042 };
  2044 class RegionReachablePrinterClosure : public HeapRegionClosure {
  2045 private:
  2046   CMBitMapRO* _bitmap;
  2047   outputStream* _out;
  2049 public:
  2050   bool doHeapRegion(HeapRegion* hr) {
  2051     HeapWord* b = hr->bottom();
  2052     HeapWord* e = hr->end();
  2053     HeapWord* t = hr->top();
  2054     HeapWord* p = hr->prev_top_at_mark_start();
  2055     _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
  2056                    "PTAMS: "PTR_FORMAT, b, e, t, p);
  2057     _out->print_cr("");
  2059     ObjInRegionReachablePrinterClosure ocl(_bitmap, _out);
  2060     hr->object_iterate_mem_careful(MemRegion(p, t), &ocl);
  2062     return false;
  2065   RegionReachablePrinterClosure(CMBitMapRO* bitmap,
  2066                                 outputStream* out) :
  2067     _bitmap(bitmap), _out(out) { }
  2068 };
  2070 void ConcurrentMark::print_prev_bitmap_reachable() {
  2071   outputStream* out = gclog_or_tty;
  2073 #if SEND_HEAP_DUMP_TO_FILE
  2074   guarantee(heap_dump_file == NULL, "Protocol");
  2075   char fn_buf[100];
  2076   sprintf(fn_buf, "/tmp/dump.txt.%d", os::current_process_id());
  2077   heap_dump_file = fopen(fn_buf, "w");
  2078   fileStream fstream(heap_dump_file);
  2079   out = &fstream;
  2080 #endif // SEND_HEAP_DUMP_TO_FILE
  2082   RegionReachablePrinterClosure rcl(_prevMarkBitMap, out);
  2083   out->print_cr("--- ITERATING OVER REGIONS WITH PTAMS < TOP");
  2084   _g1h->heap_region_iterate(&rcl);
  2085   out->print_cr("");
  2087   ReachablePrinterClosure cl(_prevMarkBitMap, out);
  2088   out->print_cr("--- REACHABLE OBJECTS ON THE BITMAP");
  2089   _prevMarkBitMap->iterate(&cl);
  2090   out->print_cr("");
  2092 #if SEND_HEAP_DUMP_TO_FILE
  2093   fclose(heap_dump_file);
  2094   heap_dump_file = NULL;
  2095 #endif // SEND_HEAP_DUMP_TO_FILE
  2098 // This note is for drainAllSATBBuffers and the code in between.
  2099 // In the future we could reuse a task to do this work during an
  2100 // evacuation pause (since now tasks are not active and can be claimed
  2101 // during an evacuation pause). This was a late change to the code and
  2102 // is currently not being taken advantage of.
  2104 class CMGlobalObjectClosure : public ObjectClosure {
  2105 private:
  2106   ConcurrentMark* _cm;
  2108 public:
  2109   void do_object(oop obj) {
  2110     _cm->deal_with_reference(obj);
  2113   CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
  2114 };
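// The global (non-task) way of graying an object: if it lies in the G1
// reserved space and is not yet marked, mark it in the next bitmap and, when
// it sits below the global finger, push it on the global mark stack so that
// it will eventually be scanned.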
  2116 void ConcurrentMark::deal_with_reference(oop obj) {
  2117   if (verbose_high())
  2118     gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
  2119                            (void*) obj);
  2122   HeapWord* objAddr = (HeapWord*) obj;
  2123   assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
  2124   if (_g1h->is_in_g1_reserved(objAddr)) {
  2125     tmp_guarantee_CM( obj != NULL, "is_in_g1_reserved should ensure this" );
  2126     HeapRegion* hr = _g1h->heap_region_containing(obj);
  2127     if (_g1h->is_obj_ill(obj, hr)) {
  2128       if (verbose_high())
  2129         gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
  2130                                "marked", (void*) obj);
  2132       // we need to mark it first
  2133       if (_nextMarkBitMap->parMark(objAddr)) {
  2134         // No OrderAccess::store_load() is needed. It is implicit in the
  2135         // CAS done in parMark(objAddr) above
  2136         HeapWord* finger = _finger;
  2137         if (objAddr < finger) {
  2138           if (verbose_high())
  2139             gclog_or_tty->print_cr("[global] below the global finger "
  2140                                    "("PTR_FORMAT"), pushing it", finger);
  2141           if (!mark_stack_push(obj)) {
  2142             if (verbose_low())
  2143               gclog_or_tty->print_cr("[global] global stack overflow during "
  2144                                      "deal_with_reference");
  2152 void ConcurrentMark::drainAllSATBBuffers() {
  2153   CMGlobalObjectClosure oc(this);
  2154   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  2155   satb_mq_set.set_closure(&oc);
  2157   while (satb_mq_set.apply_closure_to_completed_buffer()) {
  2158     if (verbose_medium())
  2159       gclog_or_tty->print_cr("[global] processed an SATB buffer");
  2162   // no need to check whether we should do this, as this is only
  2163   // called during an evacuation pause
  2164   satb_mq_set.iterate_closure_all_threads();
  2166   satb_mq_set.set_closure(NULL);
  2167   guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" );
  2170 void ConcurrentMark::markPrev(oop p) {
  2171   // Note we are overriding the read-only view of the prev map here, via
  2172   // the cast.
  2173   ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
  2176 void ConcurrentMark::clear(oop p) {
  2177   assert(p != NULL && p->is_oop(), "expected an oop");
  2178   HeapWord* addr = (HeapWord*)p;
  2179   assert(addr >= _nextMarkBitMap->startWord() &&
  2180          addr < _nextMarkBitMap->endWord(), "in a region");
  2182   _nextMarkBitMap->clear(addr);
  2185 void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
  2186   // Note we are overriding the read-only view of the prev map here, via
  2187   // the cast.
  2188   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
  2189   _nextMarkBitMap->clearRange(mr);
  2192 HeapRegion*
  2193 ConcurrentMark::claim_region(int task_num) {
  2194   // "checkpoint" the finger
  2195   HeapWord* finger = _finger;
  2197   // _heap_end will not change underneath our feet; it only changes at
  2198   // yield points.
  2199   while (finger < _heap_end) {
  2200     tmp_guarantee_CM( _g1h->is_in_g1_reserved(finger), "invariant" );
  2202     // is the gap between reading the finger and doing the CAS too long?
  2204     HeapRegion* curr_region   = _g1h->heap_region_containing(finger);
  2205     HeapWord*   bottom        = curr_region->bottom();
  2206     HeapWord*   end           = curr_region->end();
  2207     HeapWord*   limit         = curr_region->next_top_at_mark_start();
  2209     if (verbose_low())
  2210       gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
  2211                              "["PTR_FORMAT", "PTR_FORMAT"), "
  2212                              "limit = "PTR_FORMAT,
  2213                              task_num, curr_region, bottom, end, limit);
  2215     HeapWord* res =
  2216       (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
  2217     if (res == finger) {
  2218       // we succeeded
  2220       // notice that _finger == end cannot be guaranteed here since
  2221       // someone else might have moved the finger even further
  2222       guarantee( _finger >= end, "the finger should have moved forward" );
  2224       if (verbose_low())
  2225         gclog_or_tty->print_cr("[%d] we were successful with region = "
  2226                                PTR_FORMAT, task_num, curr_region);
  2228       if (limit > bottom) {
  2229         if (verbose_low())
  2230           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
  2231                                  "returning it ", task_num, curr_region);
  2232         return curr_region;
  2233       } else {
  2234         tmp_guarantee_CM( limit == bottom,
  2235                           "the region limit should be at bottom" );
  2236         if (verbose_low())
  2237           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
  2238                                  "returning NULL", task_num, curr_region);
  2239         // we return NULL and the caller should try calling
  2240         // claim_region() again.
  2241         return NULL;
  2243     } else {
  2244       guarantee( _finger > finger, "the finger should have moved forward" );
  2245       if (verbose_low())
  2246         gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
  2247                                "global finger = "PTR_FORMAT", "
  2248                                "our finger = "PTR_FORMAT,
  2249                                task_num, _finger, finger);
  2251       // read it again
  2252       finger = _finger;
  2256   return NULL;
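// oops_do() visits every outstanding gray reference: first the global mark
// stack, then each task's local queue; finally, any region stack entries
// that point into the collection set are invalidated.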
  2259 void ConcurrentMark::oops_do(OopClosure* cl) {
  2260   if (_markStack.size() > 0 && verbose_low())
  2261     gclog_or_tty->print_cr("[global] scanning the global marking stack, "
  2262                            "size = %d", _markStack.size());
  2263   // we first iterate over the contents of the mark stack...
  2264   _markStack.oops_do(cl);
  2266   for (int i = 0; i < (int)_max_task_num; ++i) {
  2267     OopTaskQueue* queue = _task_queues->queue((int)i);
  2269     if (queue->size() > 0 && verbose_low())
  2270       gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
  2271                              "size = %d", i, queue->size());
  2273     // ...then over the contents of all the task queues.
  2274     queue->oops_do(cl);
  2277   // finally, invalidate any entries in the region stack that
  2278   // point into the collection set
  2279   if (_regionStack.invalidate_entries_into_cset()) {
  2280     // otherwise, any gray objects copied during the evacuation pause
  2281     // might not be visited.
  2282     guarantee( _should_gray_objects, "invariant" );
  2286 void ConcurrentMark::clear_marking_state() {
  2287   _markStack.setEmpty();
  2288   _markStack.clear_overflow();
  2289   _regionStack.setEmpty();
  2290   _regionStack.clear_overflow();
  2291   clear_has_overflown();
  2292   _finger = _heap_start;
  2294   for (int i = 0; i < (int)_max_task_num; ++i) {
  2295     OopTaskQueue* queue = _task_queues->queue(i);
  2296     queue->set_empty();
  2300 void ConcurrentMark::print_stats() {
  2301   if (verbose_stats()) {
  2302     gclog_or_tty->print_cr("---------------------------------------------------------------------");
  2303     for (size_t i = 0; i < _active_tasks; ++i) {
  2304       _tasks[i]->print_stats();
  2305       gclog_or_tty->print_cr("---------------------------------------------------------------------");
  2310 class CSMarkOopClosure: public OopClosure {
  2311   friend class CSMarkBitMapClosure;
  2313   G1CollectedHeap* _g1h;
  2314   CMBitMap*        _bm;
  2315   ConcurrentMark*  _cm;
  2316   oop*             _ms;
  2317   jint*            _array_ind_stack;
  2318   int              _ms_size;
  2319   int              _ms_ind;
  2320   int              _array_increment;
  2322   bool push(oop obj, int arr_ind = 0) {
  2323     if (_ms_ind == _ms_size) {
  2324       gclog_or_tty->print_cr("Mark stack is full.");
  2325       return false;
  2327     _ms[_ms_ind] = obj;
  2328     if (obj->is_objArray()) _array_ind_stack[_ms_ind] = arr_ind;
  2329     _ms_ind++;
  2330     return true;
  2333   oop pop() {
  2334     if (_ms_ind == 0) return NULL;
  2335     else {
  2336       _ms_ind--;
  2337       return _ms[_ms_ind];
  2341   template <class T> bool drain() {
  2342     while (_ms_ind > 0) {
  2343       oop obj = pop();
  2344       assert(obj != NULL, "Since index was non-zero.");
  2345       if (obj->is_objArray()) {
  2346         jint arr_ind = _array_ind_stack[_ms_ind];
  2347         objArrayOop aobj = objArrayOop(obj);
  2348         jint len = aobj->length();
  2349         jint next_arr_ind = arr_ind + _array_increment;
  2350         if (next_arr_ind < len) {
  2351           push(obj, next_arr_ind);
  2353         // Now process this portion of this one.
  2354         int lim = MIN2(next_arr_ind, len);
  2355         for (int j = arr_ind; j < lim; j++) {
  2356           do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j));
  2359       } else {
  2360         obj->oop_iterate(this);
  2362       if (abort()) return false;
  2364     return true;
  2367 public:
  2368   CSMarkOopClosure(ConcurrentMark* cm, int ms_size) :
  2369     _g1h(G1CollectedHeap::heap()),
  2370     _cm(cm),
  2371     _bm(cm->nextMarkBitMap()),
  2372     _ms_size(ms_size), _ms_ind(0),
  2373     _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
  2374     _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
  2375     _array_increment(MAX2(ms_size/8, 16))
  2376   {}
  2378   ~CSMarkOopClosure() {
  2379     FREE_C_HEAP_ARRAY(oop, _ms);
  2380     FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
  2383   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  2384   virtual void do_oop(      oop* p) { do_oop_work(p); }
  2386   template <class T> void do_oop_work(T* p) {
  2387     T heap_oop = oopDesc::load_heap_oop(p);
  2388     if (oopDesc::is_null(heap_oop)) return;
  2389     oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
  2390     if (obj->is_forwarded()) {
  2391       // If the object has already been forwarded, we have to make sure
  2392       // that it's marked.  So follow the forwarding pointer.  Note that
  2393       // this does the right thing for self-forwarding pointers in the
  2394       // evacuation failure case.
  2395       obj = obj->forwardee();
  2397     HeapRegion* hr = _g1h->heap_region_containing(obj);
  2398     if (hr != NULL) {
  2399       if (hr->in_collection_set()) {
  2400         if (_g1h->is_obj_ill(obj)) {
  2401           _bm->mark((HeapWord*)obj);
  2402           if (!push(obj)) {
  2403             gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
  2404             set_abort();
  2407       } else {
  2408         // Outside the collection set; we need to gray it
  2409         _cm->deal_with_reference(obj);
  2413 };
  2415 class CSMarkBitMapClosure: public BitMapClosure {
  2416   G1CollectedHeap* _g1h;
  2417   CMBitMap*        _bitMap;
  2418   ConcurrentMark*  _cm;
  2419   CSMarkOopClosure _oop_cl;
  2420 public:
  2421   CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :
  2422     _g1h(G1CollectedHeap::heap()),
  2423     _bitMap(cm->nextMarkBitMap()),
  2424     _oop_cl(cm, ms_size)
  2425   {}
  2427   ~CSMarkBitMapClosure() {}
  2429   bool do_bit(size_t offset) {
  2430     // convert offset into a HeapWord*
  2431     HeapWord* addr = _bitMap->offsetToHeapWord(offset);
  2432     assert(addr >= _bitMap->startWord() && addr < _bitMap->endWord(),
  2433            "address out of range");
  2434     assert(_bitMap->isMarked(addr), "tautology");
  2435     oop obj = oop(addr);
  2436     if (!obj->is_forwarded()) {
  2437       if (!_oop_cl.push(obj)) return false;
  2438       if (UseCompressedOops) {
  2439         if (!_oop_cl.drain<narrowOop>()) return false;
  2440       } else {
  2441         if (!_oop_cl.drain<oop>()) return false;
  2444     // Otherwise...
  2445     return true;
  2447 };
  2450 class CompleteMarkingInCSHRClosure: public HeapRegionClosure {
  2451   CMBitMap* _bm;
  2452   CSMarkBitMapClosure _bit_cl;
  2453   enum SomePrivateConstants {
  2454     MSSize = 1000
  2455   };
  2456   bool _completed;
  2457 public:
  2458   CompleteMarkingInCSHRClosure(ConcurrentMark* cm) :
  2459     _bm(cm->nextMarkBitMap()),
  2460     _bit_cl(cm, MSSize),
  2461     _completed(true)
  2462   {}
  2464   ~CompleteMarkingInCSHRClosure() {}
  2466   bool doHeapRegion(HeapRegion* r) {
  2467     if (!r->evacuation_failed()) {
  2468       MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
  2469       if (!mr.is_empty()) {
  2470         if (!_bm->iterate(&_bit_cl, mr)) {
  2471           _completed = false;
  2472           return true;
  2476     return false;
  2479   bool completed() { return _completed; }
  2480 };
  2482 class ClearMarksInHRClosure: public HeapRegionClosure {
  2483   CMBitMap* _bm;
  2484 public:
  2485   ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { }
  2487   bool doHeapRegion(HeapRegion* r) {
  2488     if (!r->used_region().is_empty() && !r->evacuation_failed()) {
  2489       MemRegion usedMR = r->used_region();
  2490       _bm->clearRange(usedMR);
  2492     return false;
  2494 };
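// complete_marking_in_collection_set() is called during an evacuation pause:
// it finishes marking through the objects of collection-set regions using the
// closures above, retrying whenever an iteration is cut short by a full local
// mark stack, and then clears the next bitmap over those regions.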
  2496 void ConcurrentMark::complete_marking_in_collection_set() {
  2497   G1CollectedHeap* g1h =  G1CollectedHeap::heap();
  2499   if (!g1h->mark_in_progress()) {
  2500     g1h->g1_policy()->record_mark_closure_time(0.0);
  2501     return;
  2504   int i = 1;
  2505   double start = os::elapsedTime();
  2506   while (true) {
  2507     i++;
  2508     CompleteMarkingInCSHRClosure cmplt(this);
  2509     g1h->collection_set_iterate(&cmplt);
  2510     if (cmplt.completed()) break;
  2512   double end_time = os::elapsedTime();
  2513   double elapsed_time_ms = (end_time - start) * 1000.0;
  2514   g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
  2515   if (PrintGCDetails) {
  2516     gclog_or_tty->print_cr("Mark closure took %5.2f ms.", elapsed_time_ms);
  2519   ClearMarksInHRClosure clr(nextMarkBitMap());
  2520   g1h->collection_set_iterate(&clr);
  2523 // The next two methods deal with the following optimisation. Some
  2524 // objects are gray by being marked and located above the finger. If
  2525 // they are copied below the finger during an evacuation pause, then
  2526 // they need to be pushed on the stack. The observation is that, if
  2527 // there are no regions in the collection set located above the
  2528 // finger, then the above cannot happen, hence we do not need to
  2529 // explicitly gray any objects when copying them to below the
  2530 // finger. The global stack will be scanned to ensure that, if it
  2531 // points to objects being copied, it will update their
  2532 // location. There is a tricky situation with the gray objects in
  2533 // the region stack that are being copied, however. See the comment in
  2534 // newCSet().
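// A concrete instance of the situation described above: an object is marked,
// lies above the global finger (so nothing has pushed it anywhere yet) and
// its region is in the collection set. If the pause copies it below the
// finger without pushing it, no marking task would ever visit the new copy.
// newCSet() and registerCSetRegion() detect when this can happen and set
// _should_gray_objects accordingly.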
  2536 void ConcurrentMark::newCSet() {
  2537   if (!concurrent_marking_in_progress())
  2538     // nothing to do if marking is not in progress
  2539     return;
  2541   // find what the lowest finger is among the global and local fingers
  2542   _min_finger = _finger;
  2543   for (int i = 0; i < (int)_max_task_num; ++i) {
  2544     CMTask* task = _tasks[i];
  2545     HeapWord* task_finger = task->finger();
  2546     if (task_finger != NULL && task_finger < _min_finger)
  2547       _min_finger = task_finger;
  2550   _should_gray_objects = false;
  2552   // This fixes a very subtle and frustrating bug. It might be the case
  2553   // that, during an evacuation pause, heap regions that contain
  2554   // objects that are gray (by being in regions contained in the
  2555   // region stack) are included in the collection set. Since such gray
  2556   // objects will be moved, and because it's not easy to redirect
  2557   // region stack entries to point to a new location (because objects
  2558   // in one region might be scattered to multiple regions after they
  2559   // are copied), one option is to ensure that all marked objects
  2560   // copied during a pause are pushed on the stack. Notice, however,
  2561   // that this problem can only happen when the region stack is not
  2562   // empty during an evacuation pause. So, we make the fix a bit less
  2563   // conservative and ensure that regions are pushed on the stack,
  2564   // irrespective of whether all collection set regions are below the
  2565   // finger, if the region stack is not empty. This is expected to be
  2566   // a rare case, so I don't think it's necessary to be smarter about it.
  2567   if (!region_stack_empty())
  2568     _should_gray_objects = true;
  2571 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
  2572   if (!concurrent_marking_in_progress())
  2573     return;
  2575   HeapWord* region_end = hr->end();
  2576   if (region_end > _min_finger)
  2577     _should_gray_objects = true;
  2580 // abandon current marking iteration due to a Full GC
  2581 void ConcurrentMark::abort() {
  2582   // Clear all marks to force marking thread to do nothing
  2583   _nextMarkBitMap->clearAll();
  2584   // Empty mark stack
  2585   clear_marking_state();
  2586   for (int i = 0; i < (int)_max_task_num; ++i)
  2587     _tasks[i]->clear_region_fields();
  2588   _has_aborted = true;
  2590   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  2591   satb_mq_set.abandon_partial_marking();
  2592   satb_mq_set.set_active_all_threads(false);
  2595 static void print_ms_time_info(const char* prefix, const char* name,
  2596                                NumberSeq& ns) {
  2597   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
  2598                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
  2599   if (ns.num() > 0) {
  2600     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
  2601                            prefix, ns.sd(), ns.maximum());
  2605 void ConcurrentMark::print_summary_info() {
  2606   gclog_or_tty->print_cr(" Concurrent marking:");
  2607   print_ms_time_info("  ", "init marks", _init_times);
  2608   print_ms_time_info("  ", "remarks", _remark_times);
  2610     print_ms_time_info("     ", "final marks", _remark_mark_times);
  2611     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
  2614   print_ms_time_info("  ", "cleanups", _cleanup_times);
  2615   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
  2616                          _total_counting_time,
  2617                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
  2618                           (double)_cleanup_times.num()
  2619                          : 0.0));
  2620   if (G1ScrubRemSets) {
  2621     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
  2622                            _total_rs_scrub_time,
  2623                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
  2624                             (double)_cleanup_times.num()
  2625                            : 0.0));
  2627   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
  2628                          (_init_times.sum() + _remark_times.sum() +
  2629                           _cleanup_times.sum())/1000.0);
  2630   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
  2631                 "(%8.2f s marking, %8.2f s counting).",
  2632                 cmThread()->vtime_accum(),
  2633                 cmThread()->vtime_mark_accum(),
  2634                 cmThread()->vtime_count_accum());
  2637 // Closures
  2638 // XXX: there seems to be a lot of code duplication here;
  2639 // should refactor and consolidate the shared code.
  2641 // The methods below let the concurrent marking thread yield to, and
  2642 // account for, safepoint pauses.
  2644 // We take a break if someone is trying to stop the world.
  2645 bool ConcurrentMark::do_yield_check(int worker_i) {
  2646   if (should_yield()) {
  2647     if (worker_i == 0)
  2648       _g1h->g1_policy()->record_concurrent_pause();
  2649     cmThread()->yield();
  2650     if (worker_i == 0)
  2651       _g1h->g1_policy()->record_concurrent_pause_end();
  2652     return true;
  2653   } else {
  2654     return false;
  2658 bool ConcurrentMark::should_yield() {
  2659   return cmThread()->should_yield();
  2662 bool ConcurrentMark::containing_card_is_marked(void* p) {
  2663   size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
  2664   return _card_bm.at(offset >> CardTableModRefBS::card_shift);
  2667 bool ConcurrentMark::containing_cards_are_marked(void* start,
  2668                                                  void* last) {
  2669   return
  2670     containing_card_is_marked(start) &&
  2671     containing_card_is_marked(last);
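// A worked example of the card indexing above, assuming the usual 512-byte
// cards (CardTableModRefBS::card_shift == 9): an address 4 KB past the start
// of the reserved heap has byte offset 0x1000, so containing_card_is_marked()
// tests bit 0x1000 >> 9 == 8 of _card_bm.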
  2674 #ifndef PRODUCT
  2675 // for debugging purposes
  2676 void ConcurrentMark::print_finger() {
  2677   gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
  2678                          _heap_start, _heap_end, _finger);
  2679   for (int i = 0; i < (int) _max_task_num; ++i) {
  2680     gclog_or_tty->print("   %d: "PTR_FORMAT, i, _tasks[i]->finger());
  2682   gclog_or_tty->print_cr("");
  2684 #endif
  2686 // Closure for iteration over bitmaps
  2687 class CMBitMapClosure : public BitMapClosure {
  2688 private:
  2689   // the bitmap that is being iterated over
  2690   CMBitMap*                   _nextMarkBitMap;
  2691   ConcurrentMark*             _cm;
  2692   CMTask*                     _task;
  2693   // true if we're scanning a heap region claimed by the task (so that
  2694   // we move the finger along), false if we're not, i.e. currently when
  2695   // scanning a heap region popped from the region stack (so that we
  2696   // do not move the task finger along; it'd be a mistake if we did so).
  2697   bool                        _scanning_heap_region;
  2699 public:
  2700   CMBitMapClosure(CMTask *task,
  2701                   ConcurrentMark* cm,
  2702                   CMBitMap* nextMarkBitMap)
  2703     :  _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
  2705   void set_scanning_heap_region(bool scanning_heap_region) {
  2706     _scanning_heap_region = scanning_heap_region;
  2709   bool do_bit(size_t offset) {
  2710     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
  2711     tmp_guarantee_CM( _nextMarkBitMap->isMarked(addr), "invariant" );
  2712     tmp_guarantee_CM( addr < _cm->finger(), "invariant" );
  2714     if (_scanning_heap_region) {
  2715       statsOnly( _task->increase_objs_found_on_bitmap() );
  2716       tmp_guarantee_CM( addr >= _task->finger(), "invariant" );
  2717       // We move that task's local finger along.
  2718       _task->move_finger_to(addr);
  2719     } else {
  2720       // We move the task's region finger along.
  2721       _task->move_region_finger_to(addr);
  2724     _task->scan_object(oop(addr));
  2725     // we only partially drain the local queue and global stack
  2726     _task->drain_local_queue(true);
  2727     _task->drain_global_stack(true);
  2729     // if the has_aborted flag has been raised, we need to bail out of
  2730     // the iteration
  2731     return !_task->has_aborted();
  2733 };
  2735 // Closure for iterating over objects, currently only used for
  2736 // processing SATB buffers.
  2737 class CMObjectClosure : public ObjectClosure {
  2738 private:
  2739   CMTask* _task;
  2741 public:
  2742   void do_object(oop obj) {
  2743     _task->deal_with_reference(obj);
  2746   CMObjectClosure(CMTask* task) : _task(task) { }
  2747 };
  2749 // Closure for iterating over object fields
  2750 class CMOopClosure : public OopClosure {
  2751 private:
  2752   G1CollectedHeap*   _g1h;
  2753   ConcurrentMark*    _cm;
  2754   CMTask*            _task;
  2756 public:
  2757   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  2758   virtual void do_oop(      oop* p) { do_oop_work(p); }
  2760   template <class T> void do_oop_work(T* p) {
  2761     tmp_guarantee_CM( _g1h->is_in_g1_reserved((HeapWord*) p), "invariant" );
  2762     tmp_guarantee_CM( !_g1h->heap_region_containing((HeapWord*) p)->is_on_free_list(), "invariant" );
  2764     oop obj = oopDesc::load_decode_heap_oop(p);
  2765     if (_cm->verbose_high())
  2766       gclog_or_tty->print_cr("[%d] we're looking at location "
  2767                              "*"PTR_FORMAT" = "PTR_FORMAT,
  2768                              _task->task_id(), p, (void*) obj);
  2769     _task->deal_with_reference(obj);
  2772   CMOopClosure(G1CollectedHeap* g1h,
  2773                ConcurrentMark* cm,
  2774                CMTask* task)
  2775     : _g1h(g1h), _cm(cm), _task(task) { }
  2776 };
  2778 void CMTask::setup_for_region(HeapRegion* hr) {
  2779   tmp_guarantee_CM( hr != NULL && !hr->continuesHumongous(),
  2780       "claim_region() should have filtered out continues humongous regions" );
  2782   if (_cm->verbose_low())
  2783     gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
  2784                            _task_id, hr);
  2786   _curr_region  = hr;
  2787   _finger       = hr->bottom();
  2788   update_region_limit();
  2791 void CMTask::update_region_limit() {
  2792   HeapRegion* hr            = _curr_region;
  2793   HeapWord* bottom          = hr->bottom();
  2794   HeapWord* limit           = hr->next_top_at_mark_start();
  2796   if (limit == bottom) {
  2797     if (_cm->verbose_low())
  2798       gclog_or_tty->print_cr("[%d] found an empty region "
  2799                              "["PTR_FORMAT", "PTR_FORMAT")",
  2800                              _task_id, bottom, limit);
  2801     // The region was collected underneath our feet.
  2802     // We set the finger to bottom to ensure that the bitmap
  2803     // iteration that will follow this will not do anything.
  2804     // (this is not a condition that holds when we set the region up,
  2805     // as the region is not supposed to be empty in the first place)
  2806     _finger = bottom;
  2807   } else if (limit >= _region_limit) {
  2808     tmp_guarantee_CM( limit >= _finger, "peace of mind" );
  2809   } else {
  2810     tmp_guarantee_CM( limit < _region_limit, "only way to get here" );
  2811     // This can happen under some pretty unusual circumstances.  An
  2812     // evacuation pause empties the region underneath our feet (NTAMS
  2813     // at bottom). We then do some allocation in the region (NTAMS
  2814     // stays at bottom), followed by the region being used as a GC
  2815     // alloc region (NTAMS will move to top() and the objects
  2816     // originally below it will be grayed). All objects now marked in
  2817     // the region are explicitly grayed, if below the global finger,
  2818     // and we do not need in fact to scan anything else. So, we simply
  2819     // set _finger to be limit to ensure that the bitmap iteration
  2820     // doesn't do anything.
  2821     _finger = limit;
  2824   _region_limit = limit;
  2827 void CMTask::giveup_current_region() {
  2828   tmp_guarantee_CM( _curr_region != NULL, "invariant" );
  2829   if (_cm->verbose_low())
  2830     gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
  2831                            _task_id, _curr_region);
  2832   clear_region_fields();
  2835 void CMTask::clear_region_fields() {
  2836   // Values for these three fields that indicate that we're not
  2837   // holding on to a region.
  2838   _curr_region   = NULL;
  2839   _finger        = NULL;
  2840   _region_limit  = NULL;
  2842   _region_finger = NULL;
  2845 void CMTask::reset(CMBitMap* nextMarkBitMap) {
  2846   guarantee( nextMarkBitMap != NULL, "invariant" );
  2848   if (_cm->verbose_low())
  2849     gclog_or_tty->print_cr("[%d] resetting", _task_id);
  2851   _nextMarkBitMap                = nextMarkBitMap;
  2852   clear_region_fields();
  2854   _calls                         = 0;
  2855   _elapsed_time_ms               = 0.0;
  2856   _termination_time_ms           = 0.0;
  2857   _termination_start_time_ms     = 0.0;
  2859 #if _MARKING_STATS_
  2860   _local_pushes                  = 0;
  2861   _local_pops                    = 0;
  2862   _local_max_size                = 0;
  2863   _objs_scanned                  = 0;
  2864   _global_pushes                 = 0;
  2865   _global_pops                   = 0;
  2866   _global_max_size               = 0;
  2867   _global_transfers_to           = 0;
  2868   _global_transfers_from         = 0;
  2869   _region_stack_pops             = 0;
  2870   _regions_claimed               = 0;
  2871   _objs_found_on_bitmap          = 0;
  2872   _satb_buffers_processed        = 0;
  2873   _steal_attempts                = 0;
  2874   _steals                        = 0;
  2875   _aborted                       = 0;
  2876   _aborted_overflow              = 0;
  2877   _aborted_cm_aborted            = 0;
  2878   _aborted_yield                 = 0;
  2879   _aborted_timed_out             = 0;
  2880   _aborted_satb                  = 0;
  2881   _aborted_termination           = 0;
  2882 #endif // _MARKING_STATS_
  2885 bool CMTask::should_exit_termination() {
  2886   regular_clock_call();
  2887   // This is called when we are in the termination protocol. We should
  2888   // quit if, for some reason, this task wants to abort or the global
  2889   // stack is not empty (this means that we can get work from it).
  2890   return !_cm->mark_stack_empty() || has_aborted();
  2893 // This determines whether the method below will check both the local
  2894 // and global fingers when deciding whether to push a gray object on
  2895 // the stack (value 1) or whether it will only check the global one
  2896 // (value 0). The tradeoff is that the former will be a bit more
  2897 // accurate and possibly push fewer entries on the stack, but it
  2898 // might also be a little bit slower.
  2900 #define _CHECK_BOTH_FINGERS_      1
  2902 void CMTask::deal_with_reference(oop obj) {
  2903   if (_cm->verbose_high())
  2904     gclog_or_tty->print_cr("[%d] we're dealing with reference = "PTR_FORMAT,
  2905                            _task_id, (void*) obj);
  2907   ++_refs_reached;
  2909   HeapWord* objAddr = (HeapWord*) obj;
  2910   assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
  2911   if (_g1h->is_in_g1_reserved(objAddr)) {
  2912     tmp_guarantee_CM( obj != NULL, "is_in_g1_reserved should ensure this" );
  2913     HeapRegion* hr =  _g1h->heap_region_containing(obj);
  2914     if (_g1h->is_obj_ill(obj, hr)) {
  2915       if (_cm->verbose_high())
  2916         gclog_or_tty->print_cr("[%d] "PTR_FORMAT" is not considered marked",
  2917                                _task_id, (void*) obj);
  2919       // we need to mark it first
  2920       if (_nextMarkBitMap->parMark(objAddr)) {
  2921         // No OrderAccess::store_load() is needed. It is implicit in the
  2922         // CAS done in parMark(objAddr) above
  2923         HeapWord* global_finger = _cm->finger();
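        // The basic rule (see the comment block before do_marking_step()):
        // an object above a finger will eventually be visited when the
        // corresponding area of the bitmap is scanned, so marking it is
        // enough; an object below a finger has already been passed over,
        // so it must also be pushed on a stack to get its fields scanned.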
  2925 #if _CHECK_BOTH_FINGERS_
  2926         // we will check both the local and global fingers
  2928         if (_finger != NULL && objAddr < _finger) {
  2929           if (_cm->verbose_high())
  2930             gclog_or_tty->print_cr("[%d] below the local finger ("PTR_FORMAT"), "
  2931                                    "pushing it", _task_id, _finger);
  2932           push(obj);
  2933         } else if (_curr_region != NULL && objAddr < _region_limit) {
  2934           // do nothing
  2935         } else if (objAddr < global_finger) {
  2936           // Notice that the global finger might be moving forward
  2937           // concurrently. This is not a problem. In the worst case, we
  2938           // mark the object while it is above the global finger and, by
  2939           // the time we read the global finger, it has moved forward
  2940           // past this object. In this case, the object will probably
  2941           // be visited when a task is scanning the region and will also
  2942           // be pushed on the stack. So, some duplicate work, but no
  2943           // correctness problems.
  2945           if (_cm->verbose_high())
  2946             gclog_or_tty->print_cr("[%d] below the global finger "
  2947                                    "("PTR_FORMAT"), pushing it",
  2948                                    _task_id, global_finger);
  2949           push(obj);
  2950         } else {
  2951           // do nothing
  2953 #else // _CHECK_BOTH_FINGERS_
  2954       // we will only check the global finger
  2956         if (objAddr < global_finger) {
  2957           // see long comment above
  2959           if (_cm->verbose_high())
  2960             gclog_or_tty->print_cr("[%d] below the global finger "
  2961                                    "("PTR_FORMAT"), pushing it",
  2962                                    _task_id, global_finger);
  2963           push(obj);
  2965 #endif // _CHECK_BOTH_FINGERS_
  2971 void CMTask::push(oop obj) {
  2972   HeapWord* objAddr = (HeapWord*) obj;
  2973   tmp_guarantee_CM( _g1h->is_in_g1_reserved(objAddr), "invariant" );
  2974   tmp_guarantee_CM( !_g1h->heap_region_containing(objAddr)->is_on_free_list(), "invariant" );
  2975   tmp_guarantee_CM( !_g1h->is_obj_ill(obj), "invariant" );
  2976   tmp_guarantee_CM( _nextMarkBitMap->isMarked(objAddr), "invariant" );
  2978   if (_cm->verbose_high())
  2979     gclog_or_tty->print_cr("[%d] pushing "PTR_FORMAT, _task_id, (void*) obj);
  2981   if (!_task_queue->push(obj)) {
  2982     // The local task queue looks full. We need to push some entries
  2983     // to the global stack.
  2985     if (_cm->verbose_medium())
  2986       gclog_or_tty->print_cr("[%d] task queue overflow, "
  2987                              "moving entries to the global stack",
  2988                              _task_id);
  2989     move_entries_to_global_stack();
  2991     // this should succeed since, even if we overflow the global
  2992     // stack, we should have definitely removed some entries from the
  2993     // local queue. So, there must be space on it.
  2994     bool success = _task_queue->push(obj);
  2995     tmp_guarantee_CM( success, "invariant" );
  2998   statsOnly( int tmp_size = _task_queue->size();
  2999              if (tmp_size > _local_max_size)
  3000                _local_max_size = tmp_size;
  3001              ++_local_pushes );
  3004 void CMTask::reached_limit() {
  3005   tmp_guarantee_CM( _words_scanned >= _words_scanned_limit ||
  3006                     _refs_reached >= _refs_reached_limit,
  3007                     "shouldn't have been called otherwise" );
  3008   regular_clock_call();
  3011 void CMTask::regular_clock_call() {
  3012   if (has_aborted())
  3013     return;
  3015   // First, we need to recalculate the words scanned and refs reached
  3016   // limits for the next clock call.
  3017   recalculate_limits();
  3019   // During the regular clock call we do the following
  3021   // (1) If an overflow has been flagged, then we abort.
  3022   if (_cm->has_overflown()) {
  3023     set_has_aborted();
  3024     return;
  3027   // If we are not concurrent (i.e. we're doing remark) we don't need
  3028   // to check anything else. The other steps are only needed during
  3029   // the concurrent marking phase.
  3030   if (!concurrent())
  3031     return;
  3033   // (2) If marking has been aborted for Full GC, then we also abort.
  3034   if (_cm->has_aborted()) {
  3035     set_has_aborted();
  3036     statsOnly( ++_aborted_cm_aborted );
  3037     return;
  3040   double curr_time_ms = os::elapsedVTime() * 1000.0;
  3042   // (3) If marking stats are enabled, then we update the step history.
  3043 #if _MARKING_STATS_
  3044   if (_words_scanned >= _words_scanned_limit)
  3045     ++_clock_due_to_scanning;
  3046   if (_refs_reached >= _refs_reached_limit)
  3047     ++_clock_due_to_marking;
  3049   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
  3050   _interval_start_time_ms = curr_time_ms;
  3051   _all_clock_intervals_ms.add(last_interval_ms);
  3053   if (_cm->verbose_medium()) {
  3054     gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
  3055                            "scanned = %d%s, refs reached = %d%s",
  3056                            _task_id, last_interval_ms,
  3057                            _words_scanned,
  3058                            (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
  3059                            _refs_reached,
  3060                            (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
  3062 #endif // _MARKING_STATS_
  3064   // (4) We check whether we should yield. If we have to, then we abort.
  3065   if (_cm->should_yield()) {
  3066     // We should yield. To do this we abort the task. The caller is
  3067     // responsible for yielding.
  3068     set_has_aborted();
  3069     statsOnly( ++_aborted_yield );
  3070     return;
  3073   // (5) We check whether we've reached our time quota. If we have,
  3074   // then we abort.
  3075   double elapsed_time_ms = curr_time_ms - _start_time_ms;
  3076   if (elapsed_time_ms > _time_target_ms) {
  3077     set_has_aborted();
  3078     _has_aborted_timed_out = true;
  3079     statsOnly( ++_aborted_timed_out );
  3080     return;
  3083   // (6) Finally, we check whether there are enough completed SATB
  3084   // buffers available for processing. If there are, we abort.
  3085   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  3086   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
  3087     if (_cm->verbose_low())
  3088       gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
  3089                              _task_id);
  3090     // we do need to process SATB buffers, so we'll abort and restart
  3091     // the marking task to do so
  3092     set_has_aborted();
  3093     statsOnly( ++_aborted_satb );
  3094     return;
  3098 void CMTask::recalculate_limits() {
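  // Remember the "real" limits; decrease_limits() below derives tighter,
  // temporary limits from them when an expensive operation is coming up.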
  3099   _real_words_scanned_limit = _words_scanned + words_scanned_period;
  3100   _words_scanned_limit      = _real_words_scanned_limit;
  3102   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
  3103   _refs_reached_limit       = _real_refs_reached_limit;
  3106 void CMTask::decrease_limits() {
  3107   // This is called when we believe that we're going to do an infrequent
  3108   // operation which will increase the cost per byte scanned (i.e. move
  3109   // entries to/from the global stack). It basically tries to decrease the
  3110   // scanning limit so that the clock is called earlier.
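  // For example, right after a recalculate_limits() call a full period of
  // scanning remains before the next clock call; after this adjustment
  // only about a quarter of a period (period - 3 * period / 4) is left.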
  3112   if (_cm->verbose_medium())
  3113     gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
  3115   _words_scanned_limit = _real_words_scanned_limit -
  3116     3 * words_scanned_period / 4;
  3117   _refs_reached_limit  = _real_refs_reached_limit -
  3118     3 * refs_reached_period / 4;
  3121 void CMTask::move_entries_to_global_stack() {
  3122   // local array where we'll store the entries that will be popped
  3123   // from the local queue
  3124   oop buffer[global_stack_transfer_size];
  3126   int n = 0;
  3127   oop obj;
  3128   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
  3129     buffer[n] = obj;
  3130     ++n;
  3133   if (n > 0) {
  3134     // we popped at least one entry from the local queue
  3136     statsOnly( ++_global_transfers_to; _local_pops += n );
  3138     if (!_cm->mark_stack_push(buffer, n)) {
  3139       if (_cm->verbose_low())
  3140         gclog_or_tty->print_cr("[%d] aborting due to global stack overflow", _task_id);
  3141       set_has_aborted();
  3142     } else {
  3143       // the transfer was successful
  3145       if (_cm->verbose_medium())
  3146         gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
  3147                                _task_id, n);
  3148       statsOnly( int tmp_size = _cm->mark_stack_size();
  3149                  if (tmp_size > _global_max_size)
  3150                    _global_max_size = tmp_size;
  3151                  _global_pushes += n );
  3155   // this operation was quite expensive, so decrease the limits
  3156   decrease_limits();
  3159 void CMTask::get_entries_from_global_stack() {
  3160   // local array where we'll store the entries that will be popped
  3161   // from the global stack.
  3162   oop buffer[global_stack_transfer_size];
  3163   int n;
  3164   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
  3165   tmp_guarantee_CM( n <= global_stack_transfer_size,
  3166                     "we should not pop more than the given limit" );
  3167   if (n > 0) {
  3168     // yes, we did actually pop at least one entry
  3170     statsOnly( ++_global_transfers_from; _global_pops += n );
  3171     if (_cm->verbose_medium())
  3172       gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
  3173                              _task_id, n);
  3174     for (int i = 0; i < n; ++i) {
  3175       bool success = _task_queue->push(buffer[i]);
  3176       // We only call this when the local queue is empty or under a
  3177       // given target limit. So, we do not expect this push to fail.
  3178       tmp_guarantee_CM( success, "invariant" );
  3181     statsOnly( int tmp_size = _task_queue->size();
  3182                if (tmp_size > _local_max_size)
  3183                  _local_max_size = tmp_size;
  3184                _local_pushes += n );
  3187   // this operation was quite expensive, so decrease the limits
  3188   decrease_limits();
  3191 void CMTask::drain_local_queue(bool partially) {
  3192   if (has_aborted())
  3193     return;
  3195   // Decide what the target size is, depending on whether we're going to
  3196   // drain it partially (so that other tasks can steal if they run out
  3197   // of things to do) or totally (at the very end).
  3198   size_t target_size;
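  // When draining partially we keep roughly a third of the queue's
  // capacity (but no more than GCDrainStackTargetSize) so that there is
  // still work left for other tasks to steal.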
  3199   if (partially)
  3200     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
  3201   else
  3202     target_size = 0;
  3204   if (_task_queue->size() > target_size) {
  3205     if (_cm->verbose_high())
  3206       gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
  3207                              _task_id, target_size);
  3209     oop obj;
  3210     bool ret = _task_queue->pop_local(obj);
  3211     while (ret) {
  3212       statsOnly( ++_local_pops );
  3214       if (_cm->verbose_high())
  3215         gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
  3216                                (void*) obj);
  3218       tmp_guarantee_CM( _g1h->is_in_g1_reserved((HeapWord*) obj),
  3219                         "invariant" );
  3220       tmp_guarantee_CM( !_g1h->heap_region_containing(obj)->is_on_free_list(),
  3221                         "invariant" );
  3223       scan_object(obj);
  3225       if (_task_queue->size() <= target_size || has_aborted())
  3226         ret = false;
  3227       else
  3228         ret = _task_queue->pop_local(obj);
  3231     if (_cm->verbose_high())
  3232       gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
  3233                              _task_id, _task_queue->size());
  3237 void CMTask::drain_global_stack(bool partially) {
  3238   if (has_aborted())
  3239     return;
  3241   // We have a policy to drain the local queue before we attempt to
  3242   // drain the global stack.
  3243   tmp_guarantee_CM( partially || _task_queue->size() == 0, "invariant" );
  3245   // Decide what the target size is, depending on whether we're going to
  3246   // drain it partially (so that other tasks can steal if they run out
  3247   // of things to do) or totally (at the very end).  Notice that,
  3248   // because we move entries from the global stack in chunks or
  3249   // because another task might be doing the same, we might in fact
  3250   // drop below the target. But, this is not a problem.
  3251   size_t target_size;
  3252   if (partially)
  3253     target_size = _cm->partial_mark_stack_size_target();
  3254   else
  3255     target_size = 0;
  3257   if (_cm->mark_stack_size() > target_size) {
  3258     if (_cm->verbose_low())
  3259       gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
  3260                              _task_id, target_size);
  3262     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
  3263       get_entries_from_global_stack();
  3264       drain_local_queue(partially);
  3267     if (_cm->verbose_low())
  3268       gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
  3269                              _task_id, _cm->mark_stack_size());
  3273 // The SATB queue code makes several assumptions about whether to call
  3274 // the par or non-par versions of the methods. This is why some of the
  3275 // code is replicated. We should really get rid of the single-threaded
  3276 // version of the code to simplify things.
  3277 void CMTask::drain_satb_buffers() {
  3278   if (has_aborted())
  3279     return;
  3281   // We set this so that the regular clock knows that we're in the
  3282   // middle of draining buffers and doesn't set the abort flag when it
  3283   // notices that SATB buffers are available for draining. It'd be
  3284   // very counterproductive if it did that. :-)
  3285   _draining_satb_buffers = true;
  3287   CMObjectClosure oc(this);
  3288   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  3289   if (ParallelGCThreads > 0)
  3290     satb_mq_set.set_par_closure(_task_id, &oc);
  3291   else
  3292     satb_mq_set.set_closure(&oc);
  3294   // This keeps claiming and applying the closure to completed buffers
  3295   // until we run out of buffers or we need to abort.
  3296   if (ParallelGCThreads > 0) {
  3297     while (!has_aborted() &&
  3298            satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
  3299       if (_cm->verbose_medium())
  3300         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
  3301       statsOnly( ++_satb_buffers_processed );
  3302       regular_clock_call();
  3304   } else {
  3305     while (!has_aborted() &&
  3306            satb_mq_set.apply_closure_to_completed_buffer()) {
  3307       if (_cm->verbose_medium())
  3308         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
  3309       statsOnly( ++_satb_buffers_processed );
  3310       regular_clock_call();
  3314   if (!concurrent() && !has_aborted()) {
  3315     // We should only do this during remark.
  3316     if (ParallelGCThreads > 0)
  3317       satb_mq_set.par_iterate_closure_all_threads(_task_id);
  3318     else
  3319       satb_mq_set.iterate_closure_all_threads();
  3322   _draining_satb_buffers = false;
  3324   tmp_guarantee_CM( has_aborted() ||
  3325                     concurrent() ||
  3326                     satb_mq_set.completed_buffers_num() == 0, "invariant" );
  3328   if (ParallelGCThreads > 0)
  3329     satb_mq_set.set_par_closure(_task_id, NULL);
  3330   else
  3331     satb_mq_set.set_closure(NULL);
  3333   // again, this was a potentially expensive operation, so decrease the
  3334   // limits to trigger the regular clock call earlier
  3335   decrease_limits();
  3338 void CMTask::drain_region_stack(BitMapClosure* bc) {
  3339   if (has_aborted())
  3340     return;
  3342   tmp_guarantee_CM( _region_finger == NULL,
  3343                     "it should be NULL when we're not scanning a region" );
  3345   if (!_cm->region_stack_empty()) {
  3346     if (_cm->verbose_low())
  3347       gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
  3348                              _task_id, _cm->region_stack_size());
  3350     MemRegion mr = _cm->region_stack_pop();
  3351     // it returns MemRegion() if the pop fails
  3352     statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
  3354     while (mr.start() != NULL) {
  3355       if (_cm->verbose_medium())
  3356         gclog_or_tty->print_cr("[%d] we are scanning region "
  3357                                "["PTR_FORMAT", "PTR_FORMAT")",
  3358                                _task_id, mr.start(), mr.end());
  3359       tmp_guarantee_CM( mr.end() <= _cm->finger(),
  3360                         "otherwise the region shouldn't be on the stack" );
  3361       assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
  3362       if (_nextMarkBitMap->iterate(bc, mr)) {
  3363         tmp_guarantee_CM( !has_aborted(),
  3364                "cannot abort the task without aborting the bitmap iteration" );
  3366         // We finished iterating over the region without aborting.
  3367         regular_clock_call();
  3368         if (has_aborted())
  3369           mr = MemRegion();
  3370         else {
  3371           mr = _cm->region_stack_pop();
  3372           // it returns MemRegion() if the pop fails
  3373           statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
  3375       } else {
  3376         guarantee( has_aborted(), "currently the only way to do so" );
  3378         // The only way to abort the bitmap iteration is to return
  3379         // false from the do_bit() method. However, inside the
  3380         // do_bit() method we move the _region_finger to point to the
  3381         // object currently being looked at. So, if we bail out, we
  3382         // have definitely set _region_finger to something non-null.
  3383         guarantee( _region_finger != NULL, "invariant" );
  3385         // The iteration was actually aborted. So now _region_finger
  3386         // points to the address of the object we last scanned. If we
  3387         // leave it there, when we restart this task, we will rescan
  3388         // the object. It is easy to avoid this. We move the finger by
  3389         // enough to point to the next possible object header (the
  3390         // bitmap knows by how much we need to move it as it knows its
  3391         // granularity).
  3392         MemRegion newRegion =
  3393           MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());
  3395         if (!newRegion.is_empty()) {
  3396           if (_cm->verbose_low()) {
  3397             gclog_or_tty->print_cr("[%d] pushing unscanned region"
  3398                                    "[" PTR_FORMAT "," PTR_FORMAT ") on region stack",
  3399                                    _task_id,
  3400                                    newRegion.start(), newRegion.end());
  3402           // Now push the part of the region we didn't scan on the
  3403           // region stack to make sure a task scans it later.
  3404           _cm->region_stack_push(newRegion);
  3406         // break from while
  3407         mr = MemRegion();
  3409       _region_finger = NULL;
  3412     // We only push regions on the region stack during evacuation
  3413     // pauses. So, if we come out of the above iteration because the
  3414     // region stack is empty, it will remain empty until the next yield
  3415     // point. So, the guarantee below is safe.
  3416     guarantee( has_aborted() || _cm->region_stack_empty(),
  3417                "only way to exit the loop" );
  3419     if (_cm->verbose_low())
  3420       gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
  3421                              _task_id, _cm->region_stack_size());
  3425 void CMTask::print_stats() {
  3426   gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
  3427                          _task_id, _calls);
  3428   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
  3429                          _elapsed_time_ms, _termination_time_ms);
  3430   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
  3431                          _step_times_ms.num(), _step_times_ms.avg(),
  3432                          _step_times_ms.sd());
  3433   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
  3434                          _step_times_ms.maximum(), _step_times_ms.sum());
  3436 #if _MARKING_STATS_
  3437   gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
  3438                          _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
  3439                          _all_clock_intervals_ms.sd());
  3440   gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
  3441                          _all_clock_intervals_ms.maximum(),
  3442                          _all_clock_intervals_ms.sum());
  3443   gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
  3444                          _clock_due_to_scanning, _clock_due_to_marking);
  3445   gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
  3446                          _objs_scanned, _objs_found_on_bitmap);
  3447   gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
  3448                          _local_pushes, _local_pops, _local_max_size);
  3449   gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
  3450                          _global_pushes, _global_pops, _global_max_size);
  3451   gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
  3452                          _global_transfers_to,_global_transfers_from);
  3453   gclog_or_tty->print_cr("  Regions: claimed = %d, Region Stack: pops = %d",
  3454                          _regions_claimed, _region_stack_pops);
  3455   gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
  3456   gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
  3457                          _steal_attempts, _steals);
  3458   gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
  3459   gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
  3460                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
  3461   gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
  3462                          _aborted_timed_out, _aborted_satb, _aborted_termination);
  3463 #endif // _MARKING_STATS_
  3466 /*****************************************************************************
  3468     The do_marking_step(time_target_ms) method is the building block
  3469     of the parallel marking framework. It can be called in parallel
  3470     with other invocations of do_marking_step() on different tasks
  3471     (but only one per task, obviously) and concurrently with the
  3472     mutator threads, or during remark, hence it eliminates the need
  3473     for two versions of the code. When called during remark, it will
  3474     pick up from where the task left off during the concurrent marking
  3475     phase. Interestingly, tasks are also claimable during evacuation
  3476     pauses, since do_marking_step() ensures that it aborts before
  3477     it needs to yield.
  3479     The data structures that it uses to do marking work are the
  3480     following:
  3482       (1) Marking Bitmap. If there are gray objects that appear only
  3483       on the bitmap (this happens either when dealing with an overflow
  3484       or when the initial marking phase has simply marked the roots
  3485       and didn't push them on the stack), then tasks claim heap
  3486       regions whose bitmap they then scan to find gray objects. A
  3487       global finger indicates where the end of the last claimed region
  3488       is. A local finger indicates how far into the region a task has
  3489       scanned. The two fingers are used to determine how to gray an
  3490       object (i.e. whether simply marking it is OK, as it will be
  3491       visited by a task in the future, or whether it needs to be also
  3492       pushed on a stack).
  3494       (2) Local Queue. The task's local queue, which the task can
  3495       access reasonably efficiently. Other tasks can steal from
  3496       it when they run out of work. Throughout the marking phase, a
  3497       task attempts to keep its local queue short but not totally
  3498       empty, so that entries are available for stealing by other
  3499       tasks. Only when there is no more work will a task totally
  3500       drain its local queue.
  3502       (3) Global Mark Stack. This handles local queue overflow. During
  3503       marking only sets of entries are moved between it and the local
  3504       queues, as access to it requires a mutex, and more fine-grained
  3505       interaction with it might cause contention. If it
  3506       overflows, then the marking phase should restart and iterate
  3507       over the bitmap to identify gray objects. Throughout the marking
  3508       phase, tasks attempt to keep the global mark stack at a small
  3509       length but not totally empty, so that entries are available for
  3510       popping by other tasks. Only when there is no more work will
  3511       tasks totally drain the global mark stack.
  3513       (4) Global Region Stack. Entries on it correspond to areas of
  3514       the bitmap that need to be scanned since they contain gray
  3515       objects. Pushes on the region stack only happen during
  3516       evacuation pauses and typically correspond to areas covered by
  3517       GC LABs. If it overflows, then the marking phase should restart
  3518       and iterate over the bitmap to identify gray objects. Tasks will
  3519       try to totally drain the region stack as soon as possible.
  3521       (5) SATB Buffer Queue. This is where completed SATB buffers are
  3522       made available. Buffers are regularly removed from this queue
  3523       and scanned for roots, so that the queue doesn't get too
  3524       long. During remark, all completed buffers are processed, as
  3525       well as the filled-in parts of any uncompleted buffers.
  3527     The do_marking_step() method tries to abort when the time target
  3528     has been reached. There are a few other cases when the
  3529     do_marking_step() method also aborts:
  3531       (1) When the marking phase has been aborted (after a Full GC).
  3533       (2) When a global overflow (either on the global stack or the
  3534       region stack) has been triggered. Before the task aborts, it
  3535       will actually sync up with the other tasks to ensure that all
  3536       the marking data structures (local queues, stacks, fingers etc.)
  3537       are re-initialised so that when do_marking_step() completes,
  3538       the marking phase can immediately restart.
  3540       (3) When enough completed SATB buffers are available. The
  3541       do_marking_step() method only tries to drain SATB buffers right
  3542       at the beginning. So, if enough buffers are available, the
  3543       marking step aborts and the SATB buffers are processed at
  3544       the beginning of the next invocation.
  3546       (4) To yield. When we have to yield, we abort and yield
  3547       right at the end of do_marking_step(). This saves us a lot
  3548       of hassle as, by yielding, we might allow a Full GC. If this
  3549       happens then objects will be compacted underneath our feet, the
  3550       heap might shrink, etc. We save checking for this by just
  3551       aborting and doing the yield right at the end.
  3553     From the above it follows that the do_marking_step() method should
  3554     be called in a loop (or, otherwise, regularly) until it completes
           (a sketch of such a driver loop follows this comment block).
  3556     If a marking step completes without its has_aborted() flag being
  3557     true, it means it has completed the current marking phase (and
  3558     also all other marking tasks have done so and have all synced up).
  3560     A method called regular_clock_call() is invoked "regularly" (in
  3561     sub-ms intervals) throughout marking. It is this clock method that
  3562     checks all the abort conditions which were mentioned above and
  3563     decides when the task should abort. A work-based scheme is used to
  3564     trigger this clock method: when the number of object words the
  3565     marking phase has scanned or the number of references the marking
  3566     phase has visited reaches a given limit. Additional invocations of
  3567     the clock method have been planted in a few other strategic places
  3568     too. The initial reason for the clock method was to avoid calling
  3569     vtime too regularly, as it is quite expensive. So, once it was in
  3570     place, it was natural to piggy-back all the other conditions on it
  3571     too and not constantly check them throughout the code.
  3573  *****************************************************************************/
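// A minimal sketch (not actual caller code; names are illustrative) of
// the driver loop described above, which keeps invoking the step until
// it completes without aborting:
//
//   do {
//     task->do_marking_step(target_ms);
//     // ...yield / handle overflow and SATB work between steps...
//   } while (task->has_aborted());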
  3575 void CMTask::do_marking_step(double time_target_ms) {
  3576   guarantee( time_target_ms >= 1.0, "minimum granularity is 1ms" );
  3577   guarantee( concurrent() == _cm->concurrent(), "they should be the same" );
  3579   guarantee( concurrent() || _cm->region_stack_empty(),
  3580              "the region stack should have been cleared before remark" );
  3581   guarantee( _region_finger == NULL,
  3582              "this should be non-null only when a region is being scanned" );
  3584   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
  3585   guarantee( _task_queues != NULL, "invariant" );
  3586   guarantee( _task_queue != NULL,  "invariant" );
  3587   guarantee( _task_queues->queue(_task_id) == _task_queue, "invariant" );
  3589   guarantee( !_claimed,
  3590              "only one thread should claim this task at any one time" );
  3592   // OK, this doesn't safeguard again all possible scenarios, as it is
  3593   // possible for two threads to set the _claimed flag at the same
  3594   // time. But it is only for debugging purposes anyway and it will
  3595   // catch most problems.
  3596   _claimed = true;
  3598   _start_time_ms = os::elapsedVTime() * 1000.0;
  3599   statsOnly( _interval_start_time_ms = _start_time_ms );
  3601   double diff_prediction_ms =
  3602     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
  3603   _time_target_ms = time_target_ms - diff_prediction_ms;
  3605   // set up the variables that are used in the work-based scheme to
  3606   // call the regular clock method
  3607   _words_scanned = 0;
  3608   _refs_reached  = 0;
  3609   recalculate_limits();
  3611   // clear all flags
  3612   clear_has_aborted();
  3613   _has_aborted_timed_out = false;
  3614   _draining_satb_buffers = false;
  3616   ++_calls;
  3618   if (_cm->verbose_low())
  3619     gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
  3620                            "target = %1.2lfms >>>>>>>>>>",
  3621                            _task_id, _calls, _time_target_ms);
  3623   // Set up the bitmap and oop closures. Anything that uses them is
  3624   // eventually called from this method, so it is OK to allocate these
  3625   // statically.
  3626   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
  3627   CMOopClosure    oop_closure(_g1h, _cm, this);
  3628   set_oop_closure(&oop_closure);
  3630   if (_cm->has_overflown()) {
  3631     // This can happen if the region stack or the mark stack overflows
  3632     // during a GC pause and this task, after a yield point,
  3633     // restarts. We have to abort as we need to get into the overflow
  3634     // protocol which happens right at the end of this task.
  3635     set_has_aborted();
  3638   // First drain any available SATB buffers. After this, we will not
  3639   // look at SATB buffers before the next invocation of this method.
  3640   // If enough completed SATB buffers are queued up, the regular clock
  3641   // will abort this task so that it restarts.
  3642   drain_satb_buffers();
  3643   // ...then partially drain the local queue and the global stack
  3644   drain_local_queue(true);
  3645   drain_global_stack(true);
  3647   // Then totally drain the region stack.  We will not look at
  3648   // it again before the next invocation of this method. Entries on
  3649   // the region stack are only added during evacuation pauses, for
  3650   // which we have to yield. When we do, we abort the task anyway so
  3651   // it will look at the region stack again when it restarts.
  3652   bitmap_closure.set_scanning_heap_region(false);
  3653   drain_region_stack(&bitmap_closure);
  3654   // ...then partially drain the local queue and the global stack
  3655   drain_local_queue(true);
  3656   drain_global_stack(true);
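  // Main work loop: claim a region (or resume the one we already hold),
  // scan its part of the bitmap from the local finger up to the region
  // limit, and periodically drain the local queue and global stack.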
  3658   do {
  3659     if (!has_aborted() && _curr_region != NULL) {
  3660       // This means that we're already holding on to a region.
  3661       tmp_guarantee_CM( _finger != NULL,
  3662                         "if region is not NULL, then the finger "
  3663                         "should not be NULL either" );
  3665       // We might have restarted this task after an evacuation pause
  3666       // which might have evacuated the region we're holding on to
  3667       // underneath our feet. Let's read its limit again to make sure
  3668       // that we do not iterate over a region of the heap that
  3669       // contains garbage (update_region_limit() will also move
  3670       // _finger to the start of the region if it is found empty).
  3671       update_region_limit();
  3672       // We will start from _finger not from the start of the region,
  3673       // as we might be restarting this task after aborting half-way
  3674       // through scanning this region. In this case, _finger points to
  3675       // the address where we last found a marked object. If this is a
  3676       // fresh region, _finger points to start().
  3677       MemRegion mr = MemRegion(_finger, _region_limit);
  3679       if (_cm->verbose_low())
  3680         gclog_or_tty->print_cr("[%d] we're scanning part "
  3681                                "["PTR_FORMAT", "PTR_FORMAT") "
  3682                                "of region "PTR_FORMAT,
  3683                                _task_id, _finger, _region_limit, _curr_region);
  3685       // Let's iterate over the bitmap of the part of the
  3686       // region that is left.
  3687       bitmap_closure.set_scanning_heap_region(true);
  3688       if (mr.is_empty() ||
  3689           _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
  3690         // We successfully completed iterating over the region. Now,
  3691         // let's give up the region.
  3692         giveup_current_region();
  3693         regular_clock_call();
  3694       } else {
  3695         guarantee( has_aborted(), "currently the only way to do so" );
  3696         // The only way to abort the bitmap iteration is to return
  3697         // false from the do_bit() method. However, inside the
  3698         // do_bit() method we move the _finger to point to the
  3699         // object currently being looked at. So, if we bail out, we
  3700         // have definitely set _finger to something non-null.
  3701         guarantee( _finger != NULL, "invariant" );
  3703         // Region iteration was actually aborted. So now _finger
  3704         // points to the address of the object we last scanned. If we
  3705         // leave it there, when we restart this task, we will rescan
  3706         // the object. It is easy to avoid this. We move the finger by
  3707         // enough to point to the next possible object header (the
  3708         // bitmap knows by how much we need to move it as it knows its
  3709         // granularity).
  3710         move_finger_to(_nextMarkBitMap->nextWord(_finger));
  3713     // At this point we have either completed iterating over the
  3714     // region we were holding on to, or we have aborted.
  3716     // We then partially drain the local queue and the global stack.
  3717     // (Do we really need this?)
  3718     drain_local_queue(true);
  3719     drain_global_stack(true);
  3721     // Read the note on the claim_region() method on why it might
  3722     // return NULL with potentially more regions available for
  3723     // claiming and why we have to check out_of_regions() to determine
  3724     // whether we're done or not.
  3725     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
  3726       // We are going to try to claim a new region. We should have
  3727       // given up on the previous one.
  3728       tmp_guarantee_CM( _curr_region  == NULL &&
  3729                         _finger       == NULL &&
  3730                         _region_limit == NULL, "invariant" );
  3731       if (_cm->verbose_low())
  3732         gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
  3733       HeapRegion* claimed_region = _cm->claim_region(_task_id);
  3734       if (claimed_region != NULL) {
  3735         // Yes, we managed to claim one
  3736         statsOnly( ++_regions_claimed );
  3738         if (_cm->verbose_low())
  3739           gclog_or_tty->print_cr("[%d] we successfully claimed "
  3740                                  "region "PTR_FORMAT,
  3741                                  _task_id, claimed_region);
  3743         setup_for_region(claimed_region);
  3744         tmp_guarantee_CM( _curr_region == claimed_region, "invariant" );
  3746       // It is important to call the regular clock here. It might take
  3747       // a while to claim a region if, for example, we hit a large
  3748       // block of empty regions. So we need to call the regular clock
  3749       // method once round the loop to make sure it's called
  3750       // frequently enough.
  3751       regular_clock_call();
  3754     if (!has_aborted() && _curr_region == NULL) {
  3755       tmp_guarantee_CM( _cm->out_of_regions(),
  3756                         "at this point we should be out of regions" );
  3758   } while ( _curr_region != NULL && !has_aborted());
  3760   if (!has_aborted()) {
  3761     // We cannot check whether the global stack is empty, since other
  3762     // tasks might be pushing objects to it concurrently. We also cannot
  3763     // check if the region stack is empty because if a thread is aborting
  3764     // it can push a partially done region back.
  3765     tmp_guarantee_CM( _cm->out_of_regions(),
  3766                       "at this point we should be out of regions" );
  3768     if (_cm->verbose_low())
  3769       gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
  3771     // Try to reduce the number of available SATB buffers so that
  3772     // remark has less work to do.
  3773     drain_satb_buffers();
  3776   // Since we've done everything else, we can now totally drain the
  3777   // local queue and global stack.
  3778   drain_local_queue(false);
  3779   drain_global_stack(false);
  3781   // Attempt at work stealing from other task's queues.
  3782   if (!has_aborted()) {
  3783     // We have not aborted. This means that we have finished all that
  3784     // we could. Let's try to do some stealing...
  3786     // We cannot check whether the global stack is empty, since other
  3787     // tasks might be pushing objects to it concurrently. We also cannot
  3788     // check if the region stack is empty because if a thread is aborting
  3789     // it can push a partially done region back.
  3790     guarantee( _cm->out_of_regions() &&
  3791                _task_queue->size() == 0, "only way to reach here" );
  3793     if (_cm->verbose_low())
  3794       gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
  3796     while (!has_aborted()) {
  3797       oop obj;
  3798       statsOnly( ++_steal_attempts );
  3800       if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
  3801         if (_cm->verbose_medium())
  3802           gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
  3803                                  _task_id, (void*) obj);
  3805         statsOnly( ++_steals );
  3807         tmp_guarantee_CM( _nextMarkBitMap->isMarked((HeapWord*) obj),
  3808                           "any stolen object should be marked" );
  3809         scan_object(obj);
  3811         // And since we're towards the end, let's totally drain the
  3812         // local queue and global stack.
  3813         drain_local_queue(false);
  3814         drain_global_stack(false);
  3815       } else {
  3816         break;
  3821   // We still haven't aborted. Now, let's try to get into the
  3822   // termination protocol.
  3823   if (!has_aborted()) {
  3824     // We cannot check whether the global stack is empty, since other
  3825     // tasks might be concurrently pushing objects on it. We also cannot
  3826     // check if the region stack is empty because if a thread is aborting
  3827     // it can push a partially done region back.
  3828     guarantee( _cm->out_of_regions() &&
  3829                _task_queue->size() == 0, "only way to reach here" );
  3831     if (_cm->verbose_low())
  3832       gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
  3834     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
  3835     // The CMTask class also extends the TerminatorTerminator class,
  3836     // hence its should_exit_termination() method will also decide
  3837     // whether to exit the termination protocol or not.
  3838     bool finished = _cm->terminator()->offer_termination(this);
  3839     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
  3840     _termination_time_ms +=
  3841       termination_end_time_ms - _termination_start_time_ms;
  3843     if (finished) {
  3844       // We're all done.
  3846       if (_task_id == 0) {
  3847         // let's allow task 0 to do this
  3848         if (concurrent()) {
  3849           guarantee( _cm->concurrent_marking_in_progress(), "invariant" );
  3850           // we need to set this to false before the next
  3851           // safepoint. This way we ensure that the marking phase
  3852           // doesn't observe any more heap expansions.
  3853           _cm->clear_concurrent_marking_in_progress();
  3857       // We can now guarantee that the global stack is empty, since
  3858       // all other tasks have finished.
  3859       guarantee( _cm->out_of_regions() &&
  3860                  _cm->region_stack_empty() &&
  3861                  _cm->mark_stack_empty() &&
  3862                  _task_queue->size() == 0 &&
  3863                  !_cm->has_overflown() &&
  3864                  !_cm->mark_stack_overflow() &&
  3865                  !_cm->region_stack_overflow(),
  3866                  "only way to reach here" );
  3868       if (_cm->verbose_low())
  3869         gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
  3870     } else {
  3871       // Apparently there's more work to do. Let's abort this task. The
  3872       // caller will restart it and we can hopefully find more things to do.
  3874       if (_cm->verbose_low())
  3875         gclog_or_tty->print_cr("[%d] apparently there is more work to do", _task_id);
  3877       set_has_aborted();
  3878       statsOnly( ++_aborted_termination );
  3882   // Mainly for debugging purposes to make sure that a pointer to the
  3883   // closure which was statically allocated in this frame doesn't
  3884   // escape it by accident.
  3885   set_oop_closure(NULL);
  3886   double end_time_ms = os::elapsedVTime() * 1000.0;
  3887   double elapsed_time_ms = end_time_ms - _start_time_ms;
  3888   // Update the step history.
  3889   _step_times_ms.add(elapsed_time_ms);
  3891   if (has_aborted()) {
  3892     // The task was aborted for some reason.
  3894     statsOnly( ++_aborted );
  3896     if (_has_aborted_timed_out) {
  3897       double diff_ms = elapsed_time_ms - _time_target_ms;
  3898       // Keep statistics of how well we did with respect to hitting
  3899       // our target only if we actually timed out (if we aborted for
  3900       // other reasons, then the results might get skewed).
  3901       _marking_step_diffs_ms.add(diff_ms);
  3904     if (_cm->has_overflown()) {
  3905       // This is the interesting one. We aborted because a global
  3906       // overflow was raised. This means we have to restart the
  3907       // marking phase and start iterating over regions. However, in
  3908       // order to do this we have to make sure that all tasks stop
  3909       // what they are doing and re-initialise in a safe manner. We
  3910       // will achieve this with the use of two barrier sync points.
  3912       if (_cm->verbose_low())
  3913         gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
  3915       _cm->enter_first_sync_barrier(_task_id);
  3916       // When we exit this sync barrier we know that all tasks have
  3917       // stopped doing marking work. So, it's now safe to
  3918       // re-initialise our data structures. At the end of this method,
  3919       // task 0 will clear the global data structures.
  3921       statsOnly( ++_aborted_overflow );
  3923       // We clear the local state of this task...
  3924       clear_region_fields();
  3926       // ...and enter the second barrier.
  3927       _cm->enter_second_sync_barrier(_task_id);
  3928       // At this point everything has been re-initialised and we're
  3929       // ready to restart.
  3932     if (_cm->verbose_low()) {
  3933       gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
  3934                              "elapsed = %1.2lfms <<<<<<<<<<",
  3935                              _task_id, _time_target_ms, elapsed_time_ms);
  3936       if (_cm->has_aborted())
  3937         gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
  3938                                _task_id);
  3940   } else {
  3941     if (_cm->verbose_low())
  3942       gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
  3943                              "elapsed = %1.2lfms <<<<<<<<<<",
  3944                              _task_id, _time_target_ms, elapsed_time_ms);
  3947   _claimed = false;
  3950 CMTask::CMTask(int task_id,
  3951                ConcurrentMark* cm,
  3952                CMTaskQueue* task_queue,
  3953                CMTaskQueueSet* task_queues)
  3954   : _g1h(G1CollectedHeap::heap()),
  3955     _task_id(task_id), _cm(cm),
  3956     _claimed(false),
  3957     _nextMarkBitMap(NULL), _hash_seed(17),
  3958     _task_queue(task_queue),
  3959     _task_queues(task_queues),
  3960     _oop_closure(NULL) {
  3961   guarantee( task_queue != NULL, "invariant" );
  3962   guarantee( task_queues != NULL, "invariant" );
  3964   statsOnly( _clock_due_to_scanning = 0;
  3965              _clock_due_to_marking  = 0 );
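  // Seed the overshoot predictor (used in do_marking_step() to trim the
  // time target) with a small initial sample.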
  3967   _marking_step_diffs_ms.add(0.5);
