src/share/vm/gc_implementation/g1/concurrentMark.cpp

author      ysr
date        Tue, 26 Jan 2010 16:52:29 -0800
changeset   1629:34fb2662f6c2
parent      1546:44f61c24ddab
child       1717:b81f3572f355
permissions -rw-r--r--

6920090: G1: Disable ReduceInitialCardMarks at least until 6920109 is fixed
Summary: G1 now answers "no" to the query can_elide_initializing_store_barrier() in the product build. A debug flag allows alternate behaviour in debug builds.
Reviewed-by: iveresov, tonyp
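As an illustration of the behaviour described in the summary (a sketch only, not the actual patch; the debug flag name ReduceInitialCardMarksForG1 below is an assumption), the query could look like this:

    // Product builds always answer "no" until 6920109 is fixed; a
    // debug-only flag can restore the old behaviour for testing.
    bool G1CollectedHeap::can_elide_initializing_store_barrier(oop new_obj) {
    #ifdef ASSERT
      return ReduceInitialCardMarksForG1;
    #else
      return false;
    #endif
    }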

/*
 * Copyright 2001-2009 Sun Microsystems, Inc.  All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 */

#include "incls/_precompiled.incl"
#include "incls/_concurrentMark.cpp.incl"

//
// CMS Bit Map Wrapper

CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter):
  _bm((uintptr_t*)NULL,0),
  _shifter(shifter) {
  _bmStartWord = (HeapWord*)(rs.base());
  _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
  ReservedSpace brs(ReservedSpace::allocation_align_size_up(
                     (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));

  guarantee(brs.is_reserved(), "couldn't allocate CMS bit map");
  // For now we'll just commit all of the bit map up front.
  // Later on we'll try to be more parsimonious with swap.
  guarantee(_virtual_space.initialize(brs, brs.size()),
            "couldn't reserve backing store for CMS bit map");
  assert(_virtual_space.committed_size() == brs.size(),
         "didn't reserve backing store for all of CMS bit map?");
  _bm.set_map((uintptr_t*)_virtual_space.low());
  assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
         _bmWordSize, "inconsistency in bit map sizing");
  _bm.set_size(_bmWordSize >> _shifter);
}
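// For reference, a minimal sketch of the word <-> bit-offset mapping that
// the sizing above assumes (an illustration, not the real definitions,
// which live in the companion header): one bit covers (1 << _shifter)
// heap words.
#if 0
size_t CMBitMapRO::heapWordToOffset(HeapWord* addr) const {
  return pointer_delta(addr, _bmStartWord) >> _shifter;
}
HeapWord* CMBitMapRO::offsetToHeapWord(size_t offset) const {
  return _bmStartWord + (offset << _shifter);
}
#endif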
HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
                                               HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) limit = _bmStartWord + _bmWordSize;
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
                                                 HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) limit = _bmStartWord + _bmWordSize;
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_zero postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_zero postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
  HeapWord* left  = MAX2(_bmStartWord, mr.start());
  HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
  if (right > left) {
    // Right-open interval [leftOffset, rightOffset).
    return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
  } else {
    return true;
  }
}

void CMBitMapRO::mostly_disjoint_range_union(BitMap*   from_bitmap,
                                             size_t    from_start_index,
                                             HeapWord* to_start_word,
                                             size_t    word_num) {
  _bm.mostly_disjoint_range_union(from_bitmap,
                                  from_start_index,
                                  heapWordToOffset(to_start_word),
                                  word_num);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(ReservedSpace rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(rs.base()) &&
         _bmWordSize  == rs.size()>>LogHeapWordSize;
}
#endif

void CMBitMap::clearAll() {
  _bm.clear();
  return;
}
void CMBitMap::markRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end   = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}
CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
#ifdef ASSERT
  , _drain_in_progress(false)
  , _drain_in_progress_yields(false)
#endif
{}

void CMMarkStack::allocate(size_t size) {
  _base = NEW_C_HEAP_ARRAY(oop, size);
  if (_base == NULL)
    vm_exit_during_initialization("Failed to allocate "
                                  "CM mark stack");
  _index = 0;
  // QQQQ cast ...
  _capacity = (jint) size;
  _oops_do_bound = -1;
  NOT_PRODUCT(_max_depth = 0);
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base);
}

void CMMarkStack::par_push(oop ptr) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = ptr;
      // Note that we don't maintain this atomically.  We could, but it
      // doesn't seem necessary.
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}
void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index + n;
    if (next_index > _capacity) {
      _overflow = true;
      return;
    }
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      for (int i = 0; i < n; i++) {
        int ind = index + i;
        assert(ind < _capacity, "By overflow test above.");
        _base[ind] = ptr_arr[i];
      }
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
}

bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}
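// Usage sketch (the caller and its process() helper are hypothetical,
// for illustration only): draining the stack in buffered chunks with
// par_pop_arr().
#if 0
void drain_in_chunks(CMMarkStack* stack) {
  oop buffer[64];
  int n;
  while (stack->par_pop_arr(buffer, 64, &n)) {
    for (int i = 0; i < n; i++) {
      process(buffer[i]); // hypothetical per-object work
    }
  }
}
#endif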
CMRegionStack::CMRegionStack() : _base(NULL) {}

void CMRegionStack::allocate(size_t size) {
  _base = NEW_C_HEAP_ARRAY(MemRegion, size);
  if (_base == NULL)
    vm_exit_during_initialization("Failed to allocate "
                                  "CM region mark stack");
  _index = 0;
  // QQQQ cast ...
  _capacity = (jint) size;
}

CMRegionStack::~CMRegionStack() {
  if (_base != NULL) FREE_C_HEAP_ARRAY(MemRegion, _base);
}

void CMRegionStack::push(MemRegion mr) {
  assert(mr.word_size() > 0, "Precondition");
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = mr;
      return;
    }
    // Otherwise, we need to try again.
  }
}

MemRegion CMRegionStack::pop() {
  while (true) {
    jint index = _index;

    if (index == 0) {
      return MemRegion();
    }
    jint next_index = index-1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      MemRegion mr = _base[next_index];
      if (mr.start() != NULL) {
        assert(mr.end() != NULL, "invariant");
        assert(mr.word_size() > 0, "invariant");
        return mr;
      } else {
        // that entry was invalidated... let's skip it
        assert(mr.end() == NULL, "invariant");
      }
    }
    // Otherwise, we need to try again.
  }
}
bool CMRegionStack::invalidate_entries_into_cset() {
  bool result = false;
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  for (int i = 0; i < _oops_do_bound; ++i) {
    MemRegion mr = _base[i];
    if (mr.start() != NULL) {
      assert(mr.end() != NULL, "invariant");
      assert(mr.word_size() > 0, "invariant");
      HeapRegion* hr = g1h->heap_region_containing(mr.start());
      assert(hr != NULL, "invariant");
      if (hr->in_collection_set()) {
        // The region points into the collection set
        _base[i] = MemRegion();
        result = true;
      }
    } else {
      // that entry was invalidated... let's skip it
      assert(mr.end() == NULL, "invariant");
    }
  }
  return result;
}

template<class OopClosureClass>
bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
         || SafepointSynchronize::is_at_safepoint(),
         "Drain recursion must be yield-safe.");
  bool res = true;
  debug_only(_drain_in_progress = true);
  debug_only(_drain_in_progress_yields = yield_after);
  while (!isEmpty()) {
    oop newOop = pop();
    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
    assert(newOop->is_oop(), "Expected an oop");
    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
           "only grey objects on this stack");
    // iterate over the oops in this oop, marking and pushing
    // the ones in CMS generation.
    newOop->oop_iterate(cl);
    if (yield_after && _cm->do_yield_check()) {
      res = false; break;
    }
  }
  debug_only(_drain_in_progress = false);
  return res;
}
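// Illustrative call site for drain() (the closure type is hypothetical):
// with yield_after = true the drain may stop early after a yield, in which
// case it returns false and the caller is expected to come back and retry.
#if 0
SomeOopClosure cl;  // any OopClosure subclass
bool done = _markStack.drain(&cl, _nextMarkBitMap, true /* yield_after */);
if (!done) {
  // we yielded part-way; more entries may remain on the stack
}
#endif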
void CMMarkStack::oops_do(OopClosure* f) {
  if (_index == 0) return;
  assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
         "Bound must be set.");
  for (int i = 0; i < _oops_do_bound; i++) {
    f->do_oop(&_base[i]);
  }
  _oops_do_bound = -1;
}

bool ConcurrentMark::not_yet_marked(oop obj) const {
  return (_g1h->is_obj_ill(obj)
          || (_g1h->is_in_permanent(obj)
              && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER
ConcurrentMark::ConcurrentMark(ReservedSpace rs,
                               int max_regions) :
  _markBitMap1(rs, MinObjAlignment - 1),
  _markBitMap2(rs, MinObjAlignment - 1),

  _parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _region_bm(max_regions, false /* in_resource_area*/),
  _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
           CardTableModRefBS::card_shift,
           false /* in_resource_area*/),
  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),
  _at_least_one_mark_complete(false),

  _markStack(this),
  _regionStack(),
  // _finger set in set_non_marking_state

  _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_task_num)),
  _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),
  _should_gray_objects(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL)
{
  CMVerboseLevel verbose_level =
    (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose)
    verbose_level = no_verbose;
  if (verbose_level > high_verbose)
    verbose_level = high_verbose;
  _verbose_level = verbose_level;

  if (verbose_low())
    gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
                           "heap end = "PTR_FORMAT, _heap_start, _heap_end);

  _markStack.allocate(G1MarkStackSize);
  _regionStack.allocate(G1MarkRegionStackSize);

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");

  _g1h = G1CollectedHeap::heap();
  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBLogBufferSize);

  int size = (int) MAX2(ParallelGCThreads, (size_t)1);
  _par_cleanup_thread_state = NEW_C_HEAP_ARRAY(ParCleanupThreadState*, size);
  for (int i = 0 ; i < size; i++) {
    _par_cleanup_thread_state[i] = new ParCleanupThreadState;
  }

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_task_num;
  for (int i = 0; i < (int) _max_task_num; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
    _accum_task_vtime[i] = 0.0;
  }

  if (ParallelMarkingThreads > ParallelGCThreads) {
    vm_exit_during_initialization("Can't have more ParallelMarkingThreads "
                                  "than ParallelGCThreads.");
  }
  if (ParallelGCThreads == 0) {
    // if we are not running with any parallel GC threads we will not
    // spawn any marking threads either
    _parallel_marking_threads =   0;
    _sleep_factor             = 0.0;
    _marking_task_overhead    = 1.0;
  } else {
    if (ParallelMarkingThreads > 0) {
      // notice that ParallelMarkingThreads overwrites G1MarkingOverheadPercent
      // if both are set

      _parallel_marking_threads = ParallelMarkingThreads;
      _sleep_factor             = 0.0;
      _marking_task_overhead    = 1.0;
    } else if (G1MarkingOverheadPercent > 0) {
      // we will calculate the number of parallel marking threads
      // based on a target overhead with respect to the soft real-time
      // goal

      double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
      double overall_cm_overhead =
        (double) MaxGCPauseMillis * marking_overhead /
        (double) GCPauseIntervalMillis;
      double cpu_ratio = 1.0 / (double) os::processor_count();
      double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
      double marking_task_overhead =
        overall_cm_overhead / marking_thread_num *
                                                (double) os::processor_count();
      double sleep_factor =
                         (1.0 - marking_task_overhead) / marking_task_overhead;

      _parallel_marking_threads = (size_t) marking_thread_num;
      _sleep_factor             = sleep_factor;
      _marking_task_overhead    = marking_task_overhead;
    } else {
      _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
      _sleep_factor             = 0.0;
      _marking_task_overhead    = 1.0;
    }

    if (parallel_marking_threads() > 1)
      _cleanup_task_overhead = 1.0;
    else
      _cleanup_task_overhead = marking_task_overhead();
    _cleanup_sleep_factor =
                     (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
    gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
    gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
    gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
    gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
    gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
#endif

    guarantee(parallel_marking_threads() > 0, "peace of mind");
    _parallel_workers = new WorkGang("G1 Parallel Marking Threads",
                                     (int) parallel_marking_threads(), false, true);
    if (_parallel_workers == NULL)
      vm_exit_during_initialization("Failed necessary allocation.");
  }

  // so that the call below can read a sensible value
  _heap_start = (HeapWord*) rs.base();
  set_non_marking_state();
}
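// Worked example of the sizing math above (the input values are assumed
// for illustration, not defaults): with G1MarkingOverheadPercent = 33,
// MaxGCPauseMillis = 200, GCPauseIntervalMillis = 1000 and 8 processors:
//   marking_overhead      = 33 / 100                      = 0.33
//   overall_cm_overhead   = 200 * 0.33 / 1000             = 0.066
//   cpu_ratio             = 1 / 8                         = 0.125
//   marking_thread_num    = ceil(0.066 / 0.125)           = 1
//   marking_task_overhead = 0.066 / 1 * 8                 = 0.528
//   sleep_factor          = (1 - 0.528) / 0.528           ~= 0.894
// i.e. a single marking thread that sleeps roughly 0.9 ms for every
// millisecond of marking work it performs.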
void ConcurrentMark::update_g1_committed(bool force) {
  // If concurrent marking is not in progress, then we do not need to
  // update _heap_end. This has a subtle and important
  // side-effect. Imagine that two evacuation pauses happen between
  // marking completion and remark. The first one can grow the
  // heap (hence now the finger is below the heap end). Then, the
  // second one could unnecessarily push regions on the region
  // stack. This causes the invariant that the region stack is empty
  // at the beginning of remark to be false. By ensuring that we do
  // not observe heap expansions after marking is complete, we do
  // not have this problem.
  if (!concurrent_marking_in_progress() && !force)
    return;

  MemRegion committed = _g1h->g1_committed();
  assert(committed.start() == _heap_start, "start shouldn't change");
  HeapWord* new_end = committed.end();
  if (new_end > _heap_end) {
    // The heap has been expanded.
    _heap_end = new_end;
  }
  // Notice that the heap can also shrink. However, this only happens
  // during a Full GC (at least currently) and the entire marking
  // phase will bail out and the task will not be restarted. So, let's
  // do nothing.
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase. CM will be notified of any future g1_committed expansions
  // at the end of evacuation pauses, when tasks are inactive.
  MemRegion committed = _g1h->g1_committed();
  _heap_start = committed.start();
  _heap_end   = committed.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // reset all the marking data structures and any necessary flags
  clear_marking_state();

  if (verbose_low())
    gclog_or_tty->print_cr("[global] resetting");

  // We do reset all of them, since different phases will use
  // different numbers of active threads. So, it's easiest to have all
  // of them ready.
  for (int i = 0; i < (int) _max_task_num; ++i)
    _tasks[i]->reset(_nextMarkBitMap);

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}
void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
  assert(active_tasks <= _max_task_num, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (int i = 0; i < (int) _max_task_num; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(_finger == _heap_end, "only way to get here");
    update_g1_committed(true);
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  clear_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  int size = (int) MAX2(ParallelGCThreads, (size_t)1);
  for (int i = 0; i < size; i++) delete _par_cleanup_thread_state[i];
  FREE_C_HEAP_ARRAY(ParCleanupThreadState*,
                    _par_cleanup_thread_state);

  for (int i = 0; i < (int) _max_task_num; ++i) {
    delete _task_queues->queue(i);
    delete _tasks[i];
  }
  delete _task_queues;
  FREE_C_HEAP_ARRAY(CMTask*, _tasks);
}

void ConcurrentMark::clearNextBitmap() {
  guarantee(!G1CollectedHeap::heap()->mark_in_progress(), "Precondition.");

  // clear the mark bitmap (no grey objects to start with).
  // We need to do this in chunks and offer to yield in between
  // each chunk.
  HeapWord* start  = _nextMarkBitMap->startWord();
  HeapWord* end    = _nextMarkBitMap->endWord();
  HeapWord* cur    = start;
  size_t chunkSize = M;
  while (cur < end) {
    HeapWord* next = cur + chunkSize;
    if (next > end)
      next = end;
    MemRegion mr(cur,next);
    _nextMarkBitMap->clearRange(mr);
    cur = next;
    do_yield_check();
  }
}
class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->continuesHumongous()) {
      r->note_start_of_marking(true);
    }
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap*   g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

  if (G1PrintReachableAtInitialMark) {
    print_reachable(true, "before");
  }

  // Initialise marking structures. This has to be done in a STW phase.
  reset();
}

// This closure is used to mark refs into the g1 generation
// from external roots in the CMS bit map.
// Called at the first checkpoint.
//
class CMMarkRootsClosure: public OopsInGenClosure {
private:
  ConcurrentMark*  _cm;
  G1CollectedHeap* _g1h;
  bool             _do_barrier;

public:
  CMMarkRootsClosure(ConcurrentMark* cm,
                     G1CollectedHeap* g1h,
                     bool do_barrier) : _cm(cm), _g1h(g1h),
                                        _do_barrier(do_barrier) { }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    T heap_oop = oopDesc::load_heap_oop(p);
    if (!oopDesc::is_null(heap_oop)) {
      oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
      assert(obj->is_oop() || obj->mark() == NULL,
             "expected an oop, possibly with mark word displaced");
      HeapWord* addr = (HeapWord*)obj;
      if (_g1h->is_in_g1_reserved(addr)) {
        _cm->grayRoot(obj);
      }
    }
    if (_do_barrier) {
      assert(!_g1h->is_in_g1_reserved(p),
             "Should be called on external roots");
      do_barrier(p);
    }
  }
};

void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap*   g1h = G1CollectedHeap::heap();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);

  // Start weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor();
  rp->verify_no_references_recorded();
  rp->enable_discovery(); // enable ("weak") refs discovery
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  satb_mq_set.set_active_all_threads(true);

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

// Checkpoint the roots into this generation from outside
// this generation. [Note this initial checkpoint need only
// be approximate -- we'll do a catch up phase subsequently.]
void ConcurrentMark::checkpointRootsInitial() {
  assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped");
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  double start = os::elapsedTime();

  G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
  g1p->record_concurrent_mark_init_start();
  checkpointRootsInitialPre();

  // YSR: when concurrent precleaning is in place, we'll
  // need to clear the cached card table here

  ResourceMark rm;
  HandleMark  hm;

  g1h->ensure_parsability(false);
  g1h->perm_gen()->save_marks();

  CMMarkRootsClosure notOlder(this, g1h, false);
  CMMarkRootsClosure older(this, g1h, true);

  g1h->set_marking_started();
  g1h->rem_set()->prepare_for_younger_refs_iterate(false);

  g1h->process_strong_roots(true,    // activate StrongRootsScope
                            false,   // fake perm gen collection
                            SharedHeap::SO_AllClasses,
                            &notOlder, // Regular roots
                            NULL,     // do not visit active blobs
                            &older    // Perm Gen Roots
                            );
  checkpointRootsInitialPost();

  // Statistics.
  double end = os::elapsedTime();
  _init_times.add((end - start) * 1000.0);

  g1p->record_concurrent_mark_init_end();
}

/*
   Notice that in the next two methods, we actually leave the STS
   during the barrier sync and join it immediately afterwards. If we
   did not do this, the following deadlock could occur: one thread
   could be in the barrier sync code, waiting for the other threads
   to also sync up, while another thread could be trying to yield
   while also waiting for the other threads to sync up.

   Because the thread that does the sync barrier has left the STS, it
   is possible for it to be suspended while a Full GC or an evacuation
   pause occurs. This is actually safe, since entering the sync
   barrier is one of the last things do_marking_step() does, and it
   doesn't manipulate any data structures afterwards.
*/

void ConcurrentMark::enter_first_sync_barrier(int task_num) {
  if (verbose_low())
    gclog_or_tty->print_cr("[%d] entering first barrier", task_num);

  ConcurrentGCThread::stsLeave();
  _first_overflow_barrier_sync.enter();
  ConcurrentGCThread::stsJoin();
  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low())
    gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);

  // let task 0 do this
  if (task_num == 0) {
    // task 0 is responsible for clearing the global data structures
    clear_marking_state();

    if (PrintGC) {
      gclog_or_tty->date_stamp(PrintGCDateStamps);
      gclog_or_tty->stamp(PrintGCTimeStamps);
      gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
    }
  }

  // after this, each task should reset its own data structures and
  // then go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(int task_num) {
  if (verbose_low())
    gclog_or_tty->print_cr("[%d] entering second barrier", task_num);

  ConcurrentGCThread::stsLeave();
  _second_overflow_barrier_sync.enter();
  ConcurrentGCThread::stsJoin();
  // at this point everything should be re-initialised and ready to go

  if (verbose_low())
    gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
}

void ConcurrentMark::grayRoot(oop p) {
  HeapWord* addr = (HeapWord*) p;
  // We can't really check against _heap_start and _heap_end, since it
  // is possible during an evacuation pause with piggy-backed
  // initial-mark that the committed space is expanded during the
  // pause without CM observing this change. So the assertion below
  // is a bit conservative; but better than nothing.
  assert(_g1h->g1_committed().contains(addr),
         "address should be within the heap bounds");

  if (!_nextMarkBitMap->isMarked(addr))
    _nextMarkBitMap->parMark(addr);
}

void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
  // The objects on the region have already been marked "in bulk" by
  // the caller. We only need to decide whether to push the region on
  // the region stack or not.

  if (!concurrent_marking_in_progress() || !_should_gray_objects)
    // We're done with marking and waiting for remark. We do not need to
    // push anything else on the region stack.
    return;

  HeapWord* finger = _finger;

  if (verbose_low())
    gclog_or_tty->print_cr("[global] attempting to push "
                           "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
                           PTR_FORMAT, mr.start(), mr.end(), finger);

  if (mr.start() < finger) {
    // The finger is always heap region aligned and it is not possible
    // for mr to span heap regions.
    assert(mr.end() <= finger, "invariant");

    // Separated the asserts so that we know which one fires.
    assert(mr.start() <= mr.end(),
           "region boundaries should fall within the committed space");
    assert(_heap_start <= mr.start(),
           "region boundaries should fall within the committed space");
    assert(mr.end() <= _heap_end,
           "region boundaries should fall within the committed space");
    if (verbose_low())
      gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
                             "below the finger, pushing it",
                             mr.start(), mr.end());

    if (!region_stack_push(mr)) {
      if (verbose_low())
        gclog_or_tty->print_cr("[global] region stack has overflown.");
    }
  }
}

void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
  // The object is not marked by the caller. We need to at least mark
  // it and maybe push it on the stack.

  HeapWord* addr = (HeapWord*)p;
  if (!_nextMarkBitMap->isMarked(addr)) {
    // We definitely need to mark it, irrespective of whether we bail
    // out because we're done with marking.
    if (_nextMarkBitMap->parMark(addr)) {
      if (!concurrent_marking_in_progress() || !_should_gray_objects)
        // If we're done with concurrent marking and we're waiting for
        // remark, then we're not pushing anything on the stack.
        return;

      // No OrderAccess::store_load() is needed. It is implicit in the
      // CAS done in parMark(addr) above
      HeapWord* finger = _finger;

      if (addr < finger) {
        if (!mark_stack_push(oop(addr))) {
          if (verbose_low())
            gclog_or_tty->print_cr("[global] global stack overflow "
                                   "during parMark");
        }
      }
    }
  }
}
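// A note on the finger protocol used above and in grayRegionIfNecessary():
// the global finger is the boundary up to which the marking tasks have
// claimed the heap. An object at or above the finger will still be visited
// when the finger sweeps past it, so only objects *below* the finger need
// to be pushed on the mark stack to avoid being missed.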
class CMConcurrentMarkingTask: public AbstractGangTask {
private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(int worker_i) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    double start_vtime = os::elapsedVTime();

    ConcurrentGCThread::stsJoin();

    assert((size_t) worker_i < _cm->active_tasks(), "invariant");
    CMTask* the_task = _cm->task(worker_i);
    the_task->record_start_time();
    if (!_cm->has_aborted()) {
      do {
        double start_vtime_sec = os::elapsedVTime();
        double start_time_sec = os::elapsedTime();
        the_task->do_marking_step(10.0);
        double end_time_sec = os::elapsedTime();
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
        double elapsed_time_sec = end_time_sec - start_time_sec;
        _cm->clear_has_overflown();

        bool ret = _cm->do_yield_check(worker_i);

        jlong sleep_time_ms;
        if (!_cm->has_aborted() && the_task->has_aborted()) {
          sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
          ConcurrentGCThread::stsLeave();
          os::sleep(Thread::current(), sleep_time_ms, false);
          ConcurrentGCThread::stsJoin();
        }
        double end_time2_sec = os::elapsedTime();
        double elapsed_time2_sec = end_time2_sec - start_time_sec;

#if 0
          gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
                                 "overhead %1.4lf",
                                 elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
                                 the_task->conc_overhead(os::elapsedTime()) * 8.0);
          gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
                                 elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
#endif
      } while (!_cm->has_aborted() && the_task->has_aborted());
    }
    the_task->record_end_time();
    guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");

    ConcurrentGCThread::stsLeave();

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;

  set_phase(MAX2((size_t) 1, parallel_marking_threads()), true);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  if (parallel_marking_threads() > 0)
    _parallel_workers->run_task(&markingTask);
  else
    markingTask.work(0);
  print_stats();
}
void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(before)");
    Universe::heap()->prepare_for_verify();
    Universe::verify(true, false, true);
  }

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops.  We overflowed.  Restart concurrent marking.
    _restart_for_overflow = true;
    // Clear the flag. We do not need it any more.
    clear_has_overflown();
    if (G1TraceMarkStackOverflow)
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
  } else {
    // We're done with marking.
    JavaThread::satb_mark_queue_set().set_active_all_threads(false);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      gclog_or_tty->print(" VerifyDuringGC:(after)");
      Universe::heap()->prepare_for_verify();
      Universe::heap()->verify(/* allow_dirty */      true,
                               /* silent */           false,
                               /* use_prev_marking */ false);
    }
  }

#if VERIFY_OBJS_PROCESSED
  _scan_obj_cl.objs_processed = 0;
  ThreadLocalObjQueue::objs_enqueued = 0;
#endif

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();
}
#define CARD_BM_TEST_MODE 0

class CalcLiveObjectsClosure: public HeapRegionClosure {

  CMBitMapRO* _bm;
  ConcurrentMark* _cm;
  bool _changed;
  bool _yield;
  size_t _words_done;
  size_t _tot_live;
  size_t _tot_used;
  size_t _regions_done;
  double _start_vtime_sec;

  BitMap* _region_bm;
  BitMap* _card_bm;
  intptr_t _bottom_card_num;
  bool _final;

  void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
    for (intptr_t i = start_card_num; i <= last_card_num; i++) {
#if CARD_BM_TEST_MODE
      guarantee(_card_bm->at(i - _bottom_card_num), "Should already be set.");
#else
      _card_bm->par_at_put(i - _bottom_card_num, 1);
#endif
    }
  }

public:
  CalcLiveObjectsClosure(bool final,
                         CMBitMapRO *bm, ConcurrentMark *cm,
                         BitMap* region_bm, BitMap* card_bm) :
    _bm(bm), _cm(cm), _changed(false), _yield(true),
    _words_done(0), _tot_live(0), _tot_used(0),
    _region_bm(region_bm), _card_bm(card_bm),_final(final),
    _regions_done(0), _start_vtime_sec(0.0)
  {
    _bottom_card_num =
      intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
               CardTableModRefBS::card_shift);
  }

  // It takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->continuesHumongous(), "should have filtered those out");

    size_t index = hr->hrs_index();
    if (!hr->startsHumongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_at_put((BitMap::idx_t) index, true);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range. It might
      // have been a bit more efficient to look at the object that
      // spans these humongous regions to calculate their number from
      // the object's size. However, it's a good idea to calculate
      // this based on the metadata itself, and not the region
      // contents, so that this code is not aware of what goes into
      // the humongous regions (in case this changes in the future).
      G1CollectedHeap* g1h = G1CollectedHeap::heap();
      size_t end_index = index + 1;
      while (end_index < g1h->n_regions()) {
        HeapRegion* chr = g1h->region_at(end_index);
        if (!chr->continuesHumongous()) {
          break;
        }
        end_index += 1;
      }
      _region_bm->par_at_put_range((BitMap::idx_t) index,
                                   (BitMap::idx_t) end_index, true);
    }
  }

  bool doHeapRegion(HeapRegion* hr) {
    if (!_final && _regions_done == 0)
      _start_vtime_sec = os::elapsedVTime();

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* nextTop = hr->next_top_at_mark_start();
    HeapWord* start   = hr->top_at_conc_mark_count();
    assert(hr->bottom() <= start && start <= hr->end() &&
           hr->bottom() <= nextTop && nextTop <= hr->end() &&
           start <= nextTop,
           "Preconditions.");
    // Otherwise, record the number of words we'll examine.
    size_t words_done = (nextTop - start);
    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, nextTop);
    size_t marked_bytes = 0;

    // Below, the term "card num" means the result of shifting an address
    // by the card shift -- address 0 corresponds to card number 0.  One
    // must subtract the card num of the bottom of the heap to obtain a
    // card table index.
    // The first card num of the sequence of live cards currently being
    // constructed.  -1 ==> no sequence.
    intptr_t start_card_num = -1;
    // The last card num of the sequence of live cards currently being
    // constructed.  -1 ==> no sequence.
    intptr_t last_card_num = -1;

    while (start < nextTop) {
      if (_yield && _cm->do_yield_check()) {
        // We yielded.  It might be for a full collection, in which case
        // all bets are off; terminate the traversal.
        if (_cm->has_aborted()) {
          _changed = false;
          return true;
        } else {
          // Otherwise, it might be a collection pause, and the region
          // we're looking at might be in the collection set.  We'll
          // abandon this region.
          return false;
        }
      }
      oop obj = oop(start);
      int obj_sz = obj->size();
      // The card num of the start of the current object.
      intptr_t obj_card_num =
        intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);

      HeapWord* obj_last = start + obj_sz - 1;
      intptr_t obj_last_card_num =
        intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);

      if (obj_card_num != last_card_num) {
        if (start_card_num == -1) {
          assert(last_card_num == -1, "Both or neither.");
          start_card_num = obj_card_num;
        } else {
          assert(last_card_num != -1, "Both or neither.");
          assert(obj_card_num >= last_card_num, "Inv");
          if ((obj_card_num - last_card_num) > 1) {
            // Mark the last run, and start a new one.
            mark_card_num_range(start_card_num, last_card_num);
            start_card_num = obj_card_num;
          }
        }
#if CARD_BM_TEST_MODE
        /*
        gclog_or_tty->print_cr("Setting bits from %d/%d.",
                               obj_card_num - _bottom_card_num,
                               obj_last_card_num - _bottom_card_num);
        */
        for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
          _card_bm->par_at_put(j - _bottom_card_num, 1);
        }
#endif
      }
      // In any case, we set the last card num.
      last_card_num = obj_last_card_num;

      marked_bytes += (size_t)obj_sz * HeapWordSize;
      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
      _changed = true;
    }
    // Handle the last range, if any.
    if (start_card_num != -1)
      mark_card_num_range(start_card_num, last_card_num);
    if (_final) {
      // Mark the allocated-since-marking portion...
      HeapWord* tp = hr->top();
      if (nextTop < tp) {
        start_card_num =
          intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
        last_card_num =
          intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
        mark_card_num_range(start_card_num, last_card_num);
        // This definitely means the region has live objects.
        set_bit_for_region(hr);
      }
    }

    hr->add_to_marked_bytes(marked_bytes);
    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }
    hr->set_top_at_conc_mark_count(nextTop);
    _tot_live += hr->next_live_bytes();
    _tot_used += hr->used();
    _words_done = words_done;

    if (!_final) {
      ++_regions_done;
      if (_regions_done % 10 == 0) {
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
        if (elapsed_vtime_sec > (10.0 / 1000.0)) {
          jlong sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
          os::sleep(Thread::current(), sleep_time_ms, false);
          _start_vtime_sec = end_vtime_sec;
        }
      }
    }

    return false;
  }

  bool changed() { return _changed; }
  void reset()   { _changed = false; _words_done = 0; }
  void no_yield() { _yield = false; }
  size_t words_done() { return _words_done; }
  size_t tot_live() { return _tot_live; }
  size_t tot_used() { return _tot_used; }
};
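// Worked example of the card arithmetic above, assuming the usual 512-byte
// cards (card_shift = 9): an object starting at address 0x12345678 has
// obj_card_num = 0x12345678 >> 9 = 0x91A2B; its index into _card_bm is
// obj_card_num - _bottom_card_num, where _bottom_card_num is the reserved
// heap bottom shifted the same way.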
  1334 void ConcurrentMark::calcDesiredRegions() {
  1335   _region_bm.clear();
  1336   _card_bm.clear();
  1337   CalcLiveObjectsClosure calccl(false /*final*/,
  1338                                 nextMarkBitMap(), this,
  1339                                 &_region_bm, &_card_bm);
  1340   G1CollectedHeap *g1h = G1CollectedHeap::heap();
  1341   g1h->heap_region_iterate(&calccl);
  1343   do {
  1344     calccl.reset();
  1345     g1h->heap_region_iterate(&calccl);
  1346   } while (calccl.changed());
  1349 class G1ParFinalCountTask: public AbstractGangTask {
  1350 protected:
  1351   G1CollectedHeap* _g1h;
  1352   CMBitMap* _bm;
  1353   size_t _n_workers;
  1354   size_t *_live_bytes;
  1355   size_t *_used_bytes;
  1356   BitMap* _region_bm;
  1357   BitMap* _card_bm;
  1358 public:
  1359   G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
  1360                       BitMap* region_bm, BitMap* card_bm) :
  1361     AbstractGangTask("G1 final counting"), _g1h(g1h),
  1362     _bm(bm), _region_bm(region_bm), _card_bm(card_bm)
  1364     if (ParallelGCThreads > 0)
  1365       _n_workers = _g1h->workers()->total_workers();
  1366     else
  1367       _n_workers = 1;
  1368     _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
  1369     _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
  1372   ~G1ParFinalCountTask() {
  1373     FREE_C_HEAP_ARRAY(size_t, _live_bytes);
  1374     FREE_C_HEAP_ARRAY(size_t, _used_bytes);
  1377   void work(int i) {
  1378     CalcLiveObjectsClosure calccl(true /*final*/,
  1379                                   _bm, _g1h->concurrent_mark(),
  1380                                   _region_bm, _card_bm);
  1381     calccl.no_yield();
  1382     if (ParallelGCThreads > 0) {
  1383       _g1h->heap_region_par_iterate_chunked(&calccl, i,
  1384                                             HeapRegion::FinalCountClaimValue);
  1385     } else {
  1386       _g1h->heap_region_iterate(&calccl);
  1388     assert(calccl.complete(), "Shouldn't have yielded!");
  1390     assert((size_t) i < _n_workers, "invariant");
  1391     _live_bytes[i] = calccl.tot_live();
  1392     _used_bytes[i] = calccl.tot_used();
  1394   size_t live_bytes()  {
  1395     size_t live_bytes = 0;
  1396     for (size_t i = 0; i < _n_workers; ++i)
  1397       live_bytes += _live_bytes[i];
  1398     return live_bytes;
  1400   size_t used_bytes()  {
  1401     size_t used_bytes = 0;
  1402     for (size_t i = 0; i < _n_workers; ++i)
  1403       used_bytes += _used_bytes[i];
  1404     return used_bytes;
  1406 };
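       // Aggregation pattern used by the task above: each worker writes
       // only its own _live_bytes[i] / _used_bytes[i] slot during work(),
       // so no synchronization is needed while counting; the caller then
       // sums the per-worker slots via live_bytes() and used_bytes() once
       // all workers have finished.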
  1408 class G1ParNoteEndTask;
  1410 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  1411   G1CollectedHeap* _g1;
  1412   int _worker_num;
  1413   size_t _max_live_bytes;
  1414   size_t _regions_claimed;
  1415   size_t _freed_bytes;
  1416   size_t _cleared_h_regions;
  1417   size_t _freed_regions;
  1418   UncleanRegionList* _unclean_region_list;
  1419   double _claimed_region_time;
  1420   double _max_region_time;
  1422 public:
  1423   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
  1424                              UncleanRegionList* list,
  1425                              int worker_num);
  1426   size_t freed_bytes() { return _freed_bytes; }
  1427   size_t cleared_h_regions() { return _cleared_h_regions; }
  1428   size_t freed_regions() { return  _freed_regions; }
  1429   UncleanRegionList* unclean_region_list() {
  1430     return _unclean_region_list;
  1433   bool doHeapRegion(HeapRegion *r);
  1435   size_t max_live_bytes() { return _max_live_bytes; }
  1436   size_t regions_claimed() { return _regions_claimed; }
  1437   double claimed_region_time_sec() { return _claimed_region_time; }
  1438   double max_region_time_sec() { return _max_region_time; }
  1439 };
  1441 class G1ParNoteEndTask: public AbstractGangTask {
  1442   friend class G1NoteEndOfConcMarkClosure;
  1443 protected:
  1444   G1CollectedHeap* _g1h;
  1445   size_t _max_live_bytes;
  1446   size_t _freed_bytes;
  1447   ConcurrentMark::ParCleanupThreadState** _par_cleanup_thread_state;
  1448 public:
  1449   G1ParNoteEndTask(G1CollectedHeap* g1h,
  1450                    ConcurrentMark::ParCleanupThreadState**
  1451                    par_cleanup_thread_state) :
  1452     AbstractGangTask("G1 note end"), _g1h(g1h),
  1453     _max_live_bytes(0), _freed_bytes(0),
  1454     _par_cleanup_thread_state(par_cleanup_thread_state)
  1455   {}
  1457   void work(int i) {
  1458     double start = os::elapsedTime();
  1459     G1NoteEndOfConcMarkClosure g1_note_end(_g1h,
  1460                                            &_par_cleanup_thread_state[i]->list,
  1461                                            i);
  1462     if (ParallelGCThreads > 0) {
  1463       _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
  1464                                             HeapRegion::NoteEndClaimValue);
  1465     } else {
  1466       _g1h->heap_region_iterate(&g1_note_end);
  1467     }
  1468     assert(g1_note_end.complete(), "Shouldn't have yielded!");
  1470     // Now finish up freeing the current thread's regions.
  1471     _g1h->finish_free_region_work(g1_note_end.freed_bytes(),
  1472                                   g1_note_end.cleared_h_regions(),
  1473                                   0, NULL);
  1474     {
  1475       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  1476       _max_live_bytes += g1_note_end.max_live_bytes();
  1477       _freed_bytes += g1_note_end.freed_bytes();
  1478     }
  1479     double end = os::elapsedTime();
  1480     if (G1PrintParCleanupStats) {
  1481       gclog_or_tty->print("     Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
  1482                           "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
  1483                           i, start, end, (end-start)*1000.0,
  1484                           g1_note_end.regions_claimed(),
  1485                           g1_note_end.claimed_region_time_sec()*1000.0,
  1486                           g1_note_end.max_region_time_sec()*1000.0);
  1487     }
  1488   }
  1489   size_t max_live_bytes() { return _max_live_bytes; }
  1490   size_t freed_bytes() { return _freed_bytes; }
  1491 };
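       // Unlike G1ParFinalCountTask, this task accumulates into shared
       // _max_live_bytes and _freed_bytes fields, so each worker briefly
       // takes ParGCRareEvent_lock (with no safepoint check) around the
       // two additions; since each worker updates the totals only once,
       // contention on the lock is negligible.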
  1493 class G1ParScrubRemSetTask: public AbstractGangTask {
  1494 protected:
  1495   G1RemSet* _g1rs;
  1496   BitMap* _region_bm;
  1497   BitMap* _card_bm;
  1498 public:
  1499   G1ParScrubRemSetTask(G1CollectedHeap* g1h,
  1500                        BitMap* region_bm, BitMap* card_bm) :
  1501     AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
  1502     _region_bm(region_bm), _card_bm(card_bm)
  1503   {}
  1505   void work(int i) {
  1506     if (ParallelGCThreads > 0) {
  1507       _g1rs->scrub_par(_region_bm, _card_bm, i,
  1508                        HeapRegion::ScrubRemSetClaimValue);
  1509     } else {
  1510       _g1rs->scrub(_region_bm, _card_bm);
  1511     }
  1512   }
  1514 };
  1516 G1NoteEndOfConcMarkClosure::
  1517 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
  1518                            UncleanRegionList* list,
  1519                            int worker_num)
  1520   : _g1(g1), _worker_num(worker_num),
  1521     _max_live_bytes(0), _regions_claimed(0),
  1522     _freed_bytes(0), _cleared_h_regions(0), _freed_regions(0),
  1523     _claimed_region_time(0.0), _max_region_time(0.0),
  1524     _unclean_region_list(list)
  1525 {}
  1527 bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *r) {
  1528   // We use a claim value of zero here because all regions
  1529   // were claimed with value 1 in the FinalCount task.
  1530   r->reset_gc_time_stamp();
  1531   if (!r->continuesHumongous()) {
  1532     double start = os::elapsedTime();
  1533     _regions_claimed++;
  1534     r->note_end_of_marking();
  1535     _max_live_bytes += r->max_live_bytes();
  1536     _g1->free_region_if_totally_empty_work(r,
  1537                                            _freed_bytes,
  1538                                            _cleared_h_regions,
  1539                                            _freed_regions,
  1540                                            _unclean_region_list,
  1541                                            true /*par*/);
  1542     double region_time = (os::elapsedTime() - start);
  1543     _claimed_region_time += region_time;
  1544     if (region_time > _max_region_time) _max_region_time = region_time;
  1545   }
  1546   return false;
  1547 }
  1549 void ConcurrentMark::cleanup() {
  1550   // world is stopped at this checkpoint
  1551   assert(SafepointSynchronize::is_at_safepoint(),
  1552          "world should be stopped");
  1553   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  1555   // If a full collection has happened, we shouldn't do this.
  1556   if (has_aborted()) {
  1557     g1h->set_marking_complete(); // So bitmap clearing isn't confused
  1558     return;
  1561   if (VerifyDuringGC) {
  1562     HandleMark hm;  // handle scope
  1563     gclog_or_tty->print(" VerifyDuringGC:(before)");
  1564     Universe::heap()->prepare_for_verify();
  1565     Universe::verify(/* allow dirty  */ true,
  1566                      /* silent       */ false,
  1567                      /* prev marking */ true);
  1570   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
  1571   g1p->record_concurrent_mark_cleanup_start();
  1573   double start = os::elapsedTime();
  1575   // Do counting once more with the world stopped for good measure.
  1576   G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
  1577                                         &_region_bm, &_card_bm);
  1578   if (ParallelGCThreads > 0) {
  1579     assert(g1h->check_heap_region_claim_values(
  1580                                                HeapRegion::InitialClaimValue),
  1581            "sanity check");
  1583     int n_workers = g1h->workers()->total_workers();
  1584     g1h->set_par_threads(n_workers);
  1585     g1h->workers()->run_task(&g1_par_count_task);
  1586     g1h->set_par_threads(0);
  1588     assert(g1h->check_heap_region_claim_values(
  1589                                              HeapRegion::FinalCountClaimValue),
  1590            "sanity check");
  1591   } else {
  1592     g1_par_count_task.work(0);
  1595   size_t known_garbage_bytes =
  1596     g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
  1597 #if 0
  1598   gclog_or_tty->print_cr("used %1.2lf, live %1.2lf, garbage %1.2lf",
  1599                          (double) g1_par_count_task.used_bytes() / (double) (1024 * 1024),
  1600                          (double) g1_par_count_task.live_bytes() / (double) (1024 * 1024),
  1601                          (double) known_garbage_bytes / (double) (1024 * 1024));
  1602 #endif // 0
  1603   g1p->set_known_garbage_bytes(known_garbage_bytes);
  1605   size_t start_used_bytes = g1h->used();
  1606   _at_least_one_mark_complete = true;
  1607   g1h->set_marking_complete();
  1609   double count_end = os::elapsedTime();
  1610   double this_final_counting_time = (count_end - start);
  1611   if (G1PrintParCleanupStats) {
  1612     gclog_or_tty->print_cr("Cleanup:");
  1613     gclog_or_tty->print_cr("  Finalize counting: %8.3f ms",
  1614                            this_final_counting_time*1000.0);
  1616   _total_counting_time += this_final_counting_time;
  1618   // Install newly created mark bitMap as "prev".
  1619   swapMarkBitMaps();
  1621   g1h->reset_gc_time_stamp();
  1623   // Note end of marking in all heap regions.
  1624   double note_end_start = os::elapsedTime();
  1625   G1ParNoteEndTask g1_par_note_end_task(g1h, _par_cleanup_thread_state);
  1626   if (ParallelGCThreads > 0) {
  1627     int n_workers = g1h->workers()->total_workers();
  1628     g1h->set_par_threads(n_workers);
  1629     g1h->workers()->run_task(&g1_par_note_end_task);
  1630     g1h->set_par_threads(0);
  1632     assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
  1633            "sanity check");
  1634   } else {
  1635     g1_par_note_end_task.work(0);
  1637   g1h->set_unclean_regions_coming(true);
  1638   double note_end_end = os::elapsedTime();
  1639   // Tell the mutators that there might be unclean regions coming...
  1640   if (G1PrintParCleanupStats) {
  1641     gclog_or_tty->print_cr("  note end of marking: %8.3f ms.",
  1642                            (note_end_end - note_end_start)*1000.0);
  1643   }
  1645   // Now we "scrub" the remembered sets. Note that we must do this
  1646   // before the call below, since it affects the metric by which we
  1647   // sort the heap regions.
  1648   if (G1ScrubRemSets) {
  1649     double rs_scrub_start = os::elapsedTime();
  1650     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
  1651     if (ParallelGCThreads > 0) {
  1652       int n_workers = g1h->workers()->total_workers();
  1653       g1h->set_par_threads(n_workers);
  1654       g1h->workers()->run_task(&g1_par_scrub_rs_task);
  1655       g1h->set_par_threads(0);
  1657       assert(g1h->check_heap_region_claim_values(
  1658                                             HeapRegion::ScrubRemSetClaimValue),
  1659              "sanity check");
  1660     } else {
  1661       g1_par_scrub_rs_task.work(0);
  1664     double rs_scrub_end = os::elapsedTime();
  1665     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
  1666     _total_rs_scrub_time += this_rs_scrub_time;
  1667   }
  1669   // This will also free any regions totally full of garbage objects,
  1670   // and sort the regions.
  1671   g1h->g1_policy()->record_concurrent_mark_cleanup_end(
  1672                         g1_par_note_end_task.freed_bytes(),
  1673                         g1_par_note_end_task.max_live_bytes());
  1675   // Statistics.
  1676   double end = os::elapsedTime();
  1677   _cleanup_times.add((end - start) * 1000.0);
  1679   // G1CollectedHeap::heap()->print();
  1680   // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
  1681   // G1CollectedHeap::heap()->get_gc_time_stamp());
  1683   if (PrintGC || PrintGCDetails) {
  1684     g1h->print_size_transition(gclog_or_tty,
  1685                                start_used_bytes,
  1686                                g1h->used(),
  1687                                g1h->capacity());
  1690   size_t cleaned_up_bytes = start_used_bytes - g1h->used();
  1691   g1p->decrease_known_garbage_bytes(cleaned_up_bytes);
  1693   // We need to make this be a "collection" so any collection pause that
  1694   // races with it goes around and waits for completeCleanup to finish.
  1695   g1h->increment_total_collections();
  1697   if (VerifyDuringGC) {
  1698     HandleMark hm;  // handle scope
  1699     gclog_or_tty->print(" VerifyDuringGC:(after)");
  1700     Universe::heap()->prepare_for_verify();
  1701     Universe::verify(/* allow dirty  */ true,
  1702                      /* silent       */ false,
  1703                      /* prev marking */ true);
  1704   }
  1705 }
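       // To summarize the safepoint operation above: cleanup() (1) re-runs
       // the live counting with the world stopped, (2) swaps the mark
       // bitmaps so that the completed "next" bitmap becomes "prev",
       // (3) notes the end of marking in every region, freeing any that
       // turn out to be completely empty, (4) optionally scrubs the
       // remembered sets, and (5) records the pause with the policy, which
       // sorts the regions for future collection.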
  1707 void ConcurrentMark::completeCleanup() {
  1708   // A full collection intervened.
  1709   if (has_aborted()) return;
  1711   int first = 0;
  1712   int last = (int)MAX2(ParallelGCThreads, (size_t)1);
  1713   for (int t = 0; t < last; t++) {
  1714     UncleanRegionList* list = &_par_cleanup_thread_state[t]->list;
  1715     assert(list->well_formed(), "Inv");
  1716     HeapRegion* hd = list->hd();
  1717     while (hd != NULL) {
  1718       // Now finish up the other stuff.
  1719       hd->rem_set()->clear();
  1720       HeapRegion* next_hd = hd->next_from_unclean_list();
  1721       (void)list->pop();
  1722       assert(list->hd() == next_hd, "how not?");
  1723       _g1h->put_region_on_unclean_list(hd);
  1724       if (!hd->isHumongous()) {
  1725         // This increments the _free_regions count by 1.
  1726         _g1h->finish_free_region_work(0, 0, 1, NULL);
  1727       }
  1728       hd = list->hd();
  1729       assert(hd == next_hd, "how not?");
  1730     }
  1731   }
  1732 }
  1735 class G1CMIsAliveClosure: public BoolObjectClosure {
  1736   G1CollectedHeap* _g1;
  1737  public:
  1738   G1CMIsAliveClosure(G1CollectedHeap* g1) :
  1739     _g1(g1)
  1740   {}
  1742   void do_object(oop obj) {
  1743     assert(false, "not to be invoked");
  1745   bool do_object_b(oop obj) {
  1746     HeapWord* addr = (HeapWord*)obj;
  1747     return addr != NULL &&
  1748            (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
  1750 };
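       // The predicate above treats an oop as live if it lies outside the
       // G1-reserved space (G1 can make no claim about such objects) or if
       // it is not "ill", i.e. not dead according to the marking
       // information currently in use. Reference processing uses it to
       // decide which referents are candidates for clearing.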
  1752 class G1CMKeepAliveClosure: public OopClosure {
  1753   G1CollectedHeap* _g1;
  1754   ConcurrentMark*  _cm;
  1755   CMBitMap*        _bitMap;
  1756  public:
  1757   G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
  1758                        CMBitMap* bitMap) :
  1759     _g1(g1), _cm(cm),
  1760     _bitMap(bitMap) {}
  1762   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  1763   virtual void do_oop(      oop* p) { do_oop_work(p); }
  1765   template <class T> void do_oop_work(T* p) {
  1766     oop thisOop = oopDesc::load_decode_heap_oop(p);
  1767     HeapWord* addr = (HeapWord*)thisOop;
  1768     if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(thisOop)) {
  1769       _bitMap->mark(addr);
  1770       _cm->mark_stack_push(thisOop);
  1773 };
  1775 class G1CMDrainMarkingStackClosure: public VoidClosure {
  1776   CMMarkStack*                  _markStack;
  1777   CMBitMap*                     _bitMap;
  1778   G1CMKeepAliveClosure*         _oopClosure;
  1779  public:
  1780   G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
  1781                                G1CMKeepAliveClosure* oopClosure) :
  1782     _bitMap(bitMap),
  1783     _markStack(markStack),
  1784     _oopClosure(oopClosure)
  1785   {}
  1787   void do_void() {
  1788     _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
  1790 };
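       // The three closures above cooperate in weakRefsWork() below:
       // G1CMIsAliveClosure decides whether a referent is already live,
       // G1CMKeepAliveClosure resurrects a referent that must be kept
       // alive by marking it on the bitmap and pushing it on the mark
       // stack, and G1CMDrainMarkingStackClosure drains that stack so that
       // everything reachable from a kept-alive referent is marked
       // transitively.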
  1792 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
  1793   ResourceMark rm;
  1794   HandleMark   hm;
  1795   G1CollectedHeap* g1h   = G1CollectedHeap::heap();
  1796   ReferenceProcessor* rp = g1h->ref_processor();
  1798   // Process weak references.
  1799   rp->setup_policy(clear_all_soft_refs);
  1800   assert(_markStack.isEmpty(), "mark stack should be empty");
  1802   G1CMIsAliveClosure   g1IsAliveClosure  (g1h);
  1803   G1CMKeepAliveClosure g1KeepAliveClosure(g1h, this, nextMarkBitMap());
  1804   G1CMDrainMarkingStackClosure
  1805     g1DrainMarkingStackClosure(nextMarkBitMap(), &_markStack,
  1806                                &g1KeepAliveClosure);
  1808   // XXXYYY  Also: copy the parallel ref processing code from CMS.
  1809   rp->process_discovered_references(&g1IsAliveClosure,
  1810                                     &g1KeepAliveClosure,
  1811                                     &g1DrainMarkingStackClosure,
  1812                                     NULL);
  1813   assert(_markStack.overflow() || _markStack.isEmpty(),
  1814          "mark stack should be empty (unless it overflowed)");
  1815   if (_markStack.overflow()) {
  1816     set_has_overflown();
  1819   rp->enqueue_discovered_references();
  1820   rp->verify_no_references_recorded();
  1821   assert(!rp->discovery_enabled(), "should have been disabled");
  1823   // Now clean up stale oops in SymbolTable and StringTable
  1824   SymbolTable::unlink(&g1IsAliveClosure);
  1825   StringTable::unlink(&g1IsAliveClosure);
  1828 void ConcurrentMark::swapMarkBitMaps() {
  1829   CMBitMapRO* temp = _prevMarkBitMap;
  1830   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
  1831   _nextMarkBitMap  = (CMBitMap*)  temp;
  1832 }
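       // After the swap above, the bitmap just completed by marking serves
       // as the "prev" (read-only) bitmap describing the last finished
       // marking, while the old prev bitmap becomes the "next" bitmap,
       // to be cleared and re-populated by the following marking cycle.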
  1834 class CMRemarkTask: public AbstractGangTask {
  1835 private:
  1836   ConcurrentMark *_cm;
  1838 public:
  1839   void work(int worker_i) {
  1840     // Since all available tasks are actually started, we should
  1841     // only proceed if we're supposed to be active.
  1842     if ((size_t)worker_i < _cm->active_tasks()) {
  1843       CMTask* task = _cm->task(worker_i);
  1844       task->record_start_time();
  1845       do {
  1846         task->do_marking_step(1000000000.0 /* something very large */);
  1847       } while (task->has_aborted() && !_cm->has_overflown());
  1848       // If we overflow, then we do not want to restart. We instead
  1849       // want to abort remark and do concurrent marking again.
  1850       task->record_end_time();
  1854   CMRemarkTask(ConcurrentMark* cm) :
  1855     AbstractGangTask("Par Remark"), _cm(cm) { }
  1856 };
  1858 void ConcurrentMark::checkpointRootsFinalWork() {
  1859   ResourceMark rm;
  1860   HandleMark   hm;
  1861   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  1863   g1h->ensure_parsability(false);
  1865   if (ParallelGCThreads > 0) {
  1866     G1CollectedHeap::StrongRootsScope srs(g1h);
  1867     // this is remark, so we'll use up all available threads
  1868     int active_workers = ParallelGCThreads;
  1869     set_phase(active_workers, false);
  1871     CMRemarkTask remarkTask(this);
  1872     // We will start all available threads, even if we decide that the
  1873     // active_workers will be fewer. The extra ones will just bail out
  1874     // immediately.
  1875     int n_workers = g1h->workers()->total_workers();
  1876     g1h->set_par_threads(n_workers);
  1877     g1h->workers()->run_task(&remarkTask);
  1878     g1h->set_par_threads(0);
  1879   } else {
  1880     G1CollectedHeap::StrongRootsScope srs(g1h);
  1881     // this is remark, so we'll use up all available threads
  1882     int active_workers = 1;
  1883     set_phase(active_workers, false);
  1885     CMRemarkTask remarkTask(this);
  1886     // We will start all available threads, even if we decide that the
  1887     // active_workers will be fewer. The extra ones will just bail out
  1888     // immediately.
  1889     remarkTask.work(0);
  1891   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  1892   guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");
  1894   print_stats();
  1896   if (!restart_for_overflow())
  1897     set_non_marking_state();
  1899 #if VERIFY_OBJS_PROCESSED
  1900   if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
  1901     gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
  1902                            _scan_obj_cl.objs_processed,
  1903                            ThreadLocalObjQueue::objs_enqueued);
  1904     guarantee(_scan_obj_cl.objs_processed ==
  1905               ThreadLocalObjQueue::objs_enqueued,
  1906               "Different number of objs processed and enqueued.");
  1908 #endif
  1911 #ifndef PRODUCT
  1913 class ReachablePrinterOopClosure: public OopClosure {
  1914 private:
  1915   G1CollectedHeap* _g1h;
  1916   CMBitMapRO*      _bitmap;
  1917   outputStream*    _out;
  1918   bool             _use_prev_marking;
  1920 public:
  1921   ReachablePrinterOopClosure(CMBitMapRO*   bitmap,
  1922                              outputStream* out,
  1923                              bool          use_prev_marking) :
  1924     _g1h(G1CollectedHeap::heap()),
  1925     _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking) { }
  1927   void do_oop(narrowOop* p) { do_oop_work(p); }
  1928   void do_oop(      oop* p) { do_oop_work(p); }
  1930   template <class T> void do_oop_work(T* p) {
  1931     oop         obj = oopDesc::load_decode_heap_oop(p);
  1932     const char* str = NULL;
  1933     const char* str2 = "";
  1935     if (!_g1h->is_in_g1_reserved(obj))
  1936       str = "outside G1 reserved";
  1937     else {
  1938       HeapRegion* hr  = _g1h->heap_region_containing(obj);
  1939       guarantee(hr != NULL, "invariant");
  1940       bool over_tams = false;
  1941       if (_use_prev_marking) {
  1942         over_tams = hr->obj_allocated_since_prev_marking(obj);
  1943       } else {
  1944         over_tams = hr->obj_allocated_since_next_marking(obj);
  1947       if (over_tams) {
  1948         str = "over TAMS";
  1949         if (_bitmap->isMarked((HeapWord*) obj)) {
  1950           str2 = " AND MARKED";
  1952       } else if (_bitmap->isMarked((HeapWord*) obj)) {
  1953         str = "marked";
  1954       } else {
  1955         str = "#### NOT MARKED ####";
  1959     _out->print_cr("    "PTR_FORMAT" contains "PTR_FORMAT" %s%s",
  1960                    p, (void*) obj, str, str2);
  1962 };
  1964 class ReachablePrinterClosure: public BitMapClosure {
  1965 private:
  1966   CMBitMapRO*   _bitmap;
  1967   outputStream* _out;
  1968   bool          _use_prev_marking;
  1970 public:
  1971   ReachablePrinterClosure(CMBitMapRO*   bitmap,
  1972                           outputStream* out,
  1973                           bool          use_prev_marking) :
  1974     _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking) { }
  1976   bool do_bit(size_t offset) {
  1977     HeapWord* addr = _bitmap->offsetToHeapWord(offset);
  1978     ReachablePrinterOopClosure oopCl(_bitmap, _out, _use_prev_marking);
  1980     _out->print_cr("  obj "PTR_FORMAT", offset %10d (marked)", addr, offset);
  1981     oop(addr)->oop_iterate(&oopCl);
  1982     _out->print_cr("");
  1984     return true;
  1986 };
  1988 class ObjInRegionReachablePrinterClosure : public ObjectClosure {
  1989 private:
  1990   CMBitMapRO*   _bitmap;
  1991   outputStream* _out;
  1992   bool          _use_prev_marking;
  1994 public:
  1995   ObjInRegionReachablePrinterClosure(CMBitMapRO*   bitmap,
  1996                                      outputStream* out,
  1997                                      bool          use_prev_marking) :
  1998     _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking) { }
  2000   void do_object(oop o) {
  2001     ReachablePrinterOopClosure oopCl(_bitmap, _out, _use_prev_marking);
  2003     _out->print_cr("  obj "PTR_FORMAT" (over TAMS)", (void*) o);
  2004     o->oop_iterate(&oopCl);
  2005     _out->print_cr("");
  2007 };
  2009 class RegionReachablePrinterClosure : public HeapRegionClosure {
  2010 private:
  2011   CMBitMapRO*   _bitmap;
  2012   outputStream* _out;
  2013   bool          _use_prev_marking;
  2015 public:
  2016   bool doHeapRegion(HeapRegion* hr) {
  2017     HeapWord* b = hr->bottom();
  2018     HeapWord* e = hr->end();
  2019     HeapWord* t = hr->top();
  2020     HeapWord* p = NULL;
  2021     if (_use_prev_marking) {
  2022       p = hr->prev_top_at_mark_start();
  2023     } else {
  2024       p = hr->next_top_at_mark_start();
  2026     _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
  2027                    "TAMS: "PTR_FORMAT, b, e, t, p);
  2028     _out->print_cr("");
  2030     ObjInRegionReachablePrinterClosure ocl(_bitmap, _out, _use_prev_marking);
  2031     hr->object_iterate_mem_careful(MemRegion(p, t), &ocl);
  2033     return false;
  2036   RegionReachablePrinterClosure(CMBitMapRO*   bitmap,
  2037                                 outputStream* out,
  2038                                 bool          use_prev_marking) :
  2039     _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking) { }
  2040 };
  2042 void ConcurrentMark::print_reachable(bool use_prev_marking, const char* str) {
  2043   gclog_or_tty->print_cr("== Doing reachable object dump... ");
  2045   if (G1PrintReachableBaseFile == NULL) {
  2046     gclog_or_tty->print_cr("  #### error: no base file defined");
  2047     return;
  2050   if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
  2051       (JVM_MAXPATHLEN - 1)) {
  2052     gclog_or_tty->print_cr("  #### error: file name too long");
  2053     return;
  2056   char file_name[JVM_MAXPATHLEN];
  2057   sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
  2058   gclog_or_tty->print_cr("  dumping to file %s", file_name);
  2060   fileStream fout(file_name);
  2061   if (!fout.is_open()) {
  2062     gclog_or_tty->print_cr("  #### error: could not open file");
  2063     return;
  2066   outputStream* out = &fout;
  2068   CMBitMapRO* bitmap = NULL;
  2069   if (use_prev_marking) {
  2070     bitmap = _prevMarkBitMap;
  2071   } else {
  2072     bitmap = _nextMarkBitMap;
  2075   out->print_cr("-- USING %s", (use_prev_marking) ? "PTAMS" : "NTAMS");
  2076   out->cr();
  2078   RegionReachablePrinterClosure rcl(bitmap, out, use_prev_marking);
  2079   out->print_cr("--- ITERATING OVER REGIONS WITH TAMS < TOP");
  2080   out->cr();
  2081   _g1h->heap_region_iterate(&rcl);
  2082   out->cr();
  2084   ReachablePrinterClosure cl(bitmap, out, use_prev_marking);
  2085   out->print_cr("--- ITERATING OVER MARKED OBJECTS ON THE BITMAP");
  2086   out->cr();
  2087   bitmap->iterate(&cl);
  2088   out->cr();
  2090   gclog_or_tty->print_cr("  done");
  2093 #endif // PRODUCT
  2095 // This note is for drainAllSATBBuffers and the code in between.
  2096 // In the future we could reuse a task to do this work during an
  2097 // evacuation pause (since now tasks are not active and can be claimed
  2098 // during an evacuation pause). This was a late change to the code and
  2099 // is currently not being taken advantage of.
  2101 class CMGlobalObjectClosure : public ObjectClosure {
  2102 private:
  2103   ConcurrentMark* _cm;
  2105 public:
  2106   void do_object(oop obj) {
  2107     _cm->deal_with_reference(obj);
  2110   CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
  2111 };
  2113 void ConcurrentMark::deal_with_reference(oop obj) {
  2114   if (verbose_high())
  2115     gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
  2116                            (void*) obj);
  2119   HeapWord* objAddr = (HeapWord*) obj;
  2120   assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
  2121   if (_g1h->is_in_g1_reserved(objAddr)) {
  2122     assert(obj != NULL, "is_in_g1_reserved should ensure this");
  2123     HeapRegion* hr = _g1h->heap_region_containing(obj);
  2124     if (_g1h->is_obj_ill(obj, hr)) {
  2125       if (verbose_high())
  2126         gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
  2127                                "marked", (void*) obj);
  2129       // we need to mark it first
  2130       if (_nextMarkBitMap->parMark(objAddr)) {
  2131         // No OrderAccess::store_load() is needed. It is implicit in the
  2132         // CAS done in parMark(objAddr) above
  2133         HeapWord* finger = _finger;
  2134         if (objAddr < finger) {
  2135           if (verbose_high())
  2136             gclog_or_tty->print_cr("[global] below the global finger "
  2137                                    "("PTR_FORMAT"), pushing it", finger);
  2138           if (!mark_stack_push(obj)) {
  2139             if (verbose_low())
  2140               gclog_or_tty->print_cr("[global] global stack overflow during "
  2141                                      "deal_with_reference");
  2142           }
  2143         }
  2144       }
  2145     }
  2146   }
  2147 }
  2149 void ConcurrentMark::drainAllSATBBuffers() {
  2150   CMGlobalObjectClosure oc(this);
  2151   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  2152   satb_mq_set.set_closure(&oc);
  2154   while (satb_mq_set.apply_closure_to_completed_buffer()) {
  2155     if (verbose_medium())
  2156       gclog_or_tty->print_cr("[global] processed an SATB buffer");
  2159   // no need to check whether we should do this, as this is only
  2160   // called during an evacuation pause
  2161   satb_mq_set.iterate_closure_all_threads();
  2163   satb_mq_set.set_closure(NULL);
  2164   assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
  2165 }
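       // Background for drainAllSATBBuffers(): under snapshot-at-the-
       // beginning (SATB) marking, the pre-write barrier logs the old
       // value of a reference field before it is overwritten. Draining
       // applies deal_with_reference() to every logged oop: first to all
       // completed buffers and then, via iterate_closure_all_threads(),
       // to each thread's partially filled buffer.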
  2167 void ConcurrentMark::markPrev(oop p) {
  2168   // Note we are overriding the read-only view of the prev map here, via
  2169   // the cast.
  2170   ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
  2173 void ConcurrentMark::clear(oop p) {
  2174   assert(p != NULL && p->is_oop(), "expected an oop");
  2175   HeapWord* addr = (HeapWord*)p;
  2176   assert(addr >= _nextMarkBitMap->startWord() &&
  2177          addr < _nextMarkBitMap->endWord(), "in a region");
  2179   _nextMarkBitMap->clear(addr);
  2182 void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
  2183   // Note we are overriding the read-only view of the prev map here, via
  2184   // the cast.
  2185   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
  2186   _nextMarkBitMap->clearRange(mr);
  2189 HeapRegion*
  2190 ConcurrentMark::claim_region(int task_num) {
  2191   // "checkpoint" the finger
  2192   HeapWord* finger = _finger;
  2194   // _heap_end will not change underneath our feet; it only changes at
  2195   // yield points.
  2196   while (finger < _heap_end) {
  2197     assert(_g1h->is_in_g1_reserved(finger), "invariant");
  2199     // is the gap between reading the finger and doing the CAS too long?
  2201     HeapRegion* curr_region   = _g1h->heap_region_containing(finger);
  2202     HeapWord*   bottom        = curr_region->bottom();
  2203     HeapWord*   end           = curr_region->end();
  2204     HeapWord*   limit         = curr_region->next_top_at_mark_start();
  2206     if (verbose_low())
  2207       gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
  2208                              "["PTR_FORMAT", "PTR_FORMAT"), "
  2209                              "limit = "PTR_FORMAT,
  2210                              task_num, curr_region, bottom, end, limit);
  2212     HeapWord* res =
  2213       (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
  2214     if (res == finger) {
  2215       // we succeeded
  2217       // notice that _finger == end cannot be guaranteed here since,
  2218       // someone else might have moved the finger even further
  2219       assert(_finger >= end, "the finger should have moved forward");
  2221       if (verbose_low())
  2222         gclog_or_tty->print_cr("[%d] we were successful with region = "
  2223                                PTR_FORMAT, task_num, curr_region);
  2225       if (limit > bottom) {
  2226         if (verbose_low())
  2227           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
  2228                                  "returning it ", task_num, curr_region);
  2229         return curr_region;
  2230       } else {
  2231         assert(limit == bottom,
  2232                "the region limit should be at bottom");
  2233         if (verbose_low())
  2234           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
  2235                                  "returning NULL", task_num, curr_region);
  2236         // we return NULL and the caller should try calling
  2237         // claim_region() again.
  2238         return NULL;
  2239       }
  2240     } else {
  2241       assert(_finger > finger, "the finger should have moved forward");
  2242       if (verbose_low())
  2243         gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
  2244                                "global finger = "PTR_FORMAT", "
  2245                                "our finger = "PTR_FORMAT,
  2246                                task_num, _finger, finger);
  2248       // read it again
  2249       finger = _finger;
  2250     }
  2251   }
  2253   return NULL;
  2254 }
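       // The claiming protocol above is lock-free. An illustrative race:
       // tasks A and B both read _finger == F inside region R; A's
       // cmpxchg_ptr swings _finger from F to R's end and returns F, so A
       // owns R, while B's cmpxchg_ptr returns the updated value (!= F),
       // so B simply re-reads _finger and tries the region that follows.
       // A region can therefore never be claimed by two tasks.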
  2256 void ConcurrentMark::oops_do(OopClosure* cl) {
  2257   if (_markStack.size() > 0 && verbose_low())
  2258     gclog_or_tty->print_cr("[global] scanning the global marking stack, "
  2259                            "size = %d", _markStack.size());
  2260   // we first iterate over the contents of the mark stack...
  2261   _markStack.oops_do(cl);
  2263   for (int i = 0; i < (int)_max_task_num; ++i) {
  2264     OopTaskQueue* queue = _task_queues->queue((int)i);
  2266     if (queue->size() > 0 && verbose_low())
  2267       gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
  2268                              "size = %d", i, queue->size());
  2270     // ...then over the contents of the all the task queues.
  2271     queue->oops_do(cl);
  2274   // finally, invalidate any entries in the region stack that
  2275   // point into the collection set
  2276   if (_regionStack.invalidate_entries_into_cset()) {
  2277     // otherwise, any gray objects copied during the evacuation pause
  2278     // might not be visited.
  2279     assert(_should_gray_objects, "invariant");
  2283 void ConcurrentMark::clear_marking_state() {
  2284   _markStack.setEmpty();
  2285   _markStack.clear_overflow();
  2286   _regionStack.setEmpty();
  2287   _regionStack.clear_overflow();
  2288   clear_has_overflown();
  2289   _finger = _heap_start;
  2291   for (int i = 0; i < (int)_max_task_num; ++i) {
  2292     OopTaskQueue* queue = _task_queues->queue(i);
  2293     queue->set_empty();
  2297 void ConcurrentMark::print_stats() {
  2298   if (verbose_stats()) {
  2299     gclog_or_tty->print_cr("---------------------------------------------------------------------");
  2300     for (size_t i = 0; i < _active_tasks; ++i) {
  2301       _tasks[i]->print_stats();
  2302       gclog_or_tty->print_cr("---------------------------------------------------------------------");
  2307 class CSMarkOopClosure: public OopClosure {
  2308   friend class CSMarkBitMapClosure;
  2310   G1CollectedHeap* _g1h;
  2311   CMBitMap*        _bm;
  2312   ConcurrentMark*  _cm;
  2313   oop*             _ms;
  2314   jint*            _array_ind_stack;
  2315   int              _ms_size;
  2316   int              _ms_ind;
  2317   int              _array_increment;
  2319   bool push(oop obj, int arr_ind = 0) {
  2320     if (_ms_ind == _ms_size) {
  2321       gclog_or_tty->print_cr("Mark stack is full.");
  2322       return false;
  2324     _ms[_ms_ind] = obj;
  2325     if (obj->is_objArray()) _array_ind_stack[_ms_ind] = arr_ind;
  2326     _ms_ind++;
  2327     return true;
  2330   oop pop() {
  2331     if (_ms_ind == 0) return NULL;
  2332     else {
  2333       _ms_ind--;
  2334       return _ms[_ms_ind];
  2338   template <class T> bool drain() {
  2339     while (_ms_ind > 0) {
  2340       oop obj = pop();
  2341       assert(obj != NULL, "Since index was non-zero.");
  2342       if (obj->is_objArray()) {
  2343         jint arr_ind = _array_ind_stack[_ms_ind];
  2344         objArrayOop aobj = objArrayOop(obj);
  2345         jint len = aobj->length();
  2346         jint next_arr_ind = arr_ind + _array_increment;
  2347         if (next_arr_ind < len) {
  2348           push(obj, next_arr_ind);
  2350         // Now process this portion of this one.
  2351         int lim = MIN2(next_arr_ind, len);
  2352         for (int j = arr_ind; j < lim; j++) {
  2353           do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j));
  2356       } else {
  2357         obj->oop_iterate(this);
  2359       if (abort()) return false;
  2361     return true;
  2364 public:
  2365   CSMarkOopClosure(ConcurrentMark* cm, int ms_size) :
  2366     _g1h(G1CollectedHeap::heap()),
  2367     _cm(cm),
  2368     _bm(cm->nextMarkBitMap()),
  2369     _ms_size(ms_size), _ms_ind(0),
  2370     _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
  2371     _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
  2372     _array_increment(MAX2(ms_size/8, 16))
  2373   {}
  2375   ~CSMarkOopClosure() {
  2376     FREE_C_HEAP_ARRAY(oop, _ms);
  2377     FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
  2380   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  2381   virtual void do_oop(      oop* p) { do_oop_work(p); }
  2383   template <class T> void do_oop_work(T* p) {
  2384     T heap_oop = oopDesc::load_heap_oop(p);
  2385     if (oopDesc::is_null(heap_oop)) return;
  2386     oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
  2387     if (obj->is_forwarded()) {
  2388       // If the object has already been forwarded, we have to make sure
  2389       // that it's marked.  So follow the forwarding pointer.  Note that
  2390       // this does the right thing for self-forwarding pointers in the
  2391       // evacuation failure case.
  2392       obj = obj->forwardee();
  2394     HeapRegion* hr = _g1h->heap_region_containing(obj);
  2395     if (hr != NULL) {
  2396       if (hr->in_collection_set()) {
  2397         if (_g1h->is_obj_ill(obj)) {
  2398           _bm->mark((HeapWord*)obj);
  2399           if (!push(obj)) {
  2400             gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
  2401             set_abort();
  2404       } else {
  2405         // Outside the collection set; we need to gray it
  2406         _cm->deal_with_reference(obj);
  2410 };
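       // Object arrays are scanned in chunks by drain() above: at most
       // _array_increment elements are processed per pop and, if more
       // remain, the array is pushed back with the next start index. With
       // ms_size == 1000 (MSSize below), _array_increment is
       // MAX2(1000/8, 16) == 125, so a 1000-element array costs eight
       // pushes rather than one unbounded scan, which keeps the local
       // mark stack small.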
  2412 class CSMarkBitMapClosure: public BitMapClosure {
  2413   G1CollectedHeap* _g1h;
  2414   CMBitMap*        _bitMap;
  2415   ConcurrentMark*  _cm;
  2416   CSMarkOopClosure _oop_cl;
  2417 public:
  2418   CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :
  2419     _g1h(G1CollectedHeap::heap()),
  2420     _bitMap(cm->nextMarkBitMap()),
  2421     _oop_cl(cm, ms_size)
  2422   {}
  2424   ~CSMarkBitMapClosure() {}
  2426   bool do_bit(size_t offset) {
  2427     // convert offset into a HeapWord*
  2428     HeapWord* addr = _bitMap->offsetToHeapWord(offset);
  2429     assert(addr >= _bitMap->startWord() && addr < _bitMap->endWord(),
  2430            "address out of range");
  2431     assert(_bitMap->isMarked(addr), "tautology");
  2432     oop obj = oop(addr);
  2433     if (!obj->is_forwarded()) {
  2434       if (!_oop_cl.push(obj)) return false;
  2435       if (UseCompressedOops) {
  2436         if (!_oop_cl.drain<narrowOop>()) return false;
  2437       } else {
  2438         if (!_oop_cl.drain<oop>()) return false;
  2441     // Otherwise...
  2442     return true;
  2444 };
  2447 class CompleteMarkingInCSHRClosure: public HeapRegionClosure {
  2448   CMBitMap* _bm;
  2449   CSMarkBitMapClosure _bit_cl;
  2450   enum SomePrivateConstants {
  2451     MSSize = 1000
  2452   };
  2453   bool _completed;
  2454 public:
  2455   CompleteMarkingInCSHRClosure(ConcurrentMark* cm) :
  2456     _bm(cm->nextMarkBitMap()),
  2457     _bit_cl(cm, MSSize),
  2458     _completed(true)
  2459   {}
  2461   ~CompleteMarkingInCSHRClosure() {}
  2463   bool doHeapRegion(HeapRegion* r) {
  2464     if (!r->evacuation_failed()) {
  2465       MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
  2466       if (!mr.is_empty()) {
  2467         if (!_bm->iterate(&_bit_cl, mr)) {
  2468           _completed = false;
  2469           return true;
  2470         }
  2471       }
  2472     }
  2473     return false;
  2474   }
  2476   bool completed() { return _completed; }
  2477 };
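       // If CSMarkOopClosure's fixed-size local stack fills up, the bitmap
       // iteration above is cut short and completed() returns false;
       // complete_marking_in_collection_set() below then simply runs
       // another full pass over the collection set, repeating until a
       // pass finishes without aborting.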
  2479 class ClearMarksInHRClosure: public HeapRegionClosure {
  2480   CMBitMap* _bm;
  2481 public:
  2482   ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { }
  2484   bool doHeapRegion(HeapRegion* r) {
  2485     if (!r->used_region().is_empty() && !r->evacuation_failed()) {
  2486       MemRegion usedMR = r->used_region();
  2487       _bm->clearRange(usedMR);
  2488     }
  2489     return false;
  2491 };
  2493 void ConcurrentMark::complete_marking_in_collection_set() {
  2494   G1CollectedHeap* g1h =  G1CollectedHeap::heap();
  2496   if (!g1h->mark_in_progress()) {
  2497     g1h->g1_policy()->record_mark_closure_time(0.0);
  2498     return;
  2501   int i = 1;
  2502   double start = os::elapsedTime();
  2503   while (true) {
  2504     i++;
  2505     CompleteMarkingInCSHRClosure cmplt(this);
  2506     g1h->collection_set_iterate(&cmplt);
  2507     if (cmplt.completed()) break;
  2509   double end_time = os::elapsedTime();
  2510   double elapsed_time_ms = (end_time - start) * 1000.0;
  2511   g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
  2512   if (PrintGCDetails) {
  2513     gclog_or_tty->print_cr("Mark closure took %5.2f ms.", elapsed_time_ms);
  2516   ClearMarksInHRClosure clr(nextMarkBitMap());
  2517   g1h->collection_set_iterate(&clr);
  2518 }
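       // The final ClearMarksInHRClosure pass erases next-bitmap marks in
       // collection set regions whose objects have been evacuated, since
       // those marks are now stale; regions where evacuation failed keep
       // their marks, as their objects stay in place and the marks remain
       // meaningful.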
  2520 // The next two methods deal with the following optimisation. Some
  2521 // objects are gray by being marked and located above the finger. If
  2522 // they are copied, during an evacuation pause, below the finger, then
  2523 // they need to be pushed on the stack. The observation is that, if
  2524 // there are no regions in the collection set located above the
  2525 // finger, then the above cannot happen, hence we do not need to
  2526 // explicitly gray any objects when copying them to below the
  2527 // finger. The global stack will be scanned to ensure that, if it
  2528 // points to objects being copied, it will update their
  2529 // location. There is a tricky situation with gray objects in the
  2530 // region stack that are being copied, however. See the comment in
  2531 // newCSet().
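       // An illustrative scenario for the above (addresses invented for
       // the example): the global finger is at 0x8000 and a marked,
       // not-yet-scanned object at 0x9000 sits in a collection set region.
       // If the pause copies it to 0x7000, below the finger, no marking
       // task would ever reach the copy, so it must be explicitly grayed.
       // If every collection set region ends below 0x8000 such a copy is
       // impossible and the graying can be skipped; registerCSetRegion()
       // below implements exactly that test against _min_finger.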
  2533 void ConcurrentMark::newCSet() {
  2534   if (!concurrent_marking_in_progress())
  2535     // nothing to do if marking is not in progress
  2536     return;
  2538   // find what the lowest finger is among the global and local fingers
  2539   _min_finger = _finger;
  2540   for (int i = 0; i < (int)_max_task_num; ++i) {
  2541     CMTask* task = _tasks[i];
  2542     HeapWord* task_finger = task->finger();
  2543     if (task_finger != NULL && task_finger < _min_finger)
  2544       _min_finger = task_finger;
  2547   _should_gray_objects = false;
  2549   // This fixes a very subtle and frustrating bug. It might be the case
  2550   // that, during an evacuation pause, heap regions that contain
  2551   // objects that are gray (by being in regions contained in the
  2552   // region stack) are included in the collection set. Since such gray
  2553   // objects will be moved, and because it's not easy to redirect
  2554   // region stack entries to point to a new location (because objects
  2555   // in one region might be scattered to multiple regions after they
  2556   // are copied), one option is to ensure that all marked objects
  2557   // copied during a pause are pushed on the stack. Notice, however,
  2558   // that this problem can only happen when the region stack is not
  2559   // empty during an evacuation pause. So, we make the fix a bit less
  2560   // conservative and ensure that regions are pushed on the stack,
  2561   // irrespective of whether all collection set regions are below the
  2562   // finger, if the region stack is not empty. This is expected to be
  2563   // a rare case, so I don't think it's necessary to be smarter about it.
  2564   if (!region_stack_empty())
  2565     _should_gray_objects = true;
  2566 }
  2568 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
  2569   if (!concurrent_marking_in_progress())
  2570     return;
  2572   HeapWord* region_end = hr->end();
  2573   if (region_end > _min_finger)
  2574     _should_gray_objects = true;
  2577 // abandon current marking iteration due to a Full GC
  2578 void ConcurrentMark::abort() {
  2579   // Clear all marks to force marking thread to do nothing
  2580   _nextMarkBitMap->clearAll();
  2581   // Empty mark stack
  2582   clear_marking_state();
  2583   for (int i = 0; i < (int)_max_task_num; ++i)
  2584     _tasks[i]->clear_region_fields();
  2585   _has_aborted = true;
  2587   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  2588   satb_mq_set.abandon_partial_marking();
  2589   satb_mq_set.set_active_all_threads(false);
  2592 static void print_ms_time_info(const char* prefix, const char* name,
  2593                                NumberSeq& ns) {
  2594   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
  2595                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
  2596   if (ns.num() > 0) {
  2597     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
  2598                            prefix, ns.sd(), ns.maximum());
  2602 void ConcurrentMark::print_summary_info() {
  2603   gclog_or_tty->print_cr(" Concurrent marking:");
  2604   print_ms_time_info("  ", "init marks", _init_times);
  2605   print_ms_time_info("  ", "remarks", _remark_times);
  2607     print_ms_time_info("     ", "final marks", _remark_mark_times);
  2608     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
  2611   print_ms_time_info("  ", "cleanups", _cleanup_times);
  2612   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
  2613                          _total_counting_time,
  2614                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
  2615                           (double)_cleanup_times.num()
  2616                          : 0.0));
  2617   if (G1ScrubRemSets) {
  2618     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
  2619                            _total_rs_scrub_time,
  2620                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
  2621                             (double)_cleanup_times.num()
  2622                            : 0.0));
  2624   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
  2625                          (_init_times.sum() + _remark_times.sum() +
  2626                           _cleanup_times.sum())/1000.0);
  2627   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
  2628                 "(%8.2f s marking, %8.2f s counting).",
  2629                 cmThread()->vtime_accum(),
  2630                 cmThread()->vtime_mark_accum(),
  2631                 cmThread()->vtime_count_accum());
  2634 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
  2635   _parallel_workers->print_worker_threads_on(st);
  2638 // Closures
  2639 // XXX: there seems to be a lot of code duplication here;
  2640 // should refactor and consolidate the shared code.
  2645 // We take a break if someone is trying to stop the world.
  2646 bool ConcurrentMark::do_yield_check(int worker_i) {
  2647   if (should_yield()) {
  2648     if (worker_i == 0)
  2649       _g1h->g1_policy()->record_concurrent_pause();
  2650     cmThread()->yield();
  2651     if (worker_i == 0)
  2652       _g1h->g1_policy()->record_concurrent_pause_end();
  2653     return true;
  2654   } else {
  2655     return false;
  2656   }
  2657 }
  2659 bool ConcurrentMark::should_yield() {
  2660   return cmThread()->should_yield();
  2663 bool ConcurrentMark::containing_card_is_marked(void* p) {
  2664   size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
  2665   return _card_bm.at(offset >> CardTableModRefBS::card_shift);
  2666 }
  2668 bool ConcurrentMark::containing_cards_are_marked(void* start,
  2669                                                  void* last) {
  2670   return
  2671     containing_card_is_marked(start) &&
  2672     containing_card_is_marked(last);
  2673 }
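       // Example of the arithmetic above, assuming HotSpot's usual
       // 512-byte cards (CardTableModRefBS::card_shift == 9): a pointer
       // 0x2a00 bytes past the start of the reserved region lies on card
       // 0x2a00 >> 9 == 21, so containing_card_is_marked() tests bit 21
       // of _card_bm.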
  2675 #ifndef PRODUCT
  2676 // for debugging purposes
  2677 void ConcurrentMark::print_finger() {
  2678   gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
  2679                          _heap_start, _heap_end, _finger);
  2680   for (int i = 0; i < (int) _max_task_num; ++i) {
  2681     gclog_or_tty->print("   %d: "PTR_FORMAT, i, _tasks[i]->finger());
  2683   gclog_or_tty->print_cr("");
  2685 #endif
  2687 // Closure for iteration over bitmaps
  2688 class CMBitMapClosure : public BitMapClosure {
  2689 private:
  2690   // the bitmap that is being iterated over
  2691   CMBitMap*                   _nextMarkBitMap;
  2692   ConcurrentMark*             _cm;
  2693   CMTask*                     _task;
  2694   // true if we're scanning a heap region claimed by the task (so that
  2695   // we move the finger along), false if we're not, i.e. currently when
  2696   // scanning a heap region popped from the region stack (so that we
  2697   // do not move the task finger along; it'd be a mistake if we did so).
  2698   bool                        _scanning_heap_region;
  2700 public:
  2701   CMBitMapClosure(CMTask *task,
  2702                   ConcurrentMark* cm,
  2703                   CMBitMap* nextMarkBitMap)
  2704     :  _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
  2706   void set_scanning_heap_region(bool scanning_heap_region) {
  2707     _scanning_heap_region = scanning_heap_region;
  2710   bool do_bit(size_t offset) {
  2711     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
  2712     assert(_nextMarkBitMap->isMarked(addr), "invariant");
  2713     assert( addr < _cm->finger(), "invariant");
  2715     if (_scanning_heap_region) {
  2716       statsOnly( _task->increase_objs_found_on_bitmap() );
  2717       assert(addr >= _task->finger(), "invariant");
  2718       // We move that task's local finger along.
  2719       _task->move_finger_to(addr);
  2720     } else {
  2721       // We move the task's region finger along.
  2722       _task->move_region_finger_to(addr);
  2725     _task->scan_object(oop(addr));
  2726     // we only partially drain the local queue and global stack
  2727     _task->drain_local_queue(true);
  2728     _task->drain_global_stack(true);
  2730     // if the has_aborted flag has been raised, we need to bail out of
  2731     // the iteration
  2732     return !_task->has_aborted();
  2734 };
  2736 // Closure for iterating over objects, currently only used for
  2737 // processing SATB buffers.
  2738 class CMObjectClosure : public ObjectClosure {
  2739 private:
  2740   CMTask* _task;
  2742 public:
  2743   void do_object(oop obj) {
  2744     _task->deal_with_reference(obj);
  2747   CMObjectClosure(CMTask* task) : _task(task) { }
  2748 };
  2750 // Closure for iterating over object fields
  2751 class CMOopClosure : public OopClosure {
  2752 private:
  2753   G1CollectedHeap*   _g1h;
  2754   ConcurrentMark*    _cm;
  2755   CMTask*            _task;
  2757 public:
  2758   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  2759   virtual void do_oop(      oop* p) { do_oop_work(p); }
  2761   template <class T> void do_oop_work(T* p) {
  2762     assert(_g1h->is_in_g1_reserved((HeapWord*) p), "invariant");
  2763     assert(!_g1h->heap_region_containing((HeapWord*) p)->is_on_free_list(),
  2764            "invariant");
  2766     oop obj = oopDesc::load_decode_heap_oop(p);
  2767     if (_cm->verbose_high())
  2768       gclog_or_tty->print_cr("[%d] we're looking at location "
  2769                              "*"PTR_FORMAT" = "PTR_FORMAT,
  2770                              _task->task_id(), p, (void*) obj);
  2771     _task->deal_with_reference(obj);
  2774   CMOopClosure(G1CollectedHeap* g1h,
  2775                ConcurrentMark* cm,
  2776                CMTask* task)
  2777     : _g1h(g1h), _cm(cm), _task(task) { }
  2778 };
  2780 void CMTask::setup_for_region(HeapRegion* hr) {
  2781   // Separated the asserts so that we know which one fires.
  2782   assert(hr != NULL,
  2783         "claim_region() should have filtered out continues humongous regions");
  2784   assert(!hr->continuesHumongous(),
  2785         "claim_region() should have filtered out continues humongous regions");
  2787   if (_cm->verbose_low())
  2788     gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
  2789                            _task_id, hr);
  2791   _curr_region  = hr;
  2792   _finger       = hr->bottom();
  2793   update_region_limit();
  2796 void CMTask::update_region_limit() {
  2797   HeapRegion* hr            = _curr_region;
  2798   HeapWord* bottom          = hr->bottom();
  2799   HeapWord* limit           = hr->next_top_at_mark_start();
  2801   if (limit == bottom) {
  2802     if (_cm->verbose_low())
  2803       gclog_or_tty->print_cr("[%d] found an empty region "
  2804                              "["PTR_FORMAT", "PTR_FORMAT")",
  2805                              _task_id, bottom, limit);
  2806     // The region was collected underneath our feet.
  2807     // We set the finger to bottom to ensure that the bitmap
  2808     // iteration that will follow this will not do anything.
  2809     // (this is not a condition that holds when we set the region up,
  2810     // as the region is not supposed to be empty in the first place)
  2811     _finger = bottom;
  2812   } else if (limit >= _region_limit) {
  2813     assert(limit >= _finger, "peace of mind");
  2814   } else {
  2815     assert(limit < _region_limit, "only way to get here");
  2816     // This can happen under some pretty unusual circumstances.  An
  2817     // evacuation pause empties the region underneath our feet (NTAMS
  2818     // at bottom). We then do some allocation in the region (NTAMS
  2819     // stays at bottom), followed by the region being used as a GC
  2820     // alloc region (NTAMS will move to top() and the objects
  2821     // originally below it will be grayed). All objects now marked in
  2822     // the region are explicitly grayed, if below the global finger,
  2823     // and we do not need in fact to scan anything else. So, we simply
  2824     // set _finger to be limit to ensure that the bitmap iteration
  2825     // doesn't do anything.
  2826     _finger = limit;
  2829   _region_limit = limit;
  2832 void CMTask::giveup_current_region() {
  2833   assert(_curr_region != NULL, "invariant");
  2834   if (_cm->verbose_low())
  2835     gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
  2836                            _task_id, _curr_region);
  2837   clear_region_fields();
  2840 void CMTask::clear_region_fields() {
  2841   // Values for these three fields that indicate that we're not
  2842   // holding on to a region.
  2843   _curr_region   = NULL;
  2844   _finger        = NULL;
  2845   _region_limit  = NULL;
  2847   _region_finger = NULL;
  2850 void CMTask::reset(CMBitMap* nextMarkBitMap) {
  2851   guarantee(nextMarkBitMap != NULL, "invariant");
  2853   if (_cm->verbose_low())
  2854     gclog_or_tty->print_cr("[%d] resetting", _task_id);
  2856   _nextMarkBitMap                = nextMarkBitMap;
  2857   clear_region_fields();
  2859   _calls                         = 0;
  2860   _elapsed_time_ms               = 0.0;
  2861   _termination_time_ms           = 0.0;
  2862   _termination_start_time_ms     = 0.0;
  2864 #if _MARKING_STATS_
  2865   _local_pushes                  = 0;
  2866   _local_pops                    = 0;
  2867   _local_max_size                = 0;
  2868   _objs_scanned                  = 0;
  2869   _global_pushes                 = 0;
  2870   _global_pops                   = 0;
  2871   _global_max_size               = 0;
  2872   _global_transfers_to           = 0;
  2873   _global_transfers_from         = 0;
  2874   _region_stack_pops             = 0;
  2875   _regions_claimed               = 0;
  2876   _objs_found_on_bitmap          = 0;
  2877   _satb_buffers_processed        = 0;
  2878   _steal_attempts                = 0;
  2879   _steals                        = 0;
  2880   _aborted                       = 0;
  2881   _aborted_overflow              = 0;
  2882   _aborted_cm_aborted            = 0;
  2883   _aborted_yield                 = 0;
  2884   _aborted_timed_out             = 0;
  2885   _aborted_satb                  = 0;
  2886   _aborted_termination           = 0;
  2887 #endif // _MARKING_STATS_
  2890 bool CMTask::should_exit_termination() {
  2891   regular_clock_call();
  2892   // This is called when we are in the termination protocol. We should
  2893   // quit if, for some reason, this task wants to abort or the global
  2894   // stack is not empty (this means that we can get work from it).
  2895   return !_cm->mark_stack_empty() || has_aborted();
  2898 // This determines whether the method below will check both the local
  2899 // and global fingers when determining whether to push a gray object
  2900 // on the stack (value 1) or whether it will only check the global one
  2901 // (value 0). The tradeoffs are that the former will be a bit more
  2902 // accurate and possibly push fewer entries on the stack, but it
  2903 // might also be a little bit slower.
  2905 #define _CHECK_BOTH_FINGERS_      1
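       // An illustrative sketch (added commentary, not from the original
       // source) of how deal_with_reference() below consults the fingers:
       // an address *above* a finger will still be reached by a future
       // bitmap scan, so simply marking it is enough, while an address
       // *below* a finger has already been passed over and must also be
       // pushed on a stack.
       //
       //   claimed region: bottom .... _finger ...... _region_limit
       //                   [ scanned ]^[ this task will still scan ]
       //   whole heap:     claimed regions ... global finger ... unclaimed
       //                   [ may be passed ]^[ some task will still scan ]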
  2907 void CMTask::deal_with_reference(oop obj) {
  2908   if (_cm->verbose_high())
  2909     gclog_or_tty->print_cr("[%d] we're dealing with reference = "PTR_FORMAT,
  2910                            _task_id, (void*) obj);
  2912   ++_refs_reached;
  2914   HeapWord* objAddr = (HeapWord*) obj;
  2915   assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
  2916   if (_g1h->is_in_g1_reserved(objAddr)) {
  2917     assert(obj != NULL, "is_in_g1_reserved should ensure this");
  2918     HeapRegion* hr =  _g1h->heap_region_containing(obj);
  2919     if (_g1h->is_obj_ill(obj, hr)) {
  2920       if (_cm->verbose_high())
  2921         gclog_or_tty->print_cr("[%d] "PTR_FORMAT" is not considered marked",
  2922                                _task_id, (void*) obj);
  2924       // we need to mark it first
  2925       if (_nextMarkBitMap->parMark(objAddr)) {
  2926         // No OrderAccess::store_load() is needed. It is implicit in the
  2927         // CAS done in parMark(objAddr) above
  2928         HeapWord* global_finger = _cm->finger();
  2930 #if _CHECK_BOTH_FINGERS_
  2931         // we will check both the local and global fingers
  2933         if (_finger != NULL && objAddr < _finger) {
  2934           if (_cm->verbose_high())
  2935             gclog_or_tty->print_cr("[%d] below the local finger ("PTR_FORMAT"), "
  2936                                    "pushing it", _task_id, _finger);
  2937           push(obj);
  2938         } else if (_curr_region != NULL && objAddr < _region_limit) {
  2939           // do nothing
  2940         } else if (objAddr < global_finger) {
  2941           // Notice that the global finger might be moving forward
  2942           // concurrently. This is not a problem. In the worst case, we
  2943           // mark the object while it is above the global finger and, by
  2944           // the time we read the global finger, it has moved forward
  2945           // past this object. In this case, the object will probably
  2946           // be visited when a task is scanning the region and will also
  2947           // be pushed on the stack. So, some duplicate work, but no
  2948           // correctness problems.
  2950           if (_cm->verbose_high())
  2951             gclog_or_tty->print_cr("[%d] below the global finger "
  2952                                    "("PTR_FORMAT"), pushing it",
  2953                                    _task_id, global_finger);
  2954           push(obj);
  2955         } else {
  2956           // do nothing
  2957         }
  2958 #else // _CHECK_BOTH_FINGERS_
  2959       // we will only check the global finger
  2961         if (objAddr < global_finger) {
  2962           // see long comment above
  2964           if (_cm->verbose_high())
  2965             gclog_or_tty->print_cr("[%d] below the global finger "
  2966                                    "("PTR_FORMAT"), pushing it",
  2967                                    _task_id, global_finger);
  2968           push(obj);
  2969         }
  2970 #endif // _CHECK_BOTH_FINGERS_
  2971       }
  2972     }
  2973   }
  2974 }
  2976 void CMTask::push(oop obj) {
  2977   HeapWord* objAddr = (HeapWord*) obj;
  2978   assert(_g1h->is_in_g1_reserved(objAddr), "invariant");
  2979   assert(!_g1h->heap_region_containing(objAddr)->is_on_free_list(),
  2980          "invariant");
  2981   assert(!_g1h->is_obj_ill(obj), "invariant");
  2982   assert(_nextMarkBitMap->isMarked(objAddr), "invariant");
  2984   if (_cm->verbose_high())
  2985     gclog_or_tty->print_cr("[%d] pushing "PTR_FORMAT, _task_id, (void*) obj);
  2987   if (!_task_queue->push(obj)) {
  2988     // The local task queue looks full. We need to push some entries
  2989     // to the global stack.
  2991     if (_cm->verbose_medium())
  2992       gclog_or_tty->print_cr("[%d] task queue overflow, "
  2993                              "moving entries to the global stack",
  2994                              _task_id);
  2995     move_entries_to_global_stack();
  2997     // this should succeed since, even if we overflow the global
  2998     // stack, we should have definitely removed some entries from the
  2999     // local queue. So, there must be space on it.
  3000     bool success = _task_queue->push(obj);
  3001     assert(success, "invariant");
  3002   }
  3004   statsOnly( int tmp_size = _task_queue->size();
  3005              if (tmp_size > _local_max_size)
  3006                _local_max_size = tmp_size;
  3007              ++_local_pushes );
  3008 }
  3010 void CMTask::reached_limit() {
  3011   assert(_words_scanned >= _words_scanned_limit ||
  3012          _refs_reached >= _refs_reached_limit ,
  3013          "shouldn't have been called otherwise");
  3014   regular_clock_call();
  3015 }
  3017 void CMTask::regular_clock_call() {
  3018   if (has_aborted())
  3019     return;
  3021   // First, we need to recalculate the words scanned and refs reached
  3022   // limits for the next clock call.
  3023   recalculate_limits();
  3025   // During the regular clock call we do the following:
  3027   // (1) If an overflow has been flagged, then we abort.
  3028   if (_cm->has_overflown()) {
  3029     set_has_aborted();
  3030     return;
  3031   }
  3033   // If we are not concurrent (i.e. we're doing remark) we don't need
  3034   // to check anything else. The other steps are only needed during
  3035   // the concurrent marking phase.
  3036   if (!concurrent())
  3037     return;
  3039   // (2) If marking has been aborted for Full GC, then we also abort.
  3040   if (_cm->has_aborted()) {
  3041     set_has_aborted();
  3042     statsOnly( ++_aborted_cm_aborted );
  3043     return;
  3044   }
  3046   double curr_time_ms = os::elapsedVTime() * 1000.0;
  3048   // (3) If marking stats are enabled, then we update the step history.
  3049 #if _MARKING_STATS_
  3050   if (_words_scanned >= _words_scanned_limit)
  3051     ++_clock_due_to_scanning;
  3052   if (_refs_reached >= _refs_reached_limit)
  3053     ++_clock_due_to_marking;
  3055   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
  3056   _interval_start_time_ms = curr_time_ms;
  3057   _all_clock_intervals_ms.add(last_interval_ms);
  3059   if (_cm->verbose_medium()) {
  3060     gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
  3061                            "scanned = %d%s, refs reached = %d%s",
  3062                            _task_id, last_interval_ms,
  3063                            _words_scanned,
  3064                            (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
  3065                            _refs_reached,
  3066                            (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
  3067   }
  3068 #endif // _MARKING_STATS_
  3070   // (4) We check whether we should yield. If we have to, then we abort.
  3071   if (_cm->should_yield()) {
  3072     // We should yield. To do this we abort the task. The caller is
  3073     // responsible for yielding.
  3074     set_has_aborted();
  3075     statsOnly( ++_aborted_yield );
  3076     return;
  3077   }
  3079   // (5) We check whether we've reached our time quota. If we have,
  3080   // then we abort.
  3081   double elapsed_time_ms = curr_time_ms - _start_time_ms;
  3082   if (elapsed_time_ms > _time_target_ms) {
  3083     set_has_aborted();
  3084     _has_aborted_timed_out = true;
  3085     statsOnly( ++_aborted_timed_out );
  3086     return;
  3087   }
  3089   // (6) Finally, we check whether there are enough completed SATB
  3090   // buffers available for processing. If there are, we abort.
  3091   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  3092   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
  3093     if (_cm->verbose_low())
  3094       gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
  3095                              _task_id);
  3096     // we do need to process SATB buffers, we'll abort and restart
  3097     // the marking task to do so
  3098     set_has_aborted();
  3099     statsOnly( ++_aborted_satb );
  3100     return;
  3101   }
  3102 }
  3104 void CMTask::recalculate_limits() {
  3105   _real_words_scanned_limit = _words_scanned + words_scanned_period;
  3106   _words_scanned_limit      = _real_words_scanned_limit;
  3108   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
  3109   _refs_reached_limit       = _real_refs_reached_limit;
  3110 }
  3112 void CMTask::decrease_limits() {
  3113   // This is called when we believe that we're going to do an infrequent
  3114   // operation which will increase the per-byte scanned cost (i.e. move
  3115   // entries to/from the global stack). It basically tries to decrease the
  3116   // scanning limit so that the clock is called earlier.
  3118   if (_cm->verbose_medium())
  3119     gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
  3121   _words_scanned_limit = _real_words_scanned_limit -
  3122     3 * words_scanned_period / 4;
  3123   _refs_reached_limit  = _real_refs_reached_limit -
  3124     3 * refs_reached_period / 4;
  3125 }
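       // Illustrative arithmetic for decrease_limits() above (hypothetical
       // numbers; the real periods are defined elsewhere in this class): if
       // words_scanned_period were 1000 and _real_words_scanned_limit were
       // 5000, the new _words_scanned_limit would be 5000 - 750 = 4250, so
       // the clock fires after at most ~250 more scanned words instead of a
       // full 1000-word period.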
  3127 void CMTask::move_entries_to_global_stack() {
  3128   // local array where we'll store the entries that will be popped
  3129   // from the local queue
  3130   oop buffer[global_stack_transfer_size];
  3132   int n = 0;
  3133   oop obj;
  3134   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
  3135     buffer[n] = obj;
  3136     ++n;
  3137   }
  3139   if (n > 0) {
  3140     // we popped at least one entry from the local queue
  3142     statsOnly( ++_global_transfers_to; _local_pops += n );
  3144     if (!_cm->mark_stack_push(buffer, n)) {
  3145       if (_cm->verbose_low())
  3146         gclog_or_tty->print_cr("[%d] aborting due to global stack overflow", _task_id);
  3147       set_has_aborted();
  3148     } else {
  3149       // the transfer was successful
  3151       if (_cm->verbose_medium())
  3152         gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
  3153                                _task_id, n);
  3154       statsOnly( int tmp_size = _cm->mark_stack_size();
  3155                  if (tmp_size > _global_max_size)
  3156                    _global_max_size = tmp_size;
  3157                  _global_pushes += n );
  3158     }
  3159   }
  3161   // this operation was quite expensive, so decrease the limits
  3162   decrease_limits();
  3163 }
  3165 void CMTask::get_entries_from_global_stack() {
  3166   // local array where we'll store the entries that will be popped
  3167   // from the global stack.
  3168   oop buffer[global_stack_transfer_size];
  3169   int n;
  3170   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
  3171   assert(n <= global_stack_transfer_size,
  3172          "we should not pop more than the given limit");
  3173   if (n > 0) {
  3174     // yes, we did actually pop at least one entry
  3176     statsOnly( ++_global_transfers_from; _global_pops += n );
  3177     if (_cm->verbose_medium())
  3178       gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
  3179                              _task_id, n);
  3180     for (int i = 0; i < n; ++i) {
  3181       bool success = _task_queue->push(buffer[i]);
  3182       // We only call this when the local queue is empty or under a
  3183       // given target limit. So, we do not expect this push to fail.
  3184       assert(success, "invariant");
  3185     }
  3187     statsOnly( int tmp_size = _task_queue->size();
  3188                if (tmp_size > _local_max_size)
  3189                  _local_max_size = tmp_size;
  3190                _local_pushes += n );
  3191   }
  3193   // this operation was quite expensive, so decrease the limits
  3194   decrease_limits();
  3195 }
  3197 void CMTask::drain_local_queue(bool partially) {
  3198   if (has_aborted())
  3199     return;
  3201   // Decide what the target size is, depending on whether we're going to
  3202   // drain it partially (so that other tasks can steal if they run out
  3203   // of things to do) or totally (at the very end).
  3204   size_t target_size;
  3205   if (partially)
  3206     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
  3207   else
  3208     target_size = 0;
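         // Illustrative numbers for the choice above (hypothetical; the
         // real values come from max_elems() and GCDrainStackTargetSize):
         // with max_elems() == 300 the first operand is 100, so a target
         // of 64 would win and we stop draining once 64 entries remain,
         // leaving entries available for other tasks to steal.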
  3210   if (_task_queue->size() > target_size) {
  3211     if (_cm->verbose_high())
  3212       gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
  3213                              _task_id, target_size);
  3215     oop obj;
  3216     bool ret = _task_queue->pop_local(obj);
  3217     while (ret) {
  3218       statsOnly( ++_local_pops );
  3220       if (_cm->verbose_high())
  3221         gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
  3222                                (void*) obj);
  3224       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
  3225       assert(!_g1h->heap_region_containing(obj)->is_on_free_list(),
  3226              "invariant");
  3228       scan_object(obj);
  3230       if (_task_queue->size() <= target_size || has_aborted())
  3231         ret = false;
  3232       else
  3233         ret = _task_queue->pop_local(obj);
  3234     }
  3236     if (_cm->verbose_high())
  3237       gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
  3238                              _task_id, _task_queue->size());
  3239   }
  3240 }
  3242 void CMTask::drain_global_stack(bool partially) {
  3243   if (has_aborted())
  3244     return;
  3246   // We have a policy to drain the local queue before we attempt to
  3247   // drain the global stack.
  3248   assert(partially || _task_queue->size() == 0, "invariant");
  3250   // Decide what the target size is, depending on whether we're going to
  3251   // drain it partially (so that other tasks can steal if they run out
  3252   // of things to do) or totally (at the very end).  Notice that,
  3253   // because we move entries from the global stack in chunks or
  3254   // because another task might be doing the same, we might in fact
  3255   // drop below the target. But, this is not a problem.
  3256   size_t target_size;
  3257   if (partially)
  3258     target_size = _cm->partial_mark_stack_size_target();
  3259   else
  3260     target_size = 0;
  3262   if (_cm->mark_stack_size() > target_size) {
  3263     if (_cm->verbose_low())
  3264       gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
  3265                              _task_id, target_size);
  3267     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
  3268       get_entries_from_global_stack();
  3269       drain_local_queue(partially);
  3270     }
  3272     if (_cm->verbose_low())
  3273       gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
  3274                              _task_id, _cm->mark_stack_size());
  3275   }
  3276 }
  3278 // SATB Queue has several assumptions on whether to call the par or
  3279 // non-par versions of the methods. This is why some of the code is
  3280 // replicated. We should really get rid of the single-threaded version
  3281 // of the code to simplify things.
  3282 void CMTask::drain_satb_buffers() {
  3283   if (has_aborted())
  3284     return;
  3286   // We set this so that the regular clock knows that we're in the
  3287   // middle of draining buffers and doesn't set the abort flag when it
  3288   // notices that SATB buffers are available for draining. It'd be
  3289   // very counterproductive if it did that. :-)
  3290   _draining_satb_buffers = true;
  3292   CMObjectClosure oc(this);
  3293   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  3294   if (ParallelGCThreads > 0)
  3295     satb_mq_set.set_par_closure(_task_id, &oc);
  3296   else
  3297     satb_mq_set.set_closure(&oc);
  3299   // This keeps claiming and applying the closure to completed buffers
  3300   // until we run out of buffers or we need to abort.
  3301   if (ParallelGCThreads > 0) {
  3302     while (!has_aborted() &&
  3303            satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
  3304       if (_cm->verbose_medium())
  3305         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
  3306       statsOnly( ++_satb_buffers_processed );
  3307       regular_clock_call();
  3308     }
  3309   } else {
  3310     while (!has_aborted() &&
  3311            satb_mq_set.apply_closure_to_completed_buffer()) {
  3312       if (_cm->verbose_medium())
  3313         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
  3314       statsOnly( ++_satb_buffers_processed );
  3315       regular_clock_call();
  3316     }
  3317   }
  3319   if (!concurrent() && !has_aborted()) {
  3320     // We should only do this during remark.
  3321     if (ParallelGCThreads > 0)
  3322       satb_mq_set.par_iterate_closure_all_threads(_task_id);
  3323     else
  3324       satb_mq_set.iterate_closure_all_threads();
  3325   }
  3327   _draining_satb_buffers = false;
  3329   assert(has_aborted() ||
  3330          concurrent() ||
  3331          satb_mq_set.completed_buffers_num() == 0, "invariant");
  3333   if (ParallelGCThreads > 0)
  3334     satb_mq_set.set_par_closure(_task_id, NULL);
  3335   else
  3336     satb_mq_set.set_closure(NULL);
  3338   // again, this was a potentially expensive operation, decrease the
  3339   // limits to get the regular clock call early
  3340   decrease_limits();
  3341 }
  3343 void CMTask::drain_region_stack(BitMapClosure* bc) {
  3344   if (has_aborted())
  3345     return;
  3347   assert(_region_finger == NULL,
  3348          "it should be NULL when we're not scanning a region");
  3350   if (!_cm->region_stack_empty()) {
  3351     if (_cm->verbose_low())
  3352       gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
  3353                              _task_id, _cm->region_stack_size());
  3355     MemRegion mr = _cm->region_stack_pop();
  3356     // it returns MemRegion() if the pop fails
  3357     statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
  3359     while (mr.start() != NULL) {
  3360       if (_cm->verbose_medium())
  3361         gclog_or_tty->print_cr("[%d] we are scanning region "
  3362                                "["PTR_FORMAT", "PTR_FORMAT")",
  3363                                _task_id, mr.start(), mr.end());
  3364       assert(mr.end() <= _cm->finger(),
  3365              "otherwise the region shouldn't be on the stack");
  3366       assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
  3367       if (_nextMarkBitMap->iterate(bc, mr)) {
  3368         assert(!has_aborted(),
  3369                "cannot abort the task without aborting the bitmap iteration");
  3371         // We finished iterating over the region without aborting.
  3372         regular_clock_call();
  3373         if (has_aborted())
  3374           mr = MemRegion();
  3375         else {
  3376           mr = _cm->region_stack_pop();
  3377           // it returns MemRegion() if the pop fails
  3378           statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
  3379         }
  3380       } else {
  3381         assert(has_aborted(), "currently the only way to do so");
  3383         // The only way to abort the bitmap iteration is to return
  3384         // false from the do_bit() method. However, inside the
  3385         // do_bit() method we move the _region_finger to point to the
  3386         // object currently being looked at. So, if we bail out, we
  3387         // have definitely set _region_finger to something non-null.
  3388         assert(_region_finger != NULL, "invariant");
  3390         // The iteration was actually aborted. So now _region_finger
  3391         // points to the address of the object we last scanned. If we
  3392         // leave it there, when we restart this task, we will rescan
  3393         // the object. It is easy to avoid this. We move the finger by
  3394         // enough to point to the next possible object header (the
  3395         // bitmap knows by how much we need to move it as it knows its
  3396         // granularity).
  3397         MemRegion newRegion =
  3398           MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());
  3400         if (!newRegion.is_empty()) {
  3401           if (_cm->verbose_low()) {
  3402             gclog_or_tty->print_cr("[%d] pushing unscanned region "
  3403                                    "[" PTR_FORMAT "," PTR_FORMAT ") on region stack",
  3404                                    _task_id,
  3405                                    newRegion.start(), newRegion.end());
  3406           }
  3407           // Now push the part of the region we didn't scan on the
  3408           // region stack to make sure a task scans it later.
  3409           _cm->region_stack_push(newRegion);
  3410         }
  3411         // break from while
  3412         mr = MemRegion();
  3413       }
  3414       _region_finger = NULL;
  3415     }
  3417     if (_cm->verbose_low())
  3418       gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
  3419                              _task_id, _cm->region_stack_size());
  3420   }
  3421 }
  3423 void CMTask::print_stats() {
  3424   gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
  3425                          _task_id, _calls);
  3426   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
  3427                          _elapsed_time_ms, _termination_time_ms);
  3428   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
  3429                          _step_times_ms.num(), _step_times_ms.avg(),
  3430                          _step_times_ms.sd());
  3431   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
  3432                          _step_times_ms.maximum(), _step_times_ms.sum());
  3434 #if _MARKING_STATS_
  3435   gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
  3436                          _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
  3437                          _all_clock_intervals_ms.sd());
  3438   gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
  3439                          _all_clock_intervals_ms.maximum(),
  3440                          _all_clock_intervals_ms.sum());
  3441   gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
  3442                          _clock_due_to_scanning, _clock_due_to_marking);
  3443   gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
  3444                          _objs_scanned, _objs_found_on_bitmap);
  3445   gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
  3446                          _local_pushes, _local_pops, _local_max_size);
  3447   gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
  3448                          _global_pushes, _global_pops, _global_max_size);
  3449   gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
  3450                          _global_transfers_to,_global_transfers_from);
  3451   gclog_or_tty->print_cr("  Regions: claimed = %d, Region Stack: pops = %d",
  3452                          _regions_claimed, _region_stack_pops);
  3453   gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
  3454   gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
  3455                          _steal_attempts, _steals);
  3456   gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
  3457   gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
  3458                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
  3459   gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
  3460                          _aborted_timed_out, _aborted_satb, _aborted_termination);
  3461 #endif // _MARKING_STATS_
  3462 }
  3464 /*****************************************************************************
  3466     The do_marking_step(time_target_ms) method is the building block
  3467     of the parallel marking framework. It can be called in parallel
  3468     with other invocations of do_marking_step() on different tasks
  3469     (but only one per task, obviously) and concurrently with the
  3470     mutator threads, or during remark, hence it eliminates the need
  3471     for two versions of the code. When called during remark, it will
  3472     pick up from where the task left off during the concurrent marking
  3473     phase. Interestingly, tasks are also claimable during evacuation
  3474     pauses, since do_marking_step() ensures that it aborts before
  3475     it needs to yield.
  3477     The data structures that it uses to do marking work are the
  3478     following:
  3480       (1) Marking Bitmap. If there are gray objects that appear only
  3481       on the bitmap (this happens either when dealing with an overflow
  3482       or when the initial marking phase has simply marked the roots
  3483       and didn't push them on the stack), then tasks claim heap
  3484       regions whose bitmap they then scan to find gray objects. A
  3485       global finger indicates where the end of the last claimed region
  3486       is. A local finger indicates how far into the region a task has
  3487       scanned. The two fingers are used to determine how to gray an
  3488       object (i.e. whether simply marking it is OK, as it will be
  3489       visited by a task in the future, or whether it also needs to be
  3490       pushed on a stack).
  3492       (2) Local Queue. The task's own queue, which the task can access
  3493       reasonably efficiently. Other tasks can steal from
  3494       it when they run out of work. Throughout the marking phase, a
  3495       task attempts to keep its local queue short but not totally
  3496       empty, so that entries are available for stealing by other
  3497       tasks. Only when there is no more work will a task totally
  3498       drain its local queue.
  3500       (3) Global Mark Stack. This handles local queue overflow. During
  3501       marking only sets of entries are moved between it and the local
  3502       queues, as access to it requires a mutex and finer-grained
  3503       interaction with it might cause contention. If it
  3504       overflows, then the marking phase should restart and iterate
  3505       over the bitmap to identify gray objects. Throughout the marking
  3506       phase, tasks attempt to keep the global mark stack at a small
  3507       length but not totally empty, so that entries are available for
  3508       popping by other tasks. Only when there is no more work will
  3509       tasks totally drain the global mark stack.
  3511       (4) Global Region Stack. Entries on it correspond to areas of
  3512       the bitmap that need to be scanned since they contain gray
  3513       objects. Pushes on the region stack only happen during
  3514       evacuation pauses and typically correspond to areas covered by
  3515       GC LABs. If it overflows, then the marking phase should restart
  3516       and iterate over the bitmap to identify gray objects. Tasks will
  3517       try to totally drain the region stack as soon as possible.
  3519       (5) SATB Buffer Queue. This is where completed SATB buffers are
  3520       made available. Buffers are regularly removed from this queue
  3521       and scanned for roots, so that the queue doesn't get too
  3522       long. During remark, all completed buffers are processed, as
  3523       well as the filled-in parts of any uncompleted buffers.
  3525     The do_marking_step() method tries to abort when the time target
  3526     has been reached. There are a few other cases when the
  3527     do_marking_step() method also aborts:
  3529       (1) When the marking phase has been aborted (after a Full GC).
  3531       (2) When a global overflow (either on the global stack or the
  3532       region stack) has been triggered. Before the task aborts, it
  3533       will actually sync up with the other tasks to ensure that all
  3534       the marking data structures (local queues, stacks, fingers etc.)
  3535       are re-initialised so that when do_marking_step() completes,
  3536       the marking phase can immediately restart.
  3538       (3) When enough completed SATB buffers are available. The
  3539       do_marking_step() method only tries to drain SATB buffers right
  3540       at the beginning. So, if enough buffers are available, the
  3541       marking step aborts and the SATB buffers are processed at
  3542       the beginning of the next invocation.
  3544       (4) To yield. When we have to yield, we abort and yield
  3545       right at the end of do_marking_step(). This saves us from a lot
  3546       of hassle as, by yielding, we might allow a Full GC. If this
  3547       happens then objects will be compacted underneath our feet, the
  3548       heap might shrink, etc. We save checking for this by just
  3549       aborting and doing the yield right at the end.
  3551     From the above it follows that the do_marking_step() method should
  3552     be called in a loop (or, otherwise, regularly) until it completes.
  3554     If a marking step completes without its has_aborted() flag being
  3555     true, it means it has completed the current marking phase (and
  3556     also all other marking tasks have done so and have all synced up).
  3558     A method called regular_clock_call() is invoked "regularly" (in
  3559     sub-ms intervals) throughout marking. It is this clock method that
  3560     checks all the abort conditions which were mentioned above and
  3561     decides when the task should abort. A work-based scheme is used to
  3562     trigger this clock method: when the number of object words the
  3563     marking phase has scanned or the number of references the marking
  3564     phase has visited reach a given limit. Additional invocations of
  3565     the clock method have been planted in a few other strategic places
  3566     too. The initial reason for the clock method was to avoid calling
  3567     vtime too regularly, as it is quite expensive. So, once it was in
  3568     place, it was natural to piggy-back all the other conditions on it
  3569     too and not constantly check them throughout the code.
  3571  *****************************************************************************/
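       // A compact pseudocode sketch (added commentary, not from the
       // original source) of the control flow described above; the real
       // implementation follows:
       //
       //   do_marking_step(target):
       //     drain SATB buffers; totally drain the region stack
       //     do {
       //       finish scanning the current region, if we hold one
       //       partially drain the local queue and the global stack
       //       claim new regions until none remain or we abort
       //     } while (we hold a region && !aborted)
       //     totally drain the local queue and the global stack
       //     steal from other tasks' queues until nothing is left
       //     enter the termination protocol
       //     on overflow: sync on the two barriers and re-initialise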
  3573 void CMTask::do_marking_step(double time_target_ms) {
  3574   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
  3575   assert(concurrent() == _cm->concurrent(), "they should be the same");
  3577   assert(concurrent() || _cm->region_stack_empty(),
  3578          "the region stack should have been cleared before remark");
  3579   assert(_region_finger == NULL,
  3580          "this should be non-null only when a region is being scanned");
  3582   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
  3583   assert(_task_queues != NULL, "invariant");
  3584   assert(_task_queue != NULL, "invariant");
  3585   assert(_task_queues->queue(_task_id) == _task_queue, "invariant");
  3587   assert(!_claimed,
  3588          "only one thread should claim this task at any one time");
  3590   // OK, this doesn't safeguard against all possible scenarios, as it is
  3591   // possible for two threads to set the _claimed flag at the same
  3592   // time. But it is only for debugging purposes anyway and it will
  3593   // catch most problems.
  3594   _claimed = true;
  3596   _start_time_ms = os::elapsedVTime() * 1000.0;
  3597   statsOnly( _interval_start_time_ms = _start_time_ms );
  3599   double diff_prediction_ms =
  3600     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
  3601   _time_target_ms = time_target_ms - diff_prediction_ms;
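         // Illustrative numbers (hypothetical): if the caller asked for a
         // 10.0ms step and past steps overshot their targets by 0.5ms on
         // average, diff_prediction_ms trims the working target to 9.5ms,
         // so the actual elapsed time should land near the requested value.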
  3603   // set up the variables that are used in the work-based scheme to
  3604   // call the regular clock method
  3605   _words_scanned = 0;
  3606   _refs_reached  = 0;
  3607   recalculate_limits();
  3609   // clear all flags
  3610   clear_has_aborted();
  3611   _has_aborted_timed_out = false;
  3612   _draining_satb_buffers = false;
  3614   ++_calls;
  3616   if (_cm->verbose_low())
  3617     gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
  3618                            "target = %1.2lfms >>>>>>>>>>",
  3619                            _task_id, _calls, _time_target_ms);
  3621   // Set up the bitmap and oop closures. Anything that uses them is
  3622   // eventually called from this method, so it is OK to allocate these
  3623   // statically.
  3624   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
  3625   CMOopClosure    oop_closure(_g1h, _cm, this);
  3626   set_oop_closure(&oop_closure);
  3628   if (_cm->has_overflown()) {
  3629     // This can happen if the region stack or the mark stack overflows
  3630     // during a GC pause and this task, after a yield point,
  3631     // restarts. We have to abort as we need to get into the overflow
  3632     // protocol which happens right at the end of this task.
  3633     set_has_aborted();
  3634   }
  3636   // First drain any available SATB buffers. After this, we will not
  3637   // look at SATB buffers before the next invocation of this method.
  3638   // If enough completed SATB buffers are queued up, the regular clock
  3639   // will abort this task so that it restarts.
  3640   drain_satb_buffers();
  3641   // ...then partially drain the local queue and the global stack
  3642   drain_local_queue(true);
  3643   drain_global_stack(true);
  3645   // Then totally drain the region stack.  We will not look at
  3646   // it again before the next invocation of this method. Entries on
  3647   // the region stack are only added during evacuation pauses, for
  3648   // which we have to yield. When we do, we abort the task anyway so
  3649   // it will look at the region stack again when it restarts.
  3650   bitmap_closure.set_scanning_heap_region(false);
  3651   drain_region_stack(&bitmap_closure);
  3652   // ...then partially drain the local queue and the global stack
  3653   drain_local_queue(true);
  3654   drain_global_stack(true);
  3656   do {
  3657     if (!has_aborted() && _curr_region != NULL) {
  3658       // This means that we're already holding on to a region.
  3659       assert(_finger != NULL, "if region is not NULL, then the finger "
  3660              "should not be NULL either");
  3662       // We might have restarted this task after an evacuation pause
  3663       // which might have evacuated the region we're holding on to
  3664       // underneath our feet. Let's read its limit again to make sure
  3665       // that we do not iterate over a region of the heap that
  3666       // contains garbage (update_region_limit() will also move
  3667       // _finger to the start of the region if it is found empty).
  3668       update_region_limit();
  3669       // We will start from _finger not from the start of the region,
  3670       // as we might be restarting this task after aborting half-way
  3671       // through scanning this region. In this case, _finger points to
  3672       // the address where we last found a marked object. If this is a
  3673       // fresh region, _finger points to start().
  3674       MemRegion mr = MemRegion(_finger, _region_limit);
  3676       if (_cm->verbose_low())
  3677         gclog_or_tty->print_cr("[%d] we're scanning part "
  3678                                "["PTR_FORMAT", "PTR_FORMAT") "
  3679                                "of region "PTR_FORMAT,
  3680                                _task_id, _finger, _region_limit, _curr_region);
  3682       // Let's iterate over the bitmap of the part of the
  3683       // region that is left.
  3684       bitmap_closure.set_scanning_heap_region(true);
  3685       if (mr.is_empty() ||
  3686           _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
  3687         // We successfully completed iterating over the region. Now,
  3688         // let's give up the region.
  3689         giveup_current_region();
  3690         regular_clock_call();
  3691       } else {
  3692         assert(has_aborted(), "currently the only way to do so");
  3693         // The only way to abort the bitmap iteration is to return
  3694         // false from the do_bit() method. However, inside the
  3695         // do_bit() method we move the _finger to point to the
  3696         // object currently being looked at. So, if we bail out, we
  3697         // have definitely set _finger to something non-null.
  3698         assert(_finger != NULL, "invariant");
  3700         // Region iteration was actually aborted. So now _finger
  3701         // points to the address of the object we last scanned. If we
  3702         // leave it there, when we restart this task, we will rescan
  3703         // the object. It is easy to avoid this. We move the finger by
  3704         // enough to point to the next possible object header (the
  3705         // bitmap knows by how much we need to move it as it knows its
  3706         // granularity).
  3707         move_finger_to(_nextMarkBitMap->nextWord(_finger));
  3708       }
  3709     }
  3710     // At this point we have either completed iterating over the
  3711     // region we were holding on to, or we have aborted.
  3713     // We then partially drain the local queue and the global stack.
  3714     // (Do we really need this?)
  3715     drain_local_queue(true);
  3716     drain_global_stack(true);
  3718     // Read the note on the claim_region() method on why it might
  3719     // return NULL with potentially more regions available for
  3720     // claiming and why we have to check out_of_regions() to determine
  3721     // whether we're done or not.
  3722     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
  3723       // We are going to try to claim a new region. We should have
  3724       // given up on the previous one.
  3725       // Separated the asserts so that we know which one fires.
  3726       assert(_curr_region  == NULL, "invariant");
  3727       assert(_finger       == NULL, "invariant");
  3728       assert(_region_limit == NULL, "invariant");
  3729       if (_cm->verbose_low())
  3730         gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
  3731       HeapRegion* claimed_region = _cm->claim_region(_task_id);
  3732       if (claimed_region != NULL) {
  3733         // Yes, we managed to claim one
  3734         statsOnly( ++_regions_claimed );
  3736         if (_cm->verbose_low())
  3737           gclog_or_tty->print_cr("[%d] we successfully claimed "
  3738                                  "region "PTR_FORMAT,
  3739                                  _task_id, claimed_region);
  3741         setup_for_region(claimed_region);
  3742         assert(_curr_region == claimed_region, "invariant");
  3743       }
  3744       // It is important to call the regular clock here. It might take
  3745       // a while to claim a region if, for example, we hit a large
  3746       // block of empty regions. So we need to call the regular clock
  3747       // method once round the loop to make sure it's called
  3748       // frequently enough.
  3749       regular_clock_call();
  3750     }
  3752     if (!has_aborted() && _curr_region == NULL) {
  3753       assert(_cm->out_of_regions(),
  3754              "at this point we should be out of regions");
  3755     }
  3756   } while ( _curr_region != NULL && !has_aborted());
  3758   if (!has_aborted()) {
  3759     // We cannot check whether the global stack is empty, since other
  3760     // tasks might be pushing objects to it concurrently. We also cannot
  3761     // check if the region stack is empty because if a thread is aborting
  3762     // it can push a partially done region back.
  3763     assert(_cm->out_of_regions(),
  3764            "at this point we should be out of regions");
  3766     if (_cm->verbose_low())
  3767       gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
  3769     // Try to reduce the number of available SATB buffers so that
  3770     // remark has less work to do.
  3771     drain_satb_buffers();
  3772   }
  3774   // Since we've done everything else, we can now totally drain the
  3775   // local queue and global stack.
  3776   drain_local_queue(false);
  3777   drain_global_stack(false);
  3779   // Attempt at work stealing from other tasks' queues.
  3780   if (!has_aborted()) {
  3781     // We have not aborted. This means that we have finished all that
  3782     // we could. Let's try to do some stealing...
  3784     // We cannot check whether the global stack is empty, since other
  3785     // tasks might be pushing objects to it concurrently. We also cannot
  3786     // check if the region stack is empty because if a thread is aborting
  3787     // it can push a partially done region back.
  3788     assert(_cm->out_of_regions() && _task_queue->size() == 0,
  3789            "only way to reach here");
  3791     if (_cm->verbose_low())
  3792       gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
  3794     while (!has_aborted()) {
  3795       oop obj;
  3796       statsOnly( ++_steal_attempts );
  3798       if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
  3799         if (_cm->verbose_medium())
  3800           gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
  3801                                  _task_id, (void*) obj);
  3803         statsOnly( ++_steals );
  3805         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
  3806                "any stolen object should be marked");
  3807         scan_object(obj);
  3809         // And since we're towards the end, let's totally drain the
  3810         // local queue and global stack.
  3811         drain_local_queue(false);
  3812         drain_global_stack(false);
  3813       } else {
  3814         break;
  3815       }
  3816     }
  3817   }
  3819   // We still haven't aborted. Now, let's try to get into the
  3820   // termination protocol.
  3821   if (!has_aborted()) {
  3822     // We cannot check whether the global stack is empty, since other
  3823     // tasks might be concurrently pushing objects on it. We also cannot
  3824     // check if the region stack is empty because if a thread is aborting
  3825     // it can push a partially done region back.
  3826     // Separated the asserts so that we know which one fires.
  3827     assert(_cm->out_of_regions(), "only way to reach here");
  3828     assert(_task_queue->size() == 0, "only way to reach here");
  3830     if (_cm->verbose_low())
  3831       gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
  3833     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
  3834     // The CMTask class also extends the TerminatorTerminator class,
  3835     // hence its should_exit_termination() method will also decide
  3836     // whether to exit the termination protocol or not.
  3837     bool finished = _cm->terminator()->offer_termination(this);
  3838     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
  3839     _termination_time_ms +=
  3840       termination_end_time_ms - _termination_start_time_ms;
  3842     if (finished) {
  3843       // We're all done.
  3845       if (_task_id == 0) {
  3846         // let's allow task 0 to do this
  3847         if (concurrent()) {
  3848           assert(_cm->concurrent_marking_in_progress(), "invariant");
  3849           // we need to set this to false before the next
  3850           // safepoint. This way we ensure that the marking phase
  3851           // doesn't observe any more heap expansions.
  3852           _cm->clear_concurrent_marking_in_progress();
  3853         }
  3854       }
  3856       // We can now guarantee that the global stack is empty, since
  3857       // all other tasks have finished. We separated the guarantees so
  3858       // that, if a condition is false, we can immediately find out
  3859       // which one.
  3860       guarantee(_cm->out_of_regions(), "only way to reach here");
  3861       guarantee(_cm->region_stack_empty(), "only way to reach here");
  3862       guarantee(_cm->mark_stack_empty(), "only way to reach here");
  3863       guarantee(_task_queue->size() == 0, "only way to reach here");
  3864       guarantee(!_cm->has_overflown(), "only way to reach here");
  3865       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
  3866       guarantee(!_cm->region_stack_overflow(), "only way to reach here");
  3868       if (_cm->verbose_low())
  3869         gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
  3870     } else {
  3871       // Apparently there's more work to do. Let's abort this task. The
  3872       // caller will restart it and we can hopefully find more things to do.
  3874       if (_cm->verbose_low())
  3875         gclog_or_tty->print_cr("[%d] apparently there is more work to do", _task_id);
  3877       set_has_aborted();
  3878       statsOnly( ++_aborted_termination );
  3879     }
  3880   }
  3882   // Mainly for debugging purposes to make sure that a pointer to the
  3883   // closure which was statically allocated in this frame doesn't
  3884   // escape it by accident.
  3885   set_oop_closure(NULL);
  3886   double end_time_ms = os::elapsedVTime() * 1000.0;
  3887   double elapsed_time_ms = end_time_ms - _start_time_ms;
  3888   // Update the step history.
  3889   _step_times_ms.add(elapsed_time_ms);
  3891   if (has_aborted()) {
  3892     // The task was aborted for some reason.
  3894     statsOnly( ++_aborted );
  3896     if (_has_aborted_timed_out) {
  3897       double diff_ms = elapsed_time_ms - _time_target_ms;
  3898       // Keep statistics of how well we did with respect to hitting
  3899       // our target only if we actually timed out (if we aborted for
  3900       // other reasons, then the results might get skewed).
  3901       _marking_step_diffs_ms.add(diff_ms);
  3902     }
  3904     if (_cm->has_overflown()) {
  3905       // This is the interesting one. We aborted because a global
  3906       // overflow was raised. This means we have to restart the
  3907       // marking phase and start iterating over regions. However, in
  3908       // order to do this we have to make sure that all tasks stop
  3909       // what they are doing and re-initialise in a safe manner. We
  3910       // will achieve this with the use of two barrier sync points.
  3912       if (_cm->verbose_low())
  3913         gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
  3915       _cm->enter_first_sync_barrier(_task_id);
  3916       // When we exit this sync barrier we know that all tasks have
  3917       // stopped doing marking work. So, it's now safe to
  3918       // re-initialise our data structures. At the end of this method,
  3919       // task 0 will clear the global data structures.
  3921       statsOnly( ++_aborted_overflow );
  3923       // We clear the local state of this task...
  3924       clear_region_fields();
  3926       // ...and enter the second barrier.
  3927       _cm->enter_second_sync_barrier(_task_id);
  3928     // At this point everything has been re-initialised and we're
  3929       // ready to restart.
  3930     }
  3932     if (_cm->verbose_low()) {
  3933       gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
  3934                              "elapsed = %1.2lfms <<<<<<<<<<",
  3935                              _task_id, _time_target_ms, elapsed_time_ms);
  3936       if (_cm->has_aborted())
  3937         gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
  3938                                _task_id);
  3939     }
  3940   } else {
  3941     if (_cm->verbose_low())
  3942       gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
  3943                              "elapsed = %1.2lfms <<<<<<<<<<",
  3944                              _task_id, _time_target_ms, elapsed_time_ms);
  3945   }
  3947   _claimed = false;
  3948 }
  3950 CMTask::CMTask(int task_id,
  3951                ConcurrentMark* cm,
  3952                CMTaskQueue* task_queue,
  3953                CMTaskQueueSet* task_queues)
  3954   : _g1h(G1CollectedHeap::heap()),
  3955     _task_id(task_id), _cm(cm),
  3956     _claimed(false),
  3957     _nextMarkBitMap(NULL), _hash_seed(17),
  3958     _task_queue(task_queue),
  3959     _task_queues(task_queues),
  3960     _oop_closure(NULL) {
  3961   guarantee(task_queue != NULL, "invariant");
  3962   guarantee(task_queues != NULL, "invariant");
  3964   statsOnly( _clock_due_to_scanning = 0;
  3965              _clock_due_to_marking  = 0 );
  3967   _marking_step_diffs_ms.add(0.5);
  3968 }
