src/share/vm/gc_implementation/g1/concurrentMark.cpp

author       tonyp
date         Mon, 05 Apr 2010 12:19:22 -0400
changeset    1793 72f725c5a7be
parent       1752 d4197f8d516a
child        1794 23b1b27ac76c
permissions  -rw-r--r--

6940310: G1: MT-unsafe calls to CM::region_stack_push() / CM::region_stack_pop()
Summary: Calling the methods region_stack_push() and region_stack_pop() concurrently is not MT-safe. The assumption is that we will only call region_stack_push() during a GC pause and region_stack_pop() during marking. Unfortunately, we also call region_stack_push() during marking, which seems to be introducing subtle marking failures. This change introduces lock-based methods for pushing / popping to be called during marking.
Reviewed-by: iveresov, johnc
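For readers skimming this page, the shape of the fix: the CAS-based CMRegionStack::push() remains for the GC-pause path, while marking-time pushes and pops go through the new push_with_lock() / pop_with_lock() further down in this file, which serialize on CMRegionStack_lock. Below is a minimal, self-contained sketch of that lock-based pattern; it is illustrative only, using std::mutex and stand-in Region / LockedRegionStack types rather than HotSpot's MutexLockerEx, MemRegion, and CMRegionStack.

    #include <cstddef>
    #include <mutex>

    // Illustrative stand-in for MemRegion (not a HotSpot type).
    struct Region { void* start; size_t word_size; };

    class LockedRegionStack {
      Region*    _base;      // pre-allocated backing array
      size_t     _capacity;
      size_t     _index;     // next free slot
      bool       _overflow;
      std::mutex _lock;      // plays the role of CMRegionStack_lock
    public:
      LockedRegionStack(Region* base, size_t capacity)
        : _base(base), _capacity(capacity), _index(0), _overflow(false) {}

      // Marking-time push: the lock serializes index updates, so a pop
      // running at the same time cannot interleave with the store below.
      void push_with_lock(Region r) {
        std::lock_guard<std::mutex> x(_lock);
        if (_index == _capacity) { _overflow = true; return; }
        _base[_index] = r;
        _index += 1;
      }

      // Marking-time pop: returns false when the stack is empty.
      bool pop_with_lock(Region* out) {
        std::lock_guard<std::mutex> x(_lock);
        if (_index == 0) return false;
        _index -= 1;
        *out = _base[_index];
        return true;
      }
    };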

     1 /*
     2  * Copyright 2001-2009 Sun Microsystems, Inc.  All Rights Reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.
     8  *
     9  * This code is distributed in the hope that it will be useful, but WITHOUT
    10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    12  * version 2 for more details (a copy is included in the LICENSE file that
    13  * accompanied this code).
    14  *
    15  * You should have received a copy of the GNU General Public License version
    16  * 2 along with this work; if not, write to the Free Software Foundation,
    17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    18  *
    19  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
    20  * CA 95054 USA or visit www.sun.com if you need additional information or
    21  * have any questions.
    22  *
    23  */
    25 #include "incls/_precompiled.incl"
    26 #include "incls/_concurrentMark.cpp.incl"
    28 //
    29 // CMS Bit Map Wrapper
    31 CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter):
    32   _bm((uintptr_t*)NULL,0),
    33   _shifter(shifter) {
    34   _bmStartWord = (HeapWord*)(rs.base());
    35   _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
    36   ReservedSpace brs(ReservedSpace::allocation_align_size_up(
    37                      (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
    39   guarantee(brs.is_reserved(), "couldn't allocate CMS bit map");
    40   // For now we'll just commit all of the bit map up front.
    41   // Later on we'll try to be more parsimonious with swap.
    42   guarantee(_virtual_space.initialize(brs, brs.size()),
    43             "couldn't reserve backing store for CMS bit map");
    44   assert(_virtual_space.committed_size() == brs.size(),
    45          "didn't reserve backing store for all of CMS bit map?");
    46   _bm.set_map((uintptr_t*)_virtual_space.low());
    47   assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
    48          _bmWordSize, "inconsistency in bit map sizing");
    49   _bm.set_size(_bmWordSize >> _shifter);
    50 }
    52 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
    53                                                HeapWord* limit) const {
    54   // First we must round addr *up* to a possible object boundary.
    55   addr = (HeapWord*)align_size_up((intptr_t)addr,
    56                                   HeapWordSize << _shifter);
    57   size_t addrOffset = heapWordToOffset(addr);
    58   if (limit == NULL) limit = _bmStartWord + _bmWordSize;
    59   size_t limitOffset = heapWordToOffset(limit);
    60   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
    61   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
    62   assert(nextAddr >= addr, "get_next_one postcondition");
    63   assert(nextAddr == limit || isMarked(nextAddr),
    64          "get_next_one postcondition");
    65   return nextAddr;
    66 }
    68 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
    69                                                  HeapWord* limit) const {
    70   size_t addrOffset = heapWordToOffset(addr);
    71   if (limit == NULL) limit = _bmStartWord + _bmWordSize;
    72   size_t limitOffset = heapWordToOffset(limit);
    73   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
    74   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
    75   assert(nextAddr >= addr, "get_next_one postcondition");
    76   assert(nextAddr == limit || !isMarked(nextAddr),
    77          "get_next_one postcondition");
    78   return nextAddr;
    79 }
    81 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
    82   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
    83   return (int) (diff >> _shifter);
    84 }
    86 bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
    87   HeapWord* left  = MAX2(_bmStartWord, mr.start());
    88   HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
    89   if (right > left) {
    90     // Right-open interval [leftOffset, rightOffset).
    91     return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
    92   } else {
    93     return true;
    94   }
    95 }
    97 void CMBitMapRO::mostly_disjoint_range_union(BitMap*   from_bitmap,
    98                                              size_t    from_start_index,
    99                                              HeapWord* to_start_word,
   100                                              size_t    word_num) {
   101   _bm.mostly_disjoint_range_union(from_bitmap,
   102                                   from_start_index,
   103                                   heapWordToOffset(to_start_word),
   104                                   word_num);
   105 }
   107 #ifndef PRODUCT
   108 bool CMBitMapRO::covers(ReservedSpace rs) const {
   109   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
   110   assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
   111          "size inconsistency");
   112   return _bmStartWord == (HeapWord*)(rs.base()) &&
   113          _bmWordSize  == rs.size()>>LogHeapWordSize;
   114 }
   115 #endif
   117 void CMBitMap::clearAll() {
   118   _bm.clear();
   119   return;
   120 }
   122 void CMBitMap::markRange(MemRegion mr) {
   123   mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
   124   assert(!mr.is_empty(), "unexpected empty region");
   125   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
   126           ((HeapWord *) mr.end())),
   127          "markRange memory region end is not card aligned");
   128   // convert address range into offset range
   129   _bm.at_put_range(heapWordToOffset(mr.start()),
   130                    heapWordToOffset(mr.end()), true);
   131 }
   133 void CMBitMap::clearRange(MemRegion mr) {
   134   mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
   135   assert(!mr.is_empty(), "unexpected empty region");
   136   // convert address range into offset range
   137   _bm.at_put_range(heapWordToOffset(mr.start()),
   138                    heapWordToOffset(mr.end()), false);
   139 }
   141 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
   142                                             HeapWord* end_addr) {
   143   HeapWord* start = getNextMarkedWordAddress(addr);
   144   start = MIN2(start, end_addr);
   145   HeapWord* end   = getNextUnmarkedWordAddress(start);
   146   end = MIN2(end, end_addr);
   147   assert(start <= end, "Consistency check");
   148   MemRegion mr(start, end);
   149   if (!mr.is_empty()) {
   150     clearRange(mr);
   151   }
   152   return mr;
   153 }
   155 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
   156   _base(NULL), _cm(cm)
   157 #ifdef ASSERT
   158   , _drain_in_progress(false)
   159   , _drain_in_progress_yields(false)
   160 #endif
   161 {}
   163 void CMMarkStack::allocate(size_t size) {
   164   _base = NEW_C_HEAP_ARRAY(oop, size);
   165   if (_base == NULL)
   166     vm_exit_during_initialization("Failed to allocate "
   167                                   "CM region mark stack");
   168   _index = 0;
   169   // QQQQ cast ...
   170   _capacity = (jint) size;
   171   _oops_do_bound = -1;
   172   NOT_PRODUCT(_max_depth = 0);
   173 }
   175 CMMarkStack::~CMMarkStack() {
   176   if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base);
   177 }
   179 void CMMarkStack::par_push(oop ptr) {
   180   while (true) {
   181     if (isFull()) {
   182       _overflow = true;
   183       return;
   184     }
   185     // Otherwise...
   186     jint index = _index;
   187     jint next_index = index+1;
   188     jint res = Atomic::cmpxchg(next_index, &_index, index);
   189     if (res == index) {
   190       _base[index] = ptr;
   191       // Note that we don't maintain this atomically.  We could, but it
   192       // doesn't seem necessary.
   193       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
   194       return;
   195     }
   196     // Otherwise, we need to try again.
   197   }
   198 }
   200 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
   201   while (true) {
   202     if (isFull()) {
   203       _overflow = true;
   204       return;
   205     }
   206     // Otherwise...
   207     jint index = _index;
   208     jint next_index = index + n;
   209     if (next_index > _capacity) {
   210       _overflow = true;
   211       return;
   212     }
   213     jint res = Atomic::cmpxchg(next_index, &_index, index);
   214     if (res == index) {
   215       for (int i = 0; i < n; i++) {
   216         int ind = index + i;
   217         assert(ind < _capacity, "By overflow test above.");
   218         _base[ind] = ptr_arr[i];
   219       }
   220       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
   221       return;
   222     }
   223     // Otherwise, we need to try again.
   224   }
   225 }
   228 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
   229   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
   230   jint start = _index;
   231   jint next_index = start + n;
   232   if (next_index > _capacity) {
   233     _overflow = true;
   234     return;
   235   }
   236   // Otherwise.
   237   _index = next_index;
   238   for (int i = 0; i < n; i++) {
   239     int ind = start + i;
   240     assert(ind < _capacity, "By overflow test above.");
   241     _base[ind] = ptr_arr[i];
   242   }
   243 }
   246 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
   247   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
   248   jint index = _index;
   249   if (index == 0) {
   250     *n = 0;
   251     return false;
   252   } else {
   253     int k = MIN2(max, index);
   254     jint new_ind = index - k;
   255     for (int j = 0; j < k; j++) {
   256       ptr_arr[j] = _base[new_ind + j];
   257     }
   258     _index = new_ind;
   259     *n = k;
   260     return true;
   261   }
   262 }
   265 CMRegionStack::CMRegionStack() : _base(NULL) {}
   267 void CMRegionStack::allocate(size_t size) {
   268   _base = NEW_C_HEAP_ARRAY(MemRegion, size);
   269   if (_base == NULL)
   270     vm_exit_during_initialization("Failed to allocate "
   271                                   "CM region mark stack");
   272   _index = 0;
   273   // QQQQ cast ...
   274   _capacity = (jint) size;
   275 }
   277 CMRegionStack::~CMRegionStack() {
   278   if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base);
   279 }
   281 void CMRegionStack::push(MemRegion mr) {
   282   assert(mr.word_size() > 0, "Precondition");
   283   while (true) {
   284     if (isFull()) {
   285       _overflow = true;
   286       return;
   287     }
   288     // Otherwise...
   289     jint index = _index;
   290     jint next_index = index+1;
   291     jint res = Atomic::cmpxchg(next_index, &_index, index);
   292     if (res == index) {
   293       _base[index] = mr;
   294       return;
   295     }
   296     // Otherwise, we need to try again.
   297   }
   298 }
   300 // Currently we do not call this at all. Normally we would call it
   301 // during the concurrent marking / remark phases but we now call
   302 // the lock-based version instead. But we might want to resurrect this
   303 // code in the future. So, we'll leave it here commented out.
   304 #if 0
   305 MemRegion CMRegionStack::pop() {
   306   while (true) {
   307     // Otherwise...
   308     jint index = _index;
   310     if (index == 0) {
   311       return MemRegion();
   312     }
   313     jint next_index = index-1;
   314     jint res = Atomic::cmpxchg(next_index, &_index, index);
   315     if (res == index) {
   316       MemRegion mr = _base[next_index];
   317       if (mr.start() != NULL) {
   318         assert(mr.end() != NULL, "invariant");
   319         assert(mr.word_size() > 0, "invariant");
   320         return mr;
   321       } else {
   322         // that entry was invalidated... let's skip it
   323         assert(mr.end() == NULL, "invariant");
   324       }
   325     }
   326     // Otherwise, we need to try again.
   327   }
   328 }
   329 #endif // 0
   331 void CMRegionStack::push_with_lock(MemRegion mr) {
   332   assert(mr.word_size() > 0, "Precondition");
   333   MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
   335   if (isFull()) {
   336     _overflow = true;
   337     return;
   338   }
   340   _base[_index] = mr;
   341   _index += 1;
   342 }
   344 MemRegion CMRegionStack::pop_with_lock() {
   345   MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
   347   while (true) {
   348     if (_index == 0) {
   349       return MemRegion();
   350     }
   351     _index -= 1;
   353     MemRegion mr = _base[_index];
   354     if (mr.start() != NULL) {
   355       assert(mr.end() != NULL, "invariant");
   356       assert(mr.word_size() > 0, "invariant");
   357       return mr;
   358     } else {
   359       // that entry was invalidated... let's skip it
   360       assert(mr.end() == NULL, "invariant");
   361     }
   362   }
   363 }
   365 bool CMRegionStack::invalidate_entries_into_cset() {
   366   bool result = false;
   367   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   368   for (int i = 0; i < _oops_do_bound; ++i) {
   369     MemRegion mr = _base[i];
   370     if (mr.start() != NULL) {
   371       assert(mr.end() != NULL, "invariant");
   372       assert(mr.word_size() > 0, "invariant");
   373       HeapRegion* hr = g1h->heap_region_containing(mr.start());
   374       assert(hr != NULL, "invariant");
   375       if (hr->in_collection_set()) {
   376         // The region points into the collection set
   377         _base[i] = MemRegion();
   378         result = true;
   379       }
   380     } else {
   381       // that entry was invalidated... let's skip it
   382       assert(mr.end() == NULL, "invariant");
   383     }
   384   }
   385   return result;
   386 }
   388 template<class OopClosureClass>
   389 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
   390   assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
   391          || SafepointSynchronize::is_at_safepoint(),
   392          "Drain recursion must be yield-safe.");
   393   bool res = true;
   394   debug_only(_drain_in_progress = true);
   395   debug_only(_drain_in_progress_yields = yield_after);
   396   while (!isEmpty()) {
   397     oop newOop = pop();
   398     assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
   399     assert(newOop->is_oop(), "Expected an oop");
   400     assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
   401            "only grey objects on this stack");
   402     // iterate over the oops in this oop, marking and pushing
   403     // the ones in CMS generation.
   404     newOop->oop_iterate(cl);
   405     if (yield_after && _cm->do_yield_check()) {
   406       res = false; break;
   407     }
   408   }
   409   debug_only(_drain_in_progress = false);
   410   return res;
   411 }
   413 void CMMarkStack::oops_do(OopClosure* f) {
   414   if (_index == 0) return;
   415   assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
   416          "Bound must be set.");
   417   for (int i = 0; i < _oops_do_bound; i++) {
   418     f->do_oop(&_base[i]);
   419   }
   420   _oops_do_bound = -1;
   421 }
   423 bool ConcurrentMark::not_yet_marked(oop obj) const {
   424   return (_g1h->is_obj_ill(obj)
   425           || (_g1h->is_in_permanent(obj)
   426               && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
   427 }
   429 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
   430 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
   431 #endif // _MSC_VER
   433 ConcurrentMark::ConcurrentMark(ReservedSpace rs,
   434                                int max_regions) :
   435   _markBitMap1(rs, MinObjAlignment - 1),
   436   _markBitMap2(rs, MinObjAlignment - 1),
   438   _parallel_marking_threads(0),
   439   _sleep_factor(0.0),
   440   _marking_task_overhead(1.0),
   441   _cleanup_sleep_factor(0.0),
   442   _cleanup_task_overhead(1.0),
   443   _region_bm(max_regions, false /* in_resource_area*/),
   444   _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
   445            CardTableModRefBS::card_shift,
   446            false /* in_resource_area*/),
   447   _prevMarkBitMap(&_markBitMap1),
   448   _nextMarkBitMap(&_markBitMap2),
   449   _at_least_one_mark_complete(false),
   451   _markStack(this),
   452   _regionStack(),
   453   // _finger set in set_non_marking_state
   455   _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
   456   // _active_tasks set in set_non_marking_state
   457   // _tasks set inside the constructor
   458   _task_queues(new CMTaskQueueSet((int) _max_task_num)),
   459   _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),
   461   _has_overflown(false),
   462   _concurrent(false),
   463   _has_aborted(false),
   464   _restart_for_overflow(false),
   465   _concurrent_marking_in_progress(false),
   466   _should_gray_objects(false),
   468   // _verbose_level set below
   470   _init_times(),
   471   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
   472   _cleanup_times(),
   473   _total_counting_time(0.0),
   474   _total_rs_scrub_time(0.0),
   476   _parallel_workers(NULL)
   477 {
   478   CMVerboseLevel verbose_level =
   479     (CMVerboseLevel) G1MarkingVerboseLevel;
   480   if (verbose_level < no_verbose)
   481     verbose_level = no_verbose;
   482   if (verbose_level > high_verbose)
   483     verbose_level = high_verbose;
   484   _verbose_level = verbose_level;
   486   if (verbose_low())
   487     gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
   488                            "heap end = "PTR_FORMAT, _heap_start, _heap_end);
   490   _markStack.allocate(MarkStackSize);
   491   _regionStack.allocate(G1MarkRegionStackSize);
   493   // Create & start a ConcurrentMark thread.
   494   _cmThread = new ConcurrentMarkThread(this);
   495   assert(cmThread() != NULL, "CM Thread should have been created");
   496   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
   498   _g1h = G1CollectedHeap::heap();
   499   assert(CGC_lock != NULL, "Where's the CGC_lock?");
   500   assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
   501   assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
   503   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
   504   satb_qs.set_buffer_size(G1SATBBufferSize);
   506   int size = (int) MAX2(ParallelGCThreads, (size_t)1);
   507   _par_cleanup_thread_state = NEW_C_HEAP_ARRAY(ParCleanupThreadState*, size);
   508   for (int i = 0 ; i < size; i++) {
   509     _par_cleanup_thread_state[i] = new ParCleanupThreadState;
   510   }
   512   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
   513   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);
   515   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
   516   _active_tasks = _max_task_num;
   517   for (int i = 0; i < (int) _max_task_num; ++i) {
   518     CMTaskQueue* task_queue = new CMTaskQueue();
   519     task_queue->initialize();
   520     _task_queues->register_queue(i, task_queue);
   522     _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
   523     _accum_task_vtime[i] = 0.0;
   524   }
   526   if (ConcGCThreads > ParallelGCThreads) {
   527     vm_exit_during_initialization("Can't have more ConcGCThreads "
   528                                   "than ParallelGCThreads.");
   529   }
   530   if (ParallelGCThreads == 0) {
   531     // if we are not running with any parallel GC threads we will not
   532     // spawn any marking threads either
   533     _parallel_marking_threads =   0;
   534     _sleep_factor             = 0.0;
   535     _marking_task_overhead    = 1.0;
   536   } else {
   537     if (ConcGCThreads > 0) {
   538       // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
   539       // if both are set
   541       _parallel_marking_threads = ConcGCThreads;
   542       _sleep_factor             = 0.0;
   543       _marking_task_overhead    = 1.0;
   544     } else if (G1MarkingOverheadPercent > 0) {
   545       // we will calculate the number of parallel marking threads
   546       // based on a target overhead with respect to the soft real-time
   547       // goal
   549       double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
   550       double overall_cm_overhead =
   551         (double) MaxGCPauseMillis * marking_overhead /
   552         (double) GCPauseIntervalMillis;
   553       double cpu_ratio = 1.0 / (double) os::processor_count();
   554       double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
   555       double marking_task_overhead =
   556         overall_cm_overhead / marking_thread_num *
   557                                                 (double) os::processor_count();
   558       double sleep_factor =
   559                          (1.0 - marking_task_overhead) / marking_task_overhead;
   561       _parallel_marking_threads = (size_t) marking_thread_num;
   562       _sleep_factor             = sleep_factor;
   563       _marking_task_overhead    = marking_task_overhead;
   564     } else {
   565       _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
   566       _sleep_factor             = 0.0;
   567       _marking_task_overhead    = 1.0;
   568     }
   570     if (parallel_marking_threads() > 1)
   571       _cleanup_task_overhead = 1.0;
   572     else
   573       _cleanup_task_overhead = marking_task_overhead();
   574     _cleanup_sleep_factor =
   575                      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
   577 #if 0
   578     gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
   579     gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
   580     gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
   581     gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
   582     gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
   583 #endif
   585     guarantee(parallel_marking_threads() > 0, "peace of mind");
   586     _parallel_workers = new WorkGang("G1 Parallel Marking Threads",
   587                                      (int) parallel_marking_threads(), false, true);
   588     if (_parallel_workers == NULL)
   589       vm_exit_during_initialization("Failed necessary allocation.");
   590   }
   592   // so that the call below can read a sensible value
   593   _heap_start = (HeapWord*) rs.base();
   594   set_non_marking_state();
   595 }
   597 void ConcurrentMark::update_g1_committed(bool force) {
   598   // If concurrent marking is not in progress, then we do not need to
   599   // update _heap_end. This has a subtle and important
   600   // side-effect. Imagine that two evacuation pauses happen between
   601   // marking completion and remark. The first one can grow the
   602   // heap (hence now the finger is below the heap end). Then, the
   603   // second one could unnecessarily push regions on the region
   604   // stack. This causes the invariant that the region stack is empty
   605   // at the beginning of remark to be false. By ensuring that we do
   606   // not observe heap expansions after marking is complete, we do
   607   // not have this problem.
   608   if (!concurrent_marking_in_progress() && !force)
   609     return;
   611   MemRegion committed = _g1h->g1_committed();
   612   assert(committed.start() == _heap_start, "start shouldn't change");
   613   HeapWord* new_end = committed.end();
   614   if (new_end > _heap_end) {
   615     // The heap has been expanded.
   617     _heap_end = new_end;
   618   }
   619   // Notice that the heap can also shrink. However, this only happens
   620   // during a Full GC (at least currently) and the entire marking
   621   // phase will bail out and the task will not be restarted. So, let's
   622   // do nothing.
   623 }
   625 void ConcurrentMark::reset() {
   626   // Starting values for these two. This should be called in a STW
   627   // phase. CM will be notified of any future g1_committed expansions;
   628   // these will happen at the end of evacuation pauses, when tasks are
   629   // inactive.
   630   MemRegion committed = _g1h->g1_committed();
   631   _heap_start = committed.start();
   632   _heap_end   = committed.end();
   634   // Separated the asserts so that we know which one fires.
   635   assert(_heap_start != NULL, "heap bounds should look ok");
   636   assert(_heap_end != NULL, "heap bounds should look ok");
   637   assert(_heap_start < _heap_end, "heap bounds should look ok");
   639   // reset all the marking data structures and any necessary flags
   640   clear_marking_state();
   642   if (verbose_low())
   643     gclog_or_tty->print_cr("[global] resetting");
   645   // We do reset all of them, since different phases will use
   646   // a different number of active threads. So, it's easiest to have all
   647   // of them ready.
   648   for (int i = 0; i < (int) _max_task_num; ++i)
   649     _tasks[i]->reset(_nextMarkBitMap);
   651   // we need this to make sure that the flag is on during the evac
   652   // pause with initial mark piggy-backed
   653   set_concurrent_marking_in_progress();
   654 }
   656 void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
   657   assert(active_tasks <= _max_task_num, "we should not have more");
   659   _active_tasks = active_tasks;
   660   // Need to update the three data structures below according to the
   661   // number of active threads for this phase.
   662   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
   663   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
   664   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
   666   _concurrent = concurrent;
   667   // We propagate this to all tasks, not just the active ones.
   668   for (int i = 0; i < (int) _max_task_num; ++i)
   669     _tasks[i]->set_concurrent(concurrent);
   671   if (concurrent) {
   672     set_concurrent_marking_in_progress();
   673   } else {
   674     // We currently assume that the concurrent flag has been set to
   675     // false before we start remark. At this point we should also be
   676     // in a STW phase.
   677     assert(!concurrent_marking_in_progress(), "invariant");
   678     assert(_finger == _heap_end, "only way to get here");
   679     update_g1_committed(true);
   680   }
   681 }
   683 void ConcurrentMark::set_non_marking_state() {
   684   // We set the global marking state to some default values when we're
   685   // not doing marking.
   686   clear_marking_state();
   687   _active_tasks = 0;
   688   clear_concurrent_marking_in_progress();
   689 }
   691 ConcurrentMark::~ConcurrentMark() {
   692   int size = (int) MAX2(ParallelGCThreads, (size_t)1);
   693   for (int i = 0; i < size; i++) delete _par_cleanup_thread_state[i];
   694   FREE_C_HEAP_ARRAY(ParCleanupThreadState*,
   695                     _par_cleanup_thread_state);
   697   for (int i = 0; i < (int) _max_task_num; ++i) {
   698     delete _task_queues->queue(i);
   699     delete _tasks[i];
   700   }
   701   delete _task_queues;
   702   FREE_C_HEAP_ARRAY(CMTask*, _max_task_num);
   703 }
   705 // This closure is used to mark refs into the g1 generation
   706 // from external roots in the CMS bit map.
   707 // Called at the first checkpoint.
   708 //
   710 void ConcurrentMark::clearNextBitmap() {
   711    guarantee(!G1CollectedHeap::heap()->mark_in_progress(), "Precondition.");
   713    // clear the mark bitmap (no grey objects to start with).
   714    // We need to do this in chunks and offer to yield in between
   715    // each chunk.
   716    HeapWord* start  = _nextMarkBitMap->startWord();
   717    HeapWord* end    = _nextMarkBitMap->endWord();
   718    HeapWord* cur    = start;
   719    size_t chunkSize = M;
   720    while (cur < end) {
   721      HeapWord* next = cur + chunkSize;
   722      if (next > end)
   723        next = end;
   724      MemRegion mr(cur,next);
   725      _nextMarkBitMap->clearRange(mr);
   726      cur = next;
   727      do_yield_check();
   728    }
   729 }
   731 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
   732 public:
   733   bool doHeapRegion(HeapRegion* r) {
   734     if (!r->continuesHumongous()) {
   735       r->note_start_of_marking(true);
   736     }
   737     return false;
   738   }
   739 };
   741 void ConcurrentMark::checkpointRootsInitialPre() {
   742   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
   743   G1CollectorPolicy* g1p = g1h->g1_policy();
   745   _has_aborted = false;
   747   if (G1PrintReachableAtInitialMark) {
   748     print_reachable(true, "before");
   749   }
   751   // Initialise marking structures. This has to be done in a STW phase.
   752   reset();
   753 }
   755 class CMMarkRootsClosure: public OopsInGenClosure {
   756 private:
   757   ConcurrentMark*  _cm;
   758   G1CollectedHeap* _g1h;
   759   bool             _do_barrier;
   761 public:
   762   CMMarkRootsClosure(ConcurrentMark* cm,
   763                      G1CollectedHeap* g1h,
   764                      bool do_barrier) : _cm(cm), _g1h(g1h),
   765                                         _do_barrier(do_barrier) { }
   767   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
   768   virtual void do_oop(      oop* p) { do_oop_work(p); }
   770   template <class T> void do_oop_work(T* p) {
   771     T heap_oop = oopDesc::load_heap_oop(p);
   772     if (!oopDesc::is_null(heap_oop)) {
   773       oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
   774       assert(obj->is_oop() || obj->mark() == NULL,
   775              "expected an oop, possibly with mark word displaced");
   776       HeapWord* addr = (HeapWord*)obj;
   777       if (_g1h->is_in_g1_reserved(addr)) {
   778         _cm->grayRoot(obj);
   779       }
   780     }
   781     if (_do_barrier) {
   782       assert(!_g1h->is_in_g1_reserved(p),
   783              "Should be called on external roots");
   784       do_barrier(p);
   785     }
   786   }
   787 };
   789 void ConcurrentMark::checkpointRootsInitialPost() {
   790   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
   792   // For each region note start of marking.
   793   NoteStartOfMarkHRClosure startcl;
   794   g1h->heap_region_iterate(&startcl);
   796   // Start weak-reference discovery.
   797   ReferenceProcessor* rp = g1h->ref_processor();
   798   rp->verify_no_references_recorded();
   799   rp->enable_discovery(); // enable ("weak") refs discovery
   800   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
   802   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
   803   // This is the start of the marking cycle; we expect all
   804   // threads to have SATB queues with active set to false.
   805   satb_mq_set.set_active_all_threads(true, /* new active value */
   806                                      false /* expected_active */);
   808   // update_g1_committed() will be called at the end of an evac pause
   809   // when marking is on. So, it's also called at the end of the
   810   // initial-mark pause to update the heap end, if the heap expands
   811   // during it. No need to call it here.
   812 }
   814 // Checkpoint the roots into this generation from outside
   815 // this generation. [Note this initial checkpoint need only
   816 // be approximate -- we'll do a catch up phase subsequently.]
   817 void ConcurrentMark::checkpointRootsInitial() {
   818   assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped");
   819   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   821   double start = os::elapsedTime();
   823   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
   824   g1p->record_concurrent_mark_init_start();
   825   checkpointRootsInitialPre();
   827   // YSR: when concurrent precleaning is in place, we'll
   828   // need to clear the cached card table here
   830   ResourceMark rm;
   831   HandleMark  hm;
   833   g1h->ensure_parsability(false);
   834   g1h->perm_gen()->save_marks();
   836   CMMarkRootsClosure notOlder(this, g1h, false);
   837   CMMarkRootsClosure older(this, g1h, true);
   839   g1h->set_marking_started();
   840   g1h->rem_set()->prepare_for_younger_refs_iterate(false);
   842   g1h->process_strong_roots(true,    // activate StrongRootsScope
   843                             false,   // fake perm gen collection
   844                             SharedHeap::SO_AllClasses,
   845                             &notOlder, // Regular roots
   846                             NULL,     // do not visit active blobs
   847                             &older    // Perm Gen Roots
   848                             );
   849   checkpointRootsInitialPost();
   851   // Statistics.
   852   double end = os::elapsedTime();
   853   _init_times.add((end - start) * 1000.0);
   855   g1p->record_concurrent_mark_init_end();
   856 }
   858 /*
   859    Notice that in the next two methods, we actually leave the STS
   860    during the barrier sync and join it immediately afterwards. If we
   861    do not do this, this then the following deadlock can occur: one
   862    thread could be in the barrier sync code, waiting for the other
   863    thread to also sync up, whereas another one could be trying to
   864    yield, while also waiting for the other threads to sync up too.
   866    Because the thread that does the sync barrier has left the STS, it
   867    is possible for a Full GC or an evacuation pause to occur while it
   868    is in the barrier. This is actually safe, since entering the sync
   869    barrier is one of the last things do_marking_step() does, and it
   870    doesn't manipulate any data structures afterwards.
   871 */
   873 void ConcurrentMark::enter_first_sync_barrier(int task_num) {
   874   if (verbose_low())
   875     gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
   877   ConcurrentGCThread::stsLeave();
   878   _first_overflow_barrier_sync.enter();
   879   ConcurrentGCThread::stsJoin();
   880   // at this point everyone should have synced up and not be doing any
   881   // more work
   883   if (verbose_low())
   884     gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
   886   // let task 0 do this
   887   if (task_num == 0) {
   888     // task 0 is responsible for clearing the global data structures
   889     clear_marking_state();
   891     if (PrintGC) {
   892       gclog_or_tty->date_stamp(PrintGCDateStamps);
   893       gclog_or_tty->stamp(PrintGCTimeStamps);
   894       gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
   895     }
   896   }
   898   // after this, each task should reset its own data structures then
   899   // go into the second barrier
   900 }
   902 void ConcurrentMark::enter_second_sync_barrier(int task_num) {
   903   if (verbose_low())
   904     gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
   906   ConcurrentGCThread::stsLeave();
   907   _second_overflow_barrier_sync.enter();
   908   ConcurrentGCThread::stsJoin();
   909   // at this point everything should be re-initialised and ready to go
   911   if (verbose_low())
   912     gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
   913 }
   915 void ConcurrentMark::grayRoot(oop p) {
   916   HeapWord* addr = (HeapWord*) p;
   917   // We can't really check against _heap_start and _heap_end, since it
   918   // is possible during an evacuation pause with piggy-backed
   919   // initial-mark that the committed space is expanded during the
   920   // pause without CM observing this change. So the assertion below
   921   // is a bit conservative, but better than nothing.
   922   assert(_g1h->g1_committed().contains(addr),
   923          "address should be within the heap bounds");
   925   if (!_nextMarkBitMap->isMarked(addr))
   926     _nextMarkBitMap->parMark(addr);
   927 }
   929 void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
   930   // The objects on the region have already been marked "in bulk" by
   931   // the caller. We only need to decide whether to push the region on
   932   // the region stack or not.
   934   if (!concurrent_marking_in_progress() || !_should_gray_objects)
   935     // We're done with marking and waiting for remark. We do not need to
   936     // push anything else on the region stack.
   937     return;
   939   HeapWord* finger = _finger;
   941   if (verbose_low())
   942     gclog_or_tty->print_cr("[global] attempting to push "
   943                            "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
   944                            PTR_FORMAT, mr.start(), mr.end(), finger);
   946   if (mr.start() < finger) {
   947     // The finger is always heap region aligned and it is not possible
   948     // for mr to span heap regions.
   949     assert(mr.end() <= finger, "invariant");
   951     // Separated the asserts so that we know which one fires.
   952     assert(mr.start() <= mr.end(),
   953            "region boundaries should fall within the committed space");
   954     assert(_heap_start <= mr.start(),
   955            "region boundaries should fall within the committed space");
   956     assert(mr.end() <= _heap_end,
   957            "region boundaries should fall within the committed space");
   958     if (verbose_low())
   959       gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
   960                              "below the finger, pushing it",
   961                              mr.start(), mr.end());
   963     if (!region_stack_push(mr)) {
   964       if (verbose_low())
   965         gclog_or_tty->print_cr("[global] region stack has overflown.");
   966     }
   967   }
   968 }
   970 void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
   971   // The object is not marked by the caller. We need to at least mark
   972   // it and maybe push it on the stack.
   974   HeapWord* addr = (HeapWord*)p;
   975   if (!_nextMarkBitMap->isMarked(addr)) {
   976     // We definitely need to mark it, irrespective of whether we bail out
   977     // because we're done with marking.
   978     if (_nextMarkBitMap->parMark(addr)) {
   979       if (!concurrent_marking_in_progress() || !_should_gray_objects)
   980         // If we're done with concurrent marking and we're waiting for
   981         // remark, then we're not pushing anything on the stack.
   982         return;
   984       // No OrderAccess::store_load() is needed. It is implicit in the
   985       // CAS done in parMark(addr) above
   986       HeapWord* finger = _finger;
   988       if (addr < finger) {
   989         if (!mark_stack_push(oop(addr))) {
   990           if (verbose_low())
   991             gclog_or_tty->print_cr("[global] global stack overflow "
   992                                    "during parMark");
   993         }
   994       }
   995     }
   996   }
   997 }
   999 class CMConcurrentMarkingTask: public AbstractGangTask {
  1000 private:
  1001   ConcurrentMark*       _cm;
  1002   ConcurrentMarkThread* _cmt;
  1004 public:
  1005   void work(int worker_i) {
  1006     assert(Thread::current()->is_ConcurrentGC_thread(),
  1007            "this should only be done by a conc GC thread");
  1009     double start_vtime = os::elapsedVTime();
  1011     ConcurrentGCThread::stsJoin();
  1013     assert((size_t) worker_i < _cm->active_tasks(), "invariant");
  1014     CMTask* the_task = _cm->task(worker_i);
  1015     the_task->record_start_time();
  1016     if (!_cm->has_aborted()) {
  1017       do {
  1018         double start_vtime_sec = os::elapsedVTime();
  1019         double start_time_sec = os::elapsedTime();
  1020         the_task->do_marking_step(10.0);
  1021         double end_time_sec = os::elapsedTime();
  1022         double end_vtime_sec = os::elapsedVTime();
  1023         double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
  1024         double elapsed_time_sec = end_time_sec - start_time_sec;
  1025         _cm->clear_has_overflown();
  1027         bool ret = _cm->do_yield_check(worker_i);
  1029         jlong sleep_time_ms;
  1030         if (!_cm->has_aborted() && the_task->has_aborted()) {
  1031           sleep_time_ms =
  1032             (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
  1033           ConcurrentGCThread::stsLeave();
  1034           os::sleep(Thread::current(), sleep_time_ms, false);
  1035           ConcurrentGCThread::stsJoin();
  1036         }
  1037         double end_time2_sec = os::elapsedTime();
  1038         double elapsed_time2_sec = end_time2_sec - start_time_sec;
  1040 #if 0
  1041           gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
  1042                                  "overhead %1.4lf",
  1043                                  elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
  1044                                  the_task->conc_overhead(os::elapsedTime()) * 8.0);
  1045           gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
  1046                                  elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
  1047 #endif
  1048       } while (!_cm->has_aborted() && the_task->has_aborted());
  1049     }
  1050     the_task->record_end_time();
  1051     guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
  1053     ConcurrentGCThread::stsLeave();
  1055     double end_vtime = os::elapsedVTime();
  1056     _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
  1057   }
  1059   CMConcurrentMarkingTask(ConcurrentMark* cm,
  1060                           ConcurrentMarkThread* cmt) :
  1061       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
  1063   ~CMConcurrentMarkingTask() { }
  1064 };
  1066 void ConcurrentMark::markFromRoots() {
  1067   // we might be tempted to assert that:
  1068   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  1069   //        "inconsistent argument?");
  1070   // However that wouldn't be right, because it's possible that
  1071   // a safepoint is indeed in progress as a younger generation
  1072   // stop-the-world GC happens even as we mark in this generation.
  1074   _restart_for_overflow = false;
  1076   set_phase(MAX2((size_t) 1, parallel_marking_threads()), true);
  1078   CMConcurrentMarkingTask markingTask(this, cmThread());
  1079   if (parallel_marking_threads() > 0)
  1080     _parallel_workers->run_task(&markingTask);
  1081   else
  1082     markingTask.work(0);
  1083   print_stats();
  1084 }
  1086 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  1087   // world is stopped at this checkpoint
  1088   assert(SafepointSynchronize::is_at_safepoint(),
  1089          "world should be stopped");
  1090   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  1092   // If a full collection has happened, we shouldn't do this.
  1093   if (has_aborted()) {
  1094     g1h->set_marking_complete(); // So bitmap clearing isn't confused
  1095     return;
  1096   }
  1098   if (VerifyDuringGC) {
  1099     HandleMark hm;  // handle scope
  1100     gclog_or_tty->print(" VerifyDuringGC:(before)");
  1101     Universe::heap()->prepare_for_verify();
  1102     Universe::verify(true, false, true);
  1103   }
  1105   G1CollectorPolicy* g1p = g1h->g1_policy();
  1106   g1p->record_concurrent_mark_remark_start();
  1108   double start = os::elapsedTime();
  1110   checkpointRootsFinalWork();
  1112   double mark_work_end = os::elapsedTime();
  1114   weakRefsWork(clear_all_soft_refs);
  1116   if (has_overflown()) {
  1117     // Oops.  We overflowed.  Restart concurrent marking.
  1118     _restart_for_overflow = true;
  1119     // Clear the flag. We do not need it any more.
  1120     clear_has_overflown();
  1121     if (G1TraceMarkStackOverflow)
  1122       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
  1123   } else {
  1124     // We're done with marking.
  1125     // This is the end of the marking cycle; we expect all
  1126     // threads to have SATB queues with active set to true.
  1127     JavaThread::satb_mark_queue_set().set_active_all_threads(
  1128                                                   false, /* new active value */
  1129                                                   true /* expected_active */);
  1131     if (VerifyDuringGC) {
  1132       HandleMark hm;  // handle scope
  1133       gclog_or_tty->print(" VerifyDuringGC:(after)");
  1134       Universe::heap()->prepare_for_verify();
  1135       Universe::heap()->verify(/* allow_dirty */      true,
  1136                                /* silent */           false,
  1137                                /* use_prev_marking */ false);
  1138     }
  1139   }
  1141 #if VERIFY_OBJS_PROCESSED
  1142   _scan_obj_cl.objs_processed = 0;
  1143   ThreadLocalObjQueue::objs_enqueued = 0;
  1144 #endif
  1146   // Statistics
  1147   double now = os::elapsedTime();
  1148   _remark_mark_times.add((mark_work_end - start) * 1000.0);
  1149   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  1150   _remark_times.add((now - start) * 1000.0);
  1152   g1p->record_concurrent_mark_remark_end();
  1153 }
  1156 #define CARD_BM_TEST_MODE 0
  1158 class CalcLiveObjectsClosure: public HeapRegionClosure {
  1160   CMBitMapRO* _bm;
  1161   ConcurrentMark* _cm;
  1162   bool _changed;
  1163   bool _yield;
  1164   size_t _words_done;
  1165   size_t _tot_live;
  1166   size_t _tot_used;
  1167   size_t _regions_done;
  1168   double _start_vtime_sec;
  1170   BitMap* _region_bm;
  1171   BitMap* _card_bm;
  1172   intptr_t _bottom_card_num;
  1173   bool _final;
  1175   void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
  1176     for (intptr_t i = start_card_num; i <= last_card_num; i++) {
  1177 #if CARD_BM_TEST_MODE
  1178       guarantee(_card_bm->at(i - _bottom_card_num), "Should already be set.");
  1179 #else
  1180       _card_bm->par_at_put(i - _bottom_card_num, 1);
  1181 #endif
  1182     }
  1183   }
  1185 public:
  1186   CalcLiveObjectsClosure(bool final,
  1187                          CMBitMapRO *bm, ConcurrentMark *cm,
  1188                          BitMap* region_bm, BitMap* card_bm) :
  1189     _bm(bm), _cm(cm), _changed(false), _yield(true),
  1190     _words_done(0), _tot_live(0), _tot_used(0),
  1191     _region_bm(region_bm), _card_bm(card_bm),_final(final),
  1192     _regions_done(0), _start_vtime_sec(0.0)
  1193   {
  1194     _bottom_card_num =
  1195       intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
  1196                CardTableModRefBS::card_shift);
  1197   }
  1199   // It takes a region that's not empty (i.e., it has at least one
  1200   // live object in it) and sets its corresponding bit on the region
  1201   // bitmap to 1. If the region is "starts humongous" it will also set
  1202   // to 1 the bits on the region bitmap that correspond to its
  1203   // associated "continues humongous" regions.
  1204   void set_bit_for_region(HeapRegion* hr) {
  1205     assert(!hr->continuesHumongous(), "should have filtered those out");
  1207     size_t index = hr->hrs_index();
  1208     if (!hr->startsHumongous()) {
  1209       // Normal (non-humongous) case: just set the bit.
  1210       _region_bm->par_at_put((BitMap::idx_t) index, true);
  1211     } else {
  1212       // Starts humongous case: calculate how many regions are part of
  1213       // this humongous region and then set the bit range. It might
  1214       // have been a bit more efficient to look at the object that
  1215       // spans these humongous regions to calculate their number from
  1216       // the object's size. However, it's a good idea to calculate
  1217       // this based on the metadata itself, and not the region
  1218       // contents, so that this code is not aware of what goes into
  1219       // the humongous regions (in case this changes in the future).
  1220       G1CollectedHeap* g1h = G1CollectedHeap::heap();
  1221       size_t end_index = index + 1;
  1222       while (end_index < g1h->n_regions()) {
  1223         HeapRegion* chr = g1h->region_at(end_index);
  1224         if (!chr->continuesHumongous()) {
  1225           break;
  1226         }
  1227         end_index += 1;
  1228       }
  1229       _region_bm->par_at_put_range((BitMap::idx_t) index,
  1230                                    (BitMap::idx_t) end_index, true);
  1231     }
  1232   }
  1234   bool doHeapRegion(HeapRegion* hr) {
  1235     if (!_final && _regions_done == 0)
  1236       _start_vtime_sec = os::elapsedVTime();
  1238     if (hr->continuesHumongous()) {
  1239       // We will ignore these here and process them when their
  1240       // associated "starts humongous" region is processed (see
  1241       // set_bit_for_heap_region()). Note that we cannot rely on their
  1242       // associated "starts humongous" region to have their bit set to
  1243       // 1 since, due to the region chunking in the parallel region
  1244       // iteration, a "continues humongous" region might be visited
  1245       // before its associated "starts humongous".
  1246       return false;
  1247     }
  1249     HeapWord* nextTop = hr->next_top_at_mark_start();
  1250     HeapWord* start   = hr->top_at_conc_mark_count();
  1251     assert(hr->bottom() <= start && start <= hr->end() &&
  1252            hr->bottom() <= nextTop && nextTop <= hr->end() &&
  1253            start <= nextTop,
  1254            "Preconditions.");
  1255     // Otherwise, record the number of words we'll examine.
  1256     size_t words_done = (nextTop - start);
  1257     // Find the first marked object at or after "start".
  1258     start = _bm->getNextMarkedWordAddress(start, nextTop);
  1259     size_t marked_bytes = 0;
  1261     // Below, the term "card num" means the result of shifting an address
  1262     // by the card shift -- address 0 corresponds to card number 0.  One
  1263     // must subtract the card num of the bottom of the heap to obtain a
  1264     // card table index.
  1265     // The first card num of the sequence of live cards currently being
  1266     // constructed.  -1 ==> no sequence.
  1267     intptr_t start_card_num = -1;
  1268     // The last card num of the sequence of live cards currently being
  1269     // constructed.  -1 ==> no sequence.
  1270     intptr_t last_card_num = -1;
  1272     while (start < nextTop) {
  1273       if (_yield && _cm->do_yield_check()) {
  1274         // We yielded.  It might be for a full collection, in which case
  1275         // all bets are off; terminate the traversal.
  1276         if (_cm->has_aborted()) {
  1277           _changed = false;
  1278           return true;
  1279         } else {
  1280           // Otherwise, it might be a collection pause, and the region
  1281           // we're looking at might be in the collection set.  We'll
  1282           // abandon this region.
  1283           return false;
  1284         }
  1285       }
  1286       oop obj = oop(start);
  1287       int obj_sz = obj->size();
  1288       // The card num of the start of the current object.
  1289       intptr_t obj_card_num =
  1290         intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
  1292       HeapWord* obj_last = start + obj_sz - 1;
  1293       intptr_t obj_last_card_num =
  1294         intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);
  1296       if (obj_card_num != last_card_num) {
  1297         if (start_card_num == -1) {
  1298           assert(last_card_num == -1, "Both or neither.");
  1299           start_card_num = obj_card_num;
  1300         } else {
  1301           assert(last_card_num != -1, "Both or neither.");
  1302           assert(obj_card_num >= last_card_num, "Inv");
  1303           if ((obj_card_num - last_card_num) > 1) {
  1304             // Mark the last run, and start a new one.
  1305             mark_card_num_range(start_card_num, last_card_num);
  1306             start_card_num = obj_card_num;
  1307           }
  1308         }
  1309 #if CARD_BM_TEST_MODE
  1310         /*
  1311         gclog_or_tty->print_cr("Setting bits from %d/%d.",
  1312                                obj_card_num - _bottom_card_num,
  1313                                obj_last_card_num - _bottom_card_num);
  1314         */
  1315         for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
  1316           _card_bm->par_at_put(j - _bottom_card_num, 1);
  1317         }
  1318 #endif
  1319       }
  1320       // In any case, we set the last card num.
  1321       last_card_num = obj_last_card_num;
  1323       marked_bytes += (size_t)obj_sz * HeapWordSize;
  1324       // Find the next marked object after this one.
  1325       start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
  1326       _changed = true;
  1327     }
  1328     // Handle the last range, if any.
  1329     if (start_card_num != -1)
  1330       mark_card_num_range(start_card_num, last_card_num);
  1331     if (_final) {
  1332       // Mark the allocated-since-marking portion...
  1333       HeapWord* tp = hr->top();
  1334       if (nextTop < tp) {
  1335         start_card_num =
  1336           intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
  1337         last_card_num =
  1338           intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
  1339         mark_card_num_range(start_card_num, last_card_num);
  1340         // This definitely means the region has live objects.
  1341         set_bit_for_region(hr);
  1342       }
  1343     }
  1345     hr->add_to_marked_bytes(marked_bytes);
  1346     // Update the live region bitmap.
  1347     if (marked_bytes > 0) {
  1348       set_bit_for_region(hr);
  1349     }
  1350     hr->set_top_at_conc_mark_count(nextTop);
  1351     _tot_live += hr->next_live_bytes();
  1352     _tot_used += hr->used();
  1353     _words_done = words_done;
  1355     if (!_final) {
  1356       ++_regions_done;
  1357       if (_regions_done % 10 == 0) {
  1358         double end_vtime_sec = os::elapsedVTime();
  1359         double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
  1360         if (elapsed_vtime_sec > (10.0 / 1000.0)) {
  1361           jlong sleep_time_ms =
  1362             (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
  1363           os::sleep(Thread::current(), sleep_time_ms, false);
  1364           _start_vtime_sec = end_vtime_sec;
  1365         }
  1366       }
  1367     }
  1369     return false;
  1370   }
  1372   bool changed() { return _changed;  }
  1373   void reset()   { _changed = false; _words_done = 0; }
  1374   void no_yield() { _yield = false; }
  1375   size_t words_done() { return _words_done; }
  1376   size_t tot_live() { return _tot_live; }
  1377   size_t tot_used() { return _tot_used; }
  1378 };
  1381 void ConcurrentMark::calcDesiredRegions() {
  1382   _region_bm.clear();
  1383   _card_bm.clear();
  1384   CalcLiveObjectsClosure calccl(false /*final*/,
  1385                                 nextMarkBitMap(), this,
  1386                                 &_region_bm, &_card_bm);
  1387   G1CollectedHeap *g1h = G1CollectedHeap::heap();
  1388   g1h->heap_region_iterate(&calccl);
  1390   do {
  1391     calccl.reset();
  1392     g1h->heap_region_iterate(&calccl);
  1393   } while (calccl.changed());
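       // Parallel task used during the cleanup pause: each worker re-runs the
       // liveness counting (in "final" mode, without yielding) over the regions
       // it claims, recording its live/used byte totals so they can be summed
       // by live_bytes() / used_bytes() below.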
  1396 class G1ParFinalCountTask: public AbstractGangTask {
  1397 protected:
  1398   G1CollectedHeap* _g1h;
  1399   CMBitMap* _bm;
  1400   size_t _n_workers;
  1401   size_t *_live_bytes;
  1402   size_t *_used_bytes;
  1403   BitMap* _region_bm;
  1404   BitMap* _card_bm;
  1405 public:
  1406   G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
  1407                       BitMap* region_bm, BitMap* card_bm) :
  1408     AbstractGangTask("G1 final counting"), _g1h(g1h),
  1409     _bm(bm), _region_bm(region_bm), _card_bm(card_bm)
  1411     if (ParallelGCThreads > 0)
  1412       _n_workers = _g1h->workers()->total_workers();
  1413     else
  1414       _n_workers = 1;
  1415     _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
  1416     _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
  1419   ~G1ParFinalCountTask() {
  1420     FREE_C_HEAP_ARRAY(size_t, _live_bytes);
  1421     FREE_C_HEAP_ARRAY(size_t, _used_bytes);
  1424   void work(int i) {
  1425     CalcLiveObjectsClosure calccl(true /*final*/,
  1426                                   _bm, _g1h->concurrent_mark(),
  1427                                   _region_bm, _card_bm);
  1428     calccl.no_yield();
  1429     if (ParallelGCThreads > 0) {
  1430       _g1h->heap_region_par_iterate_chunked(&calccl, i,
  1431                                             HeapRegion::FinalCountClaimValue);
  1432     } else {
  1433       _g1h->heap_region_iterate(&calccl);
  1435     assert(calccl.complete(), "Shouldn't have yielded!");
  1437     assert((size_t) i < _n_workers, "invariant");
  1438     _live_bytes[i] = calccl.tot_live();
  1439     _used_bytes[i] = calccl.tot_used();
  1441   size_t live_bytes()  {
  1442     size_t live_bytes = 0;
  1443     for (size_t i = 0; i < _n_workers; ++i)
  1444       live_bytes += _live_bytes[i];
  1445     return live_bytes;
  1447   size_t used_bytes()  {
  1448     size_t used_bytes = 0;
  1449     for (size_t i = 0; i < _n_workers; ++i)
  1450       used_bytes += _used_bytes[i];
  1451     return used_bytes;
  1453 };
  1455 class G1ParNoteEndTask;
  1457 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  1458   G1CollectedHeap* _g1;
  1459   int _worker_num;
  1460   size_t _max_live_bytes;
  1461   size_t _regions_claimed;
  1462   size_t _freed_bytes;
  1463   size_t _cleared_h_regions;
  1464   size_t _freed_regions;
  1465   UncleanRegionList* _unclean_region_list;
  1466   double _claimed_region_time;
  1467   double _max_region_time;
  1469 public:
  1470   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
  1471                              UncleanRegionList* list,
  1472                              int worker_num);
  1473   size_t freed_bytes() { return _freed_bytes; }
  1474   size_t cleared_h_regions() { return _cleared_h_regions; }
  1475   size_t freed_regions() { return  _freed_regions; }
  1476   UncleanRegionList* unclean_region_list() {
  1477     return _unclean_region_list;
  1480   bool doHeapRegion(HeapRegion *r);
  1482   size_t max_live_bytes() { return _max_live_bytes; }
  1483   size_t regions_claimed() { return _regions_claimed; }
  1484   double claimed_region_time_sec() { return _claimed_region_time; }
  1485   double max_region_time_sec() { return _max_region_time; }
  1486 };
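       // Parallel "note end of marking" task: each worker applies the closure
       // above to the regions it claims, finishes freeing its regions, and
       // accumulates the freed/live byte counts under ParGCRareEvent_lock.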
  1488 class G1ParNoteEndTask: public AbstractGangTask {
  1489   friend class G1NoteEndOfConcMarkClosure;
  1490 protected:
  1491   G1CollectedHeap* _g1h;
  1492   size_t _max_live_bytes;
  1493   size_t _freed_bytes;
  1494   ConcurrentMark::ParCleanupThreadState** _par_cleanup_thread_state;
  1495 public:
  1496   G1ParNoteEndTask(G1CollectedHeap* g1h,
  1497                    ConcurrentMark::ParCleanupThreadState**
  1498                    par_cleanup_thread_state) :
  1499     AbstractGangTask("G1 note end"), _g1h(g1h),
  1500     _max_live_bytes(0), _freed_bytes(0),
  1501     _par_cleanup_thread_state(par_cleanup_thread_state)
  1502   {}
  1504   void work(int i) {
  1505     double start = os::elapsedTime();
  1506     G1NoteEndOfConcMarkClosure g1_note_end(_g1h,
  1507                                            &_par_cleanup_thread_state[i]->list,
  1508                                            i);
  1509     if (ParallelGCThreads > 0) {
  1510       _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
  1511                                             HeapRegion::NoteEndClaimValue);
  1512     } else {
  1513       _g1h->heap_region_iterate(&g1_note_end);
  1515     assert(g1_note_end.complete(), "Shouldn't have yielded!");
  1517     // Now finish up freeing the current thread's regions.
  1518     _g1h->finish_free_region_work(g1_note_end.freed_bytes(),
  1519                                   g1_note_end.cleared_h_regions(),
  1520                                   0, NULL);
  1522       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  1523       _max_live_bytes += g1_note_end.max_live_bytes();
  1524       _freed_bytes += g1_note_end.freed_bytes();
  1526     double end = os::elapsedTime();
  1527     if (G1PrintParCleanupStats) {
  1528       gclog_or_tty->print("     Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
  1529                           "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
  1530                           i, start, end, (end-start)*1000.0,
  1531                           g1_note_end.regions_claimed(),
  1532                           g1_note_end.claimed_region_time_sec()*1000.0,
  1533                           g1_note_end.max_region_time_sec()*1000.0);
  1536   size_t max_live_bytes() { return _max_live_bytes; }
  1537   size_t freed_bytes() { return _freed_bytes; }
  1538 };
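       // Parallel task that scrubs the remembered sets, using the region and
       // card liveness bitmaps produced by the counting phase.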
  1540 class G1ParScrubRemSetTask: public AbstractGangTask {
  1541 protected:
  1542   G1RemSet* _g1rs;
  1543   BitMap* _region_bm;
  1544   BitMap* _card_bm;
  1545 public:
  1546   G1ParScrubRemSetTask(G1CollectedHeap* g1h,
  1547                        BitMap* region_bm, BitMap* card_bm) :
  1548     AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
  1549     _region_bm(region_bm), _card_bm(card_bm)
  1550   {}
  1552   void work(int i) {
  1553     if (ParallelGCThreads > 0) {
  1554       _g1rs->scrub_par(_region_bm, _card_bm, i,
  1555                        HeapRegion::ScrubRemSetClaimValue);
  1556     } else {
  1557       _g1rs->scrub(_region_bm, _card_bm);
  1561 };
  1563 G1NoteEndOfConcMarkClosure::
  1564 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
  1565                            UncleanRegionList* list,
  1566                            int worker_num)
  1567   : _g1(g1), _worker_num(worker_num),
  1568     _max_live_bytes(0), _regions_claimed(0),
  1569     _freed_bytes(0), _cleared_h_regions(0), _freed_regions(0),
  1570     _claimed_region_time(0.0), _max_region_time(0.0),
  1571     _unclean_region_list(list)
  1572 {}
  1574 bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *r) {
  1575   // We use a claim value of zero here because all regions
  1576   // were claimed with value 1 in the FinalCount task.
  1577   r->reset_gc_time_stamp();
  1578   if (!r->continuesHumongous()) {
  1579     double start = os::elapsedTime();
  1580     _regions_claimed++;
  1581     r->note_end_of_marking();
  1582     _max_live_bytes += r->max_live_bytes();
  1583     _g1->free_region_if_totally_empty_work(r,
  1584                                            _freed_bytes,
  1585                                            _cleared_h_regions,
  1586                                            _freed_regions,
  1587                                            _unclean_region_list,
  1588                                            true /*par*/);
  1589     double region_time = (os::elapsedTime() - start);
  1590     _claimed_region_time += region_time;
  1591     if (region_time > _max_region_time) _max_region_time = region_time;
  1593   return false;
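       // The cleanup pause: re-count liveness with the world stopped, install
       // the next marking bitmap as "prev", note the end of marking in every
       // region (freeing completely empty ones), optionally scrub the
       // remembered sets, and hand the resulting statistics to the policy.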
  1596 void ConcurrentMark::cleanup() {
  1597   // world is stopped at this checkpoint
  1598   assert(SafepointSynchronize::is_at_safepoint(),
  1599          "world should be stopped");
  1600   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  1602   // If a full collection has happened, we shouldn't do this.
  1603   if (has_aborted()) {
  1604     g1h->set_marking_complete(); // So bitmap clearing isn't confused
  1605     return;
  1608   if (VerifyDuringGC) {
  1609     HandleMark hm;  // handle scope
  1610     gclog_or_tty->print(" VerifyDuringGC:(before)");
  1611     Universe::heap()->prepare_for_verify();
  1612     Universe::verify(/* allow dirty  */ true,
  1613                      /* silent       */ false,
  1614                      /* prev marking */ true);
  1617   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
  1618   g1p->record_concurrent_mark_cleanup_start();
  1620   double start = os::elapsedTime();
  1622   // Do counting once more with the world stopped for good measure.
  1623   G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
  1624                                         &_region_bm, &_card_bm);
  1625   if (ParallelGCThreads > 0) {
  1626     assert(g1h->check_heap_region_claim_values(
  1627                                                HeapRegion::InitialClaimValue),
  1628            "sanity check");
  1630     int n_workers = g1h->workers()->total_workers();
  1631     g1h->set_par_threads(n_workers);
  1632     g1h->workers()->run_task(&g1_par_count_task);
  1633     g1h->set_par_threads(0);
  1635     assert(g1h->check_heap_region_claim_values(
  1636                                              HeapRegion::FinalCountClaimValue),
  1637            "sanity check");
  1638   } else {
  1639     g1_par_count_task.work(0);
  1642   size_t known_garbage_bytes =
  1643     g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
  1644 #if 0
  1645   gclog_or_tty->print_cr("used %1.2lf, live %1.2lf, garbage %1.2lf",
  1646                          (double) g1_par_count_task.used_bytes() / (double) (1024 * 1024),
  1647                          (double) g1_par_count_task.live_bytes() / (double) (1024 * 1024),
  1648                          (double) known_garbage_bytes / (double) (1024 * 1024));
  1649 #endif // 0
  1650   g1p->set_known_garbage_bytes(known_garbage_bytes);
  1652   size_t start_used_bytes = g1h->used();
  1653   _at_least_one_mark_complete = true;
  1654   g1h->set_marking_complete();
  1656   double count_end = os::elapsedTime();
  1657   double this_final_counting_time = (count_end - start);
  1658   if (G1PrintParCleanupStats) {
  1659     gclog_or_tty->print_cr("Cleanup:");
  1660     gclog_or_tty->print_cr("  Finalize counting: %8.3f ms",
  1661                            this_final_counting_time*1000.0);
  1663   _total_counting_time += this_final_counting_time;
  1665   // Install newly created mark bitMap as "prev".
  1666   swapMarkBitMaps();
  1668   g1h->reset_gc_time_stamp();
  1670   // Note end of marking in all heap regions.
  1671   double note_end_start = os::elapsedTime();
  1672   G1ParNoteEndTask g1_par_note_end_task(g1h, _par_cleanup_thread_state);
  1673   if (ParallelGCThreads > 0) {
  1674     int n_workers = g1h->workers()->total_workers();
  1675     g1h->set_par_threads(n_workers);
  1676     g1h->workers()->run_task(&g1_par_note_end_task);
  1677     g1h->set_par_threads(0);
  1679     assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
  1680            "sanity check");
  1681   } else {
  1682     g1_par_note_end_task.work(0);
  1684   g1h->set_unclean_regions_coming(true);
  1685   double note_end_end = os::elapsedTime();
  1686   // Tell the mutators that there might be unclean regions coming...
  1687   if (G1PrintParCleanupStats) {
  1688     gclog_or_tty->print_cr("  note end of marking: %8.3f ms.",
  1689                            (note_end_end - note_end_start)*1000.0);
  1693   // call below, since it affects the metric by which we sort the heap
  1694   // regions.
  1695   if (G1ScrubRemSets) {
  1696     double rs_scrub_start = os::elapsedTime();
  1697     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
  1698     if (ParallelGCThreads > 0) {
  1699       int n_workers = g1h->workers()->total_workers();
  1700       g1h->set_par_threads(n_workers);
  1701       g1h->workers()->run_task(&g1_par_scrub_rs_task);
  1702       g1h->set_par_threads(0);
  1704       assert(g1h->check_heap_region_claim_values(
  1705                                             HeapRegion::ScrubRemSetClaimValue),
  1706              "sanity check");
  1707     } else {
  1708       g1_par_scrub_rs_task.work(0);
  1711     double rs_scrub_end = os::elapsedTime();
  1712     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
  1713     _total_rs_scrub_time += this_rs_scrub_time;
  1716   // this will also free any regions totally full of garbage objects,
  1717   // and sort the regions.
  1718   g1h->g1_policy()->record_concurrent_mark_cleanup_end(
  1719                         g1_par_note_end_task.freed_bytes(),
  1720                         g1_par_note_end_task.max_live_bytes());
  1722   // Statistics.
  1723   double end = os::elapsedTime();
  1724   _cleanup_times.add((end - start) * 1000.0);
  1726   // G1CollectedHeap::heap()->print();
  1727   // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
  1728   // G1CollectedHeap::heap()->get_gc_time_stamp());
  1730   if (PrintGC || PrintGCDetails) {
  1731     g1h->print_size_transition(gclog_or_tty,
  1732                                start_used_bytes,
  1733                                g1h->used(),
  1734                                g1h->capacity());
  1737   size_t cleaned_up_bytes = start_used_bytes - g1h->used();
  1738   g1p->decrease_known_garbage_bytes(cleaned_up_bytes);
  1740   // We need to make this be a "collection" so any collection pause that
  1741   // races with it goes around and waits for completeCleanup to finish.
  1742   g1h->increment_total_collections();
  1744   if (VerifyDuringGC) {
  1745     HandleMark hm;  // handle scope
  1746     gclog_or_tty->print(" VerifyDuringGC:(after)");
  1747     Universe::heap()->prepare_for_verify();
  1748     Universe::verify(/* allow dirty  */ true,
  1749                      /* silent       */ false,
  1750                      /* prev marking */ true);
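       // Runs after the cleanup pause: drains each worker's unclean-region
       // list, clearing the remembered set of every region on it and handing
       // the region back to the heap's unclean list.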
  1754 void ConcurrentMark::completeCleanup() {
  1755   // A full collection intervened.
  1756   if (has_aborted()) return;
  1758   int first = 0;
  1759   int last = (int)MAX2(ParallelGCThreads, (size_t)1);
  1760   for (int t = 0; t < last; t++) {
  1761     UncleanRegionList* list = &_par_cleanup_thread_state[t]->list;
  1762     assert(list->well_formed(), "Inv");
  1763     HeapRegion* hd = list->hd();
  1764     while (hd != NULL) {
  1765       // Clear the region's remembered set and return it to the heap's unclean list.
  1766       hd->rem_set()->clear();
  1767       HeapRegion* next_hd = hd->next_from_unclean_list();
  1768       (void)list->pop();
  1769       assert(list->hd() == next_hd, "how not?");
  1770       _g1h->put_region_on_unclean_list(hd);
  1771       if (!hd->isHumongous()) {
  1772         // Increment the _free_regions count by 1.
  1773         _g1h->finish_free_region_work(0, 0, 1, NULL);
  1775       hd = list->hd();
  1776       assert(hd == next_hd, "how not?");
  1782 class G1CMIsAliveClosure: public BoolObjectClosure {
  1783   G1CollectedHeap* _g1;
  1784  public:
  1785   G1CMIsAliveClosure(G1CollectedHeap* g1) :
  1786     _g1(g1)
  1787   {}
  1789   void do_object(oop obj) {
  1790     assert(false, "not to be invoked");
  1792   bool do_object_b(oop obj) {
  1793     HeapWord* addr = (HeapWord*)obj;
  1794     return addr != NULL &&
  1795            (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
  1797 };
  1799 class G1CMKeepAliveClosure: public OopClosure {
  1800   G1CollectedHeap* _g1;
  1801   ConcurrentMark*  _cm;
  1802   CMBitMap*        _bitMap;
  1803  public:
  1804   G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
  1805                        CMBitMap* bitMap) :
  1806     _g1(g1), _cm(cm),
  1807     _bitMap(bitMap) {}
  1809   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  1810   virtual void do_oop(      oop* p) { do_oop_work(p); }
  1812   template <class T> void do_oop_work(T* p) {
  1813     oop thisOop = oopDesc::load_decode_heap_oop(p);
  1814     HeapWord* addr = (HeapWord*)thisOop;
  1815     if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(thisOop)) {
  1816       _bitMap->mark(addr);
  1817       _cm->mark_stack_push(thisOop);
  1820 };
  1822 class G1CMDrainMarkingStackClosure: public VoidClosure {
  1823   CMMarkStack*                  _markStack;
  1824   CMBitMap*                     _bitMap;
  1825   G1CMKeepAliveClosure*         _oopClosure;
  1826  public:
  1827   G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
  1828                                G1CMKeepAliveClosure* oopClosure) :
  1829     _bitMap(bitMap),
  1830     _markStack(markStack),
  1831     _oopClosure(oopClosure)
  1832   {}
  1834   void do_void() {
  1835     _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
  1837 };
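       // Reference processing at the end of remark: the three closures above
       // (is-alive, keep-alive, drain-the-mark-stack) are handed to the
       // ReferenceProcessor; if the mark stack overflows we record that so
       // marking can be restarted, and finally stale entries are unlinked
       // from the SymbolTable and StringTable.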
  1839 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
  1840   ResourceMark rm;
  1841   HandleMark   hm;
  1842   G1CollectedHeap* g1h   = G1CollectedHeap::heap();
  1843   ReferenceProcessor* rp = g1h->ref_processor();
  1845   // Process weak references.
  1846   rp->setup_policy(clear_all_soft_refs);
  1847   assert(_markStack.isEmpty(), "mark stack should be empty");
  1849   G1CMIsAliveClosure   g1IsAliveClosure  (g1h);
  1850   G1CMKeepAliveClosure g1KeepAliveClosure(g1h, this, nextMarkBitMap());
  1851   G1CMDrainMarkingStackClosure
  1852     g1DrainMarkingStackClosure(nextMarkBitMap(), &_markStack,
  1853                                &g1KeepAliveClosure);
  1855   // XXXYYY  Also: copy the parallel ref processing code from CMS.
  1856   rp->process_discovered_references(&g1IsAliveClosure,
  1857                                     &g1KeepAliveClosure,
  1858                                     &g1DrainMarkingStackClosure,
  1859                                     NULL);
  1860   assert(_markStack.overflow() || _markStack.isEmpty(),
  1861          "mark stack should be empty (unless it overflowed)");
  1862   if (_markStack.overflow()) {
  1863     set_has_overflown();
  1866   rp->enqueue_discovered_references();
  1867   rp->verify_no_references_recorded();
  1868   assert(!rp->discovery_enabled(), "should have been disabled");
  1870   // Now clean up stale oops in SymbolTable and StringTable
  1871   SymbolTable::unlink(&g1IsAliveClosure);
  1872   StringTable::unlink(&g1IsAliveClosure);
  1875 void ConcurrentMark::swapMarkBitMaps() {
  1876   CMBitMapRO* temp = _prevMarkBitMap;
  1877   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
  1878   _nextMarkBitMap  = (CMBitMap*)  temp;
  1881 class CMRemarkTask: public AbstractGangTask {
  1882 private:
  1883   ConcurrentMark *_cm;
  1885 public:
  1886   void work(int worker_i) {
  1887     // Since all available tasks are actually started, we should
  1888     // only proceed if we're supposed to be active.
  1889     if ((size_t)worker_i < _cm->active_tasks()) {
  1890       CMTask* task = _cm->task(worker_i);
  1891       task->record_start_time();
  1892       do {
  1893         task->do_marking_step(1000000000.0 /* something very large */);
  1894       } while (task->has_aborted() && !_cm->has_overflown());
  1895       // If we overflow, then we do not want to restart. We instead
  1896       // want to abort remark and do concurrent marking again.
  1897       task->record_end_time();
  1901   CMRemarkTask(ConcurrentMark* cm) :
  1902     AbstractGangTask("Par Remark"), _cm(cm) { }
  1903 };
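       // The bulk of the remark pause: run the marking tasks (in parallel if
       // ParallelGCThreads > 0) until they either complete or the global mark
       // stack overflows, in which case restart_for_overflow() below will
       // cause concurrent marking to be redone.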
  1905 void ConcurrentMark::checkpointRootsFinalWork() {
  1906   ResourceMark rm;
  1907   HandleMark   hm;
  1908   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  1910   g1h->ensure_parsability(false);
  1912   if (ParallelGCThreads > 0) {
  1913     G1CollectedHeap::StrongRootsScope srs(g1h);
  1914     // this is remark, so we'll use up all available threads
  1915     int active_workers = ParallelGCThreads;
  1916     set_phase(active_workers, false);
  1918     CMRemarkTask remarkTask(this);
  1919     // We will start all available threads, even if we decide that the
  1920     // active_workers will be fewer. The extra ones will just bail out
  1921     // immediately.
  1922     int n_workers = g1h->workers()->total_workers();
  1923     g1h->set_par_threads(n_workers);
  1924     g1h->workers()->run_task(&remarkTask);
  1925     g1h->set_par_threads(0);
  1926   } else {
  1927     G1CollectedHeap::StrongRootsScope srs(g1h);
  1928     // this is remark, so we'll use up all available threads
  1929     int active_workers = 1;
  1930     set_phase(active_workers, false);
  1932     CMRemarkTask remarkTask(this);
  1933     // We will start all available threads, even if we decide that the
  1934     // active_workers will be fewer. The extra ones will just bail out
  1935     // immediately.
  1936     remarkTask.work(0);
  1938   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  1939   guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");
  1941   print_stats();
  1943   if (!restart_for_overflow())
  1944     set_non_marking_state();
  1946 #if VERIFY_OBJS_PROCESSED
  1947   if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
  1948     gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
  1949                            _scan_obj_cl.objs_processed,
  1950                            ThreadLocalObjQueue::objs_enqueued);
  1951     guarantee(_scan_obj_cl.objs_processed ==
  1952               ThreadLocalObjQueue::objs_enqueued,
  1953               "Different number of objs processed and enqueued.");
  1955 #endif
  1958 #ifndef PRODUCT
  1960 class ReachablePrinterOopClosure: public OopClosure {
  1961 private:
  1962   G1CollectedHeap* _g1h;
  1963   CMBitMapRO*      _bitmap;
  1964   outputStream*    _out;
  1965   bool             _use_prev_marking;
  1967 public:
  1968   ReachablePrinterOopClosure(CMBitMapRO*   bitmap,
  1969                              outputStream* out,
  1970                              bool          use_prev_marking) :
  1971     _g1h(G1CollectedHeap::heap()),
  1972     _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking) { }
  1974   void do_oop(narrowOop* p) { do_oop_work(p); }
  1975   void do_oop(      oop* p) { do_oop_work(p); }
  1977   template <class T> void do_oop_work(T* p) {
  1978     oop         obj = oopDesc::load_decode_heap_oop(p);
  1979     const char* str = NULL;
  1980     const char* str2 = "";
  1982     if (!_g1h->is_in_g1_reserved(obj))
  1983       str = "outside G1 reserved";
  1984     else {
  1985       HeapRegion* hr  = _g1h->heap_region_containing(obj);
  1986       guarantee(hr != NULL, "invariant");
  1987       bool over_tams = false;
  1988       if (_use_prev_marking) {
  1989         over_tams = hr->obj_allocated_since_prev_marking(obj);
  1990       } else {
  1991         over_tams = hr->obj_allocated_since_next_marking(obj);
  1994       if (over_tams) {
  1995         str = "over TAMS";
  1996         if (_bitmap->isMarked((HeapWord*) obj)) {
  1997           str2 = " AND MARKED";
  1999       } else if (_bitmap->isMarked((HeapWord*) obj)) {
  2000         str = "marked";
  2001       } else {
  2002         str = "#### NOT MARKED ####";
  2006     _out->print_cr("    "PTR_FORMAT" contains "PTR_FORMAT" %s%s",
  2007                    p, (void*) obj, str, str2);
  2009 };
  2011 class ReachablePrinterClosure: public BitMapClosure {
  2012 private:
  2013   CMBitMapRO*   _bitmap;
  2014   outputStream* _out;
  2015   bool          _use_prev_marking;
  2017 public:
  2018   ReachablePrinterClosure(CMBitMapRO*   bitmap,
  2019                           outputStream* out,
  2020                           bool          use_prev_marking) :
  2021     _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking) { }
  2023   bool do_bit(size_t offset) {
  2024     HeapWord* addr = _bitmap->offsetToHeapWord(offset);
  2025     ReachablePrinterOopClosure oopCl(_bitmap, _out, _use_prev_marking);
  2027     _out->print_cr("  obj "PTR_FORMAT", offset %10d (marked)", addr, offset);
  2028     oop(addr)->oop_iterate(&oopCl);
  2029     _out->print_cr("");
  2031     return true;
  2033 };
  2035 class ObjInRegionReachablePrinterClosure : public ObjectClosure {
  2036 private:
  2037   CMBitMapRO*   _bitmap;
  2038   outputStream* _out;
  2039   bool          _use_prev_marking;
  2041 public:
  2042   ObjInRegionReachablePrinterClosure(CMBitMapRO*   bitmap,
  2043                                      outputStream* out,
  2044                                      bool          use_prev_marking) :
  2045     _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking) { }
  2047   void do_object(oop o) {
  2048     ReachablePrinterOopClosure oopCl(_bitmap, _out, _use_prev_marking);
  2050     _out->print_cr("  obj "PTR_FORMAT" (over TAMS)", (void*) o);
  2051     o->oop_iterate(&oopCl);
  2052     _out->print_cr("");
  2054 };
  2056 class RegionReachablePrinterClosure : public HeapRegionClosure {
  2057 private:
  2058   CMBitMapRO*   _bitmap;
  2059   outputStream* _out;
  2060   bool          _use_prev_marking;
  2062 public:
  2063   bool doHeapRegion(HeapRegion* hr) {
  2064     HeapWord* b = hr->bottom();
  2065     HeapWord* e = hr->end();
  2066     HeapWord* t = hr->top();
  2067     HeapWord* p = NULL;
  2068     if (_use_prev_marking) {
  2069       p = hr->prev_top_at_mark_start();
  2070     } else {
  2071       p = hr->next_top_at_mark_start();
  2073     _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
  2074                    "TAMS: "PTR_FORMAT, b, e, t, p);
  2075     _out->print_cr("");
  2077     ObjInRegionReachablePrinterClosure ocl(_bitmap, _out, _use_prev_marking);
  2078     hr->object_iterate_mem_careful(MemRegion(p, t), &ocl);
  2080     return false;
  2083   RegionReachablePrinterClosure(CMBitMapRO*   bitmap,
  2084                                 outputStream* out,
  2085                                 bool          use_prev_marking) :
  2086     _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking) { }
  2087 };
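       // Debug-only dump of reachability information: writes a file named
       // <G1PrintReachableBaseFile>.<str>, listing regions whose TAMS is below
       // top and every object marked on the chosen bitmap.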
  2089 void ConcurrentMark::print_reachable(bool use_prev_marking, const char* str) {
  2090   gclog_or_tty->print_cr("== Doing reachable object dump... ");
  2092   if (G1PrintReachableBaseFile == NULL) {
  2093     gclog_or_tty->print_cr("  #### error: no base file defined");
  2094     return;
  2097   if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
  2098       (JVM_MAXPATHLEN - 1)) {
  2099     gclog_or_tty->print_cr("  #### error: file name too long");
  2100     return;
  2103   char file_name[JVM_MAXPATHLEN];
  2104   sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
  2105   gclog_or_tty->print_cr("  dumping to file %s", file_name);
  2107   fileStream fout(file_name);
  2108   if (!fout.is_open()) {
  2109     gclog_or_tty->print_cr("  #### error: could not open file");
  2110     return;
  2113   outputStream* out = &fout;
  2115   CMBitMapRO* bitmap = NULL;
  2116   if (use_prev_marking) {
  2117     bitmap = _prevMarkBitMap;
  2118   } else {
  2119     bitmap = _nextMarkBitMap;
  2122   out->print_cr("-- USING %s", (use_prev_marking) ? "PTAMS" : "NTAMS");
  2123   out->cr();
  2125   RegionReachablePrinterClosure rcl(bitmap, out, use_prev_marking);
  2126   out->print_cr("--- ITERATING OVER REGIONS WITH TAMS < TOP");
  2127   out->cr();
  2128   _g1h->heap_region_iterate(&rcl);
  2129   out->cr();
  2131   ReachablePrinterClosure cl(bitmap, out, use_prev_marking);
  2132   out->print_cr("--- ITERATING OVER MARKED OBJECTS ON THE BITMAP");
  2133   out->cr();
  2134   bitmap->iterate(&cl);
  2135   out->cr();
  2137   gclog_or_tty->print_cr("  done");
  2140 #endif // PRODUCT
  2142 // This note is for drainAllSATBBuffers and the code in between.
  2143 // In the future we could reuse a task to do this work during an
  2144 // evacuation pause (since now tasks are not active and can be claimed
  2145 // during an evacuation pause). This was a late change to the code and
  2146 // is currently not being taken advantage of.
  2148 class CMGlobalObjectClosure : public ObjectClosure {
  2149 private:
  2150   ConcurrentMark* _cm;
  2152 public:
  2153   void do_object(oop obj) {
  2154     _cm->deal_with_reference(obj);
  2157   CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
  2158 };
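       // Grays an object found while draining SATB buffers during a pause:
       // if it is in the heap and not yet marked, mark it on the next bitmap
       // and, if it lies below the global finger, push it on the global stack.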
  2160 void ConcurrentMark::deal_with_reference(oop obj) {
  2161   if (verbose_high())
  2162     gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
  2163                            (void*) obj);
  2166   HeapWord* objAddr = (HeapWord*) obj;
  2167   assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
  2168   if (_g1h->is_in_g1_reserved(objAddr)) {
  2169     assert(obj != NULL, "is_in_g1_reserved should ensure this");
  2170     HeapRegion* hr = _g1h->heap_region_containing(obj);
  2171     if (_g1h->is_obj_ill(obj, hr)) {
  2172       if (verbose_high())
  2173         gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
  2174                                "marked", (void*) obj);
  2176       // we need to mark it first
  2177       if (_nextMarkBitMap->parMark(objAddr)) {
  2178         // No OrderAccess::store_load() is needed. It is implicit in the
  2179         // CAS done in parMark(objAddr) above
  2180         HeapWord* finger = _finger;
  2181         if (objAddr < finger) {
  2182           if (verbose_high())
  2183             gclog_or_tty->print_cr("[global] below the global finger "
  2184                                    "("PTR_FORMAT"), pushing it", finger);
  2185           if (!mark_stack_push(obj)) {
  2186             if (verbose_low())
  2187               gclog_or_tty->print_cr("[global] global stack overflow during "
  2188                                      "deal_with_reference");
  2196 void ConcurrentMark::drainAllSATBBuffers() {
  2197   CMGlobalObjectClosure oc(this);
  2198   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  2199   satb_mq_set.set_closure(&oc);
  2201   while (satb_mq_set.apply_closure_to_completed_buffer()) {
  2202     if (verbose_medium())
  2203       gclog_or_tty->print_cr("[global] processed an SATB buffer");
  2206   // no need to check whether we should do this, as this is only
  2207   // called during an evacuation pause
  2208   satb_mq_set.iterate_closure_all_threads();
  2210   satb_mq_set.set_closure(NULL);
  2211   assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
  2214 void ConcurrentMark::markPrev(oop p) {
  2215   // Note we are overriding the read-only view of the prev map here, via
  2216   // the cast.
  2217   ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
  2220 void ConcurrentMark::clear(oop p) {
  2221   assert(p != NULL && p->is_oop(), "expected an oop");
  2222   HeapWord* addr = (HeapWord*)p;
  2223   assert(addr >= _nextMarkBitMap->startWord() &&
  2224          addr < _nextMarkBitMap->endWord(), "in a region");
  2226   _nextMarkBitMap->clear(addr);
  2229 void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
  2230   // Note we are overriding the read-only view of the prev map here, via
  2231   // the cast.
  2232   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
  2233   _nextMarkBitMap->clearRange(mr);
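       // Claims the next region to be scanned by task 'task_num': atomically
       // bumps the global finger to the end of the region it points into and
       // returns that region if it is non-empty; returns NULL if the region
       // was empty or another task moved the finger first.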
  2236 HeapRegion*
  2237 ConcurrentMark::claim_region(int task_num) {
  2238   // "checkpoint" the finger
  2239   HeapWord* finger = _finger;
  2241   // _heap_end will not change underneath our feet; it only changes at
  2242   // yield points.
  2243   while (finger < _heap_end) {
  2244     assert(_g1h->is_in_g1_reserved(finger), "invariant");
  2246     // is the gap between reading the finger and doing the CAS too long?
  2248     HeapRegion* curr_region   = _g1h->heap_region_containing(finger);
  2249     HeapWord*   bottom        = curr_region->bottom();
  2250     HeapWord*   end           = curr_region->end();
  2251     HeapWord*   limit         = curr_region->next_top_at_mark_start();
  2253     if (verbose_low())
  2254       gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
  2255                              "["PTR_FORMAT", "PTR_FORMAT"), "
  2256                              "limit = "PTR_FORMAT,
  2257                              task_num, curr_region, bottom, end, limit);
  2259     HeapWord* res =
  2260       (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
  2261     if (res == finger) {
  2262       // we succeeded
  2264       // notice that _finger == end cannot be guaranteed here since
  2265       // someone else might have moved the finger even further
  2266       assert(_finger >= end, "the finger should have moved forward");
  2268       if (verbose_low())
  2269         gclog_or_tty->print_cr("[%d] we were successful with region = "
  2270                                PTR_FORMAT, task_num, curr_region);
  2272       if (limit > bottom) {
  2273         if (verbose_low())
  2274           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
  2275                                  "returning it ", task_num, curr_region);
  2276         return curr_region;
  2277       } else {
  2278         assert(limit == bottom,
  2279                "the region limit should be at bottom");
  2280         if (verbose_low())
  2281           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
  2282                                  "returning NULL", task_num, curr_region);
  2283         // we return NULL and the caller should try calling
  2284         // claim_region() again.
  2285         return NULL;
  2287     } else {
  2288       assert(_finger > finger, "the finger should have moved forward");
  2289       if (verbose_low())
  2290         gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
  2291                                "global finger = "PTR_FORMAT", "
  2292                                "our finger = "PTR_FORMAT,
  2293                                task_num, _finger, finger);
  2295       // read it again
  2296       finger = _finger;
  2300   return NULL;
  2303 void ConcurrentMark::oops_do(OopClosure* cl) {
  2304   if (_markStack.size() > 0 && verbose_low())
  2305     gclog_or_tty->print_cr("[global] scanning the global marking stack, "
  2306                            "size = %d", _markStack.size());
  2307   // we first iterate over the contents of the mark stack...
  2308   _markStack.oops_do(cl);
  2310   for (int i = 0; i < (int)_max_task_num; ++i) {
  2311     OopTaskQueue* queue = _task_queues->queue((int)i);
  2313     if (queue->size() > 0 && verbose_low())
  2314       gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
  2315                              "size = %d", i, queue->size());
  2317     // ...then over the contents of the all the task queues.
  2318     queue->oops_do(cl);
  2321   // finally, invalidate any entries in the region stack that
  2322   // point into the collection set
  2323   if (_regionStack.invalidate_entries_into_cset()) {
  2324     // otherwise, any gray objects copied during the evacuation pause
  2325     // might not be visited.
  2326     assert(_should_gray_objects, "invariant");
  2330 void ConcurrentMark::clear_marking_state() {
  2331   _markStack.setEmpty();
  2332   _markStack.clear_overflow();
  2333   _regionStack.setEmpty();
  2334   _regionStack.clear_overflow();
  2335   clear_has_overflown();
  2336   _finger = _heap_start;
  2338   for (int i = 0; i < (int)_max_task_num; ++i) {
  2339     OopTaskQueue* queue = _task_queues->queue(i);
  2340     queue->set_empty();
  2344 void ConcurrentMark::print_stats() {
  2345   if (verbose_stats()) {
  2346     gclog_or_tty->print_cr("---------------------------------------------------------------------");
  2347     for (size_t i = 0; i < _active_tasks; ++i) {
  2348       _tasks[i]->print_stats();
  2349       gclog_or_tty->print_cr("---------------------------------------------------------------------");
  2354 class CSMarkOopClosure: public OopClosure {
  2355   friend class CSMarkBitMapClosure;
  2357   G1CollectedHeap* _g1h;
  2358   CMBitMap*        _bm;
  2359   ConcurrentMark*  _cm;
  2360   oop*             _ms;
  2361   jint*            _array_ind_stack;
  2362   int              _ms_size;
  2363   int              _ms_ind;
  2364   int              _array_increment;
  2366   bool push(oop obj, int arr_ind = 0) {
  2367     if (_ms_ind == _ms_size) {
  2368       gclog_or_tty->print_cr("Mark stack is full.");
  2369       return false;
  2371     _ms[_ms_ind] = obj;
  2372     if (obj->is_objArray()) _array_ind_stack[_ms_ind] = arr_ind;
  2373     _ms_ind++;
  2374     return true;
  2377   oop pop() {
  2378     if (_ms_ind == 0) return NULL;
  2379     else {
  2380       _ms_ind--;
  2381       return _ms[_ms_ind];
  2385   template <class T> bool drain() {
  2386     while (_ms_ind > 0) {
  2387       oop obj = pop();
  2388       assert(obj != NULL, "Since index was non-zero.");
  2389       if (obj->is_objArray()) {
  2390         jint arr_ind = _array_ind_stack[_ms_ind];
  2391         objArrayOop aobj = objArrayOop(obj);
  2392         jint len = aobj->length();
  2393         jint next_arr_ind = arr_ind + _array_increment;
  2394         if (next_arr_ind < len) {
  2395           push(obj, next_arr_ind);
  2397         // Now process this portion of this one.
  2398         int lim = MIN2(next_arr_ind, len);
  2399         for (int j = arr_ind; j < lim; j++) {
  2400           do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j));
  2403       } else {
  2404         obj->oop_iterate(this);
  2406       if (abort()) return false;
  2408     return true;
  2411 public:
  2412   CSMarkOopClosure(ConcurrentMark* cm, int ms_size) :
  2413     _g1h(G1CollectedHeap::heap()),
  2414     _cm(cm),
  2415     _bm(cm->nextMarkBitMap()),
  2416     _ms_size(ms_size), _ms_ind(0),
  2417     _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
  2418     _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
  2419     _array_increment(MAX2(ms_size/8, 16))
  2420   {}
  2422   ~CSMarkOopClosure() {
  2423     FREE_C_HEAP_ARRAY(oop, _ms);
  2424     FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
  2427   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  2428   virtual void do_oop(      oop* p) { do_oop_work(p); }
  2430   template <class T> void do_oop_work(T* p) {
  2431     T heap_oop = oopDesc::load_heap_oop(p);
  2432     if (oopDesc::is_null(heap_oop)) return;
  2433     oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
  2434     if (obj->is_forwarded()) {
  2435       // If the object has already been forwarded, we have to make sure
  2436       // that it's marked.  So follow the forwarding pointer.  Note that
  2437       // this does the right thing for self-forwarding pointers in the
  2438       // evacuation failure case.
  2439       obj = obj->forwardee();
  2441     HeapRegion* hr = _g1h->heap_region_containing(obj);
  2442     if (hr != NULL) {
  2443       if (hr->in_collection_set()) {
  2444         if (_g1h->is_obj_ill(obj)) {
  2445           _bm->mark((HeapWord*)obj);
  2446           if (!push(obj)) {
  2447             gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
  2448             set_abort();
  2451       } else {
  2452         // Outside the collection set; we need to gray it
  2453         _cm->deal_with_reference(obj);
  2457 };
  2459 class CSMarkBitMapClosure: public BitMapClosure {
  2460   G1CollectedHeap* _g1h;
  2461   CMBitMap*        _bitMap;
  2462   ConcurrentMark*  _cm;
  2463   CSMarkOopClosure _oop_cl;
  2464 public:
  2465   CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :
  2466     _g1h(G1CollectedHeap::heap()),
  2467     _bitMap(cm->nextMarkBitMap()),
  2468     _oop_cl(cm, ms_size)
  2469   {}
  2471   ~CSMarkBitMapClosure() {}
  2473   bool do_bit(size_t offset) {
  2474     // convert offset into a HeapWord*
  2475     HeapWord* addr = _bitMap->offsetToHeapWord(offset);
  2476     assert(_bitMap->startWord() <= addr && addr < _bitMap->endWord(),
  2477            "address out of range");
  2478     assert(_bitMap->isMarked(addr), "tautology");
  2479     oop obj = oop(addr);
  2480     if (!obj->is_forwarded()) {
  2481       if (!_oop_cl.push(obj)) return false;
  2482       if (UseCompressedOops) {
  2483         if (!_oop_cl.drain<narrowOop>()) return false;
  2484       } else {
  2485         if (!_oop_cl.drain<oop>()) return false;
  2488     // Otherwise...
  2489     return true;
  2491 };
  2494 class CompleteMarkingInCSHRClosure: public HeapRegionClosure {
  2495   CMBitMap* _bm;
  2496   CSMarkBitMapClosure _bit_cl;
  2497   enum SomePrivateConstants {
  2498     MSSize = 1000
  2499   };
  2500   bool _completed;
  2501 public:
  2502   CompleteMarkingInCSHRClosure(ConcurrentMark* cm) :
  2503     _bm(cm->nextMarkBitMap()),
  2504     _bit_cl(cm, MSSize),
  2505     _completed(true)
  2506   {}
  2508   ~CompleteMarkingInCSHRClosure() {}
  2510   bool doHeapRegion(HeapRegion* r) {
  2511     if (!r->evacuation_failed()) {
  2512       MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
  2513       if (!mr.is_empty()) {
  2514         if (!_bm->iterate(&_bit_cl, mr)) {
  2515           _completed = false;
  2516           return true;
  2520     return false;
  2523   bool completed() { return _completed; }
  2524 };
  2526 class ClearMarksInHRClosure: public HeapRegionClosure {
  2527   CMBitMap* _bm;
  2528 public:
  2529   ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { }
  2531   bool doHeapRegion(HeapRegion* r) {
  2532     if (!r->used_region().is_empty() && !r->evacuation_failed()) {
  2533       MemRegion usedMR = r->used_region();
  2534       _bm->clearRange(usedMR);
  2536     return false;
  2538 };
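       // During an evacuation pause, finishes marking for objects in the
       // collection set: repeatedly applies the closure above until every
       // marked object in a (non-evacuation-failed) CSet region has been
       // traced, then clears their marks on the next bitmap.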
  2540 void ConcurrentMark::complete_marking_in_collection_set() {
  2541   G1CollectedHeap* g1h =  G1CollectedHeap::heap();
  2543   if (!g1h->mark_in_progress()) {
  2544     g1h->g1_policy()->record_mark_closure_time(0.0);
  2545     return;
  2548   int i = 1;
  2549   double start = os::elapsedTime();
  2550   while (true) {
  2551     i++;
  2552     CompleteMarkingInCSHRClosure cmplt(this);
  2553     g1h->collection_set_iterate(&cmplt);
  2554     if (cmplt.completed()) break;
  2556   double end_time = os::elapsedTime();
  2557   double elapsed_time_ms = (end_time - start) * 1000.0;
  2558   g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
  2559   if (PrintGCDetails) {
  2560     gclog_or_tty->print_cr("Mark closure took %5.2f ms.", elapsed_time_ms);
  2563   ClearMarksInHRClosure clr(nextMarkBitMap());
  2564   g1h->collection_set_iterate(&clr);
  2567 // The next two methods deal with the following optimisation. Some
  2568 // objects are gray by being marked and located above the finger. If
  2569 // they are copied below the finger during an evacuation pause, then
  2570 // they need to be pushed on the stack. The observation is that, if
  2571 // there are no regions in the collection set located above the
  2572 // finger, then the above cannot happen, hence we do not need to
  2573 // explicitly gray any objects when copying them to below the
  2574 // finger. The global stack will be scanned to ensure that, if it
  2575 // points to objects being copied, their entries are updated to the
  2576 // new location. There is a tricky situation with the gray objects in
  2577 // the region stack that are being copied, however. See the comment in
  2578 // newCSet().
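       // In outline, the two methods below leave _should_gray_objects set if
       // either the region stack is non-empty at the start of the pause or
       // some collection set region extends beyond the minimum of the global
       // and per-task fingers computed in newCSet().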
  2580 void ConcurrentMark::newCSet() {
  2581   if (!concurrent_marking_in_progress())
  2582     // nothing to do if marking is not in progress
  2583     return;
  2585   // find what the lowest finger is among the global and local fingers
  2586   _min_finger = _finger;
  2587   for (int i = 0; i < (int)_max_task_num; ++i) {
  2588     CMTask* task = _tasks[i];
  2589     HeapWord* task_finger = task->finger();
  2590     if (task_finger != NULL && task_finger < _min_finger)
  2591       _min_finger = task_finger;
  2594   _should_gray_objects = false;
  2596   // This fixes a very subtle and frustrating bug. It might be the case
  2597   // that, during an evacuation pause, heap regions that contain
  2598   // objects that are gray (by being in regions contained in the
  2599   // region stack) are included in the collection set. Since such gray
  2600   // objects will be moved, and because it's not easy to redirect
  2601   // region stack entries to point to a new location (because objects
  2602   // in one region might be scattered to multiple regions after they
  2603   // are copied), one option is to ensure that all marked objects
  2604   // copied during a pause are pushed on the stack. Notice, however,
  2605   // that this problem can only happen when the region stack is not
  2606   // empty during an evacuation pause. So, we make the fix a bit less
  2607   // conservative and ensure that regions are pushed on the stack,
  2608   // irrespective of whether all collection set regions are below the
  2609   // finger, if the region stack is not empty. This is expected to be
  2610   // a rare case, so I don't think it's necessary to be smarter about it.
  2611   if (!region_stack_empty())
  2612     _should_gray_objects = true;
  2615 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
  2616   if (!concurrent_marking_in_progress())
  2617     return;
  2619   HeapWord* region_end = hr->end();
  2620   if (region_end > _min_finger)
  2621     _should_gray_objects = true;
  2624 // abandon current marking iteration due to a Full GC
  2625 void ConcurrentMark::abort() {
  2626   // Clear all marks to force marking thread to do nothing
  2627   _nextMarkBitMap->clearAll();
  2628   // Empty mark stack
  2629   clear_marking_state();
  2630   for (int i = 0; i < (int)_max_task_num; ++i)
  2631     _tasks[i]->clear_region_fields();
  2632   _has_aborted = true;
  2634   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  2635   satb_mq_set.abandon_partial_marking();
  2636   // This can be called either during or outside marking, we'll read
  2637   // the expected_active value from the SATB queue set.
  2638   satb_mq_set.set_active_all_threads(
  2639                                  false, /* new active value */
  2640                                  satb_mq_set.is_active() /* expected_active */);
  2643 static void print_ms_time_info(const char* prefix, const char* name,
  2644                                NumberSeq& ns) {
  2645   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
  2646                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
  2647   if (ns.num() > 0) {
  2648     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
  2649                            prefix, ns.sd(), ns.maximum());
  2653 void ConcurrentMark::print_summary_info() {
  2654   gclog_or_tty->print_cr(" Concurrent marking:");
  2655   print_ms_time_info("  ", "init marks", _init_times);
  2656   print_ms_time_info("  ", "remarks", _remark_times);
  2658     print_ms_time_info("     ", "final marks", _remark_mark_times);
  2659     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
  2662   print_ms_time_info("  ", "cleanups", _cleanup_times);
  2663   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
  2664                          _total_counting_time,
  2665                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
  2666                           (double)_cleanup_times.num()
  2667                          : 0.0));
  2668   if (G1ScrubRemSets) {
  2669     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
  2670                            _total_rs_scrub_time,
  2671                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
  2672                             (double)_cleanup_times.num()
  2673                            : 0.0));
  2675   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
  2676                          (_init_times.sum() + _remark_times.sum() +
  2677                           _cleanup_times.sum())/1000.0);
  2678   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
  2679                 "(%8.2f s marking, %8.2f s counting).",
  2680                 cmThread()->vtime_accum(),
  2681                 cmThread()->vtime_mark_accum(),
  2682                 cmThread()->vtime_count_accum());
  2685 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
  2686   _parallel_workers->print_worker_threads_on(st);
  2689 // Closures
  2690 // XXX: there seems to be a lot of code duplication here;
  2691 // should refactor and consolidate the shared code.
  2693 // This closure is used to mark refs into the CMS generation in
  2694 // the CMS bit map. Called at the first checkpoint.
  2696 // We take a break if someone is trying to stop the world.
  2697 bool ConcurrentMark::do_yield_check(int worker_i) {
  2698   if (should_yield()) {
  2699     if (worker_i == 0)
  2700       _g1h->g1_policy()->record_concurrent_pause();
  2701     cmThread()->yield();
  2702     if (worker_i == 0)
  2703       _g1h->g1_policy()->record_concurrent_pause_end();
  2704     return true;
  2705   } else {
  2706     return false;
  2710 bool ConcurrentMark::should_yield() {
  2711   return cmThread()->should_yield();
  2714 bool ConcurrentMark::containing_card_is_marked(void* p) {
  2715   size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
  2716   return _card_bm.at(offset >> CardTableModRefBS::card_shift);
  2719 bool ConcurrentMark::containing_cards_are_marked(void* start,
  2720                                                  void* last) {
  2721   return
  2722     containing_card_is_marked(start) &&
  2723     containing_card_is_marked(last);
  2726 #ifndef PRODUCT
  2727 // for debugging purposes
  2728 void ConcurrentMark::print_finger() {
  2729   gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
  2730                          _heap_start, _heap_end, _finger);
  2731   for (int i = 0; i < (int) _max_task_num; ++i) {
  2732     gclog_or_tty->print("   %d: "PTR_FORMAT, i, _tasks[i]->finger());
  2734   gclog_or_tty->print_cr("");
  2736 #endif
  2738 // Closure for iteration over bitmaps
  2739 class CMBitMapClosure : public BitMapClosure {
  2740 private:
  2741   // the bitmap that is being iterated over
  2742   CMBitMap*                   _nextMarkBitMap;
  2743   ConcurrentMark*             _cm;
  2744   CMTask*                     _task;
  2745   // true if we're scanning a heap region claimed by the task (so that
  2746   // we move the finger along), false if we're not, i.e. currently when
  2747   // scanning a heap region popped from the region stack (so that we
  2748   // do not move the task finger along; it'd be a mistake if we did so).
  2749   bool                        _scanning_heap_region;
  2751 public:
  2752   CMBitMapClosure(CMTask *task,
  2753                   ConcurrentMark* cm,
  2754                   CMBitMap* nextMarkBitMap)
  2755     :  _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
  2757   void set_scanning_heap_region(bool scanning_heap_region) {
  2758     _scanning_heap_region = scanning_heap_region;
  2761   bool do_bit(size_t offset) {
  2762     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
  2763     assert(_nextMarkBitMap->isMarked(addr), "invariant");
  2764     assert( addr < _cm->finger(), "invariant");
  2766     if (_scanning_heap_region) {
  2767       statsOnly( _task->increase_objs_found_on_bitmap() );
  2768       assert(addr >= _task->finger(), "invariant");
  2769       // We move that task's local finger along.
  2770       _task->move_finger_to(addr);
  2771     } else {
  2772       // We move the task's region finger along.
  2773       _task->move_region_finger_to(addr);
  2776     _task->scan_object(oop(addr));
  2777     // we only partially drain the local queue and global stack
  2778     _task->drain_local_queue(true);
  2779     _task->drain_global_stack(true);
  2781     // if the has_aborted flag has been raised, we need to bail out of
  2782     // the iteration
  2783     return !_task->has_aborted();
  2785 };
  2787 // Closure for iterating over objects, currently only used for
  2788 // processing SATB buffers.
  2789 class CMObjectClosure : public ObjectClosure {
  2790 private:
  2791   CMTask* _task;
  2793 public:
  2794   void do_object(oop obj) {
  2795     _task->deal_with_reference(obj);
  2798   CMObjectClosure(CMTask* task) : _task(task) { }
  2799 };
  2801 // Closure for iterating over object fields
  2802 class CMOopClosure : public OopClosure {
  2803 private:
  2804   G1CollectedHeap*   _g1h;
  2805   ConcurrentMark*    _cm;
  2806   CMTask*            _task;
  2808 public:
  2809   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  2810   virtual void do_oop(      oop* p) { do_oop_work(p); }
  2812   template <class T> void do_oop_work(T* p) {
  2813     assert(_g1h->is_in_g1_reserved((HeapWord*) p), "invariant");
  2814     assert(!_g1h->heap_region_containing((HeapWord*) p)->is_on_free_list(),
  2815            "invariant");
  2817     oop obj = oopDesc::load_decode_heap_oop(p);
  2818     if (_cm->verbose_high())
  2819       gclog_or_tty->print_cr("[%d] we're looking at location "
  2820                              "*"PTR_FORMAT" = "PTR_FORMAT,
  2821                              _task->task_id(), p, (void*) obj);
  2822     _task->deal_with_reference(obj);
  2825   CMOopClosure(G1CollectedHeap* g1h,
  2826                ConcurrentMark* cm,
  2827                CMTask* task)
  2828     : _g1h(g1h), _cm(cm), _task(task) { }
  2829 };
  2831 void CMTask::setup_for_region(HeapRegion* hr) {
  2832   // Separated the asserts so that we know which one fires.
  2833   assert(hr != NULL,
  2834         "claim_region() should have filtered out continues humongous regions");
  2835   assert(!hr->continuesHumongous(),
  2836         "claim_region() should have filtered out continues humongous regions");
  2838   if (_cm->verbose_low())
  2839     gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
  2840                            _task_id, hr);
  2842   _curr_region  = hr;
  2843   _finger       = hr->bottom();
  2844   update_region_limit();
  2847 void CMTask::update_region_limit() {
  2848   HeapRegion* hr            = _curr_region;
  2849   HeapWord* bottom          = hr->bottom();
  2850   HeapWord* limit           = hr->next_top_at_mark_start();
  2852   if (limit == bottom) {
  2853     if (_cm->verbose_low())
  2854       gclog_or_tty->print_cr("[%d] found an empty region "
  2855                              "["PTR_FORMAT", "PTR_FORMAT")",
  2856                              _task_id, bottom, limit);
  2857     // The region was collected underneath our feet.
  2858     // We set the finger to bottom to ensure that the bitmap
  2859     // iteration that will follow this will not do anything.
  2860     // (this is not a condition that holds when we set the region up,
  2861     // as the region is not supposed to be empty in the first place)
  2862     _finger = bottom;
  2863   } else if (limit >= _region_limit) {
  2864     assert(limit >= _finger, "peace of mind");
  2865   } else {
  2866     assert(limit < _region_limit, "only way to get here");
  2867     // This can happen under some pretty unusual circumstances.  An
  2868     // evacuation pause empties the region underneath our feet (NTAMS
  2869     // at bottom). We then do some allocation in the region (NTAMS
  2870     // stays at bottom), followed by the region being used as a GC
  2871     // alloc region (NTAMS will move to top() and the objects
  2872     // originally below it will be grayed). All objects now marked in
  2873     // the region are explicitly grayed, if below the global finger,
  2874     // and we do not need in fact to scan anything else. So, we simply
  2875     // set _finger to be limit to ensure that the bitmap iteration
  2876     // doesn't do anything.
  2877     _finger = limit;
  2880   _region_limit = limit;
  2883 void CMTask::giveup_current_region() {
  2884   assert(_curr_region != NULL, "invariant");
  2885   if (_cm->verbose_low())
  2886     gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
  2887                            _task_id, _curr_region);
  2888   clear_region_fields();
  2891 void CMTask::clear_region_fields() {
  2892   // Values for these three fields that indicate that we're not
  2893   // holding on to a region.
  2894   _curr_region   = NULL;
  2895   _finger        = NULL;
  2896   _region_limit  = NULL;
  2898   _region_finger = NULL;
  2901 void CMTask::reset(CMBitMap* nextMarkBitMap) {
  2902   guarantee(nextMarkBitMap != NULL, "invariant");
  2904   if (_cm->verbose_low())
  2905     gclog_or_tty->print_cr("[%d] resetting", _task_id);
  2907   _nextMarkBitMap                = nextMarkBitMap;
  2908   clear_region_fields();
  2910   _calls                         = 0;
  2911   _elapsed_time_ms               = 0.0;
  2912   _termination_time_ms           = 0.0;
  2913   _termination_start_time_ms     = 0.0;
  2915 #if _MARKING_STATS_
  2916   _local_pushes                  = 0;
  2917   _local_pops                    = 0;
  2918   _local_max_size                = 0;
  2919   _objs_scanned                  = 0;
  2920   _global_pushes                 = 0;
  2921   _global_pops                   = 0;
  2922   _global_max_size               = 0;
  2923   _global_transfers_to           = 0;
  2924   _global_transfers_from         = 0;
  2925   _region_stack_pops             = 0;
  2926   _regions_claimed               = 0;
  2927   _objs_found_on_bitmap          = 0;
  2928   _satb_buffers_processed        = 0;
  2929   _steal_attempts                = 0;
  2930   _steals                        = 0;
  2931   _aborted                       = 0;
  2932   _aborted_overflow              = 0;
  2933   _aborted_cm_aborted            = 0;
  2934   _aborted_yield                 = 0;
  2935   _aborted_timed_out             = 0;
  2936   _aborted_satb                  = 0;
  2937   _aborted_termination           = 0;
  2938 #endif // _MARKING_STATS_
  2941 bool CMTask::should_exit_termination() {
  2942   regular_clock_call();
  2943   // This is called when we are in the termination protocol. We should
  2944   // quit if, for some reason, this task wants to abort or the global
  2945   // stack is not empty (this means that we can get work from it).
  2946   return !_cm->mark_stack_empty() || has_aborted();
  2949 // This determines whether the method below checks both the local
  2950 // and global fingers when deciding whether to push a gray object
  2951 // on the stack (value 1), or whether it only checks the global one
  2952 // (value 0). The tradeoff is that the former is a bit more accurate
  2953 // and may push fewer entries on the stack, but it might also be
  2954 // a little bit slower.
  2956 #define _CHECK_BOTH_FINGERS_      1
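// Note: with _CHECK_BOTH_FINGERS_ set to 1, the decision below for a
// freshly-marked objAddr reduces, roughly, to the following sketch:
//
//   if      (_finger != NULL && objAddr < _finger)             push(obj);
//   else if (_curr_region != NULL && objAddr < _region_limit)  ; // current region scan will visit it
//   else if (objAddr < global_finger)                          push(obj);
//   else                                                       ; // a future region scan will visit it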
  2958 void CMTask::deal_with_reference(oop obj) {
  2959   if (_cm->verbose_high())
  2960     gclog_or_tty->print_cr("[%d] we're dealing with reference = "PTR_FORMAT,
  2961                            _task_id, (void*) obj);
  2963   ++_refs_reached;
  2965   HeapWord* objAddr = (HeapWord*) obj;
  2966   assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
  2967   if (_g1h->is_in_g1_reserved(objAddr)) {
  2968     assert(obj != NULL, "is_in_g1_reserved should ensure this");
  2969     HeapRegion* hr =  _g1h->heap_region_containing(obj);
  2970     if (_g1h->is_obj_ill(obj, hr)) {
  2971       if (_cm->verbose_high())
  2972         gclog_or_tty->print_cr("[%d] "PTR_FORMAT" is not considered marked",
  2973                                _task_id, (void*) obj);
  2975       // we need to mark it first
  2976       if (_nextMarkBitMap->parMark(objAddr)) {
  2977         // No OrderAccess::store_load() is needed. It is implicit in the
  2978         // CAS done in parMark(objAddr) above
  2979         HeapWord* global_finger = _cm->finger();
  2981 #if _CHECK_BOTH_FINGERS_
  2982         // we will check both the local and global fingers
  2984         if (_finger != NULL && objAddr < _finger) {
  2985           if (_cm->verbose_high())
  2986             gclog_or_tty->print_cr("[%d] below the local finger ("PTR_FORMAT"), "
  2987                                    "pushing it", _task_id, _finger);
  2988           push(obj);
  2989         } else if (_curr_region != NULL && objAddr < _region_limit) {
  2990           // do nothing
  2991         } else if (objAddr < global_finger) {
  2992           // Notice that the global finger might be moving forward
  2993           // concurrently. This is not a problem. In the worst case, we
  2994           // mark the object while it is above the global finger and, by
  2995           // the time we read the global finger, it has moved forward
  2996           // past this object. In this case, the object will probably
  2997           // be visited when a task is scanning the region and will also
  2998           // be pushed on the stack. So, some duplicate work, but no
  2999           // correctness problems.
  3001           if (_cm->verbose_high())
  3002             gclog_or_tty->print_cr("[%d] below the global finger "
  3003                                    "("PTR_FORMAT"), pushing it",
  3004                                    _task_id, global_finger);
  3005           push(obj);
  3006         } else {
  3007           // do nothing
  3009 #else // _CHECK_BOTH_FINGERS_
  3010       // we will only check the global finger
  3012         if (objAddr < global_finger) {
  3013           // see long comment above
  3015           if (_cm->verbose_high())
  3016             gclog_or_tty->print_cr("[%d] below the global finger "
  3017                                    "("PTR_FORMAT"), pushing it",
  3018                                    _task_id, global_finger);
  3019           push(obj);
  3021 #endif // _CHECK_BOTH_FINGERS_
  3027 void CMTask::push(oop obj) {
  3028   HeapWord* objAddr = (HeapWord*) obj;
  3029   assert(_g1h->is_in_g1_reserved(objAddr), "invariant");
  3030   assert(!_g1h->heap_region_containing(objAddr)->is_on_free_list(),
  3031          "invariant");
  3032   assert(!_g1h->is_obj_ill(obj), "invariant");
  3033   assert(_nextMarkBitMap->isMarked(objAddr), "invariant");
  3035   if (_cm->verbose_high())
  3036     gclog_or_tty->print_cr("[%d] pushing "PTR_FORMAT, _task_id, (void*) obj);
  3038   if (!_task_queue->push(obj)) {
  3039     // The local task queue looks full. We need to push some entries
  3040     // to the global stack.
  3042     if (_cm->verbose_medium())
  3043       gclog_or_tty->print_cr("[%d] task queue overflow, "
  3044                              "moving entries to the global stack",
  3045                              _task_id);
  3046     move_entries_to_global_stack();
  3048     // this should succeed since, even if we overflow the global
  3049     // stack, we should have definitely removed some entries from the
  3050     // local queue. So, there must be space on it.
  3051     bool success = _task_queue->push(obj);
  3052     assert(success, "invariant");
  3055   statsOnly( int tmp_size = _task_queue->size();
  3056              if (tmp_size > _local_max_size)
  3057                _local_max_size = tmp_size;
  3058              ++_local_pushes );
  3061 void CMTask::reached_limit() {
  3062   assert(_words_scanned >= _words_scanned_limit ||
  3063          _refs_reached >= _refs_reached_limit ,
  3064          "shouldn't have been called otherwise");
  3065   regular_clock_call();
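// Note: the work-based scheme that ends up calling reached_limit(). After
// scanning an object the task does, in effect, something like the sketch
// below (a simplification, not the actual scan_object() implementation):
//
//   _words_scanned += obj->size();   // plus one _refs_reached per reference
//                                    // the oop closure visits
//   if (_words_scanned >= _words_scanned_limit ||
//       _refs_reached  >= _refs_reached_limit) {
//     reached_limit();               // which calls regular_clock_call()
//   }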
  3068 void CMTask::regular_clock_call() {
  3069   if (has_aborted())
  3070     return;
  3072   // First, we need to recalculate the words scanned and refs reached
  3073   // limits for the next clock call.
  3074   recalculate_limits();
  3076   // During the regular clock call we do the following
  3078   // (1) If an overflow has been flagged, then we abort.
  3079   if (_cm->has_overflown()) {
  3080     set_has_aborted();
  3081     return;
  3084   // If we are not concurrent (i.e. we're doing remark) we don't need
  3085   // to check anything else. The other steps are only needed during
  3086   // the concurrent marking phase.
  3087   if (!concurrent())
  3088     return;
  3090   // (2) If marking has been aborted for Full GC, then we also abort.
  3091   if (_cm->has_aborted()) {
  3092     set_has_aborted();
  3093     statsOnly( ++_aborted_cm_aborted );
  3094     return;
  3097   double curr_time_ms = os::elapsedVTime() * 1000.0;
  3099   // (3) If marking stats are enabled, then we update the step history.
  3100 #if _MARKING_STATS_
  3101   if (_words_scanned >= _words_scanned_limit)
  3102     ++_clock_due_to_scanning;
  3103   if (_refs_reached >= _refs_reached_limit)
  3104     ++_clock_due_to_marking;
  3106   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
  3107   _interval_start_time_ms = curr_time_ms;
  3108   _all_clock_intervals_ms.add(last_interval_ms);
  3110   if (_cm->verbose_medium()) {
  3111     gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
  3112                            "scanned = %d%s, refs reached = %d%s",
  3113                            _task_id, last_interval_ms,
  3114                            _words_scanned,
  3115                            (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
  3116                            _refs_reached,
  3117                            (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
  3119 #endif // _MARKING_STATS_
  3121   // (4) We check whether we should yield. If we have to, then we abort.
  3122   if (_cm->should_yield()) {
  3123     // We should yield. To do this we abort the task. The caller is
  3124     // responsible for yielding.
  3125     set_has_aborted();
  3126     statsOnly( ++_aborted_yield );
  3127     return;
  3130   // (5) We check whether we've reached our time quota. If we have,
  3131   // then we abort.
  3132   double elapsed_time_ms = curr_time_ms - _start_time_ms;
  3133   if (elapsed_time_ms > _time_target_ms) {
  3134     set_has_aborted();
  3135     _has_aborted_timed_out = true;
  3136     statsOnly( ++_aborted_timed_out );
  3137     return;
  3140   // (6) Finally, we check whether there are enough completed SATB
  3141   // buffers available for processing. If there are, we abort.
  3142   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  3143   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
  3144     if (_cm->verbose_low())
  3145       gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
  3146                              _task_id);
  3147     // we do need to process SATB buffers, we'll abort and restart
  3148     // the marking task to do so
  3149     set_has_aborted();
  3150     statsOnly( ++_aborted_satb );
  3151     return;
  3155 void CMTask::recalculate_limits() {
  3156   _real_words_scanned_limit = _words_scanned + words_scanned_period;
  3157   _words_scanned_limit      = _real_words_scanned_limit;
  3159   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
  3160   _refs_reached_limit       = _real_refs_reached_limit;
  3163 void CMTask::decrease_limits() {
  3164   // This is called when we believe that we're going to do an infrequent
  3165   // operation which will increase the per byte scanned cost (i.e. move
  3166   // entries to/from the global stack). It basically tries to decrease the
  3167   // scanning limit so that the clock is called earlier.
  3169   if (_cm->verbose_medium())
  3170     gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
  3172   _words_scanned_limit = _real_words_scanned_limit -
  3173     3 * words_scanned_period / 4;
  3174   _refs_reached_limit  = _real_refs_reached_limit -
  3175     3 * refs_reached_period / 4;
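// Note: a worked example of the effect of decrease_limits(), assuming a
// hypothetical words_scanned_period of 12*1024 words. recalculate_limits()
// would allow 12K more words before the next clock call; decrease_limits()
// lowers that limit by 3*12K/4 = 9K words, so the clock fires after at most
// roughly 3K more words, i.e. a quarter of the usual period. The same
// scaling applies to refs_reached_period.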
  3178 void CMTask::move_entries_to_global_stack() {
  3179   // local array where we'll store the entries that will be popped
  3180   // from the local queue
  3181   oop buffer[global_stack_transfer_size];
  3183   int n = 0;
  3184   oop obj;
  3185   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
  3186     buffer[n] = obj;
  3187     ++n;
  3190   if (n > 0) {
  3191     // we popped at least one entry from the local queue
  3193     statsOnly( ++_global_transfers_to; _local_pops += n );
  3195     if (!_cm->mark_stack_push(buffer, n)) {
  3196       if (_cm->verbose_low())
  3197         gclog_or_tty->print_cr("[%d] aborting due to global stack overflow", _task_id);
  3198       set_has_aborted();
  3199     } else {
  3200       // the transfer was successful
  3202       if (_cm->verbose_medium())
  3203         gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
  3204                                _task_id, n);
  3205       statsOnly( int tmp_size = _cm->mark_stack_size();
  3206                  if (tmp_size > _global_max_size)
  3207                    _global_max_size = tmp_size;
  3208                  _global_pushes += n );
  3212   // this operation was quite expensive, so decrease the limits
  3213   decrease_limits();
  3216 void CMTask::get_entries_from_global_stack() {
  3217   // local array where we'll store the entries that will be popped
  3218   // from the global stack.
  3219   oop buffer[global_stack_transfer_size];
  3220   int n;
  3221   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
  3222   assert(n <= global_stack_transfer_size,
  3223          "we should not pop more than the given limit");
  3224   if (n > 0) {
  3225     // yes, we did actually pop at least one entry
  3227     statsOnly( ++_global_transfers_from; _global_pops += n );
  3228     if (_cm->verbose_medium())
  3229       gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
  3230                              _task_id, n);
  3231     for (int i = 0; i < n; ++i) {
  3232       bool success = _task_queue->push(buffer[i]);
  3233       // We only call this when the local queue is empty or under a
  3234       // given target limit. So, we do not expect this push to fail.
  3235       assert(success, "invariant");
  3238     statsOnly( int tmp_size = _task_queue->size();
  3239                if (tmp_size > _local_max_size)
  3240                  _local_max_size = tmp_size;
  3241                _local_pushes += n );
  3244   // this operation was quite expensive, so decrease the limits
  3245   decrease_limits();
  3248 void CMTask::drain_local_queue(bool partially) {
  3249   if (has_aborted())
  3250     return;
  3252   // Decide what the target size is, depending whether we're going to
  3253   // drain it partially (so that other tasks can steal if they run out
  3254   // of things to do) or totally (at the very end).
  3255   size_t target_size;
  3256   if (partially)
  3257     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
  3258   else
  3259     target_size = 0;
  3261   if (_task_queue->size() > target_size) {
  3262     if (_cm->verbose_high())
  3263       gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
  3264                              _task_id, target_size);
  3266     oop obj;
  3267     bool ret = _task_queue->pop_local(obj);
  3268     while (ret) {
  3269       statsOnly( ++_local_pops );
  3271       if (_cm->verbose_high())
  3272         gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
  3273                                (void*) obj);
  3275       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
  3276       assert(!_g1h->heap_region_containing(obj)->is_on_free_list(),
  3277              "invariant");
  3279       scan_object(obj);
  3281       if (_task_queue->size() <= target_size || has_aborted())
  3282         ret = false;
  3283       else
  3284         ret = _task_queue->pop_local(obj);
  3287     if (_cm->verbose_high())
  3288       gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
  3289                              _task_id, _task_queue->size());
  3293 void CMTask::drain_global_stack(bool partially) {
  3294   if (has_aborted())
  3295     return;
  3297   // We have a policy to drain the local queue before we attempt to
  3298   // drain the global stack.
  3299   assert(partially || _task_queue->size() == 0, "invariant");
  3301   // Decide what the target size is, depending whether we're going to
  3302   // drain it partially (so that other tasks can steal if they run out
  3303   // of things to do) or totally (at the very end).  Notice that,
  3304   // because we move entries from the global stack in chunks or
  3305   // because another task might be doing the same, we might in fact
  3306   // drop below the target. But, this is not a problem.
  3307   size_t target_size;
  3308   if (partially)
  3309     target_size = _cm->partial_mark_stack_size_target();
  3310   else
  3311     target_size = 0;
  3313   if (_cm->mark_stack_size() > target_size) {
  3314     if (_cm->verbose_low())
  3315       gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
  3316                              _task_id, target_size);
  3318     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
  3319       get_entries_from_global_stack();
  3320       drain_local_queue(partially);
  3323     if (_cm->verbose_low())
  3324       gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
  3325                              _task_id, _cm->mark_stack_size());
  3329 // The SATB queue set makes several assumptions about whether to call
  3330 // the par or non-par versions of its methods. This is why some of the
  3331 // code is replicated. We should really get rid of the single-threaded
  3332 // version of the code to simplify things.
  3333 void CMTask::drain_satb_buffers() {
  3334   if (has_aborted())
  3335     return;
  3337   // We set this so that the regular clock knows that we're in the
  3338   // middle of draining buffers and doesn't set the abort flag when it
  3339   // notices that SATB buffers are available for draining. It'd be
  3340   // very counterproductive if it did that. :-)
  3341   _draining_satb_buffers = true;
  3343   CMObjectClosure oc(this);
  3344   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  3345   if (ParallelGCThreads > 0)
  3346     satb_mq_set.set_par_closure(_task_id, &oc);
  3347   else
  3348     satb_mq_set.set_closure(&oc);
  3350   // This keeps claiming and applying the closure to completed buffers
  3351   // until we run out of buffers or we need to abort.
  3352   if (ParallelGCThreads > 0) {
  3353     while (!has_aborted() &&
  3354            satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
  3355       if (_cm->verbose_medium())
  3356         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
  3357       statsOnly( ++_satb_buffers_processed );
  3358       regular_clock_call();
  3360   } else {
  3361     while (!has_aborted() &&
  3362            satb_mq_set.apply_closure_to_completed_buffer()) {
  3363       if (_cm->verbose_medium())
  3364         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
  3365       statsOnly( ++_satb_buffers_processed );
  3366       regular_clock_call();
  3370   if (!concurrent() && !has_aborted()) {
  3371     // We should only do this during remark.
  3372     if (ParallelGCThreads > 0)
  3373       satb_mq_set.par_iterate_closure_all_threads(_task_id);
  3374     else
  3375       satb_mq_set.iterate_closure_all_threads();
  3378   _draining_satb_buffers = false;
  3380   assert(has_aborted() ||
  3381          concurrent() ||
  3382          satb_mq_set.completed_buffers_num() == 0, "invariant");
  3384   if (ParallelGCThreads > 0)
  3385     satb_mq_set.set_par_closure(_task_id, NULL);
  3386   else
  3387     satb_mq_set.set_closure(NULL);
  3389   // again, this was a potentially expensive operation, decrease the
  3390   // limits to get the regular clock call early
  3391   decrease_limits();
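// Note: the CMObjectClosure registered above is assumed to simply hand every
// object recorded in a completed SATB buffer back to this task, roughly:
//
//   class CMObjectClosure : public ObjectClosure {
//     CMTask* _task;
//   public:
//     void do_object(oop obj) { _task->deal_with_reference(obj); }
//     CMObjectClosure(CMTask* task) : _task(task) { }
//   };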
  3394 void CMTask::drain_region_stack(BitMapClosure* bc) {
  3395   if (has_aborted())
  3396     return;
  3398   assert(_region_finger == NULL,
  3399          "it should be NULL when we're not scanning a region");
  3401   if (!_cm->region_stack_empty()) {
  3402     if (_cm->verbose_low())
  3403       gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
  3404                              _task_id, _cm->region_stack_size());
  3406     MemRegion mr = _cm->region_stack_pop_with_lock();
  3407     // it returns MemRegion() if the pop fails
  3408     statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
  3410     while (mr.start() != NULL) {
  3411       if (_cm->verbose_medium())
  3412         gclog_or_tty->print_cr("[%d] we are scanning region "
  3413                                "["PTR_FORMAT", "PTR_FORMAT")",
  3414                                _task_id, mr.start(), mr.end());
  3415       assert(mr.end() <= _cm->finger(),
  3416              "otherwise the region shouldn't be on the stack");
  3417       assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
  3418       if (_nextMarkBitMap->iterate(bc, mr)) {
  3419         assert(!has_aborted(),
  3420                "cannot abort the task without aborting the bitmap iteration");
  3422         // We finished iterating over the region without aborting.
  3423         regular_clock_call();
  3424         if (has_aborted())
  3425           mr = MemRegion();
  3426         else {
  3427           mr = _cm->region_stack_pop_with_lock();
  3428           // it returns MemRegion() if the pop fails
  3429           statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
  3431       } else {
  3432         assert(has_aborted(), "currently the only way to do so");
  3434         // The only way to abort the bitmap iteration is to return
  3435         // false from the do_bit() method. However, inside the
  3436         // do_bit() method we move the _region_finger to point to the
  3437         // object currently being looked at. So, if we bail out, we
  3438         // have definitely set _region_finger to something non-null.
  3439         assert(_region_finger != NULL, "invariant");
  3441         // The iteration was actually aborted. So now _region_finger
  3442         // points to the address of the object we last scanned. If we
  3443         // leave it there, when we restart this task, we will rescan
  3444         // the object. It is easy to avoid this. We move the finger by
  3445         // enough to point to the next possible object header (the
  3446         // bitmap knows by how much we need to move it as it knows its
  3447         // granularity).
  3448         MemRegion newRegion =
  3449           MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());
  3451         if (!newRegion.is_empty()) {
  3452           if (_cm->verbose_low()) {
  3453             gclog_or_tty->print_cr("[%d] pushing unscanned region "
  3454                                    "[" PTR_FORMAT "," PTR_FORMAT ") on region stack",
  3455                                    _task_id,
  3456                                    newRegion.start(), newRegion.end());
  3458           // Now push the part of the region we didn't scan on the
  3459           // region stack to make sure a task scans it later.
  3460           _cm->region_stack_push_with_lock(newRegion);
  3462         // break from while
  3463         mr = MemRegion();
  3465       _region_finger = NULL;
  3468     if (_cm->verbose_low())
  3469       gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
  3470                              _task_id, _cm->region_stack_size());
  3474 void CMTask::print_stats() {
  3475   gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
  3476                          _task_id, _calls);
  3477   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
  3478                          _elapsed_time_ms, _termination_time_ms);
  3479   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
  3480                          _step_times_ms.num(), _step_times_ms.avg(),
  3481                          _step_times_ms.sd());
  3482   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
  3483                          _step_times_ms.maximum(), _step_times_ms.sum());
  3485 #if _MARKING_STATS_
  3486   gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
  3487                          _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
  3488                          _all_clock_intervals_ms.sd());
  3489   gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
  3490                          _all_clock_intervals_ms.maximum(),
  3491                          _all_clock_intervals_ms.sum());
  3492   gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
  3493                          _clock_due_to_scanning, _clock_due_to_marking);
  3494   gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
  3495                          _objs_scanned, _objs_found_on_bitmap);
  3496   gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
  3497                          _local_pushes, _local_pops, _local_max_size);
  3498   gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
  3499                          _global_pushes, _global_pops, _global_max_size);
  3500   gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
  3501                          _global_transfers_to,_global_transfers_from);
  3502   gclog_or_tty->print_cr("  Regions: claimed = %d, Region Stack: pops = %d",
  3503                          _regions_claimed, _region_stack_pops);
  3504   gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
  3505   gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
  3506                          _steal_attempts, _steals);
  3507   gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
  3508   gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
  3509                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
  3510   gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
  3511                          _aborted_timed_out, _aborted_satb, _aborted_termination);
  3512 #endif // _MARKING_STATS_
  3515 /*****************************************************************************
  3517     The do_marking_step(time_target_ms) method is the building block
  3518     of the parallel marking framework. It can be called in parallel
  3519     with other invocations of do_marking_step() on different tasks
  3520     (but only one per task, obviously) and concurrently with the
  3521     mutator threads, or during remark, hence it eliminates the need
  3522     for two versions of the code. When called during remark, it will
  3523     pick up from where the task left off during the concurrent marking
  3524     phase. Interestingly, tasks are also claimable during evacuation
  3525     pauses, since do_marking_step() ensures that it aborts before
  3526     it needs to yield.
  3528     The data structures that it uses to do marking work are the
  3529     following:
  3531       (1) Marking Bitmap. If there are gray objects that appear only
  3532       on the bitmap (this happens either when dealing with an overflow
  3533       or when the initial marking phase has simply marked the roots
  3534       and didn't push them on the stack), then tasks claim heap
  3535       regions whose bitmap they then scan to find gray objects. A
  3536       global finger indicates where the end of the last claimed region
  3537       is. A local finger indicates how far into the region a task has
  3538       scanned. The two fingers are used to determine how to gray an
  3539       object (i.e. whether simply marking it is OK, as it will be
  3540       visited by a task in the future, or whether it needs to be also
  3541       pushed on a stack).
  3543       (2) Local Queue. The local queue of the task, which the task
  3544       can access reasonably efficiently. Other tasks can steal from
  3545       it when they run out of work. Throughout the marking phase, a
  3546       task attempts to keep its local queue short but not totally
  3547       empty, so that entries are available for stealing by other
  3548       tasks. Only when there is no more work will a task totally
  3549       drain its local queue.
  3551       (3) Global Mark Stack. This handles local queue overflow. During
  3552       marking, only sets of entries are moved between it and the local
  3553       queues, as access to it requires a mutex and finer-grained
  3554       interaction with it might cause contention. If it
  3555       overflows, then the marking phase should restart and iterate
  3556       over the bitmap to identify gray objects. Throughout the marking
  3557       phase, tasks attempt to keep the global mark stack at a small
  3558       length but not totally empty, so that entries are available for
  3559       popping by other tasks. Only when there is no more work will
  3560       tasks totally drain the global mark stack.
  3562       (4) Global Region Stack. Entries on it correspond to areas of
  3563       the bitmap that need to be scanned since they contain gray
  3564       objects. Pushes on the region stack only happen during
  3565       evacuation pauses and typically correspond to areas covered by
  3566       GC LABs. If it overflows, then the marking phase should restart
  3567       and iterate over the bitmap to identify gray objects. Tasks will
  3568       try to totally drain the region stack as soon as possible.
  3570       (5) SATB Buffer Queue. This is where completed SATB buffers are
  3571       made available. Buffers are regularly removed from this queue
  3572       and scanned for roots, so that the queue doesn't get too
  3573       long. During remark, all completed buffers are processed, as
  3574       well as the filled in parts of any uncompleted buffers.
  3576     The do_marking_step() method tries to abort when the time target
  3577     has been reached. There are a few other cases when the
  3578     do_marking_step() method also aborts:
  3580       (1) When the marking phase has been aborted (after a Full GC).
  3582       (2) When a global overflow (either on the global stack or the
  3583       region stack) has been triggered. Before the task aborts, it
  3584       will actually sync up with the other tasks to ensure that all
  3585       the marking data structures (local queues, stacks, fingers etc.)
  3586       are re-initialised so that when do_marking_step() completes,
  3587       the marking phase can immediately restart.
  3589       (3) When enough completed SATB buffers are available. The
  3590       do_marking_step() method only tries to drain SATB buffers right
  3591       at the beginning. So, if enough buffers are available, the
  3592       marking step aborts and the SATB buffers are processed at
  3593       the beginning of the next invocation.
  3595       (4) To yield. When we have to yield, we abort and yield
  3596       right at the end of do_marking_step(). This saves us from a lot
  3597       of hassle as, by yielding, we might allow a Full GC. If this
  3598       happens then objects will be compacted underneath our feet, the
  3599       heap might shrink, etc. We save checking for this by just
  3600       aborting and doing the yield right at the end.
  3602     From the above it follows that the do_marking_step() method should
  3603     be called in a loop (or, otherwise, regularly) until it completes.
  3605     If a marking step completes without its has_aborted() flag being
  3606     true, it means it has completed the current marking phase (and
  3607     also all other marking tasks have done so and have all synced up).
  3609     A method called regular_clock_call() is invoked "regularly" (in
  3610     sub ms intervals) throughout marking. It is this clock method that
  3611     checks all the abort conditions which were mentioned above and
  3612     decides when the task should abort. A work-based scheme is used to
  3613     trigger this clock method: when the number of object words the
  3614     marking phase has scanned or the number of references the marking
  3615     phase has visited reaches a given limit. Additional invocations of
  3616     the clock method have been planted in a few other strategic places
  3617     too. The initial reason for the clock method was to avoid calling
  3618     vtime too regularly, as it is quite expensive. So, once it was in
  3619     place, it was natural to piggy-back all the other conditions on it
  3620     too and not constantly check them throughout the code.
  3622  *****************************************************************************/
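// Note: the "call it in a loop" contract described above. A caller of
// do_marking_step() is expected to look roughly like the sketch below
// (the locals are illustrative, not the actual worker implementation):
//
//   do {
//     double mark_step_duration_ms = /* time target for this step */ 10.0;
//     task->do_marking_step(mark_step_duration_ms);
//     if (task->has_aborted()) {
//       // yield or sleep if we were asked to, then go round again, unless
//       // marking itself has been aborted (e.g. to allow a Full GC)
//     }
//   } while (task->has_aborted() && !cm->has_aborted());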
  3624 void CMTask::do_marking_step(double time_target_ms) {
  3625   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
  3626   assert(concurrent() == _cm->concurrent(), "they should be the same");
  3628   assert(concurrent() || _cm->region_stack_empty(),
  3629          "the region stack should have been cleared before remark");
  3630   assert(_region_finger == NULL,
  3631          "this should be non-null only when a region is being scanned");
  3633   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
  3634   assert(_task_queues != NULL, "invariant");
  3635   assert(_task_queue != NULL, "invariant");
  3636   assert(_task_queues->queue(_task_id) == _task_queue, "invariant");
  3638   assert(!_claimed,
  3639          "only one thread should claim this task at any one time");
  3641   // OK, this doesn't safeguard against all possible scenarios, as it is
  3642   // possible for two threads to set the _claimed flag at the same
  3643   // time. But it is only for debugging purposes anyway and it will
  3644   // catch most problems.
  3645   _claimed = true;
  3647   _start_time_ms = os::elapsedVTime() * 1000.0;
  3648   statsOnly( _interval_start_time_ms = _start_time_ms );
  3650   double diff_prediction_ms =
  3651     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
  3652   _time_target_ms = time_target_ms - diff_prediction_ms;
  3654   // set up the variables that are used in the work-based scheme to
  3655   // call the regular clock method
  3656   _words_scanned = 0;
  3657   _refs_reached  = 0;
  3658   recalculate_limits();
  3660   // clear all flags
  3661   clear_has_aborted();
  3662   _has_aborted_timed_out = false;
  3663   _draining_satb_buffers = false;
  3665   ++_calls;
  3667   if (_cm->verbose_low())
  3668     gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
  3669                            "target = %1.2lfms >>>>>>>>>>",
  3670                            _task_id, _calls, _time_target_ms);
  3672   // Set up the bitmap and oop closures. Anything that uses them is
  3673   // eventually called from this method, so it is OK to allocate these
  3674   // statically.
  3675   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
  3676   CMOopClosure    oop_closure(_g1h, _cm, this);
  3677   set_oop_closure(&oop_closure);
  3679   if (_cm->has_overflown()) {
  3680     // This can happen if the region stack or the mark stack overflows
  3681     // during a GC pause and this task, after a yield point,
  3682     // restarts. We have to abort as we need to get into the overflow
  3683     // protocol which happens right at the end of this task.
  3684     set_has_aborted();
  3687   // First drain any available SATB buffers. After this, we will not
  3688   // look at SATB buffers before the next invocation of this method.
  3689   // If enough completed SATB buffers are queued up, the regular clock
  3690   // will abort this task so that it restarts.
  3691   drain_satb_buffers();
  3692   // ...then partially drain the local queue and the global stack
  3693   drain_local_queue(true);
  3694   drain_global_stack(true);
  3696   // Then totally drain the region stack.  We will not look at
  3697   // it again before the next invocation of this method. Entries on
  3698   // the region stack are only added during evacuation pauses, for
  3699   // which we have to yield. When we do, we abort the task anyway so
  3700   // it will look at the region stack again when it restarts.
  3701   bitmap_closure.set_scanning_heap_region(false);
  3702   drain_region_stack(&bitmap_closure);
  3703   // ...then partially drain the local queue and the global stack
  3704   drain_local_queue(true);
  3705   drain_global_stack(true);
  3707   do {
  3708     if (!has_aborted() && _curr_region != NULL) {
  3709       // This means that we're already holding on to a region.
  3710       assert(_finger != NULL, "if region is not NULL, then the finger "
  3711              "should not be NULL either");
  3713       // We might have restarted this task after an evacuation pause
  3714       // which might have evacuated the region we're holding on to
  3715       // underneath our feet. Let's read its limit again to make sure
  3716       // that we do not iterate over a region of the heap that
  3717       // contains garbage (update_region_limit() will also move
  3718       // _finger to the start of the region if it is found empty).
  3719       update_region_limit();
  3720       // We will start from _finger not from the start of the region,
  3721       // as we might be restarting this task after aborting half-way
  3722       // through scanning this region. In this case, _finger points to
  3723       // the address where we last found a marked object. If this is a
  3724       // fresh region, _finger points to start().
  3725       MemRegion mr = MemRegion(_finger, _region_limit);
  3727       if (_cm->verbose_low())
  3728         gclog_or_tty->print_cr("[%d] we're scanning part "
  3729                                "["PTR_FORMAT", "PTR_FORMAT") "
  3730                                "of region "PTR_FORMAT,
  3731                                _task_id, _finger, _region_limit, _curr_region);
  3733       // Let's iterate over the bitmap of the part of the
  3734       // region that is left.
  3735       bitmap_closure.set_scanning_heap_region(true);
  3736       if (mr.is_empty() ||
  3737           _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
  3738         // We successfully completed iterating over the region. Now,
  3739         // let's give up the region.
  3740         giveup_current_region();
  3741         regular_clock_call();
  3742       } else {
  3743         assert(has_aborted(), "currently the only way to do so");
  3744         // The only way to abort the bitmap iteration is to return
  3745         // false from the do_bit() method. However, inside the
  3746         // do_bit() method we move the _finger to point to the
  3747         // object currently being looked at. So, if we bail out, we
  3748         // have definitely set _finger to something non-null.
  3749         assert(_finger != NULL, "invariant");
  3751         // Region iteration was actually aborted. So now _finger
  3752         // points to the address of the object we last scanned. If we
  3753         // leave it there, when we restart this task, we will rescan
  3754         // the object. It is easy to avoid this. We move the finger by
  3755         // enough to point to the next possible object header (the
  3756         // bitmap knows by how much we need to move it as it knows its
  3757         // granularity).
  3758         assert(_finger < _region_limit, "invariant");
  3759         HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
  3760         // Check if bitmap iteration was aborted while scanning the last object
  3761         if (new_finger >= _region_limit) {
  3762             giveup_current_region();
  3763         } else {
  3764             move_finger_to(new_finger);
  3768     // At this point we have either completed iterating over the
  3769     // region we were holding on to, or we have aborted.
  3771     // We then partially drain the local queue and the global stack.
  3772     // (Do we really need this?)
  3773     drain_local_queue(true);
  3774     drain_global_stack(true);
  3776     // Read the note on the claim_region() method on why it might
  3777     // return NULL with potentially more regions available for
  3778     // claiming and why we have to check out_of_regions() to determine
  3779     // whether we're done or not.
  3780     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
  3781       // We are going to try to claim a new region. We should have
  3782       // given up on the previous one.
  3783       // Separated the asserts so that we know which one fires.
  3784       assert(_curr_region  == NULL, "invariant");
  3785       assert(_finger       == NULL, "invariant");
  3786       assert(_region_limit == NULL, "invariant");
  3787       if (_cm->verbose_low())
  3788         gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
  3789       HeapRegion* claimed_region = _cm->claim_region(_task_id);
  3790       if (claimed_region != NULL) {
  3791         // Yes, we managed to claim one
  3792         statsOnly( ++_regions_claimed );
  3794         if (_cm->verbose_low())
  3795           gclog_or_tty->print_cr("[%d] we successfully claimed "
  3796                                  "region "PTR_FORMAT,
  3797                                  _task_id, claimed_region);
  3799         setup_for_region(claimed_region);
  3800         assert(_curr_region == claimed_region, "invariant");
  3802       // It is important to call the regular clock here. It might take
  3803       // a while to claim a region if, for example, we hit a large
  3804       // block of empty regions. So we need to call the regular clock
  3805       // method once round the loop to make sure it's called
  3806       // frequently enough.
  3807       regular_clock_call();
  3810     if (!has_aborted() && _curr_region == NULL) {
  3811       assert(_cm->out_of_regions(),
  3812              "at this point we should be out of regions");
  3814   } while ( _curr_region != NULL && !has_aborted());
  3816   if (!has_aborted()) {
  3817     // We cannot check whether the global stack is empty, since other
  3818     // tasks might be pushing objects to it concurrently. We also cannot
  3819     // check if the region stack is empty because if a thread is aborting
  3820     // it can push a partially done region back.
  3821     assert(_cm->out_of_regions(),
  3822            "at this point we should be out of regions");
  3824     if (_cm->verbose_low())
  3825       gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
  3827     // Try to reduce the number of available SATB buffers so that
  3828     // remark has less work to do.
  3829     drain_satb_buffers();
  3832   // Since we've done everything else, we can now totally drain the
  3833   // local queue and global stack.
  3834   drain_local_queue(false);
  3835   drain_global_stack(false);
  3837   // Attempt at work stealing from other task's queues.
  3838   if (!has_aborted()) {
  3839     // We have not aborted. This means that we have finished all that
  3840     // we could. Let's try to do some stealing...
  3842     // We cannot check whether the global stack is empty, since other
  3843     // tasks might be pushing objects to it concurrently. We also cannot
  3844     // check if the region stack is empty because if a thread is aborting
  3845     // it can push a partially done region back.
  3846     assert(_cm->out_of_regions() && _task_queue->size() == 0,
  3847            "only way to reach here");
  3849     if (_cm->verbose_low())
  3850       gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
  3852     while (!has_aborted()) {
  3853       oop obj;
  3854       statsOnly( ++_steal_attempts );
  3856       if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
  3857         if (_cm->verbose_medium())
  3858           gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
  3859                                  _task_id, (void*) obj);
  3861         statsOnly( ++_steals );
  3863         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
  3864                "any stolen object should be marked");
  3865         scan_object(obj);
  3867         // And since we're towards the end, let's totally drain the
  3868         // local queue and global stack.
  3869         drain_local_queue(false);
  3870         drain_global_stack(false);
  3871       } else {
  3872         break;
  3877   // We still haven't aborted. Now, let's try to get into the
  3878   // termination protocol.
  3879   if (!has_aborted()) {
  3880     // We cannot check whether the global stack is empty, since other
  3881     // tasks might be concurrently pushing objects on it. We also cannot
  3882     // check if the region stack is empty because if a thread is aborting
  3883     // it can push a partially done region back.
  3884     // Separated the asserts so that we know which one fires.
  3885     assert(_cm->out_of_regions(), "only way to reach here");
  3886     assert(_task_queue->size() == 0, "only way to reach here");
  3888     if (_cm->verbose_low())
  3889       gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
  3891     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
  3892     // The CMTask class also extends the TerminatorTerminator class,
  3893     // hence its should_exit_termination() method will also decide
  3894     // whether to exit the termination protocol or not.
  3895     bool finished = _cm->terminator()->offer_termination(this);
  3896     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
  3897     _termination_time_ms +=
  3898       termination_end_time_ms - _termination_start_time_ms;
  3900     if (finished) {
  3901       // We're all done.
  3903       if (_task_id == 0) {
  3904         // let's allow task 0 to do this
  3905         if (concurrent()) {
  3906           assert(_cm->concurrent_marking_in_progress(), "invariant");
  3907           // we need to set this to false before the next
  3908           // safepoint. This way we ensure that the marking phase
  3909           // doesn't observe any more heap expansions.
  3910           _cm->clear_concurrent_marking_in_progress();
  3914       // We can now guarantee that the global stack is empty, since
  3915       // all other tasks have finished. We separated the guarantees so
  3916       // that, if a condition is false, we can immediately find out
  3917       // which one.
  3918       guarantee(_cm->out_of_regions(), "only way to reach here");
  3919       guarantee(_cm->region_stack_empty(), "only way to reach here");
  3920       guarantee(_cm->mark_stack_empty(), "only way to reach here");
  3921       guarantee(_task_queue->size() == 0, "only way to reach here");
  3922       guarantee(!_cm->has_overflown(), "only way to reach here");
  3923       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
  3924       guarantee(!_cm->region_stack_overflow(), "only way to reach here");
  3926       if (_cm->verbose_low())
  3927         gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
  3928     } else {
  3929       // Apparently there's more work to do. Let's abort this task. The
  3930       // caller will restart it and we can hopefully find more things to do.
  3932       if (_cm->verbose_low())
  3933         gclog_or_tty->print_cr("[%d] apparently there is more work to do", _task_id);
  3935       set_has_aborted();
  3936       statsOnly( ++_aborted_termination );
  3940   // Mainly for debugging purposes to make sure that a pointer to the
  3941   // closure which was statically allocated in this frame doesn't
  3942   // escape it by accident.
  3943   set_oop_closure(NULL);
  3944   double end_time_ms = os::elapsedVTime() * 1000.0;
  3945   double elapsed_time_ms = end_time_ms - _start_time_ms;
  3946   // Update the step history.
  3947   _step_times_ms.add(elapsed_time_ms);
  3949   if (has_aborted()) {
  3950     // The task was aborted for some reason.
  3952     statsOnly( ++_aborted );
  3954     if (_has_aborted_timed_out) {
  3955       double diff_ms = elapsed_time_ms - _time_target_ms;
  3956       // Keep statistics of how well we did with respect to hitting
  3957       // our target only if we actually timed out (if we aborted for
  3958       // other reasons, then the results might get skewed).
  3959       _marking_step_diffs_ms.add(diff_ms);
  3962     if (_cm->has_overflown()) {
  3963       // This is the interesting one. We aborted because a global
  3964       // overflow was raised. This means we have to restart the
  3965       // marking phase and start iterating over regions. However, in
  3966       // order to do this we have to make sure that all tasks stop
  3967       // what they are doing and re-initialise in a safe manner. We
  3968       // will achieve this with the use of two barrier sync points.
  3970       if (_cm->verbose_low())
  3971         gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
  3973       _cm->enter_first_sync_barrier(_task_id);
  3974       // When we exit this sync barrier we know that all tasks have
  3975       // stopped doing marking work. So, it's now safe to
  3976       // re-initialise our data structures. At the end of this method,
  3977       // task 0 will clear the global data structures.
  3979       statsOnly( ++_aborted_overflow );
  3981       // We clear the local state of this task...
  3982       clear_region_fields();
  3984       // ...and enter the second barrier.
  3985       _cm->enter_second_sync_barrier(_task_id);
  3986       // At this point everything has been re-initialised and we're
  3987       // ready to restart.
  3990     if (_cm->verbose_low()) {
  3991       gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
  3992                              "elapsed = %1.2lfms <<<<<<<<<<",
  3993                              _task_id, _time_target_ms, elapsed_time_ms);
  3994       if (_cm->has_aborted())
  3995         gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
  3996                                _task_id);
  3998   } else {
  3999     if (_cm->verbose_low())
  4000       gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
  4001                              "elapsed = %1.2lfms <<<<<<<<<<",
  4002                              _task_id, _time_target_ms, elapsed_time_ms);
  4005   _claimed = false;
  4008 CMTask::CMTask(int task_id,
  4009                ConcurrentMark* cm,
  4010                CMTaskQueue* task_queue,
  4011                CMTaskQueueSet* task_queues)
  4012   : _g1h(G1CollectedHeap::heap()),
  4013     _task_id(task_id), _cm(cm),
  4014     _claimed(false),
  4015     _nextMarkBitMap(NULL), _hash_seed(17),
  4016     _task_queue(task_queue),
  4017     _task_queues(task_queues),
  4018     _oop_closure(NULL) {
  4019   guarantee(task_queue != NULL, "invariant");
  4020   guarantee(task_queues != NULL, "invariant");
  4022   statsOnly( _clock_due_to_scanning = 0;
  4023              _clock_due_to_marking  = 0 );
  4025   _marking_step_diffs_ms.add(0.5);
