src/share/vm/gc_implementation/g1/concurrentMark.cpp

author:      coleenp
date:        Mon, 02 Jul 2012 13:11:28 -0400
changeset:   3901 24b9c7f4cae6
parent:      3900 d2a62e0f25eb
child:       3924 3a431b605145
permissions: -rw-r--r--

Merge
     1 /*
     2  * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.
     8  *
     9  * This code is distributed in the hope that it will be useful, but WITHOUT
    10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    12  * version 2 for more details (a copy is included in the LICENSE file that
    13  * accompanied this code).
    14  *
    15  * You should have received a copy of the GNU General Public License version
    16  * 2 along with this work; if not, write to the Free Software Foundation,
    17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    18  *
    19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    20  * or visit www.oracle.com if you need additional information or have any
    21  * questions.
    22  *
    23  */
    25 #include "precompiled.hpp"
    26 #include "classfile/symbolTable.hpp"
    27 #include "gc_implementation/g1/concurrentMark.inline.hpp"
    28 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
    29 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
    30 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
    31 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
    32 #include "gc_implementation/g1/g1Log.hpp"
    33 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
    34 #include "gc_implementation/g1/g1RemSet.hpp"
    35 #include "gc_implementation/g1/heapRegion.inline.hpp"
    36 #include "gc_implementation/g1/heapRegionRemSet.hpp"
    37 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
    38 #include "gc_implementation/shared/vmGCOperations.hpp"
    39 #include "memory/genOopClosures.inline.hpp"
    40 #include "memory/referencePolicy.hpp"
    41 #include "memory/resourceArea.hpp"
    42 #include "oops/oop.inline.hpp"
    43 #include "runtime/handles.inline.hpp"
    44 #include "runtime/java.hpp"
    45 #include "services/memTracker.hpp"
    47 // Concurrent marking bit map wrapper
    49 CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) :
    50   _bm((uintptr_t*)NULL,0),
    51   _shifter(shifter) {
    52   _bmStartWord = (HeapWord*)(rs.base());
    53   _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
    54   ReservedSpace brs(ReservedSpace::allocation_align_size_up(
    55                      (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
    57   MemTracker::record_virtual_memory_type((address)brs.base(), mtGC);
    59   guarantee(brs.is_reserved(), "couldn't allocate concurrent marking bit map");
     60   // For now we'll just commit all of the bit map up front.
    61   // Later on we'll try to be more parsimonious with swap.
    62   guarantee(_virtual_space.initialize(brs, brs.size()),
     63             "couldn't reserve backing store for concurrent marking bit map");
    64   assert(_virtual_space.committed_size() == brs.size(),
    65          "didn't reserve backing store for all of concurrent marking bit map?");
    66   _bm.set_map((uintptr_t*)_virtual_space.low());
    67   assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
    68          _bmWordSize, "inconsistency in bit map sizing");
    69   _bm.set_size(_bmWordSize >> _shifter);
    70 }
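        // Sizing sketch (illustrative figures, assuming a 64-bit VM): each bit
        // of the marking bitmap covers (1 << _shifter) heap words, so the
        // backing store reserved above needs roughly
        // _bmWordSize >> (_shifter + LogBitsPerByte) bytes. For example, a
        // 1 GB reservation (HeapWordSize == 8, so about 2^27 heap words) with
        // _shifter == 0 needs 2^24 bytes, i.e. a 16 MB bitmap.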
    72 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
    73                                                HeapWord* limit) const {
    74   // First we must round addr *up* to a possible object boundary.
    75   addr = (HeapWord*)align_size_up((intptr_t)addr,
    76                                   HeapWordSize << _shifter);
    77   size_t addrOffset = heapWordToOffset(addr);
    78   if (limit == NULL) {
    79     limit = _bmStartWord + _bmWordSize;
    80   }
    81   size_t limitOffset = heapWordToOffset(limit);
    82   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
    83   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
    84   assert(nextAddr >= addr, "get_next_one postcondition");
    85   assert(nextAddr == limit || isMarked(nextAddr),
    86          "get_next_one postcondition");
    87   return nextAddr;
    88 }
    90 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
    91                                                  HeapWord* limit) const {
    92   size_t addrOffset = heapWordToOffset(addr);
    93   if (limit == NULL) {
    94     limit = _bmStartWord + _bmWordSize;
    95   }
    96   size_t limitOffset = heapWordToOffset(limit);
    97   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
    98   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
    99   assert(nextAddr >= addr, "get_next_one postcondition");
   100   assert(nextAddr == limit || !isMarked(nextAddr),
   101          "get_next_one postcondition");
   102   return nextAddr;
   103 }
   105 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
   106   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
   107   return (int) (diff >> _shifter);
   108 }
   110 #ifndef PRODUCT
   111 bool CMBitMapRO::covers(ReservedSpace rs) const {
   112   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
   113   assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
   114          "size inconsistency");
   115   return _bmStartWord == (HeapWord*)(rs.base()) &&
   116          _bmWordSize  == rs.size()>>LogHeapWordSize;
   117 }
   118 #endif
   120 void CMBitMap::clearAll() {
   121   _bm.clear();
   122   return;
   123 }
   125 void CMBitMap::markRange(MemRegion mr) {
   126   mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
   127   assert(!mr.is_empty(), "unexpected empty region");
   128   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
   129           ((HeapWord *) mr.end())),
   130          "markRange memory region end is not card aligned");
   131   // convert address range into offset range
   132   _bm.at_put_range(heapWordToOffset(mr.start()),
   133                    heapWordToOffset(mr.end()), true);
   134 }
   136 void CMBitMap::clearRange(MemRegion mr) {
   137   mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
   138   assert(!mr.is_empty(), "unexpected empty region");
   139   // convert address range into offset range
   140   _bm.at_put_range(heapWordToOffset(mr.start()),
   141                    heapWordToOffset(mr.end()), false);
   142 }
   144 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
   145                                             HeapWord* end_addr) {
   146   HeapWord* start = getNextMarkedWordAddress(addr);
   147   start = MIN2(start, end_addr);
   148   HeapWord* end   = getNextUnmarkedWordAddress(start);
   149   end = MIN2(end, end_addr);
   150   assert(start <= end, "Consistency check");
   151   MemRegion mr(start, end);
   152   if (!mr.is_empty()) {
   153     clearRange(mr);
   154   }
   155   return mr;
   156 }
   158 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
   159   _base(NULL), _cm(cm)
   160 #ifdef ASSERT
   161   , _drain_in_progress(false)
   162   , _drain_in_progress_yields(false)
   163 #endif
   164 {}
   166 void CMMarkStack::allocate(size_t size) {
   167   _base = NEW_C_HEAP_ARRAY(oop, size, mtGC);
   168   if (_base == NULL) {
   169     vm_exit_during_initialization("Failed to allocate CM region mark stack");
   170   }
   171   _index = 0;
   172   _capacity = (jint) size;
   173   _saved_index = -1;
   174   NOT_PRODUCT(_max_depth = 0);
   175 }
   177 CMMarkStack::~CMMarkStack() {
   178   if (_base != NULL) {
   179     FREE_C_HEAP_ARRAY(oop, _base, mtGC);
   180   }
   181 }
   183 void CMMarkStack::par_push(oop ptr) {
   184   while (true) {
   185     if (isFull()) {
   186       _overflow = true;
   187       return;
   188     }
   189     // Otherwise...
   190     jint index = _index;
   191     jint next_index = index+1;
   192     jint res = Atomic::cmpxchg(next_index, &_index, index);
   193     if (res == index) {
   194       _base[index] = ptr;
   195       // Note that we don't maintain this atomically.  We could, but it
   196       // doesn't seem necessary.
   197       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
   198       return;
   199     }
   200     // Otherwise, we need to try again.
   201   }
   202 }
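        // Illustration of the claim-then-publish pattern above: if two threads
        // both read _index == 5, both attempt cmpxchg(6, &_index, 5); exactly
        // one succeeds and stores into _base[5], while the loser re-reads
        // _index (now 6) and retries. _max_depth is a debug-only statistic,
        // which is why it is deliberately not maintained atomically.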
   204 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
   205   while (true) {
   206     if (isFull()) {
   207       _overflow = true;
   208       return;
   209     }
   210     // Otherwise...
   211     jint index = _index;
   212     jint next_index = index + n;
   213     if (next_index > _capacity) {
   214       _overflow = true;
   215       return;
   216     }
   217     jint res = Atomic::cmpxchg(next_index, &_index, index);
   218     if (res == index) {
   219       for (int i = 0; i < n; i++) {
   220         int ind = index + i;
   221         assert(ind < _capacity, "By overflow test above.");
   222         _base[ind] = ptr_arr[i];
   223       }
   224       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
   225       return;
   226     }
   227     // Otherwise, we need to try again.
   228   }
   229 }
   232 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
   233   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
   234   jint start = _index;
   235   jint next_index = start + n;
   236   if (next_index > _capacity) {
   237     _overflow = true;
   238     return;
   239   }
   240   // Otherwise.
   241   _index = next_index;
   242   for (int i = 0; i < n; i++) {
   243     int ind = start + i;
   244     assert(ind < _capacity, "By overflow test above.");
   245     _base[ind] = ptr_arr[i];
   246   }
   247 }
   250 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
   251   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
   252   jint index = _index;
   253   if (index == 0) {
   254     *n = 0;
   255     return false;
   256   } else {
   257     int k = MIN2(max, index);
   258     jint new_ind = index - k;
   259     for (int j = 0; j < k; j++) {
   260       ptr_arr[j] = _base[new_ind + j];
   261     }
   262     _index = new_ind;
   263     *n = k;
   264     return true;
   265   }
   266 }
   268 template<class OopClosureClass>
   269 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
   270   assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
   271          || SafepointSynchronize::is_at_safepoint(),
   272          "Drain recursion must be yield-safe.");
   273   bool res = true;
   274   debug_only(_drain_in_progress = true);
   275   debug_only(_drain_in_progress_yields = yield_after);
   276   while (!isEmpty()) {
   277     oop newOop = pop();
   278     assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
   279     assert(newOop->is_oop(), "Expected an oop");
   280     assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
   281            "only grey objects on this stack");
   282     newOop->oop_iterate(cl);
   283     if (yield_after && _cm->do_yield_check()) {
   284       res = false;
   285       break;
   286     }
   287   }
   288   debug_only(_drain_in_progress = false);
   289   return res;
   290 }
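        // Usage sketch (the closure type here is hypothetical, for
        // illustration only): a caller drains grey objects with something like
        //   SomeOopClosure cl(this);
        //   _markStack.drain(&cl, _nextMarkBitMap, true /* yield_after */);
        // Each popped object is iterated with the closure; with yield_after
        // set, the drain returns false early if the yield check fires.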
   292 void CMMarkStack::note_start_of_gc() {
   293   assert(_saved_index == -1,
   294          "note_start_of_gc()/end_of_gc() bracketed incorrectly");
   295   _saved_index = _index;
   296 }
   298 void CMMarkStack::note_end_of_gc() {
   299   // This is intentionally a guarantee, instead of an assert. If we
   300   // accidentally add something to the mark stack during GC, it
    301   // will be a correctness issue so it's better if we crash. We'll
   302   // only check this once per GC anyway, so it won't be a performance
   303   // issue in any way.
   304   guarantee(_saved_index == _index,
   305             err_msg("saved index: %d index: %d", _saved_index, _index));
   306   _saved_index = -1;
   307 }
   309 void CMMarkStack::oops_do(OopClosure* f) {
   310   assert(_saved_index == _index,
   311          err_msg("saved index: %d index: %d", _saved_index, _index));
   312   for (int i = 0; i < _index; i += 1) {
   313     f->do_oop(&_base[i]);
   314   }
   315 }
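        // Intended bracketing (sketch): an evacuation pause calls
        // note_start_of_gc(), may then use oops_do() to visit the entries
        // already on the stack, and finishes with note_end_of_gc(); the
        // guarantee there catches any accidental pushes during the pause.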
   317 bool ConcurrentMark::not_yet_marked(oop obj) const {
   318   return (_g1h->is_obj_ill(obj)
   319           || (_g1h->is_in_permanent(obj)
   320               && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
   321 }
   323 CMRootRegions::CMRootRegions() :
   324   _young_list(NULL), _cm(NULL), _scan_in_progress(false),
   325   _should_abort(false),  _next_survivor(NULL) { }
   327 void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
   328   _young_list = g1h->young_list();
   329   _cm = cm;
   330 }
   332 void CMRootRegions::prepare_for_scan() {
   333   assert(!scan_in_progress(), "pre-condition");
   335   // Currently, only survivors can be root regions.
   336   assert(_next_survivor == NULL, "pre-condition");
   337   _next_survivor = _young_list->first_survivor_region();
   338   _scan_in_progress = (_next_survivor != NULL);
   339   _should_abort = false;
   340 }
   342 HeapRegion* CMRootRegions::claim_next() {
   343   if (_should_abort) {
   344     // If someone has set the should_abort flag, we return NULL to
   345     // force the caller to bail out of their loop.
   346     return NULL;
   347   }
   349   // Currently, only survivors can be root regions.
   350   HeapRegion* res = _next_survivor;
   351   if (res != NULL) {
   352     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
   353     // Read it again in case it changed while we were waiting for the lock.
   354     res = _next_survivor;
   355     if (res != NULL) {
   356       if (res == _young_list->last_survivor_region()) {
   357         // We just claimed the last survivor so store NULL to indicate
   358         // that we're done.
   359         _next_survivor = NULL;
   360       } else {
   361         _next_survivor = res->get_next_young_region();
   362       }
   363     } else {
   364       // Someone else claimed the last survivor while we were trying
   365       // to take the lock so nothing else to do.
   366     }
   367   }
   368   assert(res == NULL || res->is_survivor(), "post-condition");
   370   return res;
   371 }
   373 void CMRootRegions::scan_finished() {
   374   assert(scan_in_progress(), "pre-condition");
   376   // Currently, only survivors can be root regions.
   377   if (!_should_abort) {
   378     assert(_next_survivor == NULL, "we should have claimed all survivors");
   379   }
   380   _next_survivor = NULL;
   382   {
   383     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
   384     _scan_in_progress = false;
   385     RootRegionScan_lock->notify_all();
   386   }
   387 }
   389 bool CMRootRegions::wait_until_scan_finished() {
   390   if (!scan_in_progress()) return false;
   392   {
   393     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
   394     while (scan_in_progress()) {
   395       RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
   396     }
   397   }
   398   return true;
   399 }
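        // Protocol sketch: prepare_for_scan() is called during the
        // initial-mark pause, marking workers then loop on claim_next() until
        // it returns NULL, and scan_finished() is called once all survivor
        // regions have been claimed. A pause that starts in between is
        // expected to call wait_until_scan_finished() so that root region
        // scanning always completes first.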
   401 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
   402 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
   403 #endif // _MSC_VER
   405 uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
   406   return MAX2((n_par_threads + 2) / 4, 1U);
   407 }
   409 ConcurrentMark::ConcurrentMark(ReservedSpace rs, uint max_regions) :
   410   _markBitMap1(rs, MinObjAlignment - 1),
   411   _markBitMap2(rs, MinObjAlignment - 1),
   413   _parallel_marking_threads(0),
   414   _max_parallel_marking_threads(0),
   415   _sleep_factor(0.0),
   416   _marking_task_overhead(1.0),
   417   _cleanup_sleep_factor(0.0),
   418   _cleanup_task_overhead(1.0),
   419   _cleanup_list("Cleanup List"),
   420   _region_bm((BitMap::idx_t) max_regions, false /* in_resource_area*/),
   421   _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
   422            CardTableModRefBS::card_shift,
   423            false /* in_resource_area*/),
   425   _prevMarkBitMap(&_markBitMap1),
   426   _nextMarkBitMap(&_markBitMap2),
   428   _markStack(this),
   429   // _finger set in set_non_marking_state
   431   _max_task_num(MAX2((uint)ParallelGCThreads, 1U)),
   432   // _active_tasks set in set_non_marking_state
   433   // _tasks set inside the constructor
   434   _task_queues(new CMTaskQueueSet((int) _max_task_num)),
   435   _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),
   437   _has_overflown(false),
   438   _concurrent(false),
   439   _has_aborted(false),
   440   _restart_for_overflow(false),
   441   _concurrent_marking_in_progress(false),
   443   // _verbose_level set below
   445   _init_times(),
   446   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
   447   _cleanup_times(),
   448   _total_counting_time(0.0),
   449   _total_rs_scrub_time(0.0),
   451   _parallel_workers(NULL),
   453   _count_card_bitmaps(NULL),
   454   _count_marked_bytes(NULL) {
   455   CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
   456   if (verbose_level < no_verbose) {
   457     verbose_level = no_verbose;
   458   }
   459   if (verbose_level > high_verbose) {
   460     verbose_level = high_verbose;
   461   }
   462   _verbose_level = verbose_level;
   464   if (verbose_low()) {
   465     gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
   466                            "heap end = "PTR_FORMAT, _heap_start, _heap_end);
   467   }
   469   _markStack.allocate(MarkStackSize);
   471   // Create & start a ConcurrentMark thread.
   472   _cmThread = new ConcurrentMarkThread(this);
   473   assert(cmThread() != NULL, "CM Thread should have been created");
   474   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
   476   _g1h = G1CollectedHeap::heap();
   477   assert(CGC_lock != NULL, "Where's the CGC_lock?");
   478   assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
   479   assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
   481   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
   482   satb_qs.set_buffer_size(G1SATBBufferSize);
   484   _root_regions.init(_g1h, this);
   486   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num, mtGC);
   487   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num, mtGC);
   489   _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_task_num, mtGC);
   490   _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_task_num, mtGC);
   492   BitMap::idx_t card_bm_size = _card_bm.size();
   494   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
   495   _active_tasks = _max_task_num;
   496   for (int i = 0; i < (int) _max_task_num; ++i) {
   497     CMTaskQueue* task_queue = new CMTaskQueue();
   498     task_queue->initialize();
   499     _task_queues->register_queue(i, task_queue);
   501     _count_card_bitmaps[i] = BitMap(card_bm_size, false);
   502     _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, (size_t) max_regions, mtGC);
   504     _tasks[i] = new CMTask(i, this,
   505                            _count_marked_bytes[i],
   506                            &_count_card_bitmaps[i],
   507                            task_queue, _task_queues);
   509     _accum_task_vtime[i] = 0.0;
   510   }
   512   // Calculate the card number for the bottom of the heap. Used
   513   // in biasing indexes into the accounting card bitmaps.
   514   _heap_bottom_card_num =
   515     intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
   516                                 CardTableModRefBS::card_shift);
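        // Biasing example (illustrative addresses): with the heap reserved at
        // 0x80000000 and 512-byte cards (card_shift == 9),
        // _heap_bottom_card_num is 0x80000000 >> 9 == 0x400000. A card index
        // into the counting bitmaps is then computed as
        // (addr >> card_shift) - _heap_bottom_card_num, so the bottom of the
        // heap maps to index 0.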
   518   // Clear all the liveness counting data
   519   clear_all_count_data();
   521   if (ConcGCThreads > ParallelGCThreads) {
   522     vm_exit_during_initialization("Can't have more ConcGCThreads "
   523                                   "than ParallelGCThreads.");
   524   }
   525   if (ParallelGCThreads == 0) {
   526     // if we are not running with any parallel GC threads we will not
   527     // spawn any marking threads either
   528     _parallel_marking_threads =       0;
   529     _max_parallel_marking_threads =   0;
   530     _sleep_factor             =     0.0;
   531     _marking_task_overhead    =     1.0;
   532   } else {
   533     if (ConcGCThreads > 0) {
   534       // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
   535       // if both are set
   537       _parallel_marking_threads = (uint) ConcGCThreads;
   538       _max_parallel_marking_threads = _parallel_marking_threads;
   539       _sleep_factor             = 0.0;
   540       _marking_task_overhead    = 1.0;
   541     } else if (G1MarkingOverheadPercent > 0) {
   542       // we will calculate the number of parallel marking threads
   543       // based on a target overhead with respect to the soft real-time
   544       // goal
   546       double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
   547       double overall_cm_overhead =
   548         (double) MaxGCPauseMillis * marking_overhead /
   549         (double) GCPauseIntervalMillis;
   550       double cpu_ratio = 1.0 / (double) os::processor_count();
   551       double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
   552       double marking_task_overhead =
   553         overall_cm_overhead / marking_thread_num *
   554                                                 (double) os::processor_count();
   555       double sleep_factor =
   556                          (1.0 - marking_task_overhead) / marking_task_overhead;
   558       _parallel_marking_threads = (uint) marking_thread_num;
   559       _max_parallel_marking_threads = _parallel_marking_threads;
   560       _sleep_factor             = sleep_factor;
   561       _marking_task_overhead    = marking_task_overhead;
   562     } else {
   563       _parallel_marking_threads = scale_parallel_threads((uint)ParallelGCThreads);
   564       _max_parallel_marking_threads = _parallel_marking_threads;
   565       _sleep_factor             = 0.0;
   566       _marking_task_overhead    = 1.0;
   567     }
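        // Worked example for the G1MarkingOverheadPercent branch (illustrative
        // values, not defaults): with G1MarkingOverheadPercent == 10,
        // MaxGCPauseMillis == 200, GCPauseIntervalMillis == 1000 and 8 CPUs:
        //   marking_overhead      = 0.10
        //   overall_cm_overhead   = 200 * 0.10 / 1000     = 0.02
        //   cpu_ratio             = 1 / 8                 = 0.125
        //   marking_thread_num    = ceil(0.02 / 0.125)    = 1
        //   marking_task_overhead = 0.02 / 1 * 8          = 0.16
        //   sleep_factor          = (1 - 0.16) / 0.16     = 5.25
        // i.e. one marking thread that sleeps roughly five times as long as
        // it runs, keeping within the requested overhead.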
   569     if (parallel_marking_threads() > 1) {
   570       _cleanup_task_overhead = 1.0;
   571     } else {
   572       _cleanup_task_overhead = marking_task_overhead();
   573     }
   574     _cleanup_sleep_factor =
   575                      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
   577 #if 0
   578     gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
   579     gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
   580     gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
   581     gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
   582     gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
   583 #endif
   585     guarantee(parallel_marking_threads() > 0, "peace of mind");
   586     _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
   587          _max_parallel_marking_threads, false, true);
   588     if (_parallel_workers == NULL) {
   589       vm_exit_during_initialization("Failed necessary allocation.");
   590     } else {
   591       _parallel_workers->initialize_workers();
   592     }
   593   }
   595   // so that the call below can read a sensible value
   596   _heap_start = (HeapWord*) rs.base();
   597   set_non_marking_state();
   598 }
   600 void ConcurrentMark::update_g1_committed(bool force) {
   601   // If concurrent marking is not in progress, then we do not need to
   602   // update _heap_end.
   603   if (!concurrent_marking_in_progress() && !force) return;
   605   MemRegion committed = _g1h->g1_committed();
   606   assert(committed.start() == _heap_start, "start shouldn't change");
   607   HeapWord* new_end = committed.end();
   608   if (new_end > _heap_end) {
   609     // The heap has been expanded.
   611     _heap_end = new_end;
   612   }
   613   // Notice that the heap can also shrink. However, this only happens
   614   // during a Full GC (at least currently) and the entire marking
   615   // phase will bail out and the task will not be restarted. So, let's
   616   // do nothing.
   617 }
   619 void ConcurrentMark::reset() {
   620   // Starting values for these two. This should be called in a STW
    621   // phase. CM will be notified of any future g1_committed
    622   // expansions at the end of evacuation pauses, when tasks are
    623   // inactive.
   624   MemRegion committed = _g1h->g1_committed();
   625   _heap_start = committed.start();
   626   _heap_end   = committed.end();
   628   // Separated the asserts so that we know which one fires.
   629   assert(_heap_start != NULL, "heap bounds should look ok");
   630   assert(_heap_end != NULL, "heap bounds should look ok");
   631   assert(_heap_start < _heap_end, "heap bounds should look ok");
   633   // reset all the marking data structures and any necessary flags
   634   clear_marking_state();
   636   if (verbose_low()) {
   637     gclog_or_tty->print_cr("[global] resetting");
   638   }
    640   // We do reset all of them, since different phases will use a
   641   // different number of active threads. So, it's easiest to have all
   642   // of them ready.
   643   for (int i = 0; i < (int) _max_task_num; ++i) {
   644     _tasks[i]->reset(_nextMarkBitMap);
   645   }
   647   // we need this to make sure that the flag is on during the evac
   648   // pause with initial mark piggy-backed
   649   set_concurrent_marking_in_progress();
   650 }
   652 void ConcurrentMark::set_phase(uint active_tasks, bool concurrent) {
   653   assert(active_tasks <= _max_task_num, "we should not have more");
   655   _active_tasks = active_tasks;
   656   // Need to update the three data structures below according to the
   657   // number of active threads for this phase.
   658   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
   659   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
   660   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
   662   _concurrent = concurrent;
   663   // We propagate this to all tasks, not just the active ones.
   664   for (int i = 0; i < (int) _max_task_num; ++i)
   665     _tasks[i]->set_concurrent(concurrent);
   667   if (concurrent) {
   668     set_concurrent_marking_in_progress();
   669   } else {
   670     // We currently assume that the concurrent flag has been set to
   671     // false before we start remark. At this point we should also be
   672     // in a STW phase.
   673     assert(!concurrent_marking_in_progress(), "invariant");
   674     assert(_finger == _heap_end, "only way to get here");
   675     update_g1_committed(true);
   676   }
   677 }
   679 void ConcurrentMark::set_non_marking_state() {
   680   // We set the global marking state to some default values when we're
   681   // not doing marking.
   682   clear_marking_state();
   683   _active_tasks = 0;
   684   clear_concurrent_marking_in_progress();
   685 }
   687 ConcurrentMark::~ConcurrentMark() {
   688   // The ConcurrentMark instance is never freed.
   689   ShouldNotReachHere();
   690 }
   692 void ConcurrentMark::clearNextBitmap() {
   693   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   694   G1CollectorPolicy* g1p = g1h->g1_policy();
   696   // Make sure that the concurrent mark thread looks to still be in
   697   // the current cycle.
   698   guarantee(cmThread()->during_cycle(), "invariant");
   700   // We are finishing up the current cycle by clearing the next
   701   // marking bitmap and getting it ready for the next cycle. During
   702   // this time no other cycle can start. So, let's make sure that this
   703   // is the case.
   704   guarantee(!g1h->mark_in_progress(), "invariant");
   706   // clear the mark bitmap (no grey objects to start with).
   707   // We need to do this in chunks and offer to yield in between
   708   // each chunk.
   709   HeapWord* start  = _nextMarkBitMap->startWord();
   710   HeapWord* end    = _nextMarkBitMap->endWord();
   711   HeapWord* cur    = start;
   712   size_t chunkSize = M;
   713   while (cur < end) {
   714     HeapWord* next = cur + chunkSize;
   715     if (next > end) {
   716       next = end;
   717     }
   718     MemRegion mr(cur,next);
   719     _nextMarkBitMap->clearRange(mr);
   720     cur = next;
   721     do_yield_check();
   723     // Repeat the asserts from above. We'll do them as asserts here to
   724     // minimize their overhead on the product. However, we'll have
   725     // them as guarantees at the beginning / end of the bitmap
   726     // clearing to get some checking in the product.
   727     assert(cmThread()->during_cycle(), "invariant");
   728     assert(!g1h->mark_in_progress(), "invariant");
   729   }
   731   // Clear the liveness counting data
   732   clear_all_count_data();
   734   // Repeat the asserts from above.
   735   guarantee(cmThread()->during_cycle(), "invariant");
   736   guarantee(!g1h->mark_in_progress(), "invariant");
   737 }
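        // Chunking arithmetic (illustrative): chunkSize is M (2^20) heap
        // words, i.e. 8 MB of heap per chunk on a 64-bit VM, so clearing the
        // bitmap for a 1 GB heap takes 128 chunks, with a do_yield_check()
        // between consecutive chunks.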
   739 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
   740 public:
   741   bool doHeapRegion(HeapRegion* r) {
   742     if (!r->continuesHumongous()) {
   743       r->note_start_of_marking();
   744     }
   745     return false;
   746   }
   747 };
   749 void ConcurrentMark::checkpointRootsInitialPre() {
   750   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
   751   G1CollectorPolicy* g1p = g1h->g1_policy();
   753   _has_aborted = false;
   755 #ifndef PRODUCT
   756   if (G1PrintReachableAtInitialMark) {
   757     print_reachable("at-cycle-start",
   758                     VerifyOption_G1UsePrevMarking, true /* all */);
   759   }
   760 #endif
   762   // Initialise marking structures. This has to be done in a STW phase.
   763   reset();
   765   // For each region note start of marking.
   766   NoteStartOfMarkHRClosure startcl;
   767   g1h->heap_region_iterate(&startcl);
   768 }
   771 void ConcurrentMark::checkpointRootsInitialPost() {
   772   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
   774   // If we force an overflow during remark, the remark operation will
   775   // actually abort and we'll restart concurrent marking. If we always
    776   // force an overflow during remark we'll never actually complete the
    777   // marking phase. So, we initialize this here, at the start of the
    778   // cycle, so that the remaining overflow number will decrease at
   779   // every remark and we'll eventually not need to cause one.
   780   force_overflow_stw()->init();
   782   // Start Concurrent Marking weak-reference discovery.
   783   ReferenceProcessor* rp = g1h->ref_processor_cm();
   784   // enable ("weak") refs discovery
   785   rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
   786   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
   788   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    789   // This is the start of the marking cycle; we expect all
    790   // threads to have SATB queues with active set to false.
   791   satb_mq_set.set_active_all_threads(true, /* new active value */
   792                                      false /* expected_active */);
   794   _root_regions.prepare_for_scan();
   796   // update_g1_committed() will be called at the end of an evac pause
   797   // when marking is on. So, it's also called at the end of the
   798   // initial-mark pause to update the heap end, if the heap expands
   799   // during it. No need to call it here.
   800 }
   802 /*
   803  * Notice that in the next two methods, we actually leave the STS
   804  * during the barrier sync and join it immediately afterwards. If we
   805  * do not do this, the following deadlock can occur: one thread could
   806  * be in the barrier sync code, waiting for the other thread to also
   807  * sync up, whereas another one could be trying to yield, while also
   808  * waiting for the other threads to sync up too.
   809  *
   810  * Note, however, that this code is also used during remark and in
   811  * this case we should not attempt to leave / enter the STS, otherwise
    812  * we'll either hit an assert (debug / fastdebug) or deadlock
   813  * (product). So we should only leave / enter the STS if we are
   814  * operating concurrently.
   815  *
   816  * Because the thread that does the sync barrier has left the STS, it
    817  * is possible for a Full GC or an evacuation pause to occur while
    818  * it is suspended. This is actually safe, since entering the sync
    819  * barrier is one of the last things do_marking_step() does, and it
   820  * doesn't manipulate any data structures afterwards.
   821  */
   823 void ConcurrentMark::enter_first_sync_barrier(int task_num) {
   824   if (verbose_low()) {
   825     gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
   826   }
   828   if (concurrent()) {
   829     ConcurrentGCThread::stsLeave();
   830   }
   831   _first_overflow_barrier_sync.enter();
   832   if (concurrent()) {
   833     ConcurrentGCThread::stsJoin();
   834   }
   835   // at this point everyone should have synced up and not be doing any
   836   // more work
   838   if (verbose_low()) {
   839     gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
   840   }
   842   // let task 0 do this
   843   if (task_num == 0) {
   844     // task 0 is responsible for clearing the global data structures
   845     // We should be here because of an overflow. During STW we should
   846     // not clear the overflow flag since we rely on it being true when
    847     // we exit this method to abort the pause and restart concurrent
   848     // marking.
   849     clear_marking_state(concurrent() /* clear_overflow */);
   850     force_overflow()->update();
   852     if (G1Log::fine()) {
   853       gclog_or_tty->date_stamp(PrintGCDateStamps);
   854       gclog_or_tty->stamp(PrintGCTimeStamps);
   855       gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
   856     }
   857   }
    859   // after this, each task should reset its own data structures and
    860   // then go into the second barrier
   861 }
   863 void ConcurrentMark::enter_second_sync_barrier(int task_num) {
   864   if (verbose_low()) {
   865     gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
   866   }
   868   if (concurrent()) {
   869     ConcurrentGCThread::stsLeave();
   870   }
   871   _second_overflow_barrier_sync.enter();
   872   if (concurrent()) {
   873     ConcurrentGCThread::stsJoin();
   874   }
   875   // at this point everything should be re-initialised and ready to go
   877   if (verbose_low()) {
   878     gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
   879   }
   880 }
   882 #ifndef PRODUCT
   883 void ForceOverflowSettings::init() {
   884   _num_remaining = G1ConcMarkForceOverflow;
   885   _force = false;
   886   update();
   887 }
   889 void ForceOverflowSettings::update() {
   890   if (_num_remaining > 0) {
   891     _num_remaining -= 1;
   892     _force = true;
   893   } else {
   894     _force = false;
   895   }
   896 }
   898 bool ForceOverflowSettings::should_force() {
   899   if (_force) {
   900     _force = false;
   901     return true;
   902   } else {
   903     return false;
   904   }
   905 }
   906 #endif // !PRODUCT
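        // Behaviour sketch (non-product builds): with
        // -XX:G1ConcMarkForceOverflow=2, init() arms _force for the first
        // update() calls, so roughly the first two marking rounds are forced
        // to overflow and exercise the restart-for-overflow path; once
        // _num_remaining reaches 0, should_force() stays false and marking
        // proceeds normally.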
   908 class CMConcurrentMarkingTask: public AbstractGangTask {
   909 private:
   910   ConcurrentMark*       _cm;
   911   ConcurrentMarkThread* _cmt;
   913 public:
   914   void work(uint worker_id) {
   915     assert(Thread::current()->is_ConcurrentGC_thread(),
   916            "this should only be done by a conc GC thread");
   917     ResourceMark rm;
   919     double start_vtime = os::elapsedVTime();
   921     ConcurrentGCThread::stsJoin();
   923     assert(worker_id < _cm->active_tasks(), "invariant");
   924     CMTask* the_task = _cm->task(worker_id);
   925     the_task->record_start_time();
   926     if (!_cm->has_aborted()) {
   927       do {
   928         double start_vtime_sec = os::elapsedVTime();
   929         double start_time_sec = os::elapsedTime();
   930         double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
   932         the_task->do_marking_step(mark_step_duration_ms,
   933                                   true /* do_stealing    */,
   934                                   true /* do_termination */);
   936         double end_time_sec = os::elapsedTime();
   937         double end_vtime_sec = os::elapsedVTime();
   938         double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
   939         double elapsed_time_sec = end_time_sec - start_time_sec;
   940         _cm->clear_has_overflown();
   942         bool ret = _cm->do_yield_check(worker_id);
   944         jlong sleep_time_ms;
   945         if (!_cm->has_aborted() && the_task->has_aborted()) {
   946           sleep_time_ms =
   947             (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
   948           ConcurrentGCThread::stsLeave();
   949           os::sleep(Thread::current(), sleep_time_ms, false);
   950           ConcurrentGCThread::stsJoin();
   951         }
   952         double end_time2_sec = os::elapsedTime();
   953         double elapsed_time2_sec = end_time2_sec - start_time_sec;
   955 #if 0
   956           gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
   957                                  "overhead %1.4lf",
   958                                  elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
   959                                  the_task->conc_overhead(os::elapsedTime()) * 8.0);
   960           gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
   961                                  elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
   962 #endif
   963       } while (!_cm->has_aborted() && the_task->has_aborted());
   964     }
   965     the_task->record_end_time();
   966     guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
   968     ConcurrentGCThread::stsLeave();
   970     double end_vtime = os::elapsedVTime();
   971     _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
   972   }
   974   CMConcurrentMarkingTask(ConcurrentMark* cm,
   975                           ConcurrentMarkThread* cmt) :
   976       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
   978   ~CMConcurrentMarkingTask() { }
   979 };
   981 // Calculates the number of active workers for a concurrent
   982 // phase.
   983 uint ConcurrentMark::calc_parallel_marking_threads() {
   984   if (G1CollectedHeap::use_parallel_gc_threads()) {
   985     uint n_conc_workers = 0;
   986     if (!UseDynamicNumberOfGCThreads ||
   987         (!FLAG_IS_DEFAULT(ConcGCThreads) &&
   988          !ForceDynamicNumberOfGCThreads)) {
   989       n_conc_workers = max_parallel_marking_threads();
   990     } else {
   991       n_conc_workers =
   992         AdaptiveSizePolicy::calc_default_active_workers(
   993                                      max_parallel_marking_threads(),
   994                                      1, /* Minimum workers */
   995                                      parallel_marking_threads(),
   996                                      Threads::number_of_non_daemon_threads());
   997       // Don't scale down "n_conc_workers" by scale_parallel_threads() because
   998       // that scaling has already gone into "_max_parallel_marking_threads".
   999     }
  1000     assert(n_conc_workers > 0, "Always need at least 1");
   1001     return n_conc_workers;
   1002   }
  1003   // If we are not running with any parallel GC threads we will not
  1004   // have spawned any marking threads either. Hence the number of
  1005   // concurrent workers should be 0.
   1006   return 0;
   1007 }
  1009 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
  1010   // Currently, only survivors can be root regions.
  1011   assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  1012   G1RootRegionScanClosure cl(_g1h, this, worker_id);
  1014   const uintx interval = PrefetchScanIntervalInBytes;
  1015   HeapWord* curr = hr->bottom();
  1016   const HeapWord* end = hr->top();
  1017   while (curr < end) {
  1018     Prefetch::read(curr, interval);
  1019     oop obj = oop(curr);
  1020     int size = obj->oop_iterate(&cl);
  1021     assert(size == obj->size(), "sanity");
   1022     curr += size;
   1023   }
   1024 }
  1026 class CMRootRegionScanTask : public AbstractGangTask {
  1027 private:
  1028   ConcurrentMark* _cm;
  1030 public:
  1031   CMRootRegionScanTask(ConcurrentMark* cm) :
  1032     AbstractGangTask("Root Region Scan"), _cm(cm) { }
  1034   void work(uint worker_id) {
  1035     assert(Thread::current()->is_ConcurrentGC_thread(),
  1036            "this should only be done by a conc GC thread");
  1038     CMRootRegions* root_regions = _cm->root_regions();
  1039     HeapRegion* hr = root_regions->claim_next();
  1040     while (hr != NULL) {
  1041       _cm->scanRootRegion(hr, worker_id);
   1042       hr = root_regions->claim_next();
   1043     }
   1044   }
  1045 };
  1047 void ConcurrentMark::scanRootRegions() {
  1048   // scan_in_progress() will have been set to true only if there was
  1049   // at least one root region to scan. So, if it's false, we
  1050   // should not attempt to do any further work.
  1051   if (root_regions()->scan_in_progress()) {
  1052     _parallel_marking_threads = calc_parallel_marking_threads();
  1053     assert(parallel_marking_threads() <= max_parallel_marking_threads(),
  1054            "Maximum number of marking threads exceeded");
  1055     uint active_workers = MAX2(1U, parallel_marking_threads());
  1057     CMRootRegionScanTask task(this);
  1058     if (parallel_marking_threads() > 0) {
  1059       _parallel_workers->set_active_workers((int) active_workers);
  1060       _parallel_workers->run_task(&task);
  1061     } else {
   1062       task.work(0);
   1063     }
  1065     // It's possible that has_aborted() is true here without actually
  1066     // aborting the survivor scan earlier. This is OK as it's
  1067     // mainly used for sanity checking.
   1068     root_regions()->scan_finished();
   1069   }
   1070 }
  1072 void ConcurrentMark::markFromRoots() {
  1073   // we might be tempted to assert that:
  1074   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  1075   //        "inconsistent argument?");
  1076   // However that wouldn't be right, because it's possible that
  1077   // a safepoint is indeed in progress as a younger generation
  1078   // stop-the-world GC happens even as we mark in this generation.
  1080   _restart_for_overflow = false;
  1081   force_overflow_conc()->init();
  1083   // _g1h has _n_par_threads
  1084   _parallel_marking_threads = calc_parallel_marking_threads();
  1085   assert(parallel_marking_threads() <= max_parallel_marking_threads(),
  1086     "Maximum number of marking threads exceeded");
  1088   uint active_workers = MAX2(1U, parallel_marking_threads());
  1090   // Parallel task terminator is set in "set_phase()"
  1091   set_phase(active_workers, true /* concurrent */);
  1093   CMConcurrentMarkingTask markingTask(this, cmThread());
  1094   if (parallel_marking_threads() > 0) {
  1095     _parallel_workers->set_active_workers((int)active_workers);
   1096     // Don't set _n_par_threads because it affects MT in process_strong_roots()
   1097     // and the decisions on that MT processing are made elsewhere.
  1098     assert(_parallel_workers->active_workers() > 0, "Should have been set");
  1099     _parallel_workers->run_task(&markingTask);
  1100   } else {
   1101     markingTask.work(0);
   1102   }
   1103   print_stats();
   1104 }
  1106 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  1107   // world is stopped at this checkpoint
  1108   assert(SafepointSynchronize::is_at_safepoint(),
  1109          "world should be stopped");
  1111   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  1113   // If a full collection has happened, we shouldn't do this.
  1114   if (has_aborted()) {
  1115     g1h->set_marking_complete(); // So bitmap clearing isn't confused
   1116     return;
   1117   }
  1119   SvcGCMarker sgcm(SvcGCMarker::OTHER);
  1121   if (VerifyDuringGC) {
  1122     HandleMark hm;  // handle scope
  1123     gclog_or_tty->print(" VerifyDuringGC:(before)");
  1124     Universe::heap()->prepare_for_verify();
  1125     Universe::verify(/* silent      */ false,
   1126                      /* option      */ VerifyOption_G1UsePrevMarking);
   1127   }
  1129   G1CollectorPolicy* g1p = g1h->g1_policy();
  1130   g1p->record_concurrent_mark_remark_start();
  1132   double start = os::elapsedTime();
  1134   checkpointRootsFinalWork();
  1136   double mark_work_end = os::elapsedTime();
  1138   weakRefsWork(clear_all_soft_refs);
  1140   if (has_overflown()) {
  1141     // Oops.  We overflowed.  Restart concurrent marking.
  1142     _restart_for_overflow = true;
  1143     // Clear the flag. We do not need it any more.
  1144     clear_has_overflown();
  1145     if (G1TraceMarkStackOverflow) {
   1146       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
   1147     }
  1148   } else {
  1149     // Aggregate the per-task counting data that we have accumulated
  1150     // while marking.
  1151     aggregate_count_data();
  1153     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  1154     // We're done with marking.
   1155     // This is the end of the marking cycle; we expect all
  1156     // threads to have SATB queues with active set to true.
  1157     satb_mq_set.set_active_all_threads(false, /* new active value */
  1158                                        true /* expected_active */);
  1160     if (VerifyDuringGC) {
  1161       HandleMark hm;  // handle scope
  1162       gclog_or_tty->print(" VerifyDuringGC:(after)");
  1163       Universe::heap()->prepare_for_verify();
  1164       Universe::verify(/* silent      */ false,
   1165                        /* option      */ VerifyOption_G1UseNextMarking);
   1166     }
   1167     assert(!restart_for_overflow(), "sanity");
   1168   }
  1170   // Reset the marking state if marking completed
  1171   if (!restart_for_overflow()) {
   1172     set_non_marking_state();
   1173   }
  1175 #if VERIFY_OBJS_PROCESSED
  1176   _scan_obj_cl.objs_processed = 0;
  1177   ThreadLocalObjQueue::objs_enqueued = 0;
  1178 #endif
  1180   // Statistics
  1181   double now = os::elapsedTime();
  1182   _remark_mark_times.add((mark_work_end - start) * 1000.0);
  1183   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  1184   _remark_times.add((now - start) * 1000.0);
   1186   g1p->record_concurrent_mark_remark_end();
   1187 }
  1189 // Base class of the closures that finalize and verify the
  1190 // liveness counting data.
  1191 class CMCountDataClosureBase: public HeapRegionClosure {
  1192 protected:
  1193   ConcurrentMark* _cm;
  1194   BitMap* _region_bm;
  1195   BitMap* _card_bm;
  1197   void set_card_bitmap_range(BitMap::idx_t start_idx, BitMap::idx_t last_idx) {
  1198     assert(start_idx <= last_idx, "sanity");
  1200     // Set the inclusive bit range [start_idx, last_idx].
  1201     // For small ranges (up to 8 cards) use a simple loop; otherwise
  1202     // use par_at_put_range.
  1203     if ((last_idx - start_idx) < 8) {
  1204       for (BitMap::idx_t i = start_idx; i <= last_idx; i += 1) {
   1205         _card_bm->par_set_bit(i);
   1206       }
  1207     } else {
  1208       assert(last_idx < _card_bm->size(), "sanity");
  1209       // Note BitMap::par_at_put_range() is exclusive.
   1210       _card_bm->par_at_put_range(start_idx, last_idx+1, true);
   1211     }
   1212   }
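        // Example (illustrative indices): set_card_bitmap_range(3, 12) covers
        // the inclusive card range [3, 12]; since last_idx - start_idx is 9,
        // it uses par_at_put_range(3, 13, true), whose upper bound is
        // exclusive. A small range such as (3, 6) instead loops, calling
        // par_set_bit() for bits 3 through 6.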
  1214   // It takes a region that's not empty (i.e., it has at least one
   1215   // live object in it) and sets its corresponding bit on the region
  1216   // bitmap to 1. If the region is "starts humongous" it will also set
  1217   // to 1 the bits on the region bitmap that correspond to its
  1218   // associated "continues humongous" regions.
  1219   void set_bit_for_region(HeapRegion* hr) {
  1220     assert(!hr->continuesHumongous(), "should have filtered those out");
  1222     BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
  1223     if (!hr->startsHumongous()) {
  1224       // Normal (non-humongous) case: just set the bit.
  1225       _region_bm->par_at_put(index, true);
  1226     } else {
  1227       // Starts humongous case: calculate how many regions are part of
  1228       // this humongous region and then set the bit range.
  1229       G1CollectedHeap* g1h = G1CollectedHeap::heap();
  1230       HeapRegion *last_hr = g1h->heap_region_containing_raw(hr->end() - 1);
  1231       BitMap::idx_t end_index = (BitMap::idx_t) last_hr->hrs_index() + 1;
   1232       _region_bm->par_at_put_range(index, end_index, true);
   1233     }
   1234   }
  1236 public:
  1237   CMCountDataClosureBase(ConcurrentMark *cm,
  1238                          BitMap* region_bm, BitMap* card_bm):
  1239     _cm(cm), _region_bm(region_bm), _card_bm(card_bm) { }
  1240 };
  1242 // Closure that calculates the # live objects per region. Used
  1243 // for verification purposes during the cleanup pause.
  1244 class CalcLiveObjectsClosure: public CMCountDataClosureBase {
  1245   CMBitMapRO* _bm;
  1246   size_t _region_marked_bytes;
  1248 public:
  1249   CalcLiveObjectsClosure(CMBitMapRO *bm, ConcurrentMark *cm,
  1250                          BitMap* region_bm, BitMap* card_bm) :
  1251     CMCountDataClosureBase(cm, region_bm, card_bm),
  1252     _bm(bm), _region_marked_bytes(0) { }
  1254   bool doHeapRegion(HeapRegion* hr) {
  1256     if (hr->continuesHumongous()) {
  1257       // We will ignore these here and process them when their
  1258       // associated "starts humongous" region is processed (see
  1259       // set_bit_for_heap_region()). Note that we cannot rely on their
  1260       // associated "starts humongous" region to have their bit set to
  1261       // 1 since, due to the region chunking in the parallel region
  1262       // iteration, a "continues humongous" region might be visited
  1263       // before its associated "starts humongous".
   1264       return false;
   1265     }
  1267     HeapWord* nextTop = hr->next_top_at_mark_start();
  1268     HeapWord* start   = hr->bottom();
  1270     assert(start <= hr->end() && start <= nextTop && nextTop <= hr->end(),
  1271            err_msg("Preconditions not met - "
  1272                    "start: "PTR_FORMAT", nextTop: "PTR_FORMAT", end: "PTR_FORMAT,
  1273                    start, nextTop, hr->end()));
  1275     // Find the first marked object at or after "start".
  1276     start = _bm->getNextMarkedWordAddress(start, nextTop);
  1278     size_t marked_bytes = 0;
  1280     while (start < nextTop) {
  1281       oop obj = oop(start);
  1282       int obj_sz = obj->size();
  1283       HeapWord* obj_last = start + obj_sz - 1;
  1285       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
  1286       BitMap::idx_t last_idx = _cm->card_bitmap_index_for(obj_last);
  1288       // Set the bits in the card BM for this object (inclusive).
  1289       set_card_bitmap_range(start_idx, last_idx);
  1291       // Add the size of this object to the number of marked bytes.
  1292       marked_bytes += (size_t)obj_sz * HeapWordSize;
  1294       // Find the next marked object after this one.
   1295       start = _bm->getNextMarkedWordAddress(obj_last + 1, nextTop);
   1296     }
  1298     // Mark the allocated-since-marking portion...
  1299     HeapWord* top = hr->top();
  1300     if (nextTop < top) {
  1301       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(nextTop);
  1302       BitMap::idx_t last_idx = _cm->card_bitmap_index_for(top - 1);
  1304       set_card_bitmap_range(start_idx, last_idx);
  1306       // This definitely means the region has live objects.
   1307       set_bit_for_region(hr);
   1308     }
  1310     // Update the live region bitmap.
  1311     if (marked_bytes > 0) {
   1312       set_bit_for_region(hr);
   1313     }
  1315     // Set the marked bytes for the current region so that
   1316     // it can be queried by a calling verification routine
  1317     _region_marked_bytes = marked_bytes;
   1319     return false;
   1320   }
  1322   size_t region_marked_bytes() const { return _region_marked_bytes; }
  1323 };
  1325 // Heap region closure used for verifying the counting data
  1326 // that was accumulated concurrently and aggregated during
  1327 // the remark pause. This closure is applied to the heap
  1328 // regions during the STW cleanup pause.
  1330 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
  1331   ConcurrentMark* _cm;
  1332   CalcLiveObjectsClosure _calc_cl;
  1333   BitMap* _region_bm;   // Region BM to be verified
  1334   BitMap* _card_bm;     // Card BM to be verified
  1335   bool _verbose;        // verbose output?
  1337   BitMap* _exp_region_bm; // Expected Region BM values
  1338   BitMap* _exp_card_bm;   // Expected card BM values
  1340   int _failures;
  1342 public:
  1343   VerifyLiveObjectDataHRClosure(ConcurrentMark* cm,
  1344                                 BitMap* region_bm,
  1345                                 BitMap* card_bm,
  1346                                 BitMap* exp_region_bm,
  1347                                 BitMap* exp_card_bm,
  1348                                 bool verbose) :
  1349     _cm(cm),
  1350     _calc_cl(_cm->nextMarkBitMap(), _cm, exp_region_bm, exp_card_bm),
  1351     _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
  1352     _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
  1353     _failures(0) { }
  1355   int failures() const { return _failures; }
  1357   bool doHeapRegion(HeapRegion* hr) {
  1358     if (hr->continuesHumongous()) {
  1359       // We will ignore these here and process them when their
  1360       // associated "starts humongous" region is processed (see
  1361       // set_bit_for_heap_region()). Note that we cannot rely on their
  1362       // associated "starts humongous" region to have their bit set to
  1363       // 1 since, due to the region chunking in the parallel region
  1364       // iteration, a "continues humongous" region might be visited
  1365       // before its associated "starts humongous".
   1366       return false;
   1367     }
  1369     int failures = 0;
  1371     // Call the CalcLiveObjectsClosure to walk the marking bitmap for
  1372     // this region and set the corresponding bits in the expected region
  1373     // and card bitmaps.
  1374     bool res = _calc_cl.doHeapRegion(hr);
  1375     assert(res == false, "should be continuing");
  1377     MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
  1378                     Mutex::_no_safepoint_check_flag);
  1380     // Verify the marked bytes for this region.
  1381     size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
  1382     size_t act_marked_bytes = hr->next_marked_bytes();
  1384     // We're not OK if expected marked bytes > actual marked bytes. It means
  1385     // we have missed accounting some objects during the actual marking.
  1386     if (exp_marked_bytes > act_marked_bytes) {
  1387       if (_verbose) {
  1388         gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
  1389                                "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
   1390                                hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
   1391       }
   1392       failures += 1;
   1393     }
  1395     // Verify the bit, for this region, in the actual and expected
  1396     // (which was just calculated) region bit maps.
  1397     // We're not OK if the bit in the calculated expected region
  1398     // bitmap is set and the bit in the actual region bitmap is not.
  1399     BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
  1401     bool expected = _exp_region_bm->at(index);
  1402     bool actual = _region_bm->at(index);
  1403     if (expected && !actual) {
  1404       if (_verbose) {
  1405         gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
  1406                                "expected: %s, actual: %s",
  1407                                hr->hrs_index(),
  1408                                BOOL_TO_STR(expected), BOOL_TO_STR(actual));
  1410       failures += 1;
  1413     // Verify that the card bit maps for the cards spanned by the current
  1414     // region match. We have an error if we have a set bit in the expected
  1415     // bit map and the corresponding bit in the actual bitmap is not set.
  1417     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
  1418     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
  1420     for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
  1421       expected = _exp_card_bm->at(i);
  1422       actual = _card_bm->at(i);
  1424       if (expected && !actual) {
  1425         if (_verbose) {
  1426           gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
  1427                                  "expected: %s, actual: %s",
  1428                                  hr->hrs_index(), i,
  1429                                  BOOL_TO_STR(expected), BOOL_TO_STR(actual));
  1431         failures += 1;
  1435     if (failures > 0 && _verbose)  {
  1436       gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
  1437                              "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
  1438                              HR_FORMAT_PARAMS(hr), hr->next_top_at_mark_start(),
  1439                              _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
  1442     _failures += failures;
  1444     // We could stop iteration over the heap when we
  1445     // find the first violating region by returning true.
  1446     return false;
  1448 };
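       // Parallel task used to verify the liveness counting data. Each worker
       // applies a VerifyLiveObjectDataHRClosure to the regions it claims and
       // the per-worker failure counts are accumulated atomically.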
  1451 class G1ParVerifyFinalCountTask: public AbstractGangTask {
  1452 protected:
  1453   G1CollectedHeap* _g1h;
  1454   ConcurrentMark* _cm;
  1455   BitMap* _actual_region_bm;
  1456   BitMap* _actual_card_bm;
  1458   uint    _n_workers;
  1460   BitMap* _expected_region_bm;
  1461   BitMap* _expected_card_bm;
  1463   int  _failures;
  1464   bool _verbose;
  1466 public:
  1467   G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
  1468                             BitMap* region_bm, BitMap* card_bm,
  1469                             BitMap* expected_region_bm, BitMap* expected_card_bm)
  1470     : AbstractGangTask("G1 verify final counting"),
  1471       _g1h(g1h), _cm(_g1h->concurrent_mark()),
  1472       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
  1473       _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
  1474       _failures(0), _verbose(false),
  1475       _n_workers(0) {
  1476     assert(VerifyDuringGC, "don't call this otherwise");
  1478     // Use the value already set as the number of active threads
  1479     // in the call to run_task().
  1480     if (G1CollectedHeap::use_parallel_gc_threads()) {
  1481       assert( _g1h->workers()->active_workers() > 0,
  1482         "Should have been previously set");
  1483       _n_workers = _g1h->workers()->active_workers();
  1484     } else {
  1485       _n_workers = 1;
  1488     assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
  1489     assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
  1491     _verbose = _cm->verbose_medium();
  1494   void work(uint worker_id) {
  1495     assert(worker_id < _n_workers, "invariant");
  1497     VerifyLiveObjectDataHRClosure verify_cl(_cm,
  1498                                             _actual_region_bm, _actual_card_bm,
  1499                                             _expected_region_bm,
  1500                                             _expected_card_bm,
  1501                                             _verbose);
  1503     if (G1CollectedHeap::use_parallel_gc_threads()) {
  1504       _g1h->heap_region_par_iterate_chunked(&verify_cl,
  1505                                             worker_id,
  1506                                             _n_workers,
  1507                                             HeapRegion::VerifyCountClaimValue);
  1508     } else {
  1509       _g1h->heap_region_iterate(&verify_cl);
  1512     Atomic::add(verify_cl.failures(), &_failures);
  1515   int failures() const { return _failures; }
  1516 };
  1518 // Closure that finalizes the liveness counting data.
  1519 // Used during the cleanup pause.
  1520 // Sets the bits corresponding to the interval [NTAMS, top]
  1521 // (which contains the implicitly live objects) in the
  1522 // card liveness bitmap. Also sets the bit, in the region liveness
  1523 // bitmap, for each region that contains live data.
  1525 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
  1526  public:
  1527   FinalCountDataUpdateClosure(ConcurrentMark* cm,
  1528                               BitMap* region_bm,
  1529                               BitMap* card_bm) :
  1530     CMCountDataClosureBase(cm, region_bm, card_bm) { }
  1532   bool doHeapRegion(HeapRegion* hr) {
  1534     if (hr->continuesHumongous()) {
  1535       // We will ignore these here and process them when their
  1536       // associated "starts humongous" region is processed (see
  1537       // set_bit_for_heap_region()). Note that we cannot rely on their
  1538     // associated "starts humongous" region to have its bit set to
  1539       // 1 since, due to the region chunking in the parallel region
  1540       // iteration, a "continues humongous" region might be visited
  1541       // before its associated "starts humongous".
  1542       return false;
  1545     HeapWord* ntams = hr->next_top_at_mark_start();
  1546     HeapWord* top   = hr->top();
  1548     assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
  1550     // Mark the allocated-since-marking portion...
  1551     if (ntams < top) {
  1552       // This definitely means the region has live objects.
  1553       set_bit_for_region(hr);
  1556     // Now set the bits for [ntams, top]
  1557     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
  1558     BitMap::idx_t last_idx = _cm->card_bitmap_index_for(top);
  1559     set_card_bitmap_range(start_idx, last_idx);
  1561     // Set the bit for the region if it contains live data
  1562     if (hr->next_marked_bytes() > 0) {
  1563       set_bit_for_region(hr);
  1566     return false;
  1568 };
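       // Parallel task that finalizes the liveness counting data during the
       // STW cleanup pause by applying a FinalCountDataUpdateClosure to
       // every heap region.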
  1570 class G1ParFinalCountTask: public AbstractGangTask {
  1571 protected:
  1572   G1CollectedHeap* _g1h;
  1573   ConcurrentMark* _cm;
  1574   BitMap* _actual_region_bm;
  1575   BitMap* _actual_card_bm;
  1577   uint    _n_workers;
  1579 public:
  1580   G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
  1581     : AbstractGangTask("G1 final counting"),
  1582       _g1h(g1h), _cm(_g1h->concurrent_mark()),
  1583       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
  1584       _n_workers(0) {
  1585     // Use the value already set as the number of active threads
  1586     // in the call to run_task().
  1587     if (G1CollectedHeap::use_parallel_gc_threads()) {
  1588       assert( _g1h->workers()->active_workers() > 0,
  1589         "Should have been previously set");
  1590       _n_workers = _g1h->workers()->active_workers();
  1591     } else {
  1592       _n_workers = 1;
  1596   void work(uint worker_id) {
  1597     assert(worker_id < _n_workers, "invariant");
  1599     FinalCountDataUpdateClosure final_update_cl(_cm,
  1600                                                 _actual_region_bm,
  1601                                                 _actual_card_bm);
  1603     if (G1CollectedHeap::use_parallel_gc_threads()) {
  1604       _g1h->heap_region_par_iterate_chunked(&final_update_cl,
  1605                                             worker_id,
  1606                                             _n_workers,
  1607                                             HeapRegion::FinalCountClaimValue);
  1608     } else {
  1609       _g1h->heap_region_iterate(&final_update_cl);
  1612 };
  1614 class G1ParNoteEndTask;
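       // Per-worker closure applied to each heap region at the end of marking.
       // It records end-of-marking information for the region, accumulates
       // liveness statistics, and hands regions that turned out to be
       // completely empty to a local cleanup list so they can be freed.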
  1616 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  1617   G1CollectedHeap* _g1;
  1618   int _worker_num;
  1619   size_t _max_live_bytes;
  1620   uint _regions_claimed;
  1621   size_t _freed_bytes;
  1622   FreeRegionList* _local_cleanup_list;
  1623   OldRegionSet* _old_proxy_set;
  1624   HumongousRegionSet* _humongous_proxy_set;
  1625   HRRSCleanupTask* _hrrs_cleanup_task;
  1626   double _claimed_region_time;
  1627   double _max_region_time;
  1629 public:
  1630   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
  1631                              int worker_num,
  1632                              FreeRegionList* local_cleanup_list,
  1633                              OldRegionSet* old_proxy_set,
  1634                              HumongousRegionSet* humongous_proxy_set,
  1635                              HRRSCleanupTask* hrrs_cleanup_task) :
  1636     _g1(g1), _worker_num(worker_num),
  1637     _max_live_bytes(0), _regions_claimed(0),
  1638     _freed_bytes(0),
  1639     _claimed_region_time(0.0), _max_region_time(0.0),
  1640     _local_cleanup_list(local_cleanup_list),
  1641     _old_proxy_set(old_proxy_set),
  1642     _humongous_proxy_set(humongous_proxy_set),
  1643     _hrrs_cleanup_task(hrrs_cleanup_task) { }
  1645   size_t freed_bytes() { return _freed_bytes; }
  1647   bool doHeapRegion(HeapRegion *hr) {
  1648     // We use a claim value of zero here because all regions
  1649     // were claimed with value 1 in the FinalCount task.
  1650     hr->reset_gc_time_stamp();
  1651     if (!hr->continuesHumongous()) {
  1652       double start = os::elapsedTime();
  1653       _regions_claimed++;
  1654       hr->note_end_of_marking();
  1655       _max_live_bytes += hr->max_live_bytes();
  1656       _g1->free_region_if_empty(hr,
  1657                                 &_freed_bytes,
  1658                                 _local_cleanup_list,
  1659                                 _old_proxy_set,
  1660                                 _humongous_proxy_set,
  1661                                 _hrrs_cleanup_task,
  1662                                 true /* par */);
  1663       double region_time = (os::elapsedTime() - start);
  1664       _claimed_region_time += region_time;
  1665       if (region_time > _max_region_time) {
  1666         _max_region_time = region_time;
  1669     return false;
  1672   size_t max_live_bytes() { return _max_live_bytes; }
  1673   uint regions_claimed() { return _regions_claimed; }
  1674   double claimed_region_time_sec() { return _claimed_region_time; }
  1675   double max_region_time_sec() { return _max_region_time; }
  1676 };
  1678 class G1ParNoteEndTask: public AbstractGangTask {
  1679   friend class G1NoteEndOfConcMarkClosure;
  1681 protected:
  1682   G1CollectedHeap* _g1h;
  1683   size_t _max_live_bytes;
  1684   size_t _freed_bytes;
  1685   FreeRegionList* _cleanup_list;
  1687 public:
  1688   G1ParNoteEndTask(G1CollectedHeap* g1h,
  1689                    FreeRegionList* cleanup_list) :
  1690     AbstractGangTask("G1 note end"), _g1h(g1h),
  1691     _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
  1693   void work(uint worker_id) {
  1694     double start = os::elapsedTime();
  1695     FreeRegionList local_cleanup_list("Local Cleanup List");
  1696     OldRegionSet old_proxy_set("Local Cleanup Old Proxy Set");
  1697     HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
  1698     HRRSCleanupTask hrrs_cleanup_task;
  1699     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, worker_id, &local_cleanup_list,
  1700                                            &old_proxy_set,
  1701                                            &humongous_proxy_set,
  1702                                            &hrrs_cleanup_task);
  1703     if (G1CollectedHeap::use_parallel_gc_threads()) {
  1704       _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id,
  1705                                             _g1h->workers()->active_workers(),
  1706                                             HeapRegion::NoteEndClaimValue);
  1707     } else {
  1708       _g1h->heap_region_iterate(&g1_note_end);
  1710     assert(g1_note_end.complete(), "Shouldn't have yielded!");
  1712     // Now update the lists
  1713     _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
  1714                                             NULL /* free_list */,
  1715                                             &old_proxy_set,
  1716                                             &humongous_proxy_set,
  1717                                             true /* par */);
  1719       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  1720       _max_live_bytes += g1_note_end.max_live_bytes();
  1721       _freed_bytes += g1_note_end.freed_bytes();
  1723       // If we iterate over the global cleanup list at the end of
  1724       // cleanup to do this printing we cannot guarantee that we will only
  1725       // generate output for the newly-reclaimed regions (the list
  1726       // might not be empty at the beginning of cleanup; we might
  1727       // still be working on its previous contents). So we do the
  1728       // printing here, before we append the new regions to the global
  1729       // cleanup list.
  1731       G1HRPrinter* hr_printer = _g1h->hr_printer();
  1732       if (hr_printer->is_active()) {
  1733         HeapRegionLinkedListIterator iter(&local_cleanup_list);
  1734         while (iter.more_available()) {
  1735           HeapRegion* hr = iter.get_next();
  1736           hr_printer->cleanup(hr);
  1740       _cleanup_list->add_as_tail(&local_cleanup_list);
  1741       assert(local_cleanup_list.is_empty(), "post-condition");
  1743       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
  1746   size_t max_live_bytes() { return _max_live_bytes; }
  1747   size_t freed_bytes() { return _freed_bytes; }
  1748 };
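       // Parallel task that scrubs the remembered sets, using the region and
       // card liveness bitmaps that were finalized during the counting phase.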
  1750 class G1ParScrubRemSetTask: public AbstractGangTask {
  1751 protected:
  1752   G1RemSet* _g1rs;
  1753   BitMap* _region_bm;
  1754   BitMap* _card_bm;
  1755 public:
  1756   G1ParScrubRemSetTask(G1CollectedHeap* g1h,
  1757                        BitMap* region_bm, BitMap* card_bm) :
  1758     AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
  1759     _region_bm(region_bm), _card_bm(card_bm) { }
  1761   void work(uint worker_id) {
  1762     if (G1CollectedHeap::use_parallel_gc_threads()) {
  1763       _g1rs->scrub_par(_region_bm, _card_bm, worker_id,
  1764                        HeapRegion::ScrubRemSetClaimValue);
  1765     } else {
  1766       _g1rs->scrub(_region_bm, _card_bm);
  1770 };
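       // STW cleanup pause. The main steps below are: finalize (and, with
       // VerifyDuringGC, verify) the liveness counting data, swap the marking
       // bitmaps so the completed "next" bitmap becomes "prev", note the end
       // of marking in every region (freeing any that are completely empty),
       // and optionally scrub the remembered sets.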
  1772 void ConcurrentMark::cleanup() {
  1773   // world is stopped at this checkpoint
  1774   assert(SafepointSynchronize::is_at_safepoint(),
  1775          "world should be stopped");
  1776   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  1778   // If a full collection has happened, we shouldn't do this.
  1779   if (has_aborted()) {
  1780     g1h->set_marking_complete(); // So bitmap clearing isn't confused
  1781     return;
  1784   HRSPhaseSetter x(HRSPhaseCleanup);
  1785   g1h->verify_region_sets_optional();
  1787   if (VerifyDuringGC) {
  1788     HandleMark hm;  // handle scope
  1789     gclog_or_tty->print(" VerifyDuringGC:(before)");
  1790     Universe::heap()->prepare_for_verify();
  1791     Universe::verify(/* silent      */ false,
  1792                      /* option      */ VerifyOption_G1UsePrevMarking);
  1795   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
  1796   g1p->record_concurrent_mark_cleanup_start();
  1798   double start = os::elapsedTime();
  1800   HeapRegionRemSet::reset_for_cleanup_tasks();
  1802   uint n_workers;
  1804   // Do counting once more with the world stopped for good measure.
  1805   G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
  1807   if (G1CollectedHeap::use_parallel_gc_threads()) {
  1808    assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
  1809            "sanity check");
  1811     g1h->set_par_threads();
  1812     n_workers = g1h->n_par_threads();
  1813     assert(g1h->n_par_threads() == n_workers,
  1814            "Should not have been reset");
  1815     g1h->workers()->run_task(&g1_par_count_task);
  1816     // Done with the parallel phase so reset to 0.
  1817     g1h->set_par_threads(0);
  1819     assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue),
  1820            "sanity check");
  1821   } else {
  1822     n_workers = 1;
  1823     g1_par_count_task.work(0);
  1826   if (VerifyDuringGC) {
  1827     // Verify that the counting data accumulated during marking matches
  1828     // that calculated by walking the marking bitmap.
  1830     // Bitmaps to hold expected values
  1831     BitMap expected_region_bm(_region_bm.size(), false);
  1832     BitMap expected_card_bm(_card_bm.size(), false);
  1834     G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
  1835                                                  &_region_bm,
  1836                                                  &_card_bm,
  1837                                                  &expected_region_bm,
  1838                                                  &expected_card_bm);
  1840     if (G1CollectedHeap::use_parallel_gc_threads()) {
  1841       g1h->set_par_threads((int)n_workers);
  1842       g1h->workers()->run_task(&g1_par_verify_task);
  1843       // Done with the parallel phase so reset to 0.
  1844       g1h->set_par_threads(0);
  1846       assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
  1847              "sanity check");
  1848     } else {
  1849       g1_par_verify_task.work(0);
  1852     guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
  1855   size_t start_used_bytes = g1h->used();
  1856   g1h->set_marking_complete();
  1858   double count_end = os::elapsedTime();
  1859   double this_final_counting_time = (count_end - start);
  1860   _total_counting_time += this_final_counting_time;
  1862   if (G1PrintRegionLivenessInfo) {
  1863     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
  1864     _g1h->heap_region_iterate(&cl);
  1867   // Install newly created mark bitMap as "prev".
  1868   swapMarkBitMaps();
  1870   g1h->reset_gc_time_stamp();
  1872   // Note end of marking in all heap regions.
  1873   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
  1874   if (G1CollectedHeap::use_parallel_gc_threads()) {
  1875     g1h->set_par_threads((int)n_workers);
  1876     g1h->workers()->run_task(&g1_par_note_end_task);
  1877     g1h->set_par_threads(0);
  1879     assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
  1880            "sanity check");
  1881   } else {
  1882     g1_par_note_end_task.work(0);
  1885   if (!cleanup_list_is_empty()) {
  1886     // The cleanup list is not empty, so we'll have to process it
  1887     // concurrently. Notify anyone else that might be wanting free
  1888     // regions that there will be more free regions coming soon.
  1889     g1h->set_free_regions_coming();
  1892   // Do the remembered set scrubbing before the record_concurrent_mark_cleanup_end()
  1893   // call below, since it affects the metric by which we sort the heap regions.
  1894   if (G1ScrubRemSets) {
  1895     double rs_scrub_start = os::elapsedTime();
  1896     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
  1897     if (G1CollectedHeap::use_parallel_gc_threads()) {
  1898       g1h->set_par_threads((int)n_workers);
  1899       g1h->workers()->run_task(&g1_par_scrub_rs_task);
  1900       g1h->set_par_threads(0);
  1902       assert(g1h->check_heap_region_claim_values(
  1903                                             HeapRegion::ScrubRemSetClaimValue),
  1904              "sanity check");
  1905     } else {
  1906       g1_par_scrub_rs_task.work(0);
  1909     double rs_scrub_end = os::elapsedTime();
  1910     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
  1911     _total_rs_scrub_time += this_rs_scrub_time;
  1914   // this will also free any regions totally full of garbage objects,
  1915   // and sort the regions.
  1916   g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
  1918   // Statistics.
  1919   double end = os::elapsedTime();
  1920   _cleanup_times.add((end - start) * 1000.0);
  1922   if (G1Log::fine()) {
  1923     g1h->print_size_transition(gclog_or_tty,
  1924                                start_used_bytes,
  1925                                g1h->used(),
  1926                                g1h->capacity());
  1929   // Clean up will have freed any regions completely full of garbage.
  1930   // Update the soft reference policy with the new heap occupancy.
  1931   Universe::update_heap_info_at_gc();
  1933   // We need to make this count as a "collection" so that any collection pause
  1934   // that races with it goes around and waits for completeCleanup to finish.
  1935   g1h->increment_total_collections();
  1937   // We reclaimed old regions so we should calculate the sizes to make
  1938   // sure we update the old gen/space data.
  1939   g1h->g1mm()->update_sizes();
  1941   if (VerifyDuringGC) {
  1942     HandleMark hm;  // handle scope
  1943     gclog_or_tty->print(" VerifyDuringGC:(after)");
  1944     Universe::heap()->prepare_for_verify();
  1945     Universe::verify(/* silent      */ false,
  1946                      /* option      */ VerifyOption_G1UsePrevMarking);
  1949   g1h->verify_region_sets_optional();
  1952 void ConcurrentMark::completeCleanup() {
  1953   if (has_aborted()) return;
  1955   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  1957   _cleanup_list.verify_optional();
  1958   FreeRegionList tmp_free_list("Tmp Free List");
  1960   if (G1ConcRegionFreeingVerbose) {
  1961     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
  1962                            "cleanup list has %u entries",
  1963                            _cleanup_list.length());
  1966   // No one else should be accessing the _cleanup_list at this point,
  1967   // so it's not necessary to take any locks
  1968   while (!_cleanup_list.is_empty()) {
  1969     HeapRegion* hr = _cleanup_list.remove_head();
  1970     assert(hr != NULL, "the list was not empty");
  1971     hr->par_clear();
  1972     tmp_free_list.add_as_tail(hr);
  1974     // Instead of adding one region at a time to the secondary_free_list,
  1975     // we accumulate them in the local list and move them a few at a
  1976     // time. This also cuts down on the number of notify_all() calls
  1977     // we do during this process. We'll also append the local list when
  1978     // _cleanup_list is empty (which means we just removed the last
  1979     // region from the _cleanup_list).
  1980     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
  1981         _cleanup_list.is_empty()) {
  1982       if (G1ConcRegionFreeingVerbose) {
  1983         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
  1984                                "appending %u entries to the secondary_free_list, "
  1985                                "cleanup list still has %u entries",
  1986                                tmp_free_list.length(),
  1987                                _cleanup_list.length());
  1991         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
  1992         g1h->secondary_free_list_add_as_tail(&tmp_free_list);
  1993         SecondaryFreeList_lock->notify_all();
  1996       if (G1StressConcRegionFreeing) {
  1997         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
  1998           os::sleep(Thread::current(), (jlong) 1, false);
  2003   assert(tmp_free_list.is_empty(), "post-condition");
  2006 // Support closures for reference processing in G1
  2008 bool G1CMIsAliveClosure::do_object_b(oop obj) {
  2009   HeapWord* addr = (HeapWord*)obj;
  2010   return addr != NULL &&
  2011          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
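       // 'Keep Alive' closure used by serial reference processing. Objects that
       // are still unmarked inside the G1 heap are marked, counted, and pushed
       // on the global mark stack to be drained later.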
  2014 class G1CMKeepAliveClosure: public OopClosure {
  2015   G1CollectedHeap* _g1;
  2016   ConcurrentMark*  _cm;
  2017  public:
  2018   G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm) :
  2019     _g1(g1), _cm(cm) {
  2020     assert(Thread::current()->is_VM_thread(), "otherwise fix worker id");
  2023   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  2024   virtual void do_oop(      oop* p) { do_oop_work(p); }
  2026   template <class T> void do_oop_work(T* p) {
  2027     oop obj = oopDesc::load_decode_heap_oop(p);
  2028     HeapWord* addr = (HeapWord*)obj;
  2030     if (_cm->verbose_high()) {
  2031       gclog_or_tty->print_cr("\t[0] we're looking at location "
  2032                              "*"PTR_FORMAT" = "PTR_FORMAT,
  2033                              p, (void*) obj);
  2036     if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {
  2037       _cm->mark_and_count(obj);
  2038       _cm->mark_stack_push(obj);
  2041 };
  2043 class G1CMDrainMarkingStackClosure: public VoidClosure {
  2044   ConcurrentMark*               _cm;
  2045   CMMarkStack*                  _markStack;
  2046   G1CMKeepAliveClosure*         _oopClosure;
  2047  public:
  2048   G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMMarkStack* markStack,
  2049                                G1CMKeepAliveClosure* oopClosure) :
  2050     _cm(cm),
  2051     _markStack(markStack),
  2052     _oopClosure(oopClosure) { }
  2054   void do_void() {
  2055     _markStack->drain((OopClosure*)_oopClosure, _cm->nextMarkBitMap(), false);
  2057 };
  2059 // 'Keep Alive' closure used by parallel reference processing.
  2060 // An instance of this closure is used in the parallel reference processing
  2061 // code rather than an instance of G1CMKeepAliveClosure. We could have used
  2062 // the G1CMKeepAliveClosure as it is MT-safe. Also reference objects are
  2063 // placed onto discovered ref lists once, so we can mark and push with no
  2064 // need to check whether the object has already been marked. Using the
  2065 // G1CMKeepAliveClosure would mean, however, having all the worker threads
  2066 // operating on the global mark stack. This means that an individual
  2067 // worker would be doing lock-free pushes while it processes its own
  2068 // discovered ref list followed by drain call. If the discovered ref lists
  2069 // are unbalanced then this could cause interference with the other
  2070 // workers. Using a CMTask (and its embedded local data structures)
  2071 // avoids that potential interference.
  2072 class G1CMParKeepAliveAndDrainClosure: public OopClosure {
  2073   ConcurrentMark*  _cm;
  2074   CMTask*          _task;
  2075   int              _ref_counter_limit;
  2076   int              _ref_counter;
  2077  public:
  2078   G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task) :
  2079     _cm(cm), _task(task),
  2080     _ref_counter_limit(G1RefProcDrainInterval) {
  2081     assert(_ref_counter_limit > 0, "sanity");
  2082     _ref_counter = _ref_counter_limit;
  2085   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  2086   virtual void do_oop(      oop* p) { do_oop_work(p); }
  2088   template <class T> void do_oop_work(T* p) {
  2089     if (!_cm->has_overflown()) {
  2090       oop obj = oopDesc::load_decode_heap_oop(p);
  2091       if (_cm->verbose_high()) {
  2092         gclog_or_tty->print_cr("\t[%d] we're looking at location "
  2093                                "*"PTR_FORMAT" = "PTR_FORMAT,
  2094                                _task->task_id(), p, (void*) obj);
  2097       _task->deal_with_reference(obj);
  2098       _ref_counter--;
  2100       if (_ref_counter == 0) {
  2101         // We have dealt with _ref_counter_limit references, pushing them and objects
  2102         // reachable from them onto the local stack (and possibly the global stack).
  2103         // Call do_marking_step() to process these entries. We call the routine in a
  2104         // loop, which we'll exit if there's nothing more to do (i.e. we're done
  2105         // with the entries that we've pushed as a result of the deal_with_reference
  2106         // calls above) or we overflow.
  2107         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
  2108         // while there may still be some work to do. (See the comment at the
  2109         // beginning of CMTask::do_marking_step() for those conditions - one of which
  2110         // is reaching the specified time target.) It is only when
  2111         // CMTask::do_marking_step() returns without setting the has_aborted() flag
  2112         // that the marking has completed.
  2113         do {
  2114           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
  2115           _task->do_marking_step(mark_step_duration_ms,
  2116                                  false /* do_stealing    */,
  2117                                  false /* do_termination */);
  2118         } while (_task->has_aborted() && !_cm->has_overflown());
  2119         _ref_counter = _ref_counter_limit;
  2121     } else {
  2122       if (_cm->verbose_high()) {
  2123          gclog_or_tty->print_cr("\t[%d] CM Overflow", _task->task_id());
  2127 };
  2129 class G1CMParDrainMarkingStackClosure: public VoidClosure {
  2130   ConcurrentMark* _cm;
  2131   CMTask* _task;
  2132  public:
  2133   G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) :
  2134     _cm(cm), _task(task) { }
  2136   void do_void() {
  2137     do {
  2138       if (_cm->verbose_high()) {
  2139         gclog_or_tty->print_cr("\t[%d] Drain: Calling do marking_step",
  2140                                _task->task_id());
  2143       // We call CMTask::do_marking_step() to completely drain the local and
  2144       // global marking stacks. The routine is called in a loop, which we'll
  2145       // exit if there's nothing more to do (i.e. we've completely drained the
  2146       // entries that were pushed as a result of applying the
  2147       // G1CMParKeepAliveAndDrainClosure to the entries on the discovered ref
  2148       // lists above) or we overflow the global marking stack.
  2149       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
  2150       // while there may still be some work to do. (See the comment at the
  2151       // beginning of CMTask::do_marking_step() for those conditions - one of which
  2152       // is reaching the specified time target.) It is only when
  2153       // CMTask::do_marking_step() returns without setting the has_aborted() flag
  2154       // that the marking has completed.
  2156       _task->do_marking_step(1000000000.0 /* something very large */,
  2157                              true /* do_stealing    */,
  2158                              true /* do_termination */);
  2159     } while (_task->has_aborted() && !_cm->has_overflown());
  2161 };
  2163 // Implementation of AbstractRefProcTaskExecutor for parallel
  2164 // reference processing at the end of G1 concurrent marking
  2166 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
  2167 private:
  2168   G1CollectedHeap* _g1h;
  2169   ConcurrentMark*  _cm;
  2170   WorkGang*        _workers;
  2171   int              _active_workers;
  2173 public:
  2174   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
  2175                         ConcurrentMark* cm,
  2176                         WorkGang* workers,
  2177                         int n_workers) :
  2178     _g1h(g1h), _cm(cm),
  2179     _workers(workers), _active_workers(n_workers) { }
  2181   // Executes the given task using concurrent marking worker threads.
  2182   virtual void execute(ProcessTask& task);
  2183   virtual void execute(EnqueueTask& task);
  2184 };
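       // Gang task that runs the reference processing ProcessTask in parallel:
       // each worker uses the is-alive closure together with keep-alive and
       // drain closures bound to its own CMTask.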
  2186 class G1CMRefProcTaskProxy: public AbstractGangTask {
  2187   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
  2188   ProcessTask&     _proc_task;
  2189   G1CollectedHeap* _g1h;
  2190   ConcurrentMark*  _cm;
  2192 public:
  2193   G1CMRefProcTaskProxy(ProcessTask& proc_task,
  2194                      G1CollectedHeap* g1h,
  2195                      ConcurrentMark* cm) :
  2196     AbstractGangTask("Process reference objects in parallel"),
  2197     _proc_task(proc_task), _g1h(g1h), _cm(cm) { }
  2199   virtual void work(uint worker_id) {
  2200     CMTask* marking_task = _cm->task(worker_id);
  2201     G1CMIsAliveClosure g1_is_alive(_g1h);
  2202     G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task);
  2203     G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task);
  2205     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
  2207 };
  2209 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
  2210   assert(_workers != NULL, "Need parallel worker threads.");
  2212   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
  2214   // We need to reset the phase for each task execution so that
  2215   // the termination protocol of CMTask::do_marking_step works.
  2216   _cm->set_phase(_active_workers, false /* concurrent */);
  2217   _g1h->set_par_threads(_active_workers);
  2218   _workers->run_task(&proc_task_proxy);
  2219   _g1h->set_par_threads(0);
  2222 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
  2223   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
  2224   EnqueueTask& _enq_task;
  2226 public:
  2227   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
  2228     AbstractGangTask("Enqueue reference objects in parallel"),
  2229     _enq_task(enq_task) { }
  2231   virtual void work(uint worker_id) {
  2232     _enq_task.work(worker_id);
  2234 };
  2236 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
  2237   assert(_workers != NULL, "Need parallel worker threads.");
  2239   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
  2241   _g1h->set_par_threads(_active_workers);
  2242   _workers->run_task(&enq_task_proxy);
  2243   _g1h->set_par_threads(0);
  2246 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
  2247   ResourceMark rm;
  2248   HandleMark   hm;
  2250   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  2252   // Is alive closure.
  2253   G1CMIsAliveClosure g1_is_alive(g1h);
  2255   // Inner scope to exclude the cleaning of the string and symbol
  2256   // tables from the displayed time.
  2258     if (G1Log::finer()) {
  2259       gclog_or_tty->put(' ');
  2261     TraceTime t("GC ref-proc", G1Log::finer(), false, gclog_or_tty);
  2263     ReferenceProcessor* rp = g1h->ref_processor_cm();
  2265     // See the comment in G1CollectedHeap::ref_processing_init()
  2266     // about how reference processing currently works in G1.
  2268     // Process weak references.
  2269     rp->setup_policy(clear_all_soft_refs);
  2270     assert(_markStack.isEmpty(), "mark stack should be empty");
  2272     G1CMKeepAliveClosure g1_keep_alive(g1h, this);
  2273     G1CMDrainMarkingStackClosure
  2274       g1_drain_mark_stack(this, &_markStack, &g1_keep_alive);
  2276     // We use the work gang from the G1CollectedHeap and we utilize all
  2277     // the worker threads.
  2278     uint active_workers = g1h->workers() ? g1h->workers()->active_workers() : 1U;
  2279     active_workers = MAX2(MIN2(active_workers, _max_task_num), 1U);
  2281     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
  2282                                               g1h->workers(), active_workers);
  2284     if (rp->processing_is_mt()) {
  2285       // Set the degree of MT here.  If the discovery is done MT, there
  2286       // may have been a different number of threads doing the discovery
  2287       // and a different number of discovered lists may have Ref objects.
  2288       // That is OK as long as the Reference lists are balanced (see
  2289       // balance_all_queues() and balance_queues()).
  2290       rp->set_active_mt_degree(active_workers);
  2292       rp->process_discovered_references(&g1_is_alive,
  2293                                       &g1_keep_alive,
  2294                                       &g1_drain_mark_stack,
  2295                                       &par_task_executor);
  2297       // The work routines of the parallel keep_alive and drain_marking_stack
  2298       // will set the has_overflown flag if we overflow the global marking
  2299       // stack.
  2300     } else {
  2301       rp->process_discovered_references(&g1_is_alive,
  2302                                         &g1_keep_alive,
  2303                                         &g1_drain_mark_stack,
  2304                                         NULL);
  2307     assert(_markStack.overflow() || _markStack.isEmpty(),
  2308             "mark stack should be empty (unless it overflowed)");
  2309     if (_markStack.overflow()) {
  2310       // Should have been done already when we tried to push an
  2311       // entry on to the global mark stack. But let's do it again.
  2312       set_has_overflown();
  2315     if (rp->processing_is_mt()) {
  2316       assert(rp->num_q() == active_workers, "why not");
  2317       rp->enqueue_discovered_references(&par_task_executor);
  2318     } else {
  2319       rp->enqueue_discovered_references();
  2322     rp->verify_no_references_recorded();
  2323     assert(!rp->discovery_enabled(), "Post condition");
  2326   // Now clean up stale oops in StringTable
  2327   StringTable::unlink(&g1_is_alive);
  2328   // Clean up unreferenced symbols in symbol table.
  2329   SymbolTable::unlink();
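       // After a successful marking cycle the "next" bitmap holds the complete
       // marking information, so it becomes the new "prev" bitmap and the old
       // "prev" bitmap is reused as "next" for the following cycle.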
  2332 void ConcurrentMark::swapMarkBitMaps() {
  2333   CMBitMapRO* temp = _prevMarkBitMap;
  2334   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
  2335   _nextMarkBitMap  = (CMBitMap*)  temp;
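       // Parallel remark task: each active worker repeatedly calls
       // CMTask::do_marking_step() (with stealing and termination enabled)
       // until either the marking completes or the global mark stack overflows.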
  2338 class CMRemarkTask: public AbstractGangTask {
  2339 private:
  2340   ConcurrentMark *_cm;
  2342 public:
  2343   void work(uint worker_id) {
  2344     // Since all available tasks are actually started, we should
  2345     // only proceed if we're supposed to be active.
  2346     if (worker_id < _cm->active_tasks()) {
  2347       CMTask* task = _cm->task(worker_id);
  2348       task->record_start_time();
  2349       do {
  2350         task->do_marking_step(1000000000.0 /* something very large */,
  2351                               true /* do_stealing    */,
  2352                               true /* do_termination */);
  2353       } while (task->has_aborted() && !_cm->has_overflown());
  2354       // If we overflow, then we do not want to restart. We instead
  2355       // want to abort remark and do concurrent marking again.
  2356       task->record_end_time();
  2360   CMRemarkTask(ConcurrentMark* cm, int active_workers) :
  2361     AbstractGangTask("Par Remark"), _cm(cm) {
  2362     _cm->terminator()->reset_for_reuse(active_workers);
  2364 };
  2366 void ConcurrentMark::checkpointRootsFinalWork() {
  2367   ResourceMark rm;
  2368   HandleMark   hm;
  2369   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  2371   g1h->ensure_parsability(false);
  2373   if (G1CollectedHeap::use_parallel_gc_threads()) {
  2374     G1CollectedHeap::StrongRootsScope srs(g1h);
  2375     // this is remark, so we'll use up all active threads
  2376     uint active_workers = g1h->workers()->active_workers();
  2377     if (active_workers == 0) {
  2378       assert(active_workers > 0, "Should have been set earlier");
  2379       active_workers = (uint) ParallelGCThreads;
  2380       g1h->workers()->set_active_workers(active_workers);
  2382     set_phase(active_workers, false /* concurrent */);
  2383     // Leave _parallel_marking_threads at its
  2384     // value originally calculated in the ConcurrentMark
  2385     // constructor and pass the number of active workers
  2386     // through the gang in the task.
  2388     CMRemarkTask remarkTask(this, active_workers);
  2389     g1h->set_par_threads(active_workers);
  2390     g1h->workers()->run_task(&remarkTask);
  2391     g1h->set_par_threads(0);
  2392   } else {
  2393     G1CollectedHeap::StrongRootsScope srs(g1h);
  2394     // this is remark, so we'll use up all available threads
  2395     uint active_workers = 1;
  2396     set_phase(active_workers, false /* concurrent */);
  2398     CMRemarkTask remarkTask(this, active_workers);
  2399     // We will start all available threads, even if we decide that the
  2400     // active_workers will be fewer. The extra ones will just bail out
  2401     // immediately.
  2402     remarkTask.work(0);
  2404   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  2405   guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");
  2407   print_stats();
  2409 #if VERIFY_OBJS_PROCESSED
  2410   if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
  2411     gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
  2412                            _scan_obj_cl.objs_processed,
  2413                            ThreadLocalObjQueue::objs_enqueued);
  2414     guarantee(_scan_obj_cl.objs_processed ==
  2415               ThreadLocalObjQueue::objs_enqueued,
  2416               "Different number of objs processed and enqueued.");
  2418 #endif
  2421 #ifndef PRODUCT
  2423 class PrintReachableOopClosure: public OopClosure {
  2424 private:
  2425   G1CollectedHeap* _g1h;
  2426   outputStream*    _out;
  2427   VerifyOption     _vo;
  2428   bool             _all;
  2430 public:
  2431   PrintReachableOopClosure(outputStream* out,
  2432                            VerifyOption  vo,
  2433                            bool          all) :
  2434     _g1h(G1CollectedHeap::heap()),
  2435     _out(out), _vo(vo), _all(all) { }
  2437   void do_oop(narrowOop* p) { do_oop_work(p); }
  2438   void do_oop(      oop* p) { do_oop_work(p); }
  2440   template <class T> void do_oop_work(T* p) {
  2441     oop         obj = oopDesc::load_decode_heap_oop(p);
  2442     const char* str = NULL;
  2443     const char* str2 = "";
  2445     if (obj == NULL) {
  2446       str = "";
  2447     } else if (!_g1h->is_in_g1_reserved(obj)) {
  2448       str = " O";
  2449     } else {
  2450       HeapRegion* hr  = _g1h->heap_region_containing(obj);
  2451       guarantee(hr != NULL, "invariant");
  2452       bool over_tams = false;
  2453       bool marked = false;
  2455       switch (_vo) {
  2456         case VerifyOption_G1UsePrevMarking:
  2457           over_tams = hr->obj_allocated_since_prev_marking(obj);
  2458           marked = _g1h->isMarkedPrev(obj);
  2459           break;
  2460         case VerifyOption_G1UseNextMarking:
  2461           over_tams = hr->obj_allocated_since_next_marking(obj);
  2462           marked = _g1h->isMarkedNext(obj);
  2463           break;
  2464         case VerifyOption_G1UseMarkWord:
  2465           marked = obj->is_gc_marked();
  2466           break;
  2467         default:
  2468           ShouldNotReachHere();
  2471       if (over_tams) {
  2472         str = " >";
  2473         if (marked) {
  2474           str2 = " AND MARKED";
  2476       } else if (marked) {
  2477         str = " M";
  2478       } else {
  2479         str = " NOT";
  2483     _out->print_cr("  "PTR_FORMAT": "PTR_FORMAT"%s%s",
  2484                    p, (void*) obj, str, str2);
  2486 };
  2488 class PrintReachableObjectClosure : public ObjectClosure {
  2489 private:
  2490   G1CollectedHeap* _g1h;
  2491   outputStream*    _out;
  2492   VerifyOption     _vo;
  2493   bool             _all;
  2494   HeapRegion*      _hr;
  2496 public:
  2497   PrintReachableObjectClosure(outputStream* out,
  2498                               VerifyOption  vo,
  2499                               bool          all,
  2500                               HeapRegion*   hr) :
  2501     _g1h(G1CollectedHeap::heap()),
  2502     _out(out), _vo(vo), _all(all), _hr(hr) { }
  2504   void do_object(oop o) {
  2505     bool over_tams = false;
  2506     bool marked = false;
  2508     switch (_vo) {
  2509       case VerifyOption_G1UsePrevMarking:
  2510         over_tams = _hr->obj_allocated_since_prev_marking(o);
  2511         marked = _g1h->isMarkedPrev(o);
  2512         break;
  2513       case VerifyOption_G1UseNextMarking:
  2514         over_tams = _hr->obj_allocated_since_next_marking(o);
  2515         marked = _g1h->isMarkedNext(o);
  2516         break;
  2517       case VerifyOption_G1UseMarkWord:
  2518         marked = o->is_gc_marked();
  2519         break;
  2520       default:
  2521         ShouldNotReachHere();
  2523     bool print_it = _all || over_tams || marked;
  2525     if (print_it) {
  2526       _out->print_cr(" "PTR_FORMAT"%s",
  2527                      o, (over_tams) ? " >" : (marked) ? " M" : "");
  2528       PrintReachableOopClosure oopCl(_out, _vo, _all);
  2529       o->oop_iterate(&oopCl);
  2532 };
  2534 class PrintReachableRegionClosure : public HeapRegionClosure {
  2535 private:
  2536   outputStream* _out;
  2537   VerifyOption  _vo;
  2538   bool          _all;
  2540 public:
  2541   bool doHeapRegion(HeapRegion* hr) {
  2542     HeapWord* b = hr->bottom();
  2543     HeapWord* e = hr->end();
  2544     HeapWord* t = hr->top();
  2545     HeapWord* p = NULL;
  2547     switch (_vo) {
  2548       case VerifyOption_G1UsePrevMarking:
  2549         p = hr->prev_top_at_mark_start();
  2550         break;
  2551       case VerifyOption_G1UseNextMarking:
  2552         p = hr->next_top_at_mark_start();
  2553         break;
  2554       case VerifyOption_G1UseMarkWord:
  2555         // When we are verifying marking using the mark word
  2556         // TAMS has no relevance.
  2557         assert(p == NULL, "post-condition");
  2558         break;
  2559       default:
  2560         ShouldNotReachHere();
  2562     _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
  2563                    "TAMS: "PTR_FORMAT, b, e, t, p);
  2564     _out->cr();
  2566     HeapWord* from = b;
  2567     HeapWord* to   = t;
  2569     if (to > from) {
  2570       _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
  2571       _out->cr();
  2572       PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
  2573       hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
  2574       _out->cr();
  2577     return false;
  2580   PrintReachableRegionClosure(outputStream* out,
  2581                               VerifyOption  vo,
  2582                               bool          all) :
  2583     _out(out), _vo(vo), _all(all) { }
  2584 };
  2586 static const char* verify_option_to_tams(VerifyOption vo) {
  2587   switch (vo) {
  2588     case VerifyOption_G1UsePrevMarking:
  2589       return "PTAMS";
  2590     case VerifyOption_G1UseNextMarking:
  2591       return "NTAMS";
  2592     default:
  2593       return "NONE";
  2597 void ConcurrentMark::print_reachable(const char* str,
  2598                                      VerifyOption vo,
  2599                                      bool all) {
  2600   gclog_or_tty->cr();
  2601   gclog_or_tty->print_cr("== Doing heap dump... ");
  2603   if (G1PrintReachableBaseFile == NULL) {
  2604     gclog_or_tty->print_cr("  #### error: no base file defined");
  2605     return;
  2608   if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
  2609       (JVM_MAXPATHLEN - 1)) {
  2610     gclog_or_tty->print_cr("  #### error: file name too long");
  2611     return;
  2614   char file_name[JVM_MAXPATHLEN];
  2615   sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
  2616   gclog_or_tty->print_cr("  dumping to file %s", file_name);
  2618   fileStream fout(file_name);
  2619   if (!fout.is_open()) {
  2620     gclog_or_tty->print_cr("  #### error: could not open file");
  2621     return;
  2624   outputStream* out = &fout;
  2625   out->print_cr("-- USING %s", verify_option_to_tams(vo));
  2626   out->cr();
  2628   out->print_cr("--- ITERATING OVER REGIONS");
  2629   out->cr();
  2630   PrintReachableRegionClosure rcl(out, vo, all);
  2631   _g1h->heap_region_iterate(&rcl);
  2632   out->cr();
  2634   gclog_or_tty->print_cr("  done");
  2635   gclog_or_tty->flush();
  2638 #endif // PRODUCT
  2640 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
  2641   // Note we are overriding the read-only view of the prev map here, via
  2642   // the cast.
  2643   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
  2646 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
  2647   _nextMarkBitMap->clearRange(mr);
  2650 void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) {
  2651   clearRangePrevBitmap(mr);
  2652   clearRangeNextBitmap(mr);
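       // Claims a region for the given marking task by advancing the global
       // finger past it with a CAS, retrying if another thread moves the finger
       // first. Returns NULL when the claimed region is empty (the caller should
       // simply call claim_region() again) or when the finger has reached the
       // end of the heap.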
  2655 HeapRegion*
  2656 ConcurrentMark::claim_region(int task_num) {
  2657   // "checkpoint" the finger
  2658   HeapWord* finger = _finger;
  2660   // _heap_end will not change underneath our feet; it only changes at
  2661   // yield points.
  2662   while (finger < _heap_end) {
  2663     assert(_g1h->is_in_g1_reserved(finger), "invariant");
  2665     // Note on how this code handles humongous regions. In the
  2666     // normal case the finger will reach the start of a "starts
  2667     // humongous" (SH) region. Its end will either be the end of the
  2668     // last "continues humongous" (CH) region in the sequence, or the
  2669     // standard end of the SH region (if the SH is the only region in
  2670     // the sequence). That way claim_region() will skip over the CH
  2671     // regions. However, there is a subtle race between a CM thread
  2672     // executing this method and a mutator thread doing a humongous
  2673     // object allocation. The two are not mutually exclusive as the CM
  2674     // thread does not need to hold the Heap_lock when it gets
  2675     // here. So there is a chance that claim_region() will come across
  2676     // a free region that's in the process of becoming a SH or a CH
  2677     // region. In the former case, it will either
  2678     //   a) Miss the update to the region's end, in which case it will
  2679     //      visit every subsequent CH region, will find their bitmaps
  2680     //      empty, and do nothing, or
  2681     //   b) Will observe the update of the region's end (in which case
  2682     //      it will skip the subsequent CH regions).
  2683     // If it comes across a region that suddenly becomes CH, the
  2684     // scenario will be similar to b). So, the race between
  2685     // claim_region() and a humongous object allocation might force us
  2686     // to do a bit of unnecessary work (due to some unnecessary bitmap
  2687     // iterations) but it should not introduce any correctness issues.
  2688     HeapRegion* curr_region   = _g1h->heap_region_containing_raw(finger);
  2689     HeapWord*   bottom        = curr_region->bottom();
  2690     HeapWord*   end           = curr_region->end();
  2691     HeapWord*   limit         = curr_region->next_top_at_mark_start();
  2693     if (verbose_low()) {
  2694       gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
  2695                              "["PTR_FORMAT", "PTR_FORMAT"), "
  2696                              "limit = "PTR_FORMAT,
  2697                              task_num, curr_region, bottom, end, limit);
  2700     // Is the gap between reading the finger and doing the CAS too long?
  2701     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
  2702     if (res == finger) {
  2703       // we succeeded
  2705       // notice that _finger == end cannot be guaranteed here since,
  2706       // someone else might have moved the finger even further
  2707       assert(_finger >= end, "the finger should have moved forward");
  2709       if (verbose_low()) {
  2710         gclog_or_tty->print_cr("[%d] we were successful with region = "
  2711                                PTR_FORMAT, task_num, curr_region);
  2714       if (limit > bottom) {
  2715         if (verbose_low()) {
  2716           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
  2717                                  "returning it ", task_num, curr_region);
  2719         return curr_region;
  2720       } else {
  2721         assert(limit == bottom,
  2722                "the region limit should be at bottom");
  2723         if (verbose_low()) {
  2724           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
  2725                                  "returning NULL", task_num, curr_region);
  2727         // we return NULL and the caller should try calling
  2728         // claim_region() again.
  2729         return NULL;
  2731     } else {
  2732       assert(_finger > finger, "the finger should have moved forward");
  2733       if (verbose_low()) {
  2734         gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
  2735                                "global finger = "PTR_FORMAT", "
  2736                                "our finger = "PTR_FORMAT,
  2737                                task_num, _finger, finger);
  2740       // read it again
  2741       finger = _finger;
  2745   return NULL;
  2748 #ifndef PRODUCT
  2749 enum VerifyNoCSetOopsPhase {
  2750   VerifyNoCSetOopsStack,
  2751   VerifyNoCSetOopsQueues,
  2752   VerifyNoCSetOopsSATBCompleted,
  2753   VerifyNoCSetOopsSATBThread
  2754 };
  2756 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure  {
  2757 private:
  2758   G1CollectedHeap* _g1h;
  2759   VerifyNoCSetOopsPhase _phase;
  2760   int _info;
  2762   const char* phase_str() {
  2763     switch (_phase) {
  2764     case VerifyNoCSetOopsStack:         return "Stack";
  2765     case VerifyNoCSetOopsQueues:        return "Queue";
  2766     case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers";
  2767     case VerifyNoCSetOopsSATBThread:    return "Thread SATB Buffers";
  2768     default:                            ShouldNotReachHere();
  2770     return NULL;
  2773   void do_object_work(oop obj) {
  2774     guarantee(!_g1h->obj_in_cs(obj),
  2775               err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d",
  2776                       (void*) obj, phase_str(), _info));
  2779 public:
  2780   VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { }
  2782   void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) {
  2783     _phase = phase;
  2784     _info = info;
  2787   virtual void do_oop(oop* p) {
  2788     oop obj = oopDesc::load_decode_heap_oop(p);
  2789     do_object_work(obj);
  2792   virtual void do_oop(narrowOop* p) {
  2793     // We should not come across narrow oops while scanning marking
  2794     // stacks and SATB buffers.
  2795     ShouldNotReachHere();
  2798   virtual void do_object(oop obj) {
  2799     do_object_work(obj);
  2801 };
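       // Debug-only check, performed at a safepoint, that (depending on the
       // flags passed) no entry on the global mark stack, the task queues or
       // the SATB buffers points into the collection set, and that the global
       // and per-task fingers still point at sane region boundaries.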
  2803 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks,
  2804                                          bool verify_enqueued_buffers,
  2805                                          bool verify_thread_buffers,
  2806                                          bool verify_fingers) {
  2807   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
  2808   if (!G1CollectedHeap::heap()->mark_in_progress()) {
  2809     return;
  2812   VerifyNoCSetOopsClosure cl;
  2814   if (verify_stacks) {
  2815     // Verify entries on the global mark stack
  2816     cl.set_phase(VerifyNoCSetOopsStack);
  2817     _markStack.oops_do(&cl);
  2819     // Verify entries on the task queues
  2820     for (int i = 0; i < (int) _max_task_num; i += 1) {
  2821       cl.set_phase(VerifyNoCSetOopsQueues, i);
  2822       OopTaskQueue* queue = _task_queues->queue(i);
  2823       queue->oops_do(&cl);
  2827   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  2829   // Verify entries on the enqueued SATB buffers
  2830   if (verify_enqueued_buffers) {
  2831     cl.set_phase(VerifyNoCSetOopsSATBCompleted);
  2832     satb_qs.iterate_completed_buffers_read_only(&cl);
  2835   // Verify entries on the per-thread SATB buffers
  2836   if (verify_thread_buffers) {
  2837     cl.set_phase(VerifyNoCSetOopsSATBThread);
  2838     satb_qs.iterate_thread_buffers_read_only(&cl);
  2841   if (verify_fingers) {
  2842     // Verify the global finger
  2843     HeapWord* global_finger = finger();
  2844     if (global_finger != NULL && global_finger < _heap_end) {
  2845       // The global finger always points to a heap region boundary. We
  2846       // use heap_region_containing_raw() to get the containing region
  2847       // given that the global finger could be pointing to a free region
  2848       // which subsequently becomes a continues humongous region. If that
  2849       // happens, heap_region_containing() will return the bottom of the
  2850       // corresponding starts humongous region and the check below will
  2851       // not hold any more.
  2852       HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
  2853       guarantee(global_finger == global_hr->bottom(),
  2854                 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT,
  2855                         global_finger, HR_FORMAT_PARAMS(global_hr)));
  2858     // Verify the task fingers
  2859     assert(parallel_marking_threads() <= _max_task_num, "sanity");
  2860     for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {
  2861       CMTask* task = _tasks[i];
  2862       HeapWord* task_finger = task->finger();
  2863       if (task_finger != NULL && task_finger < _heap_end) {
  2864         // See above note on the global finger verification.
  2865         HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
  2866         guarantee(task_finger == task_hr->bottom() ||
  2867                   !task_hr->in_collection_set(),
  2868                   err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT,
  2869                           task_finger, HR_FORMAT_PARAMS(task_hr)));
  2874 #endif // PRODUCT
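       // Reset the global marking state: empty the global mark stack and all the
       // task queues and reset the global finger to the start of the heap. The
       // overflow flag is cleared only when clear_overflow is true; otherwise we
       // expect it to still be set, as we are restarting after an overflow.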
  2876 void ConcurrentMark::clear_marking_state(bool clear_overflow) {
  2877   _markStack.setEmpty();
  2878   _markStack.clear_overflow();
  2879   if (clear_overflow) {
  2880     clear_has_overflown();
  2881   } else {
  2882     assert(has_overflown(), "pre-condition");
  2884   _finger = _heap_start;
  2886   for (int i = 0; i < (int)_max_task_num; ++i) {
  2887     OopTaskQueue* queue = _task_queues->queue(i);
  2888     queue->set_empty();
  2892 // Aggregate the counting data that was constructed concurrently
  2893 // with marking.
  2894 class AggregateCountDataHRClosure: public HeapRegionClosure {
  2895   ConcurrentMark* _cm;
  2896   BitMap* _cm_card_bm;
  2897   size_t _max_task_num;
  2899  public:
  2900   AggregateCountDataHRClosure(ConcurrentMark *cm,
  2901                               BitMap* cm_card_bm,
  2902                               size_t max_task_num) :
  2903     _cm(cm), _cm_card_bm(cm_card_bm),
  2904     _max_task_num(max_task_num) { }
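         // Returns true iff p lies on a card boundary. CardTableModRefBS::card_size
         // is a power of two, so masking the low-order bits is sufficient.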
  2906   bool is_card_aligned(HeapWord* p) {
  2907     return ((uintptr_t(p) & (CardTableModRefBS::card_size - 1)) == 0);
  2910   bool doHeapRegion(HeapRegion* hr) {
  2911     if (hr->continuesHumongous()) {
  2912       // We will ignore these here and process them when their
  2913       // associated "starts humongous" region is processed.
  2914       // Note that we cannot rely on their associated
  2915       // "starts humongous" region to have its bit set to 1
  2916       // since, due to the region chunking in the parallel region
  2917       // iteration, a "continues humongous" region might be visited
  2918       // before its associated "starts humongous".
  2919       return false;
  2922     HeapWord* start = hr->bottom();
  2923     HeapWord* limit = hr->next_top_at_mark_start();
  2924     HeapWord* end = hr->end();
  2926     assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
  2927            err_msg("Preconditions not met - "
  2928                    "start: "PTR_FORMAT", limit: "PTR_FORMAT", "
  2929                    "top: "PTR_FORMAT", end: "PTR_FORMAT,
  2930                    start, limit, hr->top(), hr->end()));
  2932     assert(hr->next_marked_bytes() == 0, "Precondition");
  2934     if (start == limit) {
  2935       // NTAMS of this region has not been set so nothing to do.
  2936       return false;
  2939     assert(is_card_aligned(start), "sanity");
  2940     assert(is_card_aligned(end), "sanity");
  2942     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
  2943     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
  2944     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
  2946     // If ntams is not card aligned then we bump the index for
  2947     // limit so that we get the card spanning ntams.
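           // ([start_idx, limit_idx) is treated as a half-open range below, so
           // without this bump the card spanning a mid-card ntams would be missed.)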
  2948     if (!is_card_aligned(limit)) {
  2949       limit_idx += 1;
  2952     assert(limit_idx <= end_idx, "or else use atomics");
  2954     // Aggregate the "stripe" in the count data associated with hr.
  2955     uint hrs_index = hr->hrs_index();
  2956     size_t marked_bytes = 0;
  2958     for (int i = 0; (size_t)i < _max_task_num; i += 1) {
  2959       size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
  2960       BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
  2962       // Fetch the marked_bytes in this region for task i and
  2963       // add it to the running total for this region.
  2964       marked_bytes += marked_bytes_array[hrs_index];
  2966       // Now union the bitmaps[0,max_task_num)[start_idx..limit_idx)
  2967       // into the global card bitmap.
  2968       BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
  2970       while (scan_idx < limit_idx) {
  2971         assert(task_card_bm->at(scan_idx) == true, "should be");
  2972         _cm_card_bm->set_bit(scan_idx);
  2973         assert(_cm_card_bm->at(scan_idx) == true, "should be");
  2975         // BitMap::get_next_one_offset() can handle the case when
  2976         // its left_offset parameter is greater than its right_offset
  2977         // parameter. It does, however, have an early exit if
  2978         // left_offset == right_offset. So let's limit the value
  2979         // passed in for left offset here.
  2980         BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
  2981         scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
  2985     // Update the marked bytes for this region.
  2986     hr->add_to_marked_bytes(marked_bytes);
  2988     // Next heap region
  2989     return false;
  2991 };
  2993 class G1AggregateCountDataTask: public AbstractGangTask {
  2994 protected:
  2995   G1CollectedHeap* _g1h;
  2996   ConcurrentMark* _cm;
  2997   BitMap* _cm_card_bm;
  2998   size_t _max_task_num;
  2999   int _active_workers;
  3001 public:
  3002   G1AggregateCountDataTask(G1CollectedHeap* g1h,
  3003                            ConcurrentMark* cm,
  3004                            BitMap* cm_card_bm,
  3005                            size_t max_task_num,
  3006                            int n_workers) :
  3007     AbstractGangTask("Count Aggregation"),
  3008     _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
  3009     _max_task_num(max_task_num),
  3010     _active_workers(n_workers) { }
  3012   void work(uint worker_id) {
  3013     AggregateCountDataHRClosure cl(_cm, _cm_card_bm, _max_task_num);
  3015     if (G1CollectedHeap::use_parallel_gc_threads()) {
  3016       _g1h->heap_region_par_iterate_chunked(&cl, worker_id,
  3017                                             _active_workers,
  3018                                             HeapRegion::AggregateCountClaimValue);
  3019     } else {
  3020       _g1h->heap_region_iterate(&cl);
  3023 };
  3026 void ConcurrentMark::aggregate_count_data() {
  3027   int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
  3028                         _g1h->workers()->active_workers() :
  3029                         1);
  3031   G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
  3032                                            _max_task_num, n_workers);
  3034   if (G1CollectedHeap::use_parallel_gc_threads()) {
  3035     assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
  3036            "sanity check");
  3037     _g1h->set_par_threads(n_workers);
  3038     _g1h->workers()->run_task(&g1_par_agg_task);
  3039     _g1h->set_par_threads(0);
  3041     assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue),
  3042            "sanity check");
  3043     _g1h->reset_heap_region_claim_values();
  3044   } else {
  3045     g1_par_agg_task.work(0);
  3049 // Clear the per-worker arrays used to store the per-region counting data
  3050 void ConcurrentMark::clear_all_count_data() {
  3051   // Clear the global card bitmap - it will be filled during
  3052   // liveness count aggregation (during remark) and the
  3053   // final counting task.
  3054   _card_bm.clear();
  3056   // Clear the global region bitmap - it will be filled as part
  3057   // of the final counting task.
  3058   _region_bm.clear();
  3060   uint max_regions = _g1h->max_regions();
  3061   assert(_max_task_num != 0, "uninitialized");
  3063   for (int i = 0; (size_t) i < _max_task_num; i += 1) {
  3064     BitMap* task_card_bm = count_card_bitmap_for(i);
  3065     size_t* marked_bytes_array = count_marked_bytes_array_for(i);
  3067     assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
  3068     assert(marked_bytes_array != NULL, "uninitialized");
  3070     memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
  3071     task_card_bm->clear();
  3075 void ConcurrentMark::print_stats() {
  3076   if (verbose_stats()) {
  3077     gclog_or_tty->print_cr("---------------------------------------------------------------------");
  3078     for (size_t i = 0; i < _active_tasks; ++i) {
  3079       _tasks[i]->print_stats();
  3080       gclog_or_tty->print_cr("---------------------------------------------------------------------");
  3085 // abandon current marking iteration due to a Full GC
  3086 void ConcurrentMark::abort() {
  3087   // Clear all marks to force marking thread to do nothing
  3088   _nextMarkBitMap->clearAll();
  3089   // Clear the liveness counting data
  3090   clear_all_count_data();
  3091   // Empty mark stack
  3092   clear_marking_state();
  3093   for (int i = 0; i < (int)_max_task_num; ++i) {
  3094     _tasks[i]->clear_region_fields();
  3096   _has_aborted = true;
  3098   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  3099   satb_mq_set.abandon_partial_marking();
  3100   // This can be called either during or outside marking; we'll read
  3101   // the expected_active value from the SATB queue set.
  3102   satb_mq_set.set_active_all_threads(
  3103                                  false, /* new active value */
  3104                                  satb_mq_set.is_active() /* expected_active */);
  3107 static void print_ms_time_info(const char* prefix, const char* name,
  3108                                NumberSeq& ns) {
  3109   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
  3110                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
  3111   if (ns.num() > 0) {
  3112     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
  3113                            prefix, ns.sd(), ns.maximum());
  3117 void ConcurrentMark::print_summary_info() {
  3118   gclog_or_tty->print_cr(" Concurrent marking:");
  3119   print_ms_time_info("  ", "init marks", _init_times);
  3120   print_ms_time_info("  ", "remarks", _remark_times);
  3122     print_ms_time_info("     ", "final marks", _remark_mark_times);
  3123     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
  3126   print_ms_time_info("  ", "cleanups", _cleanup_times);
  3127   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
  3128                          _total_counting_time,
  3129                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
  3130                           (double)_cleanup_times.num()
  3131                          : 0.0));
  3132   if (G1ScrubRemSets) {
  3133     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
  3134                            _total_rs_scrub_time,
  3135                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
  3136                             (double)_cleanup_times.num()
  3137                            : 0.0));
  3139   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
  3140                          (_init_times.sum() + _remark_times.sum() +
  3141                           _cleanup_times.sum())/1000.0);
  3142   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
  3143                 "(%8.2f s marking).",
  3144                 cmThread()->vtime_accum(),
  3145                 cmThread()->vtime_mark_accum());
  3148 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
  3149   _parallel_workers->print_worker_threads_on(st);
  3152 // We take a break if someone is trying to stop the world.
  3153 bool ConcurrentMark::do_yield_check(uint worker_id) {
  3154   if (should_yield()) {
  3155     if (worker_id == 0) {
  3156       _g1h->g1_policy()->record_concurrent_pause();
  3158     cmThread()->yield();
  3159     if (worker_id == 0) {
  3160       _g1h->g1_policy()->record_concurrent_pause_end();
  3162     return true;
  3163   } else {
  3164     return false;
  3168 bool ConcurrentMark::should_yield() {
  3169   return cmThread()->should_yield();
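       // Returns true iff the card spanning address p is set in the global
       // liveness card bitmap (_card_bm).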
  3172 bool ConcurrentMark::containing_card_is_marked(void* p) {
  3173   size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
  3174   return _card_bm.at(offset >> CardTableModRefBS::card_shift);
  3177 bool ConcurrentMark::containing_cards_are_marked(void* start,
  3178                                                  void* last) {
  3179   return containing_card_is_marked(start) &&
  3180          containing_card_is_marked(last);
  3183 #ifndef PRODUCT
  3184 // for debugging purposes
  3185 void ConcurrentMark::print_finger() {
  3186   gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
  3187                          _heap_start, _heap_end, _finger);
  3188   for (int i = 0; i < (int) _max_task_num; ++i) {
  3189     gclog_or_tty->print("   %d: "PTR_FORMAT, i, _tasks[i]->finger());
  3191   gclog_or_tty->print_cr("");
  3193 #endif
  3195 void CMTask::scan_object(oop obj) {
  3196   assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
  3198   if (_cm->verbose_high()) {
  3199     gclog_or_tty->print_cr("[%d] we're scanning object "PTR_FORMAT,
  3200                            _task_id, (void*) obj);
  3203   size_t obj_size = obj->size();
  3204   _words_scanned += obj_size;
  3206   obj->oop_iterate(_cm_oop_closure);
  3207   statsOnly( ++_objs_scanned );
  3208   check_limits();
  3211 // Closure for iteration over bitmaps
  3212 class CMBitMapClosure : public BitMapClosure {
  3213 private:
  3214   // the bitmap that is being iterated over
  3215   CMBitMap*                   _nextMarkBitMap;
  3216   ConcurrentMark*             _cm;
  3217   CMTask*                     _task;
  3219 public:
  3220   CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
  3221     _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
  3223   bool do_bit(size_t offset) {
  3224     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
  3225     assert(_nextMarkBitMap->isMarked(addr), "invariant");
  3226     assert( addr < _cm->finger(), "invariant");
  3228     statsOnly( _task->increase_objs_found_on_bitmap() );
  3229     assert(addr >= _task->finger(), "invariant");
  3231     // We move this task's local finger along.
  3232     _task->move_finger_to(addr);
  3234     _task->scan_object(oop(addr));
  3235     // we only partially drain the local queue and global stack
  3236     _task->drain_local_queue(true);
  3237     _task->drain_global_stack(true);
  3239     // if the has_aborted flag has been raised, we need to bail out of
  3240     // the iteration
  3241     return !_task->has_aborted();
  3243 };
  3245 // Closure for iterating over objects, currently only used for
  3246 // processing SATB buffers.
  3247 class CMObjectClosure : public ObjectClosure {
  3248 private:
  3249   CMTask* _task;
  3251 public:
  3252   void do_object(oop obj) {
  3253     _task->deal_with_reference(obj);
  3256   CMObjectClosure(CMTask* task) : _task(task) { }
  3257 };
  3259 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
  3260                                ConcurrentMark* cm,
  3261                                CMTask* task)
  3262   : _g1h(g1h), _cm(cm), _task(task) {
  3263   assert(_ref_processor == NULL, "should be initialized to NULL");
  3265   if (G1UseConcMarkReferenceProcessing) {
  3266     _ref_processor = g1h->ref_processor_cm();
  3267     assert(_ref_processor != NULL, "should not be NULL");
  3271 void CMTask::setup_for_region(HeapRegion* hr) {
  3272   // Separated the asserts so that we know which one fires.
  3273   assert(hr != NULL,
  3274         "claim_region() should have filtered out NULL regions");
  3275   assert(!hr->continuesHumongous(),
  3276         "claim_region() should have filtered out continues humongous regions");
  3278   if (_cm->verbose_low()) {
  3279     gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
  3280                            _task_id, hr);
  3283   _curr_region  = hr;
  3284   _finger       = hr->bottom();
  3285   update_region_limit();
  3288 void CMTask::update_region_limit() {
  3289   HeapRegion* hr            = _curr_region;
  3290   HeapWord* bottom          = hr->bottom();
  3291   HeapWord* limit           = hr->next_top_at_mark_start();
  3293   if (limit == bottom) {
  3294     if (_cm->verbose_low()) {
  3295       gclog_or_tty->print_cr("[%d] found an empty region "
  3296                              "["PTR_FORMAT", "PTR_FORMAT")",
  3297                              _task_id, bottom, limit);
  3299     // The region was collected underneath our feet.
  3300     // We set the finger to bottom to ensure that the bitmap
  3301     // iteration that will follow this will not do anything.
  3302     // (this is not a condition that holds when we set the region up,
  3303     // as the region is not supposed to be empty in the first place)
  3304     _finger = bottom;
  3305   } else if (limit >= _region_limit) {
  3306     assert(limit >= _finger, "peace of mind");
  3307   } else {
  3308     assert(limit < _region_limit, "only way to get here");
  3309     // This can happen under some pretty unusual circumstances.  An
  3310     // evacuation pause empties the region underneath our feet (NTAMS
  3311     // at bottom). We then do some allocation in the region (NTAMS
  3312     // stays at bottom), followed by the region being used as a GC
  3313     // alloc region (NTAMS will move to top() and the objects
  3314     // originally below it will be grayed). All objects now marked in
  3315     // the region are explicitly grayed, if below the global finger,
  3316     // and in fact we do not need to scan anything else. So, we simply
  3317     // set _finger to be limit to ensure that the bitmap iteration
  3318     // doesn't do anything.
  3319     _finger = limit;
  3322   _region_limit = limit;
  3325 void CMTask::giveup_current_region() {
  3326   assert(_curr_region != NULL, "invariant");
  3327   if (_cm->verbose_low()) {
  3328     gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
  3329                            _task_id, _curr_region);
  3331   clear_region_fields();
  3334 void CMTask::clear_region_fields() {
  3335   // Values for these three fields that indicate that we're not
  3336   // holding on to a region.
  3337   _curr_region   = NULL;
  3338   _finger        = NULL;
  3339   _region_limit  = NULL;
  3342 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
  3343   if (cm_oop_closure == NULL) {
  3344     assert(_cm_oop_closure != NULL, "invariant");
  3345   } else {
  3346     assert(_cm_oop_closure == NULL, "invariant");
  3348   _cm_oop_closure = cm_oop_closure;
  3351 void CMTask::reset(CMBitMap* nextMarkBitMap) {
  3352   guarantee(nextMarkBitMap != NULL, "invariant");
  3354   if (_cm->verbose_low()) {
  3355     gclog_or_tty->print_cr("[%d] resetting", _task_id);
  3358   _nextMarkBitMap                = nextMarkBitMap;
  3359   clear_region_fields();
  3361   _calls                         = 0;
  3362   _elapsed_time_ms               = 0.0;
  3363   _termination_time_ms           = 0.0;
  3364   _termination_start_time_ms     = 0.0;
  3366 #if _MARKING_STATS_
  3367   _local_pushes                  = 0;
  3368   _local_pops                    = 0;
  3369   _local_max_size                = 0;
  3370   _objs_scanned                  = 0;
  3371   _global_pushes                 = 0;
  3372   _global_pops                   = 0;
  3373   _global_max_size               = 0;
  3374   _global_transfers_to           = 0;
  3375   _global_transfers_from         = 0;
  3376   _regions_claimed               = 0;
  3377   _objs_found_on_bitmap          = 0;
  3378   _satb_buffers_processed        = 0;
  3379   _steal_attempts                = 0;
  3380   _steals                        = 0;
  3381   _aborted                       = 0;
  3382   _aborted_overflow              = 0;
  3383   _aborted_cm_aborted            = 0;
  3384   _aborted_yield                 = 0;
  3385   _aborted_timed_out             = 0;
  3386   _aborted_satb                  = 0;
  3387   _aborted_termination           = 0;
  3388 #endif // _MARKING_STATS_
  3391 bool CMTask::should_exit_termination() {
  3392   regular_clock_call();
  3393   // This is called when we are in the termination protocol. We should
  3394   // quit if, for some reason, this task wants to abort or the global
  3395   // stack is not empty (this means that we can get work from it).
  3396   return !_cm->mark_stack_empty() || has_aborted();
  3399 void CMTask::reached_limit() {
  3400   assert(_words_scanned >= _words_scanned_limit ||
  3401          _refs_reached >= _refs_reached_limit ,
  3402          "shouldn't have been called otherwise");
  3403   regular_clock_call();
  3406 void CMTask::regular_clock_call() {
  3407   if (has_aborted()) return;
  3409   // First, we need to recalculate the words scanned and refs reached
  3410   // limits for the next clock call.
  3411   recalculate_limits();
  3413   // During the regular clock call we do the following
  3415   // (1) If an overflow has been flagged, then we abort.
  3416   if (_cm->has_overflown()) {
  3417     set_has_aborted();
  3418     return;
  3421   // If we are not concurrent (i.e. we're doing remark) we don't need
  3422   // to check anything else. The other steps are only needed during
  3423   // the concurrent marking phase.
  3424   if (!concurrent()) return;
  3426   // (2) If marking has been aborted for Full GC, then we also abort.
  3427   if (_cm->has_aborted()) {
  3428     set_has_aborted();
  3429     statsOnly( ++_aborted_cm_aborted );
  3430     return;
  3433   double curr_time_ms = os::elapsedVTime() * 1000.0;
  3435   // (3) If marking stats are enabled, then we update the step history.
  3436 #if _MARKING_STATS_
  3437   if (_words_scanned >= _words_scanned_limit) {
  3438     ++_clock_due_to_scanning;
  3440   if (_refs_reached >= _refs_reached_limit) {
  3441     ++_clock_due_to_marking;
  3444   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
  3445   _interval_start_time_ms = curr_time_ms;
  3446   _all_clock_intervals_ms.add(last_interval_ms);
  3448   if (_cm->verbose_medium()) {
  3449       gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
  3450                         "scanned = %d%s, refs reached = %d%s",
  3451                         _task_id, last_interval_ms,
  3452                         _words_scanned,
  3453                         (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
  3454                         _refs_reached,
  3455                         (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
  3457 #endif // _MARKING_STATS_
  3459   // (4) We check whether we should yield. If we have to, then we abort.
  3460   if (_cm->should_yield()) {
  3461     // We should yield. To do this we abort the task. The caller is
  3462     // responsible for yielding.
  3463     set_has_aborted();
  3464     statsOnly( ++_aborted_yield );
  3465     return;
  3468   // (5) We check whether we've reached our time quota. If we have,
  3469   // then we abort.
  3470   double elapsed_time_ms = curr_time_ms - _start_time_ms;
  3471   if (elapsed_time_ms > _time_target_ms) {
  3472     set_has_aborted();
  3473     _has_timed_out = true;
  3474     statsOnly( ++_aborted_timed_out );
  3475     return;
  3478   // (6) Finally, we check whether there are enough completed SATB
  3479   // buffers available for processing. If there are, we abort.
  3480   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  3481   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
  3482     if (_cm->verbose_low()) {
  3483       gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
  3484                              _task_id);
  3486     // We do need to process SATB buffers, so we'll abort and restart
  3487     // the marking task to do so.
  3488     set_has_aborted();
  3489     statsOnly( ++_aborted_satb );
  3490     return;
  3494 void CMTask::recalculate_limits() {
  3495   _real_words_scanned_limit = _words_scanned + words_scanned_period;
  3496   _words_scanned_limit      = _real_words_scanned_limit;
  3498   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
  3499   _refs_reached_limit       = _real_refs_reached_limit;
  3502 void CMTask::decrease_limits() {
  3503   // This is called when we believe that we're going to do an infrequent
  3504   // operation which will increase the per byte scanned cost (i.e. move
  3505   // entries to/from the global stack). It basically tries to decrease the
  3506   // scanning limit so that the clock is called earlier.
  3508   if (_cm->verbose_medium()) {
  3509     gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
  3512   _words_scanned_limit = _real_words_scanned_limit -
  3513     3 * words_scanned_period / 4;
  3514   _refs_reached_limit  = _real_refs_reached_limit -
  3515     3 * refs_reached_period / 4;
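         // This leaves roughly a quarter of the normal period before the next
         // clock call fires.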
  3518 void CMTask::move_entries_to_global_stack() {
  3519   // local array where we'll store the entries that will be popped
  3520   // from the local queue
  3521   oop buffer[global_stack_transfer_size];
  3523   int n = 0;
  3524   oop obj;
  3525   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
  3526     buffer[n] = obj;
  3527     ++n;
  3530   if (n > 0) {
  3531     // we popped at least one entry from the local queue
  3533     statsOnly( ++_global_transfers_to; _local_pops += n );
  3535     if (!_cm->mark_stack_push(buffer, n)) {
  3536       if (_cm->verbose_low()) {
  3537         gclog_or_tty->print_cr("[%d] aborting due to global stack overflow",
  3538                                _task_id);
  3540       set_has_aborted();
  3541     } else {
  3542       // the transfer was successful
  3544       if (_cm->verbose_medium()) {
  3545         gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
  3546                                _task_id, n);
  3548       statsOnly( int tmp_size = _cm->mark_stack_size();
  3549                  if (tmp_size > _global_max_size) {
  3550                    _global_max_size = tmp_size;
  3552                  _global_pushes += n );
  3556   // this operation was quite expensive, so decrease the limits
  3557   decrease_limits();
  3560 void CMTask::get_entries_from_global_stack() {
  3561   // local array where we'll store the entries that will be popped
  3562   // from the global stack.
  3563   oop buffer[global_stack_transfer_size];
  3564   int n;
  3565   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
  3566   assert(n <= global_stack_transfer_size,
  3567          "we should not pop more than the given limit");
  3568   if (n > 0) {
  3569     // yes, we did actually pop at least one entry
  3571     statsOnly( ++_global_transfers_from; _global_pops += n );
  3572     if (_cm->verbose_medium()) {
  3573       gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
  3574                              _task_id, n);
  3576     for (int i = 0; i < n; ++i) {
  3577       bool success = _task_queue->push(buffer[i]);
  3578       // We only call this when the local queue is empty or under a
  3579       // given target limit. So, we do not expect this push to fail.
  3580       assert(success, "invariant");
  3583     statsOnly( int tmp_size = _task_queue->size();
  3584                if (tmp_size > _local_max_size) {
  3585                  _local_max_size = tmp_size;
  3587                _local_pushes += n );
  3590   // this operation was quite expensive, so decrease the limits
  3591   decrease_limits();
  3594 void CMTask::drain_local_queue(bool partially) {
  3595   if (has_aborted()) return;
  3597   // Decide what the target size is, depending on whether we're going to
  3598   // drain it partially (so that other tasks can steal if they run out
  3599   // of things to do) or totally (at the very end).
  3600   size_t target_size;
  3601   if (partially) {
  3602     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
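           // i.e. keep at most about a third of the queue's capacity (but no more
           // than GCDrainStackTargetSize entries) available for other tasks to steal.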
  3603   } else {
  3604     target_size = 0;
  3607   if (_task_queue->size() > target_size) {
  3608     if (_cm->verbose_high()) {
  3609       gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
  3610                              _task_id, target_size);
  3613     oop obj;
  3614     bool ret = _task_queue->pop_local(obj);
  3615     while (ret) {
  3616       statsOnly( ++_local_pops );
  3618       if (_cm->verbose_high()) {
  3619         gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
  3620                                (void*) obj);
  3623       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
  3624       assert(!_g1h->is_on_master_free_list(
  3625                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
  3627       scan_object(obj);
  3629       if (_task_queue->size() <= target_size || has_aborted()) {
  3630         ret = false;
  3631       } else {
  3632         ret = _task_queue->pop_local(obj);
  3636     if (_cm->verbose_high()) {
  3637       gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
  3638                              _task_id, _task_queue->size());
  3643 void CMTask::drain_global_stack(bool partially) {
  3644   if (has_aborted()) return;
  3646   // We have a policy to drain the local queue before we attempt to
  3647   // drain the global stack.
  3648   assert(partially || _task_queue->size() == 0, "invariant");
  3650   // Decide what the target size is, depending on whether we're going to
  3651   // drain it partially (so that other tasks can steal if they run out
  3652   // of things to do) or totally (at the very end).  Notice that,
  3653   // because we move entries from the global stack in chunks or
  3654   // because another task might be doing the same, we might in fact
  3655   // drop below the target. But, this is not a problem.
  3656   size_t target_size;
  3657   if (partially) {
  3658     target_size = _cm->partial_mark_stack_size_target();
  3659   } else {
  3660     target_size = 0;
  3663   if (_cm->mark_stack_size() > target_size) {
  3664     if (_cm->verbose_low()) {
  3665       gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
  3666                              _task_id, target_size);
  3669     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
  3670       get_entries_from_global_stack();
  3671       drain_local_queue(partially);
  3674     if (_cm->verbose_low()) {
  3675       gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
  3676                              _task_id, _cm->mark_stack_size());
  3681 // SATB Queue has several assumptions on whether to call the par or
  3682 // non-par versions of the methods. This is why some of the code is
  3683 // replicated. We should really get rid of the single-threaded version
  3684 // of the code to simplify things.
  3685 void CMTask::drain_satb_buffers() {
  3686   if (has_aborted()) return;
  3688   // We set this so that the regular clock knows that we're in the
  3689   // middle of draining buffers and doesn't set the abort flag when it
  3690   // notices that SATB buffers are available for draining. It'd be
  3691   // very counterproductive if it did that. :-)
  3692   _draining_satb_buffers = true;
  3694   CMObjectClosure oc(this);
  3695   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  3696   if (G1CollectedHeap::use_parallel_gc_threads()) {
  3697     satb_mq_set.set_par_closure(_task_id, &oc);
  3698   } else {
  3699     satb_mq_set.set_closure(&oc);
  3702   // This keeps claiming and applying the closure to completed buffers
  3703   // until we run out of buffers or we need to abort.
  3704   if (G1CollectedHeap::use_parallel_gc_threads()) {
  3705     while (!has_aborted() &&
  3706            satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
  3707       if (_cm->verbose_medium()) {
  3708         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
  3710       statsOnly( ++_satb_buffers_processed );
  3711       regular_clock_call();
  3713   } else {
  3714     while (!has_aborted() &&
  3715            satb_mq_set.apply_closure_to_completed_buffer()) {
  3716       if (_cm->verbose_medium()) {
  3717         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
  3719       statsOnly( ++_satb_buffers_processed );
  3720       regular_clock_call();
  3724   if (!concurrent() && !has_aborted()) {
  3725     // We should only do this during remark.
  3726     if (G1CollectedHeap::use_parallel_gc_threads()) {
  3727       satb_mq_set.par_iterate_closure_all_threads(_task_id);
  3728     } else {
  3729       satb_mq_set.iterate_closure_all_threads();
  3733   _draining_satb_buffers = false;
  3735   assert(has_aborted() ||
  3736          concurrent() ||
  3737          satb_mq_set.completed_buffers_num() == 0, "invariant");
  3739   if (G1CollectedHeap::use_parallel_gc_threads()) {
  3740     satb_mq_set.set_par_closure(_task_id, NULL);
  3741   } else {
  3742     satb_mq_set.set_closure(NULL);
  3745   // again, this was a potentially expensive operation, decrease the
  3746   // limits to get the regular clock call early
  3747   decrease_limits();
  3750 void CMTask::print_stats() {
  3751   gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
  3752                          _task_id, _calls);
  3753   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
  3754                          _elapsed_time_ms, _termination_time_ms);
  3755   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
  3756                          _step_times_ms.num(), _step_times_ms.avg(),
  3757                          _step_times_ms.sd());
  3758   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
  3759                          _step_times_ms.maximum(), _step_times_ms.sum());
  3761 #if _MARKING_STATS_
  3762   gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
  3763                          _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
  3764                          _all_clock_intervals_ms.sd());
  3765   gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
  3766                          _all_clock_intervals_ms.maximum(),
  3767                          _all_clock_intervals_ms.sum());
  3768   gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
  3769                          _clock_due_to_scanning, _clock_due_to_marking);
  3770   gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
  3771                          _objs_scanned, _objs_found_on_bitmap);
  3772   gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
  3773                          _local_pushes, _local_pops, _local_max_size);
  3774   gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
  3775                          _global_pushes, _global_pops, _global_max_size);
  3776   gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
  3777                          _global_transfers_to,_global_transfers_from);
  3778   gclog_or_tty->print_cr("  Regions: claimed = %d", _regions_claimed);
  3779   gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
  3780   gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
  3781                          _steal_attempts, _steals);
  3782   gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
  3783   gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
  3784                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
  3785   gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
  3786                          _aborted_timed_out, _aborted_satb, _aborted_termination);
  3787 #endif // _MARKING_STATS_
  3790 /*****************************************************************************
  3792     The do_marking_step(time_target_ms) method is the building block
  3793     of the parallel marking framework. It can be called in parallel
  3794     with other invocations of do_marking_step() on different tasks
  3795     (but only one per task, obviously) and concurrently with the
  3796     mutator threads, or during remark, hence it eliminates the need
  3797     for two versions of the code. When called during remark, it will
  3798     pick up from where the task left off during the concurrent marking
  3799     phase. Interestingly, tasks are also claimable during evacuation
  3800     pauses, since do_marking_step() ensures that it aborts before
  3801     it needs to yield.
  3803     The data structures that it uses to do marking work are the
  3804     following:
  3806       (1) Marking Bitmap. If there are gray objects that appear only
  3807       on the bitmap (this happens either when dealing with an overflow
  3808       or when the initial marking phase has simply marked the roots
  3809       and didn't push them on the stack), then tasks claim heap
  3810       regions whose bitmap they then scan to find gray objects. A
  3811       global finger indicates where the end of the last claimed region
  3812       is. A local finger indicates how far into the region a task has
  3813       scanned. The two fingers are used to determine how to gray an
  3814       object (i.e. whether simply marking it is OK, as it will be
  3815       visited by a task in the future, or whether it needs to be also
  3816       pushed on a stack).
  3818       (2) Local Queue. The local queue of the task which is accessed
  3819       reasonably efficiently by the task. Other tasks can steal from
  3820       it when they run out of work. Throughout the marking phase, a
  3821       task attempts to keep its local queue short but not totally
  3822       empty, so that entries are available for stealing by other
  3823       tasks. Only when there is no more work will a task totally
  3824       drain its local queue.
  3826       (3) Global Mark Stack. This handles local queue overflow. During
  3827       marking, only sets of entries are moved between it and the local
  3828       queues, as access to it requires a mutex and more fine-grained
  3829       interaction with it might cause contention. If it
  3830       overflows, then the marking phase should restart and iterate
  3831       over the bitmap to identify gray objects. Throughout the marking
  3832       phase, tasks attempt to keep the global mark stack at a small
  3833       length but not totally empty, so that entries are available for
  3834       popping by other tasks. Only when there is no more work will tasks
  3835       totally drain the global mark stack.
  3837       (4) SATB Buffer Queue. This is where completed SATB buffers are
  3838       made available. Buffers are regularly removed from this queue
  3839       and scanned for roots, so that the queue doesn't get too
  3840       long. During remark, all completed buffers are processed, as
  3841       well as the filled in parts of any uncompleted buffers.
  3843     The do_marking_step() method tries to abort when the time target
  3844     has been reached. There are a few other cases when the
  3845     do_marking_step() method also aborts:
  3847       (1) When the marking phase has been aborted (after a Full GC).
  3849       (2) When a global overflow (on the global stack) has been
  3850       triggered. Before the task aborts, it will actually sync up with
  3851       the other tasks to ensure that all the marking data structures
  3852       (local queues, stacks, fingers etc.)  are re-initialised so that
  3853       when do_marking_step() completes, the marking phase can
  3854       immediately restart.
  3856       (3) When enough completed SATB buffers are available. The
  3857       do_marking_step() method only tries to drain SATB buffers right
  3858       at the beginning. So, if enough buffers are available, the
  3859       marking step aborts and the SATB buffers are processed at
  3860       the beginning of the next invocation.
  3862       (4) To yield. When we have to yield, we abort and yield
  3863       right at the end of do_marking_step(). This saves us from a lot
  3864       of hassle as, by yielding, we might allow a Full GC. If this
  3865       happens then objects will be compacted underneath our feet, the
  3866       heap might shrink, etc. We save checking for this by just
  3867       aborting and doing the yield right at the end.
  3869     From the above it follows that the do_marking_step() method should
  3870     be called in a loop (or, otherwise, regularly) until it completes.
  3872     If a marking step completes without its has_aborted() flag being
  3873     true, it means it has completed the current marking phase (and
  3874     also all other marking tasks have done so and have all synced up).
  3876     A method called regular_clock_call() is invoked "regularly" (in
  3877     sub ms intervals) throughout marking. It is this clock method that
  3878     checks all the abort conditions which were mentioned above and
  3879     decides when the task should abort. A work-based scheme is used to
  3880     trigger this clock method: when the number of object words the
  3881     marking phase has scanned or the number of references the marking
  3882     phase has visited reaches a given limit. Additional invocations of
  3883     the clock method have been planted in a few other strategic places
  3884     too. The initial reason for the clock method was to avoid calling
  3885     vtime too regularly, as it is quite expensive. So, once it was in
  3886     place, it was natural to piggy-back all the other conditions on it
  3887     too and not constantly check them throughout the code.
  3889  *****************************************************************************/
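       // Illustrative sketch (not part of the original sources): callers are
       // expected to invoke do_marking_step() in a loop, roughly along these
       // lines (names and yield/overflow handling are simplified):
       //
       //   do {
       //     task->do_marking_step(target_ms,
       //                           true /* do_stealing */,
       //                           true /* do_termination */);
       //     if (task->has_aborted()) {
       //       // deal with the reason for the abort (yield, overflow, pending
       //       // SATB buffers, time-out) before the next invocation
       //     }
       //   } while (task->has_aborted());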
  3891 void CMTask::do_marking_step(double time_target_ms,
  3892                              bool do_stealing,
  3893                              bool do_termination) {
  3894   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
  3895   assert(concurrent() == _cm->concurrent(), "they should be the same");
  3897   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
  3898   assert(_task_queues != NULL, "invariant");
  3899   assert(_task_queue != NULL, "invariant");
  3900   assert(_task_queues->queue(_task_id) == _task_queue, "invariant");
  3902   assert(!_claimed,
  3903          "only one thread should claim this task at any one time");
  3905   // OK, this doesn't safeguard against all possible scenarios, as it is
  3906   // possible for two threads to set the _claimed flag at the same
  3907   // time. But it is only for debugging purposes anyway and it will
  3908   // catch most problems.
  3909   _claimed = true;
  3911   _start_time_ms = os::elapsedVTime() * 1000.0;
  3912   statsOnly( _interval_start_time_ms = _start_time_ms );
  3914   double diff_prediction_ms =
  3915     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
  3916   _time_target_ms = time_target_ms - diff_prediction_ms;
  3918   // set up the variables that are used in the work-based scheme to
  3919   // call the regular clock method
  3920   _words_scanned = 0;
  3921   _refs_reached  = 0;
  3922   recalculate_limits();
  3924   // clear all flags
  3925   clear_has_aborted();
  3926   _has_timed_out = false;
  3927   _draining_satb_buffers = false;
  3929   ++_calls;
  3931   if (_cm->verbose_low()) {
  3932     gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
  3933                            "target = %1.2lfms >>>>>>>>>>",
  3934                            _task_id, _calls, _time_target_ms);
  3937   // Set up the bitmap and oop closures. Anything that uses them is
  3938   // eventually called from this method, so it is OK to allocate these
  3939   // statically.
  3940   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
  3941   G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
  3942   set_cm_oop_closure(&cm_oop_closure);
  3944   if (_cm->has_overflown()) {
  3945     // This can happen if the mark stack overflows during a GC pause
  3946     // and this task, after a yield point, restarts. We have to abort
  3947     // as we need to get into the overflow protocol which happens
  3948     // right at the end of this task.
  3949     set_has_aborted();
  3952   // First drain any available SATB buffers. After this, we will not
  3953   // look at SATB buffers before the next invocation of this method.
  3954   // If enough completed SATB buffers are queued up, the regular clock
  3955   // will abort this task so that it restarts.
  3956   drain_satb_buffers();
  3957   // ...then partially drain the local queue and the global stack
  3958   drain_local_queue(true);
  3959   drain_global_stack(true);
  3961   do {
  3962     if (!has_aborted() && _curr_region != NULL) {
  3963       // This means that we're already holding on to a region.
  3964       assert(_finger != NULL, "if region is not NULL, then the finger "
  3965              "should not be NULL either");
  3967       // We might have restarted this task after an evacuation pause
  3968       // which might have evacuated the region we're holding on to
  3969       // underneath our feet. Let's read its limit again to make sure
  3970       // that we do not iterate over a region of the heap that
  3971       // contains garbage (update_region_limit() will also move
  3972       // _finger to the start of the region if it is found empty).
  3973       update_region_limit();
  3974       // We will start from _finger not from the start of the region,
  3975       // as we might be restarting this task after aborting half-way
  3976       // through scanning this region. In this case, _finger points to
  3977       // the address where we last found a marked object. If this is a
  3978       // fresh region, _finger points to start().
  3979       MemRegion mr = MemRegion(_finger, _region_limit);
  3981       if (_cm->verbose_low()) {
  3982         gclog_or_tty->print_cr("[%d] we're scanning part "
  3983                                "["PTR_FORMAT", "PTR_FORMAT") "
  3984                                "of region "PTR_FORMAT,
  3985                                _task_id, _finger, _region_limit, _curr_region);
  3988       // Let's iterate over the bitmap of the part of the
  3989       // region that is left.
  3990       if (mr.is_empty() || _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
  3991         // We successfully completed iterating over the region. Now,
  3992         // let's give up the region.
  3993         giveup_current_region();
  3994         regular_clock_call();
  3995       } else {
  3996         assert(has_aborted(), "currently the only way to do so");
  3997         // The only way to abort the bitmap iteration is to return
  3998         // false from the do_bit() method. However, inside the
  3999         // do_bit() method we move the _finger to point to the
  4000         // object currently being looked at. So, if we bail out, we
  4001         // have definitely set _finger to something non-null.
  4002         assert(_finger != NULL, "invariant");
  4004         // Region iteration was actually aborted. So now _finger
  4005         // points to the address of the object we last scanned. If we
  4006         // leave it there, when we restart this task, we will rescan
  4007         // the object. It is easy to avoid this. We move the finger by
  4008         // enough to point to the next possible object header (the
  4009         // bitmap knows by how much we need to move it as it knows its
  4010         // granularity).
  4011         assert(_finger < _region_limit, "invariant");
  4012         HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
  4013         // Check if bitmap iteration was aborted while scanning the last object
  4014         if (new_finger >= _region_limit) {
  4015           giveup_current_region();
  4016         } else {
  4017           move_finger_to(new_finger);
  4021     // At this point we have either completed iterating over the
  4022     // region we were holding on to, or we have aborted.
  4024     // We then partially drain the local queue and the global stack.
  4025     // (Do we really need this?)
  4026     drain_local_queue(true);
  4027     drain_global_stack(true);
  4029     // Read the note on the claim_region() method on why it might
  4030     // return NULL with potentially more regions available for
  4031     // claiming and why we have to check out_of_regions() to determine
  4032     // whether we're done or not.
  4033     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
  4034       // We are going to try to claim a new region. We should have
  4035       // given up on the previous one.
  4036       // Separated the asserts so that we know which one fires.
  4037       assert(_curr_region  == NULL, "invariant");
  4038       assert(_finger       == NULL, "invariant");
  4039       assert(_region_limit == NULL, "invariant");
  4040       if (_cm->verbose_low()) {
  4041         gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
  4043       HeapRegion* claimed_region = _cm->claim_region(_task_id);
  4044       if (claimed_region != NULL) {
  4045         // Yes, we managed to claim one
  4046         statsOnly( ++_regions_claimed );
  4048         if (_cm->verbose_low()) {
  4049           gclog_or_tty->print_cr("[%d] we successfully claimed "
  4050                                  "region "PTR_FORMAT,
  4051                                  _task_id, claimed_region);
  4054         setup_for_region(claimed_region);
  4055         assert(_curr_region == claimed_region, "invariant");
  4057       // It is important to call the regular clock here. It might take
  4058       // a while to claim a region if, for example, we hit a large
  4059       // block of empty regions. So we need to call the regular clock
  4060       // method once round the loop to make sure it's called
  4061       // frequently enough.
  4062       regular_clock_call();
  4065     if (!has_aborted() && _curr_region == NULL) {
  4066       assert(_cm->out_of_regions(),
  4067              "at this point we should be out of regions");
  4069   } while ( _curr_region != NULL && !has_aborted());
  4071   if (!has_aborted()) {
  4072     // We cannot check whether the global stack is empty, since other
  4073     // tasks might be pushing objects to it concurrently.
  4074     assert(_cm->out_of_regions(),
  4075            "at this point we should be out of regions");
  4077     if (_cm->verbose_low()) {
  4078       gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
  4081     // Try to reduce the number of available SATB buffers so that
  4082     // remark has less work to do.
  4083     drain_satb_buffers();
  4086   // Since we've done everything else, we can now totally drain the
  4087   // local queue and global stack.
  4088   drain_local_queue(false);
  4089   drain_global_stack(false);
  4091   // Attempt at work stealing from other task's queues.
  4092   if (do_stealing && !has_aborted()) {
  4093     // We have not aborted. This means that we have finished all that
  4094     // we could. Let's try to do some stealing...
  4096     // We cannot check whether the global stack is empty, since other
  4097     // tasks might be pushing objects to it concurrently.
  4098     assert(_cm->out_of_regions() && _task_queue->size() == 0,
  4099            "only way to reach here");
  4101     if (_cm->verbose_low()) {
  4102       gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
  4105     while (!has_aborted()) {
  4106       oop obj;
  4107       statsOnly( ++_steal_attempts );
  4109       if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
  4110         if (_cm->verbose_medium()) {
  4111           gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
  4112                                  _task_id, (void*) obj);
  4115         statsOnly( ++_steals );
  4117         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
  4118                "any stolen object should be marked");
  4119         scan_object(obj);
  4121         // And since we're towards the end, let's totally drain the
  4122         // local queue and global stack.
  4123         drain_local_queue(false);
  4124         drain_global_stack(false);
  4125       } else {
  4126         break;
  4131   // If we are about to wrap up and go into termination, check if we
  4132   // should raise the overflow flag.
  4133   if (do_termination && !has_aborted()) {
  4134     if (_cm->force_overflow()->should_force()) {
  4135       _cm->set_has_overflown();
  4136       regular_clock_call();
  4140   // We still haven't aborted. Now, let's try to get into the
  4141   // termination protocol.
  4142   if (do_termination && !has_aborted()) {
  4143     // We cannot check whether the global stack is empty, since other
  4144     // tasks might be concurrently pushing objects on it.
  4145     // Separated the asserts so that we know which one fires.
  4146     assert(_cm->out_of_regions(), "only way to reach here");
  4147     assert(_task_queue->size() == 0, "only way to reach here");
  4149     if (_cm->verbose_low()) {
  4150       gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
  4153     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
  4154     // The CMTask class also extends the TerminatorTerminator class,
  4155     // hence its should_exit_termination() method will also decide
  4156     // whether to exit the termination protocol or not.
  4157     bool finished = _cm->terminator()->offer_termination(this);
  4158     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
  4159     _termination_time_ms +=
  4160       termination_end_time_ms - _termination_start_time_ms;
  4162     if (finished) {
  4163       // We're all done.
  4165       if (_task_id == 0) {
  4166         // let's allow task 0 to do this
  4167         if (concurrent()) {
  4168           assert(_cm->concurrent_marking_in_progress(), "invariant");
  4169           // we need to set this to false before the next
  4170           // safepoint. This way we ensure that the marking phase
  4171           // doesn't observe any more heap expansions.
  4172           _cm->clear_concurrent_marking_in_progress();
  4176       // We can now guarantee that the global stack is empty, since
  4177       // all other tasks have finished. We separated the guarantees so
  4178       // that, if a condition is false, we can immediately find out
  4179       // which one.
  4180       guarantee(_cm->out_of_regions(), "only way to reach here");
  4181       guarantee(_cm->mark_stack_empty(), "only way to reach here");
  4182       guarantee(_task_queue->size() == 0, "only way to reach here");
  4183       guarantee(!_cm->has_overflown(), "only way to reach here");
  4184       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
  4186       if (_cm->verbose_low()) {
  4187         gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
  4188       }
  4189     } else {
  4190       // Apparently there's more work to do. Let's abort this task. Its
  4191       // caller will restart it and we can hopefully find more things to do.
  4193       if (_cm->verbose_low()) {
  4194         gclog_or_tty->print_cr("[%d] apparently there is more work to do",
  4195                                _task_id);
  4196       }
  4198       set_has_aborted();
  4199       statsOnly( ++_aborted_termination );
  4200     }
  4201   }
  4203   // Mainly for debugging purposes to make sure that a pointer to the
  4204   // closure which was statically allocated in this frame doesn't
  4205   // escape it by accident.
  4206   set_cm_oop_closure(NULL);
  4207   double end_time_ms = os::elapsedVTime() * 1000.0;
  4208   double elapsed_time_ms = end_time_ms - _start_time_ms;
  4209   // Update the step history.
  4210   _step_times_ms.add(elapsed_time_ms);
  4212   if (has_aborted()) {
  4213     // The task was aborted for some reason.
  4215     statsOnly( ++_aborted );
  4217     if (_has_timed_out) {
  4218       double diff_ms = elapsed_time_ms - _time_target_ms;
  4219       // Keep statistics of how well we did with respect to hitting
  4220       // our target only if we actually timed out (if we aborted for
  4221       // other reasons, then the results might get skewed).
  4222       _marking_step_diffs_ms.add(diff_ms);
  4223     }
  4225     if (_cm->has_overflown()) {
  4226       // This is the interesting one. We aborted because a global
  4227       // overflow was raised. This means we have to restart the
  4228       // marking phase and start iterating over regions. However, in
  4229       // order to do this we have to make sure that all tasks stop
  4230       // what they are doing and re-initialise in a safe manner. We
  4231       // will achieve this with the use of two barrier sync points.
  4233       if (_cm->verbose_low()) {
  4234         gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
  4235       }
  4237       _cm->enter_first_sync_barrier(_task_id);
  4238       // When we exit this sync barrier we know that all tasks have
  4239       // stopped doing marking work. So, it's now safe to
  4240       // re-initialise our data structures. At the end of this method,
  4241       // task 0 will clear the global data structures.
  4243       statsOnly( ++_aborted_overflow );
  4245       // We clear the local state of this task...
  4246       clear_region_fields();
  4248       // ...and enter the second barrier.
  4249       _cm->enter_second_sync_barrier(_task_id);
  4250       // At this point everything has been re-initialised and we're
  4251       // ready to restart.
  4252     }
  4254     if (_cm->verbose_low()) {
  4255       gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
  4256                              "elapsed = %1.2lfms <<<<<<<<<<",
  4257                              _task_id, _time_target_ms, elapsed_time_ms);
  4258       if (_cm->has_aborted()) {
  4259         gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
  4260                                _task_id);
  4261       }
  4262     }
  4263   } else {
  4264     if (_cm->verbose_low()) {
  4265       gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
  4266                              "elapsed = %1.2lfms <<<<<<<<<<",
  4267                              _task_id, _time_target_ms, elapsed_time_ms);
  4268     }
  4269   }
  4271   _claimed = false;
  4272 }
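// Illustrative sketch (not part of the original HotSpot sources; all names
// below are hypothetical): the overflow handling in do_marking_step() relies
// on two rendezvous points -- every task stops at a first barrier before any
// shared marking state is reset, and waits at a second barrier so that no
// task restarts before the reset has completed. A minimal standalone
// analogue of that pattern, using C++20 std::barrier:

#include <barrier>

static void example_overflow_restart(int worker_id,
                                     std::barrier<>& stop_barrier,
                                     std::barrier<>& restart_barrier,
                                     void (*reset_global_state)()) {
  // 1. Every worker abandons its local marking state and stops here.
  stop_barrier.arrive_and_wait();
  // 2. Exactly one worker resets the shared marking data structures.
  if (worker_id == 0) {
    reset_global_state();
  }
  // 3. No worker resumes marking before the reset above has completed.
  restart_barrier.arrive_and_wait();
}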
  4274 CMTask::CMTask(int task_id,
  4275                ConcurrentMark* cm,
  4276                size_t* marked_bytes,
  4277                BitMap* card_bm,
  4278                CMTaskQueue* task_queue,
  4279                CMTaskQueueSet* task_queues)
  4280   : _g1h(G1CollectedHeap::heap()),
  4281     _task_id(task_id), _cm(cm),
  4282     _claimed(false),
  4283     _nextMarkBitMap(NULL), _hash_seed(17),
  4284     _task_queue(task_queue),
  4285     _task_queues(task_queues),
  4286     _cm_oop_closure(NULL),
  4287     _marked_bytes_array(marked_bytes),
  4288     _card_bm(card_bm) {
  4289   guarantee(task_queue != NULL, "invariant");
  4290   guarantee(task_queues != NULL, "invariant");
  4292   statsOnly( _clock_due_to_scanning = 0;
  4293              _clock_due_to_marking  = 0 );
  4295   _marking_step_diffs_ms.add(0.5);
  4296 }
  4298 // These are formatting macros that are used below to ensure
  4299 // consistent formatting. The *_H_* versions are used to format the
  4300 // header for a particular value and they should be kept consistent
  4301 // with the corresponding macro. Also note that most of the macros add
  4302 // the necessary white space (as a prefix) which makes them a bit
  4303 // easier to compose.
  4305 // All the output lines are prefixed with this string to be able to
  4306 // identify them easily in a large log file.
  4307 #define G1PPRL_LINE_PREFIX            "###"
  4309 #define G1PPRL_ADDR_BASE_FORMAT    " "PTR_FORMAT"-"PTR_FORMAT
  4310 #ifdef _LP64
  4311 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
  4312 #else // _LP64
  4313 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
  4314 #endif // _LP64
  4316 // For per-region info
  4317 #define G1PPRL_TYPE_FORMAT            "   %-4s"
  4318 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
  4319 #define G1PPRL_BYTE_FORMAT            "  "SIZE_FORMAT_W(9)
  4320 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
  4321 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
  4322 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
  4324 // For summary info
  4325 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  "tag":"G1PPRL_ADDR_BASE_FORMAT
  4326 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  "tag": "SIZE_FORMAT
  4327 #define G1PPRL_SUM_MB_FORMAT(tag)      "  "tag": %1.2f MB"
  4328 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
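// Illustrative sketch (not part of the original file; the EX_* names are
// hypothetical): the G1PPRL_* macros above rely on C/C++ adjacent string
// literal concatenation, so each print_cr() call below receives a single
// format string assembled at compile time. A minimal standalone analogue:

#include <cstdio>

#define EX_LINE_PREFIX "###"
#define EX_TYPE_FORMAT "   %-4s"
#define EX_BYTE_FORMAT "  %9zu"

static void example_print_region_line() {
  // The three macros concatenate into the single literal "###   %-4s  %9zu\n".
  std::printf(EX_LINE_PREFIX EX_TYPE_FORMAT EX_BYTE_FORMAT "\n",
              "OLD", (size_t)1048576);
  // Prints something like: "###   OLD     1048576"
}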
  4330 G1PrintRegionLivenessInfoClosure::
  4331 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
  4332   : _out(out),
  4333     _total_used_bytes(0), _total_capacity_bytes(0),
  4334     _total_prev_live_bytes(0), _total_next_live_bytes(0),
  4335     _hum_used_bytes(0), _hum_capacity_bytes(0),
  4336     _hum_prev_live_bytes(0), _hum_next_live_bytes(0) {
  4337   G1CollectedHeap* g1h = G1CollectedHeap::heap();
  4338   MemRegion g1_committed = g1h->g1_committed();
  4339   MemRegion g1_reserved = g1h->g1_reserved();
  4340   double now = os::elapsedTime();
  4342   // Print the header of the output.
  4343   _out->cr();
  4344   _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
  4345   _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
  4346                  G1PPRL_SUM_ADDR_FORMAT("committed")
  4347                  G1PPRL_SUM_ADDR_FORMAT("reserved")
  4348                  G1PPRL_SUM_BYTE_FORMAT("region-size"),
  4349                  g1_committed.start(), g1_committed.end(),
  4350                  g1_reserved.start(), g1_reserved.end(),
  4351                  HeapRegion::GrainBytes);
  4352   _out->print_cr(G1PPRL_LINE_PREFIX);
  4353   _out->print_cr(G1PPRL_LINE_PREFIX
  4354                  G1PPRL_TYPE_H_FORMAT
  4355                  G1PPRL_ADDR_BASE_H_FORMAT
  4356                  G1PPRL_BYTE_H_FORMAT
  4357                  G1PPRL_BYTE_H_FORMAT
  4358                  G1PPRL_BYTE_H_FORMAT
  4359                  G1PPRL_DOUBLE_H_FORMAT,
  4360                  "type", "address-range",
  4361                  "used", "prev-live", "next-live", "gc-eff");
  4362   _out->print_cr(G1PPRL_LINE_PREFIX
  4363                  G1PPRL_TYPE_H_FORMAT
  4364                  G1PPRL_ADDR_BASE_H_FORMAT
  4365                  G1PPRL_BYTE_H_FORMAT
  4366                  G1PPRL_BYTE_H_FORMAT
  4367                  G1PPRL_BYTE_H_FORMAT
  4368                  G1PPRL_DOUBLE_H_FORMAT,
  4369                  "", "",
  4370                  "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)");
  4371 }
  4373 // It takes as a parameter a reference to one of the _hum_* fields; it
  4374 // deduces the corresponding value for a region in a humongous region
  4375 // series (either the region size, or what's left if the _hum_* field
  4376 // is < the region size), and updates the _hum_* field accordingly.
  4377 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
  4378   size_t bytes = 0;
  4379   // The > 0 check is to deal with the prev and next live bytes which
  4380   // could be 0.
  4381   if (*hum_bytes > 0) {
  4382     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
  4383     *hum_bytes -= bytes;
  4385   return bytes;
  4386 }
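// For illustration (hypothetical numbers): with a 1 MB region size and a
// humongous series whose "starts humongous" region recorded 2.5 MB in
// _hum_used_bytes, three successive calls on that field return 1 MB, 1 MB
// and 0.5 MB, leaving it at zero for the next series.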
  4388 // It deduces the values for a region in a humongous region series
  4389 // from the _hum_* fields and updates those accordingly. It assumes
  4390 // that the _hum_* fields have already been set up from the "starts
  4391 // humongous" region and that we visit the regions in address order.
  4392 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
  4393                                                      size_t* capacity_bytes,
  4394                                                      size_t* prev_live_bytes,
  4395                                                      size_t* next_live_bytes) {
  4396   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
  4397   *used_bytes      = get_hum_bytes(&_hum_used_bytes);
  4398   *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
  4399   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
  4400   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
  4401 }
  4403 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
  4404   const char* type = "";
  4405   HeapWord* bottom       = r->bottom();
  4406   HeapWord* end          = r->end();
  4407   size_t capacity_bytes  = r->capacity();
  4408   size_t used_bytes      = r->used();
  4409   size_t prev_live_bytes = r->live_bytes();
  4410   size_t next_live_bytes = r->next_live_bytes();
  4411   double gc_eff          = r->gc_efficiency();
  4412   if (r->used() == 0) {
  4413     type = "FREE";
  4414   } else if (r->is_survivor()) {
  4415     type = "SURV";
  4416   } else if (r->is_young()) {
  4417     type = "EDEN";
  4418   } else if (r->startsHumongous()) {
  4419     type = "HUMS";
  4421     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
  4422            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
  4423            "they should have been zeroed after the last time we used them");
  4424     // Set up the _hum_* fields.
  4425     _hum_capacity_bytes  = capacity_bytes;
  4426     _hum_used_bytes      = used_bytes;
  4427     _hum_prev_live_bytes = prev_live_bytes;
  4428     _hum_next_live_bytes = next_live_bytes;
  4429     get_hum_bytes(&used_bytes, &capacity_bytes,
  4430                   &prev_live_bytes, &next_live_bytes);
  4431     end = bottom + HeapRegion::GrainWords;
  4432   } else if (r->continuesHumongous()) {
  4433     type = "HUMC";
  4434     get_hum_bytes(&used_bytes, &capacity_bytes,
  4435                   &prev_live_bytes, &next_live_bytes);
  4436     assert(end == bottom + HeapRegion::GrainWords, "invariant");
  4437   } else {
  4438     type = "OLD";
  4441   _total_used_bytes      += used_bytes;
  4442   _total_capacity_bytes  += capacity_bytes;
  4443   _total_prev_live_bytes += prev_live_bytes;
  4444   _total_next_live_bytes += next_live_bytes;
  4446   // Print a line for this particular region.
  4447   _out->print_cr(G1PPRL_LINE_PREFIX
  4448                  G1PPRL_TYPE_FORMAT
  4449                  G1PPRL_ADDR_BASE_FORMAT
  4450                  G1PPRL_BYTE_FORMAT
  4451                  G1PPRL_BYTE_FORMAT
  4452                  G1PPRL_BYTE_FORMAT
  4453                  G1PPRL_DOUBLE_FORMAT,
  4454                  type, bottom, end,
  4455                  used_bytes, prev_live_bytes, next_live_bytes, gc_eff);
  4457   return false;
  4458 }
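// NOTE: returning false tells the region iterator to keep going, so a line is
// printed for every region in the heap. With the formats above, a per-region
// line looks roughly like (hypothetical values):
//   ###   OLD  0x00000000f0000000-0x00000000f0080000     524288 ...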
  4460 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
  4461   // Print the footer of the output.
  4462   _out->print_cr(G1PPRL_LINE_PREFIX);
  4463   _out->print_cr(G1PPRL_LINE_PREFIX
  4464                  " SUMMARY"
  4465                  G1PPRL_SUM_MB_FORMAT("capacity")
  4466                  G1PPRL_SUM_MB_PERC_FORMAT("used")
  4467                  G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
  4468                  G1PPRL_SUM_MB_PERC_FORMAT("next-live"),
  4469                  bytes_to_mb(_total_capacity_bytes),
  4470                  bytes_to_mb(_total_used_bytes),
  4471                  perc(_total_used_bytes, _total_capacity_bytes),
  4472                  bytes_to_mb(_total_prev_live_bytes),
  4473                  perc(_total_prev_live_bytes, _total_capacity_bytes),
  4474                  bytes_to_mb(_total_next_live_bytes),
  4475                  perc(_total_next_live_bytes, _total_capacity_bytes));
  4476   _out->cr();
  4477 }
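// NOTE: in the SUMMARY line the used, prev-live and next-live percentages are
// all computed against the total heap capacity (the second argument to perc()
// is _total_capacity_bytes in every case), not against the used bytes.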
