jdk8-mips64-public/hotspot: src/share/vm/gc_implementation/g1/concurrentMark.cpp@37552638d24a

7172388: G1: _total_full_collections should not be incremented for concurrent cycles
Reviewed-by: azeemj, jmasa

     1 /*

     2  * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.

     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.

     4  *

     5  * This code is free software; you can redistribute it and/or modify it

     6  * under the terms of the GNU General Public License version 2 only, as

     7  * published by the Free Software Foundation.

     8  *

     9  * This code is distributed in the hope that it will be useful, but WITHOUT

    10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or

    11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License

    12  * version 2 for more details (a copy is included in the LICENSE file that

    13  * accompanied this code).

    14  *

    15  * You should have received a copy of the GNU General Public License version

    16  * 2 along with this work; if not, write to the Free Software Foundation,

    17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.

    18  *

    19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA

    20  * or visit www.oracle.com if you need additional information or have any

    21  * questions.

    22  *

    23  */

    25 #include "precompiled.hpp"

    26 #include "classfile/symbolTable.hpp"

    27 #include "gc_implementation/g1/concurrentMark.inline.hpp"

    28 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"

    29 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"

    30 #include "gc_implementation/g1/g1CollectorPolicy.hpp"

    31 #include "gc_implementation/g1/g1ErgoVerbose.hpp"

    32 #include "gc_implementation/g1/g1Log.hpp"

    33 #include "gc_implementation/g1/g1OopClosures.inline.hpp"

    34 #include "gc_implementation/g1/g1RemSet.hpp"

    35 #include "gc_implementation/g1/heapRegion.inline.hpp"

    36 #include "gc_implementation/g1/heapRegionRemSet.hpp"

    37 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"

    38 #include "gc_implementation/shared/vmGCOperations.hpp"

    39 #include "memory/genOopClosures.inline.hpp"

    40 #include "memory/referencePolicy.hpp"

    41 #include "memory/resourceArea.hpp"

    42 #include "oops/oop.inline.hpp"

    43 #include "runtime/handles.inline.hpp"

    44 #include "runtime/java.hpp"

    46 // Concurrent marking bit map wrapper

    48 CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) :

    49   _bm((uintptr_t*)NULL,0),

    50   _shifter(shifter) {

    51   _bmStartWord = (HeapWord*)(rs.base());

    52   _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes

    53   ReservedSpace brs(ReservedSpace::allocation_align_size_up(

    54                      (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));

    56   guarantee(brs.is_reserved(), "couldn't allocate concurrent marking bit map");

    57   // For now we'll just commit all of the bit map up fromt.

    58   // Later on we'll try to be more parsimonious with swap.

    59   guarantee(_virtual_space.initialize(brs, brs.size()),

    60             "couldn't reseve backing store for concurrent marking bit map");

    61   assert(_virtual_space.committed_size() == brs.size(),

    62          "didn't reserve backing store for all of concurrent marking bit map?");

    63   _bm.set_map((uintptr_t*)_virtual_space.low());

    64   assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=

    65          _bmWordSize, "inconsistency in bit map sizing");

    66   _bm.set_size(_bmWordSize >> _shifter);

    67 }

    69 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,

    70                                                HeapWord* limit) const {

    71   // First we must round addr *up* to a possible object boundary.

    72   addr = (HeapWord*)align_size_up((intptr_t)addr,

    73                                   HeapWordSize << _shifter);

    74   size_t addrOffset = heapWordToOffset(addr);

    75   if (limit == NULL) {

    76     limit = _bmStartWord + _bmWordSize;

    77   }

    78   size_t limitOffset = heapWordToOffset(limit);

    79   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);

    80   HeapWord* nextAddr = offsetToHeapWord(nextOffset);

    81   assert(nextAddr >= addr, "get_next_one postcondition");

    82   assert(nextAddr == limit || isMarked(nextAddr),

    83          "get_next_one postcondition");

    84   return nextAddr;

    85 }

    87 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,

    88                                                  HeapWord* limit) const {

    89   size_t addrOffset = heapWordToOffset(addr);

    90   if (limit == NULL) {

    91     limit = _bmStartWord + _bmWordSize;

    92   }

    93   size_t limitOffset = heapWordToOffset(limit);

    94   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);

    95   HeapWord* nextAddr = offsetToHeapWord(nextOffset);

    96   assert(nextAddr >= addr, "get_next_one postcondition");

    97   assert(nextAddr == limit || !isMarked(nextAddr),

    98          "get_next_one postcondition");

    99   return nextAddr;

   100 }

   102 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {

   103   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");

   104   return (int) (diff >> _shifter);

   105 }

   107 #ifndef PRODUCT

   108 bool CMBitMapRO::covers(ReservedSpace rs) const {

   109   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");

   110   assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,

   111          "size inconsistency");

   112   return _bmStartWord == (HeapWord*)(rs.base()) &&

   113          _bmWordSize  == rs.size()>>LogHeapWordSize;

   114 }

   115 #endif

   117 void CMBitMap::clearAll() {

   118   _bm.clear();

   119   return;

   120 }

   122 void CMBitMap::markRange(MemRegion mr) {

   123   mr.intersection(MemRegion(_bmStartWord, _bmWordSize));

   124   assert(!mr.is_empty(), "unexpected empty region");

   125   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==

   126           ((HeapWord *) mr.end())),

   127          "markRange memory region end is not card aligned");

   128   // convert address range into offset range

   129   _bm.at_put_range(heapWordToOffset(mr.start()),

   130                    heapWordToOffset(mr.end()), true);

   131 }

   133 void CMBitMap::clearRange(MemRegion mr) {

   134   mr.intersection(MemRegion(_bmStartWord, _bmWordSize));

   135   assert(!mr.is_empty(), "unexpected empty region");

   136   // convert address range into offset range

   137   _bm.at_put_range(heapWordToOffset(mr.start()),

   138                    heapWordToOffset(mr.end()), false);

   139 }

   141 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,

   142                                             HeapWord* end_addr) {

   143   HeapWord* start = getNextMarkedWordAddress(addr);

   144   start = MIN2(start, end_addr);

   145   HeapWord* end   = getNextUnmarkedWordAddress(start);

   146   end = MIN2(end, end_addr);

   147   assert(start <= end, "Consistency check");

   148   MemRegion mr(start, end);

   149   if (!mr.is_empty()) {

   150     clearRange(mr);

   151   }

   152   return mr;

   153 }

   155 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :

   156   _base(NULL), _cm(cm)

   157 #ifdef ASSERT

   158   , _drain_in_progress(false)

   159   , _drain_in_progress_yields(false)

   160 #endif

   161 {}

   163 void CMMarkStack::allocate(size_t size) {

   164   _base = NEW_C_HEAP_ARRAY(oop, size);

   165   if (_base == NULL) {

   166     vm_exit_during_initialization("Failed to allocate CM region mark stack");

   167   }

   168   _index = 0;

   169   _capacity = (jint) size;

   170   _saved_index = -1;

   171   NOT_PRODUCT(_max_depth = 0);

   172 }

   174 CMMarkStack::~CMMarkStack() {

   175   if (_base != NULL) {

   176     FREE_C_HEAP_ARRAY(oop, _base);

   177   }

   178 }

   180 void CMMarkStack::par_push(oop ptr) {

   181   while (true) {

   182     if (isFull()) {

   183       _overflow = true;

   184       return;

   185     }

   186     // Otherwise...

   187     jint index = _index;

   188     jint next_index = index+1;

   189     jint res = Atomic::cmpxchg(next_index, &_index, index);

   190     if (res == index) {

   191       _base[index] = ptr;

   192       // Note that we don't maintain this atomically.  We could, but it

   193       // doesn't seem necessary.

   194       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));

   195       return;

   196     }

   197     // Otherwise, we need to try again.

   198   }

   199 }

   201 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {

   202   while (true) {

   203     if (isFull()) {

   204       _overflow = true;

   205       return;

   206     }

   207     // Otherwise...

   208     jint index = _index;

   209     jint next_index = index + n;

   210     if (next_index > _capacity) {

   211       _overflow = true;

   212       return;

   213     }

   214     jint res = Atomic::cmpxchg(next_index, &_index, index);

   215     if (res == index) {

   216       for (int i = 0; i < n; i++) {

   217         int ind = index + i;

   218         assert(ind < _capacity, "By overflow test above.");

   219         _base[ind] = ptr_arr[i];

   220       }

   221       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));

   222       return;

   223     }

   224     // Otherwise, we need to try again.

   225   }

   226 }

   229 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {

   230   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);

   231   jint start = _index;

   232   jint next_index = start + n;

   233   if (next_index > _capacity) {

   234     _overflow = true;

   235     return;

   236   }

   237   // Otherwise.

   238   _index = next_index;

   239   for (int i = 0; i < n; i++) {

   240     int ind = start + i;

   241     assert(ind < _capacity, "By overflow test above.");

   242     _base[ind] = ptr_arr[i];

   243   }

   244 }

   247 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {

   248   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);

   249   jint index = _index;

   250   if (index == 0) {

   251     *n = 0;

   252     return false;

   253   } else {

   254     int k = MIN2(max, index);

   255     jint new_ind = index - k;

   256     for (int j = 0; j < k; j++) {

   257       ptr_arr[j] = _base[new_ind + j];

   258     }

   259     _index = new_ind;

   260     *n = k;

   261     return true;

   262   }

   263 }

   265 template<class OopClosureClass>

   266 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {

   267   assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after

   268          || SafepointSynchronize::is_at_safepoint(),

   269          "Drain recursion must be yield-safe.");

   270   bool res = true;

   271   debug_only(_drain_in_progress = true);

   272   debug_only(_drain_in_progress_yields = yield_after);

   273   while (!isEmpty()) {

   274     oop newOop = pop();

   275     assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");

   276     assert(newOop->is_oop(), "Expected an oop");

   277     assert(bm == NULL || bm->isMarked((HeapWord*)newOop),

   278            "only grey objects on this stack");

   279     newOop->oop_iterate(cl);

   280     if (yield_after && _cm->do_yield_check()) {

   281       res = false;

   282       break;

   283     }

   284   }

   285   debug_only(_drain_in_progress = false);

   286   return res;

   287 }

   289 void CMMarkStack::note_start_of_gc() {

   290   assert(_saved_index == -1,

   291          "note_start_of_gc()/end_of_gc() bracketed incorrectly");

   292   _saved_index = _index;

   293 }

   295 void CMMarkStack::note_end_of_gc() {

   296   // This is intentionally a guarantee, instead of an assert. If we

   297   // accidentally add something to the mark stack during GC, it

   298   // will be a correctness issue so it's better if we crash. we'll

   299   // only check this once per GC anyway, so it won't be a performance

   300   // issue in any way.

   301   guarantee(_saved_index == _index,

   302             err_msg("saved index: %d index: %d", _saved_index, _index));

   303   _saved_index = -1;

   304 }

   306 void CMMarkStack::oops_do(OopClosure* f) {

   307   assert(_saved_index == _index,

   308          err_msg("saved index: %d index: %d", _saved_index, _index));

   309   for (int i = 0; i < _index; i += 1) {

   310     f->do_oop(&_base[i]);

   311   }

   312 }

   314 bool ConcurrentMark::not_yet_marked(oop obj) const {

   315   return (_g1h->is_obj_ill(obj)

   316           || (_g1h->is_in_permanent(obj)

   317               && !nextMarkBitMap()->isMarked((HeapWord*)obj)));

   318 }

   320 CMRootRegions::CMRootRegions() :

   321   _young_list(NULL), _cm(NULL), _scan_in_progress(false),

   322   _should_abort(false),  _next_survivor(NULL) { }

   324 void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {

   325   _young_list = g1h->young_list();

   326   _cm = cm;

   327 }

   329 void CMRootRegions::prepare_for_scan() {

   330   assert(!scan_in_progress(), "pre-condition");

   332   // Currently, only survivors can be root regions.

   333   assert(_next_survivor == NULL, "pre-condition");

   334   _next_survivor = _young_list->first_survivor_region();

   335   _scan_in_progress = (_next_survivor != NULL);

   336   _should_abort = false;

   337 }

   339 HeapRegion* CMRootRegions::claim_next() {

   340   if (_should_abort) {

   341     // If someone has set the should_abort flag, we return NULL to

   342     // force the caller to bail out of their loop.

   343     return NULL;

   344   }

   346   // Currently, only survivors can be root regions.

   347   HeapRegion* res = _next_survivor;

   348   if (res != NULL) {

   349     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);

   350     // Read it again in case it changed while we were waiting for the lock.

   351     res = _next_survivor;

   352     if (res != NULL) {

   353       if (res == _young_list->last_survivor_region()) {

   354         // We just claimed the last survivor so store NULL to indicate

   355         // that we're done.

   356         _next_survivor = NULL;

   357       } else {

   358         _next_survivor = res->get_next_young_region();

   359       }

   360     } else {

   361       // Someone else claimed the last survivor while we were trying

   362       // to take the lock so nothing else to do.

   363     }

   364   }

   365   assert(res == NULL || res->is_survivor(), "post-condition");

   367   return res;

   368 }

   370 void CMRootRegions::scan_finished() {

   371   assert(scan_in_progress(), "pre-condition");

   373   // Currently, only survivors can be root regions.

   374   if (!_should_abort) {

   375     assert(_next_survivor == NULL, "we should have claimed all survivors");

   376   }

   377   _next_survivor = NULL;

   379   {

   380     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);

   381     _scan_in_progress = false;

   382     RootRegionScan_lock->notify_all();

   383   }

   384 }

   386 bool CMRootRegions::wait_until_scan_finished() {

   387   if (!scan_in_progress()) return false;

   389   {

   390     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);

   391     while (scan_in_progress()) {

   392       RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);

   393     }

   394   }

   395   return true;

   396 }

   398 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away

   399 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list

   400 #endif // _MSC_VER

   402 uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {

   403   return MAX2((n_par_threads + 2) / 4, 1U);

   404 }

   406 ConcurrentMark::ConcurrentMark(ReservedSpace rs, uint max_regions) :

   407   _markBitMap1(rs, MinObjAlignment - 1),

   408   _markBitMap2(rs, MinObjAlignment - 1),

   410   _parallel_marking_threads(0),

   411   _max_parallel_marking_threads(0),

   412   _sleep_factor(0.0),

   413   _marking_task_overhead(1.0),

   414   _cleanup_sleep_factor(0.0),

   415   _cleanup_task_overhead(1.0),

   416   _cleanup_list("Cleanup List"),

   417   _region_bm((BitMap::idx_t) max_regions, false /* in_resource_area*/),

   418   _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>

   419            CardTableModRefBS::card_shift,

   420            false /* in_resource_area*/),

   422   _prevMarkBitMap(&_markBitMap1),

   423   _nextMarkBitMap(&_markBitMap2),

   425   _markStack(this),

   426   // _finger set in set_non_marking_state

   428   _max_task_num(MAX2((uint)ParallelGCThreads, 1U)),

   429   // _active_tasks set in set_non_marking_state

   430   // _tasks set inside the constructor

   431   _task_queues(new CMTaskQueueSet((int) _max_task_num)),

   432   _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),

   434   _has_overflown(false),

   435   _concurrent(false),

   436   _has_aborted(false),

   437   _restart_for_overflow(false),

   438   _concurrent_marking_in_progress(false),

   440   // _verbose_level set below

   442   _init_times(),

   443   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),

   444   _cleanup_times(),

   445   _total_counting_time(0.0),

   446   _total_rs_scrub_time(0.0),

   448   _parallel_workers(NULL),

   450   _count_card_bitmaps(NULL),

   451   _count_marked_bytes(NULL) {

   452   CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;

   453   if (verbose_level < no_verbose) {

   454     verbose_level = no_verbose;

   455   }

   456   if (verbose_level > high_verbose) {

   457     verbose_level = high_verbose;

   458   }

   459   _verbose_level = verbose_level;

   461   if (verbose_low()) {

   462     gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "

   463                            "heap end = "PTR_FORMAT, _heap_start, _heap_end);

   464   }

   466   _markStack.allocate(MarkStackSize);

   468   // Create & start a ConcurrentMark thread.

   469   _cmThread = new ConcurrentMarkThread(this);

   470   assert(cmThread() != NULL, "CM Thread should have been created");

   471   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");

   473   _g1h = G1CollectedHeap::heap();

   474   assert(CGC_lock != NULL, "Where's the CGC_lock?");

   475   assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");

   476   assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");

   478   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();

   479   satb_qs.set_buffer_size(G1SATBBufferSize);

   481   _root_regions.init(_g1h, this);

   483   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);

   484   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);

   486   _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_task_num);

   487   _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_task_num);

   489   BitMap::idx_t card_bm_size = _card_bm.size();

   491   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail

   492   _active_tasks = _max_task_num;

   493   for (int i = 0; i < (int) _max_task_num; ++i) {

   494     CMTaskQueue* task_queue = new CMTaskQueue();

   495     task_queue->initialize();

   496     _task_queues->register_queue(i, task_queue);

   498     _count_card_bitmaps[i] = BitMap(card_bm_size, false);

   499     _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, (size_t) max_regions);

   501     _tasks[i] = new CMTask(i, this,

   502                            _count_marked_bytes[i],

   503                            &_count_card_bitmaps[i],

   504                            task_queue, _task_queues);

   506     _accum_task_vtime[i] = 0.0;

   507   }

   509   // Calculate the card number for the bottom of the heap. Used

   510   // in biasing indexes into the accounting card bitmaps.

   511   _heap_bottom_card_num =

   512     intptr_t(uintptr_t(_g1h->reserved_region().start()) >>

   513                                 CardTableModRefBS::card_shift);

   515   // Clear all the liveness counting data

   516   clear_all_count_data();

   518   if (ConcGCThreads > ParallelGCThreads) {

   519     vm_exit_during_initialization("Can't have more ConcGCThreads "

   520                                   "than ParallelGCThreads.");

   521   }

   522   if (ParallelGCThreads == 0) {

   523     // if we are not running with any parallel GC threads we will not

   524     // spawn any marking threads either

   525     _parallel_marking_threads =       0;

   526     _max_parallel_marking_threads =   0;

   527     _sleep_factor             =     0.0;

   528     _marking_task_overhead    =     1.0;

   529   } else {

   530     if (ConcGCThreads > 0) {

   531       // notice that ConcGCThreads overwrites G1MarkingOverheadPercent

   532       // if both are set

   534       _parallel_marking_threads = (uint) ConcGCThreads;

   535       _max_parallel_marking_threads = _parallel_marking_threads;

   536       _sleep_factor             = 0.0;

   537       _marking_task_overhead    = 1.0;

   538     } else if (G1MarkingOverheadPercent > 0) {

   539       // we will calculate the number of parallel marking threads

   540       // based on a target overhead with respect to the soft real-time

   541       // goal

   543       double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;

   544       double overall_cm_overhead =

   545         (double) MaxGCPauseMillis * marking_overhead /

   546         (double) GCPauseIntervalMillis;

   547       double cpu_ratio = 1.0 / (double) os::processor_count();

   548       double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);

   549       double marking_task_overhead =

   550         overall_cm_overhead / marking_thread_num *

   551                                                 (double) os::processor_count();

   552       double sleep_factor =

   553                          (1.0 - marking_task_overhead) / marking_task_overhead;

   555       _parallel_marking_threads = (uint) marking_thread_num;

   556       _max_parallel_marking_threads = _parallel_marking_threads;

   557       _sleep_factor             = sleep_factor;

   558       _marking_task_overhead    = marking_task_overhead;

   559     } else {

   560       _parallel_marking_threads = scale_parallel_threads((uint)ParallelGCThreads);

   561       _max_parallel_marking_threads = _parallel_marking_threads;

   562       _sleep_factor             = 0.0;

   563       _marking_task_overhead    = 1.0;

   564     }

   566     if (parallel_marking_threads() > 1) {

   567       _cleanup_task_overhead = 1.0;

   568     } else {

   569       _cleanup_task_overhead = marking_task_overhead();

   570     }

   571     _cleanup_sleep_factor =

   572                      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

   574 #if 0

   575     gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());

   576     gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());

   577     gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());

   578     gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());

   579     gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());

   580 #endif

   582     guarantee(parallel_marking_threads() > 0, "peace of mind");

   583     _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",

   584          _max_parallel_marking_threads, false, true);

   585     if (_parallel_workers == NULL) {

   586       vm_exit_during_initialization("Failed necessary allocation.");

   587     } else {

   588       _parallel_workers->initialize_workers();

   589     }

   590   }

   592   // so that the call below can read a sensible value

   593   _heap_start = (HeapWord*) rs.base();

   594   set_non_marking_state();

   595 }

   597 void ConcurrentMark::update_g1_committed(bool force) {

   598   // If concurrent marking is not in progress, then we do not need to

   599   // update _heap_end.

   600   if (!concurrent_marking_in_progress() && !force) return;

   602   MemRegion committed = _g1h->g1_committed();

   603   assert(committed.start() == _heap_start, "start shouldn't change");

   604   HeapWord* new_end = committed.end();

   605   if (new_end > _heap_end) {

   606     // The heap has been expanded.

   608     _heap_end = new_end;

   609   }

   610   // Notice that the heap can also shrink. However, this only happens

   611   // during a Full GC (at least currently) and the entire marking

   612   // phase will bail out and the task will not be restarted. So, let's

   613   // do nothing.

   614 }

   616 void ConcurrentMark::reset() {

   617   // Starting values for these two. This should be called in a STW

   618   // phase. CM will be notified of any future g1_committed expansions

   619   // will be at the end of evacuation pauses, when tasks are

   620   // inactive.

   621   MemRegion committed = _g1h->g1_committed();

   622   _heap_start = committed.start();

   623   _heap_end   = committed.end();

   625   // Separated the asserts so that we know which one fires.

   626   assert(_heap_start != NULL, "heap bounds should look ok");

   627   assert(_heap_end != NULL, "heap bounds should look ok");

   628   assert(_heap_start < _heap_end, "heap bounds should look ok");

   630   // reset all the marking data structures and any necessary flags

   631   clear_marking_state();

   633   if (verbose_low()) {

   634     gclog_or_tty->print_cr("[global] resetting");

   635   }

   637   // We do reset all of them, since different phases will use

   638   // different number of active threads. So, it's easiest to have all

   639   // of them ready.

   640   for (int i = 0; i < (int) _max_task_num; ++i) {

   641     _tasks[i]->reset(_nextMarkBitMap);

   642   }

   644   // we need this to make sure that the flag is on during the evac

   645   // pause with initial mark piggy-backed

   646   set_concurrent_marking_in_progress();

   647 }

   649 void ConcurrentMark::set_phase(uint active_tasks, bool concurrent) {

   650   assert(active_tasks <= _max_task_num, "we should not have more");

   652   _active_tasks = active_tasks;

   653   // Need to update the three data structures below according to the

   654   // number of active threads for this phase.

   655   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);

   656   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);

   657   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);

   659   _concurrent = concurrent;

   660   // We propagate this to all tasks, not just the active ones.

   661   for (int i = 0; i < (int) _max_task_num; ++i)

   662     _tasks[i]->set_concurrent(concurrent);

   664   if (concurrent) {

   665     set_concurrent_marking_in_progress();

   666   } else {

   667     // We currently assume that the concurrent flag has been set to

   668     // false before we start remark. At this point we should also be

   669     // in a STW phase.

   670     assert(!concurrent_marking_in_progress(), "invariant");

   671     assert(_finger == _heap_end, "only way to get here");

   672     update_g1_committed(true);

   673   }

   674 }

   676 void ConcurrentMark::set_non_marking_state() {

   677   // We set the global marking state to some default values when we're

   678   // not doing marking.

   679   clear_marking_state();

   680   _active_tasks = 0;

   681   clear_concurrent_marking_in_progress();

   682 }

   684 ConcurrentMark::~ConcurrentMark() {

   685   // The ConcurrentMark instance is never freed.

   686   ShouldNotReachHere();

   687 }

   689 void ConcurrentMark::clearNextBitmap() {

   690   G1CollectedHeap* g1h = G1CollectedHeap::heap();

   691   G1CollectorPolicy* g1p = g1h->g1_policy();

   693   // Make sure that the concurrent mark thread looks to still be in

   694   // the current cycle.

   695   guarantee(cmThread()->during_cycle(), "invariant");

   697   // We are finishing up the current cycle by clearing the next

   698   // marking bitmap and getting it ready for the next cycle. During

   699   // this time no other cycle can start. So, let's make sure that this

   700   // is the case.

   701   guarantee(!g1h->mark_in_progress(), "invariant");

   703   // clear the mark bitmap (no grey objects to start with).

   704   // We need to do this in chunks and offer to yield in between

   705   // each chunk.

   706   HeapWord* start  = _nextMarkBitMap->startWord();

   707   HeapWord* end    = _nextMarkBitMap->endWord();

   708   HeapWord* cur    = start;

   709   size_t chunkSize = M;

   710   while (cur < end) {

   711     HeapWord* next = cur + chunkSize;

   712     if (next > end) {

   713       next = end;

   714     }

   715     MemRegion mr(cur,next);

   716     _nextMarkBitMap->clearRange(mr);

   717     cur = next;

   718     do_yield_check();

   720     // Repeat the asserts from above. We'll do them as asserts here to

   721     // minimize their overhead on the product. However, we'll have

   722     // them as guarantees at the beginning / end of the bitmap

   723     // clearing to get some checking in the product.

   724     assert(cmThread()->during_cycle(), "invariant");

   725     assert(!g1h->mark_in_progress(), "invariant");

   726   }

   728   // Clear the liveness counting data

   729   clear_all_count_data();

   731   // Repeat the asserts from above.

   732   guarantee(cmThread()->during_cycle(), "invariant");

   733   guarantee(!g1h->mark_in_progress(), "invariant");

   734 }

   736 class NoteStartOfMarkHRClosure: public HeapRegionClosure {

   737 public:

   738   bool doHeapRegion(HeapRegion* r) {

   739     if (!r->continuesHumongous()) {

   740       r->note_start_of_marking();

   741     }

   742     return false;

   743   }

   744 };

   746 void ConcurrentMark::checkpointRootsInitialPre() {

   747   G1CollectedHeap*   g1h = G1CollectedHeap::heap();

   748   G1CollectorPolicy* g1p = g1h->g1_policy();

   750   _has_aborted = false;

   752 #ifndef PRODUCT

   753   if (G1PrintReachableAtInitialMark) {

   754     print_reachable("at-cycle-start",

   755                     VerifyOption_G1UsePrevMarking, true /* all */);

   756   }

   757 #endif

   759   // Initialise marking structures. This has to be done in a STW phase.

   760   reset();

   762   // For each region note start of marking.

   763   NoteStartOfMarkHRClosure startcl;

   764   g1h->heap_region_iterate(&startcl);

   765 }

   768 void ConcurrentMark::checkpointRootsInitialPost() {

   769   G1CollectedHeap*   g1h = G1CollectedHeap::heap();

   771   // If we force an overflow during remark, the remark operation will

   772   // actually abort and we'll restart concurrent marking. If we always

   773   // force an oveflow during remark we'll never actually complete the

   774   // marking phase. So, we initilize this here, at the start of the

   775   // cycle, so that at the remaining overflow number will decrease at

   776   // every remark and we'll eventually not need to cause one.

   777   force_overflow_stw()->init();

   779   // Start Concurrent Marking weak-reference discovery.

   780   ReferenceProcessor* rp = g1h->ref_processor_cm();

   781   // enable ("weak") refs discovery

   782   rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);

   783   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

   785   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();

   786   // This is the start of  the marking cycle, we're expected all

   787   // threads to have SATB queues with active set to false.

   788   satb_mq_set.set_active_all_threads(true, /* new active value */

   789                                      false /* expected_active */);

   791   _root_regions.prepare_for_scan();

   793   // update_g1_committed() will be called at the end of an evac pause

   794   // when marking is on. So, it's also called at the end of the

   795   // initial-mark pause to update the heap end, if the heap expands

   796   // during it. No need to call it here.

   797 }

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended for a Full GC, or for an
 * evacuation pause to occur. This is actually safe, since entering
 * the sync barrier is one of the last things do_marking_step() does,
 * and it doesn't manipulate any data structures afterwards.
 */

   820 void ConcurrentMark::enter_first_sync_barrier(int task_num) {

   821   if (verbose_low()) {

   822     gclog_or_tty->print_cr("[%d] entering first barrier", task_num);

   823   }

   825   if (concurrent()) {

   826     ConcurrentGCThread::stsLeave();

   827   }

   828   _first_overflow_barrier_sync.enter();

   829   if (concurrent()) {

   830     ConcurrentGCThread::stsJoin();

   831   }

   832   // at this point everyone should have synced up and not be doing any

   833   // more work

   835   if (verbose_low()) {

   836     gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);

   837   }

   839   // let task 0 do this

   840   if (task_num == 0) {

   841     // task 0 is responsible for clearing the global data structures

   842     // We should be here because of an overflow. During STW we should

   843     // not clear the overflow flag since we rely on it being true when

   844     // we exit this method to abort the pause and restart concurent

   845     // marking.

   846     clear_marking_state(concurrent() /* clear_overflow */);

   847     force_overflow()->update();

   849     if (G1Log::fine()) {

   850       gclog_or_tty->date_stamp(PrintGCDateStamps);

   851       gclog_or_tty->stamp(PrintGCTimeStamps);

   852       gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");

   853     }

   854   }

   856   // after this, each task should reset its own data structures then

   857   // then go into the second barrier

   858 }

   860 void ConcurrentMark::enter_second_sync_barrier(int task_num) {

   861   if (verbose_low()) {

   862     gclog_or_tty->print_cr("[%d] entering second barrier", task_num);

   863   }

   865   if (concurrent()) {

   866     ConcurrentGCThread::stsLeave();

   867   }

   868   _second_overflow_barrier_sync.enter();

   869   if (concurrent()) {

   870     ConcurrentGCThread::stsJoin();

   871   }

   872   // at this point everything should be re-initialised and ready to go

   874   if (verbose_low()) {

   875     gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);

   876   }

   877 }

   879 #ifndef PRODUCT

   880 void ForceOverflowSettings::init() {

   881   _num_remaining = G1ConcMarkForceOverflow;

   882   _force = false;

   883   update();

   884 }

   886 void ForceOverflowSettings::update() {

   887   if (_num_remaining > 0) {

   888     _num_remaining -= 1;

   889     _force = true;

   890   } else {

   891     _force = false;

   892   }

   893 }

   895 bool ForceOverflowSettings::should_force() {

   896   if (_force) {

   897     _force = false;

   898     return true;

   899   } else {

   900     return false;

   901   }

   902 }

   903 #endif // !PRODUCT

   905 class CMConcurrentMarkingTask: public AbstractGangTask {

   906 private:

   907   ConcurrentMark*       _cm;

   908   ConcurrentMarkThread* _cmt;

   910 public:

   911   void work(uint worker_id) {

   912     assert(Thread::current()->is_ConcurrentGC_thread(),

   913            "this should only be done by a conc GC thread");

   914     ResourceMark rm;

   916     double start_vtime = os::elapsedVTime();

   918     ConcurrentGCThread::stsJoin();

   920     assert(worker_id < _cm->active_tasks(), "invariant");

   921     CMTask* the_task = _cm->task(worker_id);

   922     the_task->record_start_time();

   923     if (!_cm->has_aborted()) {

   924       do {

   925         double start_vtime_sec = os::elapsedVTime();

   926         double start_time_sec = os::elapsedTime();

   927         double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

   929         the_task->do_marking_step(mark_step_duration_ms,

   930                                   true /* do_stealing    */,

   931                                   true /* do_termination */);

   933         double end_time_sec = os::elapsedTime();

   934         double end_vtime_sec = os::elapsedVTime();

   935         double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;

   936         double elapsed_time_sec = end_time_sec - start_time_sec;

   937         _cm->clear_has_overflown();

   939         bool ret = _cm->do_yield_check(worker_id);

   941         jlong sleep_time_ms;

   942         if (!_cm->has_aborted() && the_task->has_aborted()) {

   943           sleep_time_ms =

   944             (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);

   945           ConcurrentGCThread::stsLeave();

   946           os::sleep(Thread::current(), sleep_time_ms, false);

   947           ConcurrentGCThread::stsJoin();

   948         }

   949         double end_time2_sec = os::elapsedTime();

   950         double elapsed_time2_sec = end_time2_sec - start_time_sec;

   952 #if 0

   953           gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "

   954                                  "overhead %1.4lf",

   955                                  elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,

   956                                  the_task->conc_overhead(os::elapsedTime()) * 8.0);

   957           gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",

   958                                  elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);

   959 #endif

   960       } while (!_cm->has_aborted() && the_task->has_aborted());

   961     }

   962     the_task->record_end_time();

   963     guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");

   965     ConcurrentGCThread::stsLeave();

   967     double end_vtime = os::elapsedVTime();

   968     _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);

   969   }

   971   CMConcurrentMarkingTask(ConcurrentMark* cm,

   972                           ConcurrentMarkThread* cmt) :

   973       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

   975   ~CMConcurrentMarkingTask() { }

   976 };

   978 // Calculates the number of active workers for a concurrent

   979 // phase.

   980 uint ConcurrentMark::calc_parallel_marking_threads() {

   981   if (G1CollectedHeap::use_parallel_gc_threads()) {

   982     uint n_conc_workers = 0;

   983     if (!UseDynamicNumberOfGCThreads ||

   984         (!FLAG_IS_DEFAULT(ConcGCThreads) &&

   985          !ForceDynamicNumberOfGCThreads)) {

   986       n_conc_workers = max_parallel_marking_threads();

   987     } else {

   988       n_conc_workers =

   989         AdaptiveSizePolicy::calc_default_active_workers(

   990                                      max_parallel_marking_threads(),

   991                                      1, /* Minimum workers */

   992                                      parallel_marking_threads(),

   993                                      Threads::number_of_non_daemon_threads());

   994       // Don't scale down "n_conc_workers" by scale_parallel_threads() because

   995       // that scaling has already gone into "_max_parallel_marking_threads".

   996     }

   997     assert(n_conc_workers > 0, "Always need at least 1");

   998     return n_conc_workers;

   999   }

  1000   // If we are not running with any parallel GC threads we will not

  1001   // have spawned any marking threads either. Hence the number of

  1002   // concurrent workers should be 0.

  1003   return 0;

  1004 }

  1006 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {

  1007   // Currently, only survivors can be root regions.

  1008   assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");

  1009   G1RootRegionScanClosure cl(_g1h, this, worker_id);

  1011   const uintx interval = PrefetchScanIntervalInBytes;

  1012   HeapWord* curr = hr->bottom();

  1013   const HeapWord* end = hr->top();

  1014   while (curr < end) {

  1015     Prefetch::read(curr, interval);

  1016     oop obj = oop(curr);

  1017     int size = obj->oop_iterate(&cl);

  1018     assert(size == obj->size(), "sanity");

  1019     curr += size;

  1020   }

  1021 }

  1023 class CMRootRegionScanTask : public AbstractGangTask {

  1024 private:

  1025   ConcurrentMark* _cm;

  1027 public:

  1028   CMRootRegionScanTask(ConcurrentMark* cm) :

  1029     AbstractGangTask("Root Region Scan"), _cm(cm) { }

  1031   void work(uint worker_id) {

  1032     assert(Thread::current()->is_ConcurrentGC_thread(),

  1033            "this should only be done by a conc GC thread");

  1035     CMRootRegions* root_regions = _cm->root_regions();

  1036     HeapRegion* hr = root_regions->claim_next();

  1037     while (hr != NULL) {

  1038       _cm->scanRootRegion(hr, worker_id);

  1039       hr = root_regions->claim_next();

  1040     }

  1041   }

  1042 };

  1044 void ConcurrentMark::scanRootRegions() {

  1045   // scan_in_progress() will have been set to true only if there was

  1046   // at least one root region to scan. So, if it's false, we

  1047   // should not attempt to do any further work.

  1048   if (root_regions()->scan_in_progress()) {

  1049     _parallel_marking_threads = calc_parallel_marking_threads();

  1050     assert(parallel_marking_threads() <= max_parallel_marking_threads(),

  1051            "Maximum number of marking threads exceeded");

  1052     uint active_workers = MAX2(1U, parallel_marking_threads());

  1054     CMRootRegionScanTask task(this);

  1055     if (parallel_marking_threads() > 0) {

  1056       _parallel_workers->set_active_workers((int) active_workers);

  1057       _parallel_workers->run_task(&task);

  1058     } else {

  1059       task.work(0);

  1060     }

  1062     // It's possible that has_aborted() is true here without actually

  1063     // aborting the survivor scan earlier. This is OK as it's

  1064     // mainly used for sanity checking.

  1065     root_regions()->scan_finished();

  1066   }

  1067 }

  1069 void ConcurrentMark::markFromRoots() {

  1070   // we might be tempted to assert that:

  1071   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),

  1072   //        "inconsistent argument?");

  1073   // However that wouldn't be right, because it's possible that

  1074   // a safepoint is indeed in progress as a younger generation

  1075   // stop-the-world GC happens even as we mark in this generation.

  1077   _restart_for_overflow = false;

  1078   force_overflow_conc()->init();

  1080   // _g1h has _n_par_threads

  1081   _parallel_marking_threads = calc_parallel_marking_threads();

  1082   assert(parallel_marking_threads() <= max_parallel_marking_threads(),

  1083     "Maximum number of marking threads exceeded");

  1085   uint active_workers = MAX2(1U, parallel_marking_threads());

  1087   // Parallel task terminator is set in "set_phase()"

  1088   set_phase(active_workers, true /* concurrent */);

  1090   CMConcurrentMarkingTask markingTask(this, cmThread());

  1091   if (parallel_marking_threads() > 0) {

  1092     _parallel_workers->set_active_workers((int)active_workers);

  1093     // Don't set _n_par_threads because it affects MT in proceess_strong_roots()

  1094     // and the decisions on that MT processing is made elsewhere.

  1095     assert(_parallel_workers->active_workers() > 0, "Should have been set");

  1096     _parallel_workers->run_task(&markingTask);

  1097   } else {

  1098     markingTask.work(0);

  1099   }

  1100   print_stats();

  1101 }

  1103 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {

  1104   // world is stopped at this checkpoint

  1105   assert(SafepointSynchronize::is_at_safepoint(),

  1106          "world should be stopped");

  1108   G1CollectedHeap* g1h = G1CollectedHeap::heap();

  1110   // If a full collection has happened, we shouldn't do this.

  1111   if (has_aborted()) {

  1112     g1h->set_marking_complete(); // So bitmap clearing isn't confused

  1113     return;

  1114   }

  1116   SvcGCMarker sgcm(SvcGCMarker::OTHER);

  1118   if (VerifyDuringGC) {

  1119     HandleMark hm;  // handle scope

  1120     gclog_or_tty->print(" VerifyDuringGC:(before)");

  1121     Universe::heap()->prepare_for_verify();

  1122     Universe::verify(/* silent      */ false,

  1123                      /* option      */ VerifyOption_G1UsePrevMarking);

  1124   }

  1126   G1CollectorPolicy* g1p = g1h->g1_policy();

  1127   g1p->record_concurrent_mark_remark_start();

  1129   double start = os::elapsedTime();

  1131   checkpointRootsFinalWork();

  1133   double mark_work_end = os::elapsedTime();

  1135   weakRefsWork(clear_all_soft_refs);

  1137   if (has_overflown()) {

  1138     // Oops.  We overflowed.  Restart concurrent marking.

  1139     _restart_for_overflow = true;

  1140     // Clear the flag. We do not need it any more.

  1141     clear_has_overflown();

  1142     if (G1TraceMarkStackOverflow) {

  1143       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");

  1144     }

  1145   } else {

  1146     // Aggregate the per-task counting data that we have accumulated

  1147     // while marking.

  1148     aggregate_count_data();

  1150     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();

  1151     // We're done with marking.

  1152     // This is the end of  the marking cycle, we're expected all

  1153     // threads to have SATB queues with active set to true.

  1154     satb_mq_set.set_active_all_threads(false, /* new active value */

  1155                                        true /* expected_active */);

  1157     if (VerifyDuringGC) {

  1158       HandleMark hm;  // handle scope

  1159       gclog_or_tty->print(" VerifyDuringGC:(after)");

  1160       Universe::heap()->prepare_for_verify();

  1161       Universe::verify(/* silent      */ false,

  1162                        /* option      */ VerifyOption_G1UseNextMarking);

  1163     }

  1164     assert(!restart_for_overflow(), "sanity");

  1165   }

  1167   // Reset the marking state if marking completed

  1168   if (!restart_for_overflow()) {

  1169     set_non_marking_state();

  1170   }

  1172 #if VERIFY_OBJS_PROCESSED

  1173   _scan_obj_cl.objs_processed = 0;

  1174   ThreadLocalObjQueue::objs_enqueued = 0;

  1175 #endif

  1177   // Statistics

  1178   double now = os::elapsedTime();

  1179   _remark_mark_times.add((mark_work_end - start) * 1000.0);

  1180   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);

  1181   _remark_times.add((now - start) * 1000.0);

  1183   g1p->record_concurrent_mark_remark_end();

  1184 }

  1186 // Base class of the closures that finalize and verify the

  1187 // liveness counting data.

  1188 class CMCountDataClosureBase: public HeapRegionClosure {

  1189 protected:

  1190   ConcurrentMark* _cm;

  1191   BitMap* _region_bm;

  1192   BitMap* _card_bm;

  1194   void set_card_bitmap_range(BitMap::idx_t start_idx, BitMap::idx_t last_idx) {

  1195     assert(start_idx <= last_idx, "sanity");

  1197     // Set the inclusive bit range [start_idx, last_idx].

  1198     // For small ranges (up to 8 cards) use a simple loop; otherwise

  1199     // use par_at_put_range.

  1200     if ((last_idx - start_idx) < 8) {

  1201       for (BitMap::idx_t i = start_idx; i <= last_idx; i += 1) {

  1202         _card_bm->par_set_bit(i);

  1203       }

  1204     } else {

  1205       assert(last_idx < _card_bm->size(), "sanity");

  1206       // Note BitMap::par_at_put_range() is exclusive.

  1207       _card_bm->par_at_put_range(start_idx, last_idx+1, true);

  1208     }

  1209   }

  1211   // It takes a region that's not empty (i.e., it has at least one

  1212   // live object in it and sets its corresponding bit on the region

  1213   // bitmap to 1. If the region is "starts humongous" it will also set

  1214   // to 1 the bits on the region bitmap that correspond to its

  1215   // associated "continues humongous" regions.

  1216   void set_bit_for_region(HeapRegion* hr) {

  1217     assert(!hr->continuesHumongous(), "should have filtered those out");

  1219     BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();

  1220     if (!hr->startsHumongous()) {

  1221       // Normal (non-humongous) case: just set the bit.

  1222       _region_bm->par_at_put(index, true);

  1223     } else {

  1224       // Starts humongous case: calculate how many regions are part of

  1225       // this humongous region and then set the bit range.

  1226       G1CollectedHeap* g1h = G1CollectedHeap::heap();

  1227       HeapRegion *last_hr = g1h->heap_region_containing_raw(hr->end() - 1);

  1228       BitMap::idx_t end_index = (BitMap::idx_t) last_hr->hrs_index() + 1;

  1229       _region_bm->par_at_put_range(index, end_index, true);

  1230     }

  1231   }

  1233 public:

  1234   CMCountDataClosureBase(ConcurrentMark *cm,

  1235                          BitMap* region_bm, BitMap* card_bm):

  1236     _cm(cm), _region_bm(region_bm), _card_bm(card_bm) { }

  1237 };

  1239 // Closure that calculates the # live objects per region. Used

  1240 // for verification purposes during the cleanup pause.

  1241 class CalcLiveObjectsClosure: public CMCountDataClosureBase {

  1242   CMBitMapRO* _bm;

  1243   size_t _region_marked_bytes;

  1245 public:

  1246   CalcLiveObjectsClosure(CMBitMapRO *bm, ConcurrentMark *cm,

  1247                          BitMap* region_bm, BitMap* card_bm) :

  1248     CMCountDataClosureBase(cm, region_bm, card_bm),

  1249     _bm(bm), _region_marked_bytes(0) { }

  1251   bool doHeapRegion(HeapRegion* hr) {

  1253     if (hr->continuesHumongous()) {

  1254       // We will ignore these here and process them when their

  1255       // associated "starts humongous" region is processed (see

  1256       // set_bit_for_heap_region()). Note that we cannot rely on their

  1257       // associated "starts humongous" region to have their bit set to

  1258       // 1 since, due to the region chunking in the parallel region

  1259       // iteration, a "continues humongous" region might be visited

  1260       // before its associated "starts humongous".

  1261       return false;

  1262     }

  1264     HeapWord* nextTop = hr->next_top_at_mark_start();

  1265     HeapWord* start   = hr->bottom();

  1267     assert(start <= hr->end() && start <= nextTop && nextTop <= hr->end(),

  1268            err_msg("Preconditions not met - "

  1269                    "start: "PTR_FORMAT", nextTop: "PTR_FORMAT", end: "PTR_FORMAT,

  1270                    start, nextTop, hr->end()));

  1272     // Find the first marked object at or after "start".

  1273     start = _bm->getNextMarkedWordAddress(start, nextTop);

  1275     size_t marked_bytes = 0;

  1277     while (start < nextTop) {

  1278       oop obj = oop(start);

  1279       int obj_sz = obj->size();

  1280       HeapWord* obj_last = start + obj_sz - 1;

  1282       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);

  1283       BitMap::idx_t last_idx = _cm->card_bitmap_index_for(obj_last);

  1285       // Set the bits in the card BM for this object (inclusive).

  1286       set_card_bitmap_range(start_idx, last_idx);

  1288       // Add the size of this object to the number of marked bytes.

  1289       marked_bytes += (size_t)obj_sz * HeapWordSize;

  1291       // Find the next marked object after this one.

  1292       start = _bm->getNextMarkedWordAddress(obj_last + 1, nextTop);

  1293     }

  1295     // Mark the allocated-since-marking portion...

  1296     HeapWord* top = hr->top();

  1297     if (nextTop < top) {

  1298       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(nextTop);

  1299       BitMap::idx_t last_idx = _cm->card_bitmap_index_for(top - 1);

  1301       set_card_bitmap_range(start_idx, last_idx);

  1303       // This definitely means the region has live objects.

  1304       set_bit_for_region(hr);

  1305     }

  1307     // Update the live region bitmap.

  1308     if (marked_bytes > 0) {

  1309       set_bit_for_region(hr);

  1310     }

  1312     // Set the marked bytes for the current region so that

  1313     // it can be queried by a calling verificiation routine

  1314     _region_marked_bytes = marked_bytes;

  1316     return false;

  1317   }

  1319   size_t region_marked_bytes() const { return _region_marked_bytes; }

  1320 };

  1322 // Heap region closure used for verifying the counting data

  1323 // that was accumulated concurrently and aggregated during

  1324 // the remark pause. This closure is applied to the heap

  1325 // regions during the STW cleanup pause.

  1327 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {

  1328   ConcurrentMark* _cm;

  1329   CalcLiveObjectsClosure _calc_cl;

  1330   BitMap* _region_bm;   // Region BM to be verified

  1331   BitMap* _card_bm;     // Card BM to be verified

  1332   bool _verbose;        // verbose output?

  1334   BitMap* _exp_region_bm; // Expected Region BM values

  1335   BitMap* _exp_card_bm;   // Expected card BM values

  1337   int _failures;

  1339 public:

  1340   VerifyLiveObjectDataHRClosure(ConcurrentMark* cm,

  1341                                 BitMap* region_bm,

  1342                                 BitMap* card_bm,

  1343                                 BitMap* exp_region_bm,

  1344                                 BitMap* exp_card_bm,

  1345                                 bool verbose) :

  1346     _cm(cm),

  1347     _calc_cl(_cm->nextMarkBitMap(), _cm, exp_region_bm, exp_card_bm),

  1348     _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),

  1349     _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),

  1350     _failures(0) { }

  1352   int failures() const { return _failures; }

  1354   bool doHeapRegion(HeapRegion* hr) {

  1355     if (hr->continuesHumongous()) {

  1356       // We will ignore these here and process them when their

  1357       // associated "starts humongous" region is processed (see

  1358       // set_bit_for_heap_region()). Note that we cannot rely on their

  1359       // associated "starts humongous" region to have their bit set to

  1360       // 1 since, due to the region chunking in the parallel region

  1361       // iteration, a "continues humongous" region might be visited

  1362       // before its associated "starts humongous".

  1363       return false;

  1364     }

  1366     int failures = 0;

  1368     // Call the CalcLiveObjectsClosure to walk the marking bitmap for

  1369     // this region and set the corresponding bits in the expected region

  1370     // and card bitmaps.

  1371     bool res = _calc_cl.doHeapRegion(hr);

  1372     assert(res == false, "should be continuing");

  1374     MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),

  1375                     Mutex::_no_safepoint_check_flag);

  1377     // Verify the marked bytes for this region.

  1378     size_t exp_marked_bytes = _calc_cl.region_marked_bytes();

  1379     size_t act_marked_bytes = hr->next_marked_bytes();

  1381     // We're not OK if expected marked bytes > actual marked bytes. It means

  1382     // we have missed accounting some objects during the actual marking.

  1383     if (exp_marked_bytes > act_marked_bytes) {

  1384       if (_verbose) {

  1385         gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "

  1386                                "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,

  1387                                hr->hrs_index(), exp_marked_bytes, act_marked_bytes);

  1388       }

  1389       failures += 1;

  1390     }

  1392     // Verify the bit, for this region, in the actual and expected

  1393     // (which was just calculated) region bit maps.

  1394     // We're not OK if the bit in the calculated expected region

  1395     // bitmap is set and the bit in the actual region bitmap is not.

  1396     BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();

  1398     bool expected = _exp_region_bm->at(index);

  1399     bool actual = _region_bm->at(index);

  1400     if (expected && !actual) {

  1401       if (_verbose) {

  1402         gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "

  1403                                "expected: %s, actual: %s",

  1404                                hr->hrs_index(),

  1405                                BOOL_TO_STR(expected), BOOL_TO_STR(actual));

  1406       }

  1407       failures += 1;

  1408     }

  1410     // Verify that the card bit maps for the cards spanned by the current

  1411     // region match. We have an error if we have a set bit in the expected

  1412     // bit map and the corresponding bit in the actual bitmap is not set.

  1414     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());

  1415     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());

  1417     for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {

  1418       expected = _exp_card_bm->at(i);

  1419       actual = _card_bm->at(i);

  1421       if (expected && !actual) {

  1422         if (_verbose) {

  1423           gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "

  1424                                  "expected: %s, actual: %s",

  1425                                  hr->hrs_index(), i,

  1426                                  BOOL_TO_STR(expected), BOOL_TO_STR(actual));

  1427         }

  1428         failures += 1;

  1429       }

  1430     }

  1432     if (failures > 0 && _verbose)  {

  1433       gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "

  1434                              "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,

  1435                              HR_FORMAT_PARAMS(hr), hr->next_top_at_mark_start(),

  1436                              _calc_cl.region_marked_bytes(), hr->next_marked_bytes());

  1437     }

  1439     _failures += failures;

  1441     // We could stop iteration over the heap when we

  1442     // find the first violating region by returning true.

  1443     return false;

  1444   }

  1445 };

  1448 class G1ParVerifyFinalCountTask: public AbstractGangTask {

  1449 protected:

  1450   G1CollectedHeap* _g1h;

  1451   ConcurrentMark* _cm;

  1452   BitMap* _actual_region_bm;

  1453   BitMap* _actual_card_bm;

  1455   uint    _n_workers;

  1457   BitMap* _expected_region_bm;

  1458   BitMap* _expected_card_bm;

  1460   int  _failures;

  1461   bool _verbose;

  1463 public:

  1464   G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,

  1465                             BitMap* region_bm, BitMap* card_bm,

  1466                             BitMap* expected_region_bm, BitMap* expected_card_bm)

  1467     : AbstractGangTask("G1 verify final counting"),

  1468       _g1h(g1h), _cm(_g1h->concurrent_mark()),

  1469       _actual_region_bm(region_bm), _actual_card_bm(card_bm),

  1470       _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),

  1471       _failures(0), _verbose(false),

  1472       _n_workers(0) {

  1473     assert(VerifyDuringGC, "don't call this otherwise");

  1475     // Use the value already set as the number of active threads

  1476     // in the call to run_task().

  1477     if (G1CollectedHeap::use_parallel_gc_threads()) {

  1478       assert( _g1h->workers()->active_workers() > 0,

  1479         "Should have been previously set");

  1480       _n_workers = _g1h->workers()->active_workers();

  1481     } else {

  1482       _n_workers = 1;

  1483     }

  1485     assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");

  1486     assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");

  1488     _verbose = _cm->verbose_medium();

  1489   }

  1491   void work(uint worker_id) {

  1492     assert(worker_id < _n_workers, "invariant");

  1494     VerifyLiveObjectDataHRClosure verify_cl(_cm,

  1495                                             _actual_region_bm, _actual_card_bm,

  1496                                             _expected_region_bm,

  1497                                             _expected_card_bm,

  1498                                             _verbose);

  1500     if (G1CollectedHeap::use_parallel_gc_threads()) {

  1501       _g1h->heap_region_par_iterate_chunked(&verify_cl,

  1502                                             worker_id,

  1503                                             _n_workers,

  1504                                             HeapRegion::VerifyCountClaimValue);

  1505     } else {

  1506       _g1h->heap_region_iterate(&verify_cl);

  1507     }

  1509     Atomic::add(verify_cl.failures(), &_failures);

  1510   }

  1512   int failures() const { return _failures; }

  1513 };

  1515 // Closure that finalizes the liveness counting data.

  1516 // Used during the cleanup pause.

  1517 // Sets the bits corresponding to the interval [NTAMS, top]

  1518 // (which contains the implicitly live objects) in the

  1519 // card liveness bitmap. Also sets the bit for each region,

  1520 // containing live data, in the region liveness bitmap.

  1522 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {

  1523  public:

         // Passes the ConcurrentMark instance and the region/card liveness
         // bitmaps straight through to CMCountDataClosureBase.
  1524   FinalCountDataUpdateClosure(ConcurrentMark* cm,

  1525                               BitMap* region_bm,

  1526                               BitMap* card_bm) :

  1527     CMCountDataClosureBase(cm, region_bm, card_bm) { }

         // Updates the liveness bitmaps for the single region 'hr'.
         // Returns false on every path.
         // NOTE(review): false presumably means "continue iterating" --
         // confirm against HeapRegionClosure.
  1529   bool doHeapRegion(HeapRegion* hr) {

  1531     if (hr->continuesHumongous()) {

  1532       // We will ignore these here and process them when their

  1533       // associated "starts humongous" region is processed (see

  1534       // set_bit_for_heap_region()). Note that we cannot rely on their

  1535       // associated "starts humongous" region to have their bit set to

  1536       // 1 since, due to the region chunking in the parallel region

  1537       // iteration, a "continues humongous" region might be visited

  1538       // before its associated "starts humongous".

  1539       return false;

  1540     }

  1542     HeapWord* ntams = hr->next_top_at_mark_start();

  1543     HeapWord* top   = hr->top();

  1545     assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");

  1547     // Mark the allocated-since-marking portion...

  1548     if (ntams < top) {

  1549       // This definitely means the region has live objects.

  1550       set_bit_for_region(hr);

  1551     }

  1553     // Now set the bits for [ntams, top]

           // The card indices are computed even when ntams == top, so
           // set_card_bitmap_range() is always called.
           // NOTE(review): whether last_idx is treated as inclusive depends on
           // set_card_bitmap_range(), which is not visible here -- confirm.
  1554     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);

  1555     BitMap::idx_t last_idx = _cm->card_bitmap_index_for(top);

  1556     set_card_bitmap_range(start_idx, last_idx);

  1558     // Set the bit for the region if it contains live data

  1559     if (hr->next_marked_bytes() > 0) {

  1560       set_bit_for_region(hr);

  1561     }

  1563     return false;

  1564   }

  1565 };

  1567 class G1ParFinalCountTask: public AbstractGangTask {

       // Gang task that applies a FinalCountDataUpdateClosure to the heap
       // regions. With parallel GC threads each worker iterates through
       // heap_region_par_iterate_chunked(); otherwise the caller iterates
       // over all regions with heap_region_iterate().
  1568 protected:

  1569   G1CollectedHeap* _g1h;

  1570   ConcurrentMark* _cm;

         // Bitmaps handed to every FinalCountDataUpdateClosure created in work().
  1571   BitMap* _actual_region_bm;

  1572   BitMap* _actual_card_bm;

         // Number of workers passed to the chunked iteration (1 when not parallel).
  1574   uint    _n_workers;

  1576 public:

         // Note: _cm is initialized from the member _g1h; this relies on _g1h
         // being declared (and therefore initialized) before _cm.
  1577   G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)

  1578     : AbstractGangTask("G1 final counting"),

  1579       _g1h(g1h), _cm(_g1h->concurrent_mark()),

  1580       _actual_region_bm(region_bm), _actual_card_bm(card_bm),

  1581       _n_workers(0) {

  1582     // Use the value already set as the number of active threads

  1583     // in the call to run_task().

  1584     if (G1CollectedHeap::use_parallel_gc_threads()) {

  1585       assert( _g1h->workers()->active_workers() > 0,

  1586         "Should have been previously set");

  1587       _n_workers = _g1h->workers()->active_workers();

  1588     } else {

  1589       _n_workers = 1;

  1590     }

  1591   }

         // Per-worker entry point. worker_id must be smaller than the worker
         // count captured in the constructor.
  1593   void work(uint worker_id) {

  1594     assert(worker_id < _n_workers, "invariant");

           // Each invocation gets its own closure instance; all instances
           // share the same two bitmaps.
  1596     FinalCountDataUpdateClosure final_update_cl(_cm,

  1597                                                 _actual_region_bm,

  1598                                                 _actual_card_bm);

  1600     if (G1CollectedHeap::use_parallel_gc_threads()) {

  1601       _g1h->heap_region_par_iterate_chunked(&final_update_cl,

  1602                                             worker_id,

  1603                                             _n_workers,

  1604                                             HeapRegion::FinalCountClaimValue);

  1605     } else {

  1606       _g1h->heap_region_iterate(&final_update_cl);

  1607     }

  1608   }

  1609 };

  1611 class G1ParNoteEndTask;

  1613 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {

       // Per-region closure used by G1ParNoteEndTask. For every region it
       // resets the GC time stamp; for every region that is not "continues
       // humongous" it also calls note_end_of_marking(), accumulates
       // max_live_bytes(), calls free_region_if_empty() and records how long
       // the region took to process.
  1614   G1CollectedHeap* _g1;

  1615   int _worker_num;

         // Sum of hr->max_live_bytes() over the processed regions.
  1616   size_t _max_live_bytes;

         // Number of regions processed (excludes "continues humongous" ones).
  1617   uint _regions_claimed;

         // Updated by free_region_if_empty() through the pointer passed to it.
  1618   size_t _freed_bytes;

         // Caller-owned lists/sets/task that are forwarded to
         // free_region_if_empty().
  1619   FreeRegionList* _local_cleanup_list;

  1620   OldRegionSet* _old_proxy_set;

  1621   HumongousRegionSet* _humongous_proxy_set;

  1622   HRRSCleanupTask* _hrrs_cleanup_task;

         // Total and maximum per-region processing time, measured with
         // os::elapsedTime().
  1623   double _claimed_region_time;

  1624   double _max_region_time;

  1626 public:

         // NOTE(review): the initializer list below is not written in member
         // declaration order (the two timing fields appear before the list/set
         // pointers). No initializer reads another member, so this only
         // affects compiler warnings.
  1627   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,

  1628                              int worker_num,

  1629                              FreeRegionList* local_cleanup_list,

  1630                              OldRegionSet* old_proxy_set,

  1631                              HumongousRegionSet* humongous_proxy_set,

  1632                              HRRSCleanupTask* hrrs_cleanup_task) :

  1633     _g1(g1), _worker_num(worker_num),

  1634     _max_live_bytes(0), _regions_claimed(0),

  1635     _freed_bytes(0),

  1636     _claimed_region_time(0.0), _max_region_time(0.0),

  1637     _local_cleanup_list(local_cleanup_list),

  1638     _old_proxy_set(old_proxy_set),

  1639     _humongous_proxy_set(humongous_proxy_set),

  1640     _hrrs_cleanup_task(hrrs_cleanup_task) { }

  1642   size_t freed_bytes() { return _freed_bytes; }

         // Processes one region; returns false on every path.
  1644   bool doHeapRegion(HeapRegion *hr) {

  1645     // We use a claim value of zero here because all regions

  1646     // were claimed with value 1 in the FinalCount task.

           // NOTE(review): the comment above looks stale. This closure does not
           // use a claim value itself; the task that drives it iterates with
           // HeapRegion::NoteEndClaimValue.
  1647     hr->reset_gc_time_stamp();

  1648     if (!hr->continuesHumongous()) {

  1649       double start = os::elapsedTime();

  1650       _regions_claimed++;

  1651       hr->note_end_of_marking();

  1652       _max_live_bytes += hr->max_live_bytes();

  1653       _g1->free_region_if_empty(hr,

  1654                                 &_freed_bytes,

  1655                                 _local_cleanup_list,

  1656                                 _old_proxy_set,

  1657                                 _humongous_proxy_set,

  1658                                 _hrrs_cleanup_task,

  1659                                 true /* par */);

  1660       double region_time = (os::elapsedTime() - start);

  1661       _claimed_region_time += region_time;

  1662       if (region_time > _max_region_time) {

  1663         _max_region_time = region_time;

  1664       }

  1665     }

  1666     return false;

  1667   }

         // Read-only accessors for the statistics gathered by doHeapRegion().
  1669   size_t max_live_bytes() { return _max_live_bytes; }

  1670   uint regions_claimed() { return _regions_claimed; }

  1671   double claimed_region_time_sec() { return _claimed_region_time; }

  1672   double max_region_time_sec() { return _max_region_time; }

  1673 };

  1675 class G1ParNoteEndTask: public AbstractGangTask {

       // Gang task that runs a G1NoteEndOfConcMarkClosure over the heap
       // regions and merges each worker's results (live/freed byte totals and
       // the locally collected cleanup list) into the task-wide state.
  1676   friend class G1NoteEndOfConcMarkClosure;

  1678 protected:

  1679   G1CollectedHeap* _g1h;

         // Totals accumulated from every work() invocation; updated while
         // holding ParGCRareEvent_lock.
  1680   size_t _max_live_bytes;

  1681   size_t _freed_bytes;

         // Destination list; each worker appends its local cleanup list to it.
  1682   FreeRegionList* _cleanup_list;

  1684 public:

  1685   G1ParNoteEndTask(G1CollectedHeap* g1h,

  1686                    FreeRegionList* cleanup_list) :

  1687     AbstractGangTask("G1 note end"), _g1h(g1h),

  1688     _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }

         // Per-worker entry point: iterate with worker-local lists/sets first,
         // then publish the results.
  1690   void work(uint worker_id) {

           // NOTE(review): 'start' is not read again in this function.
  1691     double start = os::elapsedTime();

  1692     FreeRegionList local_cleanup_list("Local Cleanup List");

  1693     OldRegionSet old_proxy_set("Local Cleanup Old Proxy Set");

  1694     HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");

  1695     HRRSCleanupTask hrrs_cleanup_task;

  1696     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, worker_id, &local_cleanup_list,

  1697                                            &old_proxy_set,

  1698                                            &humongous_proxy_set,

  1699                                            &hrrs_cleanup_task);

  1700     if (G1CollectedHeap::use_parallel_gc_threads()) {

  1701       _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id,

  1702                                             _g1h->workers()->active_workers(),

  1703                                             HeapRegion::NoteEndClaimValue);

  1704     } else {

  1705       _g1h->heap_region_iterate(&g1_note_end);

  1706     }

  1707     assert(g1_note_end.complete(), "Shouldn't have yielded!");

  1709     // Now update the lists

           // No free list is passed here (NULL); the freed regions stay on
           // local_cleanup_list and are appended to _cleanup_list below.
  1710     _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),

  1711                                             NULL /* free_list */,

  1712                                             &old_proxy_set,

  1713                                             &humongous_proxy_set,

  1714                                             true /* par */);

  1715     {

             // Everything in this scope runs while holding ParGCRareEvent_lock.
  1716       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);

  1717       _max_live_bytes += g1_note_end.max_live_bytes();

  1718       _freed_bytes += g1_note_end.freed_bytes();

  1720       // If we iterate over the global cleanup list at the end of

  1721       // cleanup to do this printing we will not guarantee to only

  1722       // generate output for the newly-reclaimed regions (the list

  1723       // might not be empty at the beginning of cleanup; we might

  1724       // still be working on its previous contents). So we do the

  1725       // printing here, before we append the new regions to the global

  1726       // cleanup list.

  1728       G1HRPrinter* hr_printer = _g1h->hr_printer();

  1729       if (hr_printer->is_active()) {

  1730         HeapRegionLinkedListIterator iter(&local_cleanup_list);

  1731         while (iter.more_available()) {

  1732           HeapRegion* hr = iter.get_next();

  1733           hr_printer->cleanup(hr);

  1734         }

  1735       }

             // add_as_tail() is expected to leave the source list empty, as the
             // assert that follows checks.
  1737       _cleanup_list->add_as_tail(&local_cleanup_list);

  1738       assert(local_cleanup_list.is_empty(), "post-condition");

  1740       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);

  1741     }

  1742   }

         // Totals accumulated across all work() invocations.
  1743   size_t max_live_bytes() { return _max_live_bytes; }

  1744   size_t freed_bytes() { return _freed_bytes; }

  1745 };

  1747 class G1ParScrubRemSetTask: public AbstractGangTask {

       // Gang task that asks the G1 remembered set to scrub itself using the
       // region and card liveness bitmaps supplied at construction.
  1748 protected:

  1749   G1RemSet* _g1rs;

  1750   BitMap* _region_bm;

  1751   BitMap* _card_bm;

  1752 public:

         // The remembered set is obtained from g1h->g1_rem_set(); the bitmaps
         // are stored as given.
  1753   G1ParScrubRemSetTask(G1CollectedHeap* g1h,

  1754                        BitMap* region_bm, BitMap* card_bm) :

  1755     AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),

  1756     _region_bm(region_bm), _card_bm(card_bm) { }

         // Uses scrub_par() with the ScrubRemSetClaimValue when parallel GC
         // threads are enabled, and the non-parallel scrub() otherwise.
  1758   void work(uint worker_id) {

  1759     if (G1CollectedHeap::use_parallel_gc_threads()) {

  1760       _g1rs->scrub_par(_region_bm, _card_bm, worker_id,

  1761                        HeapRegion::ScrubRemSetClaimValue);

  1762     } else {

  1763       _g1rs->scrub(_region_bm, _card_bm);

  1764     }

  1765   }

  1767 };

  1769 void ConcurrentMark::cleanup() {

         // Cleanup step of a marking cycle; must run at a safepoint. In order:
         //   1. run the final liveness counting task (G1ParFinalCountTask);
         //   2. if VerifyDuringGC, verify the counting data;
         //   3. swap the mark bitmaps and reset the heap's GC time stamp;
         //   4. run G1ParNoteEndTask, which may add regions to _cleanup_list;
         //   5. if G1ScrubRemSets, scrub the remembered sets;
         //   6. record policy/timing statistics and update heap info.
         // Returns early, doing none of the above, if marking has aborted.
  1770   // world is stopped at this checkpoint

  1771   assert(SafepointSynchronize::is_at_safepoint(),

  1772          "world should be stopped");

  1773   G1CollectedHeap* g1h = G1CollectedHeap::heap();

  1775   // If a full collection has happened, we shouldn't do this.

  1776   if (has_aborted()) {

  1777     g1h->set_marking_complete(); // So bitmap clearing isn't confused

  1778     return;

  1779   }

  1781   HRSPhaseSetter x(HRSPhaseCleanup);

  1782   g1h->verify_region_sets_optional();

  1784   if (VerifyDuringGC) {

  1785     HandleMark hm;  // handle scope

  1786     gclog_or_tty->print(" VerifyDuringGC:(before)");

  1787     Universe::heap()->prepare_for_verify();

  1788     Universe::verify(/* silent      */ false,

  1789                      /* option      */ VerifyOption_G1UsePrevMarking);

  1790   }

  1792   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();

  1793   g1p->record_concurrent_mark_cleanup_start();

         // 'start' is the reference point for both the counting time and the
         // overall cleanup time recorded further down.
  1795   double start = os::elapsedTime();

  1797   HeapRegionRemSet::reset_for_cleanup_tasks();

         // Assigned in both branches of the if/else below before any use; it
         // is reused for every later parallel phase in this function.
  1799   uint n_workers;

  1801   // Do counting once more with the world stopped for good measure.

  1802   G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);

  1804   if (G1CollectedHeap::use_parallel_gc_threads()) {

  1805    assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),

  1806            "sanity check");

  1808     g1h->set_par_threads();

  1809     n_workers = g1h->n_par_threads();

           // NOTE(review): n_workers was assigned from n_par_threads() on the
           // previous line, so this assert can only fail if the value changes
           // between the two calls.
  1810     assert(g1h->n_par_threads() == n_workers,

  1811            "Should not have been reset");

  1812     g1h->workers()->run_task(&g1_par_count_task);

  1813     // Done with the parallel phase so reset to 0.

  1814     g1h->set_par_threads(0);

  1816     assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue),

  1817            "sanity check");

  1818   } else {

  1819     n_workers = 1;

  1820     g1_par_count_task.work(0);

  1821   }

  1823   if (VerifyDuringGC) {

  1824     // Verify that the counting data accumulated during marking matches

  1825     // that calculated by walking the marking bitmap.

  1827     // Bitmaps to hold expected values

  1828     BitMap expected_region_bm(_region_bm.size(), false);

  1829     BitMap expected_card_bm(_card_bm.size(), false);

  1831     G1ParVerifyFinalCountTask g1_par_verify_task(g1h,

  1832                                                  &_region_bm,

  1833                                                  &_card_bm,

  1834                                                  &expected_region_bm,

  1835                                                  &expected_card_bm);

  1837     if (G1CollectedHeap::use_parallel_gc_threads()) {

  1838       g1h->set_par_threads((int)n_workers);

  1839       g1h->workers()->run_task(&g1_par_verify_task);

  1840       // Done with the parallel phase so reset to 0.

  1841       g1h->set_par_threads(0);

  1843       assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),

  1844              "sanity check");

  1845     } else {

  1846       g1_par_verify_task.work(0);

  1847     }

           // guarantee(), not assert(): any mismatch found by the verify task is
           // treated as fatal.
  1849     guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");

  1850   }

         // Heap usage before regions are freed; only used for the size
         // transition log line near the end of this function.
  1852   size_t start_used_bytes = g1h->used();

  1853   g1h->set_marking_complete();

         // The counting time spans from 'start' to here, so it also includes
         // the optional verification pass above.
  1855   double count_end = os::elapsedTime();

  1856   double this_final_counting_time = (count_end - start);

  1857   _total_counting_time += this_final_counting_time;

  1859   if (G1PrintRegionLivenessInfo) {

  1860     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");

           // NOTE(review): this uses the member _g1h while the rest of the
           // function uses the local g1h -- presumably the same heap; confirm.
  1861     _g1h->heap_region_iterate(&cl);

  1862   }

  1864   // Install newly created mark bitMap as "prev".

  1865   swapMarkBitMaps();

  1867   g1h->reset_gc_time_stamp();

  1869   // Note end of marking in all heap regions.

  1870   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);

  1871   if (G1CollectedHeap::use_parallel_gc_threads()) {

  1872     g1h->set_par_threads((int)n_workers);

  1873     g1h->workers()->run_task(&g1_par_note_end_task);

  1874     g1h->set_par_threads(0);

  1876     assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),

  1877            "sanity check");

  1878   } else {

  1879     g1_par_note_end_task.work(0);

  1880   }

  1882   if (!cleanup_list_is_empty()) {

  1883     // The cleanup list is not empty, so we'll have to process it

  1884     // concurrently. Notify anyone else that might be wanting free

  1885     // regions that there will be more free regions coming soon.

  1886     g1h->set_free_regions_coming();

  1887   }

         // Scrub the remembered sets before the
         // record_concurrent_mark_cleanup_end()
  1889   // call below, since it affects the metric by which we sort the heap

  1890   // regions.

  1891   if (G1ScrubRemSets) {

  1892     double rs_scrub_start = os::elapsedTime();

  1893     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);

  1894     if (G1CollectedHeap::use_parallel_gc_threads()) {

  1895       g1h->set_par_threads((int)n_workers);

  1896       g1h->workers()->run_task(&g1_par_scrub_rs_task);

  1897       g1h->set_par_threads(0);

  1899       assert(g1h->check_heap_region_claim_values(

  1900                                             HeapRegion::ScrubRemSetClaimValue),

  1901              "sanity check");

  1902     } else {

  1903       g1_par_scrub_rs_task.work(0);

  1904     }

  1906     double rs_scrub_end = os::elapsedTime();

  1907     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);

  1908     _total_rs_scrub_time += this_rs_scrub_time;

  1909   }

  1911   // this will also free any regions totally full of garbage objects,

  1912   // and sort the regions.

  1913   g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);

  1915   // Statistics.

         // os::elapsedTime() differences are scaled by 1000.0 before being
         // recorded (presumably seconds to milliseconds).
  1916   double end = os::elapsedTime();

  1917   _cleanup_times.add((end - start) * 1000.0);

  1919   if (G1Log::fine()) {

  1920     g1h->print_size_transition(gclog_or_tty,

  1921                                start_used_bytes,

  1922                                g1h->used(),

  1923                                g1h->capacity());

  1924   }

  1926   // Clean up will have freed any regions completely full of garbage.

  1927   // Update the soft reference policy with the new heap occupancy.

  1928   Universe::update_heap_info_at_gc();

  1930   // We need to make this be a "collection" so any collection pause that

  1931   // races with it goes around and waits for completeCleanup to finish.

  1932   g1h->increment_total_collections();

  1934   // We reclaimed old regions so we should calculate the sizes to make

  1935   // sure we update the old gen/space data.

  1936   g1h->g1mm()->update_sizes();

  1938   if (VerifyDuringGC) {

  1939     HandleMark hm;  // handle scope

  1940     gclog_or_tty->print(" VerifyDuringGC:(after)");

  1941     Universe::heap()->prepare_for_verify();

  1942     Universe::verify(/* silent      */ false,

  1943                      /* option      */ VerifyOption_G1UsePrevMarking);

  1944   }

  1946   g1h->verify_region_sets_optional();

  1947 }

  1949 void ConcurrentMark::completeCleanup() {

         // Drains _cleanup_list: each region is cleared with par_clear() and
         // moved, in batches, onto the heap's secondary free list. Does
         // nothing if marking has aborted.
  1950   if (has_aborted()) return;

  1952   G1CollectedHeap* g1h = G1CollectedHeap::heap();

  1954   _cleanup_list.verify_optional();

  1955   FreeRegionList tmp_free_list("Tmp Free List");

  1957   if (G1ConcRegionFreeingVerbose) {

  1958     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "

  1959                            "cleanup list has %u entries",

  1960                            _cleanup_list.length());

  1961   }

  1963   // No one else should be accessing the _cleanup_list at this point,

  1964   // so it's not necessary to take any locks

  1965   while (!_cleanup_list.is_empty()) {

  1966     HeapRegion* hr = _cleanup_list.remove_head();

  1967     assert(hr != NULL, "the list was not empty");

  1968     hr->par_clear();

  1969     tmp_free_list.add_as_tail(hr);

  1971     // Instead of adding one region at a time to the secondary_free_list,

  1972     // we accumulate them in the local list and move them a few at a

  1973     // time. This also cuts down on the number of notify_all() calls

  1974     // we do during this process. We'll also append the local list when

  1975     // _cleanup_list is empty (which means we just removed the last

  1976     // region from the _cleanup_list).

           // NOTE(review): the modulo below assumes
           // G1SecondaryFreeListAppendLength is non-zero -- confirm the flag's
           // allowed range.
  1977     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||

  1978         _cleanup_list.is_empty()) {

  1979       if (G1ConcRegionFreeingVerbose) {

  1980         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "

  1981                                "appending %u entries to the secondary_free_list, "

  1982                                "cleanup list still has %u entries",

  1983                                tmp_free_list.length(),

  1984                                _cleanup_list.length());

  1985       }

  1987       {

               // The append and the notification both happen while holding
               // SecondaryFreeList_lock.
  1988         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);

  1989         g1h->secondary_free_list_add_as_tail(&tmp_free_list);

  1990         SecondaryFreeList_lock->notify_all();

  1991       }

             // Stress option: after each batch, sleep 1 ms per iteration for
             // G1StressConcRegionFreeingDelayMillis iterations.
  1993       if (G1StressConcRegionFreeing) {

  1994         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {

  1995           os::sleep(Thread::current(), (jlong) 1, false);

  1996         }

  1997       }

  1998     }

  1999   }

  2000   assert(tmp_free_list.is_empty(), "post-condition");

  2001 }

  2003 // Support closures for reference processing in G1

  2005 bool G1CMIsAliveClosure::do_object_b(oop obj) {

         // Returns true when obj is non-NULL and either lies outside the G1
         // reserved space or is not reported as "ill" by is_obj_ill().
         // A NULL obj yields false.
  2006   HeapWord* addr = (HeapWord*)obj;

  2007   return addr != NULL &&

  2008          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));

  2009 }

  2011 class G1CMKeepAliveClosure: public OopClosure {

       // Oop closure that marks-and-counts, and then pushes on the global
       // mark stack, every referenced object that lies in the G1 reserved
       // space and is reported as "ill" by is_obj_ill().
  2012   G1CollectedHeap* _g1;

  2013   ConcurrentMark*  _cm;

  2014  public:

         // Asserts that it is constructed on the VM thread; the verbose trace
         // in do_oop_work() prints a fixed "[0]" where other closures print a
         // task id.
  2015   G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm) :

  2016     _g1(g1), _cm(cm) {

  2017     assert(Thread::current()->is_VM_thread(), "otherwise fix worker id");

  2018   }

         // Both oop widths are handled by the same template.
  2020   virtual void do_oop(narrowOop* p) { do_oop_work(p); }

  2021   virtual void do_oop(      oop* p) { do_oop_work(p); }

  2023   template <class T> void do_oop_work(T* p) {

  2024     oop obj = oopDesc::load_decode_heap_oop(p);

  2025     HeapWord* addr = (HeapWord*)obj;

  2027     if (_cm->verbose_high()) {

  2028       gclog_or_tty->print_cr("\t[0] we're looking at location "

  2029                              "*"PTR_FORMAT" = "PTR_FORMAT,

  2030                              p, (void*) obj);

  2031     }

           // Objects outside the G1 reserved space, and objects that are not
           // "ill", are left untouched.
  2033     if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {

  2034       _cm->mark_and_count(obj);

  2035       _cm->mark_stack_push(obj);

  2036     }

  2037   }

  2038 };

  2040 class G1CMDrainMarkingStackClosure: public VoidClosure {

       // Void closure whose do_void() drains the given mark stack, applying
       // the given keep-alive closure, against the next mark bitmap.
  2041   ConcurrentMark*               _cm;

  2042   CMMarkStack*                  _markStack;

  2043   G1CMKeepAliveClosure*         _oopClosure;

  2044  public:

         // Stores the three pointers as given; ownership is not taken here.
  2045   G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMMarkStack* markStack,

  2046                                G1CMKeepAliveClosure* oopClosure) :

  2047     _cm(cm),

  2048     _markStack(markStack),

  2049     _oopClosure(oopClosure) { }

         // NOTE(review): the meaning of the final 'false' argument depends on
         // CMMarkStack::drain(), which is not visible here -- confirm.
  2051   void do_void() {

  2052     _markStack->drain((OopClosure*)_oopClosure, _cm->nextMarkBitMap(), false);

  2053   }

  2054 };

  2056 // 'Keep Alive' closure used by parallel reference processing.

  2057 // An instance of this closure is used in the parallel reference processing

  2058 // code rather than an instance of G1CMKeepAliveClosure. We could have used

  2059 // the G1CMKeepAliveClosure as it is MT-safe. Also reference objects are

  2060 // placed on to discovered ref lists once so we can mark and push with no

  2061 // need to check whether the object has already been marked. Using the

  2062 // G1CMKeepAliveClosure would mean, however, having all the worker threads

  2063 // operating on the global mark stack. This means that an individual

  2064 // worker would be doing lock-free pushes while it processes its own

  2065 // discovered ref list followed by drain call. If the discovered ref lists

  2066 // are unbalanced then this could cause interference with the other

  2067 // workers. Using a CMTask (and its embedded local data structures)

  2068 // avoids that potential interference.

  2069 class G1CMParKeepAliveAndDrainClosure: public OopClosure {

  2070   ConcurrentMark*  _cm;

  2071   CMTask*          _task;

         // Number of references handled between drains; taken from
         // G1RefProcDrainInterval.
  2072   int              _ref_counter_limit;

         // Counts down from _ref_counter_limit; reaching zero triggers a drain.
  2073   int              _ref_counter;

  2074  public:

  2075   G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task) :

  2076     _cm(cm), _task(task),

  2077     _ref_counter_limit(G1RefProcDrainInterval) {

  2078     assert(_ref_counter_limit > 0, "sanity");

  2079     _ref_counter = _ref_counter_limit;

  2080   }

         // Both oop widths are handled by the same template.
  2082   virtual void do_oop(narrowOop* p) { do_oop_work(p); }

  2083   virtual void do_oop(      oop* p) { do_oop_work(p); }

         // Hands the referenced object to the CMTask and, every
         // _ref_counter_limit references, runs marking steps to drain what was
         // pushed. When the global overflow flag is already set the reference
         // is skipped entirely (only a verbose message is printed).
  2085   template <class T> void do_oop_work(T* p) {

  2086     if (!_cm->has_overflown()) {

  2087       oop obj = oopDesc::load_decode_heap_oop(p);

  2088       if (_cm->verbose_high()) {

  2089         gclog_or_tty->print_cr("\t[%d] we're looking at location "

  2090                                "*"PTR_FORMAT" = "PTR_FORMAT,

  2091                                _task->task_id(), p, (void*) obj);

  2092       }

  2094       _task->deal_with_reference(obj);

  2095       _ref_counter--;

  2097       if (_ref_counter == 0) {

  2098         // We have dealt with _ref_counter_limit references, pushing them and objects

  2099         // reachable from them on to the local stack (and possibly the global stack).

  2100         // Call do_marking_step() to process these entries. We call the routine in a

  2101         // loop, which we'll exit if there's nothing more to do (i.e. we're done

  2102         // with the entries that we've pushed as a result of the deal_with_reference

  2103         // calls above) or we overflow.

  2104         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag

  2105         // while there may still be some work to do. (See the comment at the

  2106         // beginning of CMTask::do_marking_step() for those conditions - one of which

  2107         // is reaching the specified time target.) It is only when

  2108         // CMTask::do_marking_step() returns without setting the has_aborted() flag

  2109         // that the marking has completed.

  2110         do {

  2111           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

  2112           _task->do_marking_step(mark_step_duration_ms,

  2113                                  false /* do_stealing    */,

  2114                                  false /* do_termination */);

  2115         } while (_task->has_aborted() && !_cm->has_overflown());

               // Start counting the next batch of references.
  2116         _ref_counter = _ref_counter_limit;

  2117       }

  2118     } else {

  2119       if (_cm->verbose_high()) {

  2120          gclog_or_tty->print_cr("\t[%d] CM Overflow", _task->task_id());

  2121       }

  2122     }

  2123   }

  2124 };

  2126 class G1CMParDrainMarkingStackClosure: public VoidClosure {

       // Void closure used by parallel reference processing: do_void()
       // repeatedly runs CMTask::do_marking_step(), with stealing and
       // termination enabled, until the task finishes without aborting or the
       // global overflow flag is set.
  2127   ConcurrentMark* _cm;

  2128   CMTask* _task;

  2129  public:

  2130   G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) :

  2131     _cm(cm), _task(task) { }

  2133   void do_void() {

  2134     do {

  2135       if (_cm->verbose_high()) {

  2136         gclog_or_tty->print_cr("\t[%d] Drain: Calling do marking_step",

  2137                                _task->task_id());

  2138       }

  2140       // We call CMTask::do_marking_step() to completely drain the local and

  2141       // global marking stacks. The routine is called in a loop, which we'll

  2142       // exit if there's nothing more to do (i.e. we've completely drained the

  2143       // entries that were pushed as a result of applying the

  2144       // G1CMParKeepAliveAndDrainClosure to the entries on the discovered ref

  2145       // lists above) or we overflow the global marking stack.

  2146       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag

  2147       // while there may still be some work to do. (See the comment at the

  2148       // beginning of CMTask::do_marking_step() for those conditions - one of which

  2149       // is reaching the specified time target.) It is only when

  2150       // CMTask::do_marking_step() returns without setting the has_aborted() flag

  2151       // that the marking has completed.

  2153       _task->do_marking_step(1000000000.0 /* something very large */,

  2154                              true /* do_stealing    */,

  2155                              true /* do_termination */);

  2156     } while (_task->has_aborted() && !_cm->has_overflown());

  2157   }

  2158 };

  2160 // Implementation of AbstractRefProcTaskExecutor for parallel

  2161 // reference processing at the end of G1 concurrent marking

  2163 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {

  2164 private:

  2165   G1CollectedHeap* _g1h;

  2166   ConcurrentMark*  _cm;

         // Work gang on which the proxy tasks are run by execute().
  2167   WorkGang*        _workers;

         // Worker count passed to set_par_threads() (and, for process tasks,
         // to set_phase()) by execute().
  2168   int              _active_workers;

  2170 public:

         // Stores the arguments as given; 'workers' is asserted non-NULL only
         // when execute() is called.
  2171   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,

  2172                         ConcurrentMark* cm,

  2173                         WorkGang* workers,

  2174                         int n_workers) :

  2175     _g1h(g1h), _cm(cm),

  2176     _workers(workers), _active_workers(n_workers) { }

  2178   // Executes the given task using concurrent marking worker threads.

  2179   virtual void execute(ProcessTask& task);

  2180   virtual void execute(EnqueueTask& task);

  2181 };

  2183 class G1CMRefProcTaskProxy: public AbstractGangTask {

       // Gang-task adapter around a reference-processing ProcessTask: each
       // worker builds its own is-alive, keep-alive and drain closures and
       // passes them to the wrapped task.
  2184   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;

         // Held by reference: the wrapped task must outlive this proxy.
  2185   ProcessTask&     _proc_task;

  2186   G1CollectedHeap* _g1h;

  2187   ConcurrentMark*  _cm;

  2189 public:

  2190   G1CMRefProcTaskProxy(ProcessTask& proc_task,

  2191                      G1CollectedHeap* g1h,

  2192                      ConcurrentMark* cm) :

  2193     AbstractGangTask("Process reference objects in parallel"),

  2194     _proc_task(proc_task), _g1h(g1h), _cm(cm) { }

         // Uses the CMTask selected by worker_id as the backing task for the
         // keep-alive and drain closures.
  2196   virtual void work(uint worker_id) {

  2197     CMTask* marking_task = _cm->task(worker_id);

  2198     G1CMIsAliveClosure g1_is_alive(_g1h);

  2199     G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task);

  2200     G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task);

  2202     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);

  2203   }

  2204 };

  2206 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {

         // Wraps proc_task in a G1CMRefProcTaskProxy and runs it on the work
         // gang with _active_workers parallel threads; the parallel thread
         // count is reset to 0 afterwards.
  2207   assert(_workers != NULL, "Need parallel worker threads.");

  2209   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);

  2211   // We need to reset the phase for each task execution so that

  2212   // the termination protocol of CMTask::do_marking_step works.

  2213   _cm->set_phase(_active_workers, false /* concurrent */);

  2214   _g1h->set_par_threads(_active_workers);

  2215   _workers->run_task(&proc_task_proxy);

  2216   _g1h->set_par_threads(0);

  2217 }

  2219 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {

       // Gang-task adapter around a reference-processing EnqueueTask; work()
       // simply forwards the worker id to the wrapped task.
  2220   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;

         // Held by reference: the wrapped task must outlive this proxy.
  2221   EnqueueTask& _enq_task;

  2223 public:

  2224   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :

  2225     AbstractGangTask("Enqueue reference objects in parallel"),

  2226     _enq_task(enq_task) { }

  2228   virtual void work(uint worker_id) {

  2229     _enq_task.work(worker_id);

  2230   }

  2231 };

  2233 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {

         // Wraps enq_task in a G1CMRefEnqueueTaskProxy and runs it on the work
         // gang with _active_workers parallel threads. Unlike the ProcessTask
         // overload, no marking phase is set up first.
  2234   assert(_workers != NULL, "Need parallel worker threads.");

  2236   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);

  2238   _g1h->set_par_threads(_active_workers);

  2239   _workers->run_task(&enq_task_proxy);

  2240   _g1h->set_par_threads(0);

  2241 }

  2243 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {

  2244   ResourceMark rm;

  2245   HandleMark   hm;

  2247   G1CollectedHeap* g1h = G1CollectedHeap::heap();

  2249   // Is alive closure.

  2250   G1CMIsAliveClosure g1_is_alive(g1h);

  2252   // Inner scope to exclude the cleaning of the string and symbol

  2253   // tables from the displayed time.

  2254   {

  2255     if (G1Log::finer()) {

  2256       gclog_or_tty->put(' ');

  2257     }

  2258     TraceTime t("GC ref-proc", G1Log::finer(), false, gclog_or_tty);

  2260     ReferenceProcessor* rp = g1h->ref_processor_cm();

  2262     // See the comment in G1CollectedHeap::ref_processing_init()

  2263     // about how reference processing currently works in G1.

  2265     // Process weak references.

  2266     rp->setup_policy(clear_all_soft_refs);

  2267     assert(_markStack.isEmpty(), "mark stack should be empty");

  2269     G1CMKeepAliveClosure g1_keep_alive(g1h, this);

  2270     G1CMDrainMarkingStackClosure

  2271       g1_drain_mark_stack(this, &_markStack, &g1_keep_alive);

  2273     // We use the work gang from the G1CollectedHeap and we utilize all

  2274     // the worker threads.

  2275     uint active_workers = g1h->workers() ? g1h->workers()->active_workers() : 1U;

  2276     active_workers = MAX2(MIN2(active_workers, _max_task_num), 1U);

  2278     G1CMRefProcTaskExecutor par_task_executor(g1h, this,

  2279                                               g1h->workers(), active_workers);

  2281     if (rp->processing_is_mt()) {

  2282       // Set the degree of MT here.  If the discovery is done MT, there

  2283       // may have been a different number of threads doing the discovery

  2284       // and a different number of discovered lists may have Ref objects.

  2285       // That is OK as long as the Reference lists are balanced (see

  2286       // balance_all_queues() and balance_queues()).

  2287       rp->set_active_mt_degree(active_workers);

  2289       rp->process_discovered_references(&g1_is_alive,

  2290                                       &g1_keep_alive,

  2291                                       &g1_drain_mark_stack,

  2292                                       &par_task_executor);

  2294       // The work routines of the parallel keep_alive and drain_marking_stack

  2295       // will set the has_overflown flag if we overflow the global marking

  2296       // stack.

  2297     } else {

  2298       rp->process_discovered_references(&g1_is_alive,

  2299                                         &g1_keep_alive,

  2300                                         &g1_drain_mark_stack,

  2301                                         NULL);

  2302     }

  2304     assert(_markStack.overflow() || _markStack.isEmpty(),

  2305             "mark stack should be empty (unless it overflowed)");

  2306     if (_markStack.overflow()) {

  2307       // Should have been done already when we tried to push an

  2308       // entry on to the global mark stack. But let's do it again.

  2309       set_has_overflown();

  2310     }

  2312     if (rp->processing_is_mt()) {

  2313       assert(rp->num_q() == active_workers, "why not");

  2314       rp->enqueue_discovered_references(&par_task_executor);

  2315     } else {

  2316       rp->enqueue_discovered_references();

  2317     }

  2319     rp->verify_no_references_recorded();

  2320     assert(!rp->discovery_enabled(), "Post condition");

  2321   }

  2323   // Now clean up stale oops in StringTable

  2324   StringTable::unlink(&g1_is_alive);

  2325   // Clean up unreferenced symbols in symbol table.

  2326   SymbolTable::unlink();

  2327 }

  2329 void ConcurrentMark::swapMarkBitMaps() {

  2330   CMBitMapRO* temp = _prevMarkBitMap;

  2331   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;

  2332   _nextMarkBitMap  = (CMBitMap*)  temp;

  2333 }

  2335 class CMRemarkTask: public AbstractGangTask {

  2336 private:

  2337   ConcurrentMark *_cm;

  2339 public:

  2340   void work(uint worker_id) {

  2341     // Since all available tasks are actually started, we should

  2342     // only proceed if we're supposed to be actived.

  2343     if (worker_id < _cm->active_tasks()) {

  2344       CMTask* task = _cm->task(worker_id);

  2345       task->record_start_time();

  2346       do {

  2347         task->do_marking_step(1000000000.0 /* something very large */,

  2348                               true /* do_stealing    */,

  2349                               true /* do_termination */);

  2350       } while (task->has_aborted() && !_cm->has_overflown());

  2351       // If we overflow, then we do not want to restart. We instead

  2352       // want to abort remark and do concurrent marking again.

  2353       task->record_end_time();

  2354     }

  2355   }

  2357   CMRemarkTask(ConcurrentMark* cm, int active_workers) :

  2358     AbstractGangTask("Par Remark"), _cm(cm) {

  2359     _cm->terminator()->reset_for_reuse(active_workers);

  2360   }

  2361 };

  2363 void ConcurrentMark::checkpointRootsFinalWork() {

  2364   ResourceMark rm;

  2365   HandleMark   hm;

  2366   G1CollectedHeap* g1h = G1CollectedHeap::heap();

  2368   g1h->ensure_parsability(false);

  2370   if (G1CollectedHeap::use_parallel_gc_threads()) {

  2371     G1CollectedHeap::StrongRootsScope srs(g1h);

  2372     // this is remark, so we'll use up all active threads

  2373     uint active_workers = g1h->workers()->active_workers();

  2374     if (active_workers == 0) {

  2375       assert(active_workers > 0, "Should have been set earlier");

  2376       active_workers = (uint) ParallelGCThreads;

  2377       g1h->workers()->set_active_workers(active_workers);

  2378     }

  2379     set_phase(active_workers, false /* concurrent */);

  2380     // Leave _parallel_marking_threads at it's

  2381     // value originally calculated in the ConcurrentMark

  2382     // constructor and pass values of the active workers

  2383     // through the gang in the task.

  2385     CMRemarkTask remarkTask(this, active_workers);

  2386     g1h->set_par_threads(active_workers);

  2387     g1h->workers()->run_task(&remarkTask);

  2388     g1h->set_par_threads(0);

  2389   } else {

  2390     G1CollectedHeap::StrongRootsScope srs(g1h);

  2391     // this is remark, so we'll use up all available threads

  2392     uint active_workers = 1;

  2393     set_phase(active_workers, false /* concurrent */);

  2395     CMRemarkTask remarkTask(this, active_workers);

  2396     // We will start all available threads, even if we decide that the

  2397     // active_workers will be fewer. The extra ones will just bail out

  2398     // immediately.

  2399     remarkTask.work(0);

  2400   }

  2401   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();

  2402   guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");

  2404   print_stats();

  2406 #if VERIFY_OBJS_PROCESSED

  2407   if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {

  2408     gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",

  2409                            _scan_obj_cl.objs_processed,

  2410                            ThreadLocalObjQueue::objs_enqueued);

  2411     guarantee(_scan_obj_cl.objs_processed ==

  2412               ThreadLocalObjQueue::objs_enqueued,

  2413               "Different number of objs processed and enqueued.");

  2414   }

  2415 #endif

  2416 }

  2418 #ifndef PRODUCT

  2420 class PrintReachableOopClosure: public OopClosure {

  2421 private:

  2422   G1CollectedHeap* _g1h;

  2423   outputStream*    _out;

  2424   VerifyOption     _vo;

  2425   bool             _all;

  2427 public:

  2428   PrintReachableOopClosure(outputStream* out,

  2429                            VerifyOption  vo,

  2430                            bool          all) :

  2431     _g1h(G1CollectedHeap::heap()),

  2432     _out(out), _vo(vo), _all(all) { }

  2434   void do_oop(narrowOop* p) { do_oop_work(p); }

  2435   void do_oop(      oop* p) { do_oop_work(p); }

  2437   template <class T> void do_oop_work(T* p) {

  2438     oop         obj = oopDesc::load_decode_heap_oop(p);

  2439     const char* str = NULL;

  2440     const char* str2 = "";

  2442     if (obj == NULL) {

  2443       str = "";

  2444     } else if (!_g1h->is_in_g1_reserved(obj)) {

  2445       str = " O";

  2446     } else {

  2447       HeapRegion* hr  = _g1h->heap_region_containing(obj);

  2448       guarantee(hr != NULL, "invariant");

  2449       bool over_tams = false;

  2450       bool marked = false;

  2452       switch (_vo) {

  2453         case VerifyOption_G1UsePrevMarking:

  2454           over_tams = hr->obj_allocated_since_prev_marking(obj);

  2455           marked = _g1h->isMarkedPrev(obj);

  2456           break;

  2457         case VerifyOption_G1UseNextMarking:

  2458           over_tams = hr->obj_allocated_since_next_marking(obj);

  2459           marked = _g1h->isMarkedNext(obj);

  2460           break;

  2461         case VerifyOption_G1UseMarkWord:

  2462           marked = obj->is_gc_marked();

  2463           break;

  2464         default:

  2465           ShouldNotReachHere();

  2466       }

  2468       if (over_tams) {

  2469         str = " >";

  2470         if (marked) {

  2471           str2 = " AND MARKED";

  2472         }

  2473       } else if (marked) {

  2474         str = " M";

  2475       } else {

  2476         str = " NOT";

  2477       }

  2478     }

  2480     _out->print_cr("  "PTR_FORMAT": "PTR_FORMAT"%s%s",

  2481                    p, (void*) obj, str, str2);

  2482   }

  2483 };

  2485 class PrintReachableObjectClosure : public ObjectClosure {

  2486 private:

  2487   G1CollectedHeap* _g1h;

  2488   outputStream*    _out;

  2489   VerifyOption     _vo;

  2490   bool             _all;

  2491   HeapRegion*      _hr;

  2493 public:

  2494   PrintReachableObjectClosure(outputStream* out,

  2495                               VerifyOption  vo,

  2496                               bool          all,

  2497                               HeapRegion*   hr) :

  2498     _g1h(G1CollectedHeap::heap()),

  2499     _out(out), _vo(vo), _all(all), _hr(hr) { }

  2501   void do_object(oop o) {

  2502     bool over_tams = false;

  2503     bool marked = false;

  2505     switch (_vo) {

  2506       case VerifyOption_G1UsePrevMarking:

  2507         over_tams = _hr->obj_allocated_since_prev_marking(o);

  2508         marked = _g1h->isMarkedPrev(o);

  2509         break;

  2510       case VerifyOption_G1UseNextMarking:

  2511         over_tams = _hr->obj_allocated_since_next_marking(o);

  2512         marked = _g1h->isMarkedNext(o);

  2513         break;

  2514       case VerifyOption_G1UseMarkWord:

  2515         marked = o->is_gc_marked();

  2516         break;

  2517       default:

  2518         ShouldNotReachHere();

  2519     }

  2520     bool print_it = _all || over_tams || marked;

  2522     if (print_it) {

  2523       _out->print_cr(" "PTR_FORMAT"%s",

  2524                      o, (over_tams) ? " >" : (marked) ? " M" : "");

  2525       PrintReachableOopClosure oopCl(_out, _vo, _all);

  2526       o->oop_iterate(&oopCl);

  2527     }

  2528   }

  2529 };

  2531 class PrintReachableRegionClosure : public HeapRegionClosure {

  2532 private:

  2533   outputStream* _out;

  2534   VerifyOption  _vo;

  2535   bool          _all;

  2537 public:

  2538   bool doHeapRegion(HeapRegion* hr) {

  2539     HeapWord* b = hr->bottom();

  2540     HeapWord* e = hr->end();

  2541     HeapWord* t = hr->top();

  2542     HeapWord* p = NULL;

  2544     switch (_vo) {

  2545       case VerifyOption_G1UsePrevMarking:

  2546         p = hr->prev_top_at_mark_start();

  2547         break;

  2548       case VerifyOption_G1UseNextMarking:

  2549         p = hr->next_top_at_mark_start();

  2550         break;

  2551       case VerifyOption_G1UseMarkWord:

  2552         // When we are verifying marking using the mark word

  2553         // TAMS has no relevance.

  2554         assert(p == NULL, "post-condition");

  2555         break;

  2556       default:

  2557         ShouldNotReachHere();

  2558     }

  2559     _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "

  2560                    "TAMS: "PTR_FORMAT, b, e, t, p);

  2561     _out->cr();

  2563     HeapWord* from = b;

  2564     HeapWord* to   = t;

  2566     if (to > from) {

  2567       _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);

  2568       _out->cr();

  2569       PrintReachableObjectClosure ocl(_out, _vo, _all, hr);

  2570       hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);

  2571       _out->cr();

  2572     }

  2574     return false;

  2575   }

  2577   PrintReachableRegionClosure(outputStream* out,

  2578                               VerifyOption  vo,

  2579                               bool          all) :

  2580     _out(out), _vo(vo), _all(all) { }

  2581 };

  2583 static const char* verify_option_to_tams(VerifyOption vo) {

  2584   switch (vo) {

  2585     case VerifyOption_G1UsePrevMarking:

  2586       return "PTAMS";

  2587     case VerifyOption_G1UseNextMarking:

  2588       return "NTAMS";

  2589     default:

  2590       return "NONE";

  2591   }

  2592 }

  2594 void ConcurrentMark::print_reachable(const char* str,

  2595                                      VerifyOption vo,

  2596                                      bool all) {

  2597   gclog_or_tty->cr();

  2598   gclog_or_tty->print_cr("== Doing heap dump... ");

  2600   if (G1PrintReachableBaseFile == NULL) {

  2601     gclog_or_tty->print_cr("  #### error: no base file defined");

  2602     return;

  2603   }

  2605   if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >

  2606       (JVM_MAXPATHLEN - 1)) {

  2607     gclog_or_tty->print_cr("  #### error: file name too long");

  2608     return;

  2609   }

  2611   char file_name[JVM_MAXPATHLEN];

  2612   sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);

  2613   gclog_or_tty->print_cr("  dumping to file %s", file_name);

  2615   fileStream fout(file_name);

  2616   if (!fout.is_open()) {

  2617     gclog_or_tty->print_cr("  #### error: could not open file");

  2618     return;

  2619   }

  2621   outputStream* out = &fout;

  2622   out->print_cr("-- USING %s", verify_option_to_tams(vo));

  2623   out->cr();

  2625   out->print_cr("--- ITERATING OVER REGIONS");

  2626   out->cr();

  2627   PrintReachableRegionClosure rcl(out, vo, all);

  2628   _g1h->heap_region_iterate(&rcl);

  2629   out->cr();

  2631   gclog_or_tty->print_cr("  done");

  2632   gclog_or_tty->flush();

  2633 }

  2635 #endif // PRODUCT

  2637 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {

  2638   // Note we are overriding the read-only view of the prev map here, via

  2639   // the cast.

  2640   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);

  2641 }

  2643 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {

  2644   _nextMarkBitMap->clearRange(mr);

  2645 }

  2647 void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) {

  2648   clearRangePrevBitmap(mr);

  2649   clearRangeNextBitmap(mr);

  2650 }

  2652 HeapRegion*

  2653 ConcurrentMark::claim_region(int task_num) {

  2654   // "checkpoint" the finger

  2655   HeapWord* finger = _finger;

  2657   // _heap_end will not change underneath our feet; it only changes at

  2658   // yield points.

  2659   while (finger < _heap_end) {

  2660     assert(_g1h->is_in_g1_reserved(finger), "invariant");

  2662     // Note on how this code handles humongous regions. In the

  2663     // normal case the finger will reach the start of a "starts

  2664     // humongous" (SH) region. Its end will either be the end of the

  2665     // last "continues humongous" (CH) region in the sequence, or the

  2666     // standard end of the SH region (if the SH is the only region in

  2667     // the sequence). That way claim_region() will skip over the CH

  2668     // regions. However, there is a subtle race between a CM thread

  2669     // executing this method and a mutator thread doing a humongous

  2670     // object allocation. The two are not mutually exclusive as the CM

  2671     // thread does not need to hold the Heap_lock when it gets

  2672     // here. So there is a chance that claim_region() will come across

  2673     // a free region that's in the progress of becoming a SH or a CH

  2674     // region. In the former case, it will either

  2675     //   a) Miss the update to the region's end, in which case it will

  2676     //      visit every subsequent CH region, will find their bitmaps

  2677     //      empty, and do nothing, or

  2678     //   b) Will observe the update of the region's end (in which case

  2679     //      it will skip the subsequent CH regions).

  2680     // If it comes across a region that suddenly becomes CH, the

  2681     // scenario will be similar to b). So, the race between

  2682     // claim_region() and a humongous object allocation might force us

  2683     // to do a bit of unnecessary work (due to some unnecessary bitmap

  2684     // iterations) but it should not introduce and correctness issues.

  2685     HeapRegion* curr_region   = _g1h->heap_region_containing_raw(finger);

  2686     HeapWord*   bottom        = curr_region->bottom();

  2687     HeapWord*   end           = curr_region->end();

  2688     HeapWord*   limit         = curr_region->next_top_at_mark_start();

  2690     if (verbose_low()) {

  2691       gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "

  2692                              "["PTR_FORMAT", "PTR_FORMAT"), "

  2693                              "limit = "PTR_FORMAT,

  2694                              task_num, curr_region, bottom, end, limit);

  2695     }

  2697     // Is the gap between reading the finger and doing the CAS too long?

  2698     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);

  2699     if (res == finger) {

  2700       // we succeeded

  2702       // notice that _finger == end cannot be guaranteed here since,

  2703       // someone else might have moved the finger even further

  2704       assert(_finger >= end, "the finger should have moved forward");

  2706       if (verbose_low()) {

  2707         gclog_or_tty->print_cr("[%d] we were successful with region = "

  2708                                PTR_FORMAT, task_num, curr_region);

  2709       }

  2711       if (limit > bottom) {

  2712         if (verbose_low()) {

  2713           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "

  2714                                  "returning it ", task_num, curr_region);

  2715         }

  2716         return curr_region;

  2717       } else {

  2718         assert(limit == bottom,

  2719                "the region limit should be at bottom");

  2720         if (verbose_low()) {

  2721           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "

  2722                                  "returning NULL", task_num, curr_region);

  2723         }

  2724         // we return NULL and the caller should try calling

  2725         // claim_region() again.

  2726         return NULL;

  2727       }

  2728     } else {

  2729       assert(_finger > finger, "the finger should have moved forward");

  2730       if (verbose_low()) {

  2731         gclog_or_tty->print_cr("[%d] somebody else moved the finger, "

  2732                                "global finger = "PTR_FORMAT", "

  2733                                "our finger = "PTR_FORMAT,

  2734                                task_num, _finger, finger);

  2735       }

  2737       // read it again

  2738       finger = _finger;

  2739     }

  2740   }

  2742   return NULL;

  2743 }

  2745 #ifndef PRODUCT

  2746 enum VerifyNoCSetOopsPhase {

  2747   VerifyNoCSetOopsStack,

  2748   VerifyNoCSetOopsQueues,

  2749   VerifyNoCSetOopsSATBCompleted,

  2750   VerifyNoCSetOopsSATBThread

  2751 };

  2753 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure  {

  2754 private:

  2755   G1CollectedHeap* _g1h;

  2756   VerifyNoCSetOopsPhase _phase;

  2757   int _info;

  2759   const char* phase_str() {

  2760     switch (_phase) {

  2761     case VerifyNoCSetOopsStack:         return "Stack";

  2762     case VerifyNoCSetOopsQueues:        return "Queue";

  2763     case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers";

  2764     case VerifyNoCSetOopsSATBThread:    return "Thread SATB Buffers";

  2765     default:                            ShouldNotReachHere();

  2766     }

  2767     return NULL;

  2768   }

  2770   void do_object_work(oop obj) {

  2771     guarantee(!_g1h->obj_in_cs(obj),

  2772               err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d",

  2773                       (void*) obj, phase_str(), _info));

  2774   }

  2776 public:

  2777   VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { }

  2779   void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) {

  2780     _phase = phase;

  2781     _info = info;

  2782   }

  2784   virtual void do_oop(oop* p) {

  2785     oop obj = oopDesc::load_decode_heap_oop(p);

  2786     do_object_work(obj);

  2787   }

  2789   virtual void do_oop(narrowOop* p) {

  2790     // We should not come across narrow oops while scanning marking

  2791     // stacks and SATB buffers.

  2792     ShouldNotReachHere();

  2793   }

  2795   virtual void do_object(oop obj) {

  2796     do_object_work(obj);

  2797   }

  2798 };

  2800 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks,

  2801                                          bool verify_enqueued_buffers,

  2802                                          bool verify_thread_buffers,

  2803                                          bool verify_fingers) {

  2804   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");

  2805   if (!G1CollectedHeap::heap()->mark_in_progress()) {

  2806     return;

  2807   }

  2809   VerifyNoCSetOopsClosure cl;

  2811   if (verify_stacks) {

  2812     // Verify entries on the global mark stack

  2813     cl.set_phase(VerifyNoCSetOopsStack);

  2814     _markStack.oops_do(&cl);

  2816     // Verify entries on the task queues

  2817     for (int i = 0; i < (int) _max_task_num; i += 1) {

  2818       cl.set_phase(VerifyNoCSetOopsQueues, i);

  2819       OopTaskQueue* queue = _task_queues->queue(i);

  2820       queue->oops_do(&cl);

  2821     }

  2822   }

  2824   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();

  2826   // Verify entries on the enqueued SATB buffers

  2827   if (verify_enqueued_buffers) {

  2828     cl.set_phase(VerifyNoCSetOopsSATBCompleted);

  2829     satb_qs.iterate_completed_buffers_read_only(&cl);

  2830   }

  2832   // Verify entries on the per-thread SATB buffers

  2833   if (verify_thread_buffers) {

  2834     cl.set_phase(VerifyNoCSetOopsSATBThread);

  2835     satb_qs.iterate_thread_buffers_read_only(&cl);

  2836   }

  2838   if (verify_fingers) {

  2839     // Verify the global finger

  2840     HeapWord* global_finger = finger();

  2841     if (global_finger != NULL && global_finger < _heap_end) {

  2842       // The global finger always points to a heap region boundary. We

  2843       // use heap_region_containing_raw() to get the containing region

  2844       // given that the global finger could be pointing to a free region

  2845       // which subsequently becomes continues humongous. If that

  2846       // happens, heap_region_containing() will return the bottom of the

  2847       // corresponding starts humongous region and the check below will

  2848       // not hold any more.

  2849       HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);

  2850       guarantee(global_finger == global_hr->bottom(),

  2851                 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT,

  2852                         global_finger, HR_FORMAT_PARAMS(global_hr)));

  2853     }

  2855     // Verify the task fingers

  2856     assert(parallel_marking_threads() <= _max_task_num, "sanity");

  2857     for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {

  2858       CMTask* task = _tasks[i];

  2859       HeapWord* task_finger = task->finger();

  2860       if (task_finger != NULL && task_finger < _heap_end) {

  2861         // See above note on the global finger verification.

  2862         HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);

  2863         guarantee(task_finger == task_hr->bottom() ||

  2864                   !task_hr->in_collection_set(),

  2865                   err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT,

  2866                           task_finger, HR_FORMAT_PARAMS(task_hr)));

  2867       }

  2868     }

  2869   }

  2870 }

  2871 #endif // PRODUCT

  2873 void ConcurrentMark::clear_marking_state(bool clear_overflow) {

  2874   _markStack.setEmpty();

  2875   _markStack.clear_overflow();

  2876   if (clear_overflow) {

  2877     clear_has_overflown();

  2878   } else {

  2879     assert(has_overflown(), "pre-condition");

  2880   }

  2881   _finger = _heap_start;

  2883   for (int i = 0; i < (int)_max_task_num; ++i) {

  2884     OopTaskQueue* queue = _task_queues->queue(i);

  2885     queue->set_empty();

  2886   }

  2887 }

  2889 // Aggregate the counting data that was constructed concurrently

  2890 // with marking.

  2891 class AggregateCountDataHRClosure: public HeapRegionClosure {

  2892   ConcurrentMark* _cm;

  2893   BitMap* _cm_card_bm;

  2894   size_t _max_task_num;

  2896  public:

  2897   AggregateCountDataHRClosure(ConcurrentMark *cm,

  2898                               BitMap* cm_card_bm,

  2899                               size_t max_task_num) :

  2900     _cm(cm), _cm_card_bm(cm_card_bm),

  2901     _max_task_num(max_task_num) { }

  2903   bool is_card_aligned(HeapWord* p) {

  2904     return ((uintptr_t(p) & (CardTableModRefBS::card_size - 1)) == 0);

  2905   }

  2907   bool doHeapRegion(HeapRegion* hr) {

  2908     if (hr->continuesHumongous()) {

  2909       // We will ignore these here and process them when their

  2910       // associated "starts humongous" region is processed.

  2911       // Note that we cannot rely on their associated

  2912       // "starts humongous" region to have their bit set to 1

  2913       // since, due to the region chunking in the parallel region

  2914       // iteration, a "continues humongous" region might be visited

  2915       // before its associated "starts humongous".

  2916       return false;

  2917     }

  2919     HeapWord* start = hr->bottom();

  2920     HeapWord* limit = hr->next_top_at_mark_start();

  2921     HeapWord* end = hr->end();

  2923     assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),

  2924            err_msg("Preconditions not met - "

  2925                    "start: "PTR_FORMAT", limit: "PTR_FORMAT", "

  2926                    "top: "PTR_FORMAT", end: "PTR_FORMAT,

  2927                    start, limit, hr->top(), hr->end()));

  2929     assert(hr->next_marked_bytes() == 0, "Precondition");

  2931     if (start == limit) {

  2932       // NTAMS of this region has not been set so nothing to do.

  2933       return false;

  2934     }

  2936     assert(is_card_aligned(start), "sanity");

  2937     assert(is_card_aligned(end), "sanity");

  2939     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);

  2940     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);

  2941     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);

  2943     // If ntams is not card aligned then we bump the index for

  2944     // limit so that we get the card spanning ntams.

  2945     if (!is_card_aligned(limit)) {

  2946       limit_idx += 1;

  2947     }

  2949     assert(limit_idx <= end_idx, "or else use atomics");

  2951     // Aggregate the "stripe" in the count data associated with hr.

  2952     uint hrs_index = hr->hrs_index();

  2953     size_t marked_bytes = 0;

  2955     for (int i = 0; (size_t)i < _max_task_num; i += 1) {

  2956       size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);

  2957       BitMap* task_card_bm = _cm->count_card_bitmap_for(i);

  2959       // Fetch the marked_bytes in this region for task i and

  2960       // add it to the running total for this region.

  2961       marked_bytes += marked_bytes_array[hrs_index];

  2963       // Now union the bitmaps[0,max_task_num)[start_idx..limit_idx)

  2964       // into the global card bitmap.

  2965       BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);

  2967       while (scan_idx < limit_idx) {

  2968         assert(task_card_bm->at(scan_idx) == true, "should be");

  2969         _cm_card_bm->set_bit(scan_idx);

  2970         assert(_cm_card_bm->at(scan_idx) == true, "should be");

  2972         // BitMap::get_next_one_offset() can handle the case when

  2973         // its left_offset parameter is greater than its right_offset

  2974         // parameter. If does, however, have an early exit if

  2975         // left_offset == right_offset. So let's limit the value

  2976         // passed in for left offset here.

  2977         BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);

  2978         scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);

  2979       }

  2980     }

  2982     // Update the marked bytes for this region.

  2983     hr->add_to_marked_bytes(marked_bytes);

  2985     // Next heap region

  2986     return false;

  2987   }

  2988 };

  2990 class G1AggregateCountDataTask: public AbstractGangTask {

  2991 protected:

  2992   G1CollectedHeap* _g1h;

  2993   ConcurrentMark* _cm;

  2994   BitMap* _cm_card_bm;

  2995   size_t _max_task_num;

  2996   int _active_workers;

  2998 public:

  2999   G1AggregateCountDataTask(G1CollectedHeap* g1h,

  3000                            ConcurrentMark* cm,

  3001                            BitMap* cm_card_bm,

  3002                            size_t max_task_num,

  3003                            int n_workers) :

  3004     AbstractGangTask("Count Aggregation"),

  3005     _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),

  3006     _max_task_num(max_task_num),

  3007     _active_workers(n_workers) { }

  3009   void work(uint worker_id) {

  3010     AggregateCountDataHRClosure cl(_cm, _cm_card_bm, _max_task_num);

  3012     if (G1CollectedHeap::use_parallel_gc_threads()) {

  3013       _g1h->heap_region_par_iterate_chunked(&cl, worker_id,

  3014                                             _active_workers,

  3015                                             HeapRegion::AggregateCountClaimValue);

  3016     } else {

  3017       _g1h->heap_region_iterate(&cl);

  3018     }

  3019   }

  3020 };

  3023 void ConcurrentMark::aggregate_count_data() {

  3024   int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?

  3025                         _g1h->workers()->active_workers() :

  3026                         1);

  3028   G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,

  3029                                            _max_task_num, n_workers);

  3031   if (G1CollectedHeap::use_parallel_gc_threads()) {

  3032     assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),

  3033            "sanity check");

  3034     _g1h->set_par_threads(n_workers);

  3035     _g1h->workers()->run_task(&g1_par_agg_task);

  3036     _g1h->set_par_threads(0);

  3038     assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue),

  3039            "sanity check");

  3040     _g1h->reset_heap_region_claim_values();

  3041   } else {

  3042     g1_par_agg_task.work(0);

  3043   }

  3044 }

  3046 // Clear the per-worker arrays used to store the per-region counting data

  3047 void ConcurrentMark::clear_all_count_data() {

  3048   // Clear the global card bitmap - it will be filled during

  3049   // liveness count aggregation (during remark) and the

  3050   // final counting task.

  3051   _card_bm.clear();

  3053   // Clear the global region bitmap - it will be filled as part

  3054   // of the final counting task.

  3055   _region_bm.clear();

  3057   uint max_regions = _g1h->max_regions();

  3058   assert(_max_task_num != 0, "unitialized");

  3060   for (int i = 0; (size_t) i < _max_task_num; i += 1) {

  3061     BitMap* task_card_bm = count_card_bitmap_for(i);

  3062     size_t* marked_bytes_array = count_marked_bytes_array_for(i);

  3064     assert(task_card_bm->size() == _card_bm.size(), "size mismatch");

  3065     assert(marked_bytes_array != NULL, "uninitialized");

  3067     memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));

  3068     task_card_bm->clear();

  3069   }

  3070 }

  3072 void ConcurrentMark::print_stats() {

  3073   if (verbose_stats()) {

  3074     gclog_or_tty->print_cr("---------------------------------------------------------------------");

  3075     for (size_t i = 0; i < _active_tasks; ++i) {

  3076       _tasks[i]->print_stats();

  3077       gclog_or_tty->print_cr("---------------------------------------------------------------------");

  3078     }

  3079   }

  3080 }

  3082 // abandon current marking iteration due to a Full GC

  3083 void ConcurrentMark::abort() {

  3084   // Clear all marks to force marking thread to do nothing

  3085   _nextMarkBitMap->clearAll();

  3086   // Clear the liveness counting data

  3087   clear_all_count_data();

  3088   // Empty mark stack

  3089   clear_marking_state();

  3090   for (int i = 0; i < (int)_max_task_num; ++i) {

  3091     _tasks[i]->clear_region_fields();

  3092   }

  3093   _has_aborted = true;

  3095   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();

  3096   satb_mq_set.abandon_partial_marking();

  3097   // This can be called either during or outside marking, we'll read

  3098   // the expected_active value from the SATB queue set.

  3099   satb_mq_set.set_active_all_threads(

  3100                                  false, /* new active value */

  3101                                  satb_mq_set.is_active() /* expected_active */);

  3102 }

  3104 static void print_ms_time_info(const char* prefix, const char* name,

  3105                                NumberSeq& ns) {

  3106   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",

  3107                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());

  3108   if (ns.num() > 0) {

  3109     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",

  3110                            prefix, ns.sd(), ns.maximum());

  3111   }

  3112 }

  3114 void ConcurrentMark::print_summary_info() {

  3115   gclog_or_tty->print_cr(" Concurrent marking:");

  3116   print_ms_time_info("  ", "init marks", _init_times);

  3117   print_ms_time_info("  ", "remarks", _remark_times);

  3118   {

  3119     print_ms_time_info("     ", "final marks", _remark_mark_times);

  3120     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);

  3122   }

  3123   print_ms_time_info("  ", "cleanups", _cleanup_times);

  3124   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",

  3125                          _total_counting_time,

  3126                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /

  3127                           (double)_cleanup_times.num()

  3128                          : 0.0));

  3129   if (G1ScrubRemSets) {

  3130     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",

  3131                            _total_rs_scrub_time,

  3132                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /

  3133                             (double)_cleanup_times.num()

  3134                            : 0.0));

  3135   }

  3136   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",

  3137                          (_init_times.sum() + _remark_times.sum() +

  3138                           _cleanup_times.sum())/1000.0);

  3139   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "

  3140                 "(%8.2f s marking).",

  3141                 cmThread()->vtime_accum(),

  3142                 cmThread()->vtime_mark_accum());

  3143 }

  3145 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {

  3146   _parallel_workers->print_worker_threads_on(st);

  3147 }

  3149 // We take a break if someone is trying to stop the world.

  3150 bool ConcurrentMark::do_yield_check(uint worker_id) {

  3151   if (should_yield()) {

  3152     if (worker_id == 0) {

  3153       _g1h->g1_policy()->record_concurrent_pause();

  3154     }

  3155     cmThread()->yield();

  3156     if (worker_id == 0) {

  3157       _g1h->g1_policy()->record_concurrent_pause_end();

  3158     }

  3159     return true;

  3160   } else {

  3161     return false;

  3162   }

  3163 }

  3165 bool ConcurrentMark::should_yield() {

  3166   return cmThread()->should_yield();

  3167 }

  3169 bool ConcurrentMark::containing_card_is_marked(void* p) {

  3170   size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);

  3171   return _card_bm.at(offset >> CardTableModRefBS::card_shift);

  3172 }

  3174 bool ConcurrentMark::containing_cards_are_marked(void* start,

  3175                                                  void* last) {

  3176   return containing_card_is_marked(start) &&

  3177          containing_card_is_marked(last);

  3178 }

  3180 #ifndef PRODUCT

  3181 // for debugging purposes

  3182 void ConcurrentMark::print_finger() {

  3183   gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,

  3184                          _heap_start, _heap_end, _finger);

  3185   for (int i = 0; i < (int) _max_task_num; ++i) {

  3186     gclog_or_tty->print("   %d: "PTR_FORMAT, i, _tasks[i]->finger());

  3187   }

  3188   gclog_or_tty->print_cr("");

  3189 }

  3190 #endif

  3192 void CMTask::scan_object(oop obj) {

  3193   assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");

  3195   if (_cm->verbose_high()) {

  3196     gclog_or_tty->print_cr("[%d] we're scanning object "PTR_FORMAT,

  3197                            _task_id, (void*) obj);

  3198   }

  3200   size_t obj_size = obj->size();

  3201   _words_scanned += obj_size;

  3203   obj->oop_iterate(_cm_oop_closure);

  3204   statsOnly( ++_objs_scanned );

  3205   check_limits();

  3206 }

  3208 // Closure for iteration over bitmaps

  3209 class CMBitMapClosure : public BitMapClosure {

  3210 private:

  3211   // the bitmap that is being iterated over

  3212   CMBitMap*                   _nextMarkBitMap;

  3213   ConcurrentMark*             _cm;

  3214   CMTask*                     _task;

  3216 public:

  3217   CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :

  3218     _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }

  3220   bool do_bit(size_t offset) {

  3221     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);

  3222     assert(_nextMarkBitMap->isMarked(addr), "invariant");

  3223     assert( addr < _cm->finger(), "invariant");

  3225     statsOnly( _task->increase_objs_found_on_bitmap() );

  3226     assert(addr >= _task->finger(), "invariant");

  3228     // We move that task's local finger along.

  3229     _task->move_finger_to(addr);

  3231     _task->scan_object(oop(addr));

  3232     // we only partially drain the local queue and global stack

  3233     _task->drain_local_queue(true);

  3234     _task->drain_global_stack(true);

  3236     // if the has_aborted flag has been raised, we need to bail out of

  3237     // the iteration

  3238     return !_task->has_aborted();

  3239   }

  3240 };

  3242 // Closure for iterating over objects, currently only used for

  3243 // processing SATB buffers.

  3244 class CMObjectClosure : public ObjectClosure {

  3245 private:

  3246   CMTask* _task;

  3248 public:

  3249   void do_object(oop obj) {

  3250     _task->deal_with_reference(obj);

  3251   }

  3253   CMObjectClosure(CMTask* task) : _task(task) { }

  3254 };

  3256 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,

  3257                                ConcurrentMark* cm,

  3258                                CMTask* task)

  3259   : _g1h(g1h), _cm(cm), _task(task) {

  3260   assert(_ref_processor == NULL, "should be initialized to NULL");

  3262   if (G1UseConcMarkReferenceProcessing) {

  3263     _ref_processor = g1h->ref_processor_cm();

  3264     assert(_ref_processor != NULL, "should not be NULL");

  3265   }

  3266 }

  3268 void CMTask::setup_for_region(HeapRegion* hr) {

  3269   // Separated the asserts so that we know which one fires.

  3270   assert(hr != NULL,

  3271         "claim_region() should have filtered out continues humongous regions");

  3272   assert(!hr->continuesHumongous(),

  3273         "claim_region() should have filtered out continues humongous regions");

  3275   if (_cm->verbose_low()) {

  3276     gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,

  3277                            _task_id, hr);

  3278   }

  3280   _curr_region  = hr;

  3281   _finger       = hr->bottom();

  3282   update_region_limit();

  3283 }

  3285 void CMTask::update_region_limit() {

  3286   HeapRegion* hr            = _curr_region;

  3287   HeapWord* bottom          = hr->bottom();

  3288   HeapWord* limit           = hr->next_top_at_mark_start();

  3290   if (limit == bottom) {

  3291     if (_cm->verbose_low()) {

  3292       gclog_or_tty->print_cr("[%d] found an empty region "

  3293                              "["PTR_FORMAT", "PTR_FORMAT")",

  3294                              _task_id, bottom, limit);

  3295     }

  3296     // The region was collected underneath our feet.

  3297     // We set the finger to bottom to ensure that the bitmap

  3298     // iteration that will follow this will not do anything.

  3299     // (this is not a condition that holds when we set the region up,

  3300     // as the region is not supposed to be empty in the first place)

  3301     _finger = bottom;

  3302   } else if (limit >= _region_limit) {

  3303     assert(limit >= _finger, "peace of mind");

  3304   } else {

  3305     assert(limit < _region_limit, "only way to get here");

  3306     // This can happen under some pretty unusual circumstances.  An

  3307     // evacuation pause empties the region underneath our feet (NTAMS

  3308     // at bottom). We then do some allocation in the region (NTAMS

  3309     // stays at bottom), followed by the region being used as a GC

  3310     // alloc region (NTAMS will move to top() and the objects

  3311     // originally below it will be grayed). All objects now marked in

  3312     // the region are explicitly grayed, if below the global finger,

  3313     // and we do not need in fact to scan anything else. So, we simply

  3314     // set _finger to be limit to ensure that the bitmap iteration

  3315     // doesn't do anything.

  3316     _finger = limit;

  3317   }

  3319   _region_limit = limit;

  3320 }

  3322 void CMTask::giveup_current_region() {

  3323   assert(_curr_region != NULL, "invariant");

  3324   if (_cm->verbose_low()) {

  3325     gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,

  3326                            _task_id, _curr_region);

  3327   }

  3328   clear_region_fields();

  3329 }

  3331 void CMTask::clear_region_fields() {

  3332   // Values for these three fields that indicate that we're not

  3333   // holding on to a region.

  3334   _curr_region   = NULL;

  3335   _finger        = NULL;

  3336   _region_limit  = NULL;

  3337 }

  3339 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {

  3340   if (cm_oop_closure == NULL) {

  3341     assert(_cm_oop_closure != NULL, "invariant");

  3342   } else {

  3343     assert(_cm_oop_closure == NULL, "invariant");

  3344   }

  3345   _cm_oop_closure = cm_oop_closure;

  3346 }

  3348 void CMTask::reset(CMBitMap* nextMarkBitMap) {

  3349   guarantee(nextMarkBitMap != NULL, "invariant");

  3351   if (_cm->verbose_low()) {

  3352     gclog_or_tty->print_cr("[%d] resetting", _task_id);

  3353   }

  3355   _nextMarkBitMap                = nextMarkBitMap;

  3356   clear_region_fields();

  3358   _calls                         = 0;

  3359   _elapsed_time_ms               = 0.0;

  3360   _termination_time_ms           = 0.0;

  3361   _termination_start_time_ms     = 0.0;

  3363 #if _MARKING_STATS_

  3364   _local_pushes                  = 0;

  3365   _local_pops                    = 0;

  3366   _local_max_size                = 0;

  3367   _objs_scanned                  = 0;

  3368   _global_pushes                 = 0;

  3369   _global_pops                   = 0;

  3370   _global_max_size               = 0;

  3371   _global_transfers_to           = 0;

  3372   _global_transfers_from         = 0;

  3373   _regions_claimed               = 0;

  3374   _objs_found_on_bitmap          = 0;

  3375   _satb_buffers_processed        = 0;

  3376   _steal_attempts                = 0;

  3377   _steals                        = 0;

  3378   _aborted                       = 0;

  3379   _aborted_overflow              = 0;

  3380   _aborted_cm_aborted            = 0;

  3381   _aborted_yield                 = 0;

  3382   _aborted_timed_out             = 0;

  3383   _aborted_satb                  = 0;

  3384   _aborted_termination           = 0;

  3385 #endif // _MARKING_STATS_

  3386 }

  3388 bool CMTask::should_exit_termination() {

  3389   regular_clock_call();

  3390   // This is called when we are in the termination protocol. We should

  3391   // quit if, for some reason, this task wants to abort or the global

  3392   // stack is not empty (this means that we can get work from it).

  3393   return !_cm->mark_stack_empty() || has_aborted();

  3394 }

  3396 void CMTask::reached_limit() {

  3397   assert(_words_scanned >= _words_scanned_limit ||

  3398          _refs_reached >= _refs_reached_limit ,

  3399          "shouldn't have been called otherwise");

  3400   regular_clock_call();

  3401 }

  3403 void CMTask::regular_clock_call() {

  3404   if (has_aborted()) return;

  3406   // First, we need to recalculate the words scanned and refs reached

  3407   // limits for the next clock call.

  3408   recalculate_limits();

  3410   // During the regular clock call we do the following

  3412   // (1) If an overflow has been flagged, then we abort.

  3413   if (_cm->has_overflown()) {

  3414     set_has_aborted();

  3415     return;

  3416   }

  3418   // If we are not concurrent (i.e. we're doing remark) we don't need

  3419   // to check anything else. The other steps are only needed during

  3420   // the concurrent marking phase.

  3421   if (!concurrent()) return;

  3423   // (2) If marking has been aborted for Full GC, then we also abort.

  3424   if (_cm->has_aborted()) {

  3425     set_has_aborted();

  3426     statsOnly( ++_aborted_cm_aborted );

  3427     return;

  3428   }

  3430   double curr_time_ms = os::elapsedVTime() * 1000.0;

  3432   // (3) If marking stats are enabled, then we update the step history.

  3433 #if _MARKING_STATS_

  3434   if (_words_scanned >= _words_scanned_limit) {

  3435     ++_clock_due_to_scanning;

  3436   }

  3437   if (_refs_reached >= _refs_reached_limit) {

  3438     ++_clock_due_to_marking;

  3439   }

  3441   double last_interval_ms = curr_time_ms - _interval_start_time_ms;

  3442   _interval_start_time_ms = curr_time_ms;

  3443   _all_clock_intervals_ms.add(last_interval_ms);

  3445   if (_cm->verbose_medium()) {

  3446       gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "

  3447                         "scanned = %d%s, refs reached = %d%s",

  3448                         _task_id, last_interval_ms,

  3449                         _words_scanned,

  3450                         (_words_scanned >= _words_scanned_limit) ? " (*)" : "",

  3451                         _refs_reached,

  3452                         (_refs_reached >= _refs_reached_limit) ? " (*)" : "");

  3453   }

  3454 #endif // _MARKING_STATS_

  3456   // (4) We check whether we should yield. If we have to, then we abort.

  3457   if (_cm->should_yield()) {

  3458     // We should yield. To do this we abort the task. The caller is

  3459     // responsible for yielding.

  3460     set_has_aborted();

  3461     statsOnly( ++_aborted_yield );

  3462     return;

  3463   }

  3465   // (5) We check whether we've reached our time quota. If we have,

  3466   // then we abort.

  3467   double elapsed_time_ms = curr_time_ms - _start_time_ms;

  3468   if (elapsed_time_ms > _time_target_ms) {

  3469     set_has_aborted();

  3470     _has_timed_out = true;

  3471     statsOnly( ++_aborted_timed_out );

  3472     return;

  3473   }

  3475   // (6) Finally, we check whether there are enough completed STAB

  3476   // buffers available for processing. If there are, we abort.

  3477   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();

  3478   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {

  3479     if (_cm->verbose_low()) {

  3480       gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",

  3481                              _task_id);

  3482     }

  3483     // we do need to process SATB buffers, we'll abort and restart

  3484     // the marking task to do so

  3485     set_has_aborted();

  3486     statsOnly( ++_aborted_satb );

  3487     return;

  3488   }

  3489 }

  3491 void CMTask::recalculate_limits() {

  3492   _real_words_scanned_limit = _words_scanned + words_scanned_period;

  3493   _words_scanned_limit      = _real_words_scanned_limit;

  3495   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;

  3496   _refs_reached_limit       = _real_refs_reached_limit;

  3497 }

  3499 void CMTask::decrease_limits() {

  3500   // This is called when we believe that we're going to do an infrequent

  3501   // operation which will increase the per byte scanned cost (i.e. move

  3502   // entries to/from the global stack). It basically tries to decrease the

  3503   // scanning limit so that the clock is called earlier.

  3505   if (_cm->verbose_medium()) {

  3506     gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);

  3507   }

  3509   _words_scanned_limit = _real_words_scanned_limit -

  3510     3 * words_scanned_period / 4;

  3511   _refs_reached_limit  = _real_refs_reached_limit -

  3512     3 * refs_reached_period / 4;

  3513 }

  3515 void CMTask::move_entries_to_global_stack() {

  3516   // local array where we'll store the entries that will be popped

  3517   // from the local queue

  3518   oop buffer[global_stack_transfer_size];

  3520   int n = 0;

  3521   oop obj;

  3522   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {

  3523     buffer[n] = obj;

  3524     ++n;

  3525   }

  3527   if (n > 0) {

  3528     // we popped at least one entry from the local queue

  3530     statsOnly( ++_global_transfers_to; _local_pops += n );

  3532     if (!_cm->mark_stack_push(buffer, n)) {

  3533       if (_cm->verbose_low()) {

  3534         gclog_or_tty->print_cr("[%d] aborting due to global stack overflow",

  3535                                _task_id);

  3536       }

  3537       set_has_aborted();

  3538     } else {

  3539       // the transfer was successful

  3541       if (_cm->verbose_medium()) {

  3542         gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",

  3543                                _task_id, n);

  3544       }

  3545       statsOnly( int tmp_size = _cm->mark_stack_size();

  3546                  if (tmp_size > _global_max_size) {

  3547                    _global_max_size = tmp_size;

  3548                  }

  3549                  _global_pushes += n );

  3550     }

  3551   }

  3553   // this operation was quite expensive, so decrease the limits

  3554   decrease_limits();

  3555 }

  3557 void CMTask::get_entries_from_global_stack() {

  3558   // local array where we'll store the entries that will be popped

  3559   // from the global stack.

  3560   oop buffer[global_stack_transfer_size];

  3561   int n;

  3562   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);

  3563   assert(n <= global_stack_transfer_size,

  3564          "we should not pop more than the given limit");

  3565   if (n > 0) {

  3566     // yes, we did actually pop at least one entry

  3568     statsOnly( ++_global_transfers_from; _global_pops += n );

  3569     if (_cm->verbose_medium()) {

  3570       gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",

  3571                              _task_id, n);

  3572     }

  3573     for (int i = 0; i < n; ++i) {

  3574       bool success = _task_queue->push(buffer[i]);

  3575       // We only call this when the local queue is empty or under a

  3576       // given target limit. So, we do not expect this push to fail.

  3577       assert(success, "invariant");

  3578     }

  3580     statsOnly( int tmp_size = _task_queue->size();

  3581                if (tmp_size > _local_max_size) {

  3582                  _local_max_size = tmp_size;

  3583                }

  3584                _local_pushes += n );

  3585   }

  3587   // this operation was quite expensive, so decrease the limits

  3588   decrease_limits();

  3589 }

  3591 void CMTask::drain_local_queue(bool partially) {

  3592   if (has_aborted()) return;

  3594   // Decide what the target size is, depending whether we're going to

  3595   // drain it partially (so that other tasks can steal if they run out

  3596   // of things to do) or totally (at the very end).

  3597   size_t target_size;

  3598   if (partially) {

  3599     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);

  3600   } else {

  3601     target_size = 0;

  3602   }

  3604   if (_task_queue->size() > target_size) {

  3605     if (_cm->verbose_high()) {

  3606       gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",

  3607                              _task_id, target_size);

  3608     }

  3610     oop obj;

  3611     bool ret = _task_queue->pop_local(obj);

  3612     while (ret) {

  3613       statsOnly( ++_local_pops );

  3615       if (_cm->verbose_high()) {

  3616         gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,

  3617                                (void*) obj);

  3618       }

  3620       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );

  3621       assert(!_g1h->is_on_master_free_list(

  3622                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");

  3624       scan_object(obj);

  3626       if (_task_queue->size() <= target_size || has_aborted()) {

  3627         ret = false;

  3628       } else {

  3629         ret = _task_queue->pop_local(obj);

  3630       }

  3631     }

  3633     if (_cm->verbose_high()) {

  3634       gclog_or_tty->print_cr("[%d] drained local queue, size = %d",

  3635                              _task_id, _task_queue->size());

  3636     }

  3637   }

  3638 }

  3640 void CMTask::drain_global_stack(bool partially) {

  3641   if (has_aborted()) return;

  3643   // We have a policy to drain the local queue before we attempt to

  3644   // drain the global stack.

  3645   assert(partially || _task_queue->size() == 0, "invariant");

  3647   // Decide what the target size is, depending whether we're going to

  3648   // drain it partially (so that other tasks can steal if they run out

  3649   // of things to do) or totally (at the very end).  Notice that,

  3650   // because we move entries from the global stack in chunks or

  3651   // because another task might be doing the same, we might in fact

  3652   // drop below the target. But, this is not a problem.

  3653   size_t target_size;

  3654   if (partially) {

  3655     target_size = _cm->partial_mark_stack_size_target();

  3656   } else {

  3657     target_size = 0;

  3658   }

  3660   if (_cm->mark_stack_size() > target_size) {

  3661     if (_cm->verbose_low()) {

  3662       gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",

  3663                              _task_id, target_size);

  3664     }

  3666     while (!has_aborted() && _cm->mark_stack_size() > target_size) {

  3667       get_entries_from_global_stack();

  3668       drain_local_queue(partially);

  3669     }

  3671     if (_cm->verbose_low()) {

  3672       gclog_or_tty->print_cr("[%d] drained global stack, size = %d",

  3673                              _task_id, _cm->mark_stack_size());

  3674     }

  3675   }

  3676 }

  3678 // SATB Queue has several assumptions on whether to call the par or

  3679 // non-par versions of the methods. this is why some of the code is

  3680 // replicated. We should really get rid of the single-threaded version

  3681 // of the code to simplify things.

  3682 void CMTask::drain_satb_buffers() {

  3683   if (has_aborted()) return;

  3685   // We set this so that the regular clock knows that we're in the

  3686   // middle of draining buffers and doesn't set the abort flag when it

  3687   // notices that SATB buffers are available for draining. It'd be

  3688   // very counter productive if it did that. :-)

  3689   _draining_satb_buffers = true;

  3691   CMObjectClosure oc(this);

  3692   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();

  3693   if (G1CollectedHeap::use_parallel_gc_threads()) {

  3694     satb_mq_set.set_par_closure(_task_id, &oc);

  3695   } else {

  3696     satb_mq_set.set_closure(&oc);

  3697   }

  3699   // This keeps claiming and applying the closure to completed buffers

  3700   // until we run out of buffers or we need to abort.

  3701   if (G1CollectedHeap::use_parallel_gc_threads()) {

  3702     while (!has_aborted() &&

  3703            satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {

  3704       if (_cm->verbose_medium()) {

  3705         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);

  3706       }

  3707       statsOnly( ++_satb_buffers_processed );

  3708       regular_clock_call();

  3709     }

  3710   } else {

  3711     while (!has_aborted() &&

  3712            satb_mq_set.apply_closure_to_completed_buffer()) {

  3713       if (_cm->verbose_medium()) {

  3714         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);

  3715       }

  3716       statsOnly( ++_satb_buffers_processed );

  3717       regular_clock_call();

  3718     }

  3719   }

  3721   if (!concurrent() && !has_aborted()) {

  3722     // We should only do this during remark.

  3723     if (G1CollectedHeap::use_parallel_gc_threads()) {

  3724       satb_mq_set.par_iterate_closure_all_threads(_task_id);

  3725     } else {

  3726       satb_mq_set.iterate_closure_all_threads();

  3727     }

  3728   }

  3730   _draining_satb_buffers = false;

  3732   assert(has_aborted() ||

  3733          concurrent() ||

  3734          satb_mq_set.completed_buffers_num() == 0, "invariant");

  3736   if (G1CollectedHeap::use_parallel_gc_threads()) {

  3737     satb_mq_set.set_par_closure(_task_id, NULL);

  3738   } else {

  3739     satb_mq_set.set_closure(NULL);

  3740   }

  3742   // again, this was a potentially expensive operation, decrease the

  3743   // limits to get the regular clock call early

  3744   decrease_limits();

  3745 }

  3747 void CMTask::print_stats() {

  3748   gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",

  3749                          _task_id, _calls);

  3750   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",

  3751                          _elapsed_time_ms, _termination_time_ms);

  3752   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",

  3753                          _step_times_ms.num(), _step_times_ms.avg(),

  3754                          _step_times_ms.sd());

  3755   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",

  3756                          _step_times_ms.maximum(), _step_times_ms.sum());

  3758 #if _MARKING_STATS_

  3759   gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",

  3760                          _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),

  3761                          _all_clock_intervals_ms.sd());

  3762   gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",

  3763                          _all_clock_intervals_ms.maximum(),

  3764                          _all_clock_intervals_ms.sum());

  3765   gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",

  3766                          _clock_due_to_scanning, _clock_due_to_marking);

  3767   gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",

  3768                          _objs_scanned, _objs_found_on_bitmap);

  3769   gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",

  3770                          _local_pushes, _local_pops, _local_max_size);

  3771   gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",

  3772                          _global_pushes, _global_pops, _global_max_size);

  3773   gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",

  3774                          _global_transfers_to,_global_transfers_from);

  3775   gclog_or_tty->print_cr("  Regions: claimed = %d", _regions_claimed);

  3776   gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);

  3777   gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",

  3778                          _steal_attempts, _steals);

  3779   gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);

  3780   gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",

  3781                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);

  3782   gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",

  3783                          _aborted_timed_out, _aborted_satb, _aborted_termination);

  3784 #endif // _MARKING_STATS_

  3785 }

  3787 /*****************************************************************************

  3789     The do_marking_step(time_target_ms) method is the building block

  3790     of the parallel marking framework. It can be called in parallel

  3791     with other invocations of do_marking_step() on different tasks

  3792     (but only one per task, obviously) and concurrently with the

  3793     mutator threads, or during remark, hence it eliminates the need

  3794     for two versions of the code. When called during remark, it will

  3795     pick up from where the task left off during the concurrent marking

  3796     phase. Interestingly, tasks are also claimable during evacuation

  3797     pauses too, since do_marking_step() ensures that it aborts before

  3798     it needs to yield.

  3800     The data structures that it uses to do marking work are the

  3801     following:

  3803       (1) Marking Bitmap. If there are gray objects that appear only

  3804       on the bitmap (this happens either when dealing with an overflow

  3805       or when the initial marking phase has simply marked the roots

  3806       and didn't push them on the stack), then tasks claim heap

  3807       regions whose bitmap they then scan to find gray objects. A

  3808       global finger indicates where the end of the last claimed region

  3809       is. A local finger indicates how far into the region a task has

  3810       scanned. The two fingers are used to determine how to gray an

  3811       object (i.e. whether simply marking it is OK, as it will be

  3812       visited by a task in the future, or whether it needs to be also

  3813       pushed on a stack).

  3815       (2) Local Queue. The local queue of the task which is accessed

  3816       reasonably efficiently by the task. Other tasks can steal from

  3817       it when they run out of work. Throughout the marking phase, a

  3818       task attempts to keep its local queue short but not totally

  3819       empty, so that entries are available for stealing by other

  3820       tasks. Only when there is no more work, a task will totally

  3821       drain its local queue.

  3823       (3) Global Mark Stack. This handles local queue overflow. During

  3824       marking only sets of entries are moved between it and the local

  3825       queues, as access to it requires a mutex and more fine-grain

  3826       interaction with it which might cause contention. If it

  3827       overflows, then the marking phase should restart and iterate

  3828       over the bitmap to identify gray objects. Throughout the marking

  3829       phase, tasks attempt to keep the global mark stack at a small

  3830       length but not totally empty, so that entries are available for

  3831       popping by other tasks. Only when there is no more work, tasks

  3832       will totally drain the global mark stack.

  3834       (4) SATB Buffer Queue. This is where completed SATB buffers are

  3835       made available. Buffers are regularly removed from this queue

  3836       and scanned for roots, so that the queue doesn't get too

  3837       long. During remark, all completed buffers are processed, as

  3838       well as the filled in parts of any uncompleted buffers.

  3840     The do_marking_step() method tries to abort when the time target

  3841     has been reached. There are a few other cases when the

  3842     do_marking_step() method also aborts:

  3844       (1) When the marking phase has been aborted (after a Full GC).

  3846       (2) When a global overflow (on the global stack) has been

  3847       triggered. Before the task aborts, it will actually sync up with

  3848       the other tasks to ensure that all the marking data structures

  3849       (local queues, stacks, fingers etc.)  are re-initialised so that

  3850       when do_marking_step() completes, the marking phase can

  3851       immediately restart.

  3853       (3) When enough completed SATB buffers are available. The

  3854       do_marking_step() method only tries to drain SATB buffers right

  3855       at the beginning. So, if enough buffers are available, the

  3856       marking step aborts and the SATB buffers are processed at

  3857       the beginning of the next invocation.

  3859       (4) To yield. When we have to yield, we abort and yield

  3860       right at the end of do_marking_step(). This saves us from a lot

  3861       of hassle as, by yielding we might allow a Full GC. If this

  3862       happens then objects will be compacted underneath our feet, the

  3863       heap might shrink, etc. We save checking for this by just

  3864       aborting and doing the yield right at the end.

  3866     From the above it follows that the do_marking_step() method should

  3867     be called in a loop (or, otherwise, regularly) until it completes.

  3869     If a marking step completes without its has_aborted() flag being

  3870     true, it means it has completed the current marking phase (and

  3871     also all other marking tasks have done so and have all synced up).

  3873     A method called regular_clock_call() is invoked "regularly" (in

  3874     sub ms intervals) throughout marking. It is this clock method that

  3875     checks all the abort conditions which were mentioned above and

  3876     decides when the task should abort. A work-based scheme is used to

  3877     trigger this clock method: when the number of object words the

  3878     marking phase has scanned or the number of references the marking

  3879     phase has visited reach a given limit. Additional invocations to

  3880     the method clock have been planted in a few other strategic places

  3881     too. The initial reason for the clock method was to avoid calling

  3882     vtime too regularly, as it is quite expensive. So, once it was in

  3883     place, it was natural to piggy-back all the other conditions on it

  3884     too and not constantly check them throughout the code.

  3886  *****************************************************************************/

  3888 void CMTask::do_marking_step(double time_target_ms,

  3889                              bool do_stealing,

  3890                              bool do_termination) {

  3891   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");

  3892   assert(concurrent() == _cm->concurrent(), "they should be the same");

  3894   G1CollectorPolicy* g1_policy = _g1h->g1_policy();

  3895   assert(_task_queues != NULL, "invariant");

  3896   assert(_task_queue != NULL, "invariant");

  3897   assert(_task_queues->queue(_task_id) == _task_queue, "invariant");

  3899   assert(!_claimed,

  3900          "only one thread should claim this task at any one time");

  3902   // OK, this doesn't safeguard again all possible scenarios, as it is

  3903   // possible for two threads to set the _claimed flag at the same

  3904   // time. But it is only for debugging purposes anyway and it will

  3905   // catch most problems.

  3906   _claimed = true;

  3908   _start_time_ms = os::elapsedVTime() * 1000.0;

  3909   statsOnly( _interval_start_time_ms = _start_time_ms );

  3911   double diff_prediction_ms =

  3912     g1_policy->get_new_prediction(&_marking_step_diffs_ms);

  3913   _time_target_ms = time_target_ms - diff_prediction_ms;

  3915   // set up the variables that are used in the work-based scheme to

  3916   // call the regular clock method

  3917   _words_scanned = 0;

  3918   _refs_reached  = 0;

  3919   recalculate_limits();

  3921   // clear all flags

  3922   clear_has_aborted();

  3923   _has_timed_out = false;

  3924   _draining_satb_buffers = false;

  3926   ++_calls;

  3928   if (_cm->verbose_low()) {

  3929     gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "

  3930                            "target = %1.2lfms >>>>>>>>>>",

  3931                            _task_id, _calls, _time_target_ms);

  3932   }

  3934   // Set up the bitmap and oop closures. Anything that uses them is

  3935   // eventually called from this method, so it is OK to allocate these

  3936   // statically.

  3937   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);

  3938   G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);

  3939   set_cm_oop_closure(&cm_oop_closure);

  3941   if (_cm->has_overflown()) {

  3942     // This can happen if the mark stack overflows during a GC pause

  3943     // and this task, after a yield point, restarts. We have to abort

  3944     // as we need to get into the overflow protocol which happens

  3945     // right at the end of this task.

  3946     set_has_aborted();

  3947   }

  3949   // First drain any available SATB buffers. After this, we will not

  3950   // look at SATB buffers before the next invocation of this method.

  3951   // If enough completed SATB buffers are queued up, the regular clock

  3952   // will abort this task so that it restarts.

  3953   drain_satb_buffers();

  3954   // ...then partially drain the local queue and the global stack

  3955   drain_local_queue(true);

  3956   drain_global_stack(true);

  3958   do {

  3959     if (!has_aborted() && _curr_region != NULL) {

  3960       // This means that we're already holding on to a region.

  3961       assert(_finger != NULL, "if region is not NULL, then the finger "

  3962              "should not be NULL either");

  3964       // We might have restarted this task after an evacuation pause

  3965       // which might have evacuated the region we're holding on to

  3966       // underneath our feet. Let's read its limit again to make sure

  3967       // that we do not iterate over a region of the heap that

  3968       // contains garbage (update_region_limit() will also move

  3969       // _finger to the start of the region if it is found empty).

  3970       update_region_limit();

  3971       // We will start from _finger not from the start of the region,

  3972       // as we might be restarting this task after aborting half-way

  3973       // through scanning this region. In this case, _finger points to

  3974       // the address where we last found a marked object. If this is a

  3975       // fresh region, _finger points to start().

  3976       MemRegion mr = MemRegion(_finger, _region_limit);

  3978       if (_cm->verbose_low()) {

  3979         gclog_or_tty->print_cr("[%d] we're scanning part "

  3980                                "["PTR_FORMAT", "PTR_FORMAT") "

  3981                                "of region "PTR_FORMAT,

  3982                                _task_id, _finger, _region_limit, _curr_region);

  3983       }

  3985       // Let's iterate over the bitmap of the part of the

  3986       // region that is left.

  3987       if (mr.is_empty() || _nextMarkBitMap->iterate(&bitmap_closure, mr)) {

  3988         // We successfully completed iterating over the region. Now,

  3989         // let's give up the region.

  3990         giveup_current_region();

  3991         regular_clock_call();

  3992       } else {

  3993         assert(has_aborted(), "currently the only way to do so");

  3994         // The only way to abort the bitmap iteration is to return

  3995         // false from the do_bit() method. However, inside the

  3996         // do_bit() method we move the _finger to point to the

  3997         // object currently being looked at. So, if we bail out, we

  3998         // have definitely set _finger to something non-null.

  3999         assert(_finger != NULL, "invariant");

  4001         // Region iteration was actually aborted. So now _finger

  4002         // points to the address of the object we last scanned. If we

  4003         // leave it there, when we restart this task, we will rescan

  4004         // the object. It is easy to avoid this. We move the finger by

  4005         // enough to point to the next possible object header (the

  4006         // bitmap knows by how much we need to move it as it knows its

  4007         // granularity).

  4008         assert(_finger < _region_limit, "invariant");

  4009         HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);

  4010         // Check if bitmap iteration was aborted while scanning the last object

  4011         if (new_finger >= _region_limit) {

  4012           giveup_current_region();

  4013         } else {

  4014           move_finger_to(new_finger);

  4015         }

  4016       }

  4017     }

  4018     // At this point we have either completed iterating over the

  4019     // region we were holding on to, or we have aborted.

  4021     // We then partially drain the local queue and the global stack.

  4022     // (Do we really need this?)

  4023     drain_local_queue(true);

  4024     drain_global_stack(true);

  4026     // Read the note on the claim_region() method on why it might

  4027     // return NULL with potentially more regions available for

  4028     // claiming and why we have to check out_of_regions() to determine

  4029     // whether we're done or not.

  4030     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {

  4031       // We are going to try to claim a new region. We should have

  4032       // given up on the previous one.

  4033       // Separated the asserts so that we know which one fires.

  4034       assert(_curr_region  == NULL, "invariant");

  4035       assert(_finger       == NULL, "invariant");

  4036       assert(_region_limit == NULL, "invariant");

  4037       if (_cm->verbose_low()) {

  4038         gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);

  4039       }

  4040       HeapRegion* claimed_region = _cm->claim_region(_task_id);

  4041       if (claimed_region != NULL) {

  4042         // Yes, we managed to claim one

  4043         statsOnly( ++_regions_claimed );

  4045         if (_cm->verbose_low()) {

  4046           gclog_or_tty->print_cr("[%d] we successfully claimed "

  4047                                  "region "PTR_FORMAT,

  4048                                  _task_id, claimed_region);

  4049         }

  4051         setup_for_region(claimed_region);

  4052         assert(_curr_region == claimed_region, "invariant");

  4053       }

  4054       // It is important to call the regular clock here. It might take

  4055       // a while to claim a region if, for example, we hit a large

  4056       // block of empty regions. So we need to call the regular clock

  4057       // method once round the loop to make sure it's called

  4058       // frequently enough.

  4059       regular_clock_call();

  4060     }

  4062     if (!has_aborted() && _curr_region == NULL) {

  4063       assert(_cm->out_of_regions(),

  4064              "at this point we should be out of regions");

  4065     }

  4066   } while ( _curr_region != NULL && !has_aborted());

  4068   if (!has_aborted()) {

  4069     // We cannot check whether the global stack is empty, since other

  4070     // tasks might be pushing objects to it concurrently.

  4071     assert(_cm->out_of_regions(),

  4072            "at this point we should be out of regions");

  4074     if (_cm->verbose_low()) {

  4075       gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);

  4076     }

  4078     // Try to reduce the number of available SATB buffers so that

  4079     // remark has less work to do.

  4080     drain_satb_buffers();

  4081   }

  4083   // Since we've done everything else, we can now totally drain the

  4084   // local queue and global stack.

  4085   drain_local_queue(false);

  4086   drain_global_stack(false);

  4088   // Attempt at work stealing from other task's queues.

  4089   if (do_stealing && !has_aborted()) {

  4090     // We have not aborted. This means that we have finished all that

  4091     // we could. Let's try to do some stealing...

  4093     // We cannot check whether the global stack is empty, since other

  4094     // tasks might be pushing objects to it concurrently.

  4095     assert(_cm->out_of_regions() && _task_queue->size() == 0,

  4096            "only way to reach here");

  4098     if (_cm->verbose_low()) {

  4099       gclog_or_tty->print_cr("[%d] starting to steal", _task_id);

  4100     }

  4102     while (!has_aborted()) {

  4103       oop obj;

  4104       statsOnly( ++_steal_attempts );

  4106       if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {

  4107         if (_cm->verbose_medium()) {

  4108           gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",

  4109                                  _task_id, (void*) obj);

  4110         }

  4112         statsOnly( ++_steals );

  4114         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),

  4115                "any stolen object should be marked");

  4116         scan_object(obj);

  4118         // And since we're towards the end, let's totally drain the

  4119         // local queue and global stack.

  4120         drain_local_queue(false);

  4121         drain_global_stack(false);

  4122       } else {

  4123         break;

  4124       }

  4125     }

  4126   }

  4128   // If we are about to wrap up and go into termination, check if we

  4129   // should raise the overflow flag.

  4130   if (do_termination && !has_aborted()) {

  4131     if (_cm->force_overflow()->should_force()) {

  4132       _cm->set_has_overflown();

  4133       regular_clock_call();

  4134     }

  4135   }

  4137   // We still haven't aborted. Now, let's try to get into the

  4138   // termination protocol.

  4139   if (do_termination && !has_aborted()) {

  4140     // We cannot check whether the global stack is empty, since other

  4141     // tasks might be concurrently pushing objects on it.

  4142     // Separated the asserts so that we know which one fires.

  4143     assert(_cm->out_of_regions(), "only way to reach here");

  4144     assert(_task_queue->size() == 0, "only way to reach here");

  4146     if (_cm->verbose_low()) {

  4147       gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);

  4148     }

  4150     _termination_start_time_ms = os::elapsedVTime() * 1000.0;

  4151     // The CMTask class also extends the TerminatorTerminator class,

  4152     // hence its should_exit_termination() method will also decide

  4153     // whether to exit the termination protocol or not.

  4154     bool finished = _cm->terminator()->offer_termination(this);

  4155     double termination_end_time_ms = os::elapsedVTime() * 1000.0;

  4156     _termination_time_ms +=

  4157       termination_end_time_ms - _termination_start_time_ms;

  4159     if (finished) {

  4160       // We're all done.

  4162       if (_task_id == 0) {

  4163         // let's allow task 0 to do this

  4164         if (concurrent()) {

  4165           assert(_cm->concurrent_marking_in_progress(), "invariant");

  4166           // we need to set this to false before the next

  4167           // safepoint. This way we ensure that the marking phase

  4168           // doesn't observe any more heap expansions.

  4169           _cm->clear_concurrent_marking_in_progress();

  4170         }

  4171       }

  4173       // We can now guarantee that the global stack is empty, since

  4174       // all other tasks have finished. We separated the guarantees so

  4175       // that, if a condition is false, we can immediately find out

  4176       // which one.

  4177       guarantee(_cm->out_of_regions(), "only way to reach here");

  4178       guarantee(_cm->mark_stack_empty(), "only way to reach here");

  4179       guarantee(_task_queue->size() == 0, "only way to reach here");

  4180       guarantee(!_cm->has_overflown(), "only way to reach here");

  4181       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");

  4183       if (_cm->verbose_low()) {

  4184         gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);

  4185       }

  4186     } else {

  4187       // Apparently there's more work to do. Let's abort this task. It

  4188       // will restart it and we can hopefully find more things to do.

  4190       if (_cm->verbose_low()) {

  4191         gclog_or_tty->print_cr("[%d] apparently there is more work to do",

  4192                                _task_id);

  4193       }

  4195       set_has_aborted();

  4196       statsOnly( ++_aborted_termination );

  4197     }

  4198   }

  4200   // Mainly for debugging purposes to make sure that a pointer to the

  4201   // closure which was statically allocated in this frame doesn't

  4202   // escape it by accident.

  4203   set_cm_oop_closure(NULL);

  4204   double end_time_ms = os::elapsedVTime() * 1000.0;

  4205   double elapsed_time_ms = end_time_ms - _start_time_ms;

  4206   // Update the step history.

  4207   _step_times_ms.add(elapsed_time_ms);

  4209   if (has_aborted()) {

  4210     // The task was aborted for some reason.

  4212     statsOnly( ++_aborted );

  4214     if (_has_timed_out) {

  4215       double diff_ms = elapsed_time_ms - _time_target_ms;

  4216       // Keep statistics of how well we did with respect to hitting

  4217       // our target only if we actually timed out (if we aborted for

  4218       // other reasons, then the results might get skewed).

  4219       _marking_step_diffs_ms.add(diff_ms);

  4220     }

  4222     if (_cm->has_overflown()) {

  4223       // This is the interesting one. We aborted because a global

  4224       // overflow was raised. This means we have to restart the

  4225       // marking phase and start iterating over regions. However, in

  4226       // order to do this we have to make sure that all tasks stop

  4227       // what they are doing and re-initialise in a safe manner. We

  4228       // will achieve this with the use of two barrier sync points.

  4230       if (_cm->verbose_low()) {

  4231         gclog_or_tty->print_cr("[%d] detected overflow", _task_id);

  4232       }

  4234       _cm->enter_first_sync_barrier(_task_id);

  4235       // When we exit this sync barrier we know that all tasks have

  4236       // stopped doing marking work. So, it's now safe to

  4237       // re-initialise our data structures. At the end of this method,

  4238       // task 0 will clear the global data structures.

  4240       statsOnly( ++_aborted_overflow );

  4242       // We clear the local state of this task...

  4243       clear_region_fields();

  4245       // ...and enter the second barrier.

  4246       _cm->enter_second_sync_barrier(_task_id);

  4247       // At this point everything has bee re-initialised and we're

  4248       // ready to restart.

  4249     }

  4251     if (_cm->verbose_low()) {

  4252       gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "

  4253                              "elapsed = %1.2lfms <<<<<<<<<<",

  4254                              _task_id, _time_target_ms, elapsed_time_ms);

  4255       if (_cm->has_aborted()) {

  4256         gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",

  4257                                _task_id);

  4258       }

  4259     }

  4260   } else {

  4261     if (_cm->verbose_low()) {

  4262       gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "

  4263                              "elapsed = %1.2lfms <<<<<<<<<<",

  4264                              _task_id, _time_target_ms, elapsed_time_ms);

  4265     }

  4266   }

  4268   _claimed = false;

  4269 }

  4271 CMTask::CMTask(int task_id,

  4272                ConcurrentMark* cm,

  4273                size_t* marked_bytes,

  4274                BitMap* card_bm,

  4275                CMTaskQueue* task_queue,

  4276                CMTaskQueueSet* task_queues)

  4277   : _g1h(G1CollectedHeap::heap()),

  4278     _task_id(task_id), _cm(cm),

  4279     _claimed(false),

  4280     _nextMarkBitMap(NULL), _hash_seed(17),

  4281     _task_queue(task_queue),

  4282     _task_queues(task_queues),

  4283     _cm_oop_closure(NULL),

  4284     _marked_bytes_array(marked_bytes),

  4285     _card_bm(card_bm) {

  4286   guarantee(task_queue != NULL, "invariant");

  4287   guarantee(task_queues != NULL, "invariant");

  4289   statsOnly( _clock_due_to_scanning = 0;

  4290              _clock_due_to_marking  = 0 );

  4292   _marking_step_diffs_ms.add(0.5);

  4293 }

  // These are formatting macros that are used below to ensure
  // consistent formatting. The *_H_* versions are used to format the
  // header for a particular value and they should be kept consistent
  // with the corresponding macro. Also note that most of the macros add
  // the necessary white space (as a prefix) which makes them a bit
  // easier to compose.
  //
  // Adjacent string literals and macro names are always separated by
  // whitespace. Without it, C++11 and later lex a literal directly
  // followed by an identifier (e.g. "-"PTR_FORMAT) as a literal with a
  // user-defined suffix. The expanded strings are unaffected by the
  // extra whitespace.

  // All the output lines are prefixed with this string to be able to
  // identify them easily in a large log file.
  #define G1PPRL_LINE_PREFIX            "###"

  // Address range "<start>-<end>"; the header variant is a string field
  // whose width depends on the pointer size.
  #define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
  #ifdef _LP64
  #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
  #else // _LP64
  #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
  #endif // _LP64

  // For per-region info
  #define G1PPRL_TYPE_FORMAT            "   %-4s"
  #define G1PPRL_TYPE_H_FORMAT          "   %4s"
  #define G1PPRL_BYTE_FORMAT            "  " SIZE_FORMAT_W(9)
  #define G1PPRL_BYTE_H_FORMAT          "  %9s"
  #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
  #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"

  // For summary info; tag must be a string literal.
  #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
  #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": " SIZE_FORMAT
  #define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
  #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"

  4327 G1PrintRegionLivenessInfoClosure::

  4328 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)

  4329   : _out(out),

  4330     _total_used_bytes(0), _total_capacity_bytes(0),

  4331     _total_prev_live_bytes(0), _total_next_live_bytes(0),

  4332     _hum_used_bytes(0), _hum_capacity_bytes(0),

  4333     _hum_prev_live_bytes(0), _hum_next_live_bytes(0) {

  4334   G1CollectedHeap* g1h = G1CollectedHeap::heap();

  4335   MemRegion g1_committed = g1h->g1_committed();

  4336   MemRegion g1_reserved = g1h->g1_reserved();

  4337   double now = os::elapsedTime();

  4339   // Print the header of the output.

  4340   _out->cr();

  4341   _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);

  4342   _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"

  4343                  G1PPRL_SUM_ADDR_FORMAT("committed")

  4344                  G1PPRL_SUM_ADDR_FORMAT("reserved")

  4345                  G1PPRL_SUM_BYTE_FORMAT("region-size"),

  4346                  g1_committed.start(), g1_committed.end(),

  4347                  g1_reserved.start(), g1_reserved.end(),

  4348                  HeapRegion::GrainBytes);

  4349   _out->print_cr(G1PPRL_LINE_PREFIX);

  4350   _out->print_cr(G1PPRL_LINE_PREFIX

  4351                  G1PPRL_TYPE_H_FORMAT

  4352                  G1PPRL_ADDR_BASE_H_FORMAT

  4353                  G1PPRL_BYTE_H_FORMAT

  4354                  G1PPRL_BYTE_H_FORMAT

  4355                  G1PPRL_BYTE_H_FORMAT

  4356                  G1PPRL_DOUBLE_H_FORMAT,

  4357                  "type", "address-range",

  4358                  "used", "prev-live", "next-live", "gc-eff");

  4359   _out->print_cr(G1PPRL_LINE_PREFIX

  4360                  G1PPRL_TYPE_H_FORMAT

  4361                  G1PPRL_ADDR_BASE_H_FORMAT

  4362                  G1PPRL_BYTE_H_FORMAT

  4363                  G1PPRL_BYTE_H_FORMAT

  4364                  G1PPRL_BYTE_H_FORMAT

  4365                  G1PPRL_DOUBLE_H_FORMAT,

  4366                  "", "",

  4367                  "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)");

  4368 }

  4370 // It takes as a parameter a reference to one of the _hum_* fields, it

  4371 // deduces the corresponding value for a region in a humongous region

  4372 // series (either the region size, or what's left if the _hum_* field

  4373 // is < the region size), and updates the _hum_* field accordingly.

  4374 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {

  4375   size_t bytes = 0;

  4376   // The > 0 check is to deal with the prev and next live bytes which

  4377   // could be 0.

  4378   if (*hum_bytes > 0) {

  4379     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);

  4380     *hum_bytes -= bytes;

  4381   }

  4382   return bytes;

  4383 }

  4385 // It deduces the values for a region in a humongous region series

  4386 // from the _hum_* fields and updates those accordingly. It assumes

  4387 // that that _hum_* fields have already been set up from the "starts

  4388 // humongous" region and we visit the regions in address order.

  4389 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,

  4390                                                      size_t* capacity_bytes,

  4391                                                      size_t* prev_live_bytes,

  4392                                                      size_t* next_live_bytes) {

  4393   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");

  4394   *used_bytes      = get_hum_bytes(&_hum_used_bytes);

  4395   *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);

  4396   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);

  4397   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);

  4398 }

  4400 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {

  4401   const char* type = "";

  4402   HeapWord* bottom       = r->bottom();

  4403   HeapWord* end          = r->end();

  4404   size_t capacity_bytes  = r->capacity();

  4405   size_t used_bytes      = r->used();

  4406   size_t prev_live_bytes = r->live_bytes();

  4407   size_t next_live_bytes = r->next_live_bytes();

  4408   double gc_eff          = r->gc_efficiency();

  4409   if (r->used() == 0) {

  4410     type = "FREE";

  4411   } else if (r->is_survivor()) {

  4412     type = "SURV";

  4413   } else if (r->is_young()) {

  4414     type = "EDEN";

  4415   } else if (r->startsHumongous()) {

  4416     type = "HUMS";

  4418     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&

  4419            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,

  4420            "they should have been zeroed after the last time we used them");

  4421     // Set up the _hum_* fields.

  4422     _hum_capacity_bytes  = capacity_bytes;

  4423     _hum_used_bytes      = used_bytes;

  4424     _hum_prev_live_bytes = prev_live_bytes;

  4425     _hum_next_live_bytes = next_live_bytes;

  4426     get_hum_bytes(&used_bytes, &capacity_bytes,

  4427                   &prev_live_bytes, &next_live_bytes);

  4428     end = bottom + HeapRegion::GrainWords;

  4429   } else if (r->continuesHumongous()) {

  4430     type = "HUMC";

  4431     get_hum_bytes(&used_bytes, &capacity_bytes,

  4432                   &prev_live_bytes, &next_live_bytes);

  4433     assert(end == bottom + HeapRegion::GrainWords, "invariant");

  4434   } else {

  4435     type = "OLD";

  4436   }

  4438   _total_used_bytes      += used_bytes;

  4439   _total_capacity_bytes  += capacity_bytes;

  4440   _total_prev_live_bytes += prev_live_bytes;

  4441   _total_next_live_bytes += next_live_bytes;

  4443   // Print a line for this particular region.

  4444   _out->print_cr(G1PPRL_LINE_PREFIX

  4445                  G1PPRL_TYPE_FORMAT

  4446                  G1PPRL_ADDR_BASE_FORMAT

  4447                  G1PPRL_BYTE_FORMAT

  4448                  G1PPRL_BYTE_FORMAT

  4449                  G1PPRL_BYTE_FORMAT

  4450                  G1PPRL_DOUBLE_FORMAT,

  4451                  type, bottom, end,

  4452                  used_bytes, prev_live_bytes, next_live_bytes, gc_eff);

  4454   return false;

  4455 }

  4457 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {

  4458   // Print the footer of the output.

  4459   _out->print_cr(G1PPRL_LINE_PREFIX);

  4460   _out->print_cr(G1PPRL_LINE_PREFIX

  4461                  " SUMMARY"

  4462                  G1PPRL_SUM_MB_FORMAT("capacity")

  4463                  G1PPRL_SUM_MB_PERC_FORMAT("used")

  4464                  G1PPRL_SUM_MB_PERC_FORMAT("prev-live")

  4465                  G1PPRL_SUM_MB_PERC_FORMAT("next-live"),

  4466                  bytes_to_mb(_total_capacity_bytes),

  4467                  bytes_to_mb(_total_used_bytes),

  4468                  perc(_total_used_bytes, _total_capacity_bytes),

  4469                  bytes_to_mb(_total_prev_live_bytes),

  4470                  perc(_total_prev_live_bytes, _total_capacity_bytes),

  4471                  bytes_to_mb(_total_next_live_bytes),

  4472                  perc(_total_next_live_bytes, _total_capacity_bytes));

  4473   _out->cr();

  4474 }

Mercurial > jdk8-mips64-public > hotspot / file revision

src/share/vm/gc_implementation/g1/concurrentMark.cpp@37552638d24a

src/share/vm/gc_implementation/g1/concurrentMark.cpp