src/share/vm/gc_implementation/g1/concurrentMark.cpp

changeset 777:37f87013dfd8
child 778:afc1ce1efe66
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Thu Jun 05 15:57:56 2008 -0700
     1.3 @@ -0,0 +1,3957 @@
     1.4 +/*
     1.5 + * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
     1.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     1.7 + *
     1.8 + * This code is free software; you can redistribute it and/or modify it
     1.9 + * under the terms of the GNU General Public License version 2 only, as
    1.10 + * published by the Free Software Foundation.
    1.11 + *
    1.12 + * This code is distributed in the hope that it will be useful, but WITHOUT
    1.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    1.14 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    1.15 + * version 2 for more details (a copy is included in the LICENSE file that
    1.16 + * accompanied this code).
    1.17 + *
    1.18 + * You should have received a copy of the GNU General Public License version
    1.19 + * 2 along with this work; if not, write to the Free Software Foundation,
    1.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    1.21 + *
    1.22 + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
    1.23 + * CA 95054 USA or visit www.sun.com if you need additional information or
    1.24 + * have any questions.
    1.25 + *
    1.26 + */
    1.27 +
    1.28 +#include "incls/_precompiled.incl"
    1.29 +#include "incls/_concurrentMark.cpp.incl"
    1.30 +
    1.31 +//
    1.32 +// Concurrent Mark (CM) Bit Map Wrapper
    1.33 +
    1.34 +CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter):
    1.35 +  _bm((uintptr_t*)NULL,0),
    1.36 +  _shifter(shifter) {
    1.37 +  _bmStartWord = (HeapWord*)(rs.base());
    1.38 +  _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
    1.39 +  ReservedSpace brs(ReservedSpace::allocation_align_size_up(
    1.40 +                     (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
    1.41 +
    1.42 +  guarantee(brs.is_reserved(), "couldn't allocate CMS bit map");
    1.43 +  // For now we'll just commit all of the bit map up front.
    1.44 +  // Later on we'll try to be more parsimonious with swap.
    1.45 +  guarantee(_virtual_space.initialize(brs, brs.size()),
    1.46 +            "couldn't reseve backing store for CMS bit map");
    1.47 +  assert(_virtual_space.committed_size() == brs.size(),
    1.48 +         "didn't reserve backing store for all of CMS bit map?");
    1.49 +  _bm.set_map((uintptr_t*)_virtual_space.low());
    1.50 +  assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
    1.51 +         _bmWordSize, "inconsistency in bit map sizing");
    1.52 +  _bm.set_size(_bmWordSize >> _shifter);
    1.53 +}
    1.54 +
    1.55 +HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
    1.56 +                                               HeapWord* limit) const {
    1.57 +  // First we must round addr *up* to a possible object boundary.
    1.58 +  addr = (HeapWord*)align_size_up((intptr_t)addr,
    1.59 +                                  HeapWordSize << _shifter);
    1.60 +  size_t addrOffset = heapWordToOffset(addr);
    1.61 +  if (limit == NULL) limit = _bmStartWord + _bmWordSize;
    1.62 +  size_t limitOffset = heapWordToOffset(limit);
    1.63 +  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
    1.64 +  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
    1.65 +  assert(nextAddr >= addr, "get_next_one postcondition");
    1.66 +  assert(nextAddr == limit || isMarked(nextAddr),
    1.67 +         "get_next_one postcondition");
    1.68 +  return nextAddr;
    1.69 +}
    1.70 +
    1.71 +HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
    1.72 +                                                 HeapWord* limit) const {
    1.73 +  size_t addrOffset = heapWordToOffset(addr);
    1.74 +  if (limit == NULL) limit = _bmStartWord + _bmWordSize;
    1.75 +  size_t limitOffset = heapWordToOffset(limit);
    1.76 +  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
    1.77 +  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
    1.78 +  assert(nextAddr >= addr, "get_next_one postcondition");
    1.79 +  assert(nextAddr == limit || !isMarked(nextAddr),
    1.80 +         "get_next_one postcondition");
    1.81 +  return nextAddr;
    1.82 +}
    1.83 +
    1.84 +int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
    1.85 +  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
    1.86 +  return (int) (diff >> _shifter);
    1.87 +}
    1.88 +
    1.89 +bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
    1.90 +  HeapWord* left  = MAX2(_bmStartWord, mr.start());
    1.91 +  HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
    1.92 +  if (right > left) {
    1.93 +    // Right-open interval [leftOffset, rightOffset).
    1.94 +    return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
    1.95 +  } else {
    1.96 +    return true;
    1.97 +  }
    1.98 +}
    1.99 +
   1.100 +void CMBitMapRO::mostly_disjoint_range_union(BitMap*   from_bitmap,
   1.101 +                                             size_t    from_start_index,
   1.102 +                                             HeapWord* to_start_word,
   1.103 +                                             size_t    word_num) {
   1.104 +  _bm.mostly_disjoint_range_union(from_bitmap,
   1.105 +                                  from_start_index,
   1.106 +                                  heapWordToOffset(to_start_word),
   1.107 +                                  word_num);
   1.108 +}
   1.109 +
   1.110 +#ifndef PRODUCT
   1.111 +bool CMBitMapRO::covers(ReservedSpace rs) const {
   1.112 +  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
   1.113 +  assert(((size_t)_bm.size() * (1 << _shifter)) == _bmWordSize,
   1.114 +         "size inconsistency");
   1.115 +  return _bmStartWord == (HeapWord*)(rs.base()) &&
   1.116 +         _bmWordSize  == rs.size()>>LogHeapWordSize;
   1.117 +}
   1.118 +#endif
   1.119 +
   1.120 +void CMBitMap::clearAll() {
   1.121 +  _bm.clear();
   1.122 +  return;
   1.123 +}
   1.124 +
   1.125 +void CMBitMap::markRange(MemRegion mr) {
   1.126 +  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
   1.127 +  assert(!mr.is_empty(), "unexpected empty region");
   1.128 +  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
   1.129 +          ((HeapWord *) mr.end())),
   1.130 +         "markRange memory region end is not card aligned");
   1.131 +  // convert address range into offset range
   1.132 +  _bm.at_put_range(heapWordToOffset(mr.start()),
   1.133 +                   heapWordToOffset(mr.end()), true);
   1.134 +}
   1.135 +
   1.136 +void CMBitMap::clearRange(MemRegion mr) {
   1.137 +  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
   1.138 +  assert(!mr.is_empty(), "unexpected empty region");
   1.139 +  // convert address range into offset range
   1.140 +  _bm.at_put_range(heapWordToOffset(mr.start()),
   1.141 +                   heapWordToOffset(mr.end()), false);
   1.142 +}
   1.143 +
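         +// Finds the first marked run that begins at or after "addr" (bounded by
         +// "end_addr"), clears those bits in the bitmap, and returns the run as a
         +// MemRegion; the region is empty if no marked bits lie in the range.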
   1.144 +MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
   1.145 +                                            HeapWord* end_addr) {
   1.146 +  HeapWord* start = getNextMarkedWordAddress(addr);
   1.147 +  start = MIN2(start, end_addr);
   1.148 +  HeapWord* end   = getNextUnmarkedWordAddress(start);
   1.149 +  end = MIN2(end, end_addr);
   1.150 +  assert(start <= end, "Consistency check");
   1.151 +  MemRegion mr(start, end);
   1.152 +  if (!mr.is_empty()) {
   1.153 +    clearRange(mr);
   1.154 +  }
   1.155 +  return mr;
   1.156 +}
   1.157 +
   1.158 +CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
   1.159 +  _base(NULL), _cm(cm)
   1.160 +#ifdef ASSERT
   1.161 +  , _drain_in_progress(false)
   1.162 +  , _drain_in_progress_yields(false)
   1.163 +#endif
   1.164 +{}
   1.165 +
   1.166 +void CMMarkStack::allocate(size_t size) {
   1.167 +  _base = NEW_C_HEAP_ARRAY(oop, size);
   1.168 +  if (_base == NULL)
   1.169 +    vm_exit_during_initialization("Failed to allocate "
   1.170 +                                  "CM region mark stack");
   1.171 +  _index = 0;
   1.172 +  // QQQQ cast ...
   1.173 +  _capacity = (jint) size;
   1.174 +  _oops_do_bound = -1;
   1.175 +  NOT_PRODUCT(_max_depth = 0);
   1.176 +}
   1.177 +
   1.178 +CMMarkStack::~CMMarkStack() {
   1.179 +  if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base);
   1.180 +}
   1.181 +
   1.182 +void CMMarkStack::par_push(oop ptr) {
   1.183 +  while (true) {
   1.184 +    if (isFull()) {
   1.185 +      _overflow = true;
   1.186 +      return;
   1.187 +    }
   1.188 +    // Otherwise...
   1.189 +    jint index = _index;
   1.190 +    jint next_index = index+1;
   1.191 +    jint res = Atomic::cmpxchg(next_index, &_index, index);
   1.192 +    if (res == index) {
   1.193 +      _base[index] = ptr;
   1.194 +      // Note that we don't maintain this atomically.  We could, but it
   1.195 +      // doesn't seem necessary.
   1.196 +      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
   1.197 +      return;
   1.198 +    }
   1.199 +    // Otherwise, we need to try again.
   1.200 +  }
   1.201 +}
   1.202 +
   1.203 +void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
   1.204 +  while (true) {
   1.205 +    if (isFull()) {
   1.206 +      _overflow = true;
   1.207 +      return;
   1.208 +    }
   1.209 +    // Otherwise...
   1.210 +    jint index = _index;
   1.211 +    jint next_index = index + n;
   1.212 +    if (next_index > _capacity) {
   1.213 +      _overflow = true;
   1.214 +      return;
   1.215 +    }
   1.216 +    jint res = Atomic::cmpxchg(next_index, &_index, index);
   1.217 +    if (res == index) {
   1.218 +      for (int i = 0; i < n; i++) {
   1.219 +        int ind = index + i;
   1.220 +        assert(ind < _capacity, "By overflow test above.");
   1.221 +        _base[ind] = ptr_arr[i];
   1.222 +      }
   1.223 +      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
   1.224 +      return;
   1.225 +    }
   1.226 +    // Otherwise, we need to try again.
   1.227 +  }
   1.228 +}
   1.229 +
   1.230 +
   1.231 +void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
   1.232 +  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
   1.233 +  jint start = _index;
   1.234 +  jint next_index = start + n;
   1.235 +  if (next_index > _capacity) {
   1.236 +    _overflow = true;
   1.237 +    return;
   1.238 +  }
   1.239 +  // Otherwise.
   1.240 +  _index = next_index;
   1.241 +  for (int i = 0; i < n; i++) {
   1.242 +    int ind = start + i;
   1.243 +    guarantee(ind < _capacity, "By overflow test above.");
   1.244 +    _base[ind] = ptr_arr[i];
   1.245 +  }
   1.246 +}
   1.247 +
   1.248 +
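         +// Pops up to "max" entries from the top of the stack into ptr_arr while
         +// holding ParGCRareEvent_lock; sets *n to the number actually popped and
         +// returns false if and only if the stack was empty.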
   1.249 +bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
   1.250 +  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
   1.251 +  jint index = _index;
   1.252 +  if (index == 0) {
   1.253 +    *n = 0;
   1.254 +    return false;
   1.255 +  } else {
   1.256 +    int k = MIN2(max, index);
   1.257 +    jint new_ind = index - k;
   1.258 +    for (int j = 0; j < k; j++) {
   1.259 +      ptr_arr[j] = _base[new_ind + j];
   1.260 +    }
   1.261 +    _index = new_ind;
   1.262 +    *n = k;
   1.263 +    return true;
   1.264 +  }
   1.265 +}
   1.266 +
   1.267 +
   1.268 +CMRegionStack::CMRegionStack() : _base(NULL) {}
   1.269 +
   1.270 +void CMRegionStack::allocate(size_t size) {
   1.271 +  _base = NEW_C_HEAP_ARRAY(MemRegion, size);
   1.272 +  if (_base == NULL)
   1.273 +    vm_exit_during_initialization("Failed to allocate "
   1.274 +                                  "CM region mark stack");
   1.275 +  _index = 0;
   1.276 +  // QQQQ cast ...
   1.277 +  _capacity = (jint) size;
   1.278 +}
   1.279 +
   1.280 +CMRegionStack::~CMRegionStack() {
   1.281 +  if (_base != NULL) FREE_C_HEAP_ARRAY(MemRegion, _base);
   1.282 +}
   1.283 +
   1.284 +void CMRegionStack::push(MemRegion mr) {
   1.285 +  assert(mr.word_size() > 0, "Precondition");
   1.286 +  while (true) {
   1.287 +    if (isFull()) {
   1.288 +      _overflow = true;
   1.289 +      return;
   1.290 +    }
   1.291 +    // Otherwise...
   1.292 +    jint index = _index;
   1.293 +    jint next_index = index+1;
   1.294 +    jint res = Atomic::cmpxchg(next_index, &_index, index);
   1.295 +    if (res == index) {
   1.296 +      _base[index] = mr;
   1.297 +      return;
   1.298 +    }
   1.299 +    // Otherwise, we need to try again.
   1.300 +  }
   1.301 +}
   1.302 +
   1.303 +MemRegion CMRegionStack::pop() {
   1.304 +  while (true) {
   1.305 +    // Otherwise...
   1.306 +    jint index = _index;
   1.307 +
   1.308 +    if (index == 0) {
   1.309 +      return MemRegion();
   1.310 +    }
   1.311 +    jint next_index = index-1;
   1.312 +    jint res = Atomic::cmpxchg(next_index, &_index, index);
   1.313 +    if (res == index) {
   1.314 +      MemRegion mr = _base[next_index];
   1.315 +      if (mr.start() != NULL) {
   1.316 +        tmp_guarantee_CM( mr.end() != NULL, "invariant" );
   1.317 +        tmp_guarantee_CM( mr.word_size() > 0, "invariant" );
   1.318 +        return mr;
   1.319 +      } else {
   1.320 +        // that entry was invalidated... let's skip it
   1.321 +        tmp_guarantee_CM( mr.end() == NULL, "invariant" );
   1.322 +      }
   1.323 +    }
   1.324 +    // Otherwise, we need to try again.
   1.325 +  }
   1.326 +}
   1.327 +
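         +// Walks the entries below _oops_do_bound and replaces any entry whose
         +// start address falls in a collection-set heap region with an empty
         +// MemRegion; returns true if at least one entry was invalidated.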
   1.328 +bool CMRegionStack::invalidate_entries_into_cset() {
   1.329 +  bool result = false;
   1.330 +  G1CollectedHeap* g1h = G1CollectedHeap::heap();
   1.331 +  for (int i = 0; i < _oops_do_bound; ++i) {
   1.332 +    MemRegion mr = _base[i];
   1.333 +    if (mr.start() != NULL) {
   1.334 +      tmp_guarantee_CM( mr.end() != NULL, "invariant");
   1.335 +      tmp_guarantee_CM( mr.word_size() > 0, "invariant" );
   1.336 +      HeapRegion* hr = g1h->heap_region_containing(mr.start());
   1.337 +      tmp_guarantee_CM( hr != NULL, "invariant" );
   1.338 +      if (hr->in_collection_set()) {
   1.339 +        // The region points into the collection set
   1.340 +        _base[i] = MemRegion();
   1.341 +        result = true;
   1.342 +      }
   1.343 +    } else {
   1.344 +      // that entry was invalidated... let's skip it
   1.345 +      tmp_guarantee_CM( mr.end() == NULL, "invariant" );
   1.346 +    }
   1.347 +  }
   1.348 +  return result;
   1.349 +}
   1.350 +
   1.351 +template<class OopClosureClass>
   1.352 +bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
   1.353 +  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
   1.354 +         || SafepointSynchronize::is_at_safepoint(),
   1.355 +         "Drain recursion must be yield-safe.");
   1.356 +  bool res = true;
   1.357 +  debug_only(_drain_in_progress = true);
   1.358 +  debug_only(_drain_in_progress_yields = yield_after);
   1.359 +  while (!isEmpty()) {
   1.360 +    oop newOop = pop();
   1.361 +    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
   1.362 +    assert(newOop->is_oop(), "Expected an oop");
   1.363 +    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
   1.364 +           "only grey objects on this stack");
   1.365 +    // iterate over the oops in this oop, marking and pushing
   1.366 +    // the ones in the G1 heap.
   1.367 +    newOop->oop_iterate(cl);
   1.368 +    if (yield_after && _cm->do_yield_check()) {
   1.369 +      res = false; break;
   1.370 +    }
   1.371 +  }
   1.372 +  debug_only(_drain_in_progress = false);
   1.373 +  return res;
   1.374 +}
   1.375 +
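         +// Applies "f" to the stack entries below _oops_do_bound (which must have
         +// been set beforehand) and then resets the bound.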
   1.376 +void CMMarkStack::oops_do(OopClosure* f) {
   1.377 +  if (_index == 0) return;
   1.378 +  assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
   1.379 +         "Bound must be set.");
   1.380 +  for (int i = 0; i < _oops_do_bound; i++) {
   1.381 +    f->do_oop(&_base[i]);
   1.382 +  }
   1.383 +  _oops_do_bound = -1;
   1.384 +}
   1.385 +
   1.386 +bool ConcurrentMark::not_yet_marked(oop obj) const {
   1.387 +  return (_g1h->is_obj_ill(obj)
   1.388 +          || (_g1h->is_in_permanent(obj)
   1.389 +              && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
   1.390 +}
   1.391 +
   1.392 +#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
   1.393 +#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
   1.394 +#endif // _MSC_VER
   1.395 +
   1.396 +ConcurrentMark::ConcurrentMark(ReservedSpace rs,
   1.397 +                               int max_regions) :
   1.398 +  _markBitMap1(rs, MinObjAlignment - 1),
   1.399 +  _markBitMap2(rs, MinObjAlignment - 1),
   1.400 +
   1.401 +  _parallel_marking_threads(0),
   1.402 +  _sleep_factor(0.0),
   1.403 +  _marking_task_overhead(1.0),
   1.404 +  _cleanup_sleep_factor(0.0),
   1.405 +  _cleanup_task_overhead(1.0),
   1.406 +  _region_bm(max_regions, false /* in_resource_area*/),
   1.407 +  _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
   1.408 +           CardTableModRefBS::card_shift,
   1.409 +           false /* in_resource_area*/),
   1.410 +  _prevMarkBitMap(&_markBitMap1),
   1.411 +  _nextMarkBitMap(&_markBitMap2),
   1.412 +  _at_least_one_mark_complete(false),
   1.413 +
   1.414 +  _markStack(this),
   1.415 +  _regionStack(),
   1.416 +  // _finger set in set_non_marking_state
   1.417 +
   1.418 +  _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
   1.419 +  // _active_tasks set in set_non_marking_state
   1.420 +  // _tasks set inside the constructor
   1.421 +  _task_queues(new CMTaskQueueSet((int) _max_task_num)),
   1.422 +  _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),
   1.423 +
   1.424 +  _has_overflown(false),
   1.425 +  _concurrent(false),
   1.426 +
   1.427 +  // _verbose_level set below
   1.428 +
   1.429 +  _init_times(),
   1.430 +  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
   1.431 +  _cleanup_times(),
   1.432 +  _total_counting_time(0.0),
   1.433 +  _total_rs_scrub_time(0.0),
   1.434 +
   1.435 +  _parallel_workers(NULL),
   1.436 +  _cleanup_co_tracker(G1CLGroup)
   1.437 +{
   1.438 +  CMVerboseLevel verbose_level =
   1.439 +    (CMVerboseLevel) G1MarkingVerboseLevel;
   1.440 +  if (verbose_level < no_verbose)
   1.441 +    verbose_level = no_verbose;
   1.442 +  if (verbose_level > high_verbose)
   1.443 +    verbose_level = high_verbose;
   1.444 +  _verbose_level = verbose_level;
   1.445 +
   1.446 +  if (verbose_low())
   1.447 +    gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
   1.448 +                           "heap end = "PTR_FORMAT, _heap_start, _heap_end);
   1.449 +
   1.450 +  _markStack.allocate(G1CMStackSize);
   1.451 +  _regionStack.allocate(G1CMRegionStackSize);
   1.452 +
   1.453 +  // Create & start a ConcurrentMark thread.
   1.454 +  if (G1ConcMark) {
   1.455 +    _cmThread = new ConcurrentMarkThread(this);
   1.456 +    assert(cmThread() != NULL, "CM Thread should have been created");
   1.457 +    assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
   1.458 +  } else {
   1.459 +    _cmThread = NULL;
   1.460 +  }
   1.461 +  _g1h = G1CollectedHeap::heap();
   1.462 +  assert(CGC_lock != NULL, "Where's the CGC_lock?");
   1.463 +  assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
   1.464 +  assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
   1.465 +
   1.466 +  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
   1.467 +  satb_qs.set_buffer_size(G1SATBLogBufferSize);
   1.468 +
   1.469 +  int size = (int) MAX2(ParallelGCThreads, (size_t)1);
   1.470 +  _par_cleanup_thread_state = NEW_C_HEAP_ARRAY(ParCleanupThreadState*, size);
   1.471 +  for (int i = 0 ; i < size; i++) {
   1.472 +    _par_cleanup_thread_state[i] = new ParCleanupThreadState;
   1.473 +  }
   1.474 +
   1.475 +  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
   1.476 +  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);
   1.477 +
   1.478 +  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
   1.479 +  _active_tasks = _max_task_num;
   1.480 +  for (int i = 0; i < (int) _max_task_num; ++i) {
   1.481 +    CMTaskQueue* task_queue = new CMTaskQueue();
   1.482 +    task_queue->initialize();
   1.483 +    _task_queues->register_queue(i, task_queue);
   1.484 +
   1.485 +    _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
   1.486 +    _accum_task_vtime[i] = 0.0;
   1.487 +  }
   1.488 +
   1.489 +  if (ParallelMarkingThreads > ParallelGCThreads) {
   1.490 +    vm_exit_during_initialization("Can't have more ParallelMarkingThreads "
   1.491 +                                  "than ParallelGCThreads.");
   1.492 +  }
   1.493 +  if (ParallelGCThreads == 0) {
   1.494 +    // if we are not running with any parallel GC threads we will not
   1.495 +    // spawn any marking threads either
   1.496 +    _parallel_marking_threads =   0;
   1.497 +    _sleep_factor             = 0.0;
   1.498 +    _marking_task_overhead    = 1.0;
   1.499 +  } else {
   1.500 +    if (ParallelMarkingThreads > 0) {
   1.501 +      // notice that ParallelMarkingThreads overwrites G1MarkingOverheadPerc
   1.502 +      // if both are set
   1.503 +
   1.504 +      _parallel_marking_threads = ParallelMarkingThreads;
   1.505 +      _sleep_factor             = 0.0;
   1.506 +      _marking_task_overhead    = 1.0;
   1.507 +    } else if (G1MarkingOverheadPerc > 0) {
   1.508 +      // we will calculate the number of parallel marking threads
   1.509 +      // based on a target overhead with respect to the soft real-time
   1.510 +      // goal
   1.511 +
   1.512 +      double marking_overhead = (double) G1MarkingOverheadPerc / 100.0;
   1.513 +      double overall_cm_overhead =
   1.514 +        (double) G1MaxPauseTimeMS * marking_overhead / (double) G1TimeSliceMS;
   1.515 +      double cpu_ratio = 1.0 / (double) os::processor_count();
   1.516 +      double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
   1.517 +      double marking_task_overhead =
   1.518 +        overall_cm_overhead / marking_thread_num *
   1.519 +                                                (double) os::processor_count();
   1.520 +      double sleep_factor =
   1.521 +                         (1.0 - marking_task_overhead) / marking_task_overhead;
   1.522 +
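         +      // For illustration only, with hypothetical flag values: with
         +      // G1MarkingOverheadPerc = 10, G1MaxPauseTimeMS = 100,
         +      // G1TimeSliceMS = 500 and 8 processors, overall_cm_overhead is
         +      // 100 * 0.10 / 500 = 0.02, marking_thread_num is
         +      // ceil(0.02 / 0.125) = 1, marking_task_overhead is
         +      // 0.02 / 1 * 8 = 0.16 and sleep_factor is (1 - 0.16) / 0.16 = 5.25,
         +      // i.e. a single marking thread that sleeps roughly 5.25 times as
         +      // long as it works.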
   1.523 +      _parallel_marking_threads = (size_t) marking_thread_num;
   1.524 +      _sleep_factor             = sleep_factor;
   1.525 +      _marking_task_overhead    = marking_task_overhead;
   1.526 +    } else {
   1.527 +      _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
   1.528 +      _sleep_factor             = 0.0;
   1.529 +      _marking_task_overhead    = 1.0;
   1.530 +    }
   1.531 +
   1.532 +    if (parallel_marking_threads() > 1)
   1.533 +      _cleanup_task_overhead = 1.0;
   1.534 +    else
   1.535 +      _cleanup_task_overhead = marking_task_overhead();
   1.536 +    _cleanup_sleep_factor =
   1.537 +                     (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
   1.538 +
   1.539 +#if 0
   1.540 +    gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
   1.541 +    gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
   1.542 +    gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
   1.543 +    gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
   1.544 +    gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
   1.545 +#endif
   1.546 +
   1.547 +    guarantee( parallel_marking_threads() > 0, "peace of mind" );
   1.548 +    _parallel_workers = new WorkGang("Parallel Marking Threads",
   1.549 +                                     (int) parallel_marking_threads(), false, true);
   1.550 +    if (_parallel_workers == NULL)
   1.551 +      vm_exit_during_initialization("Failed necessary allocation.");
   1.552 +  }
   1.553 +
   1.554 +  // so that the call below can read a sensible value
   1.555 +  _heap_start = (HeapWord*) rs.base();
   1.556 +  set_non_marking_state();
   1.557 +}
   1.558 +
   1.559 +void ConcurrentMark::update_g1_committed(bool force) {
   1.560 +  // If concurrent marking is not in progress, then we do not need to
   1.561 +  // update _heap_end. This has a subtle and important
   1.562 +  // side-effect. Imagine that two evacuation pauses happen between
   1.563 +  // marking completion and remark. The first one can grow the
   1.564 +  // heap (hence now the finger is below the heap end). Then, the
   1.565 +  // second one could unnecessarily push regions on the region
   1.566 +  // stack. This causes the invariant that the region stack is empty
   1.567 +  // at the beginning of remark to be false. By ensuring that we do
   1.568 +  // not observe heap expansions after marking is complete, we avoid
   1.569 +  // this problem.
   1.570 +  if (!concurrent_marking_in_progress() && !force)
   1.571 +    return;
   1.572 +
   1.573 +  MemRegion committed = _g1h->g1_committed();
   1.574 +  tmp_guarantee_CM( committed.start() == _heap_start,
   1.575 +                    "start shouldn't change" );
   1.576 +  HeapWord* new_end = committed.end();
   1.577 +  if (new_end > _heap_end) {
   1.578 +    // The heap has been expanded.
   1.579 +
   1.580 +    _heap_end = new_end;
   1.581 +  }
   1.582 +  // Notice that the heap can also shrink. However, this only happens
   1.583 +  // during a Full GC (at least currently) and the entire marking
   1.584 +  // phase will bail out and the task will not be restarted. So, let's
   1.585 +  // do nothing.
   1.586 +}
   1.587 +
   1.588 +void ConcurrentMark::reset() {
   1.589 +  // Starting values for these two. This should be called in a STW
   1.590 +  // phase. Any future expansions of g1_committed that CM needs to be
   1.591 +  // notified of will happen at the end of evacuation pauses, when
   1.592 +  // tasks are inactive.
   1.593 +  MemRegion committed = _g1h->g1_committed();
   1.594 +  _heap_start = committed.start();
   1.595 +  _heap_end   = committed.end();
   1.596 +
   1.597 +  guarantee( _heap_start != NULL &&
   1.598 +             _heap_end != NULL   &&
   1.599 +             _heap_start < _heap_end, "heap bounds should look ok" );
   1.600 +
   1.601 +  // reset all the marking data structures and any necessary flags
   1.602 +  clear_marking_state();
   1.603 +
   1.604 +  if (verbose_low())
   1.605 +    gclog_or_tty->print_cr("[global] resetting");
   1.606 +
   1.607 +  // We do reset all of them, since different phases will use
   1.608 +  // a different number of active threads. So, it's easiest to have all
   1.609 +  // of them ready.
   1.610 +  for (int i = 0; i < (int) _max_task_num; ++i)
   1.611 +    _tasks[i]->reset(_nextMarkBitMap);
   1.612 +
   1.613 +  // we need this to make sure that the flag is on during the evac
   1.614 +  // pause with initial mark piggy-backed
   1.615 +  set_concurrent_marking_in_progress();
   1.616 +}
   1.617 +
   1.618 +void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
   1.619 +  guarantee( active_tasks <= _max_task_num, "we should not have more" );
   1.620 +
   1.621 +  _active_tasks = active_tasks;
   1.622 +  // Need to update the three data structures below according to the
   1.623 +  // number of active threads for this phase.
   1.624 +  _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
   1.625 +  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
   1.626 +  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
   1.627 +
   1.628 +  _concurrent = concurrent;
   1.629 +  // We propagate this to all tasks, not just the active ones.
   1.630 +  for (int i = 0; i < (int) _max_task_num; ++i)
   1.631 +    _tasks[i]->set_concurrent(concurrent);
   1.632 +
   1.633 +  if (concurrent) {
   1.634 +    set_concurrent_marking_in_progress();
   1.635 +  } else {
   1.636 +    // We currently assume that the concurrent flag has been set to
   1.637 +    // false before we start remark. At this point we should also be
   1.638 +    // in a STW phase.
   1.639 +    guarantee( !concurrent_marking_in_progress(), "invariant" );
   1.640 +    guarantee( _finger == _heap_end, "only way to get here" );
   1.641 +    update_g1_committed(true);
   1.642 +  }
   1.643 +}
   1.644 +
   1.645 +void ConcurrentMark::set_non_marking_state() {
   1.646 +  // We set the global marking state to some default values when we're
   1.647 +  // not doing marking.
   1.648 +  clear_marking_state();
   1.649 +  _active_tasks = 0;
   1.650 +  clear_concurrent_marking_in_progress();
   1.651 +}
   1.652 +
   1.653 +ConcurrentMark::~ConcurrentMark() {
   1.654 +  int size = (int) MAX2(ParallelGCThreads, (size_t)1);
   1.655 +  for (int i = 0; i < size; i++) delete _par_cleanup_thread_state[i];
   1.656 +  FREE_C_HEAP_ARRAY(ParCleanupThreadState*,
   1.657 +                    _par_cleanup_thread_state);
   1.658 +
   1.659 +  for (int i = 0; i < (int) _max_task_num; ++i) {
   1.660 +    delete _task_queues->queue(i);
   1.661 +    delete _tasks[i];
   1.662 +  }
   1.663 +  delete _task_queues;
   1.664 +  FREE_C_HEAP_ARRAY(CMTask*, _tasks);
   1.665 +}
   1.666 +
   1.667 +// This closure is used to mark refs into the G1 heap
   1.668 +// from external roots in the CM bit map.
   1.669 +// Called at the first checkpoint.
   1.670 +//
   1.671 +
   1.672 +#define PRINT_REACHABLE_AT_INITIAL_MARK 0
   1.673 +#if PRINT_REACHABLE_AT_INITIAL_MARK
   1.674 +static FILE* reachable_file = NULL;
   1.675 +
   1.676 +class PrintReachableClosure: public OopsInGenClosure {
   1.677 +  CMBitMap* _bm;
   1.678 +  int _level;
   1.679 +public:
   1.680 +  PrintReachableClosure(CMBitMap* bm) :
   1.681 +    _bm(bm), _level(0) {
   1.682 +    guarantee(reachable_file != NULL, "pre-condition");
   1.683 +  }
   1.684 +  void do_oop(oop* p) {
   1.685 +    oop obj = *p;
   1.686 +    HeapWord* obj_addr = (HeapWord*)obj;
   1.687 +    if (obj == NULL) return;
   1.688 +    fprintf(reachable_file, "%d: "PTR_FORMAT" -> "PTR_FORMAT" (%d)\n",
   1.689 +            _level, p, (void*) obj, _bm->isMarked(obj_addr));
   1.690 +    if (!_bm->isMarked(obj_addr)) {
   1.691 +      _bm->mark(obj_addr);
   1.692 +      _level++;
   1.693 +      obj->oop_iterate(this);
   1.694 +      _level--;
   1.695 +    }
   1.696 +  }
   1.697 +};
   1.698 +#endif // PRINT_REACHABLE_AT_INITIAL_MARK
   1.699 +
   1.700 +#define SEND_HEAP_DUMP_TO_FILE 0
   1.701 +#if SEND_HEAP_DUMP_TO_FILE
   1.702 +static FILE* heap_dump_file = NULL;
   1.703 +#endif // SEND_HEAP_DUMP_TO_FILE
   1.704 +
   1.705 +void ConcurrentMark::clearNextBitmap() {
   1.706 +   guarantee(!G1CollectedHeap::heap()->mark_in_progress(), "Precondition.");
   1.707 +
   1.708 +   // clear the mark bitmap (no grey objects to start with).
   1.709 +   // We need to do this in chunks and offer to yield in between
   1.710 +   // each chunk.
   1.711 +   HeapWord* start  = _nextMarkBitMap->startWord();
   1.712 +   HeapWord* end    = _nextMarkBitMap->endWord();
   1.713 +   HeapWord* cur    = start;
   1.714 +   size_t chunkSize = M;
   1.715 +   while (cur < end) {
   1.716 +     HeapWord* next = cur + chunkSize;
   1.717 +     if (next > end)
   1.718 +       next = end;
   1.719 +     MemRegion mr(cur,next);
   1.720 +     _nextMarkBitMap->clearRange(mr);
   1.721 +     cur = next;
   1.722 +     do_yield_check();
   1.723 +   }
   1.724 +}
   1.725 +
   1.726 +class NoteStartOfMarkHRClosure: public HeapRegionClosure {
   1.727 +public:
   1.728 +  bool doHeapRegion(HeapRegion* r) {
   1.729 +    if (!r->continuesHumongous()) {
   1.730 +      r->note_start_of_marking(true);
   1.731 +    }
   1.732 +    return false;
   1.733 +  }
   1.734 +};
   1.735 +
   1.736 +void ConcurrentMark::checkpointRootsInitialPre() {
   1.737 +  G1CollectedHeap*   g1h = G1CollectedHeap::heap();
   1.738 +  G1CollectorPolicy* g1p = g1h->g1_policy();
   1.739 +
   1.740 +  _has_aborted = false;
   1.741 +
   1.742 +  // Find all the reachable objects...
   1.743 +#if PRINT_REACHABLE_AT_INITIAL_MARK
   1.744 +  guarantee(reachable_file == NULL, "Protocol");
   1.745 +  char fn_buf[100];
   1.746 +  sprintf(fn_buf, "/tmp/reachable.txt.%d", os::current_process_id());
   1.747 +  reachable_file = fopen(fn_buf, "w");
   1.748 +  // clear the mark bitmap (no grey objects to start with)
   1.749 +  _nextMarkBitMap->clearAll();
   1.750 +  PrintReachableClosure prcl(_nextMarkBitMap);
   1.751 +  g1h->process_strong_roots(
   1.752 +                            false,   // fake perm gen collection
   1.753 +                            SharedHeap::SO_AllClasses,
   1.754 +                            &prcl, // Regular roots
   1.755 +                            &prcl    // Perm Gen Roots
   1.756 +                            );
   1.757 +  // The root iteration above "consumed" dirty cards in the perm gen.
   1.758 +  // Therefore, as a shortcut, we dirty all such cards.
   1.759 +  g1h->rem_set()->invalidate(g1h->perm_gen()->used_region(), false);
   1.760 +  fclose(reachable_file);
   1.761 +  reachable_file = NULL;
   1.762 +  // clear the mark bitmap again.
   1.763 +  _nextMarkBitMap->clearAll();
   1.764 +  COMPILER2_PRESENT(DerivedPointerTable::update_pointers());
   1.765 +  COMPILER2_PRESENT(DerivedPointerTable::clear());
   1.766 +#endif // PRINT_REACHABLE_AT_INITIAL_MARK
   1.767 +
   1.768 +  // Initialise marking structures. This has to be done in a STW phase.
   1.769 +  reset();
   1.770 +}
   1.771 +
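         +// Root closure used during the initial-mark pause: it grays any root that
         +// points into the G1 reserved space and, if _do_barrier is set, applies
         +// the closure's barrier to the (external) root location.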
   1.772 +class CMMarkRootsClosure: public OopsInGenClosure {
   1.773 +private:
   1.774 +  ConcurrentMark*  _cm;
   1.775 +  G1CollectedHeap* _g1h;
   1.776 +  bool             _do_barrier;
   1.777 +
   1.778 +public:
   1.779 +  CMMarkRootsClosure(ConcurrentMark* cm,
   1.780 +                     G1CollectedHeap* g1h,
   1.781 +                     bool do_barrier) : _cm(cm), _g1h(g1h),
   1.782 +                                        _do_barrier(do_barrier) { }
   1.783 +
   1.784 +  virtual void do_oop(narrowOop* p) {
   1.785 +    guarantee(false, "NYI");
   1.786 +  }
   1.787 +
   1.788 +  virtual void do_oop(oop* p) {
   1.789 +    oop thisOop = *p;
   1.790 +    if (thisOop != NULL) {
   1.791 +      assert(thisOop->is_oop() || thisOop->mark() == NULL,
   1.792 +             "expected an oop, possibly with mark word displaced");
   1.793 +      HeapWord* addr = (HeapWord*)thisOop;
   1.794 +      if (_g1h->is_in_g1_reserved(addr)) {
   1.795 +        _cm->grayRoot(thisOop);
   1.796 +      }
   1.797 +    }
   1.798 +    if (_do_barrier) {
   1.799 +      assert(!_g1h->is_in_g1_reserved(p),
   1.800 +             "Should be called on external roots");
   1.801 +      do_barrier(p);
   1.802 +    }
   1.803 +  }
   1.804 +};
   1.805 +
   1.806 +void ConcurrentMark::checkpointRootsInitialPost() {
   1.807 +  G1CollectedHeap*   g1h = G1CollectedHeap::heap();
   1.808 +
   1.809 +  // For each region note start of marking.
   1.810 +  NoteStartOfMarkHRClosure startcl;
   1.811 +  g1h->heap_region_iterate(&startcl);
   1.812 +
   1.813 +  // Start weak-reference discovery.
   1.814 +  ReferenceProcessor* rp = g1h->ref_processor();
   1.815 +  rp->verify_no_references_recorded();
   1.816 +  rp->enable_discovery(); // enable ("weak") refs discovery
   1.817 +
   1.818 +  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
   1.819 +  satb_mq_set.set_process_completed_threshold(G1SATBProcessCompletedThreshold);
   1.820 +  satb_mq_set.set_active_all_threads(true);
   1.821 +
   1.822 +  // update_g1_committed() will be called at the end of an evac pause
   1.823 +  // when marking is on. So, it's also called at the end of the
   1.824 +  // initial-mark pause to update the heap end, if the heap expands
   1.825 +  // during it. No need to call it here.
   1.826 +
   1.827 +  guarantee( !_cleanup_co_tracker.enabled(), "invariant" );
   1.828 +
   1.829 +  size_t max_marking_threads =
   1.830 +    MAX2((size_t) 1, parallel_marking_threads());
   1.831 +  for (int i = 0; i < (int)_max_task_num; ++i) {
   1.832 +    _tasks[i]->enable_co_tracker();
   1.833 +    if (i < (int) max_marking_threads)
   1.834 +      _tasks[i]->reset_co_tracker(marking_task_overhead());
   1.835 +    else
   1.836 +      _tasks[i]->reset_co_tracker(0.0);
   1.837 +  }
   1.838 +}
   1.839 +
   1.840 +// Checkpoint the roots into this generation from outside
   1.841 +// this generation. [Note this initial checkpoint need only
   1.842 +// be approximate -- we'll do a catch up phase subsequently.]
   1.843 +void ConcurrentMark::checkpointRootsInitial() {
   1.844 +  assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped");
   1.845 +  G1CollectedHeap* g1h = G1CollectedHeap::heap();
   1.846 +
   1.847 +  double start = os::elapsedTime();
   1.848 +  GCOverheadReporter::recordSTWStart(start);
   1.849 +
   1.850 +  // If there has not been a GC[n-1] since last GC[n] cycle completed,
   1.851 +  // precede our marking with a collection of all
   1.852 +  // younger generations to keep floating garbage to a minimum.
   1.853 +  // YSR: we won't do this for now -- it's an optimization to be
   1.854 +  // done post-beta.
   1.855 +
   1.856 +  // YSR:    ignoring weak refs for now; will do at bug fixing stage
   1.857 +  // EVM:    assert(discoveredRefsAreClear());
   1.858 +
   1.859 +
   1.860 +  G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
   1.861 +  g1p->record_concurrent_mark_init_start();
   1.862 +  checkpointRootsInitialPre();
   1.863 +
   1.864 +  // YSR: when concurrent precleaning is in place, we'll
   1.865 +  // need to clear the cached card table here
   1.866 +
   1.867 +  ResourceMark rm;
   1.868 +  HandleMark  hm;
   1.869 +
   1.870 +  g1h->ensure_parsability(false);
   1.871 +  g1h->perm_gen()->save_marks();
   1.872 +
   1.873 +  CMMarkRootsClosure notOlder(this, g1h, false);
   1.874 +  CMMarkRootsClosure older(this, g1h, true);
   1.875 +
   1.876 +  g1h->set_marking_started();
   1.877 +  g1h->rem_set()->prepare_for_younger_refs_iterate(false);
   1.878 +
   1.879 +  g1h->process_strong_roots(false,   // fake perm gen collection
   1.880 +                            SharedHeap::SO_AllClasses,
   1.881 +                            &notOlder, // Regular roots
   1.882 +                            &older    // Perm Gen Roots
   1.883 +                            );
   1.884 +  checkpointRootsInitialPost();
   1.885 +
   1.886 +  // Statistics.
   1.887 +  double end = os::elapsedTime();
   1.888 +  _init_times.add((end - start) * 1000.0);
   1.889 +  GCOverheadReporter::recordSTWEnd(end);
   1.890 +
   1.891 +  g1p->record_concurrent_mark_init_end();
   1.892 +}
   1.893 +
   1.894 +/*
   1.895 +   Notice that in the next two methods, we actually leave the STS
   1.896 +   during the barrier sync and join it immediately afterwards. If we
   1.897 +   do not do this, then the following deadlock can occur: one
   1.898 +   thread could be in the barrier sync code, waiting for the other
   1.899 +   thread to also sync up, whereas another one could be trying to
   1.900 +   yield, while also waiting for the other threads to sync up too.
   1.901 +
   1.902 +   Because the thread that does the sync barrier has left the STS, it
   1.903 +   is possible for it to be suspended while a Full GC or an evacuation
   1.904 +   pause takes place. This is actually safe, since entering the sync
   1.905 +   barrier is one of the last things do_marking_step() does, and it
   1.906 +   doesn't manipulate any data structures afterwards.
   1.907 +*/
   1.908 +
   1.909 +void ConcurrentMark::enter_first_sync_barrier(int task_num) {
   1.910 +  if (verbose_low())
   1.911 +    gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
   1.912 +
   1.913 +  ConcurrentGCThread::stsLeave();
   1.914 +  _first_overflow_barrier_sync.enter();
   1.915 +  ConcurrentGCThread::stsJoin();
   1.916 +  // at this point everyone should have synced up and not be doing any
   1.917 +  // more work
   1.918 +
   1.919 +  if (verbose_low())
   1.920 +    gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
   1.921 +
   1.922 +  // let task 0 do this
   1.923 +  if (task_num == 0) {
   1.924 +    // task 0 is responsible for clearing the global data structures
   1.925 +    clear_marking_state();
   1.926 +
   1.927 +    if (PrintGC) {
   1.928 +      gclog_or_tty->date_stamp(PrintGCDateStamps);
   1.929 +      gclog_or_tty->stamp(PrintGCTimeStamps);
   1.930 +      gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
   1.931 +    }
   1.932 +  }
   1.933 +
   1.934 +  // after this, each task should reset its own data structures and
   1.935 +  // then go into the second barrier
   1.936 +}
   1.937 +
   1.938 +void ConcurrentMark::enter_second_sync_barrier(int task_num) {
   1.939 +  if (verbose_low())
   1.940 +    gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
   1.941 +
   1.942 +  ConcurrentGCThread::stsLeave();
   1.943 +  _second_overflow_barrier_sync.enter();
   1.944 +  ConcurrentGCThread::stsJoin();
   1.945 +  // at this point everything should be re-initialised and ready to go
   1.946 +
   1.947 +  if (verbose_low())
   1.948 +    gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
   1.949 +}
   1.950 +
   1.951 +void ConcurrentMark::grayRoot(oop p) {
   1.952 +  HeapWord* addr = (HeapWord*) p;
   1.953 +  // We can't really check against _heap_start and _heap_end, since it
   1.954 +  // is possible during an evacuation pause with piggy-backed
   1.955 +  // initial-mark that the committed space is expanded during the
   1.956 +  // pause without CM observing this change. So the assertion below
   1.957 +  // is a bit conservative, but better than nothing.
   1.958 +  tmp_guarantee_CM( _g1h->g1_committed().contains(addr),
   1.959 +                    "address should be within the heap bounds" );
   1.960 +
   1.961 +  if (!_nextMarkBitMap->isMarked(addr))
   1.962 +    _nextMarkBitMap->parMark(addr);
   1.963 +}
   1.964 +
   1.965 +void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
   1.966 +  // The objects on the region have already been marked "in bulk" by
   1.967 +  // the caller. We only need to decide whether to push the region on
   1.968 +  // the region stack or not.
   1.969 +
   1.970 +  if (!concurrent_marking_in_progress() || !_should_gray_objects)
   1.971 +    // We're done with marking and waiting for remark. We do not need to
   1.972 +    // push anything else on the region stack.
   1.973 +    return;
   1.974 +
   1.975 +  HeapWord* finger = _finger;
   1.976 +
   1.977 +  if (verbose_low())
   1.978 +    gclog_or_tty->print_cr("[global] attempting to push "
   1.979 +                           "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
   1.980 +                           PTR_FORMAT, mr.start(), mr.end(), finger);
   1.981 +
   1.982 +  if (mr.start() < finger) {
   1.983 +    // The finger is always heap region aligned and it is not possible
   1.984 +    // for mr to span heap regions.
   1.985 +    tmp_guarantee_CM( mr.end() <= finger, "invariant" );
   1.986 +
   1.987 +    tmp_guarantee_CM( mr.start() <= mr.end() &&
   1.988 +                      _heap_start <= mr.start() &&
   1.989 +                      mr.end() <= _heap_end,
   1.990 +                  "region boundaries should fall within the committed space" );
   1.991 +    if (verbose_low())
   1.992 +      gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
   1.993 +                             "below the finger, pushing it",
   1.994 +                             mr.start(), mr.end());
   1.995 +
   1.996 +    if (!region_stack_push(mr)) {
   1.997 +      if (verbose_low())
   1.998 +        gclog_or_tty->print_cr("[global] region stack has overflown.");
   1.999 +    }
  1.1000 +  }
  1.1001 +}
  1.1002 +
  1.1003 +void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
  1.1004 +  // The object is not marked by the caller. We need to at least mark
  1.1005 +  // it and maybe push it on the stack.
  1.1006 +
  1.1007 +  HeapWord* addr = (HeapWord*)p;
  1.1008 +  if (!_nextMarkBitMap->isMarked(addr)) {
  1.1009 +    // We definitely need to mark it, irrespective of whether we bail out
  1.1010 +    // because we're done with marking.
  1.1011 +    if (_nextMarkBitMap->parMark(addr)) {
  1.1012 +      if (!concurrent_marking_in_progress() || !_should_gray_objects)
  1.1013 +        // If we're done with concurrent marking and we're waiting for
  1.1014 +        // remark, then we're not pushing anything on the stack.
  1.1015 +        return;
  1.1016 +
  1.1017 +      // No OrderAccess::store_load() is needed. It is implicit in the
  1.1018 +      // CAS done in parMark(addr) above
  1.1019 +      HeapWord* finger = _finger;
  1.1020 +
  1.1021 +      if (addr < finger) {
  1.1022 +        if (!mark_stack_push(oop(addr))) {
  1.1023 +          if (verbose_low())
  1.1024 +            gclog_or_tty->print_cr("[global] global stack overflow "
  1.1025 +                                   "during parMark");
  1.1026 +        }
  1.1027 +      }
  1.1028 +    }
  1.1029 +  }
  1.1030 +}
  1.1031 +
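         +// Gang task that drives concurrent marking: each worker runs its CMTask
         +// in do_marking_step() increments, sleeping between increments according
         +// to the sleep factor, until marking finishes or is aborted.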
  1.1032 +class CMConcurrentMarkingTask: public AbstractGangTask {
  1.1033 +private:
  1.1034 +  ConcurrentMark*       _cm;
  1.1035 +  ConcurrentMarkThread* _cmt;
  1.1036 +
  1.1037 +public:
  1.1038 +  void work(int worker_i) {
  1.1039 +    guarantee( Thread::current()->is_ConcurrentGC_thread(),
  1.1040 +               "this should only be done by a conc GC thread" );
  1.1041 +
  1.1042 +    double start_vtime = os::elapsedVTime();
  1.1043 +
  1.1044 +    ConcurrentGCThread::stsJoin();
  1.1045 +
  1.1046 +    guarantee( (size_t)worker_i < _cm->active_tasks(), "invariant" );
  1.1047 +    CMTask* the_task = _cm->task(worker_i);
  1.1048 +    the_task->start_co_tracker();
  1.1049 +    the_task->record_start_time();
  1.1050 +    if (!_cm->has_aborted()) {
  1.1051 +      do {
  1.1052 +        double start_vtime_sec = os::elapsedVTime();
  1.1053 +        double start_time_sec = os::elapsedTime();
  1.1054 +        the_task->do_marking_step(10.0);
  1.1055 +        double end_time_sec = os::elapsedTime();
  1.1056 +        double end_vtime_sec = os::elapsedVTime();
  1.1057 +        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
  1.1058 +        double elapsed_time_sec = end_time_sec - start_time_sec;
  1.1059 +        _cm->clear_has_overflown();
  1.1060 +
  1.1061 +        bool ret = _cm->do_yield_check(worker_i);
  1.1062 +
  1.1063 +        jlong sleep_time_ms;
  1.1064 +        if (!_cm->has_aborted() && the_task->has_aborted()) {
  1.1065 +          sleep_time_ms =
  1.1066 +            (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
  1.1067 +          ConcurrentGCThread::stsLeave();
  1.1068 +          os::sleep(Thread::current(), sleep_time_ms, false);
  1.1069 +          ConcurrentGCThread::stsJoin();
  1.1070 +        }
  1.1071 +        double end_time2_sec = os::elapsedTime();
  1.1072 +        double elapsed_time2_sec = end_time2_sec - start_time_sec;
  1.1073 +
  1.1074 +        the_task->update_co_tracker();
  1.1075 +
  1.1076 +#if 0
  1.1077 +          gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
  1.1078 +                                 "overhead %1.4lf",
  1.1079 +                                 elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
  1.1080 +                                 the_task->conc_overhead(os::elapsedTime()) * 8.0);
  1.1081 +          gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
  1.1082 +                                 elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
  1.1083 +#endif
  1.1084 +      } while (!_cm->has_aborted() && the_task->has_aborted());
  1.1085 +    }
  1.1086 +    the_task->record_end_time();
  1.1087 +    guarantee( !the_task->has_aborted() || _cm->has_aborted(), "invariant" );
  1.1088 +
  1.1089 +    ConcurrentGCThread::stsLeave();
  1.1090 +
  1.1091 +    double end_vtime = os::elapsedVTime();
  1.1092 +    the_task->update_co_tracker(true);
  1.1093 +    _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
  1.1094 +  }
  1.1095 +
  1.1096 +  CMConcurrentMarkingTask(ConcurrentMark* cm,
  1.1097 +                          ConcurrentMarkThread* cmt) :
  1.1098 +      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
  1.1099 +
  1.1100 +  ~CMConcurrentMarkingTask() { }
  1.1101 +};
  1.1102 +
  1.1103 +void ConcurrentMark::markFromRoots() {
  1.1104 +  // we might be tempted to assert that:
  1.1105 +  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  1.1106 +  //        "inconsistent argument?");
  1.1107 +  // However that wouldn't be right, because it's possible that
  1.1108 +  // a safepoint is indeed in progress as a younger generation
  1.1109 +  // stop-the-world GC happens even as we mark in this generation.
  1.1110 +
  1.1111 +  _restart_for_overflow = false;
  1.1112 +
  1.1113 +  set_phase(MAX2((size_t) 1, parallel_marking_threads()), true);
  1.1114 +
  1.1115 +  CMConcurrentMarkingTask markingTask(this, cmThread());
  1.1116 +  if (parallel_marking_threads() > 0)
  1.1117 +    _parallel_workers->run_task(&markingTask);
  1.1118 +  else
  1.1119 +    markingTask.work(0);
  1.1120 +  print_stats();
  1.1121 +}
  1.1122 +
  1.1123 +void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  1.1124 +  // world is stopped at this checkpoint
  1.1125 +  assert(SafepointSynchronize::is_at_safepoint(),
  1.1126 +         "world should be stopped");
  1.1127 +  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  1.1128 +
  1.1129 +  // If a full collection has happened, we shouldn't do this.
  1.1130 +  if (has_aborted()) {
  1.1131 +    g1h->set_marking_complete(); // So bitmap clearing isn't confused
  1.1132 +    return;
  1.1133 +  }
  1.1134 +
  1.1135 +  G1CollectorPolicy* g1p = g1h->g1_policy();
  1.1136 +  g1p->record_concurrent_mark_remark_start();
  1.1137 +
  1.1138 +  double start = os::elapsedTime();
  1.1139 +  GCOverheadReporter::recordSTWStart(start);
  1.1140 +
  1.1141 +  checkpointRootsFinalWork();
  1.1142 +
  1.1143 +  double mark_work_end = os::elapsedTime();
  1.1144 +
  1.1145 +  weakRefsWork(clear_all_soft_refs);
  1.1146 +
  1.1147 +  if (has_overflown()) {
  1.1148 +    // Oops.  We overflowed.  Restart concurrent marking.
  1.1149 +    _restart_for_overflow = true;
  1.1150 +    // Clear the flag. We do not need it any more.
  1.1151 +    clear_has_overflown();
  1.1152 +    if (G1TraceMarkStackOverflow)
  1.1153 +      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
  1.1154 +  } else {
  1.1155 +    // We're done with marking.
  1.1156 +    JavaThread::satb_mark_queue_set().set_active_all_threads(false);
  1.1157 +  }
  1.1158 +
  1.1159 +#if VERIFY_OBJS_PROCESSED
  1.1160 +  _scan_obj_cl.objs_processed = 0;
  1.1161 +  ThreadLocalObjQueue::objs_enqueued = 0;
  1.1162 +#endif
  1.1163 +
  1.1164 +  // Statistics
  1.1165 +  double now = os::elapsedTime();
  1.1166 +  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  1.1167 +  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  1.1168 +  _remark_times.add((now - start) * 1000.0);
  1.1169 +
  1.1170 +  GCOverheadReporter::recordSTWEnd(now);
  1.1171 +  for (int i = 0; i < (int)_max_task_num; ++i)
  1.1172 +    _tasks[i]->disable_co_tracker();
  1.1173 +  _cleanup_co_tracker.enable();
  1.1174 +  _cleanup_co_tracker.reset(cleanup_task_overhead());
  1.1175 +  g1p->record_concurrent_mark_remark_end();
  1.1176 +}
  1.1177 +
  1.1178 +
  1.1179 +#define CARD_BM_TEST_MODE 0
  1.1180 +
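         +// Heap-region closure that walks the objects marked in the bitmap between
         +// top_at_conc_mark_count and next_top_at_mark_start, accumulating the
         +// live bytes for each region and recording liveness in the region and
         +// card bitmaps.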
  1.1181 +class CalcLiveObjectsClosure: public HeapRegionClosure {
  1.1182 +
  1.1183 +  CMBitMapRO* _bm;
  1.1184 +  ConcurrentMark* _cm;
  1.1185 +  COTracker* _co_tracker;
  1.1186 +  bool _changed;
  1.1187 +  bool _yield;
  1.1188 +  size_t _words_done;
  1.1189 +  size_t _tot_live;
  1.1190 +  size_t _tot_used;
  1.1191 +  size_t _regions_done;
  1.1192 +  double _start_vtime_sec;
  1.1193 +
  1.1194 +  BitMap* _region_bm;
  1.1195 +  BitMap* _card_bm;
  1.1196 +  intptr_t _bottom_card_num;
  1.1197 +  bool _final;
  1.1198 +
  1.1199 +  void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
  1.1200 +    for (intptr_t i = start_card_num; i <= last_card_num; i++) {
  1.1201 +#if CARD_BM_TEST_MODE
  1.1202 +      guarantee(_card_bm->at(i - _bottom_card_num),
  1.1203 +                "Should already be set.");
  1.1204 +#else
  1.1205 +      _card_bm->par_at_put(i - _bottom_card_num, 1);
  1.1206 +#endif
  1.1207 +    }
  1.1208 +  }
  1.1209 +
  1.1210 +public:
  1.1211 +  CalcLiveObjectsClosure(bool final,
  1.1212 +                         CMBitMapRO *bm, ConcurrentMark *cm,
  1.1213 +                         BitMap* region_bm, BitMap* card_bm,
  1.1214 +                         COTracker* co_tracker) :
  1.1215 +    _bm(bm), _cm(cm), _changed(false), _yield(true),
  1.1216 +    _words_done(0), _tot_live(0), _tot_used(0),
  1.1217 +    _region_bm(region_bm), _card_bm(card_bm),
  1.1218 +    _final(final), _co_tracker(co_tracker),
  1.1219 +    _regions_done(0), _start_vtime_sec(0.0)
  1.1220 +  {
  1.1221 +    _bottom_card_num =
  1.1222 +      intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
  1.1223 +               CardTableModRefBS::card_shift);
  1.1224 +  }
  1.1225 +
  1.1226 +  bool doHeapRegion(HeapRegion* hr) {
  1.1227 +    if (_co_tracker != NULL)
  1.1228 +      _co_tracker->update();
  1.1229 +
  1.1230 +    if (!_final && _regions_done == 0)
  1.1231 +      _start_vtime_sec = os::elapsedVTime();
  1.1232 +
  1.1233 +    if (hr->continuesHumongous()) return false;
  1.1234 +
  1.1235 +    HeapWord* nextTop = hr->next_top_at_mark_start();
  1.1236 +    HeapWord* start   = hr->top_at_conc_mark_count();
  1.1237 +    assert(hr->bottom() <= start && start <= hr->end() &&
  1.1238 +           hr->bottom() <= nextTop && nextTop <= hr->end() &&
  1.1239 +           start <= nextTop,
  1.1240 +           "Preconditions.");
  1.1241 +    // Otherwise, record the number of words we'll examine.
  1.1242 +    size_t words_done = (nextTop - start);
  1.1243 +    // Find the first marked object at or after "start".
  1.1244 +    start = _bm->getNextMarkedWordAddress(start, nextTop);
  1.1245 +    size_t marked_bytes = 0;
  1.1246 +
  1.1247 +    // Below, the term "card num" means the result of shifting an address
  1.1248 +    // by the card shift -- address 0 corresponds to card number 0.  One
  1.1249 +    // must subtract the card num of the bottom of the heap to obtain a
  1.1250 +    // card table index.
  1.1251 +    // The first card num of the sequence of live cards currently being
  1.1252 +    // constructed.  -1 ==> no sequence.
  1.1253 +    intptr_t start_card_num = -1;
  1.1254 +    // The last card num of the sequence of live cards currently being
  1.1255 +    // constructed.  -1 ==> no sequence.
  1.1256 +    intptr_t last_card_num = -1;
  1.1257 +
  1.1258 +    while (start < nextTop) {
  1.1259 +      if (_yield && _cm->do_yield_check()) {
  1.1260 +        // We yielded.  It might be for a full collection, in which case
  1.1261 +        // all bets are off; terminate the traversal.
  1.1262 +        if (_cm->has_aborted()) {
  1.1263 +          _changed = false;
  1.1264 +          return true;
  1.1265 +        } else {
  1.1266 +          // Otherwise, it might be a collection pause, and the region
  1.1267 +          // we're looking at might be in the collection set.  We'll
  1.1268 +          // abandon this region.
  1.1269 +          return false;
  1.1270 +        }
  1.1271 +      }
  1.1272 +      oop obj = oop(start);
  1.1273 +      int obj_sz = obj->size();
  1.1274 +      // The card num of the start of the current object.
  1.1275 +      intptr_t obj_card_num =
  1.1276 +        intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
  1.1277 +
  1.1278 +      HeapWord* obj_last = start + obj_sz - 1;
  1.1279 +      intptr_t obj_last_card_num =
  1.1280 +        intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);
  1.1281 +
  1.1282 +      if (obj_card_num != last_card_num) {
  1.1283 +        if (start_card_num == -1) {
  1.1284 +          assert(last_card_num == -1, "Both or neither.");
  1.1285 +          start_card_num = obj_card_num;
  1.1286 +        } else {
  1.1287 +          assert(last_card_num != -1, "Both or neither.");
  1.1288 +          assert(obj_card_num >= last_card_num, "Inv");
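         +          // If this object starts more than one card past the end of
         +          // the current run of live cards, close out that run; adjacent
         +          // or overlapping cards simply extend the current run.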
  1.1289 +          if ((obj_card_num - last_card_num) > 1) {
  1.1290 +            // Mark the last run, and start a new one.
  1.1291 +            mark_card_num_range(start_card_num, last_card_num);
  1.1292 +            start_card_num = obj_card_num;
  1.1293 +          }
  1.1294 +        }
  1.1295 +#if CARD_BM_TEST_MODE
  1.1296 +        /*
  1.1297 +        gclog_or_tty->print_cr("Setting bits from %d/%d.",
  1.1298 +                               obj_card_num - _bottom_card_num,
  1.1299 +                               obj_last_card_num - _bottom_card_num);
  1.1300 +        */
  1.1301 +        for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
  1.1302 +          _card_bm->par_at_put(j - _bottom_card_num, 1);
  1.1303 +        }
  1.1304 +#endif
  1.1305 +      }
  1.1306 +      // In any case, we set the last card num.
  1.1307 +      last_card_num = obj_last_card_num;
  1.1308 +
  1.1309 +      marked_bytes += obj_sz * HeapWordSize;
  1.1310 +      // Find the next marked object after this one.
  1.1311 +      start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
  1.1312 +      _changed = true;
  1.1313 +    }
  1.1314 +    // Handle the last range, if any.
  1.1315 +    if (start_card_num != -1)
  1.1316 +      mark_card_num_range(start_card_num, last_card_num);
  1.1317 +    if (_final) {
  1.1318 +      // Mark the allocated-since-marking portion...
  1.1319 +      HeapWord* tp = hr->top();
  1.1320 +      if (nextTop < tp) {
  1.1321 +        start_card_num =
  1.1322 +          intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
  1.1323 +        last_card_num =
  1.1324 +          intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
  1.1325 +        mark_card_num_range(start_card_num, last_card_num);
  1.1326 +        // This definitely means the region has live objects.
  1.1327 +        _region_bm->par_at_put(hr->hrs_index(), 1);
  1.1328 +      }
  1.1329 +    }
  1.1330 +
  1.1331 +    hr->add_to_marked_bytes(marked_bytes);
  1.1332 +    // Update the live region bitmap.
  1.1333 +    if (marked_bytes > 0) {
  1.1334 +      _region_bm->par_at_put(hr->hrs_index(), 1);
  1.1335 +    }
  1.1336 +    hr->set_top_at_conc_mark_count(nextTop);
  1.1337 +    _tot_live += hr->next_live_bytes();
  1.1338 +    _tot_used += hr->used();
  1.1339 +    _words_done = words_done;
  1.1340 +
  1.1341 +    if (!_final) {
  1.1342 +      ++_regions_done;
  1.1343 +      if (_regions_done % 10 == 0) {
  1.1344 +        double end_vtime_sec = os::elapsedVTime();
  1.1345 +        double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
  1.1346 +        if (elapsed_vtime_sec > (10.0 / 1000.0)) {
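         +          // Throttle the concurrent counting: once more than ~10ms of
         +          // virtual time has been spent since the last pause, sleep for
         +          // a period proportional to the time spent (scaled by the
         +          // cleanup sleep factor) so the concurrent overhead stays bounded.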
  1.1347 +          jlong sleep_time_ms =
  1.1348 +            (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
  1.1349 +#if 0
  1.1350 +          gclog_or_tty->print_cr("CL: elapsed %1.4lf ms, sleep %1.4lf ms, "
  1.1351 +                                 "overhead %1.4lf",
  1.1352 +                                 elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
  1.1353 +                                 _co_tracker->concOverhead(os::elapsedTime()));
  1.1354 +#endif
  1.1355 +          os::sleep(Thread::current(), sleep_time_ms, false);
  1.1356 +          _start_vtime_sec = end_vtime_sec;
  1.1357 +        }
  1.1358 +      }
  1.1359 +    }
  1.1360 +
  1.1361 +    return false;
  1.1362 +  }
  1.1363 +
  1.1364 +  bool changed() { return _changed;  }
  1.1365 +  void reset()   { _changed = false; _words_done = 0; }
  1.1366 +  void no_yield() { _yield = false; }
  1.1367 +  size_t words_done() { return _words_done; }
  1.1368 +  size_t tot_live() { return _tot_live; }
  1.1369 +  size_t tot_used() { return _tot_used; }
  1.1370 +};
  1.1371 +
  1.1372 +
  1.1373 +void ConcurrentMark::calcDesiredRegions() {
  1.1374 +  guarantee( _cleanup_co_tracker.enabled(), "invariant" );
  1.1375 +  _cleanup_co_tracker.start();
  1.1376 +
  1.1377 +  _region_bm.clear();
  1.1378 +  _card_bm.clear();
  1.1379 +  CalcLiveObjectsClosure calccl(false /*final*/,
  1.1380 +                                nextMarkBitMap(), this,
  1.1381 +                                &_region_bm, &_card_bm,
  1.1382 +                                &_cleanup_co_tracker);
  1.1383 +  G1CollectedHeap *g1h = G1CollectedHeap::heap();
  1.1384 +  g1h->heap_region_iterate(&calccl);
  1.1385 +
  1.1386 +  do {
  1.1387 +    calccl.reset();
  1.1388 +    g1h->heap_region_iterate(&calccl);
  1.1389 +  } while (calccl.changed());
  1.1390 +
  1.1391 +  _cleanup_co_tracker.update(true);
  1.1392 +}
  1.1393 +
  1.1394 +class G1ParFinalCountTask: public AbstractGangTask {
  1.1395 +protected:
  1.1396 +  G1CollectedHeap* _g1h;
  1.1397 +  CMBitMap* _bm;
  1.1398 +  size_t _n_workers;
  1.1399 +  size_t *_live_bytes;
  1.1400 +  size_t *_used_bytes;
  1.1401 +  BitMap* _region_bm;
  1.1402 +  BitMap* _card_bm;
  1.1403 +public:
  1.1404 +  G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
  1.1405 +                      BitMap* region_bm, BitMap* card_bm) :
  1.1406 +    AbstractGangTask("G1 final counting"), _g1h(g1h),
  1.1407 +    _bm(bm), _region_bm(region_bm), _card_bm(card_bm)
  1.1408 +  {
  1.1409 +    if (ParallelGCThreads > 0)
  1.1410 +      _n_workers = _g1h->workers()->total_workers();
  1.1411 +    else
  1.1412 +      _n_workers = 1;
  1.1413 +    _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
  1.1414 +    _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
  1.1415 +  }
  1.1416 +
  1.1417 +  ~G1ParFinalCountTask() {
  1.1418 +    FREE_C_HEAP_ARRAY(size_t, _live_bytes);
  1.1419 +    FREE_C_HEAP_ARRAY(size_t, _used_bytes);
  1.1420 +  }
  1.1421 +
  1.1422 +  void work(int i) {
  1.1423 +    CalcLiveObjectsClosure calccl(true /*final*/,
  1.1424 +                                  _bm, _g1h->concurrent_mark(),
  1.1425 +                                  _region_bm, _card_bm,
  1.1426 +                                  NULL /* CO tracker */);
  1.1427 +    calccl.no_yield();
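         +    // The last argument to heap_region_par_iterate_chunked() below is
         +    // the claim value for this parallel pass over the regions: this
         +    // counting pass uses 1, while the note-end and RS-scrub passes in
         +    // cleanup use 2 and 3, so each pass claims every region exactly once.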
  1.1428 +    if (ParallelGCThreads > 0) {
  1.1429 +      _g1h->heap_region_par_iterate_chunked(&calccl, i, 1);
  1.1430 +    } else {
  1.1431 +      _g1h->heap_region_iterate(&calccl);
  1.1432 +    }
  1.1433 +    assert(calccl.complete(), "Shouldn't have yielded!");
  1.1434 +
  1.1435 +    guarantee( (size_t)i < _n_workers, "invariant" );
  1.1436 +    _live_bytes[i] = calccl.tot_live();
  1.1437 +    _used_bytes[i] = calccl.tot_used();
  1.1438 +  }
  1.1439 +  size_t live_bytes()  {
  1.1440 +    size_t live_bytes = 0;
  1.1441 +    for (size_t i = 0; i < _n_workers; ++i)
  1.1442 +      live_bytes += _live_bytes[i];
  1.1443 +    return live_bytes;
  1.1444 +  }
  1.1445 +  size_t used_bytes()  {
  1.1446 +    size_t used_bytes = 0;
  1.1447 +    for (size_t i = 0; i < _n_workers; ++i)
  1.1448 +      used_bytes += _used_bytes[i];
  1.1449 +    return used_bytes;
  1.1450 +  }
  1.1451 +};
  1.1452 +
  1.1453 +class G1ParNoteEndTask;
  1.1454 +
  1.1455 +class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  1.1456 +  G1CollectedHeap* _g1;
  1.1457 +  int _worker_num;
  1.1458 +  size_t _max_live_bytes;
  1.1459 +  size_t _regions_claimed;
  1.1460 +  size_t _freed_bytes;
  1.1461 +  size_t _cleared_h_regions;
  1.1462 +  size_t _freed_regions;
  1.1463 +  UncleanRegionList* _unclean_region_list;
  1.1464 +  double _claimed_region_time;
  1.1465 +  double _max_region_time;
  1.1466 +
  1.1467 +public:
  1.1468 +  G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
  1.1469 +                             UncleanRegionList* list,
  1.1470 +                             int worker_num);
  1.1471 +  size_t freed_bytes() { return _freed_bytes; }
  1.1472 +  size_t cleared_h_regions() { return _cleared_h_regions; }
  1.1473 +  size_t freed_regions() { return  _freed_regions; }
  1.1474 +  UncleanRegionList* unclean_region_list() {
  1.1475 +    return _unclean_region_list;
  1.1476 +  }
  1.1477 +
  1.1478 +  bool doHeapRegion(HeapRegion *r);
  1.1479 +
  1.1480 +  size_t max_live_bytes() { return _max_live_bytes; }
  1.1481 +  size_t regions_claimed() { return _regions_claimed; }
  1.1482 +  double claimed_region_time_sec() { return _claimed_region_time; }
  1.1483 +  double max_region_time_sec() { return _max_region_time; }
  1.1484 +};
  1.1485 +
  1.1486 +class G1ParNoteEndTask: public AbstractGangTask {
  1.1487 +  friend class G1NoteEndOfConcMarkClosure;
  1.1488 +protected:
  1.1489 +  G1CollectedHeap* _g1h;
  1.1490 +  size_t _max_live_bytes;
  1.1491 +  size_t _freed_bytes;
  1.1492 +  ConcurrentMark::ParCleanupThreadState** _par_cleanup_thread_state;
  1.1493 +public:
  1.1494 +  G1ParNoteEndTask(G1CollectedHeap* g1h,
  1.1495 +                   ConcurrentMark::ParCleanupThreadState**
  1.1496 +                   par_cleanup_thread_state) :
  1.1497 +    AbstractGangTask("G1 note end"), _g1h(g1h),
  1.1498 +    _max_live_bytes(0), _freed_bytes(0),
  1.1499 +    _par_cleanup_thread_state(par_cleanup_thread_state)
  1.1500 +  {}
  1.1501 +
  1.1502 +  void work(int i) {
  1.1503 +    double start = os::elapsedTime();
  1.1504 +    G1NoteEndOfConcMarkClosure g1_note_end(_g1h,
  1.1505 +                                           &_par_cleanup_thread_state[i]->list,
  1.1506 +                                           i);
  1.1507 +    if (ParallelGCThreads > 0) {
  1.1508 +      _g1h->heap_region_par_iterate_chunked(&g1_note_end, i, 2);
  1.1509 +    } else {
  1.1510 +      _g1h->heap_region_iterate(&g1_note_end);
  1.1511 +    }
  1.1512 +    assert(g1_note_end.complete(), "Shouldn't have yielded!");
  1.1513 +
  1.1514 +    // Now finish up freeing the current thread's regions.
  1.1515 +    _g1h->finish_free_region_work(g1_note_end.freed_bytes(),
  1.1516 +                                  g1_note_end.cleared_h_regions(),
  1.1517 +                                  0, NULL);
  1.1518 +    {
  1.1519 +      MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  1.1520 +      _max_live_bytes += g1_note_end.max_live_bytes();
  1.1521 +      _freed_bytes += g1_note_end.freed_bytes();
  1.1522 +    }
  1.1523 +    double end = os::elapsedTime();
  1.1524 +    if (G1PrintParCleanupStats) {
  1.1525 +      gclog_or_tty->print("     Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
  1.1526 +                          "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
  1.1527 +                          i, start, end, (end-start)*1000.0,
  1.1528 +                          g1_note_end.regions_claimed(),
  1.1529 +                          g1_note_end.claimed_region_time_sec()*1000.0,
  1.1530 +                          g1_note_end.max_region_time_sec()*1000.0);
  1.1531 +    }
  1.1532 +  }
  1.1533 +  size_t max_live_bytes() { return _max_live_bytes; }
  1.1534 +  size_t freed_bytes() { return _freed_bytes; }
  1.1535 +};
  1.1536 +
  1.1537 +class G1ParScrubRemSetTask: public AbstractGangTask {
  1.1538 +protected:
  1.1539 +  G1RemSet* _g1rs;
  1.1540 +  BitMap* _region_bm;
  1.1541 +  BitMap* _card_bm;
  1.1542 +public:
  1.1543 +  G1ParScrubRemSetTask(G1CollectedHeap* g1h,
  1.1544 +                       BitMap* region_bm, BitMap* card_bm) :
  1.1545 +    AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
  1.1546 +    _region_bm(region_bm), _card_bm(card_bm)
  1.1547 +  {}
  1.1548 +
  1.1549 +  void work(int i) {
  1.1550 +    if (ParallelGCThreads > 0) {
  1.1551 +      _g1rs->scrub_par(_region_bm, _card_bm, i, 3);
  1.1552 +    } else {
  1.1553 +      _g1rs->scrub(_region_bm, _card_bm);
  1.1554 +    }
  1.1555 +  }
  1.1556 +
  1.1557 +};
  1.1558 +
  1.1559 +G1NoteEndOfConcMarkClosure::
  1.1560 +G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
  1.1561 +                           UncleanRegionList* list,
  1.1562 +                           int worker_num)
  1.1563 +  : _g1(g1), _worker_num(worker_num),
  1.1564 +    _max_live_bytes(0), _regions_claimed(0),
  1.1565 +    _freed_bytes(0), _cleared_h_regions(0), _freed_regions(0),
  1.1566 +    _claimed_region_time(0.0), _max_region_time(0.0),
  1.1567 +    _unclean_region_list(list)
  1.1568 +{}
  1.1569 +
  1.1570 +bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *r) {
  1.1571 +  // We use a claim value of zero here because all regions
  1.1572 +  // were claimed with value 1 in the FinalCount task.
  1.1573 +  r->reset_gc_time_stamp();
  1.1574 +  if (!r->continuesHumongous()) {
  1.1575 +    double start = os::elapsedTime();
  1.1576 +    _regions_claimed++;
  1.1577 +    r->note_end_of_marking();
  1.1578 +    _max_live_bytes += r->max_live_bytes();
  1.1579 +    _g1->free_region_if_totally_empty_work(r,
  1.1580 +                                           _freed_bytes,
  1.1581 +                                           _cleared_h_regions,
  1.1582 +                                           _freed_regions,
  1.1583 +                                           _unclean_region_list,
  1.1584 +                                           true /*par*/);
  1.1585 +    double region_time = (os::elapsedTime() - start);
  1.1586 +    _claimed_region_time += region_time;
  1.1587 +    if (region_time > _max_region_time) _max_region_time = region_time;
  1.1588 +  }
  1.1589 +  return false;
  1.1590 +}
  1.1591 +
  1.1592 +void ConcurrentMark::cleanup() {
  1.1593 +  // world is stopped at this checkpoint
  1.1594 +  assert(SafepointSynchronize::is_at_safepoint(),
  1.1595 +         "world should be stopped");
  1.1596 +  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  1.1597 +
  1.1598 +  // If a full collection has happened, we shouldn't do this.
  1.1599 +  if (has_aborted()) {
  1.1600 +    g1h->set_marking_complete(); // So bitmap clearing isn't confused
  1.1601 +    return;
  1.1602 +  }
  1.1603 +
  1.1604 +  _cleanup_co_tracker.disable();
  1.1605 +
  1.1606 +  G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
  1.1607 +  g1p->record_concurrent_mark_cleanup_start();
  1.1608 +
  1.1609 +  double start = os::elapsedTime();
  1.1610 +  GCOverheadReporter::recordSTWStart(start);
  1.1611 +
  1.1612 +  // Do counting once more with the world stopped for good measure.
  1.1613 +  G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
  1.1614 +                                        &_region_bm, &_card_bm);
  1.1615 +  if (ParallelGCThreads > 0) {
  1.1616 +    int n_workers = g1h->workers()->total_workers();
  1.1617 +    g1h->set_par_threads(n_workers);
  1.1618 +    g1h->workers()->run_task(&g1_par_count_task);
  1.1619 +    g1h->set_par_threads(0);
  1.1620 +  } else {
  1.1621 +    g1_par_count_task.work(0);
  1.1622 +  }
  1.1623 +
  1.1624 +  size_t known_garbage_bytes =
  1.1625 +    g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
  1.1626 +#if 0
  1.1627 +  gclog_or_tty->print_cr("used %1.2lf, live %1.2lf, garbage %1.2lf",
  1.1628 +                         (double) g1_par_count_task.used_bytes() / (double) (1024 * 1024),
  1.1629 +                         (double) g1_par_count_task.live_bytes() / (double) (1024 * 1024),
  1.1630 +                         (double) known_garbage_bytes / (double) (1024 * 1024));
  1.1631 +#endif // 0
  1.1632 +  g1p->set_known_garbage_bytes(known_garbage_bytes);
  1.1633 +
  1.1634 +  size_t start_used_bytes = g1h->used();
  1.1635 +  _at_least_one_mark_complete = true;
  1.1636 +  g1h->set_marking_complete();
  1.1637 +
  1.1638 +  double count_end = os::elapsedTime();
  1.1639 +  double this_final_counting_time = (count_end - start);
  1.1640 +  if (G1PrintParCleanupStats) {
  1.1641 +    gclog_or_tty->print_cr("Cleanup:");
  1.1642 +    gclog_or_tty->print_cr("  Finalize counting: %8.3f ms",
  1.1643 +                           this_final_counting_time*1000.0);
  1.1644 +  }
  1.1645 +  _total_counting_time += this_final_counting_time;
  1.1646 +
  1.1647 +  // Install newly created mark bitMap as "prev".
  1.1648 +  swapMarkBitMaps();
  1.1649 +
  1.1650 +  g1h->reset_gc_time_stamp();
  1.1651 +
  1.1652 +  // Note end of marking in all heap regions.
  1.1653 +  double note_end_start = os::elapsedTime();
  1.1654 +  G1ParNoteEndTask g1_par_note_end_task(g1h, _par_cleanup_thread_state);
  1.1655 +  if (ParallelGCThreads > 0) {
  1.1656 +    int n_workers = g1h->workers()->total_workers();
  1.1657 +    g1h->set_par_threads(n_workers);
  1.1658 +    g1h->workers()->run_task(&g1_par_note_end_task);
  1.1659 +    g1h->set_par_threads(0);
  1.1660 +  } else {
  1.1661 +    g1_par_note_end_task.work(0);
  1.1662 +  }
  1.1663 +  g1h->set_unclean_regions_coming(true);
  1.1664 +  double note_end_end = os::elapsedTime();
  1.1665 +  // Tell the mutators that there might be unclean regions coming...
  1.1666 +  if (G1PrintParCleanupStats) {
  1.1667 +    gclog_or_tty->print_cr("  note end of marking: %8.3f ms.",
  1.1668 +                           (note_end_end - note_end_start)*1000.0);
  1.1669 +  }
  1.1670 +
  1.1671 +  // Now we "scrub" remembered sets.  Note that we must do this before the
  1.1672 +  // call below, since it affects the metric by which we sort the heap
  1.1673 +  // regions.
  1.1674 +  if (G1ScrubRemSets) {
  1.1675 +    double rs_scrub_start = os::elapsedTime();
  1.1676 +    G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
  1.1677 +    if (ParallelGCThreads > 0) {
  1.1678 +      int n_workers = g1h->workers()->total_workers();
  1.1679 +      g1h->set_par_threads(n_workers);
  1.1680 +      g1h->workers()->run_task(&g1_par_scrub_rs_task);
  1.1681 +      g1h->set_par_threads(0);
  1.1682 +    } else {
  1.1683 +      g1_par_scrub_rs_task.work(0);
  1.1684 +    }
  1.1685 +
  1.1686 +    double rs_scrub_end = os::elapsedTime();
  1.1687 +    double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
  1.1688 +    _total_rs_scrub_time += this_rs_scrub_time;
  1.1689 +  }
  1.1690 +
  1.1691 +  // this will also free any regions totally full of garbage objects,
  1.1692 +  // and sort the regions.
  1.1693 +  g1h->g1_policy()->record_concurrent_mark_cleanup_end(
  1.1694 +                        g1_par_note_end_task.freed_bytes(),
  1.1695 +                        g1_par_note_end_task.max_live_bytes());
  1.1696 +
  1.1697 +  // Statistics.
  1.1698 +  double end = os::elapsedTime();
  1.1699 +  _cleanup_times.add((end - start) * 1000.0);
  1.1700 +  GCOverheadReporter::recordSTWEnd(end);
  1.1701 +
  1.1702 +  // G1CollectedHeap::heap()->print();
  1.1703 +  // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
  1.1704 +  // G1CollectedHeap::heap()->get_gc_time_stamp());
  1.1705 +
  1.1706 +  if (PrintGC || PrintGCDetails) {
  1.1707 +    g1h->print_size_transition(gclog_or_tty,
  1.1708 +                               start_used_bytes,
  1.1709 +                               g1h->used(),
  1.1710 +                               g1h->capacity());
  1.1711 +  }
  1.1712 +
  1.1713 +  size_t cleaned_up_bytes = start_used_bytes - g1h->used();
  1.1714 +  g1p->decrease_known_garbage_bytes(cleaned_up_bytes);
  1.1715 +
  1.1716 +  // We need to make this be a "collection" so any collection pause that
  1.1717 +  // races with it goes around and waits for completeCleanup to finish.
  1.1718 +  g1h->increment_total_collections();
  1.1719 +
  1.1720 +#ifndef PRODUCT
  1.1721 +  if (G1VerifyConcMark) {
  1.1722 +    G1CollectedHeap::heap()->prepare_for_verify();
  1.1723 +    G1CollectedHeap::heap()->verify(true,false);
  1.1724 +  }
  1.1725 +#endif
  1.1726 +}
  1.1727 +
  1.1728 +void ConcurrentMark::completeCleanup() {
  1.1729 +  // A full collection intervened.
  1.1730 +  if (has_aborted()) return;
  1.1731 +
  1.1732 +  int first = 0;
  1.1733 +  int last = (int)MAX2(ParallelGCThreads, (size_t)1);
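         +  // Walk the per-worker unclean region lists built during the parallel
         +  // note-end phase: clear each region's remembered set and hand the
         +  // region back to the heap's global unclean list.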
  1.1734 +  for (int t = 0; t < last; t++) {
  1.1735 +    UncleanRegionList* list = &_par_cleanup_thread_state[t]->list;
  1.1736 +    assert(list->well_formed(), "Inv");
  1.1737 +    HeapRegion* hd = list->hd();
  1.1738 +    while (hd != NULL) {
  1.1739 +      // Now finish up the other stuff.
  1.1740 +      hd->rem_set()->clear();
  1.1741 +      HeapRegion* next_hd = hd->next_from_unclean_list();
  1.1742 +      (void)list->pop();
  1.1743 +      guarantee(list->hd() == next_hd, "how not?");
  1.1744 +      _g1h->put_region_on_unclean_list(hd);
  1.1745 +      if (!hd->isHumongous()) {
  1.1746 +        // This adds 1 to the _free_regions count.
  1.1747 +        _g1h->finish_free_region_work(0, 0, 1, NULL);
  1.1748 +      }
  1.1749 +      hd = list->hd();
  1.1750 +      guarantee(hd == next_hd, "how not?");
  1.1751 +    }
  1.1752 +  }
  1.1753 +}
  1.1754 +
  1.1755 +
  1.1756 +class G1CMIsAliveClosure: public BoolObjectClosure {
  1.1757 +  G1CollectedHeap* _g1;
  1.1758 + public:
  1.1759 +  G1CMIsAliveClosure(G1CollectedHeap* g1) :
  1.1760 +    _g1(g1)
  1.1761 +  {}
  1.1762 +
  1.1763 +  void do_object(oop obj) {
  1.1764 +    assert(false, "not to be invoked");
  1.1765 +  }
  1.1766 +  bool do_object_b(oop obj) {
  1.1767 +    HeapWord* addr = (HeapWord*)obj;
  1.1768 +    return addr != NULL &&
  1.1769 +           (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
  1.1770 +  }
  1.1771 +};
  1.1772 +
  1.1773 +class G1CMKeepAliveClosure: public OopClosure {
  1.1774 +  G1CollectedHeap* _g1;
  1.1775 +  ConcurrentMark*  _cm;
  1.1776 +  CMBitMap*        _bitMap;
  1.1777 + public:
  1.1778 +  G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
  1.1779 +                       CMBitMap* bitMap) :
  1.1780 +    _g1(g1), _cm(cm),
  1.1781 +    _bitMap(bitMap) {}
  1.1782 +
  1.1783 +  void do_oop(narrowOop* p) {
  1.1784 +    guarantee(false, "NYI");
  1.1785 +  }
  1.1786 +
  1.1787 +  void do_oop(oop* p) {
  1.1788 +    oop thisOop = *p;
  1.1789 +    HeapWord* addr = (HeapWord*)thisOop;
  1.1790 +    if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(thisOop)) {
  1.1791 +      _bitMap->mark(addr);
  1.1792 +      _cm->mark_stack_push(thisOop);
  1.1793 +    }
  1.1794 +  }
  1.1795 +};
  1.1796 +
  1.1797 +class G1CMDrainMarkingStackClosure: public VoidClosure {
  1.1798 +  CMMarkStack*                  _markStack;
  1.1799 +  CMBitMap*                     _bitMap;
  1.1800 +  G1CMKeepAliveClosure*         _oopClosure;
  1.1801 + public:
  1.1802 +  G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
  1.1803 +                               G1CMKeepAliveClosure* oopClosure) :
  1.1804 +    _bitMap(bitMap),
  1.1805 +    _markStack(markStack),
  1.1806 +    _oopClosure(oopClosure)
  1.1807 +  {}
  1.1808 +
  1.1809 +  void do_void() {
  1.1810 +    _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
  1.1811 +  }
  1.1812 +};
  1.1813 +
  1.1814 +void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
  1.1815 +  ResourceMark rm;
  1.1816 +  HandleMark   hm;
  1.1817 +  ReferencePolicy* soft_ref_policy;
  1.1818 +
  1.1819 +  // Process weak references.
  1.1820 +  if (clear_all_soft_refs) {
  1.1821 +    soft_ref_policy = new AlwaysClearPolicy();
  1.1822 +  } else {
  1.1823 +#ifdef COMPILER2
  1.1824 +    soft_ref_policy = new LRUMaxHeapPolicy();
  1.1825 +#else
  1.1826 +    soft_ref_policy = new LRUCurrentHeapPolicy();
  1.1827 +#endif
  1.1828 +  }
  1.1829 +  assert(_markStack.isEmpty(), "mark stack should be empty");
  1.1830 +
  1.1831 +  G1CollectedHeap* g1 = G1CollectedHeap::heap();
  1.1832 +  G1CMIsAliveClosure g1IsAliveClosure(g1);
  1.1833 +
  1.1834 +  G1CMKeepAliveClosure g1KeepAliveClosure(g1, this, nextMarkBitMap());
  1.1835 +  G1CMDrainMarkingStackClosure
  1.1836 +    g1DrainMarkingStackClosure(nextMarkBitMap(), &_markStack,
  1.1837 +                               &g1KeepAliveClosure);
  1.1838 +
  1.1839 +  // XXXYYY  Also: copy the parallel ref processing code from CMS.
  1.1840 +  ReferenceProcessor* rp = g1->ref_processor();
  1.1841 +  rp->process_discovered_references(soft_ref_policy,
  1.1842 +                                    &g1IsAliveClosure,
  1.1843 +                                    &g1KeepAliveClosure,
  1.1844 +                                    &g1DrainMarkingStackClosure,
  1.1845 +                                    NULL);
  1.1846 +  assert(_markStack.overflow() || _markStack.isEmpty(),
  1.1847 +         "mark stack should be empty (unless it overflowed)");
  1.1848 +  if (_markStack.overflow()) {
  1.1849 +    set_has_overflown();
  1.1850 +  }
  1.1851 +
  1.1852 +  rp->enqueue_discovered_references();
  1.1853 +  rp->verify_no_references_recorded();
  1.1854 +  assert(!rp->discovery_enabled(), "should have been disabled");
  1.1855 +
  1.1856 +  // Now clean up stale oops in SymbolTable and StringTable
  1.1857 +  SymbolTable::unlink(&g1IsAliveClosure);
  1.1858 +  StringTable::unlink(&g1IsAliveClosure);
  1.1859 +}
  1.1860 +
  1.1861 +void ConcurrentMark::swapMarkBitMaps() {
  1.1862 +  CMBitMapRO* temp = _prevMarkBitMap;
  1.1863 +  _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
  1.1864 +  _nextMarkBitMap  = (CMBitMap*)  temp;
  1.1865 +}
  1.1866 +
  1.1867 +class CMRemarkTask: public AbstractGangTask {
  1.1868 +private:
  1.1869 +  ConcurrentMark *_cm;
  1.1870 +
  1.1871 +public:
  1.1872 +  void work(int worker_i) {
  1.1873 +    // Since all available tasks are actually started, we should
  1.1874 +    // only proceed if we're supposed to be active.
  1.1875 +    if ((size_t)worker_i < _cm->active_tasks()) {
  1.1876 +      CMTask* task = _cm->task(worker_i);
  1.1877 +      task->record_start_time();
  1.1878 +      do {
  1.1879 +        task->do_marking_step(1000000000.0 /* something very large */);
  1.1880 +      } while (task->has_aborted() && !_cm->has_overflown());
  1.1881 +      // If we overflow, then we do not want to restart. We instead
  1.1882 +      // want to abort remark and do concurrent marking again.
  1.1883 +      task->record_end_time();
  1.1884 +    }
  1.1885 +  }
  1.1886 +
  1.1887 +  CMRemarkTask(ConcurrentMark* cm) :
  1.1888 +    AbstractGangTask("Par Remark"), _cm(cm) { }
  1.1889 +};
  1.1890 +
  1.1891 +void ConcurrentMark::checkpointRootsFinalWork() {
  1.1892 +  ResourceMark rm;
  1.1893 +  HandleMark   hm;
  1.1894 +  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  1.1895 +
  1.1896 +  g1h->ensure_parsability(false);
  1.1897 +
  1.1898 +  if (ParallelGCThreads > 0) {
  1.1899 +    g1h->change_strong_roots_parity();
  1.1900 +    // this is remark, so we'll use up all available threads
  1.1901 +    int active_workers = ParallelGCThreads;
  1.1902 +    set_phase(active_workers, false);
  1.1903 +
  1.1904 +    CMRemarkTask remarkTask(this);
  1.1905 +    // We will start all available threads, even if we decide that the
  1.1906 +    // active_workers will be fewer. The extra ones will just bail out
  1.1907 +    // immediately.
  1.1908 +    int n_workers = g1h->workers()->total_workers();
  1.1909 +    g1h->set_par_threads(n_workers);
  1.1910 +    g1h->workers()->run_task(&remarkTask);
  1.1911 +    g1h->set_par_threads(0);
  1.1912 +
  1.1913 +    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  1.1914 +    guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" );
  1.1915 +  } else {
  1.1916 +    g1h->change_strong_roots_parity();
  1.1917 +    // this is remark, so we'll use up all available threads
  1.1918 +    int active_workers = 1;
  1.1919 +    set_phase(active_workers, false);
  1.1920 +
  1.1921 +    CMRemarkTask remarkTask(this);
  1.1922 +    // We will start all available threads, even if we decide that the
  1.1923 +    // active_workers will be fewer. The extra ones will just bail out
  1.1924 +    // immediately.
  1.1925 +    remarkTask.work(0);
  1.1926 +
  1.1927 +    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  1.1928 +    guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" );
  1.1929 +  }
  1.1930 +
  1.1931 +  print_stats();
  1.1932 +
  1.1933 +  if (!restart_for_overflow())
  1.1934 +    set_non_marking_state();
  1.1935 +
  1.1936 +#if VERIFY_OBJS_PROCESSED
  1.1937 +  if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
  1.1938 +    gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
  1.1939 +                           _scan_obj_cl.objs_processed,
  1.1940 +                           ThreadLocalObjQueue::objs_enqueued);
  1.1941 +    guarantee(_scan_obj_cl.objs_processed ==
  1.1942 +              ThreadLocalObjQueue::objs_enqueued,
  1.1943 +              "Different number of objs processed and enqueued.");
  1.1944 +  }
  1.1945 +#endif
  1.1946 +}
  1.1947 +
  1.1948 +class ReachablePrinterOopClosure: public OopClosure {
  1.1949 +private:
  1.1950 +  G1CollectedHeap* _g1h;
  1.1951 +  CMBitMapRO*      _bitmap;
  1.1952 +  outputStream*    _out;
  1.1953 +
  1.1954 +public:
  1.1955 +  ReachablePrinterOopClosure(CMBitMapRO* bitmap, outputStream* out) :
  1.1956 +    _bitmap(bitmap), _g1h(G1CollectedHeap::heap()), _out(out) { }
  1.1957 +
  1.1958 +  void do_oop(narrowOop* p) {
  1.1959 +    guarantee(false, "NYI");
  1.1960 +  }
  1.1961 +
  1.1962 +  void do_oop(oop* p) {
  1.1963 +    oop         obj = *p;
  1.1964 +    const char* str = NULL;
  1.1965 +    const char* str2 = "";
  1.1966 +
  1.1967 +    if (!_g1h->is_in_g1_reserved(obj))
  1.1968 +      str = "outside G1 reserved";
  1.1969 +    else {
  1.1970 +      HeapRegion* hr  = _g1h->heap_region_containing(obj);
  1.1971 +      guarantee( hr != NULL, "invariant" );
  1.1972 +      if (hr->obj_allocated_since_prev_marking(obj)) {
  1.1973 +        str = "over TAMS";
  1.1974 +        if (_bitmap->isMarked((HeapWord*) obj))
  1.1975 +          str2 = " AND MARKED";
  1.1976 +      } else if (_bitmap->isMarked((HeapWord*) obj))
  1.1977 +        str = "marked";
  1.1978 +      else
  1.1979 +        str = "#### NOT MARKED ####";
  1.1980 +    }
  1.1981 +
  1.1982 +    _out->print_cr("    "PTR_FORMAT" contains "PTR_FORMAT" %s%s",
  1.1983 +                   p, (void*) obj, str, str2);
  1.1984 +  }
  1.1985 +};
  1.1986 +
  1.1987 +class ReachablePrinterClosure: public BitMapClosure {
  1.1988 +private:
  1.1989 +  CMBitMapRO* _bitmap;
  1.1990 +  outputStream* _out;
  1.1991 +
  1.1992 +public:
  1.1993 +  ReachablePrinterClosure(CMBitMapRO* bitmap, outputStream* out) :
  1.1994 +    _bitmap(bitmap), _out(out) { }
  1.1995 +
  1.1996 +  bool do_bit(size_t offset) {
  1.1997 +    HeapWord* addr = _bitmap->offsetToHeapWord(offset);
  1.1998 +    ReachablePrinterOopClosure oopCl(_bitmap, _out);
  1.1999 +
  1.2000 +    _out->print_cr("  obj "PTR_FORMAT", offset %10d (marked)", addr, offset);
  1.2001 +    oop(addr)->oop_iterate(&oopCl);
  1.2002 +    _out->print_cr("");
  1.2003 +
  1.2004 +    return true;
  1.2005 +  }
  1.2006 +};
  1.2007 +
  1.2008 +class ObjInRegionReachablePrinterClosure : public ObjectClosure {
  1.2009 +private:
  1.2010 +  CMBitMapRO* _bitmap;
  1.2011 +  outputStream* _out;
  1.2012 +
  1.2013 +public:
  1.2014 +  void do_object(oop o) {
  1.2015 +    ReachablePrinterOopClosure oopCl(_bitmap, _out);
  1.2016 +
  1.2017 +    _out->print_cr("  obj "PTR_FORMAT" (over TAMS)", (void*) o);
  1.2018 +    o->oop_iterate(&oopCl);
  1.2019 +    _out->print_cr("");
  1.2020 +  }
  1.2021 +
  1.2022 +  ObjInRegionReachablePrinterClosure(CMBitMapRO* bitmap, outputStream* out) :
  1.2023 +    _bitmap(bitmap), _out(out) { }
  1.2024 +};
  1.2025 +
  1.2026 +class RegionReachablePrinterClosure : public HeapRegionClosure {
  1.2027 +private:
  1.2028 +  CMBitMapRO* _bitmap;
  1.2029 +  outputStream* _out;
  1.2030 +
  1.2031 +public:
  1.2032 +  bool doHeapRegion(HeapRegion* hr) {
  1.2033 +    HeapWord* b = hr->bottom();
  1.2034 +    HeapWord* e = hr->end();
  1.2035 +    HeapWord* t = hr->top();
  1.2036 +    HeapWord* p = hr->prev_top_at_mark_start();
  1.2037 +    _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
  1.2038 +                   "PTAMS: "PTR_FORMAT, b, e, t, p);
  1.2039 +    _out->print_cr("");
  1.2040 +
  1.2041 +    ObjInRegionReachablePrinterClosure ocl(_bitmap, _out);
  1.2042 +    hr->object_iterate_mem_careful(MemRegion(p, t), &ocl);
  1.2043 +
  1.2044 +    return false;
  1.2045 +  }
  1.2046 +
  1.2047 +  RegionReachablePrinterClosure(CMBitMapRO* bitmap,
  1.2048 +                                outputStream* out) :
  1.2049 +    _bitmap(bitmap), _out(out) { }
  1.2050 +};
  1.2051 +
  1.2052 +void ConcurrentMark::print_prev_bitmap_reachable() {
  1.2053 +  outputStream* out = gclog_or_tty;
  1.2054 +
  1.2055 +#if SEND_HEAP_DUMP_TO_FILE
  1.2056 +  guarantee(heap_dump_file == NULL, "Protocol");
  1.2057 +  char fn_buf[100];
  1.2058 +  sprintf(fn_buf, "/tmp/dump.txt.%d", os::current_process_id());
  1.2059 +  heap_dump_file = fopen(fn_buf, "w");
  1.2060 +  fileStream fstream(heap_dump_file);
  1.2061 +  out = &fstream;
  1.2062 +#endif // SEND_HEAP_DUMP_TO_FILE
  1.2063 +
  1.2064 +  RegionReachablePrinterClosure rcl(_prevMarkBitMap, out);
  1.2065 +  out->print_cr("--- ITERATING OVER REGIONS WITH PTAMS < TOP");
  1.2066 +  _g1h->heap_region_iterate(&rcl);
  1.2067 +  out->print_cr("");
  1.2068 +
  1.2069 +  ReachablePrinterClosure cl(_prevMarkBitMap, out);
  1.2070 +  out->print_cr("--- REACHABLE OBJECTS ON THE BITMAP");
  1.2071 +  _prevMarkBitMap->iterate(&cl);
  1.2072 +  out->print_cr("");
  1.2073 +
  1.2074 +#if SEND_HEAP_DUMP_TO_FILE
  1.2075 +  fclose(heap_dump_file);
  1.2076 +  heap_dump_file = NULL;
  1.2077 +#endif // SEND_HEAP_DUMP_TO_FILE
  1.2078 +}
  1.2079 +
  1.2080 +// This note is for drainAllSATBBuffers and the code in between.
  1.2081 +// In the future we could reuse a task to do this work during an
  1.2082 +// evacuation pause (since now tasks are not active and can be claimed
  1.2083 +// during an evacuation pause). This was a late change to the code and
  1.2084 +// is currently not being taken advantage of.
  1.2085 +
  1.2086 +class CMGlobalObjectClosure : public ObjectClosure {
  1.2087 +private:
  1.2088 +  ConcurrentMark* _cm;
  1.2089 +
  1.2090 +public:
  1.2091 +  void do_object(oop obj) {
  1.2092 +    _cm->deal_with_reference(obj);
  1.2093 +  }
  1.2094 +
  1.2095 +  CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
  1.2096 +};
  1.2097 +
  1.2098 +void ConcurrentMark::deal_with_reference(oop obj) {
  1.2099 +  if (verbose_high())
  1.2100 +    gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
  1.2101 +                           (void*) obj);
  1.2102 +
  1.2103 +
  1.2104 +  HeapWord* objAddr = (HeapWord*) obj;
  1.2105 +  if (_g1h->is_in_g1_reserved(objAddr)) {
  1.2106 +    tmp_guarantee_CM( obj != NULL, "is_in_g1_reserved should ensure this" );
  1.2107 +    HeapRegion* hr = _g1h->heap_region_containing(obj);
  1.2108 +    if (_g1h->is_obj_ill(obj, hr)) {
  1.2109 +      if (verbose_high())
  1.2110 +        gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
  1.2111 +                               "marked", (void*) obj);
  1.2112 +
  1.2113 +      // we need to mark it first
  1.2114 +      if (_nextMarkBitMap->parMark(objAddr)) {
  1.2115 +        // No OrderAccess::store_load() is needed. It is implicit in the
  1.2116 +        // CAS done in parMark(objAddr) above
  1.2117 +        HeapWord* finger = _finger;
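         +        // Only objects below the global finger need to be pushed on the
         +        // mark stack; anything at or above it is still ahead of the
         +        // concurrent traversal and will be visited when the finger
         +        // reaches it.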
  1.2118 +        if (objAddr < finger) {
  1.2119 +          if (verbose_high())
  1.2120 +            gclog_or_tty->print_cr("[global] below the global finger "
  1.2121 +                                   "("PTR_FORMAT"), pushing it", finger);
  1.2122 +          if (!mark_stack_push(obj)) {
  1.2123 +            if (verbose_low())
  1.2124 +              gclog_or_tty->print_cr("[global] global stack overflow during "
  1.2125 +                                     "deal_with_reference");
  1.2126 +          }
  1.2127 +        }
  1.2128 +      }
  1.2129 +    }
  1.2130 +  }
  1.2131 +}
  1.2132 +
  1.2133 +void ConcurrentMark::drainAllSATBBuffers() {
  1.2134 +  CMGlobalObjectClosure oc(this);
  1.2135 +  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  1.2136 +  satb_mq_set.set_closure(&oc);
  1.2137 +
  1.2138 +  while (satb_mq_set.apply_closure_to_completed_buffer()) {
  1.2139 +    if (verbose_medium())
  1.2140 +      gclog_or_tty->print_cr("[global] processed an SATB buffer");
  1.2141 +  }
  1.2142 +
  1.2143 +  // no need to check whether we should do this, as this is only
  1.2144 +  // called during an evacuation pause
  1.2145 +  satb_mq_set.iterate_closure_all_threads();
  1.2146 +
  1.2147 +  satb_mq_set.set_closure(NULL);
  1.2148 +  guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" );
  1.2149 +}
  1.2150 +
  1.2151 +void ConcurrentMark::markPrev(oop p) {
  1.2152 +  // Note we are overriding the read-only view of the prev map here, via
  1.2153 +  // the cast.
  1.2154 +  ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
  1.2155 +}
  1.2156 +
  1.2157 +void ConcurrentMark::clear(oop p) {
  1.2158 +  assert(p != NULL && p->is_oop(), "expected an oop");
  1.2159 +  HeapWord* addr = (HeapWord*)p;
  1.2160 +  assert(addr >= _nextMarkBitMap->startWord() &&
  1.2161 +         addr < _nextMarkBitMap->endWord(), "in a region");
  1.2162 +
  1.2163 +  _nextMarkBitMap->clear(addr);
  1.2164 +}
  1.2165 +
  1.2166 +void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
  1.2167 +  // Note we are overriding the read-only view of the prev map here, via
  1.2168 +  // the cast.
  1.2169 +  ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
  1.2170 +  _nextMarkBitMap->clearRange(mr);
  1.2171 +}
  1.2172 +
  1.2173 +HeapRegion*
  1.2174 +ConcurrentMark::claim_region(int task_num) {
  1.2175 +  // "checkpoint" the finger
  1.2176 +  HeapWord* finger = _finger;
  1.2177 +
  1.2178 +  // _heap_end will not change underneath our feet; it only changes at
  1.2179 +  // yield points.
  1.2180 +  while (finger < _heap_end) {
  1.2181 +    tmp_guarantee_CM( _g1h->is_in_g1_reserved(finger), "invariant" );
  1.2182 +
  1.2183 +    // is the gap between reading the finger and doing the CAS too long?
  1.2184 +
  1.2185 +    HeapRegion* curr_region   = _g1h->heap_region_containing(finger);
  1.2186 +    HeapWord*   bottom        = curr_region->bottom();
  1.2187 +    HeapWord*   end           = curr_region->end();
  1.2188 +    HeapWord*   limit         = curr_region->next_top_at_mark_start();
  1.2189 +
  1.2190 +    if (verbose_low())
  1.2191 +      gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
  1.2192 +                             "["PTR_FORMAT", "PTR_FORMAT"), "
  1.2193 +                             "limit = "PTR_FORMAT,
  1.2194 +                             task_num, curr_region, bottom, end, limit);
  1.2195 +
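         +    // Attempt to claim [bottom, end) by atomically advancing the global
         +    // finger from the value we read to the end of this region; cmpxchg
         +    // returns the previous value of _finger, so res == finger means the
         +    // claim succeeded.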
  1.2196 +    HeapWord* res =
  1.2197 +      (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
  1.2198 +    if (res == finger) {
  1.2199 +      // we succeeded
  1.2200 +
  1.2201 +      // notice that _finger == end cannot be guaranteed here, since
  1.2202 +      // someone else might have moved the finger even further
  1.2203 +      guarantee( _finger >= end, "the finger should have moved forward" );
  1.2204 +
  1.2205 +      if (verbose_low())
  1.2206 +        gclog_or_tty->print_cr("[%d] we were successful with region = "
  1.2207 +                               PTR_FORMAT, task_num, curr_region);
  1.2208 +
  1.2209 +      if (limit > bottom) {
  1.2210 +        if (verbose_low())
  1.2211 +          gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
  1.2212 +                                 "returning it ", task_num, curr_region);
  1.2213 +        return curr_region;
  1.2214 +      } else {
  1.2215 +        tmp_guarantee_CM( limit == bottom,
  1.2216 +                          "the region limit should be at bottom" );
  1.2217 +        if (verbose_low())
  1.2218 +          gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
  1.2219 +                                 "returning NULL", task_num, curr_region);
  1.2220 +        // we return NULL and the caller should try calling
  1.2221 +        // claim_region() again.
  1.2222 +        return NULL;
  1.2223 +      }
  1.2224 +    } else {
  1.2225 +      guarantee( _finger > finger, "the finger should have moved forward" );
  1.2226 +      if (verbose_low())
  1.2227 +        gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
  1.2228 +                               "global finger = "PTR_FORMAT", "
  1.2229 +                               "our finger = "PTR_FORMAT,
  1.2230 +                               task_num, _finger, finger);
  1.2231 +
  1.2232 +      // read it again
  1.2233 +      finger = _finger;
  1.2234 +    }
  1.2235 +  }
  1.2236 +
  1.2237 +  return NULL;
  1.2238 +}
  1.2239 +
  1.2240 +void ConcurrentMark::oops_do(OopClosure* cl) {
  1.2241 +  if (_markStack.size() > 0 && verbose_low())
  1.2242 +    gclog_or_tty->print_cr("[global] scanning the global marking stack, "
  1.2243 +                           "size = %d", _markStack.size());
  1.2244 +  // we first iterate over the contents of the mark stack...
  1.2245 +  _markStack.oops_do(cl);
  1.2246 +
  1.2247 +  for (int i = 0; i < (int)_max_task_num; ++i) {
  1.2248 +    OopTaskQueue* queue = _task_queues->queue((int)i);
  1.2249 +
  1.2250 +    if (queue->size() > 0 && verbose_low())
  1.2251 +      gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
  1.2252 +                             "size = %d", i, queue->size());
  1.2253 +
  1.2254 +    // ...then over the contents of all the task queues.
  1.2255 +    queue->oops_do(cl);
  1.2256 +  }
  1.2257 +
  1.2258 +  // finally, invalidate any entries in the region stack that
  1.2259 +  // point into the collection set
  1.2260 +  if (_regionStack.invalidate_entries_into_cset()) {
  1.2261 +    // otherwise, any gray objects copied during the evacuation pause
  1.2262 +    // might not be visited.
  1.2263 +    guarantee( _should_gray_objects, "invariant" );
  1.2264 +  }
  1.2265 +}
  1.2266 +
  1.2267 +void ConcurrentMark::clear_marking_state() {
  1.2268 +  _markStack.setEmpty();
  1.2269 +  _markStack.clear_overflow();
  1.2270 +  _regionStack.setEmpty();
  1.2271 +  _regionStack.clear_overflow();
  1.2272 +  clear_has_overflown();
  1.2273 +  _finger = _heap_start;
  1.2274 +
  1.2275 +  for (int i = 0; i < (int)_max_task_num; ++i) {
  1.2276 +    OopTaskQueue* queue = _task_queues->queue(i);
  1.2277 +    queue->set_empty();
  1.2278 +  }
  1.2279 +}
  1.2280 +
  1.2281 +void ConcurrentMark::print_stats() {
  1.2282 +  if (verbose_stats()) {
  1.2283 +    gclog_or_tty->print_cr("---------------------------------------------------------------------");
  1.2284 +    for (size_t i = 0; i < _active_tasks; ++i) {
  1.2285 +      _tasks[i]->print_stats();
  1.2286 +      gclog_or_tty->print_cr("---------------------------------------------------------------------");
  1.2287 +    }
  1.2288 +  }
  1.2289 +}
  1.2290 +
  1.2291 +class CSMarkOopClosure: public OopClosure {
  1.2292 +  friend class CSMarkBitMapClosure;
  1.2293 +
  1.2294 +  G1CollectedHeap* _g1h;
  1.2295 +  CMBitMap*        _bm;
  1.2296 +  ConcurrentMark*  _cm;
  1.2297 +  oop*             _ms;
  1.2298 +  jint*            _array_ind_stack;
  1.2299 +  int              _ms_size;
  1.2300 +  int              _ms_ind;
  1.2301 +  int              _array_increment;
  1.2302 +
  1.2303 +  bool push(oop obj, int arr_ind = 0) {
  1.2304 +    if (_ms_ind == _ms_size) {
  1.2305 +      gclog_or_tty->print_cr("Mark stack is full.");
  1.2306 +      return false;
  1.2307 +    }
  1.2308 +    _ms[_ms_ind] = obj;
  1.2309 +    if (obj->is_objArray()) _array_ind_stack[_ms_ind] = arr_ind;
  1.2310 +    _ms_ind++;
  1.2311 +    return true;
  1.2312 +  }
  1.2313 +
  1.2314 +  oop pop() {
  1.2315 +    if (_ms_ind == 0) return NULL;
  1.2316 +    else {
  1.2317 +      _ms_ind--;
  1.2318 +      return _ms[_ms_ind];
  1.2319 +    }
  1.2320 +  }
  1.2321 +
  1.2322 +  bool drain() {
  1.2323 +    while (_ms_ind > 0) {
  1.2324 +      oop obj = pop();
  1.2325 +      assert(obj != NULL, "Since index was non-zero.");
  1.2326 +      if (obj->is_objArray()) {
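         +        // Object arrays are scanned in chunks of _array_increment
         +        // elements; if this chunk doesn't reach the end of the array,
         +        // the array is re-pushed with the next starting index so the
         +        // closure's private mark stack stays shallow.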
  1.2327 +        jint arr_ind = _array_ind_stack[_ms_ind];
  1.2328 +        objArrayOop aobj = objArrayOop(obj);
  1.2329 +        jint len = aobj->length();
  1.2330 +        jint next_arr_ind = arr_ind + _array_increment;
  1.2331 +        if (next_arr_ind < len) {
  1.2332 +          push(obj, next_arr_ind);
  1.2333 +        }
  1.2334 +        // Now process this portion of this one.
  1.2335 +        int lim = MIN2(next_arr_ind, len);
  1.2336 +        assert(!UseCompressedOops, "This needs to be fixed");
  1.2337 +        for (int j = arr_ind; j < lim; j++) {
  1.2338 +          do_oop(aobj->obj_at_addr<oop>(j));
  1.2339 +        }
  1.2340 +
  1.2341 +      } else {
  1.2342 +        obj->oop_iterate(this);
  1.2343 +      }
  1.2344 +      if (abort()) return false;
  1.2345 +    }
  1.2346 +    return true;
  1.2347 +  }
  1.2348 +
  1.2349 +public:
  1.2350 +  CSMarkOopClosure(ConcurrentMark* cm, int ms_size) :
  1.2351 +    _g1h(G1CollectedHeap::heap()),
  1.2352 +    _cm(cm),
  1.2353 +    _bm(cm->nextMarkBitMap()),
  1.2354 +    _ms_size(ms_size), _ms_ind(0),
  1.2355 +    _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
  1.2356 +    _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
  1.2357 +    _array_increment(MAX2(ms_size/8, 16))
  1.2358 +  {}
  1.2359 +
  1.2360 +  ~CSMarkOopClosure() {
  1.2361 +    FREE_C_HEAP_ARRAY(oop, _ms);
  1.2362 +    FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
  1.2363 +  }
  1.2364 +
  1.2365 +  void do_oop(narrowOop* p) {
  1.2366 +    guarantee(false, "NYI");
  1.2367 +  }
  1.2368 +
  1.2369 +  void do_oop(oop* p) {
  1.2370 +    oop obj = *p;
  1.2371 +    if (obj == NULL) return;
  1.2372 +    if (obj->is_forwarded()) {
  1.2373 +      // If the object has already been forwarded, we have to make sure
  1.2374 +      // that it's marked.  So follow the forwarding pointer.  Note that
  1.2375 +      // this does the right thing for self-forwarding pointers in the
  1.2376 +      // evacuation failure case.
  1.2377 +      obj = obj->forwardee();
  1.2378 +    }
  1.2379 +    HeapRegion* hr = _g1h->heap_region_containing(obj);
  1.2380 +    if (hr != NULL) {
  1.2381 +      if (hr->in_collection_set()) {
  1.2382 +        if (_g1h->is_obj_ill(obj)) {
  1.2383 +          _bm->mark((HeapWord*)obj);
  1.2384 +          if (!push(obj)) {
  1.2385 +            gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
  1.2386 +            set_abort();
  1.2387 +          }
  1.2388 +        }
  1.2389 +      } else {
  1.2390 +        // Outside the collection set; we need to gray it
  1.2391 +        _cm->deal_with_reference(obj);
  1.2392 +      }
  1.2393 +    }
  1.2394 +  }
  1.2395 +};
  1.2396 +
  1.2397 +class CSMarkBitMapClosure: public BitMapClosure {
  1.2398 +  G1CollectedHeap* _g1h;
  1.2399 +  CMBitMap*        _bitMap;
  1.2400 +  ConcurrentMark*  _cm;
  1.2401 +  CSMarkOopClosure _oop_cl;
  1.2402 +public:
  1.2403 +  CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :
  1.2404 +    _g1h(G1CollectedHeap::heap()),
  1.2405 +    _bitMap(cm->nextMarkBitMap()),
  1.2406 +    _oop_cl(cm, ms_size)
  1.2407 +  {}
  1.2408 +
  1.2409 +  ~CSMarkBitMapClosure() {}
  1.2410 +
  1.2411 +  bool do_bit(size_t offset) {
  1.2412 +    // convert offset into a HeapWord*
  1.2413 +    HeapWord* addr = _bitMap->offsetToHeapWord(offset);
  1.2414 +    assert(_bitMap->startWord() <= addr && addr < _bitMap->endWord(),
  1.2415 +           "address out of range");
  1.2416 +    assert(_bitMap->isMarked(addr), "tautology");
  1.2417 +    oop obj = oop(addr);
  1.2418 +    if (!obj->is_forwarded()) {
  1.2419 +      if (!_oop_cl.push(obj)) return false;
  1.2420 +      if (!_oop_cl.drain()) return false;
  1.2421 +    }
  1.2422 +    // Otherwise...
  1.2423 +    return true;
  1.2424 +  }
  1.2425 +};
  1.2426 +
  1.2427 +
  1.2428 +class CompleteMarkingInCSHRClosure: public HeapRegionClosure {
  1.2429 +  CMBitMap* _bm;
  1.2430 +  CSMarkBitMapClosure _bit_cl;
  1.2431 +  enum SomePrivateConstants {
  1.2432 +    MSSize = 1000
  1.2433 +  };
  1.2434 +  bool _completed;
  1.2435 +public:
  1.2436 +  CompleteMarkingInCSHRClosure(ConcurrentMark* cm) :
  1.2437 +    _bm(cm->nextMarkBitMap()),
  1.2438 +    _bit_cl(cm, MSSize),
  1.2439 +    _completed(true)
  1.2440 +  {}
  1.2441 +
  1.2442 +  ~CompleteMarkingInCSHRClosure() {}
  1.2443 +
  1.2444 +  bool doHeapRegion(HeapRegion* r) {
  1.2445 +    if (!r->evacuation_failed()) {
  1.2446 +      MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
  1.2447 +      if (!mr.is_empty()) {
  1.2448 +        if (!_bm->iterate(&_bit_cl, mr)) {
  1.2449 +          _completed = false;
  1.2450 +          return true;
  1.2451 +        }
  1.2452 +      }
  1.2453 +    }
  1.2454 +    return false;
  1.2455 +  }
  1.2456 +
  1.2457 +  bool completed() { return _completed; }
  1.2458 +};
  1.2459 +
  1.2460 +class ClearMarksInHRClosure: public HeapRegionClosure {
  1.2461 +  CMBitMap* _bm;
  1.2462 +public:
  1.2463 +  ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { }
  1.2464 +
  1.2465 +  bool doHeapRegion(HeapRegion* r) {
  1.2466 +    if (!r->used_region().is_empty() && !r->evacuation_failed()) {
  1.2467 +      MemRegion usedMR = r->used_region();
  1.2468 +      _bm->clearRange(usedMR);
  1.2469 +    }
  1.2470 +    return false;
  1.2471 +  }
  1.2472 +};
  1.2473 +
  1.2474 +void ConcurrentMark::complete_marking_in_collection_set() {
  1.2475 +  G1CollectedHeap* g1h =  G1CollectedHeap::heap();
  1.2476 +
  1.2477 +  if (!g1h->mark_in_progress()) {
  1.2478 +    g1h->g1_policy()->record_mark_closure_time(0.0);
  1.2479 +    return;
  1.2480 +  }
  1.2481 +
  1.2482 +  int i = 1;
  1.2483 +  double start = os::elapsedTime();
  1.2484 +  while (true) {
  1.2485 +    i++;
  1.2486 +    CompleteMarkingInCSHRClosure cmplt(this);
  1.2487 +    g1h->collection_set_iterate(&cmplt);
  1.2488 +    if (cmplt.completed()) break;
  1.2489 +  }
  1.2490 +  double end_time = os::elapsedTime();
  1.2491 +  double elapsed_time_ms = (end_time - start) * 1000.0;
  1.2492 +  g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
  1.2493 +  if (PrintGCDetails) {
  1.2494 +    gclog_or_tty->print_cr("Mark closure took %5.2f ms.", elapsed_time_ms);
  1.2495 +  }
  1.2496 +
  1.2497 +  ClearMarksInHRClosure clr(nextMarkBitMap());
  1.2498 +  g1h->collection_set_iterate(&clr);
  1.2499 +}
  1.2500 +
  1.2501 +// The next two methods deal with the following optimisation. Some
  1.2502 +// objects are gray by being marked and located above the finger. If
  1.2503 +// they are copied, during an evacuation pause, below the finger then
  1.2504 +// they need to be pushed on the stack. The observation is that, if
  1.2505 +// there are no regions in the collection set located above the
  1.2506 +// finger, then the above cannot happen, hence we do not need to
  1.2507 +// explicitly gray any objects when copying them to below the
  1.2508 +// finger. The global stack will be scanned to ensure that, if it
  1.2509 +// points to objects being copied, it will update their
  1.2510 +// location. There is a tricky situation with the gray objects in
  1.2511 +// the region stack that are being copied, however. See the comment in
  1.2512 +// newCSet().
  1.2513 +
  1.2514 +void ConcurrentMark::newCSet() {
  1.2515 +  if (!concurrent_marking_in_progress())
  1.2516 +    // nothing to do if marking is not in progress
  1.2517 +    return;
  1.2518 +
  1.2519 +  // find what the lowest finger is among the global and local fingers
  1.2520 +  _min_finger = _finger;
  1.2521 +  for (int i = 0; i < (int)_max_task_num; ++i) {
  1.2522 +    CMTask* task = _tasks[i];
  1.2523 +    HeapWord* task_finger = task->finger();
  1.2524 +    if (task_finger != NULL && task_finger < _min_finger)
  1.2525 +      _min_finger = task_finger;
  1.2526 +  }
  1.2527 +
  1.2528 +  _should_gray_objects = false;
  1.2529 +
  1.2530 +  // This fixes a very subtle and frustrating bug. It might be the case
  1.2531 +  // that, during an evacuation pause, heap regions that contain
  1.2532 +  // objects that are gray (by being in regions contained in the
  1.2533 +  // region stack) are included in the collection set. Since such gray
  1.2534 +  // objects will be moved, and because it's not easy to redirect
  1.2535 +  // region stack entries to point to a new location (because objects
  1.2536 +  // in one region might be scattered to multiple regions after they
  1.2537 +  // are copied), one option is to ensure that all marked objects
  1.2538 +  // copied during a pause are pushed on the stack. Notice, however,
  1.2539 +  // that this problem can only happen when the region stack is not
  1.2540 +  // empty during an evacuation pause. So, we make the fix a bit less
  1.2541 +  // conservative and ensure that regions are pushed on the stack,
  1.2542 +  // irrespective of whether all collection set regions are below the
  1.2543 +  // finger, if the region stack is not empty. This is expected to be
  1.2544 +  // a rare case, so I don't think it's necessary to be smarter about it.
  1.2545 +  if (!region_stack_empty())
  1.2546 +    _should_gray_objects = true;
  1.2547 +}
  1.2548 +
  1.2549 +void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
  1.2550 +  if (!concurrent_marking_in_progress())
  1.2551 +    return;
  1.2552 +
  1.2553 +  HeapWord* region_end = hr->end();
  1.2554 +  if (region_end > _min_finger)
  1.2555 +    _should_gray_objects = true;
  1.2556 +}
  1.2557 +
  1.2558 +void ConcurrentMark::disable_co_trackers() {
  1.2559 +  if (has_aborted()) {
  1.2560 +    if (_cleanup_co_tracker.enabled())
  1.2561 +      _cleanup_co_tracker.disable();
  1.2562 +    for (int i = 0; i < (int)_max_task_num; ++i) {
  1.2563 +      CMTask* task = _tasks[i];
  1.2564 +      if (task->co_tracker_enabled())
  1.2565 +        task->disable_co_tracker();
  1.2566 +    }
  1.2567 +  } else {
  1.2568 +    guarantee( !_cleanup_co_tracker.enabled(), "invariant" );
  1.2569 +    for (int i = 0; i < (int)_max_task_num; ++i) {
  1.2570 +      CMTask* task = _tasks[i];
  1.2571 +      guarantee( !task->co_tracker_enabled(), "invariant" );
  1.2572 +    }
  1.2573 +  }
  1.2574 +}
  1.2575 +
  1.2576 +// abandon current marking iteration due to a Full GC
  1.2577 +void ConcurrentMark::abort() {
  1.2578 +  // If we're not marking, nothing to do.
  1.2579 +  if (!G1ConcMark) return;
  1.2580 +
  1.2581 +  // Clear all marks to force marking thread to do nothing
  1.2582 +  _nextMarkBitMap->clearAll();
  1.2583 +  // Empty mark stack
  1.2584 +  clear_marking_state();
  1.2585 +  for (int i = 0; i < (int)_max_task_num; ++i)
  1.2586 +    _tasks[i]->clear_region_fields();
  1.2587 +  _has_aborted = true;
  1.2588 +
  1.2589 +  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  1.2590 +  satb_mq_set.abandon_partial_marking();
  1.2591 +  satb_mq_set.set_active_all_threads(false);
  1.2592 +}
  1.2593 +
  1.2594 +static void print_ms_time_info(const char* prefix, const char* name,
  1.2595 +                               NumberSeq& ns) {
  1.2596 +  gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
  1.2597 +                         prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
  1.2598 +  if (ns.num() > 0) {
  1.2599 +    gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
  1.2600 +                           prefix, ns.sd(), ns.maximum());
  1.2601 +  }
  1.2602 +}
  1.2603 +
  1.2604 +void ConcurrentMark::print_summary_info() {
  1.2605 +  gclog_or_tty->print_cr(" Concurrent marking:");
  1.2606 +  print_ms_time_info("  ", "init marks", _init_times);
  1.2607 +  print_ms_time_info("  ", "remarks", _remark_times);
  1.2608 +  {
  1.2609 +    print_ms_time_info("     ", "final marks", _remark_mark_times);
  1.2610 +    print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
  1.2611 +
  1.2612 +  }
  1.2613 +  print_ms_time_info("  ", "cleanups", _cleanup_times);
  1.2614 +  gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
  1.2615 +                         _total_counting_time,
  1.2616 +                         (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
  1.2617 +                          (double)_cleanup_times.num()
  1.2618 +                         : 0.0));
  1.2619 +  if (G1ScrubRemSets) {
  1.2620 +    gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
  1.2621 +                           _total_rs_scrub_time,
  1.2622 +                           (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
  1.2623 +                            (double)_cleanup_times.num()
  1.2624 +                           : 0.0));
  1.2625 +  }
  1.2626 +  gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
  1.2627 +                         (_init_times.sum() + _remark_times.sum() +
  1.2628 +                          _cleanup_times.sum())/1000.0);
  1.2629 +  gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
  1.2630 +                "(%8.2f s marking, %8.2f s counting).",
  1.2631 +                cmThread()->vtime_accum(),
  1.2632 +                cmThread()->vtime_mark_accum(),
  1.2633 +                cmThread()->vtime_count_accum());
  1.2634 +}
  1.2635 +
  1.2636 +// Closures
  1.2637 +// XXX: there seems to be a lot of code duplication here;
  1.2638 +// should refactor and consolidate the shared code.
  1.2639 +
  1.2640 +// This closure is used to mark refs into the CMS generation in
  1.2641 +// the CMS bit map. Called at the first checkpoint.
  1.2642 +
  1.2643 +// We take a break if someone is trying to stop the world.
  1.2644 +bool ConcurrentMark::do_yield_check(int worker_i) {
  1.2645 +  if (should_yield()) {
  1.2646 +    if (worker_i == 0)
  1.2647 +      _g1h->g1_policy()->record_concurrent_pause();
  1.2648 +    cmThread()->yield();
  1.2649 +    if (worker_i == 0)
  1.2650 +      _g1h->g1_policy()->record_concurrent_pause_end();
  1.2651 +    return true;
  1.2652 +  } else {
  1.2653 +    return false;
  1.2654 +  }
  1.2655 +}
  1.2656 +
  1.2657 +bool ConcurrentMark::should_yield() {
  1.2658 +  return cmThread()->should_yield();
  1.2659 +}
  1.2660 +
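         +// Returns whether the bit for the card spanning the given address is
         +// set in the global card bitmap.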
  1.2661 +bool ConcurrentMark::containing_card_is_marked(void* p) {
  1.2662 +  size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
  1.2663 +  return _card_bm.at(offset >> CardTableModRefBS::card_shift);
  1.2664 +}
  1.2665 +
  1.2666 +bool ConcurrentMark::containing_cards_are_marked(void* start,
  1.2667 +                                                 void* last) {
  1.2668 +  return
  1.2669 +    containing_card_is_marked(start) &&
  1.2670 +    containing_card_is_marked(last);
  1.2671 +}
  1.2672 +
  1.2673 +#ifndef PRODUCT
  1.2674 +// for debugging purposes
  1.2675 +void ConcurrentMark::print_finger() {
  1.2676 +  gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
  1.2677 +                         _heap_start, _heap_end, _finger);
  1.2678 +  for (int i = 0; i < (int) _max_task_num; ++i) {
  1.2679 +    gclog_or_tty->print("   %d: "PTR_FORMAT, i, _tasks[i]->finger());
  1.2680 +  }
  1.2681 +  gclog_or_tty->print_cr("");
  1.2682 +}
  1.2683 +#endif
  1.2684 +
  1.2685 +// Closure for iteration over bitmaps
  1.2686 +class CMBitMapClosure : public BitMapClosure {
  1.2687 +private:
  1.2688 +  // the bitmap that is being iterated over
  1.2689 +  CMBitMap*                   _nextMarkBitMap;
  1.2690 +  ConcurrentMark*             _cm;
  1.2691 +  CMTask*                     _task;
  1.2692 +  // true if we're scanning a heap region claimed by the task (so that
  1.2693 +  // we move the finger along), false if we're not, i.e. currently when
  1.2694 +  // scanning a heap region popped from the region stack (so that we
  1.2695 +  // do not move the task finger along; it'd be a mistake if we did so).
  1.2696 +  bool                        _scanning_heap_region;
  1.2697 +
  1.2698 +public:
  1.2699 +  CMBitMapClosure(CMTask *task,
  1.2700 +                  ConcurrentMark* cm,
  1.2701 +                  CMBitMap* nextMarkBitMap)
  1.2702 +    : _nextMarkBitMap(nextMarkBitMap), _cm(cm), _task(task) { }
  1.2703 +
  1.2704 +  void set_scanning_heap_region(bool scanning_heap_region) {
  1.2705 +    _scanning_heap_region = scanning_heap_region;
  1.2706 +  }
  1.2707 +
  1.2708 +  bool do_bit(size_t offset) {
  1.2709 +    HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
  1.2710 +    tmp_guarantee_CM( _nextMarkBitMap->isMarked(addr), "invariant" );
  1.2711 +    tmp_guarantee_CM( addr < _cm->finger(), "invariant" );
  1.2712 +
  1.2713 +    if (_scanning_heap_region) {
  1.2714 +      statsOnly( _task->increase_objs_found_on_bitmap() );
  1.2715 +      tmp_guarantee_CM( addr >= _task->finger(), "invariant" );
  1.2716 +      // We move the task's local finger along.
  1.2717 +      _task->move_finger_to(addr);
  1.2718 +    } else {
  1.2719 +      // We move the task's region finger along.
  1.2720 +      _task->move_region_finger_to(addr);
  1.2721 +    }
  1.2722 +
  1.2723 +    _task->scan_object(oop(addr));
  1.2724 +    // we only partially drain the local queue and global stack
  1.2725 +    _task->drain_local_queue(true);
  1.2726 +    _task->drain_global_stack(true);
  1.2727 +
  1.2728 +    // if the has_aborted flag has been raised, we need to bail out of
  1.2729 +    // the iteration
  1.2730 +    return !_task->has_aborted();
  1.2731 +  }
  1.2732 +};
  1.2733 +
  1.2734 +// Closure for iterating over objects, currently only used for
  1.2735 +// processing SATB buffers.
  1.2736 +class CMObjectClosure : public ObjectClosure {
  1.2737 +private:
  1.2738 +  CMTask* _task;
  1.2739 +
  1.2740 +public:
  1.2741 +  void do_object(oop obj) {
  1.2742 +    _task->deal_with_reference(obj);
  1.2743 +  }
  1.2744 +
  1.2745 +  CMObjectClosure(CMTask* task) : _task(task) { }
  1.2746 +};
  1.2747 +
  1.2748 +// Closure for iterating over object fields
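         +// (each reference visited is handed to CMTask::deal_with_reference())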
  1.2749 +class CMOopClosure : public OopClosure {
  1.2750 +private:
  1.2751 +  G1CollectedHeap*   _g1h;
  1.2752 +  ConcurrentMark*    _cm;
  1.2753 +  CMTask*            _task;
  1.2754 +
  1.2755 +public:
  1.2756 +  void do_oop(narrowOop* p) {
  1.2757 +    guarantee(false, "NYI");
  1.2758 +  }
  1.2759 +
  1.2760 +  void do_oop(oop* p) {
  1.2761 +    tmp_guarantee_CM( _g1h->is_in_g1_reserved((HeapWord*) p), "invariant" );
  1.2762 +
  1.2763 +    oop obj = *p;
  1.2764 +    if (_cm->verbose_high())
  1.2765 +      gclog_or_tty->print_cr("[%d] we're looking at location "
  1.2766 +                             "*"PTR_FORMAT" = "PTR_FORMAT,
  1.2767 +                             _task->task_id(), p, (void*) obj);
  1.2768 +    _task->deal_with_reference(obj);
  1.2769 +  }
  1.2770 +
  1.2771 +  CMOopClosure(G1CollectedHeap* g1h,
  1.2772 +               ConcurrentMark* cm,
  1.2773 +               CMTask* task)
  1.2774 +    : _g1h(g1h), _cm(cm), _task(task) { }
  1.2775 +};
  1.2776 +
  1.2777 +void CMTask::setup_for_region(HeapRegion* hr) {
  1.2778 +  tmp_guarantee_CM( hr != NULL && !hr->continuesHumongous(),
  1.2779 +      "claim_region() should have filtered out continues humongous regions" );
  1.2780 +
  1.2781 +  if (_cm->verbose_low())
  1.2782 +    gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
  1.2783 +                           _task_id, hr);
  1.2784 +
  1.2785 +  _curr_region  = hr;
  1.2786 +  _finger       = hr->bottom();
  1.2787 +  update_region_limit();
  1.2788 +}
  1.2789 +
  1.2790 +void CMTask::update_region_limit() {
  1.2791 +  HeapRegion* hr            = _curr_region;
  1.2792 +  HeapWord* bottom          = hr->bottom();
  1.2793 +  HeapWord* limit           = hr->next_top_at_mark_start();
  1.2794 +
  1.2795 +  if (limit == bottom) {
  1.2796 +    if (_cm->verbose_low())
  1.2797 +      gclog_or_tty->print_cr("[%d] found an empty region "
  1.2798 +                             "["PTR_FORMAT", "PTR_FORMAT")",
  1.2799 +                             _task_id, bottom, limit);
  1.2800 +    // The region was collected underneath our feet.
  1.2801 +    // We set the finger to bottom to ensure that the bitmap
  1.2802 +    // iteration that will follow this will not do anything.
  1.2803 +    // (this is not a condition that holds when we set the region up,
  1.2804 +    // as the region is not supposed to be empty in the first place)
  1.2805 +    _finger = bottom;
  1.2806 +  } else if (limit >= _region_limit) {
  1.2807 +    tmp_guarantee_CM( limit >= _finger, "peace of mind" );
  1.2808 +  } else {
  1.2809 +    tmp_guarantee_CM( limit < _region_limit, "only way to get here" );
  1.2810 +    // This can happen under some pretty unusual circumstances.  An
  1.2811 +    // evacuation pause empties the region underneath our feet (NTAMS
  1.2812 +    // at bottom). We then do some allocation in the region (NTAMS
  1.2813 +    // stays at bottom), followed by the region being used as a GC
  1.2814 +    // alloc region (NTAMS will move to top() and the objects
  1.2815 +    // originally below it will be grayed). All objects now marked in
  1.2816 +    // the region are explicitly grayed, if below the global finger,
  1.2817 +    // and in fact we do not need to scan anything else. So, we simply
  1.2818 +    // set _finger to be limit to ensure that the bitmap iteration
  1.2819 +    // doesn't do anything.
  1.2820 +    _finger = limit;
  1.2821 +  }
  1.2822 +
  1.2823 +  _region_limit = limit;
  1.2824 +}
  1.2825 +
  1.2826 +void CMTask::giveup_current_region() {
  1.2827 +  tmp_guarantee_CM( _curr_region != NULL, "invariant" );
  1.2828 +  if (_cm->verbose_low())
  1.2829 +    gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
  1.2830 +                           _task_id, _curr_region);
  1.2831 +  clear_region_fields();
  1.2832 +}
  1.2833 +
  1.2834 +void CMTask::clear_region_fields() {
  1.2835 +  // Values for these three fields that indicate that we're not
  1.2836 +  // holding on to a region.
  1.2837 +  _curr_region   = NULL;
  1.2838 +  _finger        = NULL;
  1.2839 +  _region_limit  = NULL;
  1.2840 +
  1.2841 +  _region_finger = NULL;
  1.2842 +}
  1.2843 +
  1.2844 +void CMTask::reset(CMBitMap* nextMarkBitMap) {
  1.2845 +  guarantee( nextMarkBitMap != NULL, "invariant" );
  1.2846 +
  1.2847 +  if (_cm->verbose_low())
  1.2848 +    gclog_or_tty->print_cr("[%d] resetting", _task_id);
  1.2849 +
  1.2850 +  _nextMarkBitMap                = nextMarkBitMap;
  1.2851 +  clear_region_fields();
  1.2852 +
  1.2853 +  _calls                         = 0;
  1.2854 +  _elapsed_time_ms               = 0.0;
  1.2855 +  _termination_time_ms           = 0.0;
  1.2856 +  _termination_start_time_ms     = 0.0;
  1.2857 +
  1.2858 +#if _MARKING_STATS_
  1.2859 +  _local_pushes                  = 0;
  1.2860 +  _local_pops                    = 0;
  1.2861 +  _local_max_size                = 0;
  1.2862 +  _objs_scanned                  = 0;
  1.2863 +  _global_pushes                 = 0;
  1.2864 +  _global_pops                   = 0;
  1.2865 +  _global_max_size               = 0;
  1.2866 +  _global_transfers_to           = 0;
  1.2867 +  _global_transfers_from         = 0;
  1.2868 +  _region_stack_pops             = 0;
  1.2869 +  _regions_claimed               = 0;
  1.2870 +  _objs_found_on_bitmap          = 0;
  1.2871 +  _satb_buffers_processed        = 0;
  1.2872 +  _steal_attempts                = 0;
  1.2873 +  _steals                        = 0;
  1.2874 +  _aborted                       = 0;
  1.2875 +  _aborted_overflow              = 0;
  1.2876 +  _aborted_cm_aborted            = 0;
  1.2877 +  _aborted_yield                 = 0;
  1.2878 +  _aborted_timed_out             = 0;
  1.2879 +  _aborted_satb                  = 0;
  1.2880 +  _aborted_termination           = 0;
  1.2881 +#endif // _MARKING_STATS_
  1.2882 +}
  1.2883 +
  1.2884 +bool CMTask::should_exit_termination() {
  1.2885 +  regular_clock_call();
  1.2886 +  // This is called when we are in the termination protocol. We should
  1.2887 +  // quit if, for some reason, this task wants to abort or the global
  1.2888 +  // stack is not empty (this means that we can get work from it).
  1.2889 +  return !_cm->mark_stack_empty() || has_aborted();
  1.2890 +}
  1.2891 +
  1.2892 +// This determines whether the method below will check both the local
  1.2893 +// and global fingers when deciding whether to push a gray object on
  1.2894 +// the stack (value 1) or whether it will only check the global one
  1.2895 +// (value 0). The tradeoff is that the former is a bit more accurate
  1.2896 +// and may push fewer entries on the stack, but it might also be
  1.2897 +// a little bit slower.
  1.2898 +
  1.2899 +#define _CHECK_BOTH_FINGERS_      1
  1.2900 +
  1.2901 +void CMTask::deal_with_reference(oop obj) {
  1.2902 +  if (_cm->verbose_high())
  1.2903 +    gclog_or_tty->print_cr("[%d] we're dealing with reference = "PTR_FORMAT,
  1.2904 +                           _task_id, (void*) obj);
  1.2905 +
  1.2906 +  ++_refs_reached;
  1.2907 +
  1.2908 +  HeapWord* objAddr = (HeapWord*) obj;
  1.2909 +  if (_g1h->is_in_g1_reserved(objAddr)) {
  1.2910 +    tmp_guarantee_CM( obj != NULL, "is_in_g1_reserved should ensure this" );
  1.2911 +    HeapRegion* hr =  _g1h->heap_region_containing(obj);
  1.2912 +    if (_g1h->is_obj_ill(obj, hr)) {
  1.2913 +      if (_cm->verbose_high())
  1.2914 +        gclog_or_tty->print_cr("[%d] "PTR_FORMAT" is not considered marked",
  1.2915 +                               _task_id, (void*) obj);
  1.2916 +
  1.2917 +      // we need to mark it first
  1.2918 +      if (_nextMarkBitMap->parMark(objAddr)) {
  1.2919 +        // No OrderAccess::store_load() is needed. It is implicit in the
  1.2920 +        // CAS done in parMark(objAddr) above
  1.2921 +        HeapWord* global_finger = _cm->finger();
  1.2922 +
  1.2923 +#if _CHECK_BOTH_FINGERS_
  1.2924 +        // we will check both the local and global fingers
  1.2925 +
  1.2926 +        if (_finger != NULL && objAddr < _finger) {
  1.2927 +          if (_cm->verbose_high())
  1.2928 +            gclog_or_tty->print_cr("[%d] below the local finger ("PTR_FORMAT"), "
  1.2929 +                                   "pushing it", _task_id, _finger);
  1.2930 +          push(obj);
  1.2931 +        } else if (_curr_region != NULL && objAddr < _region_limit) {
  1.2932 +          // do nothing
  1.2933 +        } else if (objAddr < global_finger) {
  1.2934 +          // Notice that the global finger might be moving forward
  1.2935 +          // concurrently. This is not a problem. In the worst case, we
  1.2936 +          // mark the object while it is above the global finger and, by
  1.2937 +          // the time we read the global finger, it has moved forward
  1.2938 +          // past this object. In this case, the object will probably
  1.2939 +          // be visited when a task is scanning the region and will also
  1.2940 +          // be pushed on the stack. So, some duplicate work, but no
  1.2941 +          // correctness problems.
  1.2942 +
  1.2943 +          if (_cm->verbose_high())
  1.2944 +            gclog_or_tty->print_cr("[%d] below the global finger "
  1.2945 +                                   "("PTR_FORMAT"), pushing it",
  1.2946 +                                   _task_id, global_finger);
  1.2947 +          push(obj);
  1.2948 +        } else {
  1.2949 +          // do nothing
  1.2950 +        }
  1.2951 +#else // _CHECK_BOTH_FINGERS_
  1.2952 +      // we will only check the global finger
  1.2953 +
  1.2954 +        if (objAddr < global_finger) {
  1.2955 +          // see long comment above
  1.2956 +
  1.2957 +          if (_cm->verbose_high())
  1.2958 +            gclog_or_tty->print_cr("[%d] below the global finger "
  1.2959 +                                   "("PTR_FORMAT"), pushing it",
  1.2960 +                                   _task_id, global_finger);
  1.2961 +          push(obj);
  1.2962 +        }
  1.2963 +#endif // _CHECK_BOTH_FINGERS_
  1.2964 +      }
  1.2965 +    }
  1.2966 +  }
  1.2967 +}
  1.2968 +
  1.2969 +void CMTask::push(oop obj) {
  1.2970 +  HeapWord* objAddr = (HeapWord*) obj;
  1.2971 +  tmp_guarantee_CM( _g1h->is_in_g1_reserved(objAddr), "invariant" );
  1.2972 +  tmp_guarantee_CM( !_g1h->is_obj_ill(obj), "invariant" );
  1.2973 +  tmp_guarantee_CM( _nextMarkBitMap->isMarked(objAddr), "invariant" );
  1.2974 +
  1.2975 +  if (_cm->verbose_high())
  1.2976 +    gclog_or_tty->print_cr("[%d] pushing "PTR_FORMAT, _task_id, (void*) obj);
  1.2977 +
  1.2978 +  if (!_task_queue->push(obj)) {
  1.2979 +    // The local task queue looks full. We need to push some entries
  1.2980 +    // to the global stack.
  1.2981 +
  1.2982 +    if (_cm->verbose_medium())
  1.2983 +      gclog_or_tty->print_cr("[%d] task queue overflow, "
  1.2984 +                             "moving entries to the global stack",
  1.2985 +                             _task_id);
  1.2986 +    move_entries_to_global_stack();
  1.2987 +
  1.2988 +    // this should succeed since, even if we overflow the global
  1.2989 +    // stack, we should have definitely removed some entries from the
  1.2990 +    // local queue. So, there must be space on it.
  1.2991 +    bool success = _task_queue->push(obj);
  1.2992 +    tmp_guarantee_CM( success, "invariant" );
  1.2993 +  }
  1.2994 +
  1.2995 +  statsOnly( int tmp_size = _task_queue->size();
  1.2996 +             if (tmp_size > _local_max_size)
  1.2997 +               _local_max_size = tmp_size;
  1.2998 +             ++_local_pushes );
  1.2999 +}
  1.3000 +
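         +// Called once the words-scanned or refs-reached limit has been hit;
         +// it simply delegates to the regular clock call.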
  1.3001 +void CMTask::reached_limit() {
  1.3002 +  tmp_guarantee_CM( _words_scanned >= _words_scanned_limit ||
  1.3003 +                    _refs_reached >= _refs_reached_limit ,
  1.3004 +                 "shouldn't have been called otherwise" );
  1.3005 +  regular_clock_call();
  1.3006 +}
  1.3007 +
  1.3008 +void CMTask::regular_clock_call() {
  1.3009 +  if (has_aborted())
  1.3010 +    return;
  1.3011 +
  1.3012 +  // First, we need to recalculate the words scanned and refs reached
  1.3013 +  // limits for the next clock call.
  1.3014 +  recalculate_limits();
  1.3015 +
  1.3016 +  // During the regular clock call we do the following
  1.3017 +
  1.3018 +  // (1) If an overflow has been flagged, then we abort.
  1.3019 +  if (_cm->has_overflown()) {
  1.3020 +    set_has_aborted();
  1.3021 +    return;
  1.3022 +  }
  1.3023 +
  1.3024 +  // If we are not concurrent (i.e. we're doing remark) we don't need
  1.3025 +  // to check anything else. The other steps are only needed during
  1.3026 +  // the concurrent marking phase.
  1.3027 +  if (!concurrent())
  1.3028 +    return;
  1.3029 +
  1.3030 +  // (2) If marking has been aborted for Full GC, then we also abort.
  1.3031 +  if (_cm->has_aborted()) {
  1.3032 +    set_has_aborted();
  1.3033 +    statsOnly( ++_aborted_cm_aborted );
  1.3034 +    return;
  1.3035 +  }
  1.3036 +
  1.3037 +  double curr_time_ms = os::elapsedVTime() * 1000.0;
  1.3038 +
  1.3039 +  // (3) If marking stats are enabled, then we update the step history.
  1.3040 +#if _MARKING_STATS_
  1.3041 +  if (_words_scanned >= _words_scanned_limit)
  1.3042 +    ++_clock_due_to_scanning;
  1.3043 +  if (_refs_reached >= _refs_reached_limit)
  1.3044 +    ++_clock_due_to_marking;
  1.3045 +
  1.3046 +  double last_interval_ms = curr_time_ms - _interval_start_time_ms;
  1.3047 +  _interval_start_time_ms = curr_time_ms;
  1.3048 +  _all_clock_intervals_ms.add(last_interval_ms);
  1.3049 +
  1.3050 +  if (_cm->verbose_medium()) {
  1.3051 +    gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
  1.3052 +                           "scanned = %d%s, refs reached = %d%s",
  1.3053 +                           _task_id, last_interval_ms,
  1.3054 +                           _words_scanned,
  1.3055 +                           (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
  1.3056 +                           _refs_reached,
  1.3057 +                           (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
  1.3058 +  }
  1.3059 +#endif // _MARKING_STATS_
  1.3060 +
  1.3061 +  // (4) We check whether we should yield. If we have to, then we abort.
  1.3062 +  if (_cm->should_yield()) {
  1.3063 +    // We should yield. To do this we abort the task. The caller is
  1.3064 +    // responsible for yielding.
  1.3065 +    set_has_aborted();
  1.3066 +    statsOnly( ++_aborted_yield );
  1.3067 +    return;
  1.3068 +  }
  1.3069 +
  1.3070 +  // (5) We check whether we've reached our time quota. If we have,
  1.3071 +  // then we abort.
  1.3072 +  double elapsed_time_ms = curr_time_ms - _start_time_ms;
  1.3073 +  if (elapsed_time_ms > _time_target_ms) {
  1.3074 +    set_has_aborted();
  1.3075 +    _has_aborted_timed_out = true;
  1.3076 +    statsOnly( ++_aborted_timed_out );
  1.3077 +    return;
  1.3078 +  }
  1.3079 +
  1.3080 +  // (6) Finally, we check whether there are enough completed SATB
  1.3081 +  // buffers available for processing. If there are, we abort.
  1.3082 +  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  1.3083 +  if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
  1.3084 +    if (_cm->verbose_low())
  1.3085 +      gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
  1.3086 +                             _task_id);
  1.3087 +    // we do need to process SATB buffers, we'll abort and restart
  1.3088 +    // the marking task to do so
  1.3089 +    set_has_aborted();
  1.3090 +    statsOnly( ++_aborted_satb );
  1.3091 +    return;
  1.3092 +  }
  1.3093 +}
  1.3094 +
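         +// Re-arm the work-based triggers for the next regular clock call: one
         +// full period of words scanned and one of refs reached beyond the
         +// current counts.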
  1.3095 +void CMTask::recalculate_limits() {
  1.3096 +  _real_words_scanned_limit = _words_scanned + words_scanned_period;
  1.3097 +  _words_scanned_limit      = _real_words_scanned_limit;
  1.3098 +
  1.3099 +  _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
  1.3100 +  _refs_reached_limit       = _real_refs_reached_limit;
  1.3101 +}
  1.3102 +
  1.3103 +void CMTask::decrease_limits() {
  1.3104 +  // This is called when we believe that we're going to do an infrequent
  1.3105 +  // operation which will increase the per byte scanned cost (i.e. move
  1.3106 +  // entries to/from the global stack). It basically tries to decrease the
  1.3107 +  // scanning limit so that the clock is called earlier.
  1.3108 +
  1.3109 +  if (_cm->verbose_medium())
  1.3110 +    gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
  1.3111 +
  1.3112 +  _words_scanned_limit = _real_words_scanned_limit -
  1.3113 +    3 * words_scanned_period / 4;
  1.3114 +  _refs_reached_limit  = _real_refs_reached_limit -
  1.3115 +    3 * refs_reached_period / 4;
  1.3116 +}
  1.3117 +
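         +// Move up to global_stack_transfer_size entries from the local queue
         +// to the global mark stack.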
  1.3118 +void CMTask::move_entries_to_global_stack() {
  1.3119 +  // local array where we'll store the entries that will be popped
  1.3120 +  // from the local queue
  1.3121 +  oop buffer[global_stack_transfer_size];
  1.3122 +
  1.3123 +  int n = 0;
  1.3124 +  oop obj;
  1.3125 +  while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
  1.3126 +    buffer[n] = obj;
  1.3127 +    ++n;
  1.3128 +  }
  1.3129 +
  1.3130 +  if (n > 0) {
  1.3131 +    // we popped at least one entry from the local queue
  1.3132 +
  1.3133 +    statsOnly( ++_global_transfers_to; _local_pops += n );
  1.3134 +
  1.3135 +    if (!_cm->mark_stack_push(buffer, n)) {
  1.3136 +      if (_cm->verbose_low())
  1.3137 +        gclog_or_tty->print_cr("[%d] aborting due to global stack overflow", _task_id);
  1.3138 +      set_has_aborted();
  1.3139 +    } else {
  1.3140 +      // the transfer was successful
  1.3141 +
  1.3142 +      if (_cm->verbose_medium())
  1.3143 +        gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
  1.3144 +                               _task_id, n);
  1.3145 +      statsOnly( int tmp_size = _cm->mark_stack_size();
  1.3146 +                 if (tmp_size > _global_max_size)
  1.3147 +                   _global_max_size = tmp_size;
  1.3148 +                 _global_pushes += n );
  1.3149 +    }
  1.3150 +  }
  1.3151 +
  1.3152 +  // this operation was quite expensive, so decrease the limits
  1.3153 +  decrease_limits();
  1.3154 +}
  1.3155 +
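         +// Pop up to global_stack_transfer_size entries from the global mark
         +// stack and push them on the local queue.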
  1.3156 +void CMTask::get_entries_from_global_stack() {
  1.3157 +  // local array where we'll store the entries that will be popped
  1.3158 +  // from the global stack.
  1.3159 +  oop buffer[global_stack_transfer_size];
  1.3160 +  int n;
  1.3161 +  _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
  1.3162 +  tmp_guarantee_CM( n <= global_stack_transfer_size,
  1.3163 +                    "we should not pop more than the given limit" );
  1.3164 +  if (n > 0) {
  1.3165 +    // yes, we did actually pop at least one entry
  1.3166 +
  1.3167 +    statsOnly( ++_global_transfers_from; _global_pops += n );
  1.3168 +    if (_cm->verbose_medium())
  1.3169 +      gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
  1.3170 +                             _task_id, n);
  1.3171 +    for (int i = 0; i < n; ++i) {
  1.3172 +      bool success = _task_queue->push(buffer[i]);
  1.3173 +      // We only call this when the local queue is empty or under a
  1.3174 +      // given target limit. So, we do not expect this push to fail.
  1.3175 +      tmp_guarantee_CM( success, "invariant" );
  1.3176 +    }
  1.3177 +
  1.3178 +    statsOnly( int tmp_size = _task_queue->size();
  1.3179 +               if (tmp_size > _local_max_size)
  1.3180 +                 _local_max_size = tmp_size;
  1.3181 +               _local_pushes += n );
  1.3182 +  }
  1.3183 +
  1.3184 +  // this operation was quite expensive, so decrease the limits
  1.3185 +  decrease_limits();
  1.3186 +}
  1.3187 +
  1.3188 +void CMTask::drain_local_queue(bool partially) {
  1.3189 +  if (has_aborted())
  1.3190 +    return;
  1.3191 +
  1.3192 +  // Decide what the target size is, depending whether we're going to
  1.3193 +  // drain it partially (so that other tasks can steal if they run out
  1.3194 +  // of things to do) or totally (at the very end).
  1.3195 +  size_t target_size;
  1.3196 +  if (partially)
  1.3197 +    target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
  1.3198 +  else
  1.3199 +    target_size = 0;
  1.3200 +
  1.3201 +  if (_task_queue->size() > target_size) {
  1.3202 +    if (_cm->verbose_high())
  1.3203 +      gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
  1.3204 +                             _task_id, target_size);
  1.3205 +
  1.3206 +    oop obj;
  1.3207 +    bool ret = _task_queue->pop_local(obj);
  1.3208 +    while (ret) {
  1.3209 +      statsOnly( ++_local_pops );
  1.3210 +
  1.3211 +      if (_cm->verbose_high())
  1.3212 +        gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
  1.3213 +                               (void*) obj);
  1.3214 +
  1.3215 +      tmp_guarantee_CM( _g1h->is_in_g1_reserved((HeapWord*) obj),
  1.3216 +                        "invariant" );
  1.3217 +
  1.3218 +      scan_object(obj);
  1.3219 +
  1.3220 +      if (_task_queue->size() <= target_size || has_aborted())
  1.3221 +        ret = false;
  1.3222 +      else
  1.3223 +        ret = _task_queue->pop_local(obj);
  1.3224 +    }
  1.3225 +
  1.3226 +    if (_cm->verbose_high())
  1.3227 +      gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
  1.3228 +                             _task_id, _task_queue->size());
  1.3229 +  }
  1.3230 +}
  1.3231 +
  1.3232 +void CMTask::drain_global_stack(bool partially) {
  1.3233 +  if (has_aborted())
  1.3234 +    return;
  1.3235 +
  1.3236 +  // We have a policy to drain the local queue before we attempt to
  1.3237 +  // drain the global stack.
  1.3238 +  tmp_guarantee_CM( partially || _task_queue->size() == 0, "invariant" );
  1.3239 +
  1.3240 +  // Decide what the target size is, depending whether we're going to
  1.3241 +  // drain it partially (so that other tasks can steal if they run out
  1.3242 +  // of things to do) or totally (at the very end).  Notice that,
  1.3243 +  // because we move entries from the global stack in chunks or
  1.3244 +  // because another task might be doing the same, we might in fact
  1.3245 +  // drop below the target. But, this is not a problem.
  1.3246 +  size_t target_size;
  1.3247 +  if (partially)
  1.3248 +    target_size = _cm->partial_mark_stack_size_target();
  1.3249 +  else
  1.3250 +    target_size = 0;
  1.3251 +
  1.3252 +  if (_cm->mark_stack_size() > target_size) {
  1.3253 +    if (_cm->verbose_low())
  1.3254 +      gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
  1.3255 +                             _task_id, target_size);
  1.3256 +
  1.3257 +    while (!has_aborted() && _cm->mark_stack_size() > target_size) {
  1.3258 +      get_entries_from_global_stack();
  1.3259 +      drain_local_queue(partially);
  1.3260 +    }
  1.3261 +
  1.3262 +    if (_cm->verbose_low())
  1.3263 +      gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
  1.3264 +                             _task_id, _cm->mark_stack_size());
  1.3265 +  }
  1.3266 +}
  1.3267 +
  1.3268 +// The SATB queue code makes several assumptions on whether to call the
  1.3269 +// par or non-par versions of the methods. This is why some of the code
  1.3270 +// is replicated. We should really get rid of the single-threaded version
  1.3271 +// of the code to simplify things.
  1.3272 +void CMTask::drain_satb_buffers() {
  1.3273 +  if (has_aborted())
  1.3274 +    return;
  1.3275 +
  1.3276 +  // We set this so that the regular clock knows that we're in the
  1.3277 +  // middle of draining buffers and doesn't set the abort flag when it
  1.3278 +  // notices that SATB buffers are available for draining. It'd be
  1.3279 +  // very counterproductive if it did that. :-)
  1.3280 +  _draining_satb_buffers = true;
  1.3281 +
  1.3282 +  CMObjectClosure oc(this);
  1.3283 +  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  1.3284 +  if (ParallelGCThreads > 0)
  1.3285 +    satb_mq_set.set_par_closure(_task_id, &oc);
  1.3286 +  else
  1.3287 +    satb_mq_set.set_closure(&oc);
  1.3288 +
  1.3289 +  // This keeps claiming and applying the closure to completed buffers
  1.3290 +  // until we run out of buffers or we need to abort.
  1.3291 +  if (ParallelGCThreads > 0) {
  1.3292 +    while (!has_aborted() &&
  1.3293 +           satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
  1.3294 +      if (_cm->verbose_medium())
  1.3295 +        gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
  1.3296 +      statsOnly( ++_satb_buffers_processed );
  1.3297 +      regular_clock_call();
  1.3298 +    }
  1.3299 +  } else {
  1.3300 +    while (!has_aborted() &&
  1.3301 +           satb_mq_set.apply_closure_to_completed_buffer()) {
  1.3302 +      if (_cm->verbose_medium())
  1.3303 +        gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
  1.3304 +      statsOnly( ++_satb_buffers_processed );
  1.3305 +      regular_clock_call();
  1.3306 +    }
  1.3307 +  }
  1.3308 +
  1.3309 +  if (!concurrent() && !has_aborted()) {
  1.3310 +    // We should only do this during remark.
  1.3311 +    if (ParallelGCThreads > 0)
  1.3312 +      satb_mq_set.par_iterate_closure_all_threads(_task_id);
  1.3313 +    else
  1.3314 +      satb_mq_set.iterate_closure_all_threads();
  1.3315 +  }
  1.3316 +
  1.3317 +  _draining_satb_buffers = false;
  1.3318 +
  1.3319 +  tmp_guarantee_CM( has_aborted() ||
  1.3320 +                    concurrent() ||
  1.3321 +                    satb_mq_set.completed_buffers_num() == 0, "invariant" );
  1.3322 +
  1.3323 +  if (ParallelGCThreads > 0)
  1.3324 +    satb_mq_set.set_par_closure(_task_id, NULL);
  1.3325 +  else
  1.3326 +    satb_mq_set.set_closure(NULL);
  1.3327 +
  1.3328 +  // again, this was a potentially expensive operation, decrease the
  1.3329 +  // limits to get the regular clock call early
  1.3330 +  decrease_limits();
  1.3331 +}
  1.3332 +
  1.3333 +void CMTask::drain_region_stack(BitMapClosure* bc) {
  1.3334 +  if (has_aborted())
  1.3335 +    return;
  1.3336 +
  1.3337 +  tmp_guarantee_CM( _region_finger == NULL,
  1.3338 +                    "it should be NULL when we're not scanning a region" );
  1.3339 +
  1.3340 +  if (!_cm->region_stack_empty()) {
  1.3341 +    if (_cm->verbose_low())
  1.3342 +      gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
  1.3343 +                             _task_id, _cm->region_stack_size());
  1.3344 +
  1.3345 +    MemRegion mr = _cm->region_stack_pop();
  1.3346 +    // it returns MemRegion() if the pop fails
  1.3347 +    statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
  1.3348 +
  1.3349 +    while (mr.start() != NULL) {
  1.3350 +      if (_cm->verbose_medium())
  1.3351 +        gclog_or_tty->print_cr("[%d] we are scanning region "
  1.3352 +                               "["PTR_FORMAT", "PTR_FORMAT")",
  1.3353 +                               _task_id, mr.start(), mr.end());
  1.3354 +      tmp_guarantee_CM( mr.end() <= _cm->finger(),
  1.3355 +                        "otherwise the region shouldn't be on the stack" );
  1.3356 +      assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
  1.3357 +      if (_nextMarkBitMap->iterate(bc, mr)) {
  1.3358 +        tmp_guarantee_CM( !has_aborted(),
  1.3359 +               "cannot abort the task without aborting the bitmap iteration" );
  1.3360 +
  1.3361 +        // We finished iterating over the region without aborting.
  1.3362 +        regular_clock_call();
  1.3363 +        if (has_aborted())
  1.3364 +          mr = MemRegion();
  1.3365 +        else {
  1.3366 +          mr = _cm->region_stack_pop();
  1.3367 +          // it returns MemRegion() if the pop fails
  1.3368 +          statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
  1.3369 +        }
  1.3370 +      } else {
  1.3371 +        guarantee( has_aborted(), "currently the only way to do so" );
  1.3372 +
  1.3373 +        // The only way to abort the bitmap iteration is to return
  1.3374 +        // false from the do_bit() method. However, inside the
  1.3375 +        // do_bit() method we move the _region_finger to point to the
  1.3376 +        // object currently being looked at. So, if we bail out, we
  1.3377 +        // have definitely set _region_finger to something non-null.
  1.3378 +        guarantee( _region_finger != NULL, "invariant" );
  1.3379 +
  1.3380 +        // The iteration was actually aborted. So now _region_finger
  1.3381 +        // points to the address of the object we last scanned. If we
  1.3382 +        // leave it there, when we restart this task, we will rescan
  1.3383 +        // the object. It is easy to avoid this. We move the finger by
  1.3384 +        // enough to point to the next possible object header (the
  1.3385 +        // bitmap knows by how much we need to move it as it knows its
  1.3386 +        // granularity).
  1.3387 +        MemRegion newRegion =
  1.3388 +          MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());
  1.3389 +
  1.3390 +        if (!newRegion.is_empty()) {
  1.3391 +          if (_cm->verbose_low()) {
  1.3392 +            gclog_or_tty->print_cr("[%d] pushing unscanned region "
  1.3393 +                                   "[" PTR_FORMAT "," PTR_FORMAT ") on region stack",
  1.3394 +                                   _task_id,
  1.3395 +                                   newRegion.start(), newRegion.end());
  1.3396 +          }
  1.3397 +          // Now push the part of the region we didn't scan on the
  1.3398 +          // region stack to make sure a task scans it later.
  1.3399 +          _cm->region_stack_push(newRegion);
  1.3400 +        }
  1.3401 +        // break from while
  1.3402 +        mr = MemRegion();
  1.3403 +      }
  1.3404 +      _region_finger = NULL;
  1.3405 +    }
  1.3406 +
  1.3407 +    // We only push regions on the region stack during evacuation
  1.3408 +    // pauses. So if we come out of the above iteration because the
  1.3409 +    // region stack is empty, it will remain empty until the next yield
  1.3410 +    // point. So, the guarantee below is safe.
  1.3411 +    guarantee( has_aborted() || _cm->region_stack_empty(),
  1.3412 +               "only way to exit the loop" );
  1.3413 +
  1.3414 +    if (_cm->verbose_low())
  1.3415 +      gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
  1.3416 +                             _task_id, _cm->region_stack_size());
  1.3417 +  }
  1.3418 +}
  1.3419 +
  1.3420 +void CMTask::print_stats() {
  1.3421 +  gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
  1.3422 +                         _task_id, _calls);
  1.3423 +  gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
  1.3424 +                         _elapsed_time_ms, _termination_time_ms);
  1.3425 +  gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
  1.3426 +                         _step_times_ms.num(), _step_times_ms.avg(),
  1.3427 +                         _step_times_ms.sd());
  1.3428 +  gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
  1.3429 +                         _step_times_ms.maximum(), _step_times_ms.sum());
  1.3430 +
  1.3431 +#if _MARKING_STATS_
  1.3432 +  gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
  1.3433 +                         _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
  1.3434 +                         _all_clock_intervals_ms.sd());
  1.3435 +  gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
  1.3436 +                         _all_clock_intervals_ms.maximum(),
  1.3437 +                         _all_clock_intervals_ms.sum());
  1.3438 +  gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
  1.3439 +                         _clock_due_to_scanning, _clock_due_to_marking);
  1.3440 +  gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
  1.3441 +                         _objs_scanned, _objs_found_on_bitmap);
  1.3442 +  gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
  1.3443 +                         _local_pushes, _local_pops, _local_max_size);
  1.3444 +  gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
  1.3445 +                         _global_pushes, _global_pops, _global_max_size);
  1.3446 +  gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
  1.3447 +                         _global_transfers_to,_global_transfers_from);
  1.3448 +  gclog_or_tty->print_cr("  Regions: claimed = %d, Region Stack: pops = %d",
  1.3449 +                         _regions_claimed, _region_stack_pops);
  1.3450 +  gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
  1.3451 +  gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
  1.3452 +                         _steal_attempts, _steals);
  1.3453 +  gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
  1.3454 +  gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
  1.3455 +                         _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
  1.3456 +  gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
  1.3457 +                         _aborted_timed_out, _aborted_satb, _aborted_termination);
  1.3458 +#endif // _MARKING_STATS_
  1.3459 +}
  1.3460 +
  1.3461 +/*****************************************************************************
  1.3462 +
  1.3463 +    The do_marking_step(time_target_ms) method is the building block
  1.3464 +    of the parallel marking framework. It can be called in parallel
  1.3465 +    with other invocations of do_marking_step() on different tasks
  1.3466 +    (but only one per task, obviously) and concurrently with the
  1.3467 +    mutator threads, or during remark, hence it eliminates the need
  1.3468 +    for two versions of the code. When called during remark, it will
  1.3469 +    pick up from where the task left off during the concurrent marking
  1.3470 +    phase. Interestingly, tasks are also claimable during evacuation
  1.3471 +    pauses, since do_marking_step() ensures that it aborts before
  1.3472 +    it needs to yield.
  1.3473 +
  1.3474 +    The data structures that it uses to do marking work are the
  1.3475 +    following:
  1.3476 +
  1.3477 +      (1) Marking Bitmap. If there are gray objects that appear only
  1.3478 +      on the bitmap (this happens either when dealing with an overflow
  1.3479 +      or when the initial marking phase has simply marked the roots
  1.3480 +      and didn't push them on the stack), then tasks claim heap
  1.3481 +      regions whose bitmap they then scan to find gray objects. A
  1.3482 +      global finger indicates where the end of the last claimed region
  1.3483 +      is. A local finger indicates how far into the region a task has
  1.3484 +      scanned. The two fingers are used to determine how to gray an
  1.3485 +      object (i.e. whether simply marking it is OK, as it will be
  1.3486 +      visited by a task in the future, or whether it needs to be also
  1.3487 +      pushed on a stack).
  1.3488 +
  1.3489 +      (2) Local Queue. The local queue of the task which is accessed
  1.3490 +      reasonably efficiently by the task. Other tasks can steal from
  1.3491 +      it when they run out of work. Throughout the marking phase, a
  1.3492 +      task attempts to keep its local queue short but not totally
  1.3493 +      empty, so that entries are available for stealing by other
  1.3494 +      tasks. Only when there is no more work, a task will totally
  1.3495 +      drain its local queue.
  1.3496 +
  1.3497 +      (3) Global Mark Stack. This handles local queue overflow. During
  1.3498 +      marking only sets of entries are moved between it and the local
  1.3499 +      queues, as access to it requires a mutex and more fine-grain
  1.3500 +      interaction with it which might cause contention. If it
  1.3501 +      overflows, then the marking phase should restart and iterate
  1.3502 +      over the bitmap to identify gray objects. Throughout the marking
  1.3503 +      phase, tasks attempt to keep the global mark stack at a small
  1.3504 +      length but not totally empty, so that entries are available for
  1.3505 +      popping by other tasks. Only when there is no more work, tasks
  1.3506 +      will totally drain the global mark stack.
  1.3507 +
  1.3508 +      (4) Global Region Stack. Entries on it correspond to areas of
  1.3509 +      the bitmap that need to be scanned since they contain gray
  1.3510 +      objects. Pushes on the region stack only happen during
  1.3511 +      evacuation pauses and typically correspond to areas covered by
  1.3512 +      GC LABs. If it overflows, then the marking phase should restart
  1.3513 +      and iterate over the bitmap to identify gray objects. Tasks will
  1.3514 +      try to totally drain the region stack as soon as possible.
  1.3515 +
  1.3516 +      (5) SATB Buffer Queue. This is where completed SATB buffers are
  1.3517 +      made available. Buffers are regularly removed from this queue
  1.3518 +      and scanned for roots, so that the queue doesn't get too
  1.3519 +      long. During remark, all completed buffers are processed, as
  1.3520 +      well as the filled in parts of any uncompleted buffers.
  1.3521 +
  1.3522 +    The do_marking_step() method tries to abort when the time target
  1.3523 +    has been reached. There are a few other cases when the
  1.3524 +    do_marking_step() method also aborts:
  1.3525 +
  1.3526 +      (1) When the marking phase has been aborted (after a Full GC).
  1.3527 +
  1.3528 +      (2) When a global overflow (either on the global stack or the
  1.3529 +      region stack) has been triggered. Before the task aborts, it
  1.3530 +      will actually sync up with the other tasks to ensure that all
  1.3531 +      the marking data structures (local queues, stacks, fingers etc.)
  1.3532 +      are re-initialised so that when do_marking_step() completes,
  1.3533 +      the marking phase can immediately restart.
  1.3534 +
  1.3535 +      (3) When enough completed SATB buffers are available. The
  1.3536 +      do_marking_step() method only tries to drain SATB buffers right
  1.3537 +      at the beginning. So, if enough buffers are available, the
  1.3538 +      marking step aborts and the SATB buffers are processed at
  1.3539 +      the beginning of the next invocation.
  1.3540 +
  1.3541 +      (4) To yield. When we have to yield then we abort and yield
  1.3542 +      right at the end of do_marking_step(). This saves us from a lot
  1.3543 +      of hassle as, by yielding, we might allow a Full GC. If this
  1.3544 +      happens then objects will be compacted underneath our feet, the
  1.3545 +      heap might shrink, etc. We save checking for this by just
  1.3546 +      aborting and doing the yield right at the end.
  1.3547 +
  1.3548 +    From the above it follows that the do_marking_step() method should
  1.3549 +    be called in a loop (or, otherwise, regularly) until it completes.
  1.3550 +
  1.3551 +    If a marking step completes without its has_aborted() flag being
  1.3552 +    true, it means it has completed the current marking phase (and
  1.3553 +    also all other marking tasks have done so and have all synced up).
  1.3554 +
  1.3555 +    A method called regular_clock_call() is invoked "regularly" (in
  1.3556 +    sub-ms intervals) throughout marking. It is this clock method that
  1.3557 +    checks all the abort conditions which were mentioned above and
  1.3558 +    decides when the task should abort. A work-based scheme is used to
  1.3559 +    trigger this clock method: when the number of object words the
  1.3560 +    marking phase has scanned or the number of references the marking
  1.3561 +    phase has visited reach a given limit. Additional invocations of
  1.3562 +    the clock method have been planted in a few other strategic places
  1.3563 +    too. The initial reason for the clock method was to avoid calling
  1.3564 +    vtime too regularly, as it is quite expensive. So, once it was in
  1.3565 +    place, it was natural to piggy-back all the other conditions on it
  1.3566 +    too and not constantly check them throughout the code.
  1.3567 +
  1.3568 + *****************************************************************************/
  1.3569 +
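         +// A rough usage sketch (not the actual driver code): per the comment
         +// above, this step is invoked repeatedly until it completes without
         +// aborting, e.g.
         +//
         +//   do {
         +//     task->do_marking_step(target_ms);
         +//     // yield / handle overflow / process SATB buffers as needed
         +//   } while (task->has_aborted());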
  1.3570 +void CMTask::do_marking_step(double time_target_ms) {
  1.3571 +  guarantee( time_target_ms >= 1.0, "minimum granularity is 1ms" );
  1.3572 +  guarantee( concurrent() == _cm->concurrent(), "they should be the same" );
  1.3573 +
  1.3574 +  guarantee( concurrent() || _cm->region_stack_empty(),
  1.3575 +             "the region stack should have been cleared before remark" );
  1.3576 +  guarantee( _region_finger == NULL,
  1.3577 +             "this should be non-null only when a region is being scanned" );
  1.3578 +
  1.3579 +  G1CollectorPolicy* g1_policy = _g1h->g1_policy();
  1.3580 +  guarantee( _task_queues != NULL, "invariant" );
  1.3581 +  guarantee( _task_queue != NULL,  "invariant" );
  1.3582 +  guarantee( _task_queues->queue(_task_id) == _task_queue, "invariant" );
  1.3583 +
  1.3584 +  guarantee( !_claimed,
  1.3585 +             "only one thread should claim this task at any one time" );
  1.3586 +
  1.3587 +  // OK, this doesn't safeguard against all possible scenarios, as it is
  1.3588 +  // possible for two threads to set the _claimed flag at the same
  1.3589 +  // time. But it is only for debugging purposes anyway and it will
  1.3590 +  // catch most problems.
  1.3591 +  _claimed = true;
  1.3592 +
  1.3593 +  _start_time_ms = os::elapsedVTime() * 1000.0;
  1.3594 +  statsOnly( _interval_start_time_ms = _start_time_ms );
  1.3595 +
  1.3596 +  double diff_prediction_ms =
  1.3597 +    g1_policy->get_new_prediction(&_marking_step_diffs_ms);
  1.3598 +  _time_target_ms = time_target_ms - diff_prediction_ms;
  1.3599 +
  1.3600 +  // set up the variables that are used in the work-based scheme to
  1.3601 +  // call the regular clock method
  1.3602 +  _words_scanned = 0;
  1.3603 +  _refs_reached  = 0;
  1.3604 +  recalculate_limits();
  1.3605 +
  1.3606 +  // clear all flags
  1.3607 +  clear_has_aborted();
  1.3608 +  _has_aborted_timed_out = false;
  1.3609 +  _draining_satb_buffers = false;
  1.3610 +
  1.3611 +  ++_calls;
  1.3612 +
  1.3613 +  if (_cm->verbose_low())
  1.3614 +    gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
  1.3615 +                           "target = %1.2lfms >>>>>>>>>>",
  1.3616 +                           _task_id, _calls, _time_target_ms);
  1.3617 +
  1.3618 +  // Set up the bitmap and oop closures. Anything that uses them is
  1.3619 +  // eventually called from this method, so it is OK to allocate these
  1.3620 +  // statically.
  1.3621 +  CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
  1.3622 +  CMOopClosure    oop_closure(_g1h, _cm, this);
  1.3623 +  set_oop_closure(&oop_closure);
  1.3624 +
  1.3625 +  if (_cm->has_overflown()) {
  1.3626 +    // This can happen if the region stack or the mark stack overflows
  1.3627 +    // during a GC pause and this task, after a yield point,
  1.3628 +    // restarts. We have to abort as we need to get into the overflow
  1.3629 +    // protocol which happens right at the end of this task.
  1.3630 +    set_has_aborted();
  1.3631 +  }
  1.3632 +
  1.3633 +  // First drain any available SATB buffers. After this, we will not
  1.3634 +  // look at SATB buffers before the next invocation of this method.
  1.3635 +  // If enough completed SATB buffers are queued up, the regular clock
  1.3636 +  // will abort this task so that it restarts.
  1.3637 +  drain_satb_buffers();
  1.3638 +  // ...then partially drain the local queue and the global stack
  1.3639 +  drain_local_queue(true);
  1.3640 +  drain_global_stack(true);
  1.3641 +
  1.3642 +  // Then totally drain the region stack.  We will not look at
  1.3643 +  // it again before the next invocation of this method. Entries on
  1.3644 +  // the region stack are only added during evacuation pauses, for
  1.3645 +  // which we have to yield. When we do, we abort the task anyway so
  1.3646 +  // it will look at the region stack again when it restarts.
  1.3647 +  bitmap_closure.set_scanning_heap_region(false);
  1.3648 +  drain_region_stack(&bitmap_closure);
  1.3649 +  // ...then partially drain the local queue and the global stack
  1.3650 +  drain_local_queue(true);
  1.3651 +  drain_global_stack(true);
  1.3652 +
  1.3653 +  do {
  1.3654 +    if (!has_aborted() && _curr_region != NULL) {
  1.3655 +      // This means that we're already holding on to a region.
  1.3656 +      tmp_guarantee_CM( _finger != NULL,
  1.3657 +                        "if region is not NULL, then the finger "
  1.3658 +                        "should not be NULL either" );
  1.3659 +
  1.3660 +      // We might have restarted this task after an evacuation pause
  1.3661 +      // which might have evacuated the region we're holding on to
  1.3662 +      // underneath our feet. Let's read its limit again to make sure
  1.3663 +      // that we do not iterate over a region of the heap that
  1.3664 +      // contains garbage (update_region_limit() will also move
  1.3665 +      // _finger to the start of the region if it is found empty).
  1.3666 +      update_region_limit();
  1.3667 +      // We will start from _finger not from the start of the region,
  1.3668 +      // as we might be restarting this task after aborting half-way
  1.3669 +      // through scanning this region. In this case, _finger points to
  1.3670 +      // the address where we last found a marked object. If this is a
  1.3671 +      // fresh region, _finger points to start().
  1.3672 +      MemRegion mr = MemRegion(_finger, _region_limit);
  1.3673 +
  1.3674 +      if (_cm->verbose_low())
  1.3675 +        gclog_or_tty->print_cr("[%d] we're scanning part "
  1.3676 +                               "["PTR_FORMAT", "PTR_FORMAT") "
  1.3677 +                               "of region "PTR_FORMAT,
  1.3678 +                               _task_id, _finger, _region_limit, _curr_region);
  1.3679 +
  1.3680 +      // Let's iterate over the bitmap of the part of the
  1.3681 +      // region that is left.
  1.3682 +      bitmap_closure.set_scanning_heap_region(true);
  1.3683 +      if (mr.is_empty() ||
  1.3684 +          _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
  1.3685 +        // We successfully completed iterating over the region. Now,
  1.3686 +        // let's give up the region.
  1.3687 +        giveup_current_region();
  1.3688 +        regular_clock_call();
  1.3689 +      } else {
  1.3690 +        guarantee( has_aborted(), "currently the only way to do so" );
  1.3691 +        // The only way to abort the bitmap iteration is to return
  1.3692 +        // false from the do_bit() method. However, inside the
  1.3693 +        // do_bit() method we move the _finger to point to the
  1.3694 +        // object currently being looked at. So, if we bail out, we
  1.3695 +        // have definitely set _finger to something non-null.
  1.3696 +        guarantee( _finger != NULL, "invariant" );
  1.3697 +
  1.3698 +        // Region iteration was actually aborted. So now _finger
  1.3699 +        // points to the address of the object we last scanned. If we
  1.3700 +        // leave it there, when we restart this task, we will rescan
  1.3701 +        // the object. It is easy to avoid this. We move the finger by
  1.3702 +        // enough to point to the next possible object header (the
  1.3703 +        // bitmap knows by how much we need to move it as it knows its
  1.3704 +        // granularity).
  1.3705 +        move_finger_to(_nextMarkBitMap->nextWord(_finger));
  1.3706 +      }
  1.3707 +    }
  1.3708 +    // At this point we have either completed iterating over the
  1.3709 +    // region we were holding on to, or we have aborted.
  1.3710 +
  1.3711 +    // We then partially drain the local queue and the global stack.
  1.3712 +    // (Do we really need this?)
  1.3713 +    drain_local_queue(true);
  1.3714 +    drain_global_stack(true);
  1.3715 +
  1.3716 +    // Read the note on the claim_region() method on why it might
  1.3717 +    // return NULL with potentially more regions available for
  1.3718 +    // claiming and why we have to check out_of_regions() to determine
  1.3719 +    // whether we're done or not.
  1.3720 +    while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
  1.3721 +      // We are going to try to claim a new region. We should have
  1.3722 +      // given up on the previous one.
  1.3723 +      tmp_guarantee_CM( _curr_region  == NULL &&
  1.3724 +                        _finger       == NULL &&
  1.3725 +                        _region_limit == NULL, "invariant" );
  1.3726 +      if (_cm->verbose_low())
  1.3727 +        gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
  1.3728 +      HeapRegion* claimed_region = _cm->claim_region(_task_id);
  1.3729 +      if (claimed_region != NULL) {
  1.3730 +        // Yes, we managed to claim one
  1.3731 +        statsOnly( ++_regions_claimed );
  1.3732 +
  1.3733 +        if (_cm->verbose_low())
  1.3734 +          gclog_or_tty->print_cr("[%d] we successfully claimed "
  1.3735 +                                 "region "PTR_FORMAT,
  1.3736 +                                 _task_id, claimed_region);
  1.3737 +
  1.3738 +        setup_for_region(claimed_region);
  1.3739 +        tmp_guarantee_CM( _curr_region == claimed_region, "invariant" );
  1.3740 +      }
  1.3741 +      // It is important to call the regular clock here. It might take
  1.3742 +      // a while to claim a region if, for example, we hit a large
  1.3743 +      // block of empty regions. So we need to call the regular clock
  1.3744 +      // method once round the loop to make sure it's called
  1.3745 +      // frequently enough.
  1.3746 +      regular_clock_call();
  1.3747 +    }
  1.3748 +
  1.3749 +    if (!has_aborted() && _curr_region == NULL) {
  1.3750 +      tmp_guarantee_CM( _cm->out_of_regions(),
  1.3751 +                        "at this point we should be out of regions" );
  1.3752 +    }
  1.3753 +  } while ( _curr_region != NULL && !has_aborted());
  1.3754 +
  1.3755 +  if (!has_aborted()) {
  1.3756 +    // We cannot check whether the global stack is empty, since other
  1.3757 +    // tasks might be pushing objects to it concurrently.
  1.3758 +    tmp_guarantee_CM( _cm->out_of_regions() && _cm->region_stack_empty(),
  1.3759 +                      "at this point we should be out of regions" );
  1.3760 +
  1.3761 +    if (_cm->verbose_low())
  1.3762 +      gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
  1.3763 +
  1.3764 +    // Try to reduce the number of available SATB buffers so that
  1.3765 +    // remark has less work to do.
  1.3766 +    drain_satb_buffers();
  1.3767 +  }
  1.3768 +
  1.3769 +  // Since we've done everything else, we can now totally drain the
  1.3770 +  // local queue and global stack.
  1.3771 +  drain_local_queue(false);
  1.3772 +  drain_global_stack(false);
  1.3773 +
  1.3774 +  // Attempt to steal work from other tasks' queues.
  1.3775 +  if (!has_aborted()) {
  1.3776 +    // We have not aborted. This means that we have finished all that
  1.3777 +    // we could. Let's try to do some stealing...
  1.3778 +
  1.3779 +    // We cannot check whether the global stack is empty, since other
  1.3780 +    // tasks might be pushing objects to it concurrently.
  1.3781 +    guarantee( _cm->out_of_regions() &&
  1.3782 +               _cm->region_stack_empty() &&
  1.3783 +               _task_queue->size() == 0, "only way to reach here" );
  1.3784 +
  1.3785 +    if (_cm->verbose_low())
  1.3786 +      gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
  1.3787 +
  1.3788 +    while (!has_aborted()) {
  1.3789 +      oop obj;
  1.3790 +      statsOnly( ++_steal_attempts );
  1.3791 +
  1.3792 +      if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
  1.3793 +        if (_cm->verbose_medium())
  1.3794 +          gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
  1.3795 +                                 _task_id, (void*) obj);
  1.3796 +
  1.3797 +        statsOnly( ++_steals );
  1.3798 +
  1.3799 +        tmp_guarantee_CM( _nextMarkBitMap->isMarked((HeapWord*) obj),
  1.3800 +                          "any stolen object should be marked" );
  1.3801 +        scan_object(obj);
  1.3802 +
  1.3803 +        // And since we're towards the end, let's totally drain the
  1.3804 +        // local queue and global stack.
  1.3805 +        drain_local_queue(false);
  1.3806 +        drain_global_stack(false);
  1.3807 +      } else {
  1.3808 +        break;
  1.3809 +      }
  1.3810 +    }
  1.3811 +  }
  1.3812 +
  1.3813 +  // We still haven't aborted. Now, let's try to get into the
  1.3814 +  // termination protocol.
  1.3815 +  if (!has_aborted()) {
  1.3816 +    // We cannot check whether the global stack is empty, since other
  1.3817 +    // tasks might be concurrently pushing objects on it.
  1.3818 +    guarantee( _cm->out_of_regions() &&
  1.3819 +               _cm->region_stack_empty() &&
  1.3820 +               _task_queue->size() == 0, "only way to reach here" );
  1.3821 +
  1.3822 +    if (_cm->verbose_low())
  1.3823 +      gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
  1.3824 +
  1.3825 +    _termination_start_time_ms = os::elapsedVTime() * 1000.0;
  1.3826 +    // The CMTask class also extends the TerminatorTerminator class,
  1.3827 +    // hence its should_exit_termination() method will also decide
  1.3828 +    // whether to exit the termination protocol or not.
  1.3829 +    bool finished = _cm->terminator()->offer_termination(this);
  1.3830 +    double termination_end_time_ms = os::elapsedVTime() * 1000.0;
  1.3831 +    _termination_time_ms +=
  1.3832 +      termination_end_time_ms - _termination_start_time_ms;
  1.3833 +
  1.3834 +    if (finished) {
  1.3835 +      // We're all done.
  1.3836 +
  1.3837 +      if (_task_id == 0) {
  1.3838 +        // let's allow task 0 to do this
  1.3839 +        if (concurrent()) {
  1.3840 +          guarantee( _cm->concurrent_marking_in_progress(), "invariant" );
  1.3841 +          // we need to set this to false before the next
  1.3842 +          // safepoint. This way we ensure that the marking phase
  1.3843 +          // doesn't observe any more heap expansions.
  1.3844 +          _cm->clear_concurrent_marking_in_progress();
  1.3845 +        }
  1.3846 +      }
  1.3847 +
  1.3848 +      // We can now guarantee that the global stack is empty, since
  1.3849 +      // all other tasks have finished.
  1.3850 +      guarantee( _cm->out_of_regions() &&
  1.3851 +                 _cm->region_stack_empty() &&
  1.3852 +                 _cm->mark_stack_empty() &&
  1.3853 +                 _task_queue->size() == 0 &&
  1.3854 +                 !_cm->has_overflown() &&
  1.3855 +                 !_cm->mark_stack_overflow() &&
  1.3856 +                 !_cm->region_stack_overflow(),
  1.3857 +                 "only way to reach here" );
  1.3858 +
  1.3859 +      if (_cm->verbose_low())
  1.3860 +        gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
  1.3861 +    } else {
  1.3862 +      // Apparently there's more work to do. Let's abort this task;
  1.3863 +      // the marking phase will restart it and we can hopefully find more work.
  1.3864 +
  1.3865 +      if (_cm->verbose_low())
  1.3866 +        gclog_or_tty->print_cr("[%d] apparently there is more work to do", _task_id);
  1.3867 +
  1.3868 +      set_has_aborted();
  1.3869 +      statsOnly( ++_aborted_termination );
  1.3870 +    }
  1.3871 +  }
  1.3872 +
  1.3873 +  // Mainly for debugging purposes to make sure that a pointer to the
  1.3874 +  // closure which was statically allocated in this frame doesn't
  1.3875 +  // escape it by accident.
  1.3876 +  set_oop_closure(NULL);
  1.3877 +  double end_time_ms = os::elapsedVTime() * 1000.0;
  1.3878 +  double elapsed_time_ms = end_time_ms - _start_time_ms;
  1.3879 +  // Update the step history.
  1.3880 +  _step_times_ms.add(elapsed_time_ms);
  1.3881 +
  1.3882 +  if (has_aborted()) {
  1.3883 +    // The task was aborted for some reason.
  1.3884 +
  1.3885 +    statsOnly( ++_aborted );
  1.3886 +
  1.3887 +    if (_has_aborted_timed_out) {
  1.3888 +      double diff_ms = elapsed_time_ms - _time_target_ms;
  1.3889 +      // Keep statistics of how well we did with respect to hitting
  1.3890 +      // our target only if we actually timed out (if we aborted for
  1.3891 +      // other reasons, then the results might get skewed).
  1.3892 +      _marking_step_diffs_ms.add(diff_ms);
  1.3893 +    }
  1.3894 +
  1.3895 +    if (_cm->has_overflown()) {
  1.3896 +      // This is the interesting one. We aborted because a global
  1.3897 +      // overflow was raised. This means we have to restart the
  1.3898 +      // marking phase and start iterating over regions. However, in
  1.3899 +      // order to do this we have to make sure that all tasks stop
  1.3900 +      // what they are doing and re-initialise in a safe manner. We
  1.3901 +      // will achieve this with the use of two barrier sync points.
  1.3902 +
  1.3903 +      if (_cm->verbose_low())
  1.3904 +        gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
  1.3905 +
  1.3906 +      _cm->enter_first_sync_barrier(_task_id);
  1.3907 +      // When we exit this sync barrier we know that all tasks have
  1.3908 +      // stopped doing marking work. So, it's now safe to
  1.3909 +      // re-initialise our data structures. At the end of this method,
  1.3910 +      // task 0 will clear the global data structures.
  1.3911 +
  1.3912 +      statsOnly( ++_aborted_overflow );
  1.3913 +
  1.3914 +      // We clear the local state of this task...
  1.3915 +      clear_region_fields();
  1.3916 +
  1.3917 +      // ...and enter the second barrier.
  1.3918 +      _cm->enter_second_sync_barrier(_task_id);
  1.3919 +      // At this point everything has been re-initialised and we're
  1.3920 +      // ready to restart.
  1.3921 +    }
  1.3922 +
  1.3923 +    if (_cm->verbose_low()) {
  1.3924 +      gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
  1.3925 +                             "elapsed = %1.2lfms <<<<<<<<<<",
  1.3926 +                             _task_id, _time_target_ms, elapsed_time_ms);
  1.3927 +      if (_cm->has_aborted())
  1.3928 +        gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
  1.3929 +                               _task_id);
  1.3930 +    }
  1.3931 +  } else {
  1.3932 +    if (_cm->verbose_low())
  1.3933 +      gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
  1.3934 +                             "elapsed = %1.2lfms <<<<<<<<<<",
  1.3935 +                             _task_id, _time_target_ms, elapsed_time_ms);
  1.3936 +  }
  1.3937 +
  1.3938 +  _claimed = false;
  1.3939 +}
  1.3940 +
  1.3941 +CMTask::CMTask(int task_id,
  1.3942 +               ConcurrentMark* cm,
  1.3943 +               CMTaskQueue* task_queue,
  1.3944 +               CMTaskQueueSet* task_queues)
  1.3945 +  : _g1h(G1CollectedHeap::heap()),
  1.3946 +    _co_tracker(G1CMGroup),
  1.3947 +    _task_id(task_id), _cm(cm),
  1.3948 +    _claimed(false),
  1.3949 +    _nextMarkBitMap(NULL), _hash_seed(17),
  1.3950 +    _task_queue(task_queue),
  1.3951 +    _task_queues(task_queues),
  1.3952 +    _oop_closure(NULL) {
  1.3953 +  guarantee( task_queue != NULL, "invariant" );
  1.3954 +  guarantee( task_queues != NULL, "invariant" );
  1.3955 +
  1.3956 +  statsOnly( _clock_due_to_scanning = 0;
  1.3957 +             _clock_due_to_marking  = 0 );
  1.3958 +
  1.3959 +  _marking_step_diffs_ms.add(0.5);
  1.3960 +}
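
Editor's note: the following is a minimal, hypothetical C++ sketch of the control flow that the marking step above implements -- claim regions until none remain, drain the local queue and the global stack, steal from other tasks, then offer termination and, on a global mark-stack overflow, resynchronise at two barriers before restarting. It is not HotSpot code: every type and function in it (claim_region, drain_local_queue, offer_termination, sync_barrier, and so on) is an illustrative stub standing in for the corresponding CMTask/ConcurrentMark machinery, and the real implementation differs in many details (the clock, abort handling, SATB buffers, statistics).

// Hypothetical sketch only -- not HotSpot code.
namespace sketch {

struct Region {};

// Stubs standing in for ConcurrentMark / CMTask state and operations.
inline bool    out_of_regions()         { return true;    }
inline Region* claim_region()           { return nullptr; }
inline void    scan_region(Region*)     {}
inline void    drain_local_queue(bool)  {}
inline void    drain_global_stack(bool) {}
inline bool    try_steal(void**)        { return false;   }
inline void    scan_object(void*)       {}
inline bool    offer_termination()      { return true;    }
inline bool    overflow_raised()        { return false;   }
inline void    sync_barrier(int)        {}
inline void    clear_local_state()      {}

inline void marking_step() {
  // 1. Claim and scan regions until none are left.
  while (!out_of_regions()) {
    if (Region* r = claim_region()) {
      scan_region(r);
      drain_local_queue(true);    // partial drains keep the queues short
      drain_global_stack(true);
    }
  }
  drain_local_queue(false);       // no regions left: drain everything
  drain_global_stack(false);

  // 2. Try to steal entries from other tasks' queues.
  void* obj = nullptr;
  while (try_steal(&obj)) {
    scan_object(obj);
    drain_local_queue(false);
    drain_global_stack(false);
  }

  // 3. Offer termination; if it is refused, the caller restarts the step.
  bool finished = offer_termination();

  // 4. If the step aborted because the global mark stack overflowed, all
  //    tasks rendezvous at two barriers so shared state can be reset safely.
  if (!finished && overflow_raised()) {
    sync_barrier(1);              // every task has stopped marking work
    clear_local_state();
    sync_barrier(2);              // global state re-initialised; restart
  }
}

} // namespace sketch

int main() { sketch::marking_step(); return 0; }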
