changeset:   775:ebeb6490b814
author:      ysr
date:        Tue, 26 Aug 2008 14:54:48 -0700
parent:      736:387a62b4be60
child:       776:d60e4e6d7f72

6722116: CMS: Incorrect overflow handling when using parallel concurrent marking
Summary: Fixed CMSConcMarkingTask::reset() to store the restart address upon a marking stack overflow and to use it as the base, suitably aligned, for restarting the scan in CMSConcMarkingTask::do_scan_and_mark().
Reviewed-by: jcoomes, tonyp
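
The summary describes the core of the fix: the address recorded when the marking stack overflows becomes the base for restarting the scan, aligned down to a card boundary, with the sub-card prefix skipped when the 0th task is scanned. Below is a minimal standalone sketch of that address arithmetic (plain C++ with an illustrative 512-byte card size and hypothetical addresses, not HotSpot code):

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    typedef uintptr_t Addr;               // stand-in for HeapWord*
    const size_t card_size = 512;         // illustrative card size in bytes

    // Align an address down to the enclosing card boundary, the role played
    // by align_size_down(addr, CardTableModRefBS::card_size) in the patch.
    Addr align_down_to_card(Addr addr) {
      return addr & ~(Addr)(card_size - 1);
    }

    int main() {
      Addr space_bottom = 0x10000;        // hypothetical bottom of the space
      Addr restart_addr = 0x10358;        // address saved at overflow time

      // Base of the 0th task: the card-aligned restart address when the
      // restart address lies in this space, otherwise the space bottom.
      Addr aligned_start = (restart_addr >= space_bottom)
                               ? align_down_to_card(restart_addr)
                               : space_bottom;
      assert(aligned_start <= restart_addr);
      assert(aligned_start % card_size == 0);

      // Within the 0th task, scanning resumes at the unaligned restart
      // address, skipping the prefix of the card below it.
      printf("task 0 base: %#zx, resume at: %#zx\n",
             (size_t)aligned_start, (size_t)restart_addr);
      return 0;
    }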

src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp
src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp
     1.1 --- a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp	Wed Aug 20 23:05:04 2008 -0700
     1.2 +++ b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp	Tue Aug 26 14:54:48 2008 -0700
     1.3 @@ -2790,10 +2790,11 @@
     1.4    assert(n_threads > 0, "Unexpected n_threads argument");
     1.5    const size_t task_size = rescan_task_size();
     1.6    size_t n_tasks = (used_region().word_size() + task_size - 1)/task_size;
     1.7 -  assert((used_region().start() + (n_tasks - 1)*task_size <
     1.8 -          used_region().end()) &&
     1.9 -         (used_region().start() + n_tasks*task_size >=
    1.10 -          used_region().end()), "n_task calculation incorrect");
    1.11 +  assert((n_tasks == 0) == used_region().is_empty(), "n_tasks incorrect");
    1.12 +  assert(n_tasks == 0 ||
    1.13 +         ((used_region().start() + (n_tasks - 1)*task_size < used_region().end()) &&
    1.14 +          (used_region().start() + n_tasks*task_size >= used_region().end())),
    1.15 +         "n_tasks calculation incorrect");
    1.16    SequentialSubTasksDone* pst = conc_par_seq_tasks();
    1.17    assert(!pst->valid(), "Clobbering existing data?");
    1.18    pst->set_par_threads(n_threads);
    1.19 @@ -2833,7 +2834,7 @@
    1.20    assert(n_tasks == 0 ||
    1.21           ((span.start() + (n_tasks - 1)*task_size < span.end()) &&
    1.22            (span.start() + n_tasks*task_size >= span.end())),
    1.23 -         "n_task calculation incorrect");
    1.24 +         "n_tasks calculation incorrect");
    1.25    SequentialSubTasksDone* pst = conc_par_seq_tasks();
    1.26    assert(!pst->valid(), "Clobbering existing data?");
    1.27    pst->set_par_threads(n_threads);
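
The reworked assertions in the hunks above allow for an empty used region, where the ceiling division yields zero tasks; otherwise they require that n_tasks - 1 full tasks fall short of the end of the region while n_tasks tasks cover it. A small standalone check of that arithmetic (illustrative task and region sizes, not HotSpot code):

    #include <cassert>
    #include <cstddef>

    // Ceiling division used to compute the number of scanning tasks.
    size_t n_tasks_for(size_t region_words, size_t task_size) {
      return (region_words + task_size - 1) / task_size;
    }

    int main() {
      const size_t task_size = 4096;                    // hypothetical task size
      const size_t region_sizes[] = {0, 1, 4095, 4096, 4097, 100000};
      for (size_t words : region_sizes) {
        size_t n_tasks = n_tasks_for(words, task_size);
        // n_tasks is zero exactly when the region is empty.
        assert((n_tasks == 0) == (words == 0));
        // For a non-empty region, n_tasks - 1 tasks do not reach the end,
        // but n_tasks tasks do.
        assert(n_tasks == 0 ||
               ((n_tasks - 1) * task_size < words &&
                n_tasks * task_size >= words));
      }
      return 0;
    }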
     2.1 --- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Wed Aug 20 23:05:04 2008 -0700
     2.2 +++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Tue Aug 26 14:54:48 2008 -0700
     2.3 @@ -3650,6 +3650,7 @@
     2.4    CompactibleFreeListSpace*  _cms_space;
     2.5    CompactibleFreeListSpace* _perm_space;
     2.6    HeapWord*     _global_finger;
     2.7 +  HeapWord*     _restart_addr;
     2.8  
     2.9    //  Exposed here for yielding support
    2.10    Mutex* const _bit_map_lock;
    2.11 @@ -3680,7 +3681,7 @@
    2.12      _term.set_task(this);
    2.13      assert(_cms_space->bottom() < _perm_space->bottom(),
    2.14             "Finger incorrectly initialized below");
    2.15 -    _global_finger = _cms_space->bottom();
    2.16 +    _restart_addr = _global_finger = _cms_space->bottom();
    2.17    }
    2.18  
    2.19  
    2.20 @@ -3698,6 +3699,10 @@
    2.21    bool result() { return _result; }
    2.22  
    2.23    void reset(HeapWord* ra) {
    2.24 +    assert(_global_finger >= _cms_space->end(),  "Postcondition of ::work(i)");
    2.25 +    assert(_global_finger >= _perm_space->end(), "Postcondition of ::work(i)");
    2.26 +    assert(ra             <  _perm_space->end(), "ra too large");
    2.27 +    _restart_addr = _global_finger = ra;
    2.28      _term.reset_for_reuse();
    2.29    }
    2.30  
    2.31 @@ -3842,16 +3847,24 @@
    2.32    int n_tasks = pst->n_tasks();
    2.33    // We allow that there may be no tasks to do here because
    2.34    // we are restarting after a stack overflow.
    2.35 -  assert(pst->valid() || n_tasks == 0, "Uninitializd use?");
    2.36 +  assert(pst->valid() || n_tasks == 0, "Uninitialized use?");
    2.37    int nth_task = 0;
    2.38  
    2.39 -  HeapWord* start = sp->bottom();
    2.40 +  HeapWord* aligned_start = sp->bottom();
    2.41 +  if (sp->used_region().contains(_restart_addr)) {
    2.42 +    // Align down to a card boundary for the start of 0th task
    2.43 +    // for this space.
    2.44 +    aligned_start =
    2.45 +      (HeapWord*)align_size_down((uintptr_t)_restart_addr,
    2.46 +                                 CardTableModRefBS::card_size);
    2.47 +  }
    2.48 +
    2.49    size_t chunk_size = sp->marking_task_size();
    2.50    while (!pst->is_task_claimed(/* reference */ nth_task)) {
    2.51      // Having claimed the nth task in this space,
    2.52      // compute the chunk that it corresponds to:
    2.53 -    MemRegion span = MemRegion(start + nth_task*chunk_size,
    2.54 -                               start + (nth_task+1)*chunk_size);
    2.55 +    MemRegion span = MemRegion(aligned_start + nth_task*chunk_size,
    2.56 +                               aligned_start + (nth_task+1)*chunk_size);
    2.57      // Try and bump the global finger via a CAS;
    2.58      // note that we need to do the global finger bump
    2.59      // _before_ taking the intersection below, because
    2.60 @@ -3866,26 +3879,40 @@
    2.61      // beyond the "top" address of the space.
    2.62      span = span.intersection(sp->used_region());
    2.63      if (!span.is_empty()) {  // Non-null task
    2.64 -      // We want to skip the first object because
    2.65 -      // the protocol is to scan any object in its entirety
    2.66 -      // that _starts_ in this span; a fortiori, any
    2.67 -      // object starting in an earlier span is scanned
    2.68 -      // as part of an earlier claimed task.
    2.69 -      // Below we use the "careful" version of block_start
    2.70 -      // so we do not try to navigate uninitialized objects.
    2.71 -      HeapWord* prev_obj = sp->block_start_careful(span.start());
    2.72 -      // Below we use a variant of block_size that uses the
    2.73 -      // Printezis bits to avoid waiting for allocated
    2.74 -      // objects to become initialized/parsable.
    2.75 -      while (prev_obj < span.start()) {
    2.76 -        size_t sz = sp->block_size_no_stall(prev_obj, _collector);
    2.77 -        if (sz > 0) {
    2.78 -          prev_obj += sz;
    2.79 +      HeapWord* prev_obj;
    2.80 +      assert(!span.contains(_restart_addr) || nth_task == 0,
    2.81 +             "Inconsistency");
    2.82 +      if (nth_task == 0) {
    2.83 +        // For the 0th task, we'll not need to compute a block_start.
    2.84 +        if (span.contains(_restart_addr)) {
    2.85 +          // In the case of a restart because of stack overflow,
    2.86 +          // we might additionally skip a chunk prefix.
    2.87 +          prev_obj = _restart_addr;
    2.88          } else {
    2.89 -          // In this case we may end up doing a bit of redundant
    2.90 -          // scanning, but that appears unavoidable, short of
    2.91 -          // locking the free list locks; see bug 6324141.
    2.92 -          break;
    2.93 +          prev_obj = span.start();
    2.94 +        }
    2.95 +      } else {
    2.96 +        // We want to skip the first object because
    2.97 +        // the protocol is to scan any object in its entirety
    2.98 +        // that _starts_ in this span; a fortiori, any
    2.99 +        // object starting in an earlier span is scanned
   2.100 +        // as part of an earlier claimed task.
   2.101 +        // Below we use the "careful" version of block_start
   2.102 +        // so we do not try to navigate uninitialized objects.
   2.103 +        prev_obj = sp->block_start_careful(span.start());
   2.104 +        // Below we use a variant of block_size that uses the
   2.105 +        // Printezis bits to avoid waiting for allocated
   2.106 +        // objects to become initialized/parsable.
   2.107 +        while (prev_obj < span.start()) {
   2.108 +          size_t sz = sp->block_size_no_stall(prev_obj, _collector);
   2.109 +          if (sz > 0) {
   2.110 +            prev_obj += sz;
   2.111 +          } else {
   2.112 +            // In this case we may end up doing a bit of redundant
   2.113 +            // scanning, but that appears unavoidable, short of
   2.114 +            // locking the free list locks; see bug 6324141.
   2.115 +            break;
   2.116 +          }
   2.117          }
   2.118        }
   2.119        if (prev_obj < span.end()) {
   2.120 @@ -3938,12 +3965,14 @@
   2.121    void handle_stack_overflow(HeapWord* lost);
   2.122  };
   2.123  
   2.124 -// Grey object rescan during work stealing phase --
   2.125 -// the salient assumption here is that stolen oops must
   2.126 -// always be initialized, so we do not need to check for
   2.127 -// uninitialized objects before scanning here.
   2.128 +// Grey object scanning during work stealing phase --
   2.129 +// the salient assumption here is that any references
   2.130 +// that are in these stolen objects being scanned must
   2.131 +// already have been initialized (else they would not have
   2.132 +// been published), so we do not need to check for
   2.133 +// uninitialized objects before pushing here.
   2.134  void Par_ConcMarkingClosure::do_oop(oop obj) {
   2.135 -  assert(obj->is_oop_or_null(), "expected an oop or NULL");
   2.136 +  assert(obj->is_oop_or_null(true), "expected an oop or NULL");
   2.137    HeapWord* addr = (HeapWord*)obj;
   2.138    // Check if oop points into the CMS generation
   2.139    // and is not marked
   2.140 @@ -4001,7 +4030,7 @@
   2.141  // in CMSCollector's _restart_address.
   2.142  void Par_ConcMarkingClosure::handle_stack_overflow(HeapWord* lost) {
   2.143    // We need to do this under a mutex to prevent other
   2.144 -  // workers from interfering with the expansion below.
   2.145 +  // workers from interfering with the work done below.
   2.146    MutexLockerEx ml(_overflow_stack->par_lock(),
   2.147                     Mutex::_no_safepoint_check_flag);
   2.148    // Remember the least grey address discarded
   2.149 @@ -6554,7 +6583,7 @@
   2.150    if (obj != NULL) {
   2.151      // Ignore mark word because this could be an already marked oop
   2.152      // that may be chained at the end of the overflow list.
   2.153 -    assert(obj->is_oop(), "expected an oop");
   2.154 +    assert(obj->is_oop(true), "expected an oop");
   2.155      HeapWord* addr = (HeapWord*)obj;
   2.156      if (_span.contains(addr) &&
   2.157          !_bit_map->isMarked(addr)) {
   2.158 @@ -7289,6 +7318,8 @@
   2.159    _should_remember_klasses(collector->should_unload_classes())
   2.160  { }
   2.161  
   2.162 +// Assumes thread-safe access by callers, who are
   2.163 +// responsible for mutual exclusion.
   2.164  void CMSCollector::lower_restart_addr(HeapWord* low) {
   2.165    assert(_span.contains(low), "Out of bounds addr");
   2.166    if (_restart_addr == NULL) {
   2.167 @@ -7314,7 +7345,7 @@
   2.168  // in CMSCollector's _restart_address.
   2.169  void Par_PushOrMarkClosure::handle_stack_overflow(HeapWord* lost) {
   2.170    // We need to do this under a mutex to prevent other
   2.171 -  // workers from interfering with the expansion below.
   2.172 +  // workers from interfering with the work done below.
   2.173    MutexLockerEx ml(_overflow_stack->par_lock(),
   2.174                     Mutex::_no_safepoint_check_flag);
   2.175    // Remember the least grey address discarded
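
Both handle_stack_overflow variants above follow the same protocol: take the overflow stack's par_lock, then record the lowest grey address that had to be discarded so marking can later restart from it; lower_restart_addr itself, as the new comment notes, relies on its callers for mutual exclusion. A rough sketch of that protocol (illustrative C++ with std::mutex standing in for the VM mutex, not HotSpot code):

    #include <algorithm>
    #include <cstdint>
    #include <mutex>

    struct RestartAddrRecorder {
      uintptr_t  restart_addr = 0;   // lowest discarded grey address, 0 == none
      std::mutex par_lock;           // stands in for the overflow stack's par_lock

      // Worker-side entry point on marking-stack overflow: serialize against
      // other workers, then lower the shared restart address.
      void handle_stack_overflow(uintptr_t lost) {
        std::lock_guard<std::mutex> ml(par_lock);
        lower_restart_addr(lost);
      }

      // Assumes the caller provides mutual exclusion, mirroring the comment
      // added to CMSCollector::lower_restart_addr in the patch.
      void lower_restart_addr(uintptr_t low) {
        if (restart_addr == 0) {
          restart_addr = low;
        } else {
          restart_addr = std::min(restart_addr, low);  // keep the least address
        }
      }
    };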
