Tue, 26 Aug 2008 14:54:48 -0700
6722116: CMS: Incorrect overflow handling when using parallel concurrent marking
Summary: Fixed CMSConcMarkingTask::reset() to store the restart address upon a marking stack overflow and to use it as the base, suitably aligned, for restarting the scan in CMSConcMarkingTask::do_scan_and_mark().
Reviewed-by: jcoomes, tonyp
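
The crux of the fix: after a marking-stack overflow, the concurrent marking scan restarts from the recorded restart address, aligned down to a card boundary, rather than from the bottom of the space. Below is a minimal, self-contained sketch of that alignment; card_size and align_size_down() here are simplified stand-ins for CardTableModRefBS::card_size and HotSpot's align_size_down(), and the 512-byte card size is illustrative only.

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    // Simplified stand-ins for CardTableModRefBS::card_size and HotSpot's
    // align_size_down(); the 512-byte card size is illustrative.
    static const uintptr_t card_size = 512;

    static uintptr_t align_size_down(uintptr_t addr, uintptr_t alignment) {
      // Valid only for power-of-two alignments, as card_size is.
      return addr & ~(alignment - 1);
    }

    int main() {
      // After an overflow, reset() records the least grey address discarded;
      // do_scan_and_mark() then aligns it down so the 0th task starts on the
      // same card boundary a normal scan would use.
      uintptr_t restart_addr = 0x7f3a12345678;
      uintptr_t aligned_start = align_size_down(restart_addr, card_size);
      assert(aligned_start <= restart_addr && aligned_start % card_size == 0);
      printf("restart %#lx -> aligned start %#lx\n",
             (unsigned long)restart_addr, (unsigned long)aligned_start);
      return 0;
    }
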
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp	Wed Aug 20 23:05:04 2008 -0700
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp	Tue Aug 26 14:54:48 2008 -0700
@@ -2790,10 +2790,11 @@
   assert(n_threads > 0, "Unexpected n_threads argument");
   const size_t task_size = rescan_task_size();
   size_t n_tasks = (used_region().word_size() + task_size - 1)/task_size;
-  assert((used_region().start() + (n_tasks - 1)*task_size <
-          used_region().end()) &&
-         (used_region().start() + n_tasks*task_size >=
-          used_region().end()), "n_task calculation incorrect");
+  assert((n_tasks == 0) == used_region().is_empty(), "n_tasks incorrect");
+  assert(n_tasks == 0 ||
+         ((used_region().start() + (n_tasks - 1)*task_size < used_region().end()) &&
+          (used_region().start() + n_tasks*task_size >= used_region().end())),
+         "n_tasks calculation incorrect");
   SequentialSubTasksDone* pst = conc_par_seq_tasks();
   assert(!pst->valid(), "Clobbering existing data?");
   pst->set_par_threads(n_threads);
@@ -2833,7 +2834,7 @@
   assert(n_tasks == 0 ||
          ((span.start() + (n_tasks - 1)*task_size < span.end()) &&
           (span.start() + n_tasks*task_size >= span.end())),
-         "n_task calculation incorrect");
+         "n_tasks calculation incorrect");
   SequentialSubTasksDone* pst = conc_par_seq_tasks();
   assert(!pst->valid(), "Clobbering existing data?");
   pst->set_par_threads(n_threads);
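
The tightened assertions above check the usual ceiling-division invariant for n_tasks while now tolerating an empty used region (n_tasks == 0). A small self-contained check of that invariant, with illustrative names rather than HotSpot's:

    #include <cassert>
    #include <cstddef>

    // Ceiling division, as in n_tasks = (word_size + task_size - 1)/task_size.
    static size_t n_tasks_for(size_t region_words, size_t task_size) {
      return (region_words + task_size - 1) / task_size;
    }

    int main() {
      const size_t task_size = 4096;  // illustrative rescan task size, in words
      // Empty region: zero tasks, the case the old assertion did not allow.
      assert(n_tasks_for(0, task_size) == 0);
      // Non-empty region: (n_tasks - 1)*task_size < words <= n_tasks*task_size,
      // mirroring the bounds the new assertion checks against used_region().
      for (size_t words = 1; words <= 3 * task_size; words += 123) {
        size_t n = n_tasks_for(words, task_size);
        assert((n - 1) * task_size < words && words <= n * task_size);
      }
      return 0;
    }
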
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Wed Aug 20 23:05:04 2008 -0700
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Tue Aug 26 14:54:48 2008 -0700
@@ -3650,6 +3650,7 @@
   CompactibleFreeListSpace* _cms_space;
   CompactibleFreeListSpace* _perm_space;
   HeapWord* _global_finger;
+  HeapWord* _restart_addr;

   // Exposed here for yielding support
   Mutex* const _bit_map_lock;
@@ -3680,7 +3681,7 @@
     _term.set_task(this);
     assert(_cms_space->bottom() < _perm_space->bottom(),
            "Finger incorrectly initialized below");
-    _global_finger = _cms_space->bottom();
+    _restart_addr = _global_finger = _cms_space->bottom();
   }


@@ -3698,6 +3699,10 @@
   bool result() { return _result; }

   void reset(HeapWord* ra) {
+    assert(_global_finger >= _cms_space->end(), "Postcondition of ::work(i)");
+    assert(_global_finger >= _perm_space->end(), "Postcondition of ::work(i)");
+    assert(ra < _perm_space->end(), "ra too large");
+    _restart_addr = _global_finger = ra;
     _term.reset_for_reuse();
   }

@@ -3842,16 +3847,24 @@
   int n_tasks = pst->n_tasks();
   // We allow that there may be no tasks to do here because
   // we are restarting after a stack overflow.
-  assert(pst->valid() || n_tasks == 0, "Uninitializd use?");
+  assert(pst->valid() || n_tasks == 0, "Uninitialized use?");
   int nth_task = 0;

-  HeapWord* start = sp->bottom();
+  HeapWord* aligned_start = sp->bottom();
+  if (sp->used_region().contains(_restart_addr)) {
+    // Align down to a card boundary for the start of 0th task
+    // for this space.
+    aligned_start =
+      (HeapWord*)align_size_down((uintptr_t)_restart_addr,
+                                 CardTableModRefBS::card_size);
+  }
+
   size_t chunk_size = sp->marking_task_size();
   while (!pst->is_task_claimed(/* reference */ nth_task)) {
     // Having claimed the nth task in this space,
     // compute the chunk that it corresponds to:
-    MemRegion span = MemRegion(start + nth_task*chunk_size,
-                               start + (nth_task+1)*chunk_size);
+    MemRegion span = MemRegion(aligned_start + nth_task*chunk_size,
+                               aligned_start + (nth_task+1)*chunk_size);
     // Try and bump the global finger via a CAS;
     // note that we need to do the global finger bump
     // _before_ taking the intersection below, because
@@ -3866,26 +3879,40 @@
     // beyond the "top" address of the space.
     span = span.intersection(sp->used_region());
     if (!span.is_empty()) {  // Non-null task
-      // We want to skip the first object because
-      // the protocol is to scan any object in its entirety
-      // that _starts_ in this span; a fortiori, any
-      // object starting in an earlier span is scanned
-      // as part of an earlier claimed task.
-      // Below we use the "careful" version of block_start
-      // so we do not try to navigate uninitialized objects.
-      HeapWord* prev_obj = sp->block_start_careful(span.start());
-      // Below we use a variant of block_size that uses the
-      // Printezis bits to avoid waiting for allocated
-      // objects to become initialized/parsable.
-      while (prev_obj < span.start()) {
-        size_t sz = sp->block_size_no_stall(prev_obj, _collector);
-        if (sz > 0) {
-          prev_obj += sz;
+      HeapWord* prev_obj;
+      assert(!span.contains(_restart_addr) || nth_task == 0,
+             "Inconsistency");
+      if (nth_task == 0) {
+        // For the 0th task, we'll not need to compute a block_start.
+        if (span.contains(_restart_addr)) {
+          // In the case of a restart because of stack overflow,
+          // we might additionally skip a chunk prefix.
+          prev_obj = _restart_addr;
         } else {
-          // In this case we may end up doing a bit of redundant
-          // scanning, but that appears unavoidable, short of
-          // locking the free list locks; see bug 6324141.
-          break;
+          prev_obj = span.start();
+        }
+      } else {
+        // We want to skip the first object because
+        // the protocol is to scan any object in its entirety
+        // that _starts_ in this span; a fortiori, any
+        // object starting in an earlier span is scanned
+        // as part of an earlier claimed task.
+        // Below we use the "careful" version of block_start
+        // so we do not try to navigate uninitialized objects.
+        prev_obj = sp->block_start_careful(span.start());
+        // Below we use a variant of block_size that uses the
+        // Printezis bits to avoid waiting for allocated
+        // objects to become initialized/parsable.
+        while (prev_obj < span.start()) {
+          size_t sz = sp->block_size_no_stall(prev_obj, _collector);
+          if (sz > 0) {
+            prev_obj += sz;
+          } else {
+            // In this case we may end up doing a bit of redundant
+            // scanning, but that appears unavoidable, short of
+            // locking the free list locks; see bug 6324141.
+            break;
+          }
         }
       }
       if (prev_obj < span.end()) {
@@ -3938,12 +3965,14 @@
   void handle_stack_overflow(HeapWord* lost);
 };

-// Grey object rescan during work stealing phase --
-// the salient assumption here is that stolen oops must
-// always be initialized, so we do not need to check for
-// uninitialized objects before scanning here.
+// Grey object scanning during work stealing phase --
+// the salient assumption here is that any references
+// that are in these stolen objects being scanned must
+// already have been initialized (else they would not have
+// been published), so we do not need to check for
+// uninitialized objects before pushing here.
 void Par_ConcMarkingClosure::do_oop(oop obj) {
-  assert(obj->is_oop_or_null(), "expected an oop or NULL");
+  assert(obj->is_oop_or_null(true), "expected an oop or NULL");
   HeapWord* addr = (HeapWord*)obj;
   // Check if oop points into the CMS generation
   // and is not marked
@@ -4001,7 +4030,7 @@
 // in CMSCollector's _restart_address.
 void Par_ConcMarkingClosure::handle_stack_overflow(HeapWord* lost) {
   // We need to do this under a mutex to prevent other
-  // workers from interfering with the expansion below.
+  // workers from interfering with the work done below.
   MutexLockerEx ml(_overflow_stack->par_lock(),
                    Mutex::_no_safepoint_check_flag);
   // Remember the least grey address discarded
@@ -6554,7 +6583,7 @@
   if (obj != NULL) {
     // Ignore mark word because this could be an already marked oop
     // that may be chained at the end of the overflow list.
-    assert(obj->is_oop(), "expected an oop");
+    assert(obj->is_oop(true), "expected an oop");
     HeapWord* addr = (HeapWord*)obj;
     if (_span.contains(addr) &&
         !_bit_map->isMarked(addr)) {
@@ -7289,6 +7318,8 @@
     _should_remember_klasses(collector->should_unload_classes())
 { }

+// Assumes thread-safe access by callers, who are
+// responsible for mutual exclusion.
 void CMSCollector::lower_restart_addr(HeapWord* low) {
   assert(_span.contains(low), "Out of bounds addr");
   if (_restart_addr == NULL) {
@@ -7314,7 +7345,7 @@
 // in CMSCollector's _restart_address.
 void Par_PushOrMarkClosure::handle_stack_overflow(HeapWord* lost) {
   // We need to do this under a mutex to prevent other
-  // workers from interfering with the expansion below.
+  // workers from interfering with the work done below.
   MutexLockerEx ml(_overflow_stack->par_lock(),
                    Mutex::_no_safepoint_check_flag);
   // Remember the least grey address discarded
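
Both handle_stack_overflow() implementations above take the overflow stack's par_lock() and record the least grey address discarded via CMSCollector::lower_restart_addr(), which, per the new comment, leaves mutual exclusion to its callers. A sketch of that protocol under stated assumptions, with std::mutex standing in for par_lock() and uintptr_t for HeapWord*:

    #include <cstdint>
    #include <mutex>

    typedef uintptr_t HeapWordAddr;  // stand-in for HeapWord*

    struct OverflowRestartTracker {
      std::mutex par_lock;        // stand-in for _overflow_stack->par_lock()
      HeapWordAddr restart_addr;  // 0 plays the role of NULL
      OverflowRestartTracker() : restart_addr(0) {}

      // Analogue of lower_restart_addr(): keep the minimum grey address
      // discarded; callers are responsible for mutual exclusion.
      void lower_restart_addr(HeapWordAddr low) {
        if (restart_addr == 0 || low < restart_addr) {
          restart_addr = low;
        }
      }

      // Analogue of handle_stack_overflow(): done under the lock so other
      // workers cannot interfere with the work done here.
      void handle_stack_overflow(HeapWordAddr lost) {
        std::lock_guard<std::mutex> ml(par_lock);
        lower_restart_addr(lost);
        // ... the real code also empties the overflow stack here ...
      }
    };

    int main() {
      OverflowRestartTracker t;
      t.handle_stack_overflow(0x2000);
      t.handle_stack_overflow(0x1000);  // the lower (least grey) address wins
      return t.restart_addr == 0x1000 ? 0 : 1;
    }
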