Wed, 14 May 2014 13:32:44 +0200
8040803: G1: Concurrent mark hangs when mark stack overflows
Reviewed-by: brutisso, ehelin
1.1 --- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp Thu Apr 17 18:47:15 2014 +0200 1.2 +++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp Wed May 14 13:32:44 2014 +0200 1.3 @@ -978,7 +978,9 @@ 1.4 if (concurrent()) { 1.5 ConcurrentGCThread::stsLeave(); 1.6 } 1.7 - _first_overflow_barrier_sync.enter(); 1.8 + 1.9 + bool barrier_aborted = !_first_overflow_barrier_sync.enter(); 1.10 + 1.11 if (concurrent()) { 1.12 ConcurrentGCThread::stsJoin(); 1.13 } 1.14 @@ -986,7 +988,17 @@ 1.15 // more work 1.16 1.17 if (verbose_low()) { 1.18 - gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id); 1.19 + if (barrier_aborted) { 1.20 + gclog_or_tty->print_cr("[%u] aborted first barrier", worker_id); 1.21 + } else { 1.22 + gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id); 1.23 + } 1.24 + } 1.25 + 1.26 + if (barrier_aborted) { 1.27 + // If the barrier aborted we ignore the overflow condition and 1.28 + // just abort the whole marking phase as quickly as possible. 1.29 + return; 1.30 } 1.31 1.32 // If we're executing the concurrent phase of marking, reset the marking 1.33 @@ -1026,14 +1038,20 @@ 1.34 if (concurrent()) { 1.35 ConcurrentGCThread::stsLeave(); 1.36 } 1.37 - _second_overflow_barrier_sync.enter(); 1.38 + 1.39 + bool barrier_aborted = !_second_overflow_barrier_sync.enter(); 1.40 + 1.41 if (concurrent()) { 1.42 ConcurrentGCThread::stsJoin(); 1.43 } 1.44 // at this point everything should be re-initialized and ready to go 1.45 1.46 if (verbose_low()) { 1.47 - gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id); 1.48 + if (barrier_aborted) { 1.49 + gclog_or_tty->print_cr("[%u] aborted second barrier", worker_id); 1.50 + } else { 1.51 + gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id); 1.52 + } 1.53 } 1.54 } 1.55 1.56 @@ -3232,6 +3250,8 @@ 1.57 for (uint i = 0; i < _max_worker_id; ++i) { 1.58 _tasks[i]->clear_region_fields(); 1.59 } 1.60 + _first_overflow_barrier_sync.abort(); 1.61 + _second_overflow_barrier_sync.abort(); 1.62 _has_aborted = true; 1.63 1.64 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2.1 --- a/src/share/vm/utilities/workgroup.cpp Thu Apr 17 18:47:15 2014 +0200 2.2 +++ b/src/share/vm/utilities/workgroup.cpp Wed May 14 13:32:44 2014 +0200 2.3 @@ -378,21 +378,22 @@ 2.4 2.5 WorkGangBarrierSync::WorkGangBarrierSync() 2.6 : _monitor(Mutex::safepoint, "work gang barrier sync", true), 2.7 - _n_workers(0), _n_completed(0), _should_reset(false) { 2.8 + _n_workers(0), _n_completed(0), _should_reset(false), _aborted(false) { 2.9 } 2.10 2.11 WorkGangBarrierSync::WorkGangBarrierSync(uint n_workers, const char* name) 2.12 : _monitor(Mutex::safepoint, name, true), 2.13 - _n_workers(n_workers), _n_completed(0), _should_reset(false) { 2.14 + _n_workers(n_workers), _n_completed(0), _should_reset(false), _aborted(false) { 2.15 } 2.16 2.17 void WorkGangBarrierSync::set_n_workers(uint n_workers) { 2.18 - _n_workers = n_workers; 2.19 - _n_completed = 0; 2.20 + _n_workers = n_workers; 2.21 + _n_completed = 0; 2.22 _should_reset = false; 2.23 + _aborted = false; 2.24 } 2.25 2.26 -void WorkGangBarrierSync::enter() { 2.27 +bool WorkGangBarrierSync::enter() { 2.28 MutexLockerEx x(monitor(), Mutex::_no_safepoint_check_flag); 2.29 if (should_reset()) { 2.30 // The should_reset() was set and we are the first worker to enter 2.31 @@ -415,10 +416,17 @@ 2.32 set_should_reset(true); 2.33 monitor()->notify_all(); 2.34 } else { 2.35 - while (n_completed() != n_workers()) { 2.36 + while (n_completed() != n_workers() && !aborted()) { 2.37 monitor()->wait(/* no_safepoint_check */ true); 2.38 } 2.39 } 2.40 + return !aborted(); 2.41 +} 2.42 + 2.43 +void WorkGangBarrierSync::abort() { 2.44 + MutexLockerEx x(monitor(), Mutex::_no_safepoint_check_flag); 2.45 + set_aborted(); 2.46 + monitor()->notify_all(); 2.47 } 2.48 2.49 // SubTasksDone functions.
3.1 --- a/src/share/vm/utilities/workgroup.hpp Thu Apr 17 18:47:15 2014 +0200 3.2 +++ b/src/share/vm/utilities/workgroup.hpp Wed May 14 13:32:44 2014 +0200 3.3 @@ -359,18 +359,20 @@ 3.4 class WorkGangBarrierSync : public StackObj { 3.5 protected: 3.6 Monitor _monitor; 3.7 - uint _n_workers; 3.8 - uint _n_completed; 3.9 + uint _n_workers; 3.10 + uint _n_completed; 3.11 bool _should_reset; 3.12 + bool _aborted; 3.13 3.14 Monitor* monitor() { return &_monitor; } 3.15 uint n_workers() { return _n_workers; } 3.16 uint n_completed() { return _n_completed; } 3.17 bool should_reset() { return _should_reset; } 3.18 + bool aborted() { return _aborted; } 3.19 3.20 void zero_completed() { _n_completed = 0; } 3.21 void inc_completed() { _n_completed++; } 3.22 - 3.23 + void set_aborted() { _aborted = true; } 3.24 void set_should_reset(bool v) { _should_reset = v; } 3.25 3.26 public: 3.27 @@ -383,8 +385,14 @@ 3.28 3.29 // Enter the barrier. A worker that enters the barrier will 3.30 // not be allowed to leave until all other threads have 3.31 - // also entered the barrier. 3.32 - void enter(); 3.33 + // also entered the barrier or the barrier is aborted. 3.34 + // Returns false if the barrier was aborted. 3.35 + bool enter(); 3.36 + 3.37 + // Aborts the barrier and wakes up any threads waiting for 3.38 + // the barrier to complete. The barrier will remain in the 3.39 + // aborted state until the next call to set_n_workers(). 3.40 + void abort(); 3.41 }; 3.42 3.43 // A class to manage claiming of subtasks within a group of tasks. The