8040803: G1: Concurrent mark hangs when mark stack overflows

Wed, 14 May 2014 13:32:44 +0200

author: pliden
date: Wed, 14 May 2014 13:32:44 +0200
changeset 6692: 487f09bf44e0
parent 6691: e4d318eea75a
child 6693: 8a140676873f

8040803: G1: Concurrent mark hangs when mark stack overflows
Reviewed-by: brutisso, ehelin

src/share/vm/gc_implementation/g1/concurrentMark.cpp file | annotate | diff | comparison | revisions
src/share/vm/utilities/workgroup.cpp file | annotate | diff | comparison | revisions
src/share/vm/utilities/workgroup.hpp file | annotate | diff | comparison | revisions
     1.1 --- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Thu Apr 17 18:47:15 2014 +0200
     1.2 +++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Wed May 14 13:32:44 2014 +0200
     1.3 @@ -978,7 +978,9 @@
     1.4    if (concurrent()) {
     1.5      ConcurrentGCThread::stsLeave();
     1.6    }
     1.7 -  _first_overflow_barrier_sync.enter();
     1.8 +
     1.9 +  bool barrier_aborted = !_first_overflow_barrier_sync.enter();
    1.10 +
    1.11    if (concurrent()) {
    1.12      ConcurrentGCThread::stsJoin();
    1.13    }
    1.14 @@ -986,7 +988,17 @@
    1.15    // more work
    1.16  
    1.17    if (verbose_low()) {
    1.18 -    gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
    1.19 +    if (barrier_aborted) {
    1.20 +      gclog_or_tty->print_cr("[%u] aborted first barrier", worker_id);
    1.21 +    } else {
    1.22 +      gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
    1.23 +    }
    1.24 +  }
    1.25 +
    1.26 +  if (barrier_aborted) {
    1.27 +    // If the barrier aborted we ignore the overflow condition and
    1.28 +    // just abort the whole marking phase as quickly as possible.
    1.29 +    return;
    1.30    }
    1.31  
    1.32    // If we're executing the concurrent phase of marking, reset the marking
    1.33 @@ -1026,14 +1038,20 @@
    1.34    if (concurrent()) {
    1.35      ConcurrentGCThread::stsLeave();
    1.36    }
    1.37 -  _second_overflow_barrier_sync.enter();
    1.38 +
    1.39 +  bool barrier_aborted = !_second_overflow_barrier_sync.enter();
    1.40 +
    1.41    if (concurrent()) {
    1.42      ConcurrentGCThread::stsJoin();
    1.43    }
    1.44    // at this point everything should be re-initialized and ready to go
    1.45  
    1.46    if (verbose_low()) {
    1.47 -    gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
    1.48 +    if (barrier_aborted) {
    1.49 +      gclog_or_tty->print_cr("[%u] aborted second barrier", worker_id);
    1.50 +    } else {
    1.51 +      gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
    1.52 +    }
    1.53    }
    1.54  }
    1.55  
    1.56 @@ -3232,6 +3250,8 @@
    1.57    for (uint i = 0; i < _max_worker_id; ++i) {
    1.58      _tasks[i]->clear_region_fields();
    1.59    }
    1.60 +  _first_overflow_barrier_sync.abort();
    1.61 +  _second_overflow_barrier_sync.abort();
    1.62    _has_aborted = true;
    1.63  
    1.64    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
     2.1 --- a/src/share/vm/utilities/workgroup.cpp	Thu Apr 17 18:47:15 2014 +0200
     2.2 +++ b/src/share/vm/utilities/workgroup.cpp	Wed May 14 13:32:44 2014 +0200
     2.3 @@ -378,21 +378,22 @@
     2.4  
     2.5  WorkGangBarrierSync::WorkGangBarrierSync()
     2.6    : _monitor(Mutex::safepoint, "work gang barrier sync", true),
     2.7 -    _n_workers(0), _n_completed(0), _should_reset(false) {
     2.8 +    _n_workers(0), _n_completed(0), _should_reset(false), _aborted(false) {
     2.9  }
    2.10  
    2.11  WorkGangBarrierSync::WorkGangBarrierSync(uint n_workers, const char* name)
    2.12    : _monitor(Mutex::safepoint, name, true),
    2.13 -    _n_workers(n_workers), _n_completed(0), _should_reset(false) {
    2.14 +    _n_workers(n_workers), _n_completed(0), _should_reset(false), _aborted(false) {
    2.15  }
    2.16  
    2.17  void WorkGangBarrierSync::set_n_workers(uint n_workers) {
    2.18 -  _n_workers   = n_workers;
    2.19 -  _n_completed = 0;
    2.20 +  _n_workers    = n_workers;
    2.21 +  _n_completed  = 0;
    2.22    _should_reset = false;
    2.23 +  _aborted      = false;
    2.24  }
    2.25  
    2.26 -void WorkGangBarrierSync::enter() {
    2.27 +bool WorkGangBarrierSync::enter() {
    2.28    MutexLockerEx x(monitor(), Mutex::_no_safepoint_check_flag);
    2.29    if (should_reset()) {
    2.30      // The should_reset() was set and we are the first worker to enter
    2.31 @@ -415,10 +416,17 @@
    2.32      set_should_reset(true);
    2.33      monitor()->notify_all();
    2.34    } else {
    2.35 -    while (n_completed() != n_workers()) {
    2.36 +    while (n_completed() != n_workers() && !aborted()) {
    2.37        monitor()->wait(/* no_safepoint_check */ true);
    2.38      }
    2.39    }
    2.40 +  return !aborted();
    2.41 +}
    2.42 +
    2.43 +void WorkGangBarrierSync::abort() {
    2.44 +  MutexLockerEx x(monitor(), Mutex::_no_safepoint_check_flag);
    2.45 +  set_aborted();
    2.46 +  monitor()->notify_all();
    2.47  }
    2.48  
    2.49  // SubTasksDone functions.
     3.1 --- a/src/share/vm/utilities/workgroup.hpp	Thu Apr 17 18:47:15 2014 +0200
     3.2 +++ b/src/share/vm/utilities/workgroup.hpp	Wed May 14 13:32:44 2014 +0200
     3.3 @@ -359,18 +359,20 @@
     3.4  class WorkGangBarrierSync : public StackObj {
     3.5  protected:
     3.6    Monitor _monitor;
     3.7 -  uint     _n_workers;
     3.8 -  uint     _n_completed;
     3.9 +  uint    _n_workers;
    3.10 +  uint    _n_completed;
    3.11    bool    _should_reset;
    3.12 +  bool    _aborted;
    3.13  
    3.14    Monitor* monitor()        { return &_monitor; }
    3.15    uint     n_workers()      { return _n_workers; }
    3.16    uint     n_completed()    { return _n_completed; }
    3.17    bool     should_reset()   { return _should_reset; }
    3.18 +  bool     aborted()        { return _aborted; }
    3.19  
    3.20    void     zero_completed() { _n_completed = 0; }
    3.21    void     inc_completed()  { _n_completed++; }
    3.22 -
    3.23 +  void     set_aborted()    { _aborted = true; }
    3.24    void     set_should_reset(bool v) { _should_reset = v; }
    3.25  
    3.26  public:
    3.27 @@ -383,8 +385,14 @@
    3.28  
    3.29    // Enter the barrier. A worker that enters the barrier will
    3.30    // not be allowed to leave until all other threads have
    3.31 -  // also entered the barrier.
    3.32 -  void enter();
    3.33 +  // also entered the barrier or the barrier is aborted.
    3.34 +  // Returns false if the barrier was aborted.
    3.35 +  bool enter();
    3.36 +
    3.37 +  // Aborts the barrier and wakes up any threads waiting for
    3.38 +  // the barrier to complete. The barrier will remain in the
    3.39 +  // aborted state until the next call to set_n_workers().
    3.40 +  void abort();
    3.41  };
    3.42  
    3.43  // A class to manage claiming of subtasks within a group of tasks.  The

mercurial