Merge

Wed, 10 Jun 2009 14:57:21 -0700

author
jmasa
date
Wed, 10 Jun 2009 14:57:21 -0700
changeset 1235
7295839252de
parent 1228
eacd97c88873
parent 1234
f89cf529c3c7
child 1236
cf4f487696ba
child 1241
821269eca479

Merge

     1.1 --- a/src/cpu/sparc/vm/assembler_sparc.cpp	Fri Jun 05 10:25:39 2009 -0700
     1.2 +++ b/src/cpu/sparc/vm/assembler_sparc.cpp	Wed Jun 10 14:57:21 2009 -0700
     1.3 @@ -4454,43 +4454,26 @@
     1.4      delayed()->nop();
     1.5    }
     1.6  
     1.7 -  // Now we decide how to generate the card table write.  If we're
     1.8 -  // enqueueing, we call out to a generated function.  Otherwise, we do it
     1.9 -  // inline here.
    1.10 -
    1.11 -  if (G1RSBarrierUseQueue) {
    1.12 -    // If the "store_addr" register is an "in" or "local" register, move it to
    1.13 -    // a scratch reg so we can pass it as an argument.
    1.14 -    bool use_scr = !(store_addr->is_global() || store_addr->is_out());
    1.15 -    // Pick a scratch register different from "tmp".
    1.16 -    Register scr = (tmp == G1_scratch ? G3_scratch : G1_scratch);
    1.17 -    // Make sure we use up the delay slot!
    1.18 -    if (use_scr) {
    1.19 -      post_filter_masm->mov(store_addr, scr);
    1.20 -    } else {
    1.21 -      post_filter_masm->nop();
    1.22 -    }
    1.23 -    generate_dirty_card_log_enqueue_if_necessary(bs->byte_map_base);
    1.24 -    save_frame(0);
    1.25 -    call(dirty_card_log_enqueue);
    1.26 -    if (use_scr) {
    1.27 -      delayed()->mov(scr, O0);
    1.28 -    } else {
    1.29 -      delayed()->mov(store_addr->after_save(), O0);
    1.30 -    }
    1.31 -    restore();
    1.32 -
    1.33 +  // If the "store_addr" register is an "in" or "local" register, move it to
    1.34 +  // a scratch reg so we can pass it as an argument.
    1.35 +  bool use_scr = !(store_addr->is_global() || store_addr->is_out());
    1.36 +  // Pick a scratch register different from "tmp".
    1.37 +  Register scr = (tmp == G1_scratch ? G3_scratch : G1_scratch);
    1.38 +  // Make sure we use up the delay slot!
    1.39 +  if (use_scr) {
    1.40 +    post_filter_masm->mov(store_addr, scr);
    1.41    } else {
    1.42 -
    1.43 -#ifdef _LP64
    1.44 -    post_filter_masm->srlx(store_addr, CardTableModRefBS::card_shift, store_addr);
    1.45 -#else
    1.46 -    post_filter_masm->srl(store_addr, CardTableModRefBS::card_shift, store_addr);
    1.47 -#endif
    1.48 -    assert(tmp != store_addr, "need separate temp reg");
    1.49 -    set(bs->byte_map_base, tmp);
    1.50 -    stb(G0, tmp, store_addr);
    1.51 +    post_filter_masm->nop();
    1.52    }
    1.53 +  generate_dirty_card_log_enqueue_if_necessary(bs->byte_map_base);
    1.54 +  save_frame(0);
    1.55 +  call(dirty_card_log_enqueue);
    1.56 +  if (use_scr) {
    1.57 +    delayed()->mov(scr, O0);
    1.58 +  } else {
    1.59 +    delayed()->mov(store_addr->after_save(), O0);
    1.60 +  }
    1.61 +  restore();
    1.62  
    1.63    bind(filtered);
    1.64  
     2.1 --- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Fri Jun 05 10:25:39 2009 -0700
     2.2 +++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Wed Jun 10 14:57:21 2009 -0700
     2.3 @@ -555,6 +555,7 @@
     2.4    _collector_policy(cp),
     2.5    _should_unload_classes(false),
     2.6    _concurrent_cycles_since_last_unload(0),
     2.7 +  _roots_scanning_options(0),
     2.8    _sweep_estimate(CMS_SweepWeight, CMS_SweepPadding)
     2.9  {
    2.10    if (ExplicitGCInvokesConcurrentAndUnloadsClasses) {
     3.1 --- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp	Fri Jun 05 10:25:39 2009 -0700
     3.2 +++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp	Wed Jun 10 14:57:21 2009 -0700
     3.3 @@ -545,6 +545,11 @@
     3.4    bool unloaded_classes_last_cycle() const {
     3.5      return concurrent_cycles_since_last_unload() == 0;
     3.6    }
     3.7 +  // Root scanning options for perm gen
     3.8 +  int _roots_scanning_options;
     3.9 +  int roots_scanning_options() const      { return _roots_scanning_options; }
    3.10 +  void add_root_scanning_option(int o)    { _roots_scanning_options |= o;   }
    3.11 +  void remove_root_scanning_option(int o) { _roots_scanning_options &= ~o;  }
    3.12  
    3.13    // Verification support
    3.14    CMSBitMap     _verification_mark_bm;
    3.15 @@ -719,11 +724,6 @@
    3.16    NOT_PRODUCT(bool simulate_overflow();)       // sequential
    3.17    NOT_PRODUCT(bool par_simulate_overflow();)   // MT version
    3.18  
    3.19 -  int _roots_scanning_options;
    3.20 -  int roots_scanning_options() const      { return _roots_scanning_options; }
    3.21 -  void add_root_scanning_option(int o)    { _roots_scanning_options |= o;   }
    3.22 -  void remove_root_scanning_option(int o) { _roots_scanning_options &= ~o;  }
    3.23 -
    3.24    // CMS work methods
    3.25    void checkpointRootsInitialWork(bool asynch); // initial checkpoint work
    3.26  
     4.1 --- a/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp	Fri Jun 05 10:25:39 2009 -0700
     4.2 +++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp	Wed Jun 10 14:57:21 2009 -0700
     4.3 @@ -25,26 +25,37 @@
     4.4  #include "incls/_precompiled.incl"
     4.5  #include "incls/_concurrentG1Refine.cpp.incl"
     4.6  
     4.7 -bool ConcurrentG1Refine::_enabled = false;
     4.8 -
     4.9  ConcurrentG1Refine::ConcurrentG1Refine() :
    4.10 -  _pya(PYA_continue), _last_pya(PYA_continue),
    4.11 -  _last_cards_during(), _first_traversal(false),
    4.12    _card_counts(NULL), _cur_card_count_histo(NULL), _cum_card_count_histo(NULL),
    4.13    _hot_cache(NULL),
    4.14    _def_use_cache(false), _use_cache(false),
    4.15 -  _n_periods(0), _total_cards(0), _total_travs(0)
    4.16 +  _n_periods(0), _total_cards(0), _total_travs(0),
    4.17 +  _threads(NULL), _n_threads(0)
    4.18  {
    4.19    if (G1ConcRefine) {
    4.20 -    _cg1rThread = new ConcurrentG1RefineThread(this);
    4.21 -    assert(cg1rThread() != NULL, "Conc refine should have been created");
    4.22 -    assert(cg1rThread()->cg1r() == this,
    4.23 -           "Conc refine thread should refer to this");
    4.24 -  } else {
    4.25 -    _cg1rThread = NULL;
    4.26 +    _n_threads = (int)thread_num();
    4.27 +    if (_n_threads > 0) {
    4.28 +      _threads = NEW_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _n_threads);
    4.29 +      int worker_id_offset = (int)DirtyCardQueueSet::num_par_ids();
    4.30 +      ConcurrentG1RefineThread *next = NULL;
    4.31 +      for (int i = _n_threads - 1; i >= 0; i--) {
    4.32 +        ConcurrentG1RefineThread* t = new ConcurrentG1RefineThread(this, next, worker_id_offset, i);
    4.33 +        assert(t != NULL, "Conc refine should have been created");
    4.34 +        assert(t->cg1r() == this, "Conc refine thread should refer to this");
    4.35 +        _threads[i] = t;
    4.36 +        next = t;
    4.37 +      }
    4.38 +    }
    4.39    }
    4.40  }
    4.41  
    4.42 +size_t ConcurrentG1Refine::thread_num() {
    4.43 +  if (G1ConcRefine) {
    4.44 +    return (G1ParallelRSetThreads > 0) ? G1ParallelRSetThreads : ParallelGCThreads;
    4.45 +  }
    4.46 +  return 0;
    4.47 +}
    4.48 +
    4.49  void ConcurrentG1Refine::init() {
    4.50    if (G1ConcRSLogCacheSize > 0 || G1ConcRSCountTraversals) {
    4.51      G1CollectedHeap* g1h = G1CollectedHeap::heap();
    4.52 @@ -75,6 +86,14 @@
    4.53    }
    4.54  }
    4.55  
    4.56 +void ConcurrentG1Refine::stop() {
    4.57 +  if (_threads != NULL) {
    4.58 +    for (int i = 0; i < _n_threads; i++) {
    4.59 +      _threads[i]->stop();
    4.60 +    }
    4.61 +  }
    4.62 +}
    4.63 +
    4.64  ConcurrentG1Refine::~ConcurrentG1Refine() {
    4.65    if (G1ConcRSLogCacheSize > 0 || G1ConcRSCountTraversals) {
    4.66      assert(_card_counts != NULL, "Logic");
    4.67 @@ -88,104 +107,22 @@
    4.68      assert(_hot_cache != NULL, "Logic");
    4.69      FREE_C_HEAP_ARRAY(jbyte*, _hot_cache);
    4.70    }
    4.71 -}
    4.72 -
    4.73 -bool ConcurrentG1Refine::refine() {
    4.74 -  G1CollectedHeap* g1h = G1CollectedHeap::heap();
    4.75 -  unsigned cards_before = g1h->g1_rem_set()->conc_refine_cards();
    4.76 -  clear_hot_cache();  // Any previous values in this are now invalid.
    4.77 -  g1h->g1_rem_set()->concurrentRefinementPass(this);
    4.78 -  _traversals++;
    4.79 -  unsigned cards_after = g1h->g1_rem_set()->conc_refine_cards();
    4.80 -  unsigned cards_during = cards_after-cards_before;
    4.81 -  // If this is the first traversal in the current enabling
    4.82 -  // and we did some cards, or if the number of cards found is decreasing
    4.83 -  // sufficiently quickly, then keep going.  Otherwise, sleep a while.
    4.84 -  bool res =
    4.85 -    (_first_traversal && cards_during > 0)
    4.86 -    ||
    4.87 -    (!_first_traversal && cards_during * 3 < _last_cards_during * 2);
    4.88 -  _last_cards_during = cards_during;
    4.89 -  _first_traversal = false;
    4.90 -  return res;
    4.91 -}
    4.92 -
    4.93 -void ConcurrentG1Refine::enable() {
    4.94 -  MutexLocker x(G1ConcRefine_mon);
    4.95 -  if (!_enabled) {
    4.96 -    _enabled = true;
    4.97 -    _first_traversal = true; _last_cards_during = 0;
    4.98 -    G1ConcRefine_mon->notify_all();
    4.99 +  if (_threads != NULL) {
   4.100 +    for (int i = 0; i < _n_threads; i++) {
   4.101 +      delete _threads[i];
   4.102 +    }
   4.103 +    FREE_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _threads);
   4.104    }
   4.105  }
   4.106  
   4.107 -unsigned ConcurrentG1Refine::disable() {
   4.108 -  MutexLocker x(G1ConcRefine_mon);
   4.109 -  if (_enabled) {
   4.110 -    _enabled = false;
   4.111 -    return _traversals;
   4.112 -  } else {
   4.113 -    return 0;
   4.114 +void ConcurrentG1Refine::threads_do(ThreadClosure *tc) {
   4.115 +  if (_threads != NULL) {
   4.116 +    for (int i = 0; i < _n_threads; i++) {
   4.117 +      tc->do_thread(_threads[i]);
   4.118 +    }
   4.119    }
   4.120  }
   4.121  
   4.122 -void ConcurrentG1Refine::wait_for_ConcurrentG1Refine_enabled() {
   4.123 -  G1ConcRefine_mon->lock();
   4.124 -  while (!_enabled) {
   4.125 -    G1ConcRefine_mon->wait(Mutex::_no_safepoint_check_flag);
   4.126 -  }
   4.127 -  G1ConcRefine_mon->unlock();
   4.128 -  _traversals = 0;
   4.129 -};
   4.130 -
   4.131 -void ConcurrentG1Refine::set_pya_restart() {
   4.132 -  // If we're using the log-based RS barrier, the above will cause
   4.133 -  // in-progress traversals of completed log buffers to quit early; we will
   4.134 -  // also abandon all other buffers.
   4.135 -  if (G1RSBarrierUseQueue) {
   4.136 -    DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
   4.137 -    dcqs.abandon_logs();
   4.138 -    // Reset the post-yield actions.
   4.139 -    _pya = PYA_continue;
   4.140 -    _last_pya = PYA_continue;
   4.141 -  } else {
   4.142 -    _pya = PYA_restart;
   4.143 -  }
   4.144 -}
   4.145 -
   4.146 -void ConcurrentG1Refine::set_pya_cancel() {
   4.147 -  _pya = PYA_cancel;
   4.148 -}
   4.149 -
   4.150 -PostYieldAction ConcurrentG1Refine::get_pya() {
   4.151 -  if (_pya != PYA_continue) {
   4.152 -    jint val = _pya;
   4.153 -    while (true) {
   4.154 -      jint val_read = Atomic::cmpxchg(PYA_continue, &_pya, val);
   4.155 -      if (val_read == val) {
   4.156 -        PostYieldAction res = (PostYieldAction)val;
   4.157 -        assert(res != PYA_continue, "Only the refine thread should reset.");
   4.158 -        _last_pya = res;
   4.159 -        return res;
   4.160 -      } else {
   4.161 -        val = val_read;
   4.162 -      }
   4.163 -    }
   4.164 -  }
   4.165 -  // QQQ WELL WHAT DO WE RETURN HERE???
   4.166 -  // make up something!
   4.167 -  return PYA_continue;
   4.168 -}
   4.169 -
   4.170 -PostYieldAction ConcurrentG1Refine::get_last_pya() {
   4.171 -  PostYieldAction res = _last_pya;
   4.172 -  _last_pya = PYA_continue;
   4.173 -  return res;
   4.174 -}
   4.175 -
   4.176 -bool ConcurrentG1Refine::do_traversal() {
   4.177 -  return _cg1rThread->do_traversal();
   4.178 -}
   4.179  
   4.180  int ConcurrentG1Refine::add_card_count(jbyte* card_ptr) {
   4.181    size_t card_num = (card_ptr - _ct_bot);
     5.1 --- a/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp	Fri Jun 05 10:25:39 2009 -0700
     5.2 +++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp	Wed Jun 10 14:57:21 2009 -0700
     5.3 @@ -26,26 +26,9 @@
     5.4  class ConcurrentG1RefineThread;
     5.5  class G1RemSet;
     5.6  
     5.7 -// What to do after a yield:
     5.8 -enum PostYieldAction {
     5.9 -  PYA_continue,  // Continue the traversal
    5.10 -  PYA_restart,   // Restart
    5.11 -  PYA_cancel     // It's been completed by somebody else: cancel.
    5.12 -};
    5.13 -
    5.14  class ConcurrentG1Refine: public CHeapObj {
    5.15 -  ConcurrentG1RefineThread* _cg1rThread;
    5.16 -
    5.17 -  volatile jint _pya;
    5.18 -  PostYieldAction _last_pya;
    5.19 -
    5.20 -  static bool _enabled;  // Protected by G1ConcRefine_mon.
    5.21 -  unsigned _traversals;
    5.22 -
    5.23 -  // Number of cards processed during last refinement traversal.
    5.24 -  unsigned _first_traversal;
    5.25 -  unsigned _last_cards_during;
    5.26 -
    5.27 +  ConcurrentG1RefineThread** _threads;
    5.28 +  int _n_threads;
    5.29    // The cache for card refinement.
    5.30    bool     _use_cache;
    5.31    bool     _def_use_cache;
    5.32 @@ -74,37 +57,10 @@
    5.33    ~ConcurrentG1Refine();
    5.34  
    5.35    void init(); // Accomplish some initialization that has to wait.
    5.36 +  void stop();
    5.37  
    5.38 -  // Enabled Conc refinement, waking up thread if necessary.
    5.39 -  void enable();
    5.40 -
    5.41 -  // Returns the number of traversals performed since this refiner was enabled.
    5.42 -  unsigned disable();
    5.43 -
    5.44 -  // Requires G1ConcRefine_mon to be held.
    5.45 -  bool enabled() { return _enabled; }
    5.46 -
    5.47 -  // Returns only when G1 concurrent refinement has been enabled.
    5.48 -  void wait_for_ConcurrentG1Refine_enabled();
    5.49 -
    5.50 -  // Do one concurrent refinement pass over the card table.  Returns "true"
    5.51 -  // if heuristics determine that another pass should be done immediately.
    5.52 -  bool refine();
    5.53 -
    5.54 -  // Indicate that an in-progress refinement pass should start over.
    5.55 -  void set_pya_restart();
    5.56 -  // Indicate that an in-progress refinement pass should quit.
    5.57 -  void set_pya_cancel();
    5.58 -
    5.59 -  // Get the appropriate post-yield action.  Also sets last_pya.
    5.60 -  PostYieldAction get_pya();
    5.61 -
    5.62 -  // The last PYA read by "get_pya".
    5.63 -  PostYieldAction get_last_pya();
    5.64 -
    5.65 -  bool do_traversal();
    5.66 -
    5.67 -  ConcurrentG1RefineThread* cg1rThread() { return _cg1rThread; }
    5.68 +  // Iterate over the conc refine threads
    5.69 +  void threads_do(ThreadClosure *tc);
    5.70  
    5.71    // If this is the first entry for the slot, writes into the cache and
    5.72    // returns NULL.  If it causes an eviction, returns the evicted pointer.
    5.73 @@ -129,4 +85,6 @@
    5.74  
    5.75    void clear_and_record_card_counts();
    5.76    void print_final_card_counts();
    5.77 +
    5.78 +  static size_t thread_num();
    5.79  };
     6.1 --- a/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp	Fri Jun 05 10:25:39 2009 -0700
     6.2 +++ b/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp	Wed Jun 10 14:57:21 2009 -0700
     6.3 @@ -30,12 +30,14 @@
     6.4  // The CM thread is created when the G1 garbage collector is used
     6.5  
     6.6  ConcurrentG1RefineThread::
     6.7 -ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r) :
     6.8 +ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r, ConcurrentG1RefineThread *next,
     6.9 +                         int worker_id_offset, int worker_id) :
    6.10    ConcurrentGCThread(),
    6.11 +  _worker_id_offset(worker_id_offset),
    6.12 +  _worker_id(worker_id),
    6.13 +  _active(false),
    6.14 +  _next(next),
    6.15    _cg1r(cg1r),
    6.16 -  _started(false),
    6.17 -  _in_progress(false),
    6.18 -  _do_traversal(false),
    6.19    _vtime_accum(0.0),
    6.20    _co_tracker(G1CRGroup),
    6.21    _interval_ms(5.0)
    6.22 @@ -43,112 +45,6 @@
    6.23    create_and_start();
    6.24  }
    6.25  
    6.26 -const long timeout = 200; // ms.
    6.27 -
    6.28 -void ConcurrentG1RefineThread::traversalBasedRefinement() {
    6.29 -  _cg1r->wait_for_ConcurrentG1Refine_enabled();
    6.30 -  MutexLocker x(G1ConcRefine_mon);
    6.31 -  while (_cg1r->enabled()) {
    6.32 -    MutexUnlocker ux(G1ConcRefine_mon);
    6.33 -    ResourceMark rm;
    6.34 -    HandleMark   hm;
    6.35 -
    6.36 -    if (G1TraceConcurrentRefinement) {
    6.37 -      gclog_or_tty->print_cr("G1-Refine starting pass");
    6.38 -    }
    6.39 -    _sts.join();
    6.40 -    bool no_sleep = _cg1r->refine();
    6.41 -    _sts.leave();
    6.42 -    if (!no_sleep) {
    6.43 -      MutexLockerEx x(CGC_lock, Mutex::_no_safepoint_check_flag);
    6.44 -      // We do this only for the timeout; we don't expect this to be signalled.
    6.45 -      CGC_lock->wait(Mutex::_no_safepoint_check_flag, timeout);
    6.46 -    }
    6.47 -  }
    6.48 -}
    6.49 -
    6.50 -void ConcurrentG1RefineThread::queueBasedRefinement() {
    6.51 -  DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
    6.52 -  // Wait for completed log buffers to exist.
    6.53 -  {
    6.54 -    MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag);
    6.55 -    while (!_do_traversal && !dcqs.process_completed_buffers() &&
    6.56 -           !_should_terminate) {
    6.57 -      DirtyCardQ_CBL_mon->wait(Mutex::_no_safepoint_check_flag);
    6.58 -    }
    6.59 -  }
    6.60 -
    6.61 -  if (_should_terminate) {
    6.62 -    return;
    6.63 -  }
    6.64 -
    6.65 -  // Now we take them off (this doesn't hold locks while it applies
    6.66 -  // closures.)  (If we did a full collection, then we'll do a full
    6.67 -  // traversal.
    6.68 -  _sts.join();
    6.69 -  if (_do_traversal) {
    6.70 -    (void)_cg1r->refine();
    6.71 -    switch (_cg1r->get_last_pya()) {
    6.72 -    case PYA_cancel: case PYA_continue:
    6.73 -      // Continue was caught and handled inside "refine".  If it's still
    6.74 -      // "continue" when we get here, we're done.
    6.75 -      _do_traversal = false;
    6.76 -      break;
    6.77 -    case PYA_restart:
    6.78 -      assert(_do_traversal, "Because of Full GC.");
    6.79 -      break;
    6.80 -    }
    6.81 -  } else {
    6.82 -    int n_logs = 0;
    6.83 -    int lower_limit = 0;
    6.84 -    double start_vtime_sec; // only used when G1SmoothConcRefine is on
    6.85 -    int prev_buffer_num; // only used when G1SmoothConcRefine is on
    6.86 -
    6.87 -    if (G1SmoothConcRefine) {
    6.88 -      lower_limit = 0;
    6.89 -      start_vtime_sec = os::elapsedVTime();
    6.90 -      prev_buffer_num = (int) dcqs.completed_buffers_num();
    6.91 -    } else {
    6.92 -      lower_limit = DCQBarrierProcessCompletedThreshold / 4; // For now.
    6.93 -    }
    6.94 -    while (dcqs.apply_closure_to_completed_buffer(0, lower_limit)) {
    6.95 -      double end_vtime_sec;
    6.96 -      double elapsed_vtime_sec;
    6.97 -      int elapsed_vtime_ms;
    6.98 -      int curr_buffer_num;
    6.99 -
   6.100 -      if (G1SmoothConcRefine) {
   6.101 -        end_vtime_sec = os::elapsedVTime();
   6.102 -        elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
   6.103 -        elapsed_vtime_ms = (int) (elapsed_vtime_sec * 1000.0);
   6.104 -        curr_buffer_num = (int) dcqs.completed_buffers_num();
   6.105 -
   6.106 -        if (curr_buffer_num > prev_buffer_num ||
   6.107 -            curr_buffer_num > DCQBarrierProcessCompletedThreshold) {
   6.108 -          decreaseInterval(elapsed_vtime_ms);
   6.109 -        } else if (curr_buffer_num < prev_buffer_num) {
   6.110 -          increaseInterval(elapsed_vtime_ms);
   6.111 -        }
   6.112 -      }
   6.113 -
   6.114 -      sample_young_list_rs_lengths();
   6.115 -      _co_tracker.update(false);
   6.116 -
   6.117 -      if (G1SmoothConcRefine) {
   6.118 -        prev_buffer_num = curr_buffer_num;
   6.119 -        _sts.leave();
   6.120 -        os::sleep(Thread::current(), (jlong) _interval_ms, false);
   6.121 -        _sts.join();
   6.122 -        start_vtime_sec = os::elapsedVTime();
   6.123 -      }
   6.124 -      n_logs++;
   6.125 -    }
   6.126 -    // Make sure we harvest the PYA, if any.
   6.127 -    (void)_cg1r->get_pya();
   6.128 -  }
   6.129 -  _sts.leave();
   6.130 -}
   6.131 -
   6.132  void ConcurrentG1RefineThread::sample_young_list_rs_lengths() {
   6.133    G1CollectedHeap* g1h = G1CollectedHeap::heap();
   6.134    G1CollectorPolicy* g1p = g1h->g1_policy();
   6.135 @@ -184,15 +80,97 @@
   6.136    _co_tracker.start();
   6.137  
   6.138    while (!_should_terminate) {
   6.139 -    // wait until started is set.
   6.140 -    if (G1RSBarrierUseQueue) {
   6.141 -      queueBasedRefinement();
   6.142 +    DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
   6.143 +    // Wait for completed log buffers to exist.
   6.144 +    {
   6.145 +      MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag);
   6.146 +      while (((_worker_id == 0 && !dcqs.process_completed_buffers()) ||
   6.147 +              (_worker_id > 0 && !is_active())) &&
   6.148 +             !_should_terminate) {
   6.149 +         DirtyCardQ_CBL_mon->wait(Mutex::_no_safepoint_check_flag);
   6.150 +      }
   6.151 +    }
   6.152 +
   6.153 +    if (_should_terminate) {
   6.154 +      return;
   6.155 +    }
   6.156 +
   6.157 +    // Now we take them off (this doesn't hold locks while it applies
   6.158 +    // closures.)  If we did a full collection, then we'll do a full
   6.159 +    // traversal.
   6.160 +    _sts.join();
   6.161 +    int n_logs = 0;
   6.162 +    int lower_limit = 0;
   6.163 +    double start_vtime_sec; // only used when G1SmoothConcRefine is on
   6.164 +    int prev_buffer_num; // only used when G1SmoothConcRefine is on
   6.165 +    // This thread activation threshold
   6.166 +    int threshold = DCQBarrierProcessCompletedThreshold * _worker_id;
   6.167 +    // Next thread activation threshold
   6.168 +    int next_threshold = threshold + DCQBarrierProcessCompletedThreshold;
   6.169 +    int deactivation_threshold = MAX2<int>(threshold - DCQBarrierProcessCompletedThreshold / 2, 0);
   6.170 +
   6.171 +    if (G1SmoothConcRefine) {
   6.172 +      lower_limit = 0;
   6.173 +      start_vtime_sec = os::elapsedVTime();
   6.174 +      prev_buffer_num = (int) dcqs.completed_buffers_num();
   6.175      } else {
   6.176 -      traversalBasedRefinement();
   6.177 +      lower_limit = DCQBarrierProcessCompletedThreshold / 4; // For now.
   6.178      }
   6.179 -    _sts.join();
   6.180 -    _co_tracker.update();
   6.181 +    while (dcqs.apply_closure_to_completed_buffer(_worker_id + _worker_id_offset, lower_limit)) {
   6.182 +      double end_vtime_sec;
   6.183 +      double elapsed_vtime_sec;
   6.184 +      int elapsed_vtime_ms;
   6.185 +      int curr_buffer_num = (int) dcqs.completed_buffers_num();
   6.186 +
   6.187 +      if (G1SmoothConcRefine) {
   6.188 +        end_vtime_sec = os::elapsedVTime();
   6.189 +        elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
   6.190 +        elapsed_vtime_ms = (int) (elapsed_vtime_sec * 1000.0);
   6.191 +
   6.192 +        if (curr_buffer_num > prev_buffer_num ||
   6.193 +            curr_buffer_num > next_threshold) {
   6.194 +          decreaseInterval(elapsed_vtime_ms);
   6.195 +        } else if (curr_buffer_num < prev_buffer_num) {
   6.196 +          increaseInterval(elapsed_vtime_ms);
   6.197 +        }
   6.198 +      }
   6.199 +      if (_worker_id == 0) {
   6.200 +        sample_young_list_rs_lengths();
   6.201 +      } else if (curr_buffer_num < deactivation_threshold) {
   6.202 +        // If the number of buffers has fallen below our threshold
   6.203 +        // we should deactivate. The predecessor will reactivate this
   6.204 +        // thread should the number of buffers cross the threshold again.
   6.205 +        MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag);
   6.206 +        deactivate();
   6.207 +        if (G1TraceConcurrentRefinement) {
   6.208 +          gclog_or_tty->print_cr("G1-Refine-deactivated worker %d", _worker_id);
   6.209 +        }
   6.210 +        break;
   6.211 +      }
   6.212 +      _co_tracker.update(false);
   6.213 +
   6.214 +      // Check if we need to activate the next thread.
   6.215 +      if (curr_buffer_num > next_threshold && _next != NULL && !_next->is_active()) {
   6.216 +        MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag);
   6.217 +        _next->activate();
   6.218 +        DirtyCardQ_CBL_mon->notify_all();
   6.219 +        if (G1TraceConcurrentRefinement) {
   6.220 +          gclog_or_tty->print_cr("G1-Refine-activated worker %d", _next->_worker_id);
   6.221 +        }
   6.222 +      }
   6.223 +
   6.224 +      if (G1SmoothConcRefine) {
   6.225 +        prev_buffer_num = curr_buffer_num;
   6.226 +        _sts.leave();
   6.227 +        os::sleep(Thread::current(), (jlong) _interval_ms, false);
   6.228 +        _sts.join();
   6.229 +        start_vtime_sec = os::elapsedVTime();
   6.230 +      }
   6.231 +      n_logs++;
   6.232 +    }
   6.233 +    _co_tracker.update(false);
   6.234      _sts.leave();
   6.235 +
   6.236      if (os::supports_vtime()) {
   6.237        _vtime_accum = (os::elapsedVTime() - _vtime_start);
   6.238      } else {
   6.239 @@ -240,7 +218,3 @@
   6.240    Thread::print();
   6.241    gclog_or_tty->cr();
   6.242  }
   6.243 -
   6.244 -void ConcurrentG1RefineThread::set_do_traversal(bool b) {
   6.245 -  _do_traversal = b;
   6.246 -}
     7.1 --- a/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.hpp	Fri Jun 05 10:25:39 2009 -0700
     7.2 +++ b/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.hpp	Wed Jun 10 14:57:21 2009 -0700
     7.3 @@ -33,21 +33,27 @@
     7.4  
     7.5    double _vtime_start;  // Initial virtual time.
     7.6    double _vtime_accum;  // Initial virtual time.
     7.7 +  int _worker_id;
     7.8 +  int _worker_id_offset;
     7.9  
    7.10 +  // The refinement threads collection is a linked list. A predecessor can activate a successor
    7.11 +  // when the number of rset update buffers crosses a certain threshold. A successor
    7.12 +  // would self-deactivate when the number of buffers falls below the threshold.
    7.13 +  bool _active;
    7.14 +  ConcurrentG1RefineThread *       _next;
    7.15   public:
    7.16    virtual void run();
    7.17  
    7.18 +  bool is_active()  { return _active;  }
    7.19 +  void activate()   { _active = true;  }
    7.20 +  void deactivate() { _active = false; }
    7.21 +
    7.22   private:
    7.23    ConcurrentG1Refine*              _cg1r;
    7.24 -  bool                             _started;
    7.25 -  bool                             _in_progress;
    7.26 -  volatile bool                    _restart;
    7.27  
    7.28    COTracker                        _co_tracker;
    7.29    double                           _interval_ms;
    7.30  
    7.31 -  bool                             _do_traversal;
    7.32 -
    7.33    void decreaseInterval(int processing_time_ms) {
    7.34      double min_interval_ms = (double) processing_time_ms;
    7.35      _interval_ms = 0.8 * _interval_ms;
    7.36 @@ -63,16 +69,13 @@
    7.37  
    7.38    void sleepBeforeNextCycle();
    7.39  
    7.40 -  void traversalBasedRefinement();
    7.41 -
    7.42 -  void queueBasedRefinement();
    7.43 -
    7.44    // For use by G1CollectedHeap, which is a friend.
    7.45    static SuspendibleThreadSet* sts() { return &_sts; }
    7.46  
    7.47   public:
    7.48    // Constructor
    7.49 -  ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r);
    7.50 +  ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r, ConcurrentG1RefineThread* next,
    7.51 +                           int worker_id_offset, int worker_id);
    7.52  
    7.53    // Printing
    7.54    void print();
    7.55 @@ -82,23 +85,11 @@
    7.56  
    7.57    ConcurrentG1Refine* cg1r()                     { return _cg1r;     }
    7.58  
    7.59 -
    7.60 -  void            set_started()                  { _started = true;   }
    7.61 -  void            clear_started()                { _started = false;  }
    7.62 -  bool            started()                      { return _started;   }
    7.63 -
    7.64 -  void            set_in_progress()              { _in_progress = true;   }
    7.65 -  void            clear_in_progress()            { _in_progress = false;  }
    7.66 -  bool            in_progress()                  { return _in_progress;   }
    7.67 -
    7.68 -  void            set_do_traversal(bool b);
    7.69 -  bool            do_traversal() { return _do_traversal; }
    7.70 -
    7.71    void            sample_young_list_rs_lengths();
    7.72  
    7.73    // Yield for GC
    7.74    void            yield();
    7.75  
    7.76    // shutdown
    7.77 -  static void stop();
    7.78 +  void stop();
    7.79  };
     8.1 --- a/src/share/vm/gc_implementation/g1/concurrentMarkThread.hpp	Fri Jun 05 10:25:39 2009 -0700
     8.2 +++ b/src/share/vm/gc_implementation/g1/concurrentMarkThread.hpp	Wed Jun 10 14:57:21 2009 -0700
     8.3 @@ -80,5 +80,5 @@
     8.4    void            yield();
     8.5  
     8.6    // shutdown
     8.7 -  static void stop();
     8.8 +  void stop();
     8.9  };
     9.1 --- a/src/share/vm/gc_implementation/g1/concurrentZFThread.hpp	Fri Jun 05 10:25:39 2009 -0700
     9.2 +++ b/src/share/vm/gc_implementation/g1/concurrentZFThread.hpp	Wed Jun 10 14:57:21 2009 -0700
     9.3 @@ -73,7 +73,7 @@
     9.4    // while holding the ZF_needed_mon lock.
     9.5  
     9.6    // shutdown
     9.7 -  static void stop();
     9.8 +  void stop();
     9.9  
    9.10    // Stats
    9.11    static void note_region_alloc() {_region_allocs++; }
    10.1 --- a/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp	Fri Jun 05 10:25:39 2009 -0700
    10.2 +++ b/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp	Wed Jun 10 14:57:21 2009 -0700
    10.3 @@ -71,11 +71,11 @@
    10.4    _all_active = true;
    10.5  }
    10.6  
    10.7 +// Determines how many mutator threads can process the buffers in parallel.
    10.8  size_t DirtyCardQueueSet::num_par_ids() {
    10.9 -  return MAX2(ParallelGCThreads, (size_t)2);
   10.10 +  return os::processor_count();
   10.11  }
   10.12  
   10.13 -
   10.14  void DirtyCardQueueSet::initialize(Monitor* cbl_mon, Mutex* fl_lock,
   10.15                                     int max_completed_queue,
   10.16                                     Mutex* lock, PtrQueueSet* fl_owner) {
   10.17 @@ -85,8 +85,6 @@
   10.18  
   10.19    _shared_dirty_card_queue.set_lock(lock);
   10.20    _free_ids = new FreeIdSet((int) num_par_ids(), _cbl_mon);
   10.21 -  bool b = _free_ids->claim_perm_id(0);
   10.22 -  guarantee(b, "Must reserve id zero for concurrent refinement thread.");
   10.23  }
   10.24  
   10.25  void DirtyCardQueueSet::handle_zero_index_for_thread(JavaThread* t) {
   10.26 @@ -234,7 +232,7 @@
   10.27      nd = get_completed_buffer_lock(stop_at);
   10.28    }
   10.29    bool res = apply_closure_to_completed_buffer_helper(worker_i, nd);
   10.30 -  if (res) _processed_buffers_rs_thread++;
   10.31 +  if (res) Atomic::inc(&_processed_buffers_rs_thread);
   10.32    return res;
   10.33  }
   10.34  
    11.1 --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Fri Jun 05 10:25:39 2009 -0700
    11.2 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Wed Jun 10 14:57:21 2009 -0700
    11.3 @@ -446,8 +446,61 @@
    11.4    gclog_or_tty->print_cr("");
    11.5  }
    11.6  
    11.7 +void G1CollectedHeap::push_dirty_cards_region(HeapRegion* hr)
    11.8 +{
    11.9 +  // Claim the right to put the region on the dirty cards region list
   11.10 +  // by installing a self pointer.
   11.11 +  HeapRegion* next = hr->get_next_dirty_cards_region();
   11.12 +  if (next == NULL) {
   11.13 +    HeapRegion* res = (HeapRegion*)
   11.14 +      Atomic::cmpxchg_ptr(hr, hr->next_dirty_cards_region_addr(),
   11.15 +                          NULL);
   11.16 +    if (res == NULL) {
   11.17 +      HeapRegion* head;
   11.18 +      do {
   11.19 +        // Put the region to the dirty cards region list.
   11.20 +        head = _dirty_cards_region_list;
   11.21 +        next = (HeapRegion*)
   11.22 +          Atomic::cmpxchg_ptr(hr, &_dirty_cards_region_list, head);
   11.23 +        if (next == head) {
   11.24 +          assert(hr->get_next_dirty_cards_region() == hr,
   11.25 +                 "hr->get_next_dirty_cards_region() != hr");
   11.26 +          if (next == NULL) {
   11.27 +            // The last region in the list points to itself.
   11.28 +            hr->set_next_dirty_cards_region(hr);
   11.29 +          } else {
   11.30 +            hr->set_next_dirty_cards_region(next);
   11.31 +          }
   11.32 +        }
   11.33 +      } while (next != head);
   11.34 +    }
   11.35 +  }
   11.36 +}
   11.37 +
   11.38 +HeapRegion* G1CollectedHeap::pop_dirty_cards_region()
   11.39 +{
   11.40 +  HeapRegion* head;
   11.41 +  HeapRegion* hr;
   11.42 +  do {
   11.43 +    head = _dirty_cards_region_list;
   11.44 +    if (head == NULL) {
   11.45 +      return NULL;
   11.46 +    }
   11.47 +    HeapRegion* new_head = head->get_next_dirty_cards_region();
   11.48 +    if (head == new_head) {
   11.49 +      // The last region.
   11.50 +      new_head = NULL;
   11.51 +    }
   11.52 +    hr = (HeapRegion*)Atomic::cmpxchg_ptr(new_head, &_dirty_cards_region_list,
   11.53 +                                          head);
   11.54 +  } while (hr != head);
   11.55 +  assert(hr != NULL, "invariant");
   11.56 +  hr->set_next_dirty_cards_region(NULL);
   11.57 +  return hr;
   11.58 +}
   11.59 +
   11.60  void G1CollectedHeap::stop_conc_gc_threads() {
   11.61 -  _cg1r->cg1rThread()->stop();
   11.62 +  _cg1r->stop();
   11.63    _czft->stop();
   11.64    _cmThread->stop();
   11.65  }
   11.66 @@ -1001,12 +1054,8 @@
   11.67  
   11.68      gc_epilogue(true);
   11.69  
   11.70 -    // Abandon concurrent refinement.  This must happen last: in the
   11.71 -    // dirty-card logging system, some cards may be dirty by weak-ref
   11.72 -    // processing, and may be enqueued.  But the whole card table is
   11.73 -    // dirtied, so this should abandon those logs, and set "do_traversal"
   11.74 -    // to true.
   11.75 -    concurrent_g1_refine()->set_pya_restart();
   11.76 +    // Discard all rset updates
   11.77 +    JavaThread::dirty_card_queue_set().abandon_logs();
   11.78      assert(!G1DeferredRSUpdate
   11.79             || (G1DeferredRSUpdate && (dirty_card_queue_set().completed_buffers_num() == 0)), "Should not be any");
   11.80      assert(regions_accounted_for(), "Region leakage!");
   11.81 @@ -1333,7 +1382,8 @@
   11.82    _gc_time_stamp(0),
   11.83    _surviving_young_words(NULL),
   11.84    _in_cset_fast_test(NULL),
   11.85 -  _in_cset_fast_test_base(NULL) {
   11.86 +  _in_cset_fast_test_base(NULL),
   11.87 +  _dirty_cards_region_list(NULL) {
   11.88    _g1h = this; // To catch bugs.
   11.89    if (_process_strong_tasks == NULL || !_process_strong_tasks->valid()) {
   11.90      vm_exit_during_initialization("Failed necessary allocation.");
   11.91 @@ -1521,12 +1571,12 @@
   11.92                                                 SATB_Q_FL_lock,
   11.93                                                 0,
   11.94                                                 Shared_SATB_Q_lock);
   11.95 -  if (G1RSBarrierUseQueue) {
   11.96 -    JavaThread::dirty_card_queue_set().initialize(DirtyCardQ_CBL_mon,
   11.97 -                                                  DirtyCardQ_FL_lock,
   11.98 -                                                  G1DirtyCardQueueMax,
   11.99 -                                                  Shared_DirtyCardQ_lock);
  11.100 -  }
  11.101 +
  11.102 +  JavaThread::dirty_card_queue_set().initialize(DirtyCardQ_CBL_mon,
  11.103 +                                                DirtyCardQ_FL_lock,
  11.104 +                                                G1DirtyCardQueueMax,
  11.105 +                                                Shared_DirtyCardQ_lock);
  11.106 +
  11.107    if (G1DeferredRSUpdate) {
  11.108      dirty_card_queue_set().initialize(DirtyCardQ_CBL_mon,
  11.109                                        DirtyCardQ_FL_lock,
  11.110 @@ -2249,6 +2299,15 @@
  11.111    _hrs->iterate(&blk);
  11.112  }
  11.113  
  11.114 +class PrintOnThreadsClosure : public ThreadClosure {
  11.115 +  outputStream* _st;
  11.116 +public:
  11.117 +  PrintOnThreadsClosure(outputStream* st) : _st(st) { }
  11.118 +  virtual void do_thread(Thread *t) {
  11.119 +    t->print_on(_st);
  11.120 +  }
  11.121 +};
  11.122 +
  11.123  void G1CollectedHeap::print_gc_threads_on(outputStream* st) const {
  11.124    if (ParallelGCThreads > 0) {
  11.125      workers()->print_worker_threads();
  11.126 @@ -2256,8 +2315,9 @@
  11.127    st->print("\"G1 concurrent mark GC Thread\" ");
  11.128    _cmThread->print();
  11.129    st->cr();
  11.130 -  st->print("\"G1 concurrent refinement GC Thread\" ");
  11.131 -  _cg1r->cg1rThread()->print_on(st);
  11.132 +  st->print("\"G1 concurrent refinement GC Threads\" ");
  11.133 +  PrintOnThreadsClosure p(st);
  11.134 +  _cg1r->threads_do(&p);
  11.135    st->cr();
  11.136    st->print("\"G1 zero-fill GC Thread\" ");
  11.137    _czft->print_on(st);
  11.138 @@ -2269,7 +2329,7 @@
  11.139      workers()->threads_do(tc);
  11.140    }
  11.141    tc->do_thread(_cmThread);
  11.142 -  tc->do_thread(_cg1r->cg1rThread());
  11.143 +  _cg1r->threads_do(tc);
  11.144    tc->do_thread(_czft);
  11.145  }
  11.146  
  11.147 @@ -4685,15 +4745,58 @@
  11.148    }
  11.149  }
  11.150  
  11.151 +
  11.152 +class G1ParCleanupCTTask : public AbstractGangTask {
  11.153 +  CardTableModRefBS* _ct_bs;
  11.154 +  G1CollectedHeap* _g1h;
  11.155 +public:
  11.156 +  G1ParCleanupCTTask(CardTableModRefBS* ct_bs,
  11.157 +                     G1CollectedHeap* g1h) :
  11.158 +    AbstractGangTask("G1 Par Cleanup CT Task"),
  11.159 +    _ct_bs(ct_bs),
  11.160 +    _g1h(g1h)
  11.161 +  { }
  11.162 +
  11.163 +  void work(int i) {
  11.164 +    HeapRegion* r;
  11.165 +    while (r = _g1h->pop_dirty_cards_region()) {
  11.166 +      clear_cards(r);
  11.167 +    }
  11.168 +  }
  11.169 +  void clear_cards(HeapRegion* r) {
  11.170 +    // Cards for Survivor and Scan-Only regions will be dirtied later.
  11.171 +    if (!r->is_scan_only() && !r->is_survivor()) {
  11.172 +      _ct_bs->clear(MemRegion(r->bottom(), r->end()));
  11.173 +    }
  11.174 +  }
  11.175 +};
  11.176 +
  11.177 +
  11.178  void G1CollectedHeap::cleanUpCardTable() {
  11.179    CardTableModRefBS* ct_bs = (CardTableModRefBS*) (barrier_set());
  11.180    double start = os::elapsedTime();
  11.181  
  11.182 -  ct_bs->clear(_g1_committed);
  11.183 -
  11.184 +  // Iterate over the dirty cards region list.
  11.185 +  G1ParCleanupCTTask cleanup_task(ct_bs, this);
  11.186 +  if (ParallelGCThreads > 0) {
  11.187 +    set_par_threads(workers()->total_workers());
  11.188 +    workers()->run_task(&cleanup_task);
  11.189 +    set_par_threads(0);
  11.190 +  } else {
  11.191 +    while (_dirty_cards_region_list) {
  11.192 +      HeapRegion* r = _dirty_cards_region_list;
  11.193 +      cleanup_task.clear_cards(r);
  11.194 +      _dirty_cards_region_list = r->get_next_dirty_cards_region();
  11.195 +      if (_dirty_cards_region_list == r) {
  11.196 +        // The last region.
  11.197 +        _dirty_cards_region_list = NULL;
  11.198 +      }
  11.199 +      r->set_next_dirty_cards_region(NULL);
  11.200 +    }
  11.201 +  }
  11.202    // now, redirty the cards of the scan-only and survivor regions
  11.203    // (it seemed faster to do it this way, instead of iterating over
  11.204 -  // all regions and then clearing / dirtying as approprite)
  11.205 +  // all regions and then clearing / dirtying as appropriate)
  11.206    dirtyCardsForYoungRegions(ct_bs, _young_list->first_scan_only_region());
  11.207    dirtyCardsForYoungRegions(ct_bs, _young_list->first_survivor_region());
  11.208  
    12.1 --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Fri Jun 05 10:25:39 2009 -0700
    12.2 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Wed Jun 10 14:57:21 2009 -0700
    12.3 @@ -158,6 +158,7 @@
    12.4    friend class RegionSorter;
    12.5    friend class CountRCClosure;
    12.6    friend class EvacPopObjClosure;
    12.7 +  friend class G1ParCleanupCTTask;
    12.8  
    12.9    // Other related classes.
   12.10    friend class G1MarkSweep;
   12.11 @@ -1191,6 +1192,16 @@
   12.12    ConcurrentMark* concurrent_mark() const { return _cm; }
   12.13    ConcurrentG1Refine* concurrent_g1_refine() const { return _cg1r; }
   12.14  
   12.15 +  // The dirty cards region list is used to record a subset of regions
    12.16 +  // whose cards need clearing. The list is populated during the
   12.17 +  // remembered set scanning and drained during the card table
   12.18 +  // cleanup. Although the methods are reentrant, population/draining
   12.19 +  // phases must not overlap. For synchronization purposes the last
   12.20 +  // element on the list points to itself.
   12.21 +  HeapRegion* _dirty_cards_region_list;
   12.22 +  void push_dirty_cards_region(HeapRegion* hr);
   12.23 +  HeapRegion* pop_dirty_cards_region();
   12.24 +
   12.25  public:
   12.26    void stop_conc_gc_threads();
   12.27  
    13.1 --- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Fri Jun 05 10:25:39 2009 -0700
    13.2 +++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Wed Jun 10 14:57:21 2009 -0700
    13.3 @@ -167,11 +167,6 @@
    13.4  
    13.5    _all_full_gc_times_ms(new NumberSeq()),
    13.6  
    13.7 -  _conc_refine_enabled(0),
    13.8 -  _conc_refine_zero_traversals(0),
    13.9 -  _conc_refine_max_traversals(0),
   13.10 -  _conc_refine_current_delta(G1ConcRefineInitialDelta),
   13.11 -
   13.12    // G1PausesBtwnConcMark defaults to -1
   13.13    // so the hack is to do the cast  QQQ FIXME
   13.14    _pauses_btwn_concurrent_mark((size_t)G1PausesBtwnConcMark),
   13.15 @@ -1634,9 +1629,8 @@
   13.16          print_stats(1, "Parallel Time", _cur_collection_par_time_ms);
   13.17          print_par_stats(2, "Update RS (Start)", _par_last_update_rs_start_times_ms, false);
   13.18          print_par_stats(2, "Update RS", _par_last_update_rs_times_ms);
   13.19 -        if (G1RSBarrierUseQueue)
   13.20 -          print_par_buffers(3, "Processed Buffers",
   13.21 -                            _par_last_update_rs_processed_buffers, true);
   13.22 +        print_par_buffers(3, "Processed Buffers",
   13.23 +                          _par_last_update_rs_processed_buffers, true);
   13.24          print_par_stats(2, "Ext Root Scanning", _par_last_ext_root_scan_times_ms);
   13.25          print_par_stats(2, "Mark Stack Scanning", _par_last_mark_stack_scan_times_ms);
   13.26          print_par_stats(2, "Scan-Only Scanning", _par_last_scan_only_times_ms);
   13.27 @@ -1649,9 +1643,8 @@
   13.28          print_stats(1, "Clear CT", _cur_clear_ct_time_ms);
   13.29        } else {
   13.30          print_stats(1, "Update RS", update_rs_time);
   13.31 -        if (G1RSBarrierUseQueue)
   13.32 -          print_stats(2, "Processed Buffers",
   13.33 -                      (int)update_rs_processed_buffers);
   13.34 +        print_stats(2, "Processed Buffers",
   13.35 +                    (int)update_rs_processed_buffers);
   13.36          print_stats(1, "Ext Root Scanning", ext_root_scan_time);
   13.37          print_stats(1, "Mark Stack Scanning", mark_stack_scan_time);
   13.38          print_stats(1, "Scan-Only Scanning", scan_only_time);
   13.39 @@ -2467,18 +2460,6 @@
   13.40                 (double) _region_num_young / (double) all_region_num * 100.0,
   13.41                 _region_num_tenured,
   13.42                 (double) _region_num_tenured / (double) all_region_num * 100.0);
   13.43 -
   13.44 -    if (!G1RSBarrierUseQueue) {
   13.45 -      gclog_or_tty->print_cr("Of %d times conc refinement was enabled, %d (%7.2f%%) "
   13.46 -                    "did zero traversals.",
   13.47 -                    _conc_refine_enabled, _conc_refine_zero_traversals,
   13.48 -                    _conc_refine_enabled > 0 ?
   13.49 -                    100.0 * (float)_conc_refine_zero_traversals/
   13.50 -                    (float)_conc_refine_enabled : 0.0);
   13.51 -      gclog_or_tty->print_cr("  Max # of traversals = %d.",
   13.52 -                    _conc_refine_max_traversals);
   13.53 -      gclog_or_tty->print_cr("");
   13.54 -    }
   13.55    }
   13.56    if (TraceGen1Time) {
   13.57      if (_all_full_gc_times_ms->num() > 0) {
   13.58 @@ -2500,38 +2481,6 @@
   13.59  #endif // PRODUCT
   13.60  }
   13.61  
   13.62 -void G1CollectorPolicy::update_conc_refine_data() {
   13.63 -  unsigned traversals = _g1->concurrent_g1_refine()->disable();
   13.64 -  if (traversals == 0) _conc_refine_zero_traversals++;
   13.65 -  _conc_refine_max_traversals = MAX2(_conc_refine_max_traversals,
   13.66 -                                     (size_t)traversals);
   13.67 -
   13.68 -  if (G1PolicyVerbose > 1)
   13.69 -    gclog_or_tty->print_cr("Did a CR traversal series: %d traversals.", traversals);
   13.70 -  double multiplier = 1.0;
   13.71 -  if (traversals == 0) {
   13.72 -    multiplier = 4.0;
   13.73 -  } else if (traversals > (size_t)G1ConcRefineTargTraversals) {
   13.74 -    multiplier = 1.0/1.5;
   13.75 -  } else if (traversals < (size_t)G1ConcRefineTargTraversals) {
   13.76 -    multiplier = 1.5;
   13.77 -  }
   13.78 -  if (G1PolicyVerbose > 1) {
   13.79 -    gclog_or_tty->print_cr("  Multiplier = %7.2f.", multiplier);
   13.80 -    gclog_or_tty->print("  Delta went from %d regions to ",
   13.81 -               _conc_refine_current_delta);
   13.82 -  }
   13.83 -  _conc_refine_current_delta =
   13.84 -    MIN2(_g1->n_regions(),
   13.85 -         (size_t)(_conc_refine_current_delta * multiplier));
   13.86 -  _conc_refine_current_delta =
   13.87 -    MAX2(_conc_refine_current_delta, (size_t)1);
   13.88 -  if (G1PolicyVerbose > 1) {
   13.89 -    gclog_or_tty->print_cr("%d regions.", _conc_refine_current_delta);
   13.90 -  }
   13.91 -  _conc_refine_enabled++;
   13.92 -}
   13.93 -
   13.94  bool
   13.95  G1CollectorPolicy::should_add_next_region_to_young_list() {
   13.96    assert(in_young_gc_mode(), "should be in young GC mode");
    14.1 --- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Fri Jun 05 10:25:39 2009 -0700
    14.2 +++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Wed Jun 10 14:57:21 2009 -0700
    14.3 @@ -637,18 +637,6 @@
    14.4    // The number of collection pauses at the end of the last mark.
    14.5    size_t _n_pauses_at_mark_end;
    14.6  
    14.7 -  // ==== This section is for stats related to starting Conc Refinement on time.
    14.8 -  size_t _conc_refine_enabled;
    14.9 -  size_t _conc_refine_zero_traversals;
   14.10 -  size_t _conc_refine_max_traversals;
   14.11 -  // In # of heap regions.
   14.12 -  size_t _conc_refine_current_delta;
   14.13 -
   14.14 -  // At the beginning of a collection pause, update the variables above,
   14.15 -  // especially the "delta".
   14.16 -  void update_conc_refine_data();
   14.17 -  // ====
   14.18 -
   14.19    // Stash a pointer to the g1 heap.
   14.20    G1CollectedHeap* _g1;
   14.21  
    15.1 --- a/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Fri Jun 05 10:25:39 2009 -0700
    15.2 +++ b/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Wed Jun 10 14:57:21 2009 -0700
    15.3 @@ -105,28 +105,6 @@
    15.4    _g1->heap_region_iterate(&rc);
    15.5  }
    15.6  
    15.7 -class UpdateRSOutOfRegionClosure: public HeapRegionClosure {
    15.8 -  G1CollectedHeap*    _g1h;
    15.9 -  ModRefBarrierSet*   _mr_bs;
   15.10 -  UpdateRSOopClosure  _cl;
   15.11 -  int _worker_i;
   15.12 -public:
   15.13 -  UpdateRSOutOfRegionClosure(G1CollectedHeap* g1, int worker_i = 0) :
   15.14 -    _cl(g1->g1_rem_set()->as_HRInto_G1RemSet(), worker_i),
   15.15 -    _mr_bs(g1->mr_bs()),
   15.16 -    _worker_i(worker_i),
   15.17 -    _g1h(g1)
   15.18 -    {}
   15.19 -  bool doHeapRegion(HeapRegion* r) {
   15.20 -    if (!r->in_collection_set() && !r->continuesHumongous()) {
   15.21 -      _cl.set_from(r);
   15.22 -      r->set_next_filter_kind(HeapRegionDCTOC::OutOfRegionFilterKind);
   15.23 -      _mr_bs->mod_oop_in_space_iterate(r, &_cl, true, true);
   15.24 -    }
   15.25 -    return false;
   15.26 -  }
   15.27 -};
   15.28 -
   15.29  class VerifyRSCleanCardOopClosure: public OopClosure {
   15.30    G1CollectedHeap* _g1;
   15.31  public:
   15.32 @@ -241,6 +219,7 @@
   15.33      HeapRegionRemSet* hrrs = r->rem_set();
   15.34      if (hrrs->iter_is_complete()) return false; // All done.
   15.35      if (!_try_claimed && !hrrs->claim_iter()) return false;
   15.36 +    _g1h->push_dirty_cards_region(r);
   15.37      // If we didn't return above, then
   15.38      //   _try_claimed || r->claim_iter()
   15.39      // is true: either we're supposed to work on claimed-but-not-complete
   15.40 @@ -264,6 +243,10 @@
   15.41        assert(card_region != NULL, "Yielding cards not in the heap?");
   15.42        _cards++;
   15.43  
   15.44 +      if (!card_region->is_on_dirty_cards_region_list()) {
   15.45 +        _g1h->push_dirty_cards_region(card_region);
   15.46 +      }
   15.47 +
   15.48         // If the card is dirty, then we will scan it during updateRS.
   15.49        if (!card_region->in_collection_set() && !_ct_bs->is_card_dirty(card_index)) {
   15.50            if (!_ct_bs->is_card_claimed(card_index) && _ct_bs->claim_card(card_index)) {
   15.51 @@ -350,30 +333,17 @@
   15.52    double start = os::elapsedTime();
   15.53    _g1p->record_update_rs_start_time(worker_i, start * 1000.0);
   15.54  
   15.55 -  if (G1RSBarrierUseQueue && !cg1r->do_traversal()) {
   15.56 -    // Apply the appropriate closure to all remaining log entries.
   15.57 -    _g1->iterate_dirty_card_closure(false, worker_i);
   15.58 -    // Now there should be no dirty cards.
   15.59 -    if (G1RSLogCheckCardTable) {
   15.60 -      CountNonCleanMemRegionClosure cl(_g1);
   15.61 -      _ct_bs->mod_card_iterate(&cl);
   15.62 -      // XXX This isn't true any more: keeping cards of young regions
   15.63 -      // marked dirty broke it.  Need some reasonable fix.
   15.64 -      guarantee(cl.n() == 0, "Card table should be clean.");
   15.65 -    }
   15.66 -  } else {
   15.67 -    UpdateRSOutOfRegionClosure update_rs(_g1, worker_i);
   15.68 -    _g1->heap_region_iterate(&update_rs);
   15.69 -    // We did a traversal; no further one is necessary.
   15.70 -    if (G1RSBarrierUseQueue) {
   15.71 -      assert(cg1r->do_traversal(), "Or we shouldn't have gotten here.");
   15.72 -      cg1r->set_pya_cancel();
   15.73 -    }
   15.74 -    if (_cg1r->use_cache()) {
   15.75 -      _cg1r->clear_and_record_card_counts();
   15.76 -      _cg1r->clear_hot_cache();
   15.77 -    }
   15.78 +  // Apply the appropriate closure to all remaining log entries.
   15.79 +  _g1->iterate_dirty_card_closure(false, worker_i);
   15.80 +  // Now there should be no dirty cards.
   15.81 +  if (G1RSLogCheckCardTable) {
   15.82 +    CountNonCleanMemRegionClosure cl(_g1);
   15.83 +    _ct_bs->mod_card_iterate(&cl);
   15.84 +    // XXX This isn't true any more: keeping cards of young regions
   15.85 +    // marked dirty broke it.  Need some reasonable fix.
   15.86 +    guarantee(cl.n() == 0, "Card table should be clean.");
   15.87    }
   15.88 +
   15.89    _g1p->record_update_rs_time(worker_i, (os::elapsedTime() - start) * 1000.0);
   15.90  }
   15.91  
   15.92 @@ -486,11 +456,6 @@
   15.93                                    * 1000.0);
   15.94  }
   15.95  
   15.96 -void HRInto_G1RemSet::set_par_traversal(bool b) {
   15.97 -  _par_traversal_in_progress = b;
   15.98 -  HeapRegionRemSet::set_par_traversal(b);
   15.99 -}
  15.100 -
  15.101  void HRInto_G1RemSet::cleanupHRRS() {
  15.102    HeapRegionRemSet::cleanup();
  15.103  }
  15.104 @@ -527,7 +492,7 @@
  15.105        updateRS(worker_i);
  15.106        scanNewRefsRS(oc, worker_i);
  15.107      } else {
  15.108 -      _g1p->record_update_rs_start_time(worker_i, os::elapsedTime());
  15.109 +      _g1p->record_update_rs_start_time(worker_i, os::elapsedTime() * 1000.0);
  15.110        _g1p->record_update_rs_processed_buffers(worker_i, 0.0);
  15.111        _g1p->record_update_rs_time(worker_i, 0.0);
  15.112        _g1p->record_scan_new_refs_time(worker_i, 0.0);
  15.113 @@ -535,7 +500,7 @@
  15.114      if (G1ParallelRSetScanningEnabled || (worker_i == 0)) {
  15.115        scanRS(oc, worker_i);
  15.116      } else {
  15.117 -      _g1p->record_scan_rs_start_time(worker_i, os::elapsedTime());
  15.118 +      _g1p->record_scan_rs_start_time(worker_i, os::elapsedTime() * 1000.0);
  15.119        _g1p->record_scan_rs_time(worker_i, 0.0);
  15.120      }
  15.121    } else {
  15.122 @@ -562,11 +527,6 @@
  15.123    if (ParallelGCThreads > 0) {
  15.124      set_par_traversal(true);
  15.125      _seq_task->set_par_threads((int)n_workers());
  15.126 -    if (cg1r->do_traversal()) {
  15.127 -      updateRS(0);
  15.128 -      // Have to do this again after updaters
  15.129 -      cleanupHRRS();
  15.130 -    }
  15.131    }
  15.132    guarantee( _cards_scanned == NULL, "invariant" );
  15.133    _cards_scanned = NEW_C_HEAP_ARRAY(size_t, n_workers());
  15.134 @@ -647,11 +607,8 @@
  15.135    _g1->collection_set_iterate(&iterClosure);
  15.136    // Set all cards back to clean.
  15.137    _g1->cleanUpCardTable();
  15.138 +
  15.139    if (ParallelGCThreads > 0) {
  15.140 -    ConcurrentG1Refine* cg1r = _g1->concurrent_g1_refine();
  15.141 -    if (cg1r->do_traversal()) {
  15.142 -      cg1r->cg1rThread()->set_do_traversal(false);
  15.143 -    }
  15.144      set_par_traversal(false);
  15.145    }
  15.146  
  15.147 @@ -721,139 +678,8 @@
  15.148  }
  15.149  
  15.150  
  15.151 -class ConcRefineRegionClosure: public HeapRegionClosure {
  15.152 -  G1CollectedHeap* _g1h;
  15.153 -  CardTableModRefBS* _ctbs;
  15.154 -  ConcurrentGCThread* _cgc_thrd;
  15.155 -  ConcurrentG1Refine* _cg1r;
  15.156 -  unsigned _cards_processed;
  15.157 -  UpdateRSOopClosure _update_rs_oop_cl;
  15.158 -public:
  15.159 -  ConcRefineRegionClosure(CardTableModRefBS* ctbs,
  15.160 -                          ConcurrentG1Refine* cg1r,
  15.161 -                          HRInto_G1RemSet* g1rs) :
  15.162 -    _ctbs(ctbs), _cg1r(cg1r), _cgc_thrd(cg1r->cg1rThread()),
  15.163 -    _update_rs_oop_cl(g1rs), _cards_processed(0),
  15.164 -    _g1h(G1CollectedHeap::heap())
  15.165 -  {}
  15.166 -
  15.167 -  bool doHeapRegion(HeapRegion* r) {
  15.168 -    if (!r->in_collection_set() &&
  15.169 -        !r->continuesHumongous() &&
  15.170 -        !r->is_young()) {
  15.171 -      _update_rs_oop_cl.set_from(r);
  15.172 -      UpdateRSObjectClosure update_rs_obj_cl(&_update_rs_oop_cl);
  15.173 -
  15.174 -      // For each run of dirty card in the region:
  15.175 -      //   1) Clear the cards.
  15.176 -      //   2) Process the range corresponding to the run, adding any
  15.177 -      //      necessary RS entries.
  15.178 -      // 1 must precede 2, so that a concurrent modification redirties the
  15.179 -      // card.  If a processing attempt does not succeed, because it runs
  15.180 -      // into an unparseable region, we will do binary search to find the
  15.181 -      // beginning of the next parseable region.
  15.182 -      HeapWord* startAddr = r->bottom();
  15.183 -      HeapWord* endAddr = r->used_region().end();
  15.184 -      HeapWord* lastAddr;
  15.185 -      HeapWord* nextAddr;
  15.186 -
  15.187 -      for (nextAddr = lastAddr = startAddr;
  15.188 -           nextAddr < endAddr;
  15.189 -           nextAddr = lastAddr) {
  15.190 -        MemRegion dirtyRegion;
  15.191 -
  15.192 -        // Get and clear dirty region from card table
  15.193 -        MemRegion next_mr(nextAddr, endAddr);
  15.194 -        dirtyRegion =
  15.195 -          _ctbs->dirty_card_range_after_reset(
  15.196 -                           next_mr,
  15.197 -                           true, CardTableModRefBS::clean_card_val());
  15.198 -        assert(dirtyRegion.start() >= nextAddr,
  15.199 -               "returned region inconsistent?");
  15.200 -
  15.201 -        if (!dirtyRegion.is_empty()) {
  15.202 -          HeapWord* stop_point =
  15.203 -            r->object_iterate_mem_careful(dirtyRegion,
  15.204 -                                          &update_rs_obj_cl);
  15.205 -          if (stop_point == NULL) {
  15.206 -            lastAddr = dirtyRegion.end();
  15.207 -            _cards_processed +=
  15.208 -              (int) (dirtyRegion.word_size() / CardTableModRefBS::card_size_in_words);
  15.209 -          } else {
  15.210 -            // We're going to skip one or more cards that we can't parse.
  15.211 -            HeapWord* next_parseable_card =
  15.212 -              r->next_block_start_careful(stop_point);
  15.213 -            // Round this up to a card boundary.
  15.214 -            next_parseable_card =
  15.215 -              _ctbs->addr_for(_ctbs->byte_after_const(next_parseable_card));
  15.216 -            // Now we invalidate the intervening cards so we'll see them
  15.217 -            // again.
  15.218 -            MemRegion remaining_dirty =
  15.219 -              MemRegion(stop_point, dirtyRegion.end());
  15.220 -            MemRegion skipped =
  15.221 -              MemRegion(stop_point, next_parseable_card);
  15.222 -            _ctbs->invalidate(skipped.intersection(remaining_dirty));
  15.223 -
  15.224 -            // Now start up again where we can parse.
  15.225 -            lastAddr = next_parseable_card;
  15.226 -
  15.227 -            // Count how many we did completely.
  15.228 -            _cards_processed +=
  15.229 -              (stop_point - dirtyRegion.start()) /
  15.230 -              CardTableModRefBS::card_size_in_words;
  15.231 -          }
  15.232 -          // Allow interruption at regular intervals.
  15.233 -          // (Might need to make them more regular, if we get big
  15.234 -          // dirty regions.)
  15.235 -          if (_cgc_thrd != NULL) {
  15.236 -            if (_cgc_thrd->should_yield()) {
  15.237 -              _cgc_thrd->yield();
  15.238 -              switch (_cg1r->get_pya()) {
  15.239 -              case PYA_continue:
  15.240 -                // This may have changed: re-read.
  15.241 -                endAddr = r->used_region().end();
  15.242 -                continue;
  15.243 -              case PYA_restart: case PYA_cancel:
  15.244 -                return true;
  15.245 -              }
  15.246 -            }
  15.247 -          }
  15.248 -        } else {
  15.249 -          break;
  15.250 -        }
  15.251 -      }
  15.252 -    }
  15.253 -    // A good yield opportunity.
  15.254 -    if (_cgc_thrd != NULL) {
  15.255 -      if (_cgc_thrd->should_yield()) {
  15.256 -        _cgc_thrd->yield();
  15.257 -        switch (_cg1r->get_pya()) {
  15.258 -        case PYA_restart: case PYA_cancel:
  15.259 -          return true;
  15.260 -        default:
  15.261 -          break;
  15.262 -        }
  15.263 -
  15.264 -      }
  15.265 -    }
  15.266 -    return false;
  15.267 -  }
  15.268 -
  15.269 -  unsigned cards_processed() { return _cards_processed; }
  15.270 -};
  15.271 -
  15.272 -
  15.273 -void HRInto_G1RemSet::concurrentRefinementPass(ConcurrentG1Refine* cg1r) {
  15.274 -  ConcRefineRegionClosure cr_cl(ct_bs(), cg1r, this);
  15.275 -  _g1->heap_region_iterate(&cr_cl);
  15.276 -  _conc_refine_traversals++;
  15.277 -  _conc_refine_cards += cr_cl.cards_processed();
  15.278 -}
  15.279 -
  15.280  static IntHistogram out_of_histo(50, 50);
  15.281  
  15.282 -
  15.283 -
  15.284  void HRInto_G1RemSet::concurrentRefineOneCard(jbyte* card_ptr, int worker_i) {
  15.285    // If the card is no longer dirty, nothing to do.
  15.286    if (*card_ptr != CardTableModRefBS::dirty_card_val()) return;
  15.287 @@ -983,10 +809,16 @@
  15.288    HeapRegion* max_mem_sz_region() { return _max_mem_sz_region; }
  15.289  };
  15.290  
  15.291 +class PrintRSThreadVTimeClosure : public ThreadClosure {
  15.292 +public:
  15.293 +  virtual void do_thread(Thread *t) {
  15.294 +    ConcurrentG1RefineThread* crt = (ConcurrentG1RefineThread*) t;
  15.295 +    gclog_or_tty->print("    %5.2f", crt->vtime_accum());
  15.296 +  }
  15.297 +};
  15.298 +
  15.299  void HRInto_G1RemSet::print_summary_info() {
  15.300    G1CollectedHeap* g1 = G1CollectedHeap::heap();
  15.301 -  ConcurrentG1RefineThread* cg1r_thrd =
  15.302 -    g1->concurrent_g1_refine()->cg1rThread();
  15.303  
  15.304  #if CARD_REPEAT_HISTO
  15.305    gclog_or_tty->print_cr("\nG1 card_repeat count histogram: ");
  15.306 @@ -999,15 +831,13 @@
  15.307      gclog_or_tty->print_cr("  # of CS ptrs --> # of cards with that number.");
  15.308      out_of_histo.print_on(gclog_or_tty);
  15.309    }
  15.310 -  gclog_or_tty->print_cr("\n Concurrent RS processed %d cards in "
  15.311 -                "%5.2fs.",
  15.312 -                _conc_refine_cards, cg1r_thrd->vtime_accum());
  15.313 -
  15.314 +  gclog_or_tty->print_cr("\n Concurrent RS processed %d cards",
  15.315 +                         _conc_refine_cards);
  15.316    DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
  15.317    jint tot_processed_buffers =
  15.318      dcqs.processed_buffers_mut() + dcqs.processed_buffers_rs_thread();
  15.319    gclog_or_tty->print_cr("  Of %d completed buffers:", tot_processed_buffers);
  15.320 -  gclog_or_tty->print_cr("     %8d (%5.1f%%) by conc RS thread.",
  15.321 +  gclog_or_tty->print_cr("     %8d (%5.1f%%) by conc RS threads.",
  15.322                  dcqs.processed_buffers_rs_thread(),
  15.323                  100.0*(float)dcqs.processed_buffers_rs_thread()/
  15.324                  (float)tot_processed_buffers);
  15.325 @@ -1015,15 +845,12 @@
  15.326                  dcqs.processed_buffers_mut(),
  15.327                  100.0*(float)dcqs.processed_buffers_mut()/
  15.328                  (float)tot_processed_buffers);
  15.329 -  gclog_or_tty->print_cr("   Did %d concurrent refinement traversals.",
  15.330 -                _conc_refine_traversals);
  15.331 -  if (!G1RSBarrierUseQueue) {
  15.332 -    gclog_or_tty->print_cr("   Scanned %8.2f cards/traversal.",
  15.333 -                  _conc_refine_traversals > 0 ?
  15.334 -                  (float)_conc_refine_cards/(float)_conc_refine_traversals :
  15.335 -                  0);
  15.336 -  }
  15.337 +  gclog_or_tty->print_cr("  Conc RS threads times(s)");
  15.338 +  PrintRSThreadVTimeClosure p;
  15.339 +  gclog_or_tty->print("     ");
  15.340 +  g1->concurrent_g1_refine()->threads_do(&p);
  15.341    gclog_or_tty->print_cr("");
  15.342 +
  15.343    if (G1UseHRIntoRS) {
  15.344      HRRSStatsIter blk;
  15.345      g1->heap_region_iterate(&blk);
    16.1 --- a/src/share/vm/gc_implementation/g1/g1RemSet.hpp	Fri Jun 05 10:25:39 2009 -0700
    16.2 +++ b/src/share/vm/gc_implementation/g1/g1RemSet.hpp	Wed Jun 10 14:57:21 2009 -0700
    16.3 @@ -33,15 +33,12 @@
    16.4  class G1RemSet: public CHeapObj {
    16.5  protected:
    16.6    G1CollectedHeap* _g1;
    16.7 -
    16.8 -  unsigned _conc_refine_traversals;
    16.9    unsigned _conc_refine_cards;
   16.10 -
   16.11    size_t n_workers();
   16.12  
   16.13  public:
   16.14    G1RemSet(G1CollectedHeap* g1) :
   16.15 -    _g1(g1), _conc_refine_traversals(0), _conc_refine_cards(0)
   16.16 +    _g1(g1), _conc_refine_cards(0)
   16.17    {}
   16.18  
   16.19    // Invoke "blk->do_oop" on all pointers into the CS in object in regions
   16.20 @@ -81,19 +78,11 @@
   16.21    virtual void scrub_par(BitMap* region_bm, BitMap* card_bm,
   16.22                           int worker_num, int claim_val) = 0;
   16.23  
   16.24 -  // Do any "refinement" activity that might be appropriate to the given
   16.25 -  // G1RemSet.  If "refinement" has iterateive "passes", do one pass.
   16.26 -  // If "t" is non-NULL, it is the thread performing the refinement.
   16.27 -  // Default implementation does nothing.
   16.28 -  virtual void concurrentRefinementPass(ConcurrentG1Refine* cg1r) {}
   16.29 -
   16.30    // Refine the card corresponding to "card_ptr".  If "sts" is non-NULL,
   16.31    // join and leave around parts that must be atomic wrt GC.  (NULL means
   16.32    // being done at a safepoint.)
   16.33    virtual void concurrentRefineOneCard(jbyte* card_ptr, int worker_i) {}
   16.34  
   16.35 -  unsigned conc_refine_cards() { return _conc_refine_cards; }
   16.36 -
   16.37    // Print any relevant summary info.
   16.38    virtual void print_summary_info() {}
   16.39  
   16.40 @@ -153,7 +142,7 @@
   16.41    // progress.  If so, then cards added to remembered sets should also have
   16.42    // their references into the collection summarized in "_new_refs".
   16.43    bool _par_traversal_in_progress;
   16.44 -  void set_par_traversal(bool b);
   16.45 +  void set_par_traversal(bool b) { _par_traversal_in_progress = b; }
   16.46    GrowableArray<oop*>** _new_refs;
   16.47    void new_refs_iterate(OopClosure* cl);
   16.48  
   16.49 @@ -194,7 +183,6 @@
   16.50    void scrub_par(BitMap* region_bm, BitMap* card_bm,
   16.51                   int worker_num, int claim_val);
   16.52  
   16.53 -  virtual void concurrentRefinementPass(ConcurrentG1Refine* t);
   16.54    virtual void concurrentRefineOneCard(jbyte* card_ptr, int worker_i);
   16.55  
   16.56    virtual void print_summary_info();
    17.1 --- a/src/share/vm/gc_implementation/g1/g1_globals.hpp	Fri Jun 05 10:25:39 2009 -0700
    17.2 +++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp	Wed Jun 10 14:57:21 2009 -0700
    17.3 @@ -147,9 +147,6 @@
    17.4    develop(bool, G1PrintCTFilterStats, false,                                \
    17.5            "If true, print stats on RS filtering effectiveness")             \
    17.6                                                                              \
    17.7 -  develop(bool, G1RSBarrierUseQueue, true,                                  \
    17.8 -          "If true, use queueing RS barrier")                               \
    17.9 -                                                                            \
   17.10    develop(bool, G1DeferredRSUpdate, true,                                   \
   17.11            "If true, use deferred RS updates")                               \
   17.12                                                                              \
   17.13 @@ -253,6 +250,10 @@
   17.14                                                                              \
   17.15    experimental(bool, G1ParallelRSetScanningEnabled, false,                  \
   17.16            "Enables the parallelization of remembered set scanning "         \
   17.17 -          "during evacuation pauses")
   17.18 +          "during evacuation pauses")                                       \
   17.19 +                                                                            \
   17.20 +  product(uintx, G1ParallelRSetThreads, 0,                                  \
   17.21 +          "If non-0 is the number of parallel rem set update threads, "     \
   17.22 +          "otherwise the value is determined ergonomically.")
   17.23  
   17.24  G1_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_EXPERIMENTAL_FLAG, DECLARE_NOTPRODUCT_FLAG, DECLARE_MANAGEABLE_FLAG, DECLARE_PRODUCT_RW_FLAG)
    18.1 --- a/src/share/vm/gc_implementation/g1/heapRegion.cpp	Fri Jun 05 10:25:39 2009 -0700
    18.2 +++ b/src/share/vm/gc_implementation/g1/heapRegion.cpp	Wed Jun 10 14:57:21 2009 -0700
    18.3 @@ -351,6 +351,7 @@
    18.4      _claimed(InitialClaimValue), _evacuation_failed(false),
    18.5      _prev_marked_bytes(0), _next_marked_bytes(0), _sort_index(-1),
    18.6      _young_type(NotYoung), _next_young_region(NULL),
    18.7 +    _next_dirty_cards_region(NULL),
    18.8      _young_index_in_cset(-1), _surv_rate_group(NULL), _age_index(-1),
    18.9      _rem_set(NULL), _zfs(NotZeroFilled)
   18.10  {
    19.1 --- a/src/share/vm/gc_implementation/g1/heapRegion.hpp	Fri Jun 05 10:25:39 2009 -0700
    19.2 +++ b/src/share/vm/gc_implementation/g1/heapRegion.hpp	Wed Jun 10 14:57:21 2009 -0700
    19.3 @@ -227,6 +227,9 @@
    19.4    // next region in the young "generation" region set
    19.5    HeapRegion* _next_young_region;
    19.6  
    19.7 +  // Next region whose cards need cleaning
    19.8 +  HeapRegion* _next_dirty_cards_region;
    19.9 +
   19.10    // For parallel heapRegion traversal.
   19.11    jint _claimed;
   19.12  
   19.13 @@ -468,6 +471,11 @@
   19.14      _next_young_region = hr;
   19.15    }
   19.16  
   19.17 +  HeapRegion* get_next_dirty_cards_region() const { return _next_dirty_cards_region; }
   19.18 +  HeapRegion** next_dirty_cards_region_addr() { return &_next_dirty_cards_region; }
   19.19 +  void set_next_dirty_cards_region(HeapRegion* hr) { _next_dirty_cards_region = hr; }
   19.20 +  bool is_on_dirty_cards_region_list() const { return get_next_dirty_cards_region() != NULL; }
   19.21 +
   19.22    // Allows logical separation between objects allocated before and after.
   19.23    void save_marks();
   19.24  
    20.1 --- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp	Fri Jun 05 10:25:39 2009 -0700
    20.2 +++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp	Wed Jun 10 14:57:21 2009 -0700
    20.3 @@ -1052,18 +1052,11 @@
    20.4  
    20.5  }
    20.6  
    20.7 -
    20.8 -bool HeapRegionRemSet::_par_traversal = false;
    20.9 -
   20.10 -void HeapRegionRemSet::set_par_traversal(bool b) {
   20.11 -  assert(_par_traversal != b, "Proper alternation...");
   20.12 -  _par_traversal = b;
   20.13 -}
   20.14 -
   20.15 +// Determines how many threads can add records to an rset in parallel.
   20.16 +// This can be done by either mutator threads together with the
   20.17 +// concurrent refinement threads or GC threads.
   20.18  int HeapRegionRemSet::num_par_rem_sets() {
   20.19 -  // We always have at least two, so that a mutator thread can claim an
   20.20 -  // id and add to a rem set.
   20.21 -  return (int) MAX2(ParallelGCThreads, (size_t)2);
   20.22 +  return (int)MAX2(DirtyCardQueueSet::num_par_ids() + ConcurrentG1Refine::thread_num(), ParallelGCThreads);
   20.23  }
   20.24  
   20.25  HeapRegionRemSet::HeapRegionRemSet(G1BlockOffsetSharedArray* bosa,
    21.1 --- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp	Fri Jun 05 10:25:39 2009 -0700
    21.2 +++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp	Wed Jun 10 14:57:21 2009 -0700
    21.3 @@ -177,8 +177,6 @@
    21.4    G1BlockOffsetSharedArray* _bosa;
    21.5    G1BlockOffsetSharedArray* bosa() const { return _bosa; }
    21.6  
    21.7 -  static bool _par_traversal;
    21.8 -
    21.9    OtherRegionsTable _other_regions;
   21.10  
   21.11    // One set bit for every region that has an entry for this one.
   21.12 @@ -211,8 +209,6 @@
   21.13                     HeapRegion* hr);
   21.14  
   21.15    static int num_par_rem_sets();
   21.16 -  static bool par_traversal() { return _par_traversal; }
   21.17 -  static void set_par_traversal(bool b);
   21.18  
   21.19    HeapRegion* hr() const {
   21.20      return _other_regions.hr();
    22.1 --- a/src/share/vm/gc_implementation/g1/ptrQueue.cpp	Fri Jun 05 10:25:39 2009 -0700
    22.2 +++ b/src/share/vm/gc_implementation/g1/ptrQueue.cpp	Wed Jun 10 14:57:21 2009 -0700
    22.3 @@ -172,7 +172,7 @@
    22.4    _n_completed_buffers++;
    22.5  
    22.6    if (!_process_completed &&
    22.7 -      _n_completed_buffers == _process_completed_threshold) {
    22.8 +      _n_completed_buffers >= _process_completed_threshold) {
    22.9      _process_completed = true;
   22.10      if (_notify_when_complete)
   22.11        _cbl_mon->notify_all();
    23.1 --- a/src/share/vm/gc_implementation/includeDB_gc_g1	Fri Jun 05 10:25:39 2009 -0700
    23.2 +++ b/src/share/vm/gc_implementation/includeDB_gc_g1	Wed Jun 10 14:57:21 2009 -0700
    23.3 @@ -49,6 +49,8 @@
    23.4  
    23.5  concurrentG1Refine.hpp			globalDefinitions.hpp
    23.6  concurrentG1Refine.hpp			allocation.hpp
    23.7 +concurrentG1Refine.hpp			thread.hpp
    23.8 +
    23.9  
   23.10  concurrentG1RefineThread.cpp		concurrentG1Refine.hpp
   23.11  concurrentG1RefineThread.cpp		concurrentG1RefineThread.hpp
   23.12 @@ -280,6 +282,7 @@
   23.13  
   23.14  heapRegionRemSet.cpp                    allocation.hpp
   23.15  heapRegionRemSet.cpp                    bitMap.inline.hpp
   23.16 +heapRegionRemSet.cpp                    concurrentG1Refine.hpp
   23.17  heapRegionRemSet.cpp                    g1BlockOffsetTable.inline.hpp
   23.18  heapRegionRemSet.cpp                    g1CollectedHeap.inline.hpp
   23.19  heapRegionRemSet.cpp                    heapRegionRemSet.hpp
    24.1 --- a/src/share/vm/gc_implementation/shared/concurrentGCThread.cpp	Fri Jun 05 10:25:39 2009 -0700
    24.2 +++ b/src/share/vm/gc_implementation/shared/concurrentGCThread.cpp	Wed Jun 10 14:57:21 2009 -0700
    24.3 @@ -27,13 +27,12 @@
    24.4  # include "incls/_precompiled.incl"
    24.5  # include "incls/_concurrentGCThread.cpp.incl"
    24.6  
    24.7 -bool ConcurrentGCThread::_should_terminate    = false;
    24.8 -bool ConcurrentGCThread::_has_terminated      = false;
    24.9  int  ConcurrentGCThread::_CGC_flag            = CGC_nil;
   24.10  
   24.11  SuspendibleThreadSet ConcurrentGCThread::_sts;
   24.12  
   24.13 -ConcurrentGCThread::ConcurrentGCThread() {
   24.14 +ConcurrentGCThread::ConcurrentGCThread() :
   24.15 +  _should_terminate(false), _has_terminated(false) {
   24.16    _sts.initialize();
   24.17  };
   24.18  
    25.1 --- a/src/share/vm/gc_implementation/shared/concurrentGCThread.hpp	Fri Jun 05 10:25:39 2009 -0700
    25.2 +++ b/src/share/vm/gc_implementation/shared/concurrentGCThread.hpp	Wed Jun 10 14:57:21 2009 -0700
    25.3 @@ -72,8 +72,8 @@
    25.4    friend class VMStructs;
    25.5  
    25.6  protected:
    25.7 -  static bool _should_terminate;
    25.8 -  static bool _has_terminated;
    25.9 +  bool _should_terminate;
   25.10 +  bool _has_terminated;
   25.11  
   25.12    enum CGC_flag_type {
   25.13      CGC_nil           = 0x0,
    26.1 --- a/src/share/vm/memory/cardTableRS.cpp	Fri Jun 05 10:25:39 2009 -0700
    26.2 +++ b/src/share/vm/memory/cardTableRS.cpp	Wed Jun 10 14:57:21 2009 -0700
    26.3 @@ -33,12 +33,8 @@
    26.4  {
    26.5  #ifndef SERIALGC
    26.6    if (UseG1GC) {
    26.7 -    if (G1RSBarrierUseQueue) {
    26.8        _ct_bs = new G1SATBCardTableLoggingModRefBS(whole_heap,
    26.9                                                    max_covered_regions);
   26.10 -    } else {
   26.11 -      _ct_bs = new G1SATBCardTableModRefBS(whole_heap, max_covered_regions);
   26.12 -    }
   26.13    } else {
   26.14      _ct_bs = new CardTableModRefBSForCTRS(whole_heap, max_covered_regions);
   26.15    }
    27.1 --- a/src/share/vm/runtime/mutexLocker.cpp	Fri Jun 05 10:25:39 2009 -0700
    27.2 +++ b/src/share/vm/runtime/mutexLocker.cpp	Wed Jun 10 14:57:21 2009 -0700
    27.3 @@ -70,7 +70,6 @@
    27.4  Monitor* CMark_lock                   = NULL;
    27.5  Monitor* ZF_mon                       = NULL;
    27.6  Monitor* Cleanup_mon                  = NULL;
    27.7 -Monitor* G1ConcRefine_mon             = NULL;
    27.8  Mutex*   SATB_Q_FL_lock               = NULL;
    27.9  Monitor* SATB_Q_CBL_mon               = NULL;
   27.10  Mutex*   Shared_SATB_Q_lock           = NULL;
   27.11 @@ -168,7 +167,6 @@
   27.12      def(CMark_lock                 , Monitor, nonleaf,     true ); // coordinate concurrent mark thread
   27.13      def(ZF_mon                     , Monitor, leaf,        true );
   27.14      def(Cleanup_mon                , Monitor, nonleaf,     true );
   27.15 -    def(G1ConcRefine_mon           , Monitor, nonleaf,     true );
   27.16      def(SATB_Q_FL_lock             , Mutex  , special,     true );
   27.17      def(SATB_Q_CBL_mon             , Monitor, nonleaf,     true );
   27.18      def(Shared_SATB_Q_lock         , Mutex,   nonleaf,     true );
    28.1 --- a/src/share/vm/runtime/mutexLocker.hpp	Fri Jun 05 10:25:39 2009 -0700
    28.2 +++ b/src/share/vm/runtime/mutexLocker.hpp	Wed Jun 10 14:57:21 2009 -0700
    28.3 @@ -63,9 +63,6 @@
    28.4  extern Monitor* CMark_lock;                      // used for concurrent mark thread coordination
    28.5  extern Monitor* ZF_mon;                          // used for G1 conc zero-fill.
    28.6  extern Monitor* Cleanup_mon;                     // used for G1 conc cleanup.
    28.7 -extern Monitor* G1ConcRefine_mon;                // used for G1 conc-refine
    28.8 -                                                 // coordination.
    28.9 -
   28.10  extern Mutex*   SATB_Q_FL_lock;                  // Protects SATB Q
   28.11                                                   // buffer free list.
   28.12  extern Monitor* SATB_Q_CBL_mon;                  // Protects SATB Q

mercurial