8069273: Decrease Hot Card Cache Lock contention

Thu, 29 Jan 2015 15:05:25 +0100

author
redestad
date
Thu, 29 Jan 2015 15:05:25 +0100
changeset 7653
b6a1bf5222c5
parent 7652
ae374055ebce
child 7654
36c7518fd486

8069273: Decrease Hot Card Cache Lock contention
Reviewed-by: tschatzl, mgerdin

src/share/vm/gc_implementation/g1/g1HotCardCache.cpp file | annotate | diff | comparison | revisions
src/share/vm/gc_implementation/g1/g1HotCardCache.hpp file | annotate | diff | comparison | revisions
src/share/vm/runtime/mutexLocker.cpp file | annotate | diff | comparison | revisions
src/share/vm/runtime/mutexLocker.hpp file | annotate | diff | comparison | revisions
     1.1 --- a/src/share/vm/gc_implementation/g1/g1HotCardCache.cpp	Thu Sep 18 11:27:59 2014 +0200
     1.2 +++ b/src/share/vm/gc_implementation/g1/g1HotCardCache.cpp	Thu Jan 29 15:05:25 2015 +0100
     1.3 @@ -1,5 +1,5 @@
     1.4  /*
     1.5 - * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
     1.6 + * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved.
     1.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     1.8   *
     1.9   * This code is free software; you can redistribute it and/or modify it
    1.10 @@ -36,11 +36,10 @@
    1.11    if (default_use_cache()) {
    1.12      _use_cache = true;
    1.13  
    1.14 -    _hot_cache_size = (1 << G1ConcRSLogCacheSize);
    1.15 +    _hot_cache_size = (size_t)1 << G1ConcRSLogCacheSize;
    1.16      _hot_cache = NEW_C_HEAP_ARRAY(jbyte*, _hot_cache_size, mtGC);
    1.17  
    1.18 -    _n_hot = 0;
    1.19 -    _hot_cache_idx = 0;
    1.20 +    reset_hot_cache_internal();
    1.21  
    1.22      // For refining the cards in the hot cache in parallel
    1.23      _hot_cache_par_chunk_size = (int)(ParallelGCThreads > 0 ? ClaimChunkSize : _hot_cache_size);
    1.24 @@ -64,26 +63,21 @@
    1.25      // return it for immediate refining.
    1.26      return card_ptr;
    1.27    }
    1.28 +  // Otherwise, the card is hot.
    1.29 +  size_t index = Atomic::add_ptr((intptr_t)1, (volatile intptr_t*)&_hot_cache_idx) - 1;
    1.30 +  size_t masked_index = index & (_hot_cache_size - 1);
    1.31 +  jbyte* current_ptr = _hot_cache[masked_index];
    1.32  
    1.33 -  // Otherwise, the card is hot.
    1.34 -  jbyte* res = NULL;
    1.35 -  MutexLockerEx x(HotCardCache_lock, Mutex::_no_safepoint_check_flag);
    1.36 -  if (_n_hot == _hot_cache_size) {
    1.37 -    res = _hot_cache[_hot_cache_idx];
    1.38 -    _n_hot--;
    1.39 -  }
    1.40 -
    1.41 -  // Now _n_hot < _hot_cache_size, and we can insert at _hot_cache_idx.
    1.42 -  _hot_cache[_hot_cache_idx] = card_ptr;
    1.43 -  _hot_cache_idx++;
    1.44 -
    1.45 -  if (_hot_cache_idx == _hot_cache_size) {
    1.46 -    // Wrap around
    1.47 -    _hot_cache_idx = 0;
    1.48 -  }
    1.49 -  _n_hot++;
    1.50 -
    1.51 -  return res;
    1.52 +  // Try to store the new card pointer into the cache. Compare-and-swap to guard
    1.53 +  // against the unlikely event of a race in which another card pointer has
    1.54 +  // already been written to the cache. In this case we will return
    1.55 +  // card_ptr rather than take the other option, which would be starting over.
    1.56 +  // This should be OK since card_ptr will likely be the older card already
    1.57 +  // when/if this ever happens.
    1.58 +  jbyte* previous_ptr = (jbyte*)Atomic::cmpxchg_ptr(card_ptr,
    1.59 +                                                    &_hot_cache[masked_index],
    1.60 +                                                    current_ptr);
    1.61 +  return (previous_ptr == current_ptr) ? previous_ptr : card_ptr;
    1.62  }
    1.63  
    1.64  void G1HotCardCache::drain(uint worker_i,
    1.65 @@ -96,38 +90,37 @@
    1.66  
    1.67    assert(_hot_cache != NULL, "Logic");
    1.68    assert(!use_cache(), "cache should be disabled");
    1.69 -  int start_idx;
    1.70 +  while (_hot_cache_par_claimed_idx < _hot_cache_size) {
    1.71 +    size_t end_idx = Atomic::add_ptr((intptr_t)_hot_cache_par_chunk_size,
    1.72 +                                     (volatile intptr_t*)&_hot_cache_par_claimed_idx);
    1.73 +    size_t start_idx = end_idx - _hot_cache_par_chunk_size;
    1.74 +    // The current worker has successfully claimed the chunk [start_idx..end_idx)
    1.75 +    end_idx = MIN2(end_idx, _hot_cache_size);
    1.76 +    for (size_t i = start_idx; i < end_idx; i++) {
    1.77 +      jbyte* card_ptr = _hot_cache[i];
    1.78 +      if (card_ptr != NULL) {
    1.79 +        if (g1rs->refine_card(card_ptr, worker_i, true)) {
    1.80 +          // The part of the heap spanned by the card contains references
    1.81 +          // that point into the current collection set.
    1.82 +          // We need to record the card pointer in the DirtyCardQueueSet
    1.83 +          // that we use for such cards.
    1.84 +          //
    1.85 +          // The only time we care about recording cards that contain
    1.86 +          // references that point into the collection set is during
    1.87 +          // RSet updating while within an evacuation pause.
    1.88 +          // In this case worker_i should be the id of a GC worker thread
    1.89 +          assert(SafepointSynchronize::is_at_safepoint(), "Should be at a safepoint");
    1.90 +          assert(worker_i < ParallelGCThreads,
    1.91 +                 err_msg("incorrect worker id: %u", worker_i));
    1.92  
    1.93 -  while ((start_idx = _hot_cache_par_claimed_idx) < _n_hot) { // read once
    1.94 -    int end_idx = start_idx + _hot_cache_par_chunk_size;
    1.95 -
    1.96 -    if (start_idx ==
    1.97 -        Atomic::cmpxchg(end_idx, &_hot_cache_par_claimed_idx, start_idx)) {
    1.98 -      // The current worker has successfully claimed the chunk [start_idx..end_idx)
    1.99 -      end_idx = MIN2(end_idx, _n_hot);
   1.100 -      for (int i = start_idx; i < end_idx; i++) {
   1.101 -        jbyte* card_ptr = _hot_cache[i];
   1.102 -        if (card_ptr != NULL) {
   1.103 -          if (g1rs->refine_card(card_ptr, worker_i, true)) {
   1.104 -            // The part of the heap spanned by the card contains references
   1.105 -            // that point into the current collection set.
   1.106 -            // We need to record the card pointer in the DirtyCardQueueSet
   1.107 -            // that we use for such cards.
   1.108 -            //
   1.109 -            // The only time we care about recording cards that contain
   1.110 -            // references that point into the collection set is during
   1.111 -            // RSet updating while within an evacuation pause.
   1.112 -            // In this case worker_i should be the id of a GC worker thread
   1.113 -            assert(SafepointSynchronize::is_at_safepoint(), "Should be at a safepoint");
   1.114 -            assert(worker_i < (ParallelGCThreads == 0 ? 1 : ParallelGCThreads),
   1.115 -                   err_msg("incorrect worker id: "UINT32_FORMAT, worker_i));
   1.116 -
   1.117 -            into_cset_dcq->enqueue(card_ptr);
   1.118 -          }
   1.119 +          into_cset_dcq->enqueue(card_ptr);
   1.120          }
   1.121 +      } else {
   1.122 +        break;
   1.123        }
   1.124      }
   1.125    }
   1.126 +
   1.127    // The existing entries in the hot card cache, which were just refined
   1.128    // above, are discarded prior to re-enabling the cache near the end of the GC.
   1.129  }
     2.1 --- a/src/share/vm/gc_implementation/g1/g1HotCardCache.hpp	Thu Sep 18 11:27:59 2014 +0200
     2.2 +++ b/src/share/vm/gc_implementation/g1/g1HotCardCache.hpp	Thu Jan 29 15:05:25 2015 +0100
     2.3 @@ -1,5 +1,5 @@
     2.4  /*
     2.5 - * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
     2.6 + * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved.
     2.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     2.8   *
     2.9   * This code is free software; you can redistribute it and/or modify it
    2.10 @@ -54,21 +54,30 @@
    2.11  // code, increasing throughput.
    2.12  
    2.13  class G1HotCardCache: public CHeapObj<mtGC> {
    2.14 -  G1CollectedHeap*   _g1h;
    2.15 +
    2.16 +  G1CollectedHeap*  _g1h;
    2.17 +
    2.18 +  bool              _use_cache;
    2.19 +
    2.20 +  G1CardCounts      _card_counts;
    2.21  
    2.22    // The card cache table
    2.23 -  jbyte**      _hot_cache;
    2.24 +  jbyte**           _hot_cache;
    2.25  
    2.26 -  int          _hot_cache_size;
    2.27 -  int          _n_hot;
    2.28 -  int          _hot_cache_idx;
    2.29 +  size_t            _hot_cache_size;
    2.30  
    2.31 -  int          _hot_cache_par_chunk_size;
    2.32 -  volatile int _hot_cache_par_claimed_idx;
    2.33 +  int               _hot_cache_par_chunk_size;
    2.34  
    2.35 -  bool         _use_cache;
    2.36 +  // Avoids false sharing when concurrently updating _hot_cache_idx or
    2.37 +  // _hot_cache_par_claimed_idx. These are never updated at the same time,
    2.38 +  // so it is not necessary to separate them from each other as well.
    2.39 +  char _pad_before[DEFAULT_CACHE_LINE_SIZE];
    2.40  
    2.41 -  G1CardCounts _card_counts;
    2.42 +  volatile size_t _hot_cache_idx;
    2.43 +
    2.44 +  volatile size_t _hot_cache_par_claimed_idx;
    2.45 +
    2.46 +  char _pad_after[DEFAULT_CACHE_LINE_SIZE];
    2.47  
    2.48    // The number of cached cards a thread claims when flushing the cache
    2.49    static const int ClaimChunkSize = 32;
    2.50 @@ -113,16 +122,25 @@
    2.51    void reset_hot_cache() {
    2.52      assert(SafepointSynchronize::is_at_safepoint(), "Should be at a safepoint");
    2.53      assert(Thread::current()->is_VM_thread(), "Current thread should be the VMthread");
    2.54 -    _hot_cache_idx = 0; _n_hot = 0;
    2.55 +    if (default_use_cache()) {
    2.56 +        reset_hot_cache_internal();
    2.57 +    }
    2.58    }
    2.59  
    2.60 -  bool hot_cache_is_empty() { return _n_hot == 0; }
    2.61 -
    2.62    // Zeros the values in the card counts table for entire committed heap
    2.63    void reset_card_counts();
    2.64  
    2.65    // Zeros the values in the card counts table for the given region
    2.66    void reset_card_counts(HeapRegion* hr);
    2.67 +
    2.68 + private:
    2.69 +  void reset_hot_cache_internal() {
    2.70 +    assert(_hot_cache != NULL, "Logic");
    2.71 +    _hot_cache_idx = 0;
    2.72 +    for (size_t i = 0; i < _hot_cache_size; i++) {
    2.73 +      _hot_cache[i] = NULL;
    2.74 +    }
    2.75 +  }
    2.76  };
    2.77  
    2.78  #endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1HOTCARDCACHE_HPP
     3.1 --- a/src/share/vm/runtime/mutexLocker.cpp	Thu Sep 18 11:27:59 2014 +0200
     3.2 +++ b/src/share/vm/runtime/mutexLocker.cpp	Thu Jan 29 15:05:25 2015 +0100
     3.3 @@ -1,5 +1,5 @@
     3.4  /*
     3.5 - * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
     3.6 + * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
     3.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     3.8   *
     3.9   * This code is free software; you can redistribute it and/or modify it
    3.10 @@ -119,7 +119,6 @@
    3.11  Mutex*   OldSets_lock                 = NULL;
    3.12  Monitor* RootRegionScan_lock          = NULL;
    3.13  Mutex*   MMUTracker_lock              = NULL;
    3.14 -Mutex*   HotCardCache_lock            = NULL;
    3.15  
    3.16  Monitor* GCTaskManager_lock           = NULL;
    3.17  
    3.18 @@ -200,7 +199,6 @@
    3.19      def(OldSets_lock               , Mutex  , leaf     ,   true );
    3.20      def(RootRegionScan_lock        , Monitor, leaf     ,   true );
    3.21      def(MMUTracker_lock            , Mutex  , leaf     ,   true );
    3.22 -    def(HotCardCache_lock          , Mutex  , special  ,   true );
    3.23      def(EvacFailureStack_lock      , Mutex  , nonleaf  ,   true );
    3.24  
    3.25      def(StringDedupQueue_lock      , Monitor, leaf,        true );
     4.1 --- a/src/share/vm/runtime/mutexLocker.hpp	Thu Sep 18 11:27:59 2014 +0200
     4.2 +++ b/src/share/vm/runtime/mutexLocker.hpp	Thu Jan 29 15:05:25 2015 +0100
     4.3 @@ -1,5 +1,5 @@
     4.4  /*
     4.5 - * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
     4.6 + * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
     4.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4.8   *
     4.9   * This code is free software; you can redistribute it and/or modify it
    4.10 @@ -137,7 +137,6 @@
    4.11  extern Monitor* RootRegionScan_lock;             // used to notify that the CM threads have finished scanning the IM snapshot regions
    4.12  extern Mutex*   MMUTracker_lock;                 // protects the MMU
    4.13                                                   // tracker data structures
    4.14 -extern Mutex*   HotCardCache_lock;               // protects the hot card cache
    4.15  
    4.16  extern Mutex*   Management_lock;                 // a lock used to serialize JVM management
    4.17  extern Monitor* Service_lock;                    // a lock used for service thread operation

mercurial