Thu, 29 Jan 2015 15:05:25 +0100
8069273: Decrease Hot Card Cache Lock contention
Reviewed-by: tschatzl, mgerdin
1.1 --- a/src/share/vm/gc_implementation/g1/g1HotCardCache.cpp Thu Sep 18 11:27:59 2014 +0200 1.2 +++ b/src/share/vm/gc_implementation/g1/g1HotCardCache.cpp Thu Jan 29 15:05:25 2015 +0100 1.3 @@ -1,5 +1,5 @@ 1.4 /* 1.5 - * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. 1.6 + * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved. 1.7 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 1.8 * 1.9 * This code is free software; you can redistribute it and/or modify it 1.10 @@ -36,11 +36,10 @@ 1.11 if (default_use_cache()) { 1.12 _use_cache = true; 1.13 1.14 - _hot_cache_size = (1 << G1ConcRSLogCacheSize); 1.15 + _hot_cache_size = (size_t)1 << G1ConcRSLogCacheSize; 1.16 _hot_cache = NEW_C_HEAP_ARRAY(jbyte*, _hot_cache_size, mtGC); 1.17 1.18 - _n_hot = 0; 1.19 - _hot_cache_idx = 0; 1.20 + reset_hot_cache_internal(); 1.21 1.22 // For refining the cards in the hot cache in parallel 1.23 _hot_cache_par_chunk_size = (int)(ParallelGCThreads > 0 ? ClaimChunkSize : _hot_cache_size); 1.24 @@ -64,26 +63,21 @@ 1.25 // return it for immediate refining. 1.26 return card_ptr; 1.27 } 1.28 + // Otherwise, the card is hot. 1.29 + size_t index = Atomic::add_ptr((intptr_t)1, (volatile intptr_t*)&_hot_cache_idx) - 1; 1.30 + size_t masked_index = index & (_hot_cache_size - 1); 1.31 + jbyte* current_ptr = _hot_cache[masked_index]; 1.32 1.33 - // Otherwise, the card is hot. 1.34 - jbyte* res = NULL; 1.35 - MutexLockerEx x(HotCardCache_lock, Mutex::_no_safepoint_check_flag); 1.36 - if (_n_hot == _hot_cache_size) { 1.37 - res = _hot_cache[_hot_cache_idx]; 1.38 - _n_hot--; 1.39 - } 1.40 - 1.41 - // Now _n_hot < _hot_cache_size, and we can insert at _hot_cache_idx. 1.42 - _hot_cache[_hot_cache_idx] = card_ptr; 1.43 - _hot_cache_idx++; 1.44 - 1.45 - if (_hot_cache_idx == _hot_cache_size) { 1.46 - // Wrap around 1.47 - _hot_cache_idx = 0; 1.48 - } 1.49 - _n_hot++; 1.50 - 1.51 - return res; 1.52 + // Try to store the new card pointer into the cache. Compare-and-swap to guard 1.53 + // against the unlikely event of a race resulting in another card pointer to 1.54 + // have already been written to the cache. In this case we will return 1.55 + // card_ptr in favor of the other option, which would be starting over. This 1.56 + // should be OK since card_ptr will likely be the older card already when/if 1.57 + // this ever happens. 1.58 + jbyte* previous_ptr = (jbyte*)Atomic::cmpxchg_ptr(card_ptr, 1.59 + &_hot_cache[masked_index], 1.60 + current_ptr); 1.61 + return (previous_ptr == current_ptr) ? previous_ptr : card_ptr; 1.62 } 1.63 1.64 void G1HotCardCache::drain(uint worker_i, 1.65 @@ -96,38 +90,37 @@ 1.66 1.67 assert(_hot_cache != NULL, "Logic"); 1.68 assert(!use_cache(), "cache should be disabled"); 1.69 - int start_idx; 1.70 + while (_hot_cache_par_claimed_idx < _hot_cache_size) { 1.71 + size_t end_idx = Atomic::add_ptr((intptr_t)_hot_cache_par_chunk_size, 1.72 + (volatile intptr_t*)&_hot_cache_par_claimed_idx); 1.73 + size_t start_idx = end_idx - _hot_cache_par_chunk_size; 1.74 + // The current worker has successfully claimed the chunk [start_idx..end_idx) 1.75 + end_idx = MIN2(end_idx, _hot_cache_size); 1.76 + for (size_t i = start_idx; i < end_idx; i++) { 1.77 + jbyte* card_ptr = _hot_cache[i]; 1.78 + if (card_ptr != NULL) { 1.79 + if (g1rs->refine_card(card_ptr, worker_i, true)) { 1.80 + // The part of the heap spanned by the card contains references 1.81 + // that point into the current collection set. 1.82 + // We need to record the card pointer in the DirtyCardQueueSet 1.83 + // that we use for such cards. 1.84 + // 1.85 + // The only time we care about recording cards that contain 1.86 + // references that point into the collection set is during 1.87 + // RSet updating while within an evacuation pause. 1.88 + // In this case worker_i should be the id of a GC worker thread 1.89 + assert(SafepointSynchronize::is_at_safepoint(), "Should be at a safepoint"); 1.90 + assert(worker_i < ParallelGCThreads, 1.91 + err_msg("incorrect worker id: %u", worker_i)); 1.92 1.93 - while ((start_idx = _hot_cache_par_claimed_idx) < _n_hot) { // read once 1.94 - int end_idx = start_idx + _hot_cache_par_chunk_size; 1.95 - 1.96 - if (start_idx == 1.97 - Atomic::cmpxchg(end_idx, &_hot_cache_par_claimed_idx, start_idx)) { 1.98 - // The current worker has successfully claimed the chunk [start_idx..end_idx) 1.99 - end_idx = MIN2(end_idx, _n_hot); 1.100 - for (int i = start_idx; i < end_idx; i++) { 1.101 - jbyte* card_ptr = _hot_cache[i]; 1.102 - if (card_ptr != NULL) { 1.103 - if (g1rs->refine_card(card_ptr, worker_i, true)) { 1.104 - // The part of the heap spanned by the card contains references 1.105 - // that point into the current collection set. 1.106 - // We need to record the card pointer in the DirtyCardQueueSet 1.107 - // that we use for such cards. 1.108 - // 1.109 - // The only time we care about recording cards that contain 1.110 - // references that point into the collection set is during 1.111 - // RSet updating while within an evacuation pause. 1.112 - // In this case worker_i should be the id of a GC worker thread 1.113 - assert(SafepointSynchronize::is_at_safepoint(), "Should be at a safepoint"); 1.114 - assert(worker_i < (ParallelGCThreads == 0 ? 1 : ParallelGCThreads), 1.115 - err_msg("incorrect worker id: "UINT32_FORMAT, worker_i)); 1.116 - 1.117 - into_cset_dcq->enqueue(card_ptr); 1.118 - } 1.119 + into_cset_dcq->enqueue(card_ptr); 1.120 } 1.121 + } else { 1.122 + break; 1.123 } 1.124 } 1.125 } 1.126 + 1.127 // The existing entries in the hot card cache, which were just refined 1.128 // above, are discarded prior to re-enabling the cache near the end of the GC. 1.129 }
2.1 --- a/src/share/vm/gc_implementation/g1/g1HotCardCache.hpp Thu Sep 18 11:27:59 2014 +0200 2.2 +++ b/src/share/vm/gc_implementation/g1/g1HotCardCache.hpp Thu Jan 29 15:05:25 2015 +0100 2.3 @@ -1,5 +1,5 @@ 2.4 /* 2.5 - * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. 2.6 + * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved. 2.7 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 2.8 * 2.9 * This code is free software; you can redistribute it and/or modify it 2.10 @@ -54,21 +54,30 @@ 2.11 // code, increasing throughput. 2.12 2.13 class G1HotCardCache: public CHeapObj<mtGC> { 2.14 - G1CollectedHeap* _g1h; 2.15 + 2.16 + G1CollectedHeap* _g1h; 2.17 + 2.18 + bool _use_cache; 2.19 + 2.20 + G1CardCounts _card_counts; 2.21 2.22 // The card cache table 2.23 - jbyte** _hot_cache; 2.24 + jbyte** _hot_cache; 2.25 2.26 - int _hot_cache_size; 2.27 - int _n_hot; 2.28 - int _hot_cache_idx; 2.29 + size_t _hot_cache_size; 2.30 2.31 - int _hot_cache_par_chunk_size; 2.32 - volatile int _hot_cache_par_claimed_idx; 2.33 + int _hot_cache_par_chunk_size; 2.34 2.35 - bool _use_cache; 2.36 + // Avoids false sharing when concurrently updating _hot_cache_idx or 2.37 + // _hot_cache_par_claimed_idx. These are never updated at the same time 2.38 + // thus it's not necessary to separate them as well 2.39 + char _pad_before[DEFAULT_CACHE_LINE_SIZE]; 2.40 2.41 - G1CardCounts _card_counts; 2.42 + volatile size_t _hot_cache_idx; 2.43 + 2.44 + volatile size_t _hot_cache_par_claimed_idx; 2.45 + 2.46 + char _pad_after[DEFAULT_CACHE_LINE_SIZE]; 2.47 2.48 // The number of cached cards a thread claims when flushing the cache 2.49 static const int ClaimChunkSize = 32; 2.50 @@ -113,16 +122,25 @@ 2.51 void reset_hot_cache() { 2.52 assert(SafepointSynchronize::is_at_safepoint(), "Should be at a safepoint"); 2.53 assert(Thread::current()->is_VM_thread(), "Current thread should be the VMthread"); 2.54 - _hot_cache_idx = 0; _n_hot = 0; 2.55 + if (default_use_cache()) { 2.56 + reset_hot_cache_internal(); 2.57 + } 2.58 } 2.59 2.60 - bool hot_cache_is_empty() { return _n_hot == 0; } 2.61 - 2.62 // Zeros the values in the card counts table for entire committed heap 2.63 void reset_card_counts(); 2.64 2.65 // Zeros the values in the card counts table for the given region 2.66 void reset_card_counts(HeapRegion* hr); 2.67 + 2.68 + private: 2.69 + void reset_hot_cache_internal() { 2.70 + assert(_hot_cache != NULL, "Logic"); 2.71 + _hot_cache_idx = 0; 2.72 + for (size_t i = 0; i < _hot_cache_size; i++) { 2.73 + _hot_cache[i] = NULL; 2.74 + } 2.75 + } 2.76 }; 2.77 2.78 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1HOTCARDCACHE_HPP
3.1 --- a/src/share/vm/runtime/mutexLocker.cpp Thu Sep 18 11:27:59 2014 +0200 3.2 +++ b/src/share/vm/runtime/mutexLocker.cpp Thu Jan 29 15:05:25 2015 +0100 3.3 @@ -1,5 +1,5 @@ 3.4 /* 3.5 - * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. 3.6 + * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. 3.7 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 3.8 * 3.9 * This code is free software; you can redistribute it and/or modify it 3.10 @@ -119,7 +119,6 @@ 3.11 Mutex* OldSets_lock = NULL; 3.12 Monitor* RootRegionScan_lock = NULL; 3.13 Mutex* MMUTracker_lock = NULL; 3.14 -Mutex* HotCardCache_lock = NULL; 3.15 3.16 Monitor* GCTaskManager_lock = NULL; 3.17 3.18 @@ -200,7 +199,6 @@ 3.19 def(OldSets_lock , Mutex , leaf , true ); 3.20 def(RootRegionScan_lock , Monitor, leaf , true ); 3.21 def(MMUTracker_lock , Mutex , leaf , true ); 3.22 - def(HotCardCache_lock , Mutex , special , true ); 3.23 def(EvacFailureStack_lock , Mutex , nonleaf , true ); 3.24 3.25 def(StringDedupQueue_lock , Monitor, leaf, true );
4.1 --- a/src/share/vm/runtime/mutexLocker.hpp Thu Sep 18 11:27:59 2014 +0200 4.2 +++ b/src/share/vm/runtime/mutexLocker.hpp Thu Jan 29 15:05:25 2015 +0100 4.3 @@ -1,5 +1,5 @@ 4.4 /* 4.5 - * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. 4.6 + * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. 4.7 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4.8 * 4.9 * This code is free software; you can redistribute it and/or modify it 4.10 @@ -137,7 +137,6 @@ 4.11 extern Monitor* RootRegionScan_lock; // used to notify that the CM threads have finished scanning the IM snapshot regions 4.12 extern Mutex* MMUTracker_lock; // protects the MMU 4.13 // tracker data structures 4.14 -extern Mutex* HotCardCache_lock; // protects the hot card cache 4.15 4.16 extern Mutex* Management_lock; // a lock used to serialize JVM management 4.17 extern Monitor* Service_lock; // a lock used for service thread operation