Wed, 30 Sep 2009 14:50:51 -0400
6890137: G1: revamp reachable object dump
Summary: Revamp the reachable object dump debugging facility.
Reviewed-by: jmasa, apetrusenko
/*
 * Copyright 2001-2009 Sun Microsystems, Inc. All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 */

#include "incls/_precompiled.incl"
#include "incls/_concurrentG1Refine.cpp.incl"

// Possible sizes for the card counts cache: odd primes that roughly double in size.
// (See jvmtiTagMap.cpp).
int ConcurrentG1Refine::_cc_cache_sizes[] = {
  16381,    32771,    76831,    150001,   307261,
  614563,  1228891,  2457733,  4915219,  9830479,
  19660831, 39321619, 78643219, 157286461, -1
};

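// A minimal sketch (illustrative, not verbatim from this file's header) of
// why the table sizes above are odd primes: the card counts cache is a hash
// table indexed by card number, and a prime modulus spreads card numbers
// that arrive with regular strides evenly across the buckets. Assuming a
// hash() along these lines in concurrentG1Refine.hpp:
//
//   unsigned ConcurrentG1Refine::hash(unsigned card_num) {
//     return card_num % _n_card_counts;  // _n_card_counts is a prime from the table
//   }
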
ConcurrentG1Refine::ConcurrentG1Refine() :
  _card_counts(NULL), _card_epochs(NULL),
  _n_card_counts(0), _max_n_card_counts(0),
  _cache_size_index(0), _expand_card_counts(false),
  _hot_cache(NULL),
  _def_use_cache(false), _use_cache(false),
  _n_periods(0),
  _threads(NULL), _n_threads(0)
{
  if (G1ConcRefine) {
    _n_threads = (int)thread_num();
    if (_n_threads > 0) {
      _threads = NEW_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _n_threads);
      int worker_id_offset = (int)DirtyCardQueueSet::num_par_ids();
      ConcurrentG1RefineThread *next = NULL;
      for (int i = _n_threads - 1; i >= 0; i--) {
        ConcurrentG1RefineThread* t = new ConcurrentG1RefineThread(this, next, worker_id_offset, i);
        assert(t != NULL, "Conc refine thread should have been created");
        assert(t->cg1r() == this, "Conc refine thread should refer to this");
        _threads[i] = t;
        next = t;
      }
    }
  }
}

size_t ConcurrentG1Refine::thread_num() {
  if (G1ConcRefine) {
    return (G1ParallelRSetThreads > 0) ? G1ParallelRSetThreads : ParallelGCThreads;
  }
  return 0;
}

void ConcurrentG1Refine::init() {
  if (G1ConcRSLogCacheSize > 0) {
    _g1h = G1CollectedHeap::heap();
    _max_n_card_counts =
      (unsigned) (_g1h->g1_reserved_obj_bytes() >> CardTableModRefBS::card_shift);

    size_t max_card_num = ((size_t)1 << (sizeof(unsigned)*BitsPerByte-1)) - 1;
    guarantee(_max_n_card_counts < max_card_num, "card_num representation");

    int desired = _max_n_card_counts / InitialCacheFraction;
    for (_cache_size_index = 0;
         _cc_cache_sizes[_cache_size_index] >= 0; _cache_size_index++) {
      if (_cc_cache_sizes[_cache_size_index] >= desired) break;
    }
    _cache_size_index = MAX2(0, (_cache_size_index - 1));

    int initial_size = _cc_cache_sizes[_cache_size_index];
    if (initial_size < 0) initial_size = _max_n_card_counts;

    // Make sure we don't go bigger than we will ever need
    _n_card_counts = MIN2((unsigned) initial_size, _max_n_card_counts);

    _card_counts = NEW_C_HEAP_ARRAY(CardCountCacheEntry, _n_card_counts);
    _card_epochs = NEW_C_HEAP_ARRAY(CardEpochCacheEntry, _n_card_counts);

    Copy::fill_to_bytes(&_card_counts[0],
                        _n_card_counts * sizeof(CardCountCacheEntry));
    Copy::fill_to_bytes(&_card_epochs[0], _n_card_counts * sizeof(CardEpochCacheEntry));

    ModRefBarrierSet* bs = _g1h->mr_bs();
    guarantee(bs->is_a(BarrierSet::CardTableModRef), "Precondition");
    _ct_bs = (CardTableModRefBS*)bs;
    _ct_bot = _ct_bs->byte_for_const(_g1h->reserved_region().start());

    _def_use_cache = true;
    _use_cache = true;
    _hot_cache_size = (1 << G1ConcRSLogCacheSize);
    _hot_cache = NEW_C_HEAP_ARRAY(jbyte*, _hot_cache_size);
    _n_hot = 0;
    _hot_cache_idx = 0;

    // For refining the cards in the hot cache in parallel
    int n_workers = (ParallelGCThreads > 0 ?
                        _g1h->workers()->total_workers() : 1);
    _hot_cache_par_chunk_size = MAX2(1, _hot_cache_size / n_workers);
    _hot_cache_par_claimed_idx = 0;
  }
}

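// The card counts code below converts between card table pointers and
// compact card numbers relative to _ct_bot (set up in init() above). A
// minimal sketch of those helpers, assuming definitions along these lines
// in concurrentG1Refine.hpp (names and bodies are illustrative, not
// verbatim source):
//
//   unsigned ConcurrentG1Refine::ptr_2_card_num(jbyte* card_ptr) {
//     return (unsigned) (card_ptr - _ct_bot);   // index of the card entry
//   }
//   jbyte* ConcurrentG1Refine::card_num_2_ptr(unsigned card_num) {
//     return _ct_bot + card_num;                // inverse mapping
//   }
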
void ConcurrentG1Refine::stop() {
  if (_threads != NULL) {
    for (int i = 0; i < _n_threads; i++) {
      _threads[i]->stop();
    }
  }
}

ConcurrentG1Refine::~ConcurrentG1Refine() {
  if (G1ConcRSLogCacheSize > 0) {
    assert(_card_counts != NULL, "Logic");
    FREE_C_HEAP_ARRAY(CardCountCacheEntry, _card_counts);
    assert(_card_epochs != NULL, "Logic");
    FREE_C_HEAP_ARRAY(CardEpochCacheEntry, _card_epochs);
    assert(_hot_cache != NULL, "Logic");
    FREE_C_HEAP_ARRAY(jbyte*, _hot_cache);
  }
  if (_threads != NULL) {
    for (int i = 0; i < _n_threads; i++) {
      delete _threads[i];
    }
    FREE_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _threads);
  }
}

void ConcurrentG1Refine::threads_do(ThreadClosure *tc) {
  if (_threads != NULL) {
    for (int i = 0; i < _n_threads; i++) {
      tc->do_thread(_threads[i]);
    }
  }
}

bool ConcurrentG1Refine::is_young_card(jbyte* card_ptr) {
  HeapWord* start = _ct_bs->addr_for(card_ptr);
  HeapRegion* r = _g1h->heap_region_containing(start);
  if (r != NULL && r->is_young()) {
    return true;
  }
  // Either the card is not associated with a heap region, or the
  // region it maps to is not young.
  return false;
}

jbyte* ConcurrentG1Refine::add_card_count(jbyte* card_ptr, int* count, bool* defer) {
  unsigned new_card_num = ptr_2_card_num(card_ptr);
  unsigned bucket = hash(new_card_num);
  assert(0 <= bucket && bucket < _n_card_counts, "Bounds");

  CardCountCacheEntry* count_ptr = &_card_counts[bucket];
  CardEpochCacheEntry* epoch_ptr = &_card_epochs[bucket];

  // We have to construct a new entry if we haven't updated the counts
  // during the current period, or if the count was updated for a
  // different card number.
  unsigned int new_epoch = (unsigned int) _n_periods;
  julong new_epoch_entry = make_epoch_entry(new_card_num, new_epoch);

  while (true) {
    // Fetch the previous epoch value
    julong prev_epoch_entry = epoch_ptr->_value;
    julong cas_res;

    if (extract_epoch(prev_epoch_entry) != new_epoch) {
      // This entry has not yet been updated during this period.
      // Note: we update the epoch value atomically to ensure
      // that there is only one winner that updates the cached
      // card_ptr value even though all the refine threads share
      // the same epoch value.

      cas_res = (julong) Atomic::cmpxchg((jlong) new_epoch_entry,
                                         (volatile jlong*)&epoch_ptr->_value,
                                         (jlong) prev_epoch_entry);

      if (cas_res == prev_epoch_entry) {
        // We have successfully won the race to update the
        // epoch and card_num value. Make it look like the
        // count and eviction count were previously cleared.
        count_ptr->_count = 1;
        count_ptr->_evict_count = 0;
        *count = 0;
        // We can defer the processing of card_ptr
        *defer = true;
        return card_ptr;
      }
      // We did not win the race to update the epoch field, so some other
      // thread must have done it. The value that gets returned by CAS
      // should be the new epoch value.
      assert(extract_epoch(cas_res) == new_epoch, "unexpected epoch");
      // We could 'continue' here or just re-read the previous epoch value
      prev_epoch_entry = epoch_ptr->_value;
    }

    // The epoch entry for card_ptr has been updated during this period.
    unsigned old_card_num = extract_card_num(prev_epoch_entry);

    // The card count that will be returned to the caller
    *count = count_ptr->_count;

    // Are we updating the count for the same card?
    if (new_card_num == old_card_num) {
      // Same card - just update the count. We could have more than one
      // thread racing to update count for the current card. It should be
      // OK not to use a CAS as the only penalty should be some missed
      // increments of the count which delays identifying the card as "hot".

      if (*count < max_jubyte) count_ptr->_count++;
      // We can defer the processing of card_ptr
      *defer = true;
      return card_ptr;
    }

    // Different card - evict old card info
    if (count_ptr->_evict_count < max_jubyte) count_ptr->_evict_count++;
    if (count_ptr->_evict_count > G1CardCountCacheExpandThreshold) {
      // Trigger a resize the next time we clear
      _expand_card_counts = true;
    }

    cas_res = (julong) Atomic::cmpxchg((jlong) new_epoch_entry,
                                       (volatile jlong*)&epoch_ptr->_value,
                                       (jlong) prev_epoch_entry);

    if (cas_res == prev_epoch_entry) {
      // We successfully updated the card num value in the epoch entry
      count_ptr->_count = 0; // initialize counter for new card num

      // Even though the region containing the card at old_card_num was not
      // in the young list when old_card_num was recorded in the epoch
      // cache it could have been added to the free list and subsequently
      // added to the young list in the intervening time. If the evicted
      // card is in a young region just return the card_ptr and the evicted
      // card will not be cleaned. See CR 6817995.

      jbyte* old_card_ptr = card_num_2_ptr(old_card_num);
      if (is_young_card(old_card_ptr)) {
        *count = 0;
        // We can defer the processing of card_ptr
        *defer = true;
        return card_ptr;
      }

      // We do not want to defer processing of card_ptr in this case
      // (we need to refine old_card_ptr and card_ptr)
      *defer = false;
      return old_card_ptr;
    }
    // Someone else beat us - try again.
  }
}

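// add_card_count() relies on packing a card number and an epoch into a
// single julong so that both can be swapped atomically with one CAS. A
// minimal sketch of that packing, assuming helpers declared in
// concurrentG1Refine.hpp (the layout shown - card number in the high 32
// bits, epoch in the low 32 bits - is an illustrative assumption, not
// verbatim source):
//
//   julong make_epoch_entry(unsigned card_num, unsigned epoch) {
//     return ((julong) card_num << 32) | epoch;
//   }
//   unsigned extract_card_num(julong entry) { return (unsigned) (entry >> 32); }
//   unsigned extract_epoch(julong entry)    { return (unsigned) entry; }
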
jbyte* ConcurrentG1Refine::cache_insert(jbyte* card_ptr, bool* defer) {
  int count;
  jbyte* cached_ptr = add_card_count(card_ptr, &count, defer);
  assert(cached_ptr != NULL, "bad cached card ptr");
  assert(!is_young_card(cached_ptr), "shouldn't get a card in young region");

  // The card pointer we obtained from the card count cache is not hot,
  // so do not store it in the hot cache; return it for immediate
  // refining.
  if (count < G1ConcRSHotCardLimit) {
    return cached_ptr;
  }

  // Otherwise, the pointer we got from _card_counts is hot.
  jbyte* res = NULL;
  MutexLockerEx x(HotCardCache_lock, Mutex::_no_safepoint_check_flag);
  if (_n_hot == _hot_cache_size) {
    res = _hot_cache[_hot_cache_idx];
    _n_hot--;
  }
  // Now _n_hot < _hot_cache_size, and we can insert at _hot_cache_idx.
  _hot_cache[_hot_cache_idx] = cached_ptr;
  _hot_cache_idx++;
  if (_hot_cache_idx == _hot_cache_size) _hot_cache_idx = 0;
  _n_hot++;

  if (res != NULL) {
    // Even though the region containing res was not in the young list
    // when it was recorded in the hot cache it could have been added
    // to the free list and subsequently added to the young list in
    // the intervening time. If res is in a young region, return NULL
    // so that res is not cleaned. See CR 6817995.

    if (is_young_card(res)) {
      res = NULL;
    }
  }

  return res;
}

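// The hot cache above is a fixed-size circular buffer: once full, each
// insertion overwrites the oldest entry at _hot_cache_idx and hands the
// evicted card back for immediate refinement. A hedged sketch of how a
// caller might consume the return value (the caller shown is illustrative;
// in this codebase the actual caller lives in the G1 remembered set code):
//
//   bool defer;
//   jbyte* to_refine = cg1r->cache_insert(card_ptr, &defer);
//   if (to_refine != NULL) {
//     // Either card_ptr itself (not yet hot) or a card evicted from
//     // the hot cache; refine it now rather than deferring it.
//   }
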
void ConcurrentG1Refine::clean_up_cache(int worker_i, G1RemSet* g1rs) {
  assert(!use_cache(), "cache should be disabled");
  int start_idx;

  while ((start_idx = _hot_cache_par_claimed_idx) < _n_hot) { // read once
    int end_idx = start_idx + _hot_cache_par_chunk_size;

    if (start_idx ==
        Atomic::cmpxchg(end_idx, &_hot_cache_par_claimed_idx, start_idx)) {
      // The current worker has successfully claimed the chunk [start_idx..end_idx)
      end_idx = MIN2(end_idx, _n_hot);
      for (int i = start_idx; i < end_idx; i++) {
        jbyte* entry = _hot_cache[i];
        if (entry != NULL) {
          g1rs->concurrentRefineOneCard(entry, worker_i);
        }
      }
    }
  }
}

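// The loop above parcels the hot cache out to workers in fixed-size
// chunks: each worker CASes _hot_cache_par_claimed_idx forward by
// _hot_cache_par_chunk_size and, on success, owns [start_idx, end_idx).
// A minimal standalone sketch of the same claiming idiom (illustrative
// only, detached from the HotSpot types above):
//
//   int claim_chunk(volatile int* claimed, int limit, int chunk) {
//     int start = *claimed;
//     while (start < limit) {
//       int observed = Atomic::cmpxchg(start + chunk, claimed, start);
//       if (observed == start) return start;  // we own [start, start+chunk)
//       start = observed;                     // lost the race; retry
//     }
//     return -1;                              // nothing left to claim
//   }
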
void ConcurrentG1Refine::expand_card_count_cache() {
  if (_n_card_counts < _max_n_card_counts) {
    int new_idx = _cache_size_index + 1;
    int new_size = _cc_cache_sizes[new_idx];
    if (new_size < 0) new_size = _max_n_card_counts;

    // Make sure we don't go bigger than we will ever need
    new_size = MIN2((unsigned) new_size, _max_n_card_counts);

    // Expand the card count and card epoch tables
    if (new_size > (int)_n_card_counts) {
      // We can just free and allocate a new array as we're
      // not interested in preserving the contents
      assert(_card_counts != NULL, "Logic!");
      assert(_card_epochs != NULL, "Logic!");
      FREE_C_HEAP_ARRAY(CardCountCacheEntry, _card_counts);
      FREE_C_HEAP_ARRAY(CardEpochCacheEntry, _card_epochs);
      _n_card_counts = new_size;
      _card_counts = NEW_C_HEAP_ARRAY(CardCountCacheEntry, _n_card_counts);
      _card_epochs = NEW_C_HEAP_ARRAY(CardEpochCacheEntry, _n_card_counts);
      _cache_size_index = new_idx;
    }
  }
}

void ConcurrentG1Refine::clear_and_record_card_counts() {
  if (G1ConcRSLogCacheSize == 0) return;

#ifndef PRODUCT
  double start = os::elapsedTime();
#endif

  if (_expand_card_counts) {
    expand_card_count_cache();
    _expand_card_counts = false;
    // Only need to clear the epochs.
    Copy::fill_to_bytes(&_card_epochs[0], _n_card_counts * sizeof(CardEpochCacheEntry));
  }

  int this_epoch = (int) _n_periods;
  assert((this_epoch + 1) <= max_jint, "too many periods");
  // Update the epoch
  _n_periods++;

#ifndef PRODUCT
  double elapsed = os::elapsedTime() - start;
  _g1h->g1_policy()->record_cc_clear_time(elapsed * 1000.0);
#endif
}

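// Note: bumping _n_periods above is what effectively "clears" the card
// counts cache. add_card_count() treats any entry whose recorded epoch
// differs from the current period as stale and rebuilds it, so the count
// entries themselves never need to be zeroed here; only the epoch table
// is cleared, and only after a resize.
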
void ConcurrentG1Refine::print_worker_threads_on(outputStream* st) const {
  for (int i = 0; i < _n_threads; ++i) {
    _threads[i]->print_on(st);
    st->cr();
  }
}