Mon, 19 Jul 2010 11:06:34 -0700
6956639: G1: assert(cached_ptr != card_ptr) failed: shouldn't be, concurrentG1Refine.cpp:307
Summary: During concurrent refinement, filter cards in young regions after it has been determined that the region has been allocated from and the young type of the region has been set.
Reviewed-by: iveresov, tonyp, jcoomes
1 /*
2 * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
25 #include "incls/_precompiled.incl"
26 #include "incls/_concurrentG1Refine.cpp.incl"
// Possible sizes for the card counts cache: odd primes that roughly double in size.
// (See jvmtiTagMap.cpp).
// The table is terminated by a -1 sentinel; once the sizes are exhausted the
// cache is sized directly from the maximum card count (see init() and
// expand_card_count_cache()).
int ConcurrentG1Refine::_cc_cache_sizes[] = {
  16381, 32771, 76831, 150001, 307261,
  614563, 1228891, 2457733, 4915219, 9830479,
  19660831, 39321619, 78643219, 157286461, -1
};
// Constructor. Ergonomically derives the green/yellow/red refinement work
// zones (each zone defaulting from the previous one when its flag was left
// unset) and creates the refinement threads: _n_worker_threads card
// refinement workers plus one extra thread for young gen RSet size sampling.
ConcurrentG1Refine::ConcurrentG1Refine() :
  _card_counts(NULL), _card_epochs(NULL),
  _n_card_counts(0), _max_n_card_counts(0),
  _cache_size_index(0), _expand_card_counts(false),
  _hot_cache(NULL),
  _def_use_cache(false), _use_cache(false),
  _n_periods(0),
  _threads(NULL), _n_threads(0)
{
  // Ergonomically select initial concurrent refinement parameters
  if (FLAG_IS_DEFAULT(G1ConcRefinementGreenZone)) {
    FLAG_SET_DEFAULT(G1ConcRefinementGreenZone, MAX2<int>(ParallelGCThreads, 1));
  }
  set_green_zone(G1ConcRefinementGreenZone);

  if (FLAG_IS_DEFAULT(G1ConcRefinementYellowZone)) {
    FLAG_SET_DEFAULT(G1ConcRefinementYellowZone, green_zone() * 3);
  }
  // The yellow zone is never allowed to fall below the green zone.
  set_yellow_zone(MAX2<int>(G1ConcRefinementYellowZone, green_zone()));

  if (FLAG_IS_DEFAULT(G1ConcRefinementRedZone)) {
    FLAG_SET_DEFAULT(G1ConcRefinementRedZone, yellow_zone() * 2);
  }
  // The red zone is never allowed to fall below the yellow zone.
  set_red_zone(MAX2<int>(G1ConcRefinementRedZone, yellow_zone()));
  _n_worker_threads = thread_num();
  // We need one extra thread to do the young gen rset size sampling.
  _n_threads = _n_worker_threads + 1;
  reset_threshold_step();

  _threads = NEW_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _n_threads);
  int worker_id_offset = (int)DirtyCardQueueSet::num_par_ids();
  ConcurrentG1RefineThread *next = NULL;
  // Create the threads in reverse order so that each thread can be handed
  // a pointer to its (already constructed) successor.
  for (int i = _n_threads - 1; i >= 0; i--) {
    ConcurrentG1RefineThread* t = new ConcurrentG1RefineThread(this, next, worker_id_offset, i);
    assert(t != NULL, "Conc refine should have been created");
    assert(t->cg1r() == this, "Conc refine thread should refer to this");
    _threads[i] = t;
    next = t;
  }
}
78 void ConcurrentG1Refine::reset_threshold_step() {
79 if (FLAG_IS_DEFAULT(G1ConcRefinementThresholdStep)) {
80 _thread_threshold_step = (yellow_zone() - green_zone()) / (worker_thread_num() + 1);
81 } else {
82 _thread_threshold_step = G1ConcRefinementThresholdStep;
83 }
84 }
86 int ConcurrentG1Refine::thread_num() {
87 return MAX2<int>((G1ConcRefinementThreads > 0) ? G1ConcRefinementThreads : ParallelGCThreads, 1);
88 }
// One-time initialization of the card count cache and the hot card cache.
// Only performed when G1ConcRSLogCacheSize > 0 (hot card caching enabled);
// otherwise all caching stays disabled.
void ConcurrentG1Refine::init() {
  if (G1ConcRSLogCacheSize > 0) {
    _g1h = G1CollectedHeap::heap();

    // The cache never needs more entries than there are cards covering the
    // reserved heap.
    _max_n_card_counts =
      (unsigned) (_g1h->g1_reserved_obj_bytes() >> CardTableModRefBS::card_shift);

    // Card numbers are packed into epoch entries together with the epoch;
    // make sure every card number is representable.
    size_t max_card_num = ((size_t)1 << (sizeof(unsigned)*BitsPerByte-1)) - 1;
    guarantee(_max_n_card_counts < max_card_num, "card_num representation");

    // Walk the precomputed size table to the first entry that covers the
    // desired initial fraction of the maximum, then back off one step.
    int desired = _max_n_card_counts / InitialCacheFraction;
    for (_cache_size_index = 0;
         _cc_cache_sizes[_cache_size_index] >= 0; _cache_size_index++) {
      if (_cc_cache_sizes[_cache_size_index] >= desired) break;
    }
    _cache_size_index = MAX2(0, (_cache_size_index - 1));

    int initial_size = _cc_cache_sizes[_cache_size_index];
    // Hit the -1 sentinel: size directly from the maximum card count.
    if (initial_size < 0) initial_size = _max_n_card_counts;

    // Make sure we don't go bigger than we will ever need
    _n_card_counts = MIN2((unsigned) initial_size, _max_n_card_counts);

    _card_counts = NEW_C_HEAP_ARRAY(CardCountCacheEntry, _n_card_counts);
    _card_epochs = NEW_C_HEAP_ARRAY(CardEpochCacheEntry, _n_card_counts);

    // Zero-fill both tables so every entry starts at epoch/count zero.
    Copy::fill_to_bytes(&_card_counts[0],
                        _n_card_counts * sizeof(CardCountCacheEntry));
    Copy::fill_to_bytes(&_card_epochs[0], _n_card_counts * sizeof(CardEpochCacheEntry));

    ModRefBarrierSet* bs = _g1h->mr_bs();
    guarantee(bs->is_a(BarrierSet::CardTableModRef), "Precondition");
    _ct_bs = (CardTableModRefBS*)bs;
    // Card table entry for the bottom of the reserved region; used to map
    // card pointers to card numbers and back (ptr_2_card_num et al.).
    _ct_bot = _ct_bs->byte_for_const(_g1h->reserved_region().start());

    _def_use_cache = true;
    _use_cache = true;
    _hot_cache_size = (1 << G1ConcRSLogCacheSize);
    _hot_cache = NEW_C_HEAP_ARRAY(jbyte*, _hot_cache_size);
    _n_hot = 0;
    _hot_cache_idx = 0;

    // For refining the cards in the hot cache in parallel
    int n_workers = (ParallelGCThreads > 0 ?
                     _g1h->workers()->total_workers() : 1);
    _hot_cache_par_chunk_size = MAX2(1, _hot_cache_size / n_workers);
    _hot_cache_par_claimed_idx = 0;
  }
}
139 void ConcurrentG1Refine::stop() {
140 if (_threads != NULL) {
141 for (int i = 0; i < _n_threads; i++) {
142 _threads[i]->stop();
143 }
144 }
145 }
147 void ConcurrentG1Refine::reinitialize_threads() {
148 reset_threshold_step();
149 if (_threads != NULL) {
150 for (int i = 0; i < _n_threads; i++) {
151 _threads[i]->initialize();
152 }
153 }
154 }
156 ConcurrentG1Refine::~ConcurrentG1Refine() {
157 if (G1ConcRSLogCacheSize > 0) {
158 assert(_card_counts != NULL, "Logic");
159 FREE_C_HEAP_ARRAY(CardCountCacheEntry, _card_counts);
160 assert(_card_epochs != NULL, "Logic");
161 FREE_C_HEAP_ARRAY(CardEpochCacheEntry, _card_epochs);
162 assert(_hot_cache != NULL, "Logic");
163 FREE_C_HEAP_ARRAY(jbyte*, _hot_cache);
164 }
165 if (_threads != NULL) {
166 for (int i = 0; i < _n_threads; i++) {
167 delete _threads[i];
168 }
169 FREE_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _threads);
170 }
171 }
173 void ConcurrentG1Refine::threads_do(ThreadClosure *tc) {
174 if (_threads != NULL) {
175 for (int i = 0; i < _n_threads; i++) {
176 tc->do_thread(_threads[i]);
177 }
178 }
179 }
181 bool ConcurrentG1Refine::is_young_card(jbyte* card_ptr) {
182 HeapWord* start = _ct_bs->addr_for(card_ptr);
183 HeapRegion* r = _g1h->heap_region_containing(start);
184 if (r != NULL && r->is_young()) {
185 return true;
186 }
187 // This card is not associated with a heap region
188 // so can't be young.
189 return false;
190 }
// Records card_ptr in the card count cache bucket for its card number and
// returns the card the caller should work with: either card_ptr itself, or
// the card that was evicted from the bucket. On return, *count holds the
// previous count for the returned card and *defer is true when processing
// of the returned card may be deferred (it is retained in the cache), false
// when it must be refined immediately (an eviction occurred). Lock-free:
// bucket ownership within a period is decided by a CAS on the epoch entry.
jbyte* ConcurrentG1Refine::add_card_count(jbyte* card_ptr, int* count, bool* defer) {
  unsigned new_card_num = ptr_2_card_num(card_ptr);
  unsigned bucket = hash(new_card_num);
  assert(0 <= bucket && bucket < _n_card_counts, "Bounds");

  CardCountCacheEntry* count_ptr = &_card_counts[bucket];
  CardEpochCacheEntry* epoch_ptr = &_card_epochs[bucket];

  // We have to construct a new entry if we haven't updated the counts
  // during the current period, or if the count was updated for a
  // different card number.
  unsigned int new_epoch = (unsigned int) _n_periods;
  julong new_epoch_entry = make_epoch_entry(new_card_num, new_epoch);

  while (true) {
    // Fetch the previous epoch value
    julong prev_epoch_entry = epoch_ptr->_value;
    julong cas_res;

    if (extract_epoch(prev_epoch_entry) != new_epoch) {
      // This entry has not yet been updated during this period.
      // Note: we update the epoch value atomically to ensure
      // that there is only one winner that updates the cached
      // card_ptr value even though all the refine threads share
      // the same epoch value.

      cas_res = (julong) Atomic::cmpxchg((jlong) new_epoch_entry,
                                         (volatile jlong*)&epoch_ptr->_value,
                                         (jlong) prev_epoch_entry);

      if (cas_res == prev_epoch_entry) {
        // We have successfully won the race to update the
        // epoch and card_num value. Make it look like the
        // count and eviction count were previously cleared.
        count_ptr->_count = 1;
        count_ptr->_evict_count = 0;
        *count = 0;
        // We can defer the processing of card_ptr
        *defer = true;
        return card_ptr;
      }
      // We did not win the race to update the epoch field, so some other
      // thread must have done it. The value that gets returned by CAS
      // should be the new epoch value.
      assert(extract_epoch(cas_res) == new_epoch, "unexpected epoch");
      // We could 'continue' here or just re-read the previous epoch value
      prev_epoch_entry = epoch_ptr->_value;
    }

    // The epoch entry for card_ptr has been updated during this period.
    unsigned old_card_num = extract_card_num(prev_epoch_entry);

    // The card count that will be returned to caller
    *count = count_ptr->_count;

    // Are we updating the count for the same card?
    if (new_card_num == old_card_num) {
      // Same card - just update the count. We could have more than one
      // thread racing to update count for the current card. It should be
      // OK not to use a CAS as the only penalty should be some missed
      // increments of the count which delays identifying the card as "hot".

      if (*count < max_jubyte) count_ptr->_count++;
      // We can defer the processing of card_ptr
      *defer = true;
      return card_ptr;
    }

    // Different card - evict old card info
    if (count_ptr->_evict_count < max_jubyte) count_ptr->_evict_count++;
    if (count_ptr->_evict_count > G1CardCountCacheExpandThreshold) {
      // Trigger a resize the next time we clear
      _expand_card_counts = true;
    }

    cas_res = (julong) Atomic::cmpxchg((jlong) new_epoch_entry,
                                       (volatile jlong*)&epoch_ptr->_value,
                                       (jlong) prev_epoch_entry);

    if (cas_res == prev_epoch_entry) {
      // We successfully updated the card num value in the epoch entry
      count_ptr->_count = 0; // initialize counter for new card num
      jbyte* old_card_ptr = card_num_2_ptr(old_card_num);

      // Even though the region containing the card at old_card_num was not
      // in the young list when old_card_num was recorded in the epoch
      // cache it could have been added to the free list and subsequently
      // added to the young list in the intervening time. See CR 6817995.
      // We do not deal with this case here - it will be handled in
      // HeapRegion::oops_on_card_seq_iterate_careful after it has been
      // determined that the region containing the card has been allocated
      // to, and it's safe to check the young type of the region.

      // We do not want to defer processing of card_ptr in this case
      // (we need to refine old_card_ptr and card_ptr)
      *defer = false;
      return old_card_ptr;
    }
    // Someone else beat us - try again.
  }
}
// Inserts card_ptr into the card count cache and, when the card has become
// "hot" (count >= G1ConcRSHotCardLimit), into the hot card cache. Returns
// the card (if any) the caller should refine right away: the inserted or
// evicted card when it is not hot, a card displaced from a full hot cache,
// or NULL when the card was absorbed by the hot cache. *defer is set as
// described in add_card_count.
jbyte* ConcurrentG1Refine::cache_insert(jbyte* card_ptr, bool* defer) {
  int count;
  jbyte* cached_ptr = add_card_count(card_ptr, &count, defer);
  assert(cached_ptr != NULL, "bad cached card ptr");

  // We've just inserted a card pointer into the card count cache
  // and got back the card that we just inserted or (evicted) the
  // previous contents of that count slot.

  // The card we got back could be in a young region. When the
  // returned card (if evicted) was originally inserted, we had
  // determined that its containing region was not young. However
  // it is possible for the region to be freed during a cleanup
  // pause, then reallocated and tagged as young which will result
  // in the returned card residing in a young region.
  //
  // We do not deal with this case here - the change from non-young
  // to young could be observed at any time - it will be handled in
  // HeapRegion::oops_on_card_seq_iterate_careful after it has been
  // determined that the region containing the card has been allocated
  // to.

  // The card pointer we obtained from card count cache is not hot
  // so do not store it in the cache; return it for immediate
  // refining.
  if (count < G1ConcRSHotCardLimit) {
    return cached_ptr;
  }

  // Otherwise, the pointer we got from the _card_counts cache is hot.
  jbyte* res = NULL;
  MutexLockerEx x(HotCardCache_lock, Mutex::_no_safepoint_check_flag);
  // A full hot cache evicts its oldest entry (at _hot_cache_idx, the slot
  // about to be overwritten); that evicted card is returned for refinement.
  if (_n_hot == _hot_cache_size) {
    res = _hot_cache[_hot_cache_idx];
    _n_hot--;
  }
  // Now _n_hot < _hot_cache_size, and we can insert at _hot_cache_idx.
  _hot_cache[_hot_cache_idx] = cached_ptr;
  _hot_cache_idx++;
  if (_hot_cache_idx == _hot_cache_size) _hot_cache_idx = 0;
  _n_hot++;

  // The card obtained from the hot card cache could be in a young
  // region. See above on how this can happen.

  return res;
}
// Drains the hot card cache, refining each cached non-NULL card. Called
// with the cache disabled; multiple workers cooperate by claiming
// fixed-size chunks of the cache via a CAS on _hot_cache_par_claimed_idx.
void ConcurrentG1Refine::clean_up_cache(int worker_i, G1RemSet* g1rs) {
  assert(!use_cache(), "cache should be disabled");
  int start_idx;

  while ((start_idx = _hot_cache_par_claimed_idx) < _n_hot) { // read once
    int end_idx = start_idx + _hot_cache_par_chunk_size;

    if (start_idx ==
        Atomic::cmpxchg(end_idx, &_hot_cache_par_claimed_idx, start_idx)) {
      // The current worker has successfully claimed the chunk [start_idx..end_idx)
      // The claimed end may overshoot the cache contents; clip it here.
      end_idx = MIN2(end_idx, _n_hot);
      for (int i = start_idx; i < end_idx; i++) {
        jbyte* entry = _hot_cache[i];
        if (entry != NULL) {
          g1rs->concurrentRefineOneCard(entry, worker_i);
        }
      }
    }
    // CAS failure: another worker claimed this chunk; re-read and retry.
  }
}
// Grows the card count/epoch tables to the next precomputed size, or to the
// maximum needed size once the table of primes is exhausted (-1 sentinel).
// The old contents are deliberately discarded; the caller
// (clear_and_record_card_counts) clears the new epoch table afterwards.
void ConcurrentG1Refine::expand_card_count_cache() {
  if (_n_card_counts < _max_n_card_counts) {
    int new_idx = _cache_size_index+1;
    int new_size = _cc_cache_sizes[new_idx];
    if (new_size < 0) new_size = _max_n_card_counts;

    // Make sure we don't go bigger than we will ever need
    new_size = MIN2((unsigned) new_size, _max_n_card_counts);

    // Expand the card count and card epoch tables
    if (new_size > (int)_n_card_counts) {
      // We can just free and allocate a new array as we're
      // not interested in preserving the contents
      assert(_card_counts != NULL, "Logic!");
      assert(_card_epochs != NULL, "Logic!");
      FREE_C_HEAP_ARRAY(CardCountCacheEntry, _card_counts);
      FREE_C_HEAP_ARRAY(CardEpochCacheEntry, _card_epochs);
      _n_card_counts = new_size;
      _card_counts = NEW_C_HEAP_ARRAY(CardCountCacheEntry, _n_card_counts);
      _card_epochs = NEW_C_HEAP_ARRAY(CardEpochCacheEntry, _n_card_counts);
      // Only advance the size index when we actually moved to the new size.
      _cache_size_index = new_idx;
    }
  }
}
388 void ConcurrentG1Refine::clear_and_record_card_counts() {
389 if (G1ConcRSLogCacheSize == 0) return;
391 #ifndef PRODUCT
392 double start = os::elapsedTime();
393 #endif
395 if (_expand_card_counts) {
396 expand_card_count_cache();
397 _expand_card_counts = false;
398 // Only need to clear the epochs.
399 Copy::fill_to_bytes(&_card_epochs[0], _n_card_counts * sizeof(CardEpochCacheEntry));
400 }
402 int this_epoch = (int) _n_periods;
403 assert((this_epoch+1) <= max_jint, "to many periods");
404 // Update epoch
405 _n_periods++;
407 #ifndef PRODUCT
408 double elapsed = os::elapsedTime() - start;
409 _g1h->g1_policy()->record_cc_clear_time(elapsed * 1000.0);
410 #endif
411 }
413 void ConcurrentG1Refine::print_worker_threads_on(outputStream* st) const {
414 for (int i = 0; i < _n_threads; ++i) {
415 _threads[i]->print_on(st);
416 st->cr();
417 }
418 }