24 |
24 |
25 #include "precompiled.hpp" |
25 #include "precompiled.hpp" |
26 #include "gc_implementation/g1/concurrentG1Refine.hpp" |
26 #include "gc_implementation/g1/concurrentG1Refine.hpp" |
27 #include "gc_implementation/g1/concurrentG1RefineThread.hpp" |
27 #include "gc_implementation/g1/concurrentG1RefineThread.hpp" |
28 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp" |
28 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp" |
29 #include "gc_implementation/g1/g1CollectorPolicy.hpp" |
29 #include "gc_implementation/g1/g1HotCardCache.hpp" |
30 #include "gc_implementation/g1/g1GCPhaseTimes.hpp" |
|
31 #include "gc_implementation/g1/g1RemSet.hpp" |
|
32 #include "gc_implementation/g1/heapRegionSeq.inline.hpp" |
|
33 #include "memory/space.inline.hpp" |
|
34 #include "runtime/atomic.hpp" |
|
35 #include "runtime/java.hpp" |
|
36 #include "utilities/copy.hpp" |
|
37 |
30 |
38 // Possible sizes for the card counts cache: odd primes that roughly double in size. |
31 ConcurrentG1Refine::ConcurrentG1Refine(G1CollectedHeap* g1h) : |
39 // (See jvmtiTagMap.cpp). |
32 _threads(NULL), _n_threads(0), |
40 |
33 _hot_card_cache(g1h) |
// MAX_SIZE is (size_t)-1, i.e. the largest size_t value. It is the final
// entry of the size table below.
41 #define MAX_SIZE ((size_t) -1) |

42 |

// Candidate entry counts for the card counts/epochs arrays, in increasing
// order. expand_card_count_cache() indexes this table and clamps the
// selected value to _max_n_card_counts, so the trailing MAX_SIZE entry
// never yields an array larger than that limit.
43 size_t ConcurrentG1Refine::_cc_cache_sizes[] = { |

44 16381, 32771, 76831, 150001, 307261, |

45 614563, 1228891, 2457733, 4915219, 9830479, |

46 19660831, 39321619, 78643219, 157286461, MAX_SIZE |

47 }; |
|
48 |
|
49 ConcurrentG1Refine::ConcurrentG1Refine() : |
|
50 _card_counts(NULL), _card_epochs(NULL), |
|
51 _n_card_counts(0), _max_cards(0), _max_n_card_counts(0), |
|
52 _cache_size_index(0), _expand_card_counts(false), |
|
53 _hot_cache(NULL), |
|
54 _def_use_cache(false), _use_cache(false), |
|
55 // We initialize the epochs of the array to 0. By initializing |
|
56 // _n_periods to 1 and not 0 we automatically invalidate all the |
|
57 // entries on the array. Otherwise we might accidentally think that |
|
58 // we claimed a card that was in fact never set (see CR7033292). |
|
59 _n_periods(1), |
|
60 _threads(NULL), _n_threads(0) |
|
61 { |
34 { |
62 |
|
63 // Ergonomically select initial concurrent refinement parameters |
35 // Ergonomically select initial concurrent refinement parameters |
64 if (FLAG_IS_DEFAULT(G1ConcRefinementGreenZone)) { |
36 if (FLAG_IS_DEFAULT(G1ConcRefinementGreenZone)) { |
65 FLAG_SET_DEFAULT(G1ConcRefinementGreenZone, MAX2<int>(ParallelGCThreads, 1)); |
37 FLAG_SET_DEFAULT(G1ConcRefinementGreenZone, MAX2<int>(ParallelGCThreads, 1)); |
66 } |
38 } |
67 set_green_zone(G1ConcRefinementGreenZone); |
39 set_green_zone(G1ConcRefinementGreenZone); |
98 } else { |
74 } else { |
99 _thread_threshold_step = G1ConcRefinementThresholdStep; |
75 _thread_threshold_step = G1ConcRefinementThresholdStep; |
100 } |
76 } |
101 } |
77 } |
102 |
78 |
// Returns G1ConcRefinementThreads when that flag is positive, otherwise
// ParallelGCThreads; the result is clamped to a minimum of 1.
103 int ConcurrentG1Refine::thread_num() { |

104 return MAX2<int>((G1ConcRefinementThreads > 0) ? G1ConcRefinementThreads : ParallelGCThreads, 1); |

105 } |
|
106 |
|
107 void ConcurrentG1Refine::init() { |
79 void ConcurrentG1Refine::init() { |
108 if (G1ConcRSLogCacheSize > 0) { |
80 _hot_card_cache.initialize(); |
109 _g1h = G1CollectedHeap::heap(); |
|
110 |
|
111 _max_cards = _g1h->max_capacity() >> CardTableModRefBS::card_shift; |
|
112 _max_n_card_counts = _max_cards * G1MaxHotCardCountSizePercent / 100; |
|
113 |
|
114 size_t max_card_num = ((size_t)1 << (sizeof(unsigned)*BitsPerByte-1)) - 1; |
|
115 guarantee(_max_cards < max_card_num, "card_num representation"); |
|
116 |
|
117 // We need _n_card_counts to be less than _max_n_card_counts here |
|
118 // so that the expansion call (below) actually allocates the |
|
119 // _counts and _epochs arrays. |
|
120 assert(_n_card_counts == 0, "pre-condition"); |
|
121 assert(_max_n_card_counts > 0, "pre-condition"); |
|
122 |
|
123 // Find the index into cache size array that is of a size that's |
|
124 // large enough to hold desired_sz. |
|
125 size_t desired_sz = _max_cards / InitialCacheFraction; |
|
126 int desired_sz_index = 0; |
|
127 while (_cc_cache_sizes[desired_sz_index] < desired_sz) { |
|
128 desired_sz_index += 1; |
|
129 assert(desired_sz_index < MAX_CC_CACHE_INDEX, "invariant"); |
|
130 } |
|
131 assert(desired_sz_index < MAX_CC_CACHE_INDEX, "invariant"); |
|
132 |
|
133 // If the desired_sz value is between two sizes then |
|
134 // _cc_cache_sizes[desired_sz_index-1] < desired_sz <= _cc_cache_sizes[desired_sz_index] |
|
135 // we will start with the lower size in the optimistic expectation that |
|
136 // we will not need to expand up. Note desired_sz_index could also be 0. |
|
137 if (desired_sz_index > 0 && |
|
138 _cc_cache_sizes[desired_sz_index] > desired_sz) { |
|
139 desired_sz_index -= 1; |
|
140 } |
|
141 |
|
142 if (!expand_card_count_cache(desired_sz_index)) { |
|
143 // Allocation was unsuccessful - exit |
|
144 vm_exit_during_initialization("Could not reserve enough space for card count cache"); |
|
145 } |
|
146 assert(_n_card_counts > 0, "post-condition"); |
|
147 assert(_cache_size_index == desired_sz_index, "post-condition"); |
|
148 |
|
149 Copy::fill_to_bytes(&_card_counts[0], |
|
150 _n_card_counts * sizeof(CardCountCacheEntry)); |
|
151 Copy::fill_to_bytes(&_card_epochs[0], _n_card_counts * sizeof(CardEpochCacheEntry)); |
|
152 |
|
153 ModRefBarrierSet* bs = _g1h->mr_bs(); |
|
154 guarantee(bs->is_a(BarrierSet::CardTableModRef), "Precondition"); |
|
155 _ct_bs = (CardTableModRefBS*)bs; |
|
156 _ct_bot = _ct_bs->byte_for_const(_g1h->reserved_region().start()); |
|
157 |
|
158 _def_use_cache = true; |
|
159 _use_cache = true; |
|
160 _hot_cache_size = (1 << G1ConcRSLogCacheSize); |
|
161 _hot_cache = NEW_C_HEAP_ARRAY(jbyte*, _hot_cache_size, mtGC); |
|
162 _n_hot = 0; |
|
163 _hot_cache_idx = 0; |
|
164 |
|
165 // For refining the cards in the hot cache in parallel |
|
166 int n_workers = (ParallelGCThreads > 0 ? |
|
167 _g1h->workers()->total_workers() : 1); |
|
168 _hot_cache_par_chunk_size = MAX2(1, _hot_cache_size / n_workers); |
|
169 _hot_cache_par_claimed_idx = 0; |
|
170 } |
|
171 } |
81 } |
172 |
82 |
173 void ConcurrentG1Refine::stop() { |
83 void ConcurrentG1Refine::stop() { |
174 if (_threads != NULL) { |
84 if (_threads != NULL) { |
175 for (int i = 0; i < _n_threads; i++) { |
85 for (int i = 0; i < _n_threads; i++) { |
213 tc->do_thread(_threads[i]); |
112 tc->do_thread(_threads[i]); |
214 } |
113 } |
215 } |
114 } |
216 } |
115 } |
217 |
116 |
218 bool ConcurrentG1Refine::is_young_card(jbyte* card_ptr) { |
117 int ConcurrentG1Refine::thread_num() { |
219 HeapWord* start = _ct_bs->addr_for(card_ptr); |
118 int n_threads = (G1ConcRefinementThreads > 0) ? G1ConcRefinementThreads |
220 HeapRegion* r = _g1h->heap_region_containing(start); |
119 : ParallelGCThreads; |
221 if (r != NULL && r->is_young()) { |
120 return MAX2<int>(n_threads, 1); |
222 return true; |
|
223 } |
|
224 // This card is not associated with a heap region |
|
225 // so can't be young. |
|
226 return false; |
|
227 } |
|
228 |
|
// Records an occurrence of card_ptr in the count/epoch slot selected by
// hash(ptr_2_card_num(card_ptr)).
//
// Out-parameters:
//   *count - 0 when this call claims the slot for the current period;
//            otherwise the slot's count as read before this call modifies it
//            (which is the evicted card's count when a different card
//            occupied the slot).
//   *defer - true when the returned pointer is card_ptr itself; false when
//            a different card was evicted from the slot.
//
// Returns card_ptr, or the pointer of the evicted card
// (card_num_2_ptr(old_card_num)) when the slot held a different card that
// had already been recorded in the current period.
//
// The epoch entry is only ever replaced with Atomic::cmpxchg; the count
// fields are updated without a CAS (see the comments in the body).
229 jbyte* ConcurrentG1Refine::add_card_count(jbyte* card_ptr, int* count, bool* defer) { |

230 unsigned new_card_num = ptr_2_card_num(card_ptr); |

231 unsigned bucket = hash(new_card_num); |

// NOTE(review): bucket is unsigned, so "0 <= bucket" is always true; only
// the upper-bound half of this assert checks anything.
232 assert(0 <= bucket && bucket < _n_card_counts, "Bounds"); |

233 |

234 CardCountCacheEntry* count_ptr = &_card_counts[bucket]; |

235 CardEpochCacheEntry* epoch_ptr = &_card_epochs[bucket]; |

236 |

237 // We have to construct a new entry if we haven't updated the counts |

238 // during the current period, or if the count was updated for a |

239 // different card number. |

240 unsigned int new_epoch = (unsigned int) _n_periods; |

241 julong new_epoch_entry = make_epoch_entry(new_card_num, new_epoch); |

242 |

243 while (true) { |

244 // Fetch the previous epoch value |

245 julong prev_epoch_entry = epoch_ptr->_value; |

246 julong cas_res; |

247 |

248 if (extract_epoch(prev_epoch_entry) != new_epoch) { |

249 // This entry has not yet been updated during this period. |

250 // Note: we update the epoch value atomically to ensure |

251 // that there is only one winner that updates the cached |

252 // card_ptr value even though all the refine threads share |

253 // the same epoch value. |

254 |

255 cas_res = (julong) Atomic::cmpxchg((jlong) new_epoch_entry, |

256 (volatile jlong*)&epoch_ptr->_value, |

257 (jlong) prev_epoch_entry); |

258 |

259 if (cas_res == prev_epoch_entry) { |

260 // We have successfully won the race to update the |

261 // epoch and card_num value. Make it look like the |

262 // count and eviction count were previously cleared. |

263 count_ptr->_count = 1; |

264 count_ptr->_evict_count = 0; |

265 *count = 0; |

266 // We can defer the processing of card_ptr |

267 *defer = true; |

268 return card_ptr; |

269 } |

270 // We did not win the race to update the epoch field, so some other |

271 // thread must have done it. The value that gets returned by CAS |

272 // should be the new epoch value. |

273 assert(extract_epoch(cas_res) == new_epoch, "unexpected epoch"); |

274 // We could 'continue' here or just re-read the previous epoch value |

275 prev_epoch_entry = epoch_ptr->_value; |

276 } |

277 |

278 // The epoch entry for card_ptr has been updated during this period. |

279 unsigned old_card_num = extract_card_num(prev_epoch_entry); |

280 |

281 // The card count that will be returned to caller |

282 *count = count_ptr->_count; |

283 |

284 // Are we updating the count for the same card? |

285 if (new_card_num == old_card_num) { |

286 // Same card - just update the count. We could have more than one |

287 // thread racing to update count for the current card. It should be |

288 // OK not to use a CAS as the only penalty should be some missed |

289 // increments of the count which delays identifying the card as "hot". |

290 |

291 if (*count < max_jubyte) count_ptr->_count++; |

292 // We can defer the processing of card_ptr |

293 *defer = true; |

294 return card_ptr; |

295 } |

296 |

297 // Different card - evict old card info |

298 if (count_ptr->_evict_count < max_jubyte) count_ptr->_evict_count++; |

299 if (count_ptr->_evict_count > G1CardCountCacheExpandThreshold) { |

300 // Trigger a resize the next time we clear |

301 _expand_card_counts = true; |

302 } |

303 |

304 cas_res = (julong) Atomic::cmpxchg((jlong) new_epoch_entry, |

305 (volatile jlong*)&epoch_ptr->_value, |

306 (jlong) prev_epoch_entry); |

307 |

308 if (cas_res == prev_epoch_entry) { |

309 // We successfully updated the card num value in the epoch entry |

310 count_ptr->_count = 0; // initialize counter for new card num |

311 jbyte* old_card_ptr = card_num_2_ptr(old_card_num); |

312 |

313 // Even though the region containing the card at old_card_num was not |

314 // in the young list when old_card_num was recorded in the epoch |

315 // cache it could have been added to the free list and subsequently |

316 // added to the young list in the intervening time. See CR 6817995. |

317 // We do not deal with this case here - it will be handled in |

318 // HeapRegion::oops_on_card_seq_iterate_careful after it has been |

319 // determined that the region containing the card has been allocated |

320 // to, and it's safe to check the young type of the region. |

321 |

322 // We do not want to defer processing of card_ptr in this case |

323 // (we need to refine old_card_ptr and card_ptr) |

324 *defer = false; |

325 return old_card_ptr; |

326 } |

327 // Someone else beat us - try again. |

328 } |

329 } |
|
330 |
|
// Registers card_ptr with the card count cache (via add_card_count) and, if
// the resulting card is hot, moves it into the hot card cache.
//
// *defer is set by add_card_count and is not modified here.
//
// Returns:
//   - the pointer obtained from add_card_count when its reported count is
//     below G1ConcRSHotCardLimit (the card is not hot);
//   - otherwise the entry displaced from _hot_cache when the hot cache was
//     full, or NULL when the hot cache still had room.
//
// The hot cache fields (_hot_cache, _hot_cache_idx, _n_hot) are only touched
// while holding HotCardCache_lock.
331 jbyte* ConcurrentG1Refine::cache_insert(jbyte* card_ptr, bool* defer) { |

332 int count; |

333 jbyte* cached_ptr = add_card_count(card_ptr, &count, defer); |

334 assert(cached_ptr != NULL, "bad cached card ptr"); |

335 |

336 // We've just inserted a card pointer into the card count cache |

337 // and got back the card that we just inserted or (evicted) the |

338 // previous contents of that count slot. |

339 |

340 // The card we got back could be in a young region. When the |

341 // returned card (if evicted) was originally inserted, we had |

342 // determined that its containing region was not young. However |

343 // it is possible for the region to be freed during a cleanup |

344 // pause, then reallocated and tagged as young which will result |

345 // in the returned card residing in a young region. |

346 // |

347 // We do not deal with this case here - the change from non-young |

348 // to young could be observed at any time - it will be handled in |

349 // HeapRegion::oops_on_card_seq_iterate_careful after it has been |

350 // determined that the region containing the card has been allocated |

351 // to. |

352 |

353 // The card pointer we obtained from card count cache is not hot |

354 // so do not store it in the cache; return it for immediate |

355 // refining. |

356 if (count < G1ConcRSHotCardLimit) { |

357 return cached_ptr; |

358 } |

359 |

360 // Otherwise, the pointer we got from the _card_counts cache is hot. |

361 jbyte* res = NULL; |

362 MutexLockerEx x(HotCardCache_lock, Mutex::_no_safepoint_check_flag); |

// When the hot cache is full, the slot at _hot_cache_idx is about to be
// overwritten, so its current occupant is handed back to the caller.
363 if (_n_hot == _hot_cache_size) { |

364 res = _hot_cache[_hot_cache_idx]; |

365 _n_hot--; |

366 } |

367 // Now _n_hot < _hot_cache_size, and we can insert at _hot_cache_idx. |

368 _hot_cache[_hot_cache_idx] = cached_ptr; |

369 _hot_cache_idx++; |

// Wrap the insertion index so the array is used as a ring.
370 if (_hot_cache_idx == _hot_cache_size) _hot_cache_idx = 0; |

371 _n_hot++; |

372 |

373 // The card obtained from the hot card cache could be in a young |

374 // region. See above on how this can happen. |

375 |

376 return res; |

377 } |
|
378 |
|
// Refines the entries currently held in the hot card cache, i.e. the
// indices [0, _n_hot) of _hot_cache.
//
// Callers claim work in chunks of _hot_cache_par_chunk_size indices by
// advancing _hot_cache_par_claimed_idx with Atomic::cmpxchg, so several
// workers can call this at the same time without processing the same index.
//
// worker_i      - passed through to G1RemSet::concurrentRefineOneCard.
// g1rs          - remembered set used to refine each non-NULL entry.
// into_cset_dcq - receives every entry for which concurrentRefineOneCard
//                 returns true.
//
// Asserts that the cache is disabled (use_cache() is false) on entry.
379 void ConcurrentG1Refine::clean_up_cache(int worker_i, |

380 G1RemSet* g1rs, |

381 DirtyCardQueue* into_cset_dcq) { |

382 assert(!use_cache(), "cache should be disabled"); |

383 int start_idx; |

384 |

385 while ((start_idx = _hot_cache_par_claimed_idx) < _n_hot) { // read once |

386 int end_idx = start_idx + _hot_cache_par_chunk_size; |

387 |

388 if (start_idx == |

389 Atomic::cmpxchg(end_idx, &_hot_cache_par_claimed_idx, start_idx)) { |

390 // The current worker has successfully claimed the chunk [start_idx..end_idx) |

// The last chunk may extend past the populated part of the cache; clip it.
391 end_idx = MIN2(end_idx, _n_hot); |

392 for (int i = start_idx; i < end_idx; i++) { |

393 jbyte* entry = _hot_cache[i]; |

394 if (entry != NULL) { |

395 if (g1rs->concurrentRefineOneCard(entry, worker_i, true)) { |

396 // 'entry' contains references that point into the current |

397 // collection set. We need to record 'entry' in the DCQS |

398 // that's used for that purpose. |

399 // |

400 // The only time we care about recording cards that contain |

401 // references that point into the collection set is during |

402 // RSet updating while within an evacuation pause. |

403 // In this case worker_i should be the id of a GC worker thread |

404 assert(SafepointSynchronize::is_at_safepoint(), "not during an evacuation pause"); |

405 assert(worker_i < (int) (ParallelGCThreads == 0 ? 1 : ParallelGCThreads), "incorrect worker id"); |

406 into_cset_dcq->enqueue(entry); |

407 } |

408 } |

409 } |

410 } |

411 } |

412 } |
|
413 |
|
414 // The arrays used to hold the card counts and the epochs must have |

415 // a 1:1 correspondence. Hence they are allocated and freed together |

416 // Returns true if the allocations of both the counts and epochs |

417 // were successful; false otherwise. |

//
// n      - number of entries to allocate in each array.
// counts - out-parameter; must point to NULL on entry (asserted). Receives
//          the new counts array on success.
// epochs - out-parameter; must point to NULL on entry (asserted). Receives
//          the new epochs array on success.
//
// When false is returned, *counts has been reset to NULL (and any partial
// allocation freed) and *epochs is NULL, so the caller has nothing to
// release. The returned memory is not initialized by this function.
418 bool ConcurrentG1Refine::allocate_card_count_cache(size_t n, |

419 CardCountCacheEntry** counts, |

420 CardEpochCacheEntry** epochs) { |

421 // We call the allocation/free routines directly for the counts |

422 // and epochs arrays. The NEW_C_HEAP_ARRAY/FREE_C_HEAP_ARRAY |

423 // macros call AllocateHeap and FreeHeap respectively. |

424 // AllocateHeap will call vm_exit_out_of_memory in the event |

425 // of an allocation failure and abort the JVM. With the |

426 // _counts/epochs arrays we only need to abort the JVM if the |

427 // initial allocation of these arrays fails. |

428 // |

429 // Additionally AllocateHeap/FreeHeap do some tracing of |

430 // allocate/free calls so calling one without calling the |

431 // other can cause inconsistencies in the tracing. So we |

432 // call neither. |

433 |

434 assert(*counts == NULL, "out param"); |

435 assert(*epochs == NULL, "out param"); |

436 |

// NOTE(review): these products are not checked for size_t overflow;
// presumably n is bounded by the caller - confirm.
437 size_t counts_size = n * sizeof(CardCountCacheEntry); |

438 size_t epochs_size = n * sizeof(CardEpochCacheEntry); |

439 |

440 *counts = (CardCountCacheEntry*) os::malloc(counts_size, mtGC); |

441 if (*counts == NULL) { |

442 // allocation was unsuccessful |

443 return false; |

444 } |

445 |

446 *epochs = (CardEpochCacheEntry*) os::malloc(epochs_size, mtGC); |

447 if (*epochs == NULL) { |

448 // allocation was unsuccessful - free counts array |

449 assert(*counts != NULL, "must be"); |

450 os::free(*counts, mtGC); |

451 *counts = NULL; |

452 return false; |

453 } |

454 |

455 // We successfully allocated both counts and epochs |

456 return true; |

457 } |
|
458 |
|
459 // Returns true if the card counts/epochs cache was |

460 // successfully expanded; false otherwise. |

//
// cache_size_idx - index into _cc_cache_sizes of the requested size. The
//                  size taken from the table is clamped to
//                  _max_n_card_counts.
//
// On success the old arrays are freed without copying their contents, and
// _n_card_counts, _cache_size_index, _card_counts and _card_epochs are
// updated. The new arrays are not cleared here.
//
// Returns false, leaving the existing arrays and fields untouched, when the
// cache is already at _max_n_card_counts, when the requested size is not
// larger than the current one, or when allocation fails.
461 bool ConcurrentG1Refine::expand_card_count_cache(int cache_size_idx) { |

462 // Can we expand the card count and epoch tables? |

463 if (_n_card_counts < _max_n_card_counts) { |

464 assert(cache_size_idx >= 0 && cache_size_idx < MAX_CC_CACHE_INDEX, "oob"); |

465 |

466 size_t cache_size = _cc_cache_sizes[cache_size_idx]; |

467 // Make sure we don't go bigger than we will ever need |

468 cache_size = MIN2(cache_size, _max_n_card_counts); |

469 |

470 // Should we expand the card count and card epoch tables? |

471 if (cache_size > _n_card_counts) { |

472 // We have been asked to allocate new, larger, arrays for |

473 // the card counts and the epochs. Attempt the allocation |

474 // of both before we free the existing arrays in case |

475 // the allocation is unsuccessful... |

476 CardCountCacheEntry* counts = NULL; |

477 CardEpochCacheEntry* epochs = NULL; |

478 |

479 if (allocate_card_count_cache(cache_size, &counts, &epochs)) { |

480 // Allocation was successful. |

481 // We can just free the old arrays; we're |

482 // not interested in preserving the contents |

483 if (_card_counts != NULL) os::free(_card_counts, mtGC); |

484 if (_card_epochs != NULL) os::free(_card_epochs, mtGC); |

485 |

486 // Cache the size of the arrays and the index that got us there. |

487 _n_card_counts = cache_size; |

488 _cache_size_index = cache_size_idx; |

489 |

490 _card_counts = counts; |

491 _card_epochs = epochs; |

492 |

493 // We successfully allocated/expanded the caches. |

494 return true; |

495 } |

496 } |

497 } |

498 |

499 // We did not successfully expand the caches. |

500 return false; |

501 } |
|
502 |
|
// Ends the current card-count period.
//
// Does nothing when G1ConcRSLogCacheSize is 0. Otherwise:
//   1. if an expansion was requested (_expand_card_counts), tries to grow
//      the count/epoch arrays to the next size index and, on success,
//      clears the new epochs array; the request flag is reset either way;
//   2. increments _n_periods, which makes every epoch entry recorded under
//      the previous value compare as stale in add_card_count;
//   3. reports the elapsed time in milliseconds through
//      phase_times()->record_cc_clear_time_ms().
503 void ConcurrentG1Refine::clear_and_record_card_counts() { |

504 if (G1ConcRSLogCacheSize == 0) { |

505 return; |

506 } |

507 |

508 double start = os::elapsedTime(); |

509 |

510 if (_expand_card_counts) { |

511 int new_idx = _cache_size_index + 1; |

512 |

513 if (expand_card_count_cache(new_idx)) { |

514 // Allocation was successful and _n_card_counts has |

515 // been updated to the new size. We only need to clear |

516 // the epochs so we don't read a bogus epoch value |

517 // when inserting a card into the hot card cache. |

518 Copy::fill_to_bytes(&_card_epochs[0], _n_card_counts * sizeof(CardEpochCacheEntry)); |

519 } |

520 _expand_card_counts = false; |

521 } |

522 |

523 int this_epoch = (int) _n_periods; |

// NOTE(review): for an int, (this_epoch+1) <= max_jint can never be false
// (the addition would overflow instead), so this assert does not detect
// the condition it names; presumably "this_epoch < max_jint" was intended.
524 assert((this_epoch+1) <= max_jint, "to many periods"); |

525 // Update epoch |

526 _n_periods++; |

527 double cc_clear_time_ms = (os::elapsedTime() - start) * 1000; |

528 _g1h->g1_policy()->phase_times()->record_cc_clear_time_ms(cc_clear_time_ms); |

529 } |
121 } |
530 |
122 |
531 void ConcurrentG1Refine::print_worker_threads_on(outputStream* st) const { |
123 void ConcurrentG1Refine::print_worker_threads_on(outputStream* st) const { |
532 for (int i = 0; i < _n_threads; ++i) { |
124 for (int i = 0; i < _n_threads; ++i) { |
533 _threads[i]->print_on(st); |
125 _threads[i]->print_on(st); |