/*
 * Copyright 2001-2009 Sun Microsystems, Inc.  All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 */

#include "incls/_precompiled.incl"
#include "incls/_concurrentG1Refine.cpp.incl"

// Possible sizes for the card counts cache: odd primes that roughly
// double in size. (See jvmtiTagMap.cpp).
int ConcurrentG1Refine::_cc_cache_sizes[] = {
    16381,    32771,    76831,    150001,    307261,
   614563,  1228891,  2457733,   4915219,   9830479,
 19660831, 39321619, 78643219, 157286461,        -1
};

ConcurrentG1Refine::ConcurrentG1Refine() :
  _card_counts(NULL), _card_epochs(NULL),
  _n_card_counts(0), _max_n_card_counts(0),
  _cache_size_index(0), _expand_card_counts(false),
  _hot_cache(NULL),
  _def_use_cache(false), _use_cache(false),
  _n_periods(0),
  _threads(NULL), _n_threads(0)
{
  if (G1ConcRefine) {
    _n_threads = (int)thread_num();
    if (_n_threads > 0) {
      _threads = NEW_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _n_threads);
      int worker_id_offset = (int)DirtyCardQueueSet::num_par_ids();
      ConcurrentG1RefineThread *next = NULL;
      for (int i = _n_threads - 1; i >= 0; i--) {
        ConcurrentG1RefineThread* t = new ConcurrentG1RefineThread(this, next, worker_id_offset, i);
        assert(t != NULL, "Conc refine should have been created");
        assert(t->cg1r() == this, "Conc refine thread should refer to this");
        _threads[i] = t;
        next = t;
      }
    }
  }
}

size_t ConcurrentG1Refine::thread_num() {
  if (G1ConcRefine) {
    return (G1ParallelRSetThreads > 0) ? G1ParallelRSetThreads : ParallelGCThreads;
  }
  return 0;
}

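// init() sets up the two data structures used when the hot card cache is
// enabled (G1ConcRSLogCacheSize > 0): the card count/epoch cache, sized
// from _cc_cache_sizes according to the number of cards spanned by the
// reserved heap, and the hot card cache itself, holding
// 2^G1ConcRSLogCacheSize card pointers. As a rough worked example
// (assuming the usual 512-byte cards, i.e. card_shift == 9), a 1 GB
// reserved heap spans 2^30 / 2^9 = 2^21 (about two million) cards, so
// _max_n_card_counts is about 2M; the sizing loop then picks the first
// _cc_cache_sizes entry that reaches _max_n_card_counts /
// InitialCacheFraction and backs off one step, never exceeding
// _max_n_card_counts.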
void ConcurrentG1Refine::init() {
  if (G1ConcRSLogCacheSize > 0) {
    _g1h = G1CollectedHeap::heap();
    _max_n_card_counts =
      (unsigned) (_g1h->g1_reserved_obj_bytes() >> CardTableModRefBS::card_shift);

    size_t max_card_num = ((size_t)1 << (sizeof(unsigned)*BitsPerByte-1)) - 1;
    guarantee(_max_n_card_counts < max_card_num, "card_num representation");

    int desired = _max_n_card_counts / InitialCacheFraction;
    for (_cache_size_index = 0;
         _cc_cache_sizes[_cache_size_index] >= 0; _cache_size_index++) {
      if (_cc_cache_sizes[_cache_size_index] >= desired) break;
    }
    _cache_size_index = MAX2(0, (_cache_size_index - 1));

    int initial_size = _cc_cache_sizes[_cache_size_index];
    if (initial_size < 0) initial_size = _max_n_card_counts;

    // Make sure we don't go bigger than we will ever need
    _n_card_counts = MIN2((unsigned) initial_size, _max_n_card_counts);

    _card_counts = NEW_C_HEAP_ARRAY(CardCountCacheEntry, _n_card_counts);
    _card_epochs = NEW_C_HEAP_ARRAY(CardEpochCacheEntry, _n_card_counts);

    Copy::fill_to_bytes(&_card_counts[0],
                        _n_card_counts * sizeof(CardCountCacheEntry));
    Copy::fill_to_bytes(&_card_epochs[0], _n_card_counts * sizeof(CardEpochCacheEntry));

    ModRefBarrierSet* bs = _g1h->mr_bs();
    guarantee(bs->is_a(BarrierSet::CardTableModRef), "Precondition");
    _ct_bs = (CardTableModRefBS*)bs;
    _ct_bot = _ct_bs->byte_for_const(_g1h->reserved_region().start());

    _def_use_cache = true;
    _use_cache = true;
    _hot_cache_size = (1 << G1ConcRSLogCacheSize);
    _hot_cache = NEW_C_HEAP_ARRAY(jbyte*, _hot_cache_size);
    _n_hot = 0;
    _hot_cache_idx = 0;

    // For refining the cards in the hot cache in parallel
    int n_workers = (ParallelGCThreads > 0 ?
                        _g1h->workers()->total_workers() : 1);
    _hot_cache_par_chunk_size = MAX2(1, _hot_cache_size / n_workers);
    _hot_cache_par_claimed_idx = 0;
  }
}

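// Note: the two fields initialized at the end of init() above,
// _hot_cache_par_chunk_size and _hot_cache_par_claimed_idx, are consumed
// by clean_up_cache() below, where multiple workers claim disjoint chunks
// of _hot_cache with Atomic::cmpxchg and refine the claimed entries in
// parallel.
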
void ConcurrentG1Refine::stop() {
  if (_threads != NULL) {
    for (int i = 0; i < _n_threads; i++) {
      _threads[i]->stop();
    }
  }
}

ConcurrentG1Refine::~ConcurrentG1Refine() {
  if (G1ConcRSLogCacheSize > 0) {
    assert(_card_counts != NULL, "Logic");
    FREE_C_HEAP_ARRAY(CardCountCacheEntry, _card_counts);
    assert(_card_epochs != NULL, "Logic");
    FREE_C_HEAP_ARRAY(CardEpochCacheEntry, _card_epochs);
    assert(_hot_cache != NULL, "Logic");
    FREE_C_HEAP_ARRAY(jbyte*, _hot_cache);
  }
  if (_threads != NULL) {
    for (int i = 0; i < _n_threads; i++) {
      delete _threads[i];
    }
    FREE_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _threads);
  }
}

void ConcurrentG1Refine::threads_do(ThreadClosure *tc) {
  if (_threads != NULL) {
    for (int i = 0; i < _n_threads; i++) {
      tc->do_thread(_threads[i]);
    }
  }
}

bool ConcurrentG1Refine::is_young_card(jbyte* card_ptr) {
  HeapWord* start = _ct_bs->addr_for(card_ptr);
  HeapRegion* r = _g1h->heap_region_containing(start);
  if (r != NULL && r->is_young()) {
    return true;
  }
  // This card is not associated with a heap region
  // so can't be young.
  return false;
}

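// add_card_count() implements the card counting scheme. Each hash bucket
// holds a (count, evict_count) pair in _card_counts and a packed
// (card number, epoch) value in _card_epochs; the packing helpers
// (make_epoch_entry, extract_card_num, extract_epoch) are assumed to be
// defined in the accompanying header. The retry loop below has three
// outcomes:
//  - the bucket's stored epoch is stale: CAS in a fresh (card, epoch)
//    entry, start the count at 1, and defer processing of card_ptr;
//  - the bucket already tracks this card in the current period: bump the
//    count (racily, which is tolerated) and defer;
//  - the bucket tracks a different card: CAS the new card in, bump the
//    eviction count, and hand the evicted card back to the caller for
//    immediate refinement (unless it now lies in a young region).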
jbyte* ConcurrentG1Refine::add_card_count(jbyte* card_ptr, int* count, bool* defer) {
  unsigned new_card_num = ptr_2_card_num(card_ptr);
  unsigned bucket = hash(new_card_num);
  assert(0 <= bucket && bucket < _n_card_counts, "Bounds");

  CardCountCacheEntry* count_ptr = &_card_counts[bucket];
  CardEpochCacheEntry* epoch_ptr = &_card_epochs[bucket];

  // We have to construct a new entry if we haven't updated the counts
  // during the current period, or if the count was updated for a
  // different card number.
  unsigned int new_epoch = (unsigned int) _n_periods;
  julong new_epoch_entry = make_epoch_entry(new_card_num, new_epoch);

  while (true) {
    // Fetch the previous epoch value
    julong prev_epoch_entry = epoch_ptr->_value;
    julong cas_res;

    if (extract_epoch(prev_epoch_entry) != new_epoch) {
      // This entry has not yet been updated during this period.
      // Note: we update the epoch value atomically to ensure that
      // there is only one winner that updates the cached card_ptr
      // value even though all the refine threads share the same
      // epoch value.

      cas_res = (julong) Atomic::cmpxchg((jlong) new_epoch_entry,
                                         (volatile jlong*)&epoch_ptr->_value,
                                         (jlong) prev_epoch_entry);

      if (cas_res == prev_epoch_entry) {
        // We have successfully won the race to update the
        // epoch and card_num value. Make it look like the
        // count and eviction count were previously cleared.
        count_ptr->_count = 1;
        count_ptr->_evict_count = 0;
        *count = 0;
        // We can defer the processing of card_ptr
        *defer = true;
        return card_ptr;
      }
      // We did not win the race to update the epoch field, so some other
      // thread must have done it. The value that gets returned by CAS
      // should be the new epoch value.
      assert(extract_epoch(cas_res) == new_epoch, "unexpected epoch");
      // We could 'continue' here or just re-read the previous epoch value.
      prev_epoch_entry = epoch_ptr->_value;
    }

    // The epoch entry for card_ptr has been updated during this period.
    unsigned old_card_num = extract_card_num(prev_epoch_entry);

    // The card count that will be returned to the caller.
    *count = count_ptr->_count;

    // Are we updating the count for the same card?
    if (new_card_num == old_card_num) {
      // Same card - just update the count. We could have more than one
      // thread racing to update count for the current card. It should be
      // OK not to use a CAS as the only penalty should be some missed
      // increments of the count, which delays identifying the card as "hot".

      if (*count < max_jubyte) count_ptr->_count++;
      // We can defer the processing of card_ptr
      *defer = true;
      return card_ptr;
    }

    // Different card - evict old card info
    if (count_ptr->_evict_count < max_jubyte) count_ptr->_evict_count++;
    if (count_ptr->_evict_count > G1CardCountCacheExpandThreshold) {
      // Trigger a resize the next time we clear
      _expand_card_counts = true;
    }

    cas_res = (julong) Atomic::cmpxchg((jlong) new_epoch_entry,
                                       (volatile jlong*)&epoch_ptr->_value,
                                       (jlong) prev_epoch_entry);

    if (cas_res == prev_epoch_entry) {
      // We successfully updated the card num value in the epoch entry.
      count_ptr->_count = 0; // initialize counter for new card num

      // Even though the region containing the card at old_card_num was not
      // in the young list when old_card_num was recorded in the epoch
      // cache, it could have been added to the free list and subsequently
      // added to the young list in the intervening time. If the evicted
      // card is in a young region, just return card_ptr and the evicted
      // card will not be cleaned. See CR 6817995.

      jbyte* old_card_ptr = card_num_2_ptr(old_card_num);
      if (is_young_card(old_card_ptr)) {
        *count = 0;
        // We can defer the processing of card_ptr
        *defer = true;
        return card_ptr;
      }

      // We do not want to defer processing of card_ptr in this case
      // (we need to refine both old_card_ptr and card_ptr).
      *defer = false;
      return old_card_ptr;
    }
    // Someone else beat us - try again.
  }
}

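// cache_insert() treats _hot_cache as a fixed-size circular buffer guarded
// by HotCardCache_lock. A card whose count is still below
// G1ConcRSHotCardLimit is returned immediately for refinement; a hot card
// is stored instead, and once the buffer is full each insertion evicts the
// oldest entry, which is returned to the caller (or dropped if its region
// has since become young - see CR 6817995).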
jbyte* ConcurrentG1Refine::cache_insert(jbyte* card_ptr, bool* defer) {
  int count;
  jbyte* cached_ptr = add_card_count(card_ptr, &count, defer);
  assert(cached_ptr != NULL, "bad cached card ptr");
  assert(!is_young_card(cached_ptr), "shouldn't get a card in young region");

  // The card pointer we obtained from the card count cache is not hot,
  // so do not store it in the cache; return it for immediate
  // refining.
  if (count < G1ConcRSHotCardLimit) {
    return cached_ptr;
  }

  // Otherwise, the pointer we got from _card_counts is hot.
  jbyte* res = NULL;
  MutexLockerEx x(HotCardCache_lock, Mutex::_no_safepoint_check_flag);
  if (_n_hot == _hot_cache_size) {
    res = _hot_cache[_hot_cache_idx];
    _n_hot--;
  }
  // Now _n_hot < _hot_cache_size, and we can insert at _hot_cache_idx.
  _hot_cache[_hot_cache_idx] = cached_ptr;
  _hot_cache_idx++;
  if (_hot_cache_idx == _hot_cache_size) _hot_cache_idx = 0;
  _n_hot++;

  if (res != NULL) {
    // Even though the region containing res was not in the young list
    // when it was recorded in the hot cache, it could have been added
    // to the free list and subsequently added to the young list in
    // the intervening time. If res is in a young region, return NULL
    // so that res is not cleaned. See CR 6817995.

    if (is_young_card(res)) {
      res = NULL;
    }
  }

  return res;
}

void ConcurrentG1Refine::clean_up_cache(int worker_i, G1RemSet* g1rs) {
  assert(!use_cache(), "cache should be disabled");
  int start_idx;

  while ((start_idx = _hot_cache_par_claimed_idx) < _n_hot) { // read once
    int end_idx = start_idx + _hot_cache_par_chunk_size;

    if (start_idx ==
        Atomic::cmpxchg(end_idx, &_hot_cache_par_claimed_idx, start_idx)) {
      // The current worker has successfully claimed the chunk [start_idx..end_idx)
      end_idx = MIN2(end_idx, _n_hot);
      for (int i = start_idx; i < end_idx; i++) {
        jbyte* entry = _hot_cache[i];
        if (entry != NULL) {
          g1rs->concurrentRefineOneCard(entry, worker_i);
        }
      }
    }
  }
}

void ConcurrentG1Refine::expand_card_count_cache() {
  if (_n_card_counts < _max_n_card_counts) {
    int new_idx = _cache_size_index + 1;
    int new_size = _cc_cache_sizes[new_idx];
    if (new_size < 0) new_size = _max_n_card_counts;

    // Make sure we don't go bigger than we will ever need
    new_size = MIN2((unsigned) new_size, _max_n_card_counts);

    // Expand the card count and card epoch tables
    if (new_size > (int)_n_card_counts) {
      // We can just free and allocate a new array as we're
      // not interested in preserving the contents.
      assert(_card_counts != NULL, "Logic!");
      assert(_card_epochs != NULL, "Logic!");
      FREE_C_HEAP_ARRAY(CardCountCacheEntry, _card_counts);
      FREE_C_HEAP_ARRAY(CardEpochCacheEntry, _card_epochs);
      _n_card_counts = new_size;
      _card_counts = NEW_C_HEAP_ARRAY(CardCountCacheEntry, _n_card_counts);
      _card_epochs = NEW_C_HEAP_ARRAY(CardEpochCacheEntry, _n_card_counts);
      _cache_size_index = new_idx;
    }
  }
}

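// clear_and_record_card_counts() is the once-per-period reset. Advancing
// _n_periods is enough to invalidate every cached count, because
// add_card_count() treats a bucket whose stored epoch differs from the
// current period as empty; an explicit wipe of the epoch table is only
// needed when it has just been reallocated by expand_card_count_cache().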
void ConcurrentG1Refine::clear_and_record_card_counts() {
  if (G1ConcRSLogCacheSize == 0) return;

#ifndef PRODUCT
  double start = os::elapsedTime();
#endif

  if (_expand_card_counts) {
    expand_card_count_cache();
    _expand_card_counts = false;
    // Only need to clear the epochs.
    Copy::fill_to_bytes(&_card_epochs[0], _n_card_counts * sizeof(CardEpochCacheEntry));
  }

  int this_epoch = (int) _n_periods;
  assert((this_epoch + 1) <= max_jint, "too many periods");
  // Update the epoch.
  _n_periods++;

#ifndef PRODUCT
  double elapsed = os::elapsedTime() - start;
  _g1h->g1_policy()->record_cc_clear_time(elapsed * 1000.0);
#endif
}

void ConcurrentG1Refine::print_worker_threads_on(outputStream* st) const {
  for (int i = 0; i < _n_threads; ++i) {
    _threads[i]->print_on(st);
    st->cr();
  }
}