/*
 * Copyright 2001-2009 Sun Microsystems, Inc.  All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 */

#include "incls/_precompiled.incl"
#include "incls/_concurrentG1Refine.cpp.incl"

// Possible sizes for the card counts cache: odd primes that roughly
// double in size. (See jvmtiTagMap.cpp).
int ConcurrentG1Refine::_cc_cache_sizes[] = {
    16381,    32771,    76831,    150001,    307261,
   614563,  1228891,  2457733,   4915219,   9830479,
 19660831, 39321619, 78643219, 157286461,        -1
};

ConcurrentG1Refine::ConcurrentG1Refine() :
  _card_counts(NULL), _card_epochs(NULL),
  _n_card_counts(0), _max_n_card_counts(0),
  _cache_size_index(0), _expand_card_counts(false),
  _hot_cache(NULL),
  _def_use_cache(false), _use_cache(false),
  _n_periods(0),
  _threads(NULL), _n_threads(0)
{
  if (G1ConcRefine) {
    _n_threads = (int)thread_num();
    if (_n_threads > 0) {
      _threads = NEW_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _n_threads);
      int worker_id_offset = (int)DirtyCardQueueSet::num_par_ids();
      ConcurrentG1RefineThread *next = NULL;
      for (int i = _n_threads - 1; i >= 0; i--) {
        ConcurrentG1RefineThread* t = new ConcurrentG1RefineThread(this, next, worker_id_offset, i);
        assert(t != NULL, "Conc refine should have been created");
        assert(t->cg1r() == this, "Conc refine thread should refer to this");
        _threads[i] = t;
        next = t;
      }
    }
  }
}

size_t ConcurrentG1Refine::thread_num() {
  if (G1ConcRefine) {
    return (G1ParallelRSetThreads > 0) ? G1ParallelRSetThreads : ParallelGCThreads;
  }
  return 0;
}

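// init() sets up the two data structures used when the hot card cache is
// enabled (G1ConcRSLogCacheSize > 0): the card count/epoch cache, sized
// from _cc_cache_sizes according to the number of cards spanned by the
// reserved heap, and the hot card cache itself, holding
// 2^G1ConcRSLogCacheSize card pointers. As a rough worked example
// (assuming the usual 512-byte cards, i.e. card_shift == 9), a 1 GB
// reserved heap spans 2^30 / 2^9 = 2^21 (about two million) cards, so
// _max_n_card_counts is about 2M; the sizing loop then picks the first
// _cc_cache_sizes entry that reaches _max_n_card_counts /
// InitialCacheFraction and backs off one step, never exceeding
// _max_n_card_counts.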
void ConcurrentG1Refine::init() {
  if (G1ConcRSLogCacheSize > 0) {
    _g1h = G1CollectedHeap::heap();
    _max_n_card_counts =
      (unsigned) (_g1h->g1_reserved_obj_bytes() >> CardTableModRefBS::card_shift);

    size_t max_card_num = ((size_t)1 << (sizeof(unsigned)*BitsPerByte-1)) - 1;
    guarantee(_max_n_card_counts < max_card_num, "card_num representation");

    int desired = _max_n_card_counts / InitialCacheFraction;
    for (_cache_size_index = 0;
         _cc_cache_sizes[_cache_size_index] >= 0; _cache_size_index++) {
      if (_cc_cache_sizes[_cache_size_index] >= desired) break;
    }
    _cache_size_index = MAX2(0, (_cache_size_index - 1));

    int initial_size = _cc_cache_sizes[_cache_size_index];
    if (initial_size < 0) initial_size = _max_n_card_counts;

    // Make sure we don't go bigger than we will ever need
    _n_card_counts = MIN2((unsigned) initial_size, _max_n_card_counts);

    _card_counts = NEW_C_HEAP_ARRAY(CardCountCacheEntry, _n_card_counts);
    _card_epochs = NEW_C_HEAP_ARRAY(CardEpochCacheEntry, _n_card_counts);

    Copy::fill_to_bytes(&_card_counts[0],
                        _n_card_counts * sizeof(CardCountCacheEntry));
    Copy::fill_to_bytes(&_card_epochs[0], _n_card_counts * sizeof(CardEpochCacheEntry));

    ModRefBarrierSet* bs = _g1h->mr_bs();
    guarantee(bs->is_a(BarrierSet::CardTableModRef), "Precondition");
    _ct_bs = (CardTableModRefBS*)bs;
    _ct_bot = _ct_bs->byte_for_const(_g1h->reserved_region().start());

    _def_use_cache = true;
    _use_cache = true;
    _hot_cache_size = (1 << G1ConcRSLogCacheSize);
    _hot_cache = NEW_C_HEAP_ARRAY(jbyte*, _hot_cache_size);
    _n_hot = 0;
    _hot_cache_idx = 0;

    // For refining the cards in the hot cache in parallel
    int n_workers = (ParallelGCThreads > 0 ?
                        _g1h->workers()->total_workers() : 1);
    _hot_cache_par_chunk_size = MAX2(1, _hot_cache_size / n_workers);
    _hot_cache_par_claimed_idx = 0;
  }
}

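// Note: the two fields initialized at the end of init() above,
// _hot_cache_par_chunk_size and _hot_cache_par_claimed_idx, are consumed
// by clean_up_cache() below, where multiple workers claim disjoint chunks
// of _hot_cache with Atomic::cmpxchg and refine the claimed entries in
// parallel.
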
void ConcurrentG1Refine::stop() {
  if (_threads != NULL) {
    for (int i = 0; i < _n_threads; i++) {
      _threads[i]->stop();
    }
  }
}

ConcurrentG1Refine::~ConcurrentG1Refine() {
  if (G1ConcRSLogCacheSize > 0) {
    assert(_card_counts != NULL, "Logic");
    FREE_C_HEAP_ARRAY(CardCountCacheEntry, _card_counts);
    assert(_card_epochs != NULL, "Logic");
    FREE_C_HEAP_ARRAY(CardEpochCacheEntry, _card_epochs);
    assert(_hot_cache != NULL, "Logic");
    FREE_C_HEAP_ARRAY(jbyte*, _hot_cache);
  }
  if (_threads != NULL) {
    for (int i = 0; i < _n_threads; i++) {
      delete _threads[i];
    }
    FREE_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _threads);
  }
}

void ConcurrentG1Refine::threads_do(ThreadClosure *tc) {
  if (_threads != NULL) {
    for (int i = 0; i < _n_threads; i++) {
      tc->do_thread(_threads[i]);
    }
  }
}

bool ConcurrentG1Refine::is_young_card(jbyte* card_ptr) {
  HeapWord* start = _ct_bs->addr_for(card_ptr);
  HeapRegion* r = _g1h->heap_region_containing(start);
  if (r != NULL && r->is_young()) {
    return true;
  }
  // This card is not associated with a heap region
  // so can't be young.
  return false;
}

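// add_card_count() implements the card counting scheme. Each hash bucket
// holds a (count, evict_count) pair in _card_counts and a packed
// (card number, epoch) value in _card_epochs; the packing helpers
// (make_epoch_entry, extract_card_num, extract_epoch) are assumed to be
// defined in the accompanying header. The retry loop below has three
// outcomes:
//  - the bucket's stored epoch is stale: CAS in a fresh (card, epoch)
//    entry, start the count at 1, and defer processing of card_ptr;
//  - the bucket already tracks this card in the current period: bump the
//    count (racily, which is tolerated) and defer;
//  - the bucket tracks a different card: CAS the new card in, bump the
//    eviction count, and hand the evicted card back to the caller for
//    immediate refinement (unless it now lies in a young region).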
jbyte* ConcurrentG1Refine::add_card_count(jbyte* card_ptr, int* count, bool* defer) {
  unsigned new_card_num = ptr_2_card_num(card_ptr);
  unsigned bucket = hash(new_card_num);
  assert(0 <= bucket && bucket < _n_card_counts, "Bounds");

  CardCountCacheEntry* count_ptr = &_card_counts[bucket];
  CardEpochCacheEntry* epoch_ptr = &_card_epochs[bucket];

  // We have to construct a new entry if we haven't updated the counts
  // during the current period, or if the count was updated for a
  // different card number.
  unsigned int new_epoch = (unsigned int) _n_periods;
  julong new_epoch_entry = make_epoch_entry(new_card_num, new_epoch);

  while (true) {
    // Fetch the previous epoch value
    julong prev_epoch_entry = epoch_ptr->_value;
    julong cas_res;

    if (extract_epoch(prev_epoch_entry) != new_epoch) {
      // This entry has not yet been updated during this period.
      // Note: we update the epoch value atomically to ensure that
      // there is only one winner that updates the cached card_ptr
      // value even though all the refine threads share the same
      // epoch value.

      cas_res = (julong) Atomic::cmpxchg((jlong) new_epoch_entry,
                                         (volatile jlong*)&epoch_ptr->_value,
                                         (jlong) prev_epoch_entry);

      if (cas_res == prev_epoch_entry) {
        // We have successfully won the race to update the
        // epoch and card_num value. Make it look like the
        // count and eviction count were previously cleared.
        count_ptr->_count = 1;
        count_ptr->_evict_count = 0;
        *count = 0;
        // We can defer the processing of card_ptr
        *defer = true;
        return card_ptr;
      }
      // We did not win the race to update the epoch field, so some other
      // thread must have done it. The value that gets returned by CAS
      // should be the new epoch value.
      assert(extract_epoch(cas_res) == new_epoch, "unexpected epoch");
      // We could 'continue' here or just re-read the previous epoch value.
      prev_epoch_entry = epoch_ptr->_value;
    }

    // The epoch entry for card_ptr has been updated during this period.
    unsigned old_card_num = extract_card_num(prev_epoch_entry);

    // The card count that will be returned to the caller.
    *count = count_ptr->_count;

    // Are we updating the count for the same card?
    if (new_card_num == old_card_num) {
      // Same card - just update the count. We could have more than one
      // thread racing to update count for the current card. It should be
      // OK not to use a CAS as the only penalty should be some missed
      // increments of the count, which delays identifying the card as "hot".

      if (*count < max_jubyte) count_ptr->_count++;
      // We can defer the processing of card_ptr
      *defer = true;
      return card_ptr;
    }

    // Different card - evict old card info
    if (count_ptr->_evict_count < max_jubyte) count_ptr->_evict_count++;
    if (count_ptr->_evict_count > G1CardCountCacheExpandThreshold) {
      // Trigger a resize the next time we clear
      _expand_card_counts = true;
    }

    cas_res = (julong) Atomic::cmpxchg((jlong) new_epoch_entry,
                                       (volatile jlong*)&epoch_ptr->_value,
                                       (jlong) prev_epoch_entry);

    if (cas_res == prev_epoch_entry) {
      // We successfully updated the card num value in the epoch entry.
      count_ptr->_count = 0; // initialize counter for new card num

      // Even though the region containing the card at old_card_num was not
      // in the young list when old_card_num was recorded in the epoch
      // cache, it could have been added to the free list and subsequently
      // added to the young list in the intervening time. If the evicted
      // card is in a young region, just return card_ptr and the evicted
      // card will not be cleaned. See CR 6817995.

      jbyte* old_card_ptr = card_num_2_ptr(old_card_num);
      if (is_young_card(old_card_ptr)) {
        *count = 0;
        // We can defer the processing of card_ptr
        *defer = true;
        return card_ptr;
      }

      // We do not want to defer processing of card_ptr in this case
      // (we need to refine both old_card_ptr and card_ptr).
      *defer = false;
      return old_card_ptr;
    }
    // Someone else beat us - try again.
  }
}

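// cache_insert() treats _hot_cache as a fixed-size circular buffer guarded
// by HotCardCache_lock. A card whose count is still below
// G1ConcRSHotCardLimit is returned immediately for refinement; a hot card
// is stored instead, and once the buffer is full each insertion evicts the
// oldest entry, which is returned to the caller (or dropped if its region
// has since become young - see CR 6817995).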
jbyte* ConcurrentG1Refine::cache_insert(jbyte* card_ptr, bool* defer) {
  int count;
  jbyte* cached_ptr = add_card_count(card_ptr, &count, defer);
  assert(cached_ptr != NULL, "bad cached card ptr");
  assert(!is_young_card(cached_ptr), "shouldn't get a card in young region");

  // The card pointer we obtained from the card count cache is not hot,
  // so do not store it in the cache; return it for immediate
  // refining.
  if (count < G1ConcRSHotCardLimit) {
    return cached_ptr;
  }

  // Otherwise, the pointer we got from _card_counts is hot.
  jbyte* res = NULL;
  MutexLockerEx x(HotCardCache_lock, Mutex::_no_safepoint_check_flag);
  if (_n_hot == _hot_cache_size) {
    res = _hot_cache[_hot_cache_idx];
    _n_hot--;
  }
  // Now _n_hot < _hot_cache_size, and we can insert at _hot_cache_idx.
  _hot_cache[_hot_cache_idx] = cached_ptr;
  _hot_cache_idx++;
  if (_hot_cache_idx == _hot_cache_size) _hot_cache_idx = 0;
  _n_hot++;

  if (res != NULL) {
    // Even though the region containing res was not in the young list
    // when it was recorded in the hot cache, it could have been added
    // to the free list and subsequently added to the young list in
    // the intervening time. If res is in a young region, return NULL
    // so that res is not cleaned. See CR 6817995.

    if (is_young_card(res)) {
      res = NULL;
    }
  }

  return res;
}

void ConcurrentG1Refine::clean_up_cache(int worker_i, G1RemSet* g1rs) {
  assert(!use_cache(), "cache should be disabled");
  int start_idx;

  while ((start_idx = _hot_cache_par_claimed_idx) < _n_hot) { // read once
    int end_idx = start_idx + _hot_cache_par_chunk_size;

    if (start_idx ==
        Atomic::cmpxchg(end_idx, &_hot_cache_par_claimed_idx, start_idx)) {
      // The current worker has successfully claimed the chunk [start_idx..end_idx)
      end_idx = MIN2(end_idx, _n_hot);
      for (int i = start_idx; i < end_idx; i++) {
        jbyte* entry = _hot_cache[i];
        if (entry != NULL) {
          g1rs->concurrentRefineOneCard(entry, worker_i);
        }
      }
    }
  }
}

void ConcurrentG1Refine::expand_card_count_cache() {
  if (_n_card_counts < _max_n_card_counts) {
    int new_idx = _cache_size_index + 1;
    int new_size = _cc_cache_sizes[new_idx];
    if (new_size < 0) new_size = _max_n_card_counts;

    // Make sure we don't go bigger than we will ever need
    new_size = MIN2((unsigned) new_size, _max_n_card_counts);

    // Expand the card count and card epoch tables
    if (new_size > (int)_n_card_counts) {
      // We can just free and allocate a new array as we're
      // not interested in preserving the contents.
      assert(_card_counts != NULL, "Logic!");
      assert(_card_epochs != NULL, "Logic!");
      FREE_C_HEAP_ARRAY(CardCountCacheEntry, _card_counts);
      FREE_C_HEAP_ARRAY(CardEpochCacheEntry, _card_epochs);
      _n_card_counts = new_size;
      _card_counts = NEW_C_HEAP_ARRAY(CardCountCacheEntry, _n_card_counts);
      _card_epochs = NEW_C_HEAP_ARRAY(CardEpochCacheEntry, _n_card_counts);
      _cache_size_index = new_idx;
    }
  }
}

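// clear_and_record_card_counts() is the once-per-period reset. Advancing
// _n_periods is enough to invalidate every cached count, because
// add_card_count() treats a bucket whose stored epoch differs from the
// current period as empty; an explicit wipe of the epoch table is only
// needed when it has just been reallocated by expand_card_count_cache().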
void ConcurrentG1Refine::clear_and_record_card_counts() {
  if (G1ConcRSLogCacheSize == 0) return;

#ifndef PRODUCT
  double start = os::elapsedTime();
#endif

  if (_expand_card_counts) {
    expand_card_count_cache();
    _expand_card_counts = false;
    // Only need to clear the epochs.
    Copy::fill_to_bytes(&_card_epochs[0], _n_card_counts * sizeof(CardEpochCacheEntry));
  }

  int this_epoch = (int) _n_periods;
  assert((this_epoch + 1) <= max_jint, "too many periods");
  // Update the epoch.
  _n_periods++;

#ifndef PRODUCT
  double elapsed = os::elapsedTime() - start;
  _g1h->g1_policy()->record_cc_clear_time(elapsed * 1000.0);
#endif
}

void ConcurrentG1Refine::print_worker_threads_on(outputStream* st) const {
  for (int i = 0; i < _n_threads; ++i) {
    _threads[i]->print_on(st);
    st->cr();
  }
}