src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp

author       johnc
date         Mon, 28 Mar 2011 10:58:54 -0700
changeset    2713 02f49b66361a
parent       2646 04d1138b4cce
child        2716 c84ee870e0b9
permissions  -rw-r--r--

7026932: G1: No need to abort VM when card count cache expansion fails
Summary: Manage allocation/freeing of the card cache counts and epochs arrays directly so that an allocation failure while attempting to expand these arrays does not abort the JVM. Failure to expand these arrays is not fatal.
Reviewed-by: iveresov, tonyp
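The gist of the change, ahead of the full listing: the counts and epochs arrays are allocated with os::malloc() and released with os::free() directly, so a failed expansion is simply reported to the caller rather than aborting the JVM; only the initial allocation in init() remains fatal. A minimal sketch of that pattern follows, mirroring allocate_card_count_cache() below (the helper name here is illustrative only, not part of the changeset):

// Sketch (illustrative): allocate the two parallel arrays together and
// report failure to the caller instead of aborting the VM.
static bool try_allocate_pair(size_t n,
                              CardCountCacheEntry** counts,
                              CardEpochCacheEntry** epochs) {
  *counts = (CardCountCacheEntry*) os::malloc(n * sizeof(CardCountCacheEntry));
  if (*counts == NULL) {
    return false;                  // not fatal - the caller keeps the old arrays
  }
  *epochs = (CardEpochCacheEntry*) os::malloc(n * sizeof(CardEpochCacheEntry));
  if (*epochs == NULL) {
    os::free(*counts);             // keep the 1:1 pair consistent on failure
    *counts = NULL;
    return false;
  }
  return true;
}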

     1 /*
     2  * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.
     8  *
     9  * This code is distributed in the hope that it will be useful, but WITHOUT
    10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    12  * version 2 for more details (a copy is included in the LICENSE file that
    13  * accompanied this code).
    14  *
    15  * You should have received a copy of the GNU General Public License version
    16  * 2 along with this work; if not, write to the Free Software Foundation,
    17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    18  *
    19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    20  * or visit www.oracle.com if you need additional information or have any
    21  * questions.
    22  *
    23  */
    25 #include "precompiled.hpp"
    26 #include "gc_implementation/g1/concurrentG1Refine.hpp"
    27 #include "gc_implementation/g1/concurrentG1RefineThread.hpp"
    28 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
    29 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
    30 #include "gc_implementation/g1/g1RemSet.hpp"
    31 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
    32 #include "memory/space.inline.hpp"
    33 #include "runtime/atomic.hpp"
    34 #include "runtime/java.hpp"
    35 #include "utilities/copy.hpp"
    37 // Possible sizes for the card counts cache: odd primes that roughly double in size.
    38 // (See jvmtiTagMap.cpp).
    40 #define MAX_SIZE ((size_t) -1)
    42 size_t ConcurrentG1Refine::_cc_cache_sizes[] = {
    43           16381,    32771,    76831,    150001,   307261,
    44          614563,  1228891,  2457733,   4915219,  9830479,
    45        19660831, 39321619, 78643219, 157286461,  MAX_SIZE
    46   };
    48 ConcurrentG1Refine::ConcurrentG1Refine() :
    49   _card_counts(NULL), _card_epochs(NULL),
    50   _n_card_counts(0), _max_cards(0), _max_n_card_counts(0),
    51   _cache_size_index(0), _expand_card_counts(false),
    52   _hot_cache(NULL),
    53   _def_use_cache(false), _use_cache(false),
    54   _n_periods(0),
    55   _threads(NULL), _n_threads(0)
    56 {
    58   // Ergonomically select initial concurrent refinement parameters
    59   if (FLAG_IS_DEFAULT(G1ConcRefinementGreenZone)) {
    60     FLAG_SET_DEFAULT(G1ConcRefinementGreenZone, MAX2<int>(ParallelGCThreads, 1));
    61   }
    62   set_green_zone(G1ConcRefinementGreenZone);
    64   if (FLAG_IS_DEFAULT(G1ConcRefinementYellowZone)) {
    65     FLAG_SET_DEFAULT(G1ConcRefinementYellowZone, green_zone() * 3);
    66   }
    67   set_yellow_zone(MAX2<int>(G1ConcRefinementYellowZone, green_zone()));
    69   if (FLAG_IS_DEFAULT(G1ConcRefinementRedZone)) {
    70     FLAG_SET_DEFAULT(G1ConcRefinementRedZone, yellow_zone() * 2);
    71   }
    72   set_red_zone(MAX2<int>(G1ConcRefinementRedZone, yellow_zone()));
    73   _n_worker_threads = thread_num();
    74   // We need one extra thread to do the young gen rset size sampling.
    75   _n_threads = _n_worker_threads + 1;
    76   reset_threshold_step();
    78   _threads = NEW_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _n_threads);
    79   int worker_id_offset = (int)DirtyCardQueueSet::num_par_ids();
    80   ConcurrentG1RefineThread *next = NULL;
    81   for (int i = _n_threads - 1; i >= 0; i--) {
    82     ConcurrentG1RefineThread* t = new ConcurrentG1RefineThread(this, next, worker_id_offset, i);
    83     assert(t != NULL, "Conc refine should have been created");
    84     assert(t->cg1r() == this, "Conc refine thread should refer to this");
    85     _threads[i] = t;
    86     next = t;
    87   }
    88 }
    90 void ConcurrentG1Refine::reset_threshold_step() {
    91   if (FLAG_IS_DEFAULT(G1ConcRefinementThresholdStep)) {
    92     _thread_threshold_step = (yellow_zone() - green_zone()) / (worker_thread_num() + 1);
    93   } else {
    94     _thread_threshold_step = G1ConcRefinementThresholdStep;
    95   }
    96 }
    98 int ConcurrentG1Refine::thread_num() {
    99   return MAX2<int>((G1ConcRefinementThreads > 0) ? G1ConcRefinementThreads : ParallelGCThreads, 1);
   100 }
   102 void ConcurrentG1Refine::init() {
   103   if (G1ConcRSLogCacheSize > 0) {
   104     _g1h = G1CollectedHeap::heap();
   106     _max_cards = _g1h->max_capacity() >> CardTableModRefBS::card_shift;
   107     _max_n_card_counts = _max_cards * G1MaxHotCardCountSizePercent / 100;
   109     size_t max_card_num = ((size_t)1 << (sizeof(unsigned)*BitsPerByte-1)) - 1;
   110     guarantee(_max_cards < max_card_num, "card_num representation");
   112     // We need _n_card_counts to be less than _max_n_card_counts here
   113     // so that the expansion call (below) actually allocates the
   114     // _counts and _epochs arrays.
   115     assert(_n_card_counts == 0, "pre-condition");
   116     assert(_max_n_card_counts > 0, "pre-condition");
   118     // Find the first index into the cache size array whose value is
   119     // large enough to hold desired_sz.
   120     size_t desired_sz = _max_cards / InitialCacheFraction;
   121     int desired_sz_index = 0;
   122     while (_cc_cache_sizes[desired_sz_index] < desired_sz) {
   123       desired_sz_index += 1;
   124       assert(desired_sz_index <  MAX_CC_CACHE_INDEX, "invariant");
   125     }
   126     assert(desired_sz_index <  MAX_CC_CACHE_INDEX, "invariant");
   128     // If the desired_sz value falls between two sizes, i.e.
   129     // _cc_cache_sizes[desired_sz_index-1] < desired_sz <= _cc_cache_sizes[desired_sz_index],
   130     // we start with the lower size in the optimistic expectation that
   131     // we will not need to expand up. Note: desired_sz_index could also be 0.
   132     if (desired_sz_index > 0 &&
   133         _cc_cache_sizes[desired_sz_index] > desired_sz) {
   134       desired_sz_index -= 1;
   135     }
   137     if (!expand_card_count_cache(desired_sz_index)) {
   138       // Allocation was unsuccessful - exit
   139       vm_exit_during_initialization("Could not reserve enough space for card count cache");
   140     }
   141     assert(_n_card_counts > 0, "post-condition");
   142     assert(_cache_size_index == desired_sz_index, "post-condition");
   144     Copy::fill_to_bytes(&_card_counts[0],
   145                         _n_card_counts * sizeof(CardCountCacheEntry));
   146     Copy::fill_to_bytes(&_card_epochs[0], _n_card_counts * sizeof(CardEpochCacheEntry));
   148     ModRefBarrierSet* bs = _g1h->mr_bs();
   149     guarantee(bs->is_a(BarrierSet::CardTableModRef), "Precondition");
   150     _ct_bs = (CardTableModRefBS*)bs;
   151     _ct_bot = _ct_bs->byte_for_const(_g1h->reserved_region().start());
   153     _def_use_cache = true;
   154     _use_cache = true;
   155     _hot_cache_size = (1 << G1ConcRSLogCacheSize);
   156     _hot_cache = NEW_C_HEAP_ARRAY(jbyte*, _hot_cache_size);
   157     _n_hot = 0;
   158     _hot_cache_idx = 0;
   160     // For refining the cards in the hot cache in parallel
   161     int n_workers = (ParallelGCThreads > 0 ?
   162                         _g1h->workers()->total_workers() : 1);
   163     _hot_cache_par_chunk_size = MAX2(1, _hot_cache_size / n_workers);
   164     _hot_cache_par_claimed_idx = 0;
   165   }
   166 }
   168 void ConcurrentG1Refine::stop() {
   169   if (_threads != NULL) {
   170     for (int i = 0; i < _n_threads; i++) {
   171       _threads[i]->stop();
   172     }
   173   }
   174 }
   176 void ConcurrentG1Refine::reinitialize_threads() {
   177   reset_threshold_step();
   178   if (_threads != NULL) {
   179     for (int i = 0; i < _n_threads; i++) {
   180       _threads[i]->initialize();
   181     }
   182   }
   183 }
   185 ConcurrentG1Refine::~ConcurrentG1Refine() {
   186   if (G1ConcRSLogCacheSize > 0) {
   187     // Please see the comment in allocate_card_count_cache
   188     // for why we call os::malloc() and os::free() directly.
   189     assert(_card_counts != NULL, "Logic");
   190     os::free(_card_counts);
   191     assert(_card_epochs != NULL, "Logic");
   192     os::free(_card_epochs);
   194     assert(_hot_cache != NULL, "Logic");
   195     FREE_C_HEAP_ARRAY(jbyte*, _hot_cache);
   196   }
   197   if (_threads != NULL) {
   198     for (int i = 0; i < _n_threads; i++) {
   199       delete _threads[i];
   200     }
   201     FREE_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _threads);
   202   }
   203 }
   205 void ConcurrentG1Refine::threads_do(ThreadClosure *tc) {
   206   if (_threads != NULL) {
   207     for (int i = 0; i < _n_threads; i++) {
   208       tc->do_thread(_threads[i]);
   209     }
   210   }
   211 }
   213 bool ConcurrentG1Refine::is_young_card(jbyte* card_ptr) {
   214   HeapWord* start = _ct_bs->addr_for(card_ptr);
   215   HeapRegion* r = _g1h->heap_region_containing(start);
   216   if (r != NULL && r->is_young()) {
   217     return true;
   218   }
   219   // The card is not associated with a heap region, or the
   220   // region is not young, so the card cannot be young.
   221   return false;
   222 }
   224 jbyte* ConcurrentG1Refine::add_card_count(jbyte* card_ptr, int* count, bool* defer) {
   225   unsigned new_card_num = ptr_2_card_num(card_ptr);
   226   unsigned bucket = hash(new_card_num);
   227   assert(0 <= bucket && bucket < _n_card_counts, "Bounds");
   229   CardCountCacheEntry* count_ptr = &_card_counts[bucket];
   230   CardEpochCacheEntry* epoch_ptr = &_card_epochs[bucket];
   232   // We have to construct a new entry if we haven't updated the counts
   233   // during the current period, or if the count was updated for a
   234   // different card number.
   235   unsigned int new_epoch = (unsigned int) _n_periods;
   236   julong new_epoch_entry = make_epoch_entry(new_card_num, new_epoch);
   238   while (true) {
   239     // Fetch the previous epoch value
   240     julong prev_epoch_entry = epoch_ptr->_value;
   241     julong cas_res;
   243     if (extract_epoch(prev_epoch_entry) != new_epoch) {
   244       // This entry has not yet been updated during this period.
   245       // Note: we update the epoch value atomically to ensure
   246       // that there is only one winner that updates the cached
   247       // card_ptr value even though all the refine threads share
   248       // the same epoch value.
   250       cas_res = (julong) Atomic::cmpxchg((jlong) new_epoch_entry,
   251                                          (volatile jlong*)&epoch_ptr->_value,
   252                                          (jlong) prev_epoch_entry);
   254       if (cas_res == prev_epoch_entry) {
   255         // We have successfully won the race to update the
   256         // epoch and card_num value. Make it look like the
   257         // count and eviction count were previously cleared.
   258         count_ptr->_count = 1;
   259         count_ptr->_evict_count = 0;
   260         *count = 0;
   261         // We can defer the processing of card_ptr
   262         *defer = true;
   263         return card_ptr;
   264       }
   265       // We did not win the race to update the epoch field, so some other
   266       // thread must have done it. The value that gets returned by CAS
   267       // should be the new epoch value.
   268       assert(extract_epoch(cas_res) == new_epoch, "unexpected epoch");
   269       // We could 'continue' here or just re-read the previous epoch value
   270       prev_epoch_entry = epoch_ptr->_value;
   271     }
   273     // The epoch entry for card_ptr has been updated during this period.
   274     unsigned old_card_num = extract_card_num(prev_epoch_entry);
   276     // The card count that will be returned to caller
   277     *count = count_ptr->_count;
   279     // Are we updating the count for the same card?
   280     if (new_card_num == old_card_num) {
   281       // Same card - just update the count. We could have more than one
   282       // thread racing to update count for the current card. It should be
   283       // OK not to use a CAS as the only penalty should be some missed
   284       // increments of the count which delays identifying the card as "hot".
   286       if (*count < max_jubyte) count_ptr->_count++;
   287       // We can defer the processing of card_ptr
   288       *defer = true;
   289       return card_ptr;
   290     }
   292     // Different card - evict old card info
   293     if (count_ptr->_evict_count < max_jubyte) count_ptr->_evict_count++;
   294     if (count_ptr->_evict_count > G1CardCountCacheExpandThreshold) {
   295       // Trigger a resize the next time we clear
   296       _expand_card_counts = true;
   297     }
   299     cas_res = (julong) Atomic::cmpxchg((jlong) new_epoch_entry,
   300                                        (volatile jlong*)&epoch_ptr->_value,
   301                                        (jlong) prev_epoch_entry);
   303     if (cas_res == prev_epoch_entry) {
   304       // We successfully updated the card num value in the epoch entry
   305       count_ptr->_count = 0; // initialize counter for new card num
   306       jbyte* old_card_ptr = card_num_2_ptr(old_card_num);
   308       // Even though the region containing the card at old_card_num was not
   309       // in the young list when old_card_num was recorded in the epoch
   310       // cache it could have been added to the free list and subsequently
   311       // added to the young list in the intervening time. See CR 6817995.
   312       // We do not deal with this case here - it will be handled in
   313       // HeapRegion::oops_on_card_seq_iterate_careful after it has been
   314       // determined that the region containing the card has been allocated
   315       // to, and it's safe to check the young type of the region.
   317       // We do not want to defer processing of card_ptr in this case
   318       // (we need to refine old_card_ptr and card_ptr)
   319       *defer = false;
   320       return old_card_ptr;
   321     }
   322     // Someone else beat us - try again.
   323   }
   324 }
   326 jbyte* ConcurrentG1Refine::cache_insert(jbyte* card_ptr, bool* defer) {
   327   int count;
   328   jbyte* cached_ptr = add_card_count(card_ptr, &count, defer);
   329   assert(cached_ptr != NULL, "bad cached card ptr");
   331   // We've just inserted a card pointer into the card count cache
   332   // and got back either the card we just inserted or, if an entry
   333   // was evicted, the previous contents of that count slot.
   335   // The card we got back could be in a young region. When the
   336   // returned card (if evicted) was originally inserted, we had
   337   // determined that its containing region was not young. However
   338   // it is possible for the region to be freed during a cleanup
   339   // pause, then reallocated and tagged as young which will result
   340   // in the returned card residing in a young region.
   341   //
   342   // We do not deal with this case here - the change from non-young
   343   // to young could be observed at any time - it will be handled in
   344   // HeapRegion::oops_on_card_seq_iterate_careful after it has been
   345   // determined that the region containing the card has been allocated
   346   // to.
   348   // The card pointer we obtained from the card count cache is not
   349   // hot, so do not store it in the hot card cache; return it for
   350   // immediate refining.
   351   if (count < G1ConcRSHotCardLimit) {
   352     return cached_ptr;
   353   }
   355   // Otherwise, the pointer we got from the _card_counts cache is hot.
   356   jbyte* res = NULL;
   357   MutexLockerEx x(HotCardCache_lock, Mutex::_no_safepoint_check_flag);
   358   if (_n_hot == _hot_cache_size) {
   359     res = _hot_cache[_hot_cache_idx];
   360     _n_hot--;
   361   }
   362   // Now _n_hot < _hot_cache_size, and we can insert at _hot_cache_idx.
   363   _hot_cache[_hot_cache_idx] = cached_ptr;
   364   _hot_cache_idx++;
   365   if (_hot_cache_idx == _hot_cache_size) _hot_cache_idx = 0;
   366   _n_hot++;
   368   // The card obtained from the hot card cache could be in a young
   369   // region. See above on how this can happen.
   371   return res;
   372 }
   374 void ConcurrentG1Refine::clean_up_cache(int worker_i,
   375                                         G1RemSet* g1rs,
   376                                         DirtyCardQueue* into_cset_dcq) {
   377   assert(!use_cache(), "cache should be disabled");
   378   int start_idx;
   380   while ((start_idx = _hot_cache_par_claimed_idx) < _n_hot) { // read once
   381     int end_idx = start_idx + _hot_cache_par_chunk_size;
   383     if (start_idx ==
   384         Atomic::cmpxchg(end_idx, &_hot_cache_par_claimed_idx, start_idx)) {
   385       // The current worker has successfully claimed the chunk [start_idx..end_idx)
   386       end_idx = MIN2(end_idx, _n_hot);
   387       for (int i = start_idx; i < end_idx; i++) {
   388         jbyte* entry = _hot_cache[i];
   389         if (entry != NULL) {
   390           if (g1rs->concurrentRefineOneCard(entry, worker_i, true)) {
   391             // 'entry' contains references that point into the current
   392             // collection set. We need to record 'entry' in the DCQS
   393             // that's used for that purpose.
   394             //
   395             // The only time we care about recording cards that contain
   396             // references that point into the collection set is during
   397             // RSet updating while within an evacuation pause.
   398             // In this case worker_i should be the id of a GC worker thread
   399             assert(SafepointSynchronize::is_at_safepoint(), "not during an evacuation pause");
   400             assert(worker_i < (int) (ParallelGCThreads == 0 ? 1 : ParallelGCThreads), "incorrect worker id");
   401             into_cset_dcq->enqueue(entry);
   402           }
   403         }
   404       }
   405     }
   406   }
   407 }
   409 // The arrays used to hold the card counts and the epochs must have
   410 // a 1:1 correspondence. Hence they are allocated and freed together.
   411 // Returns true if the allocations of both the counts and epochs
   412 // were successful; false otherwise.
   413 bool ConcurrentG1Refine::allocate_card_count_cache(size_t n,
   414                                                    CardCountCacheEntry** counts,
   415                                                    CardEpochCacheEntry** epochs) {
   416   // We call the allocation/free routines directly for the counts
   417   // and epochs arrays. The NEW_C_HEAP_ARRAY/FREE_C_HEAP_ARRAY
   418   // macros call AllocateHeap and FreeHeap respectively.
   419   // AllocateHeap will call vm_exit_out_of_memory in the event
   420   // of an allocation failure and abort the JVM. With the
   421   // _counts/epochs arrays we only need to abort the JVM if the
   422   // initial allocation of these arrays fails.
   423   //
   424   // Additionally AllocateHeap/FreeHeap do some tracing of
   425   // allocate/free calls so calling one without calling the
   426   // other can cause inconsistencies in the tracing. So we
   427   // call neither.
   429   assert(*counts == NULL, "out param");
   430   assert(*epochs == NULL, "out param");
   432   size_t counts_size = n * sizeof(CardCountCacheEntry);
   433   size_t epochs_size = n * sizeof(CardEpochCacheEntry);
   435   *counts = (CardCountCacheEntry*) os::malloc(counts_size);
   436   if (*counts == NULL) {
   437     // allocation was unsuccessful
   438     return false;
   439   }
   441   *epochs = (CardEpochCacheEntry*) os::malloc(epochs_size);
   442   if (*epochs == NULL) {
   443     // allocation was unsuccessful - free counts array
   444     assert(*counts != NULL, "must be");
   445     os::free(*counts);
   446     *counts = NULL;
   447     return false;
   448   }
   450   // We successfully allocated both counts and epochs
   451   return true;
   452 }
   454 // Returns true if the card counts/epochs cache was
   455 // successfully expanded; false otherwise.
   456 bool ConcurrentG1Refine::expand_card_count_cache(int cache_size_idx) {
   457   // Can we expand the card count and epoch tables?
   458   if (_n_card_counts < _max_n_card_counts) {
   459     assert(cache_size_idx >= 0 && cache_size_idx  < MAX_CC_CACHE_INDEX, "oob");
   461     size_t cache_size = _cc_cache_sizes[cache_size_idx];
   462     // Make sure we don't go bigger than we will ever need
   463     cache_size = MIN2(cache_size, _max_n_card_counts);
   465     // Should we expand the card count and card epoch tables?
   466     if (cache_size > _n_card_counts) {
   467       // We have been asked to allocate new, larger, arrays for
   468       // the card counts and the epochs. Attempt the allocation
   469       // of both before we free the existing arrays in case
   470       // the allocation is unsuccessful...
   471       CardCountCacheEntry* counts = NULL;
   472       CardEpochCacheEntry* epochs = NULL;
   474       if (allocate_card_count_cache(cache_size, &counts, &epochs)) {
   475         // Allocation was successful.
   476         // We can just free the old arrays; we're
   477         // not interested in preserving the contents
   478         if (_card_counts != NULL) os::free(_card_counts);
   479         if (_card_epochs != NULL) os::free(_card_epochs);
   481         // Cache the size of the arrays and the index that got us there.
   482         _n_card_counts = cache_size;
   483         _cache_size_index = cache_size_idx;
   485         _card_counts = counts;
   486         _card_epochs = epochs;
   488         // We successfully allocated/expanded the caches.
   489         return true;
   490       }
   491     }
   492   }
   494   // We did not successfully expand the caches.
   495   return false;
   496 }
   498 void ConcurrentG1Refine::clear_and_record_card_counts() {
   499   if (G1ConcRSLogCacheSize == 0) return;
   501 #ifndef PRODUCT
   502   double start = os::elapsedTime();
   503 #endif
   505   if (_expand_card_counts) {
   506     int new_idx = _cache_size_index + 1;
   508     if (expand_card_count_cache(new_idx)) {
   509       // Allocation was successful and _n_card_counts has
   510       // been updated to the new size. We only need to clear
   511       // the epochs so we don't read a bogus epoch value
   512       // when inserting a card into the hot card cache.
   513       Copy::fill_to_bytes(&_card_epochs[0], _n_card_counts * sizeof(CardEpochCacheEntry));
   514     }
   515     _expand_card_counts = false;
   516   }
   518   int this_epoch = (int) _n_periods;
   519   assert((this_epoch+1) <= max_jint, "too many periods");
   520   // Update epoch
   521   _n_periods++;
   523 #ifndef PRODUCT
   524   double elapsed = os::elapsedTime() - start;
   525   _g1h->g1_policy()->record_cc_clear_time(elapsed * 1000.0);
   526 #endif
   527 }
   529 void ConcurrentG1Refine::print_worker_threads_on(outputStream* st) const {
   530   for (int i = 0; i < _n_threads; ++i) {
   531     _threads[i]->print_on(st);
   532     st->cr();
   533   }
   534 }
