Mon, 28 Mar 2011 10:58:54 -0700
7026932: G1: No need to abort VM when card count cache expansion fails
Summary: Manage allocation/freeing of the counts and epochs arrays of the card counts cache directly so that an allocation failure while attempting to expand these arrays does not abort the JVM. Failure to expand these arrays is not fatal.
Reviewed-by: iveresov, tonyp
/*
 * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "gc_implementation/g1/concurrentG1Refine.hpp"
#include "gc_implementation/g1/concurrentG1RefineThread.hpp"
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1CollectorPolicy.hpp"
#include "gc_implementation/g1/g1RemSet.hpp"
#include "gc_implementation/g1/heapRegionSeq.inline.hpp"
#include "memory/space.inline.hpp"
#include "runtime/atomic.hpp"
#include "runtime/java.hpp"
#include "utilities/copy.hpp"

// Possible sizes for the card counts cache: odd primes that roughly double in size.
// (See jvmtiTagMap.cpp).

#define MAX_SIZE ((size_t) -1)
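
// The table below must end with a sentinel that can never be exceeded:
// expand_card_count_cache() clamps the requested size to _max_n_card_counts,
// so the MAX_SIZE entry guarantees that the final expansion step can always
// cover the largest cache we will ever ask for.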
size_t ConcurrentG1Refine::_cc_cache_sizes[] = {
    16381,    32771,    76831,   150001,   307261,
   614563,  1228891,  2457733,  4915219,  9830479,
 19660831, 39321619, 78643219, 157286461, MAX_SIZE
};

ConcurrentG1Refine::ConcurrentG1Refine() :
  _card_counts(NULL), _card_epochs(NULL),
  _n_card_counts(0), _max_cards(0), _max_n_card_counts(0),
  _cache_size_index(0), _expand_card_counts(false),
  _hot_cache(NULL),
  _def_use_cache(false), _use_cache(false),
  _n_periods(0),
  _threads(NULL), _n_threads(0)
{
  // Ergonomically select initial concurrent refinement parameters
  if (FLAG_IS_DEFAULT(G1ConcRefinementGreenZone)) {
    FLAG_SET_DEFAULT(G1ConcRefinementGreenZone, MAX2<int>(ParallelGCThreads, 1));
  }
  set_green_zone(G1ConcRefinementGreenZone);

  if (FLAG_IS_DEFAULT(G1ConcRefinementYellowZone)) {
    FLAG_SET_DEFAULT(G1ConcRefinementYellowZone, green_zone() * 3);
  }
  set_yellow_zone(MAX2<int>(G1ConcRefinementYellowZone, green_zone()));

  if (FLAG_IS_DEFAULT(G1ConcRefinementRedZone)) {
    FLAG_SET_DEFAULT(G1ConcRefinementRedZone, yellow_zone() * 2);
  }
  set_red_zone(MAX2<int>(G1ConcRefinementRedZone, yellow_zone()));
  _n_worker_threads = thread_num();
  // We need one extra thread to do the young gen rset size sampling.
  _n_threads = _n_worker_threads + 1;
  reset_threshold_step();

  _threads = NEW_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _n_threads);
  int worker_id_offset = (int)DirtyCardQueueSet::num_par_ids();
  ConcurrentG1RefineThread *next = NULL;
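  // Create the threads in reverse order so that each one can be handed
  // a pointer to its successor in the list through 'next'.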
  for (int i = _n_threads - 1; i >= 0; i--) {
    ConcurrentG1RefineThread* t = new ConcurrentG1RefineThread(this, next, worker_id_offset, i);
    assert(t != NULL, "Conc refine should have been created");
    assert(t->cg1r() == this, "Conc refine thread should refer to this");
    _threads[i] = t;
    next = t;
  }
}
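
// The threshold step determines how far apart the activation thresholds
// of successive worker threads are spread between the green and yellow
// zones.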
void ConcurrentG1Refine::reset_threshold_step() {
  if (FLAG_IS_DEFAULT(G1ConcRefinementThresholdStep)) {
    _thread_threshold_step = (yellow_zone() - green_zone()) / (worker_thread_num() + 1);
  } else {
    _thread_threshold_step = G1ConcRefinementThresholdStep;
  }
}

int ConcurrentG1Refine::thread_num() {
  return MAX2<int>((G1ConcRefinementThreads > 0) ? G1ConcRefinementThreads : ParallelGCThreads, 1);
}

void ConcurrentG1Refine::init() {
  if (G1ConcRSLogCacheSize > 0) {
    _g1h = G1CollectedHeap::heap();

    _max_cards = _g1h->max_capacity() >> CardTableModRefBS::card_shift;
    _max_n_card_counts = _max_cards * G1MaxHotCardCountSizePercent / 100;

    size_t max_card_num = ((size_t)1 << (sizeof(unsigned)*BitsPerByte-1)) - 1;
    guarantee(_max_cards < max_card_num, "card_num representation");
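    // The guarantee above ensures that a card number can be represented
    // in the unsigned card_num field that add_card_count() packs, together
    // with the epoch, into a single 64-bit CardEpochCacheEntry value.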

    // We need _n_card_counts to be less than _max_n_card_counts here
    // so that the expansion call (below) actually allocates the
    // _counts and _epochs arrays.
    assert(_n_card_counts == 0, "pre-condition");
    assert(_max_n_card_counts > 0, "pre-condition");

    // Find the index into the cache size array of a size that is
    // large enough to hold desired_sz.
    size_t desired_sz = _max_cards / InitialCacheFraction;
    int desired_sz_index = 0;
    while (_cc_cache_sizes[desired_sz_index] < desired_sz) {
      desired_sz_index += 1;
      assert(desired_sz_index < MAX_CC_CACHE_INDEX, "invariant");
    }
    assert(desired_sz_index < MAX_CC_CACHE_INDEX, "invariant");

    // If the desired_sz value is between two sizes, i.e.
    //   _cc_cache_sizes[desired_sz_index-1] < desired_sz <= _cc_cache_sizes[desired_sz_index],
    // we will start with the lower size in the optimistic expectation that
    // we will not need to expand up. Note that desired_sz_index could also be 0.
    if (desired_sz_index > 0 &&
        _cc_cache_sizes[desired_sz_index] > desired_sz) {
      desired_sz_index -= 1;
    }

    if (!expand_card_count_cache(desired_sz_index)) {
      // Allocation was unsuccessful - exit
      vm_exit_during_initialization("Could not reserve enough space for card count cache");
    }
    assert(_n_card_counts > 0, "post-condition");
    assert(_cache_size_index == desired_sz_index, "post-condition");

    Copy::fill_to_bytes(&_card_counts[0],
                        _n_card_counts * sizeof(CardCountCacheEntry));
    Copy::fill_to_bytes(&_card_epochs[0],
                        _n_card_counts * sizeof(CardEpochCacheEntry));

    ModRefBarrierSet* bs = _g1h->mr_bs();
    guarantee(bs->is_a(BarrierSet::CardTableModRef), "Precondition");
    _ct_bs = (CardTableModRefBS*)bs;
    _ct_bot = _ct_bs->byte_for_const(_g1h->reserved_region().start());

    _def_use_cache = true;
    _use_cache = true;
    _hot_cache_size = (1 << G1ConcRSLogCacheSize);
    _hot_cache = NEW_C_HEAP_ARRAY(jbyte*, _hot_cache_size);
    _n_hot = 0;
    _hot_cache_idx = 0;

    // For refining the cards in the hot cache in parallel
    int n_workers = (ParallelGCThreads > 0 ?
                        _g1h->workers()->total_workers() : 1);
    _hot_cache_par_chunk_size = MAX2(1, _hot_cache_size / n_workers);
    _hot_cache_par_claimed_idx = 0;
  }
}

void ConcurrentG1Refine::stop() {
  if (_threads != NULL) {
    for (int i = 0; i < _n_threads; i++) {
      _threads[i]->stop();
    }
  }
}

void ConcurrentG1Refine::reinitialize_threads() {
  reset_threshold_step();
  if (_threads != NULL) {
    for (int i = 0; i < _n_threads; i++) {
      _threads[i]->initialize();
    }
  }
}

ConcurrentG1Refine::~ConcurrentG1Refine() {
  if (G1ConcRSLogCacheSize > 0) {
    // Please see the comment in allocate_card_count_cache
    // for why we call os::malloc() and os::free() directly.
    assert(_card_counts != NULL, "Logic");
    os::free(_card_counts);
    assert(_card_epochs != NULL, "Logic");
    os::free(_card_epochs);

    assert(_hot_cache != NULL, "Logic");
    FREE_C_HEAP_ARRAY(jbyte*, _hot_cache);
  }
  if (_threads != NULL) {
    for (int i = 0; i < _n_threads; i++) {
      delete _threads[i];
    }
    FREE_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _threads);
  }
}

void ConcurrentG1Refine::threads_do(ThreadClosure *tc) {
  if (_threads != NULL) {
    for (int i = 0; i < _n_threads; i++) {
      tc->do_thread(_threads[i]);
    }
  }
}

bool ConcurrentG1Refine::is_young_card(jbyte* card_ptr) {
  HeapWord* start = _ct_bs->addr_for(card_ptr);
  HeapRegion* r = _g1h->heap_region_containing(start);
  if (r != NULL && r->is_young()) {
    return true;
  }
  // Either the card is not associated with a heap region, or the
  // region it is associated with is not young.
  return false;
}

jbyte* ConcurrentG1Refine::add_card_count(jbyte* card_ptr, int* count, bool* defer) {
  unsigned new_card_num = ptr_2_card_num(card_ptr);
  unsigned bucket = hash(new_card_num);
  assert(0 <= bucket && bucket < _n_card_counts, "Bounds");

  CardCountCacheEntry* count_ptr = &_card_counts[bucket];
  CardEpochCacheEntry* epoch_ptr = &_card_epochs[bucket];

  // We have to construct a new entry if we haven't updated the counts
  // during the current period, or if the count was updated for a
  // different card number.
  unsigned int new_epoch = (unsigned int) _n_periods;
  julong new_epoch_entry = make_epoch_entry(new_card_num, new_epoch);
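
  // The epoch entry packs the card number and the epoch into a single
  // 64-bit value so that both fields can be claimed with one CAS below.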
  while (true) {
    // Fetch the previous epoch value
    julong prev_epoch_entry = epoch_ptr->_value;
    julong cas_res;

    if (extract_epoch(prev_epoch_entry) != new_epoch) {
      // This entry has not yet been updated during this period.
      // Note: we update the epoch value atomically to ensure
      // that there is only one winner that updates the cached
      // card_ptr value even though all the refine threads share
      // the same epoch value.

      cas_res = (julong) Atomic::cmpxchg((jlong) new_epoch_entry,
                                         (volatile jlong*)&epoch_ptr->_value,
                                         (jlong) prev_epoch_entry);

      if (cas_res == prev_epoch_entry) {
        // We have successfully won the race to update the
        // epoch and card_num value. Make it look like the
        // count and eviction count were previously cleared.
        count_ptr->_count = 1;
        count_ptr->_evict_count = 0;
        *count = 0;
        // We can defer the processing of card_ptr
        *defer = true;
        return card_ptr;
      }
      // We did not win the race to update the epoch field, so some other
      // thread must have done it. The value that gets returned by CAS
      // should be the new epoch value.
      assert(extract_epoch(cas_res) == new_epoch, "unexpected epoch");
      // We could 'continue' here or just re-read the previous epoch value
      prev_epoch_entry = epoch_ptr->_value;
    }

    // The epoch entry for card_ptr has been updated during this period.
    unsigned old_card_num = extract_card_num(prev_epoch_entry);

    // The card count that will be returned to caller
    *count = count_ptr->_count;

    // Are we updating the count for the same card?
    if (new_card_num == old_card_num) {
      // Same card - just update the count. We could have more than one
      // thread racing to update count for the current card. It should be
      // OK not to use a CAS as the only penalty should be some missed
      // increments of the count which delays identifying the card as "hot".

      if (*count < max_jubyte) count_ptr->_count++;
      // We can defer the processing of card_ptr
      *defer = true;
      return card_ptr;
    }

    // Different card - evict old card info
    if (count_ptr->_evict_count < max_jubyte) count_ptr->_evict_count++;
    if (count_ptr->_evict_count > G1CardCountCacheExpandThreshold) {
      // Trigger a resize the next time we clear
      _expand_card_counts = true;
    }
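
    // Attempt to install the new card num (with the current epoch) in
    // the entry, evicting the old card's information.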
    cas_res = (julong) Atomic::cmpxchg((jlong) new_epoch_entry,
                                       (volatile jlong*)&epoch_ptr->_value,
                                       (jlong) prev_epoch_entry);

    if (cas_res == prev_epoch_entry) {
      // We successfully updated the card num value in the epoch entry
      count_ptr->_count = 0; // initialize counter for new card num
      jbyte* old_card_ptr = card_num_2_ptr(old_card_num);

      // Even though the region containing the card at old_card_num was not
      // in the young list when old_card_num was recorded in the epoch
      // cache it could have been added to the free list and subsequently
      // added to the young list in the intervening time. See CR 6817995.
      // We do not deal with this case here - it will be handled in
      // HeapRegion::oops_on_card_seq_iterate_careful after it has been
      // determined that the region containing the card has been allocated
      // to, and it's safe to check the young type of the region.

      // We do not want to defer processing of card_ptr in this case
      // (we need to refine old_card_ptr and card_ptr)
      *defer = false;
      return old_card_ptr;
    }
    // Someone else beat us - try again.
  }
}

jbyte* ConcurrentG1Refine::cache_insert(jbyte* card_ptr, bool* defer) {
  int count;
  jbyte* cached_ptr = add_card_count(card_ptr, &count, defer);
  assert(cached_ptr != NULL, "bad cached card ptr");

  // We've just inserted a card pointer into the card count cache
  // and got back either the card we just inserted or, if an entry
  // was evicted, the previous contents of that count slot.

  // The card we got back could be in a young region. When the
  // returned card (if evicted) was originally inserted, we had
  // determined that its containing region was not young. However
  // it is possible for the region to be freed during a cleanup
  // pause, then reallocated and tagged as young which will result
  // in the returned card residing in a young region.
  //
  // We do not deal with this case here - the change from non-young
  // to young could be observed at any time - it will be handled in
  // HeapRegion::oops_on_card_seq_iterate_careful after it has been
  // determined that the region containing the card has been allocated
  // to.

  // If the count is below the hot card limit, the card pointer we
  // obtained from the card count cache is not hot, so do not store
  // it in the cache; return it for immediate refining.
  if (count < G1ConcRSHotCardLimit) {
    return cached_ptr;
  }

  // Otherwise, the pointer we got from the _card_counts cache is hot.
  jbyte* res = NULL;
  MutexLockerEx x(HotCardCache_lock, Mutex::_no_safepoint_check_flag);
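  // _hot_cache is a circular buffer: if it is full, the oldest entry
  // (at _hot_cache_idx) is evicted and handed back to the caller for
  // immediate refinement before the new entry takes its slot.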
  if (_n_hot == _hot_cache_size) {
    res = _hot_cache[_hot_cache_idx];
    _n_hot--;
  }
  // Now _n_hot < _hot_cache_size, and we can insert at _hot_cache_idx.
  _hot_cache[_hot_cache_idx] = cached_ptr;
  _hot_cache_idx++;
  if (_hot_cache_idx == _hot_cache_size) _hot_cache_idx = 0;
  _n_hot++;

  // The card obtained from the hot card cache could be in a young
  // region. See above on how this can happen.

  return res;
}

void ConcurrentG1Refine::clean_up_cache(int worker_i,
                                        G1RemSet* g1rs,
                                        DirtyCardQueue* into_cset_dcq) {
  assert(!use_cache(), "cache should be disabled");
  int start_idx;

  while ((start_idx = _hot_cache_par_claimed_idx) < _n_hot) { // read once
    int end_idx = start_idx + _hot_cache_par_chunk_size;
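
    // Workers race to claim the chunk [start_idx, end_idx) by advancing
    // the shared claim index with a CAS; a worker that loses the race
    // simply retries with the updated index.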
    if (start_idx ==
        Atomic::cmpxchg(end_idx, &_hot_cache_par_claimed_idx, start_idx)) {
      // The current worker has successfully claimed the chunk [start_idx..end_idx)
      end_idx = MIN2(end_idx, _n_hot);
      for (int i = start_idx; i < end_idx; i++) {
        jbyte* entry = _hot_cache[i];
        if (entry != NULL) {
          if (g1rs->concurrentRefineOneCard(entry, worker_i, true)) {
            // 'entry' contains references that point into the current
            // collection set. We need to record 'entry' in the DCQS
            // that's used for that purpose.
            //
            // The only time we care about recording cards that contain
            // references that point into the collection set is during
            // RSet updating while within an evacuation pause.
            // In this case worker_i should be the id of a GC worker thread
            assert(SafepointSynchronize::is_at_safepoint(), "not during an evacuation pause");
            assert(worker_i < (int) (ParallelGCThreads == 0 ? 1 : ParallelGCThreads), "incorrect worker id");
            into_cset_dcq->enqueue(entry);
          }
        }
      }
    }
  }
}

// The arrays used to hold the card counts and the epochs must have
// a 1:1 correspondence. Hence they are allocated and freed together.
// Returns true if the allocations of both the counts and epochs
// were successful; false otherwise.
bool ConcurrentG1Refine::allocate_card_count_cache(size_t n,
                                                   CardCountCacheEntry** counts,
                                                   CardEpochCacheEntry** epochs) {
  // We call the allocation/free routines directly for the counts
  // and epochs arrays. The NEW_C_HEAP_ARRAY/FREE_C_HEAP_ARRAY
  // macros call AllocateHeap and FreeHeap respectively.
  // AllocateHeap will call vm_exit_out_of_memory in the event
  // of an allocation failure and abort the JVM. With the
  // _counts/epochs arrays we only need to abort the JVM if the
  // initial allocation of these arrays fails.
  //
  // Additionally AllocateHeap/FreeHeap do some tracing of
  // allocate/free calls so calling one without calling the
  // other can cause inconsistencies in the tracing. So we
  // call neither.

  assert(*counts == NULL, "out param");
  assert(*epochs == NULL, "out param");

  size_t counts_size = n * sizeof(CardCountCacheEntry);
  size_t epochs_size = n * sizeof(CardEpochCacheEntry);

  *counts = (CardCountCacheEntry*) os::malloc(counts_size);
  if (*counts == NULL) {
    // allocation was unsuccessful
    return false;
  }

  *epochs = (CardEpochCacheEntry*) os::malloc(epochs_size);
  if (*epochs == NULL) {
    // allocation was unsuccessful - free counts array
    assert(*counts != NULL, "must be");
    os::free(*counts);
    *counts = NULL;
    return false;
  }

  // We successfully allocated both counts and epochs
  return true;
}

// Returns true if the card counts/epochs cache was
// successfully expanded; false otherwise.
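// Failure to expand is deliberately non-fatal: the caller decides how to
// respond. init() aborts the VM if the initial allocation fails, while
// clear_and_record_card_counts() simply carries on with the current size.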
bool ConcurrentG1Refine::expand_card_count_cache(int cache_size_idx) {
  // Can we expand the card count and epoch tables?
  if (_n_card_counts < _max_n_card_counts) {
    assert(cache_size_idx >= 0 && cache_size_idx < MAX_CC_CACHE_INDEX, "oob");

    size_t cache_size = _cc_cache_sizes[cache_size_idx];
    // Make sure we don't go bigger than we will ever need
    cache_size = MIN2(cache_size, _max_n_card_counts);

    // Should we expand the card count and card epoch tables?
    if (cache_size > _n_card_counts) {
      // We have been asked to allocate new, larger, arrays for
      // the card counts and the epochs. Attempt the allocation
      // of both before we free the existing arrays in case
      // the allocation is unsuccessful...
      CardCountCacheEntry* counts = NULL;
      CardEpochCacheEntry* epochs = NULL;

      if (allocate_card_count_cache(cache_size, &counts, &epochs)) {
        // Allocation was successful.
        // We can just free the old arrays; we're
        // not interested in preserving the contents
        if (_card_counts != NULL) os::free(_card_counts);
        if (_card_epochs != NULL) os::free(_card_epochs);

        // Cache the size of the arrays and the index that got us there.
        _n_card_counts = cache_size;
        _cache_size_index = cache_size_idx;

        _card_counts = counts;
        _card_epochs = epochs;

        // We successfully allocated/expanded the caches.
        return true;
      }
    }
  }

  // We did not successfully expand the caches.
  return false;
}

void ConcurrentG1Refine::clear_and_record_card_counts() {
  if (G1ConcRSLogCacheSize == 0) return;

#ifndef PRODUCT
  double start = os::elapsedTime();
#endif

  if (_expand_card_counts) {
    int new_idx = _cache_size_index + 1;

    if (expand_card_count_cache(new_idx)) {
      // Allocation was successful and _n_card_counts has
      // been updated to the new size. We only need to clear
      // the epochs so we don't read a bogus epoch value
      // when inserting a card into the hot card cache.
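      // The counts themselves don't need to be cleared: a stale
      // entry is detected by its epoch mismatch and its count is
      // reset by add_card_count() on first use.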
      Copy::fill_to_bytes(&_card_epochs[0], _n_card_counts * sizeof(CardEpochCacheEntry));
    }
    _expand_card_counts = false;
  }

  int this_epoch = (int) _n_periods;
  assert((this_epoch+1) <= max_jint, "too many periods");
  // Update epoch
  _n_periods++;

#ifndef PRODUCT
  double elapsed = os::elapsedTime() - start;
  _g1h->g1_policy()->record_cc_clear_time(elapsed * 1000.0);
#endif
}

void ConcurrentG1Refine::print_worker_threads_on(outputStream* st) const {
  for (int i = 0; i < _n_threads; ++i) {
    _threads[i]->print_on(st);
    st->cr();
  }
}