Tue, 21 Aug 2012 14:10:39 -0700
7185699: G1: Prediction model discrepancies
Summary: Correct the result value of G1CollectedHeap::pending_card_num(). Change the code that calculates the GC efficiency of a non-young heap region to use historical data from mixed GCs and the actual number of live bytes when predicting how long it would take to collect the region. Changes were also reviewed by Thomas Schatzl.
Reviewed-by: azeemj, brutisso
1 /*
2 * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
25 #include "precompiled.hpp"
26 #include "gc_implementation/g1/concurrentG1Refine.hpp"
27 #include "gc_implementation/g1/concurrentG1RefineThread.hpp"
28 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
29 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
30 #include "gc_implementation/g1/g1GCPhaseTimes.hpp"
31 #include "gc_implementation/g1/g1RemSet.hpp"
32 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
33 #include "memory/space.inline.hpp"
34 #include "runtime/atomic.hpp"
35 #include "runtime/java.hpp"
36 #include "utilities/copy.hpp"
38 // Possible sizes for the card counts cache: odd primes that roughly double in size.
39 // (See jvmtiTagMap.cpp).
41 #define MAX_SIZE ((size_t) -1)
// Candidate sizes for the card count cache hash table: odd primes that
// roughly double from one entry to the next (same scheme as
// jvmtiTagMap.cpp). The trailing MAX_SIZE sentinel marks the point
// beyond which the cache can grow no further.
size_t ConcurrentG1Refine::_cc_cache_sizes[] = {
    16381,    32771,    76831,    150001,     307261,
   614563,  1228891,  2457733,   4915219,    9830479,
 19660831, 39321619, 78643219, 157286461, MAX_SIZE
  };
// Construct the refinement manager: establish the green/yellow/red
// work-level zones (ergonomically when the flags are left at their
// defaults) and create the refinement threads. The card count / hot
// card caches are allocated later, in init().
ConcurrentG1Refine::ConcurrentG1Refine() :
  _card_counts(NULL), _card_epochs(NULL),
  _n_card_counts(0), _max_cards(0), _max_n_card_counts(0),
  _cache_size_index(0), _expand_card_counts(false),
  _hot_cache(NULL),
  _def_use_cache(false), _use_cache(false),
  // We initialize the epochs of the array to 0. By initializing
  // _n_periods to 1 and not 0 we automatically invalidate all the
  // entries on the array. Otherwise we might accidentally think that
  // we claimed a card that was in fact never set (see CR7033292).
  _n_periods(1),
  _threads(NULL), _n_threads(0)
{
  // Ergonomically select initial concurrent refinement parameters.
  if (FLAG_IS_DEFAULT(G1ConcRefinementGreenZone)) {
    FLAG_SET_DEFAULT(G1ConcRefinementGreenZone, MAX2<int>(ParallelGCThreads, 1));
  }
  set_green_zone(G1ConcRefinementGreenZone);

  if (FLAG_IS_DEFAULT(G1ConcRefinementYellowZone)) {
    FLAG_SET_DEFAULT(G1ConcRefinementYellowZone, green_zone() * 3);
  }
  // The yellow zone must never drop below the green zone.
  set_yellow_zone(MAX2<int>(G1ConcRefinementYellowZone, green_zone()));

  if (FLAG_IS_DEFAULT(G1ConcRefinementRedZone)) {
    FLAG_SET_DEFAULT(G1ConcRefinementRedZone, yellow_zone() * 2);
  }
  // Likewise the red zone must never drop below the yellow zone.
  set_red_zone(MAX2<int>(G1ConcRefinementRedZone, yellow_zone()));
  _n_worker_threads = thread_num();
  // We need one extra thread to do the young gen rset size sampling.
  _n_threads = _n_worker_threads + 1;
  reset_threshold_step();

  _threads = NEW_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _n_threads, mtGC);
  int worker_id_offset = (int)DirtyCardQueueSet::num_par_ids();
  ConcurrentG1RefineThread *next = NULL;
  // Create the threads last-to-first so each one can be handed a link
  // to its successor in the chain.
  for (int i = _n_threads - 1; i >= 0; i--) {
    ConcurrentG1RefineThread* t = new ConcurrentG1RefineThread(this, next, worker_id_offset, i);
    assert(t != NULL, "Conc refine should have been created");
    assert(t->cg1r() == this, "Conc refine thread should refer to this");
    _threads[i] = t;
    next = t;
  }
}
95 void ConcurrentG1Refine::reset_threshold_step() {
96 if (FLAG_IS_DEFAULT(G1ConcRefinementThresholdStep)) {
97 _thread_threshold_step = (yellow_zone() - green_zone()) / (worker_thread_num() + 1);
98 } else {
99 _thread_threshold_step = G1ConcRefinementThresholdStep;
100 }
101 }
103 int ConcurrentG1Refine::thread_num() {
104 return MAX2<int>((G1ConcRefinementThreads > 0) ? G1ConcRefinementThreads : ParallelGCThreads, 1);
105 }
// One-time set-up of the card count cache and hot card cache. Only
// active when G1ConcRSLogCacheSize > 0; otherwise refinement runs
// without either cache.
void ConcurrentG1Refine::init() {
  if (G1ConcRSLogCacheSize > 0) {
    _g1h = G1CollectedHeap::heap();

    // Upper bounds: one card per card-sized chunk of the maximum heap,
    // and a cache capped at G1MaxHotCardCountSizePercent of that.
    _max_cards = _g1h->max_capacity() >> CardTableModRefBS::card_shift;
    _max_n_card_counts = _max_cards * G1MaxHotCardCountSizePercent / 100;

    // Card numbers must fit in an unsigned minus one bit (they are
    // packed into the epoch entry word) - verify the representation.
    size_t max_card_num = ((size_t)1 << (sizeof(unsigned)*BitsPerByte-1)) - 1;
    guarantee(_max_cards < max_card_num, "card_num representation");

    // We need _n_card_counts to be less than _max_n_card_counts here
    // so that the expansion call (below) actually allocates the
    // _counts and _epochs arrays.
    assert(_n_card_counts == 0, "pre-condition");
    assert(_max_n_card_counts > 0, "pre-condition");

    // Find the index into cache size array that is of a size that's
    // large enough to hold desired_sz.
    size_t desired_sz = _max_cards / InitialCacheFraction;
    int desired_sz_index = 0;
    while (_cc_cache_sizes[desired_sz_index] < desired_sz) {
      desired_sz_index += 1;
      assert(desired_sz_index < MAX_CC_CACHE_INDEX, "invariant");
    }
    assert(desired_sz_index < MAX_CC_CACHE_INDEX, "invariant");

    // If the desired_sz value is between two sizes then
    // _cc_cache_sizes[desired_sz_index-1] < desired_sz <= _cc_cache_sizes[desired_sz_index]
    // we will start with the lower size in the optimistic expectation that
    // we will not need to expand up. Note desired_sz_index could also be 0.
    if (desired_sz_index > 0 &&
        _cc_cache_sizes[desired_sz_index] > desired_sz) {
      desired_sz_index -= 1;
    }

    if (!expand_card_count_cache(desired_sz_index)) {
      // Allocation was unsuccessful - exit
      vm_exit_during_initialization("Could not reserve enough space for card count cache");
    }
    assert(_n_card_counts > 0, "post-condition");
    assert(_cache_size_index == desired_sz_index, "post-condition");

    // Zero-fill the freshly allocated arrays (the fill value defaults
    // to 0 in Copy::fill_to_bytes).
    Copy::fill_to_bytes(&_card_counts[0],
                        _n_card_counts * sizeof(CardCountCacheEntry));
    Copy::fill_to_bytes(&_card_epochs[0], _n_card_counts * sizeof(CardEpochCacheEntry));

    ModRefBarrierSet* bs = _g1h->mr_bs();
    guarantee(bs->is_a(BarrierSet::CardTableModRef), "Precondition");
    _ct_bs = (CardTableModRefBS*)bs;
    // Card-table byte for the bottom of the reserved heap - used to
    // translate between card pointers and card numbers.
    _ct_bot = _ct_bs->byte_for_const(_g1h->reserved_region().start());

    _def_use_cache = true;
    _use_cache = true;
    _hot_cache_size = (1 << G1ConcRSLogCacheSize);
    _hot_cache = NEW_C_HEAP_ARRAY(jbyte*, _hot_cache_size, mtGC);
    _n_hot = 0;
    _hot_cache_idx = 0;

    // For refining the cards in the hot cache in parallel
    int n_workers = (ParallelGCThreads > 0 ?
                        _g1h->workers()->total_workers() : 1);
    _hot_cache_par_chunk_size = MAX2(1, _hot_cache_size / n_workers);
    _hot_cache_par_claimed_idx = 0;
  }
}
173 void ConcurrentG1Refine::stop() {
174 if (_threads != NULL) {
175 for (int i = 0; i < _n_threads; i++) {
176 _threads[i]->stop();
177 }
178 }
179 }
181 void ConcurrentG1Refine::reinitialize_threads() {
182 reset_threshold_step();
183 if (_threads != NULL) {
184 for (int i = 0; i < _n_threads; i++) {
185 _threads[i]->initialize();
186 }
187 }
188 }
190 ConcurrentG1Refine::~ConcurrentG1Refine() {
191 if (G1ConcRSLogCacheSize > 0) {
192 // Please see the comment in allocate_card_count_cache
193 // for why we call os::malloc() and os::free() directly.
194 assert(_card_counts != NULL, "Logic");
195 os::free(_card_counts, mtGC);
196 assert(_card_epochs != NULL, "Logic");
197 os::free(_card_epochs, mtGC);
199 assert(_hot_cache != NULL, "Logic");
200 FREE_C_HEAP_ARRAY(jbyte*, _hot_cache, mtGC);
201 }
202 if (_threads != NULL) {
203 for (int i = 0; i < _n_threads; i++) {
204 delete _threads[i];
205 }
206 FREE_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _threads, mtGC);
207 }
208 }
210 void ConcurrentG1Refine::threads_do(ThreadClosure *tc) {
211 if (_threads != NULL) {
212 for (int i = 0; i < _n_threads; i++) {
213 tc->do_thread(_threads[i]);
214 }
215 }
216 }
218 bool ConcurrentG1Refine::is_young_card(jbyte* card_ptr) {
219 HeapWord* start = _ct_bs->addr_for(card_ptr);
220 HeapRegion* r = _g1h->heap_region_containing(start);
221 if (r != NULL && r->is_young()) {
222 return true;
223 }
224 // This card is not associated with a heap region
225 // so can't be young.
226 return false;
227 }
// Record card_ptr in the card count cache and return the card the
// caller should deal with, together with its observed count. Three
// outcomes are possible:
//  - the bucket was stale (epoch mismatch): claim it with a CAS,
//    reset the counts, and return card_ptr with *defer = true;
//  - the bucket already tracks card_ptr: bump its count (racily, on
//    purpose) and return card_ptr with *defer = true;
//  - the bucket tracks a different card: evict it - the *evicted*
//    card is returned with *defer = false, meaning both it and
//    card_ptr need immediate refinement.
// Lock-free: the epoch word is the only field updated via CAS; count
// updates tolerate races (see comments below).
jbyte* ConcurrentG1Refine::add_card_count(jbyte* card_ptr, int* count, bool* defer) {
  unsigned new_card_num = ptr_2_card_num(card_ptr);
  unsigned bucket = hash(new_card_num);
  assert(0 <= bucket && bucket < _n_card_counts, "Bounds");

  CardCountCacheEntry* count_ptr = &_card_counts[bucket];
  CardEpochCacheEntry* epoch_ptr = &_card_epochs[bucket];

  // We have to construct a new entry if we haven't updated the counts
  // during the current period, or if the count was updated for a
  // different card number.
  unsigned int new_epoch = (unsigned int) _n_periods;
  julong new_epoch_entry = make_epoch_entry(new_card_num, new_epoch);

  while (true) {
    // Fetch the previous epoch value
    julong prev_epoch_entry = epoch_ptr->_value;
    julong cas_res;

    if (extract_epoch(prev_epoch_entry) != new_epoch) {
      // This entry has not yet been updated during this period.
      // Note: we update the epoch value atomically to ensure
      // that there is only one winner that updates the cached
      // card_ptr value even though all the refine threads share
      // the same epoch value.

      cas_res = (julong) Atomic::cmpxchg((jlong) new_epoch_entry,
                                         (volatile jlong*)&epoch_ptr->_value,
                                         (jlong) prev_epoch_entry);

      if (cas_res == prev_epoch_entry) {
        // We have successfully won the race to update the
        // epoch and card_num value. Make it look like the
        // count and eviction count were previously cleared.
        count_ptr->_count = 1;
        count_ptr->_evict_count = 0;
        *count = 0;
        // We can defer the processing of card_ptr
        *defer = true;
        return card_ptr;
      }
      // We did not win the race to update the epoch field, so some other
      // thread must have done it. The value that gets returned by CAS
      // should be the new epoch value.
      assert(extract_epoch(cas_res) == new_epoch, "unexpected epoch");
      // We could 'continue' here or just re-read the previous epoch value
      prev_epoch_entry = epoch_ptr->_value;
    }

    // The epoch entry for card_ptr has been updated during this period.
    unsigned old_card_num = extract_card_num(prev_epoch_entry);

    // The card count that will be returned to caller
    *count = count_ptr->_count;

    // Are we updating the count for the same card?
    if (new_card_num == old_card_num) {
      // Same card - just update the count. We could have more than one
      // thread racing to update count for the current card. It should be
      // OK not to use a CAS as the only penalty should be some missed
      // increments of the count which delays identifying the card as "hot".

      if (*count < max_jubyte) count_ptr->_count++;
      // We can defer the processing of card_ptr
      *defer = true;
      return card_ptr;
    }

    // Different card - evict old card info
    if (count_ptr->_evict_count < max_jubyte) count_ptr->_evict_count++;
    if (count_ptr->_evict_count > G1CardCountCacheExpandThreshold) {
      // Trigger a resize the next time we clear
      _expand_card_counts = true;
    }

    cas_res = (julong) Atomic::cmpxchg((jlong) new_epoch_entry,
                                       (volatile jlong*)&epoch_ptr->_value,
                                       (jlong) prev_epoch_entry);

    if (cas_res == prev_epoch_entry) {
      // We successfully updated the card num value in the epoch entry
      count_ptr->_count = 0; // initialize counter for new card num
      jbyte* old_card_ptr = card_num_2_ptr(old_card_num);

      // Even though the region containing the card at old_card_num was not
      // in the young list when old_card_num was recorded in the epoch
      // cache it could have been added to the free list and subsequently
      // added to the young list in the intervening time. See CR 6817995.
      // We do not deal with this case here - it will be handled in
      // HeapRegion::oops_on_card_seq_iterate_careful after it has been
      // determined that the region containing the card has been allocated
      // to, and it's safe to check the young type of the region.

      // We do not want to defer processing of card_ptr in this case
      // (we need to refine old_card_ptr and card_ptr)
      *defer = false;
      return old_card_ptr;
    }
    // Someone else beat us - try again.
  }
}
// Insert card_ptr into the two-level cache. Returns the card the
// caller should refine immediately, or NULL if nothing needs it:
//  - a "cold" card (count below G1ConcRSHotCardLimit) is returned
//    directly for refining;
//  - a "hot" card is stashed in the hot card cache; if that cache was
//    full, the displaced entry is returned instead.
// *defer is set by add_card_count (see its contract).
jbyte* ConcurrentG1Refine::cache_insert(jbyte* card_ptr, bool* defer) {
  int count;
  jbyte* cached_ptr = add_card_count(card_ptr, &count, defer);
  assert(cached_ptr != NULL, "bad cached card ptr");

  // We've just inserted a card pointer into the card count cache
  // and got back the card that we just inserted or (evicted) the
  // previous contents of that count slot.

  // The card we got back could be in a young region. When the
  // returned card (if evicted) was originally inserted, we had
  // determined that its containing region was not young. However
  // it is possible for the region to be freed during a cleanup
  // pause, then reallocated and tagged as young which will result
  // in the returned card residing in a young region.
  //
  // We do not deal with this case here - the change from non-young
  // to young could be observed at any time - it will be handled in
  // HeapRegion::oops_on_card_seq_iterate_careful after it has been
  // determined that the region containing the card has been allocated
  // to.

  // The card pointer we obtained from card count cache is not hot
  // so do not store it in the cache; return it for immediate
  // refining.
  if (count < G1ConcRSHotCardLimit) {
    return cached_ptr;
  }

  // Otherwise, the pointer we got from the _card_counts cache is hot.
  jbyte* res = NULL;
  MutexLockerEx x(HotCardCache_lock, Mutex::_no_safepoint_check_flag);
  if (_n_hot == _hot_cache_size) {
    // Cache is full - evict the entry at the insertion point and hand
    // it back to the caller for immediate refinement.
    res = _hot_cache[_hot_cache_idx];
    _n_hot--;
  }
  // Now _n_hot < _hot_cache_size, and we can insert at _hot_cache_idx.
  _hot_cache[_hot_cache_idx] = cached_ptr;
  _hot_cache_idx++;
  if (_hot_cache_idx == _hot_cache_size) _hot_cache_idx = 0; // wrap around

  _n_hot++;

  // The card obtained from the hot card cache could be in a young
  // region. See above on how this can happen.

  return res;
}
// Drain the hot card cache (the cache must already be disabled; this
// runs at a safepoint). Workers claim fixed-size chunks of the cache
// via a CAS on _hot_cache_par_claimed_idx and refine every card in
// their claimed chunk. Cards found to reference the current
// collection set are enqueued on into_cset_dcq for later handling.
void ConcurrentG1Refine::clean_up_cache(int worker_i,
                                        G1RemSet* g1rs,
                                        DirtyCardQueue* into_cset_dcq) {
  assert(!use_cache(), "cache should be disabled");
  int start_idx;

  while ((start_idx = _hot_cache_par_claimed_idx) < _n_hot) { // read once
    int end_idx = start_idx + _hot_cache_par_chunk_size;

    if (start_idx ==
        Atomic::cmpxchg(end_idx, &_hot_cache_par_claimed_idx, start_idx)) {
      // The current worker has successfully claimed the chunk [start_idx..end_idx)
      end_idx = MIN2(end_idx, _n_hot);
      for (int i = start_idx; i < end_idx; i++) {
        jbyte* entry = _hot_cache[i];
        if (entry != NULL) {
          if (g1rs->concurrentRefineOneCard(entry, worker_i, true)) {
            // 'entry' contains references that point into the current
            // collection set. We need to record 'entry' in the DCQS
            // that's used for that purpose.
            //
            // The only time we care about recording cards that contain
            // references that point into the collection set is during
            // RSet updating while within an evacuation pause.
            // In this case worker_i should be the id of a GC worker thread
            assert(SafepointSynchronize::is_at_safepoint(), "not during an evacuation pause");
            assert(worker_i < (int) (ParallelGCThreads == 0 ? 1 : ParallelGCThreads), "incorrect worker id");
            into_cset_dcq->enqueue(entry);
          }
        }
      }
    }
  }
}
414 // The arrays used to hold the card counts and the epochs must have
415 // a 1:1 correspondence. Hence they are allocated and freed together
416 // Returns true if the allocations of both the counts and epochs
417 // were successful; false otherwise.
418 bool ConcurrentG1Refine::allocate_card_count_cache(size_t n,
419 CardCountCacheEntry** counts,
420 CardEpochCacheEntry** epochs) {
421 // We call the allocation/free routines directly for the counts
422 // and epochs arrays. The NEW_C_HEAP_ARRAY/FREE_C_HEAP_ARRAY
423 // macros call AllocateHeap and FreeHeap respectively.
424 // AllocateHeap will call vm_exit_out_of_memory in the event
425 // of an allocation failure and abort the JVM. With the
426 // _counts/epochs arrays we only need to abort the JVM if the
427 // initial allocation of these arrays fails.
428 //
429 // Additionally AllocateHeap/FreeHeap do some tracing of
430 // allocate/free calls so calling one without calling the
431 // other can cause inconsistencies in the tracing. So we
432 // call neither.
434 assert(*counts == NULL, "out param");
435 assert(*epochs == NULL, "out param");
437 size_t counts_size = n * sizeof(CardCountCacheEntry);
438 size_t epochs_size = n * sizeof(CardEpochCacheEntry);
440 *counts = (CardCountCacheEntry*) os::malloc(counts_size, mtGC);
441 if (*counts == NULL) {
442 // allocation was unsuccessful
443 return false;
444 }
446 *epochs = (CardEpochCacheEntry*) os::malloc(epochs_size, mtGC);
447 if (*epochs == NULL) {
448 // allocation was unsuccessful - free counts array
449 assert(*counts != NULL, "must be");
450 os::free(*counts, mtGC);
451 *counts = NULL;
452 return false;
453 }
455 // We successfully allocated both counts and epochs
456 return true;
457 }
459 // Returns true if the card counts/epochs cache was
460 // successfully expanded; false otherwise.
461 bool ConcurrentG1Refine::expand_card_count_cache(int cache_size_idx) {
462 // Can we expand the card count and epoch tables?
463 if (_n_card_counts < _max_n_card_counts) {
464 assert(cache_size_idx >= 0 && cache_size_idx < MAX_CC_CACHE_INDEX, "oob");
466 size_t cache_size = _cc_cache_sizes[cache_size_idx];
467 // Make sure we don't go bigger than we will ever need
468 cache_size = MIN2(cache_size, _max_n_card_counts);
470 // Should we expand the card count and card epoch tables?
471 if (cache_size > _n_card_counts) {
472 // We have been asked to allocate new, larger, arrays for
473 // the card counts and the epochs. Attempt the allocation
474 // of both before we free the existing arrays in case
475 // the allocation is unsuccessful...
476 CardCountCacheEntry* counts = NULL;
477 CardEpochCacheEntry* epochs = NULL;
479 if (allocate_card_count_cache(cache_size, &counts, &epochs)) {
480 // Allocation was successful.
481 // We can just free the old arrays; we're
482 // not interested in preserving the contents
483 if (_card_counts != NULL) os::free(_card_counts, mtGC);
484 if (_card_epochs != NULL) os::free(_card_epochs, mtGC);
486 // Cache the size of the arrays and the index that got us there.
487 _n_card_counts = cache_size;
488 _cache_size_index = cache_size_idx;
490 _card_counts = counts;
491 _card_epochs = epochs;
493 // We successfully allocated/expanded the caches.
494 return true;
495 }
496 }
497 }
499 // We did not successfully expand the caches.
500 return false;
501 }
503 void ConcurrentG1Refine::clear_and_record_card_counts() {
504 if (G1ConcRSLogCacheSize == 0) {
505 return;
506 }
508 double start = os::elapsedTime();
510 if (_expand_card_counts) {
511 int new_idx = _cache_size_index + 1;
513 if (expand_card_count_cache(new_idx)) {
514 // Allocation was successful and _n_card_counts has
515 // been updated to the new size. We only need to clear
516 // the epochs so we don't read a bogus epoch value
517 // when inserting a card into the hot card cache.
518 Copy::fill_to_bytes(&_card_epochs[0], _n_card_counts * sizeof(CardEpochCacheEntry));
519 }
520 _expand_card_counts = false;
521 }
523 int this_epoch = (int) _n_periods;
524 assert((this_epoch+1) <= max_jint, "to many periods");
525 // Update epoch
526 _n_periods++;
527 double cc_clear_time_ms = (os::elapsedTime() - start) * 1000;
528 _g1h->g1_policy()->phase_times()->record_cc_clear_time_ms(cc_clear_time_ms);
529 }
531 void ConcurrentG1Refine::print_worker_threads_on(outputStream* st) const {
532 for (int i = 0; i < _n_threads; ++i) {
533 _threads[i]->print_on(st);
534 st->cr();
535 }
536 }