src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp

author       tonyp
date         Tue, 20 Sep 2011 09:59:59 -0400
changeset    3168:4f93f0d00802
parent       2716:c84ee870e0b9
child        3900:d2a62e0f25eb
child        3923:922993931b3d
permissions  -rw-r--r--

7059019: G1: add G1 support to the SA
Summary: Extend the SA to recognize the G1CollectedHeap and implement any code that's needed by our serviceability tools (jmap, jinfo, jstack, etc.) that depend on the SA.
Reviewed-by: never, poonam, johnc

/*
 * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "gc_implementation/g1/concurrentG1Refine.hpp"
#include "gc_implementation/g1/concurrentG1RefineThread.hpp"
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1CollectorPolicy.hpp"
#include "gc_implementation/g1/g1RemSet.hpp"
#include "gc_implementation/g1/heapRegionSeq.inline.hpp"
#include "memory/space.inline.hpp"
#include "runtime/atomic.hpp"
#include "runtime/java.hpp"
#include "utilities/copy.hpp"

// Possible sizes for the card counts cache: odd primes that roughly double in size.
// (See jvmtiTagMap.cpp).

#define MAX_SIZE ((size_t) -1)

size_t ConcurrentG1Refine::_cc_cache_sizes[] = {
          16381,    32771,    76831,    150001,   307261,
         614563,  1228891,  2457733,   4915219,  9830479,
       19660831, 39321619, 78643219, 157286461,  MAX_SIZE
  };
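// The trailing MAX_SIZE entry acts as a sentinel: the size actually used is
// always clamped to _max_n_card_counts in expand_card_count_cache(), so the
// sentinel is never allocated, but it guarantees that the search loop in
// init() terminates.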

ConcurrentG1Refine::ConcurrentG1Refine() :
  _card_counts(NULL), _card_epochs(NULL),
  _n_card_counts(0), _max_cards(0), _max_n_card_counts(0),
  _cache_size_index(0), _expand_card_counts(false),
  _hot_cache(NULL),
  _def_use_cache(false), _use_cache(false),
  // We initialize the epochs of the array to 0. By initializing
  // _n_periods to 1 and not 0 we automatically invalidate all the
  // entries on the array. Otherwise we might accidentally think that
  // we claimed a card that was in fact never set (see CR7033292).
  _n_periods(1),
  _threads(NULL), _n_threads(0)
{
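  // Concurrent refinement is governed by three thresholds ("zones"),
  // expressed as numbers of completed update buffers:
  //  - green:  below this many buffers no concurrent refinement is done and
  //            the buffers are left to be processed during the next
  //            evacuation pause;
  //  - yellow: as the queue grows from the green towards the yellow zone,
  //            additional refinement threads are activated;
  //  - red:    beyond this point mutator threads may be asked to process
  //            buffers themselves rather than let the queue grow further.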

  // Ergonomically select initial concurrent refinement parameters
  if (FLAG_IS_DEFAULT(G1ConcRefinementGreenZone)) {
    FLAG_SET_DEFAULT(G1ConcRefinementGreenZone, MAX2<int>(ParallelGCThreads, 1));
  }
  set_green_zone(G1ConcRefinementGreenZone);

  if (FLAG_IS_DEFAULT(G1ConcRefinementYellowZone)) {
    FLAG_SET_DEFAULT(G1ConcRefinementYellowZone, green_zone() * 3);
  }
  set_yellow_zone(MAX2<int>(G1ConcRefinementYellowZone, green_zone()));

  if (FLAG_IS_DEFAULT(G1ConcRefinementRedZone)) {
    FLAG_SET_DEFAULT(G1ConcRefinementRedZone, yellow_zone() * 2);
  }
  set_red_zone(MAX2<int>(G1ConcRefinementRedZone, yellow_zone()));
  _n_worker_threads = thread_num();
  // We need one extra thread to do the young gen rset size sampling.
  _n_threads = _n_worker_threads + 1;
  reset_threshold_step();

  _threads = NEW_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _n_threads);
  int worker_id_offset = (int)DirtyCardQueueSet::num_par_ids();
  ConcurrentG1RefineThread *next = NULL;
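  // Create the threads in reverse index order so that each thread can be
  // handed a pointer to the already-created thread with the next higher
  // index; the extra young gen sampling thread gets the highest index and
  // has no successor.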
  for (int i = _n_threads - 1; i >= 0; i--) {
    ConcurrentG1RefineThread* t = new ConcurrentG1RefineThread(this, next, worker_id_offset, i);
    assert(t != NULL, "Conc refine should have been created");
    assert(t->cg1r() == this, "Conc refine thread should refer to this");
    _threads[i] = t;
    next = t;
  }
}

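// The threshold step spaces the activation thresholds of the refinement
// worker threads between the green and yellow zones: worker i is activated
// roughly once the number of completed buffers exceeds
// green_zone() + i * _thread_threshold_step (see ConcurrentG1RefineThread).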
void ConcurrentG1Refine::reset_threshold_step() {
  if (FLAG_IS_DEFAULT(G1ConcRefinementThresholdStep)) {
    _thread_threshold_step = (yellow_zone() - green_zone()) / (worker_thread_num() + 1);
  } else {
    _thread_threshold_step = G1ConcRefinementThresholdStep;
  }
}

int ConcurrentG1Refine::thread_num() {
  return MAX2<int>((G1ConcRefinementThreads > 0) ? G1ConcRefinementThreads : ParallelGCThreads, 1);
}

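// Initializes the card count cache and the hot card cache (used only when
// G1ConcRSLogCacheSize > 0). Cards that are dirtied repeatedly ("hot" cards)
// are not refined immediately; they are kept in the hot card cache and
// refined later, either when they are evicted from the cache or when the
// cache is drained via clean_up_cache().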
void ConcurrentG1Refine::init() {
  if (G1ConcRSLogCacheSize > 0) {
    _g1h = G1CollectedHeap::heap();

    _max_cards = _g1h->max_capacity() >> CardTableModRefBS::card_shift;
    _max_n_card_counts = _max_cards * G1MaxHotCardCountSizePercent / 100;

    size_t max_card_num = ((size_t)1 << (sizeof(unsigned)*BitsPerByte-1)) - 1;
    guarantee(_max_cards < max_card_num, "card_num representation");

    // We need _n_card_counts to be less than _max_n_card_counts here
    // so that the expansion call (below) actually allocates the
    // _counts and _epochs arrays.
    assert(_n_card_counts == 0, "pre-condition");
    assert(_max_n_card_counts > 0, "pre-condition");

    // Find the index into cache size array that is of a size that's
    // large enough to hold desired_sz.
    size_t desired_sz = _max_cards / InitialCacheFraction;
    int desired_sz_index = 0;
    while (_cc_cache_sizes[desired_sz_index] < desired_sz) {
      desired_sz_index += 1;
      assert(desired_sz_index <  MAX_CC_CACHE_INDEX, "invariant");
    }
    assert(desired_sz_index <  MAX_CC_CACHE_INDEX, "invariant");

    // If the desired_sz value is between two sizes then
    // _cc_cache_sizes[desired_sz_index-1] < desired_sz <= _cc_cache_sizes[desired_sz_index]
    // we will start with the lower size in the optimistic expectation that
    // we will not need to expand up. Note desired_sz_index could also be 0.
    if (desired_sz_index > 0 &&
        _cc_cache_sizes[desired_sz_index] > desired_sz) {
      desired_sz_index -= 1;
    }

    if (!expand_card_count_cache(desired_sz_index)) {
      // Allocation was unsuccessful - exit
      vm_exit_during_initialization("Could not reserve enough space for card count cache");
    }
    assert(_n_card_counts > 0, "post-condition");
    assert(_cache_size_index == desired_sz_index, "post-condition");

    Copy::fill_to_bytes(&_card_counts[0],
                        _n_card_counts * sizeof(CardCountCacheEntry));
    Copy::fill_to_bytes(&_card_epochs[0], _n_card_counts * sizeof(CardEpochCacheEntry));

    ModRefBarrierSet* bs = _g1h->mr_bs();
    guarantee(bs->is_a(BarrierSet::CardTableModRef), "Precondition");
    _ct_bs = (CardTableModRefBS*)bs;
    _ct_bot = _ct_bs->byte_for_const(_g1h->reserved_region().start());

    _def_use_cache = true;
    _use_cache = true;
    _hot_cache_size = (1 << G1ConcRSLogCacheSize);
    _hot_cache = NEW_C_HEAP_ARRAY(jbyte*, _hot_cache_size);
    _n_hot = 0;
    _hot_cache_idx = 0;

    // For refining the cards in the hot cache in parallel
    int n_workers = (ParallelGCThreads > 0 ?
                        _g1h->workers()->total_workers() : 1);
    _hot_cache_par_chunk_size = MAX2(1, _hot_cache_size / n_workers);
    _hot_cache_par_claimed_idx = 0;
  }
}

void ConcurrentG1Refine::stop() {
  if (_threads != NULL) {
    for (int i = 0; i < _n_threads; i++) {
      _threads[i]->stop();
    }
  }
}

void ConcurrentG1Refine::reinitialize_threads() {
  reset_threshold_step();
  if (_threads != NULL) {
    for (int i = 0; i < _n_threads; i++) {
      _threads[i]->initialize();
    }
  }
}

ConcurrentG1Refine::~ConcurrentG1Refine() {
  if (G1ConcRSLogCacheSize > 0) {
    // Please see the comment in allocate_card_count_cache
    // for why we call os::malloc() and os::free() directly.
    assert(_card_counts != NULL, "Logic");
    os::free(_card_counts);
    assert(_card_epochs != NULL, "Logic");
    os::free(_card_epochs);

    assert(_hot_cache != NULL, "Logic");
    FREE_C_HEAP_ARRAY(jbyte*, _hot_cache);
  }
  if (_threads != NULL) {
    for (int i = 0; i < _n_threads; i++) {
      delete _threads[i];
    }
    FREE_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _threads);
  }
}

void ConcurrentG1Refine::threads_do(ThreadClosure *tc) {
  if (_threads != NULL) {
    for (int i = 0; i < _n_threads; i++) {
      tc->do_thread(_threads[i]);
    }
  }
}

bool ConcurrentG1Refine::is_young_card(jbyte* card_ptr) {
  HeapWord* start = _ct_bs->addr_for(card_ptr);
  HeapRegion* r = _g1h->heap_region_containing(start);
  if (r != NULL && r->is_young()) {
    return true;
  }
  // This card is not associated with a heap region
  // so can't be young.
  return false;
}

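// Looks up card_ptr in the card count cache and returns either card_ptr
// itself or the card pointer it evicted from the shared bucket; the defer
// out parameter tells the caller whether processing of the returned card
// can be deferred. A bucket's count is only considered valid if the
// matching epoch entry was written during the current period (_n_periods);
// the epoch entry packs the card number and the period into a single
// 64-bit value so that both can be claimed with one CAS. This is what
// allows clear_and_record_card_counts() to "clear" the whole cache simply
// by incrementing _n_periods.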
jbyte* ConcurrentG1Refine::add_card_count(jbyte* card_ptr, int* count, bool* defer) {
  unsigned new_card_num = ptr_2_card_num(card_ptr);
  unsigned bucket = hash(new_card_num);
  assert(0 <= bucket && bucket < _n_card_counts, "Bounds");

  CardCountCacheEntry* count_ptr = &_card_counts[bucket];
  CardEpochCacheEntry* epoch_ptr = &_card_epochs[bucket];

  // We have to construct a new entry if we haven't updated the counts
  // during the current period, or if the count was updated for a
  // different card number.
  unsigned int new_epoch = (unsigned int) _n_periods;
  julong new_epoch_entry = make_epoch_entry(new_card_num, new_epoch);

  while (true) {
    // Fetch the previous epoch value
    julong prev_epoch_entry = epoch_ptr->_value;
    julong cas_res;

    if (extract_epoch(prev_epoch_entry) != new_epoch) {
      // This entry has not yet been updated during this period.
      // Note: we update the epoch value atomically to ensure
      // that there is only one winner that updates the cached
      // card_ptr value even though all the refine threads share
      // the same epoch value.

      cas_res = (julong) Atomic::cmpxchg((jlong) new_epoch_entry,
                                         (volatile jlong*)&epoch_ptr->_value,
                                         (jlong) prev_epoch_entry);

      if (cas_res == prev_epoch_entry) {
        // We have successfully won the race to update the
        // epoch and card_num value. Make it look like the
        // count and eviction count were previously cleared.
        count_ptr->_count = 1;
        count_ptr->_evict_count = 0;
        *count = 0;
        // We can defer the processing of card_ptr
        *defer = true;
        return card_ptr;
      }
      // We did not win the race to update the epoch field, so some other
      // thread must have done it. The value that gets returned by CAS
      // should be the new epoch value.
      assert(extract_epoch(cas_res) == new_epoch, "unexpected epoch");
      // We could 'continue' here or just re-read the previous epoch value
      prev_epoch_entry = epoch_ptr->_value;
    }

    // The epoch entry for card_ptr has been updated during this period.
    unsigned old_card_num = extract_card_num(prev_epoch_entry);

    // The card count that will be returned to caller
    *count = count_ptr->_count;

    // Are we updating the count for the same card?
    if (new_card_num == old_card_num) {
      // Same card - just update the count. We could have more than one
      // thread racing to update count for the current card. It should be
      // OK not to use a CAS as the only penalty should be some missed
      // increments of the count which delays identifying the card as "hot".

      if (*count < max_jubyte) count_ptr->_count++;
      // We can defer the processing of card_ptr
      *defer = true;
      return card_ptr;
    }

    // Different card - evict old card info
    if (count_ptr->_evict_count < max_jubyte) count_ptr->_evict_count++;
    if (count_ptr->_evict_count > G1CardCountCacheExpandThreshold) {
      // Trigger a resize the next time we clear
      _expand_card_counts = true;
    }

    cas_res = (julong) Atomic::cmpxchg((jlong) new_epoch_entry,
                                       (volatile jlong*)&epoch_ptr->_value,
                                       (jlong) prev_epoch_entry);

    if (cas_res == prev_epoch_entry) {
      // We successfully updated the card num value in the epoch entry
      count_ptr->_count = 0; // initialize counter for new card num
      jbyte* old_card_ptr = card_num_2_ptr(old_card_num);

      // Even though the region containing the card at old_card_num was not
      // in the young list when old_card_num was recorded in the epoch
      // cache it could have been added to the free list and subsequently
      // added to the young list in the intervening time. See CR 6817995.
      // We do not deal with this case here - it will be handled in
      // HeapRegion::oops_on_card_seq_iterate_careful after it has been
      // determined that the region containing the card has been allocated
      // to, and it's safe to check the young type of the region.

      // We do not want to defer processing of card_ptr in this case
      // (we need to refine old_card_ptr and card_ptr)
      *defer = false;
      return old_card_ptr;
    }
    // Someone else beat us - try again.
  }
}

jbyte* ConcurrentG1Refine::cache_insert(jbyte* card_ptr, bool* defer) {
  int count;
  jbyte* cached_ptr = add_card_count(card_ptr, &count, defer);
  assert(cached_ptr != NULL, "bad cached card ptr");

  // We've just inserted a card pointer into the card count cache
  // and got back either the card that we just inserted or the (evicted)
  // previous contents of that count slot.

  // The card we got back could be in a young region. When the
  // returned card (if evicted) was originally inserted, we had
  // determined that its containing region was not young. However
  // it is possible for the region to be freed during a cleanup
  // pause, then reallocated and tagged as young which will result
  // in the returned card residing in a young region.
  //
  // We do not deal with this case here - the change from non-young
  // to young could be observed at any time - it will be handled in
  // HeapRegion::oops_on_card_seq_iterate_careful after it has been
  // determined that the region containing the card has been allocated
  // to.

  // The card pointer we obtained from card count cache is not hot
  // so do not store it in the cache; return it for immediate
  // refining.
  if (count < G1ConcRSHotCardLimit) {
    return cached_ptr;
  }

  // Otherwise, the pointer we got from the _card_counts cache is hot.
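  // Insert it into the hot card cache, a simple circular buffer protected by
  // HotCardCache_lock. If the buffer is full, the oldest entry (the one at
  // _hot_cache_idx, which is about to be overwritten) is evicted and returned
  // to the caller for immediate refinement; otherwise NULL is returned and
  // refinement of the newly inserted card is deferred.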
  jbyte* res = NULL;
  MutexLockerEx x(HotCardCache_lock, Mutex::_no_safepoint_check_flag);
  if (_n_hot == _hot_cache_size) {
    res = _hot_cache[_hot_cache_idx];
    _n_hot--;
  }
  // Now _n_hot < _hot_cache_size, and we can insert at _hot_cache_idx.
  _hot_cache[_hot_cache_idx] = cached_ptr;
  _hot_cache_idx++;
  if (_hot_cache_idx == _hot_cache_size) _hot_cache_idx = 0;
  _n_hot++;

  // The card obtained from the hot card cache could be in a young
  // region. See above on how this can happen.

  return res;
}

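// Drains the hot card cache by refining every cached entry. Workers claim
// chunks of _hot_cache_par_chunk_size entries by advancing
// _hot_cache_par_claimed_idx with a CAS, so several GC worker threads can
// empty the cache in parallel. Cards found to hold references into the
// collection set are recorded in the into_cset_dcq queue.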
void ConcurrentG1Refine::clean_up_cache(int worker_i,
                                        G1RemSet* g1rs,
                                        DirtyCardQueue* into_cset_dcq) {
  assert(!use_cache(), "cache should be disabled");
  int start_idx;

  while ((start_idx = _hot_cache_par_claimed_idx) < _n_hot) { // read once
    int end_idx = start_idx + _hot_cache_par_chunk_size;

    if (start_idx ==
        Atomic::cmpxchg(end_idx, &_hot_cache_par_claimed_idx, start_idx)) {
      // The current worker has successfully claimed the chunk [start_idx..end_idx)
      end_idx = MIN2(end_idx, _n_hot);
      for (int i = start_idx; i < end_idx; i++) {
        jbyte* entry = _hot_cache[i];
        if (entry != NULL) {
          if (g1rs->concurrentRefineOneCard(entry, worker_i, true)) {
            // 'entry' contains references that point into the current
            // collection set. We need to record 'entry' in the DCQS
            // that's used for that purpose.
            //
            // The only time we care about recording cards that contain
            // references that point into the collection set is during
            // RSet updating while within an evacuation pause.
            // In this case worker_i should be the id of a GC worker thread
            assert(SafepointSynchronize::is_at_safepoint(), "not during an evacuation pause");
            assert(worker_i < (int) (ParallelGCThreads == 0 ? 1 : ParallelGCThreads), "incorrect worker id");
            into_cset_dcq->enqueue(entry);
          }
        }
      }
    }
  }
}

// The arrays used to hold the card counts and the epochs must have
// a 1:1 correspondence. Hence they are allocated and freed together.
// Returns true if the allocations of both the counts and epochs
// were successful; false otherwise.
bool ConcurrentG1Refine::allocate_card_count_cache(size_t n,
                                                   CardCountCacheEntry** counts,
                                                   CardEpochCacheEntry** epochs) {
  // We call the allocation/free routines directly for the counts
  // and epochs arrays. The NEW_C_HEAP_ARRAY/FREE_C_HEAP_ARRAY
  // macros call AllocateHeap and FreeHeap respectively.
  // AllocateHeap will call vm_exit_out_of_memory in the event
  // of an allocation failure and abort the JVM. With the
  // _counts/epochs arrays we only need to abort the JVM if the
  // initial allocation of these arrays fails.
  //
  // Additionally AllocateHeap/FreeHeap do some tracing of
  // allocate/free calls so calling one without calling the
  // other can cause inconsistencies in the tracing. So we
  // call neither.

  assert(*counts == NULL, "out param");
  assert(*epochs == NULL, "out param");

  size_t counts_size = n * sizeof(CardCountCacheEntry);
  size_t epochs_size = n * sizeof(CardEpochCacheEntry);

  *counts = (CardCountCacheEntry*) os::malloc(counts_size);
  if (*counts == NULL) {
    // allocation was unsuccessful
    return false;
  }

  *epochs = (CardEpochCacheEntry*) os::malloc(epochs_size);
  if (*epochs == NULL) {
    // allocation was unsuccessful - free counts array
    assert(*counts != NULL, "must be");
    os::free(*counts);
    *counts = NULL;
    return false;
  }

  // We successfully allocated both counts and epochs
  return true;
}

// Returns true if the card counts/epochs cache was
// successfully expanded; false otherwise.
bool ConcurrentG1Refine::expand_card_count_cache(int cache_size_idx) {
  // Can we expand the card count and epoch tables?
  if (_n_card_counts < _max_n_card_counts) {
    assert(cache_size_idx >= 0 && cache_size_idx  < MAX_CC_CACHE_INDEX, "oob");

    size_t cache_size = _cc_cache_sizes[cache_size_idx];
    // Make sure we don't go bigger than we will ever need
    cache_size = MIN2(cache_size, _max_n_card_counts);

    // Should we expand the card count and card epoch tables?
    if (cache_size > _n_card_counts) {
      // We have been asked to allocate new, larger, arrays for
      // the card counts and the epochs. Attempt the allocation
      // of both before we free the existing arrays in case
      // the allocation is unsuccessful...
      CardCountCacheEntry* counts = NULL;
      CardEpochCacheEntry* epochs = NULL;

      if (allocate_card_count_cache(cache_size, &counts, &epochs)) {
        // Allocation was successful.
        // We can just free the old arrays; we're
        // not interested in preserving the contents
        if (_card_counts != NULL) os::free(_card_counts);
        if (_card_epochs != NULL) os::free(_card_epochs);

        // Cache the size of the arrays and the index that got us there.
        _n_card_counts = cache_size;
        _cache_size_index = cache_size_idx;

        _card_counts = counts;
        _card_epochs = epochs;

        // We successfully allocated/expanded the caches.
        return true;
      }
    }
  }

  // We did not successfully expand the caches.
  return false;
}

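// Logically clears the card count cache at the end of a collection: instead
// of zeroing the counts, the period (epoch) number is incremented so that
// entries written in earlier periods are treated as stale by add_card_count.
// If evictions indicated that the cache is too small, the tables are also
// grown to the next size in _cc_cache_sizes.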
void ConcurrentG1Refine::clear_and_record_card_counts() {
  if (G1ConcRSLogCacheSize == 0) return;

#ifndef PRODUCT
  double start = os::elapsedTime();
#endif

  if (_expand_card_counts) {
    int new_idx = _cache_size_index + 1;

    if (expand_card_count_cache(new_idx)) {
      // Allocation was successful and  _n_card_counts has
      // been updated to the new size. We only need to clear
      // the epochs so we don't read a bogus epoch value
      // when inserting a card into the hot card cache.
      Copy::fill_to_bytes(&_card_epochs[0], _n_card_counts * sizeof(CardEpochCacheEntry));
    }
    _expand_card_counts = false;
  }

  int this_epoch = (int) _n_periods;
  assert((this_epoch+1) <= max_jint, "too many periods");
  // Update epoch
  _n_periods++;

#ifndef PRODUCT
  double elapsed = os::elapsedTime() - start;
  _g1h->g1_policy()->record_cc_clear_time(elapsed * 1000.0);
#endif
}

void ConcurrentG1Refine::print_worker_threads_on(outputStream* st) const {
  for (int i = 0; i < _n_threads; ++i) {
    _threads[i]->print_on(st);
    st->cr();
  }
}
