src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp

author:      johnc
date:        Sat, 06 Oct 2012 01:17:44 -0700
changeset:   4173:8a5ea0a9ccc4
parent:      3924:3a431b605145
child:       5078:194f52aa2f23
permissions: -rw-r--r--

7127708: G1: change task num types from int to uint in concurrent mark
Summary: Change the type of various task num fields, parameters, etc. to unsigned and rename them to be more consistent with the other collectors. Code changes were also reviewed by Vitaly Davidovich.
Reviewed-by: johnc
Contributed-by: Kaushik Srenevasan <kaushik@twitter.com>
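
To illustrate the kind of change the summary describes, here is a minimal before/after sketch of moving a task/worker identifier from int to uint. The class and member names below are hypothetical placeholders for illustration only; the fields actually touched by this patch live in the concurrent mark sources, not in this file, and the sketch assumes HotSpot's uint typedef.

  // Before: task numbers carried as signed ints, named inconsistently.
  //   int _task_num;
  //   void set_task_num(int task_num) { _task_num = task_num; }

  // After: unsigned ids, named like the other collectors.
  class ExampleMarkTask {
    uint _worker_id;                               // was: int _task_num
   public:
    explicit ExampleMarkTask(uint worker_id) : _worker_id(worker_id) { }
    uint worker_id() const { return _worker_id; }
  };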

     1 /*
     2  * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.
     8  *
     9  * This code is distributed in the hope that it will be useful, but WITHOUT
    10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    12  * version 2 for more details (a copy is included in the LICENSE file that
    13  * accompanied this code).
    14  *
    15  * You should have received a copy of the GNU General Public License version
    16  * 2 along with this work; if not, write to the Free Software Foundation,
    17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    18  *
    19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    20  * or visit www.oracle.com if you need additional information or have any
    21  * questions.
    22  *
    23  */
    25 #include "precompiled.hpp"
    26 #include "gc_implementation/g1/concurrentG1Refine.hpp"
    27 #include "gc_implementation/g1/concurrentG1RefineThread.hpp"
    28 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
    29 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
    30 #include "gc_implementation/g1/g1GCPhaseTimes.hpp"
    31 #include "gc_implementation/g1/g1RemSet.hpp"
    32 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
    33 #include "memory/space.inline.hpp"
    34 #include "runtime/atomic.hpp"
    35 #include "runtime/java.hpp"
    36 #include "utilities/copy.hpp"
    38 // Possible sizes for the card counts cache: odd primes that roughly double in size.
    39 // (See jvmtiTagMap.cpp).
    41 #define MAX_SIZE ((size_t) -1)
    43 size_t ConcurrentG1Refine::_cc_cache_sizes[] = {
    44           16381,    32771,    76831,    150001,   307261,
    45          614563,  1228891,  2457733,   4915219,  9830479,
    46        19660831, 39321619, 78643219, 157286461,  MAX_SIZE
    47   };
    49 ConcurrentG1Refine::ConcurrentG1Refine() :
    50   _card_counts(NULL), _card_epochs(NULL),
    51   _n_card_counts(0), _max_cards(0), _max_n_card_counts(0),
    52   _cache_size_index(0), _expand_card_counts(false),
    53   _hot_cache(NULL),
    54   _def_use_cache(false), _use_cache(false),
    55   // We initialize the epochs of the array to 0. By initializing
    56   // _n_periods to 1 and not 0 we automatically invalidate all the
     57   // entries in the array. Otherwise we might accidentally think that
    58   // we claimed a card that was in fact never set (see CR7033292).
    59   _n_periods(1),
    60   _threads(NULL), _n_threads(0)
    61 {
     63   // Ergonomically select initial concurrent refinement parameters
    64   if (FLAG_IS_DEFAULT(G1ConcRefinementGreenZone)) {
    65     FLAG_SET_DEFAULT(G1ConcRefinementGreenZone, MAX2<int>(ParallelGCThreads, 1));
    66   }
    67   set_green_zone(G1ConcRefinementGreenZone);
    69   if (FLAG_IS_DEFAULT(G1ConcRefinementYellowZone)) {
    70     FLAG_SET_DEFAULT(G1ConcRefinementYellowZone, green_zone() * 3);
    71   }
    72   set_yellow_zone(MAX2<int>(G1ConcRefinementYellowZone, green_zone()));
    74   if (FLAG_IS_DEFAULT(G1ConcRefinementRedZone)) {
    75     FLAG_SET_DEFAULT(G1ConcRefinementRedZone, yellow_zone() * 2);
    76   }
    77   set_red_zone(MAX2<int>(G1ConcRefinementRedZone, yellow_zone()));
    78   _n_worker_threads = thread_num();
    79   // We need one extra thread to do the young gen rset size sampling.
    80   _n_threads = _n_worker_threads + 1;
    81   reset_threshold_step();
    83   _threads = NEW_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _n_threads, mtGC);
    84   int worker_id_offset = (int)DirtyCardQueueSet::num_par_ids();
    85   ConcurrentG1RefineThread *next = NULL;
    86   for (int i = _n_threads - 1; i >= 0; i--) {
    87     ConcurrentG1RefineThread* t = new ConcurrentG1RefineThread(this, next, worker_id_offset, i);
    88     assert(t != NULL, "Conc refine should have been created");
    89     assert(t->cg1r() == this, "Conc refine thread should refer to this");
    90     _threads[i] = t;
    91     next = t;
    92   }
    93 }
    95 void ConcurrentG1Refine::reset_threshold_step() {
    96   if (FLAG_IS_DEFAULT(G1ConcRefinementThresholdStep)) {
    97     _thread_threshold_step = (yellow_zone() - green_zone()) / (worker_thread_num() + 1);
    98   } else {
    99     _thread_threshold_step = G1ConcRefinementThresholdStep;
   100   }
   101 }
   103 int ConcurrentG1Refine::thread_num() {
   104   return MAX2<int>((G1ConcRefinementThreads > 0) ? G1ConcRefinementThreads : ParallelGCThreads, 1);
   105 }
   107 void ConcurrentG1Refine::init() {
   108   if (G1ConcRSLogCacheSize > 0) {
   109     _g1h = G1CollectedHeap::heap();
   111     _max_cards = _g1h->max_capacity() >> CardTableModRefBS::card_shift;
   112     _max_n_card_counts = _max_cards * G1MaxHotCardCountSizePercent / 100;
   114     size_t max_card_num = ((size_t)1 << (sizeof(unsigned)*BitsPerByte-1)) - 1;
   115     guarantee(_max_cards < max_card_num, "card_num representation");
   117     // We need _n_card_counts to be less than _max_n_card_counts here
   118     // so that the expansion call (below) actually allocates the
   119     // _counts and _epochs arrays.
   120     assert(_n_card_counts == 0, "pre-condition");
   121     assert(_max_n_card_counts > 0, "pre-condition");
    123     // Find the index into the cache size array whose entry is
    124     // large enough to hold desired_sz.
   125     size_t desired_sz = _max_cards / InitialCacheFraction;
   126     int desired_sz_index = 0;
   127     while (_cc_cache_sizes[desired_sz_index] < desired_sz) {
   128       desired_sz_index += 1;
   129       assert(desired_sz_index <  MAX_CC_CACHE_INDEX, "invariant");
   130     }
   131     assert(desired_sz_index <  MAX_CC_CACHE_INDEX, "invariant");
   133     // If the desired_sz value is between two sizes then
   134     // _cc_cache_sizes[desired_sz_index-1] < desired_sz <= _cc_cache_sizes[desired_sz_index]
   135     // we will start with the lower size in the optimistic expectation that
   136     // we will not need to expand up. Note desired_sz_index could also be 0.
   137     if (desired_sz_index > 0 &&
   138         _cc_cache_sizes[desired_sz_index] > desired_sz) {
   139       desired_sz_index -= 1;
   140     }
   142     if (!expand_card_count_cache(desired_sz_index)) {
   143       // Allocation was unsuccessful - exit
   144       vm_exit_during_initialization("Could not reserve enough space for card count cache");
   145     }
   146     assert(_n_card_counts > 0, "post-condition");
   147     assert(_cache_size_index == desired_sz_index, "post-condition");
   149     Copy::fill_to_bytes(&_card_counts[0],
   150                         _n_card_counts * sizeof(CardCountCacheEntry));
   151     Copy::fill_to_bytes(&_card_epochs[0], _n_card_counts * sizeof(CardEpochCacheEntry));
   153     ModRefBarrierSet* bs = _g1h->mr_bs();
   154     guarantee(bs->is_a(BarrierSet::CardTableModRef), "Precondition");
   155     _ct_bs = (CardTableModRefBS*)bs;
   156     _ct_bot = _ct_bs->byte_for_const(_g1h->reserved_region().start());
   158     _def_use_cache = true;
   159     _use_cache = true;
   160     _hot_cache_size = (1 << G1ConcRSLogCacheSize);
   161     _hot_cache = NEW_C_HEAP_ARRAY(jbyte*, _hot_cache_size, mtGC);
   162     _n_hot = 0;
   163     _hot_cache_idx = 0;
   165     // For refining the cards in the hot cache in parallel
   166     int n_workers = (ParallelGCThreads > 0 ?
   167                         _g1h->workers()->total_workers() : 1);
   168     _hot_cache_par_chunk_size = MAX2(1, _hot_cache_size / n_workers);
   169     _hot_cache_par_claimed_idx = 0;
   170   }
   171 }
   173 void ConcurrentG1Refine::stop() {
   174   if (_threads != NULL) {
   175     for (int i = 0; i < _n_threads; i++) {
   176       _threads[i]->stop();
   177     }
   178   }
   179 }
   181 void ConcurrentG1Refine::reinitialize_threads() {
   182   reset_threshold_step();
   183   if (_threads != NULL) {
   184     for (int i = 0; i < _n_threads; i++) {
   185       _threads[i]->initialize();
   186     }
   187   }
   188 }
   190 ConcurrentG1Refine::~ConcurrentG1Refine() {
   191   if (G1ConcRSLogCacheSize > 0) {
   192     // Please see the comment in allocate_card_count_cache
   193     // for why we call os::malloc() and os::free() directly.
   194     assert(_card_counts != NULL, "Logic");
   195     os::free(_card_counts, mtGC);
   196     assert(_card_epochs != NULL, "Logic");
   197     os::free(_card_epochs, mtGC);
   199     assert(_hot_cache != NULL, "Logic");
   200     FREE_C_HEAP_ARRAY(jbyte*, _hot_cache, mtGC);
   201   }
   202   if (_threads != NULL) {
   203     for (int i = 0; i < _n_threads; i++) {
   204       delete _threads[i];
   205     }
   206     FREE_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _threads, mtGC);
   207   }
   208 }
   210 void ConcurrentG1Refine::threads_do(ThreadClosure *tc) {
   211   if (_threads != NULL) {
   212     for (int i = 0; i < _n_threads; i++) {
   213       tc->do_thread(_threads[i]);
   214     }
   215   }
   216 }
   218 bool ConcurrentG1Refine::is_young_card(jbyte* card_ptr) {
   219   HeapWord* start = _ct_bs->addr_for(card_ptr);
   220   HeapRegion* r = _g1h->heap_region_containing(start);
   221   if (r != NULL && r->is_young()) {
   222     return true;
   223   }
   224   // This card is not associated with a heap region
   225   // so can't be young.
   226   return false;
   227 }
   229 jbyte* ConcurrentG1Refine::add_card_count(jbyte* card_ptr, int* count, bool* defer) {
   230   unsigned new_card_num = ptr_2_card_num(card_ptr);
   231   unsigned bucket = hash(new_card_num);
   232   assert(0 <= bucket && bucket < _n_card_counts, "Bounds");
   234   CardCountCacheEntry* count_ptr = &_card_counts[bucket];
   235   CardEpochCacheEntry* epoch_ptr = &_card_epochs[bucket];
   237   // We have to construct a new entry if we haven't updated the counts
   238   // during the current period, or if the count was updated for a
   239   // different card number.
   240   unsigned int new_epoch = (unsigned int) _n_periods;
   241   julong new_epoch_entry = make_epoch_entry(new_card_num, new_epoch);
   243   while (true) {
   244     // Fetch the previous epoch value
   245     julong prev_epoch_entry = epoch_ptr->_value;
   246     julong cas_res;
   248     if (extract_epoch(prev_epoch_entry) != new_epoch) {
   249       // This entry has not yet been updated during this period.
   250       // Note: we update the epoch value atomically to ensure
   251       // that there is only one winner that updates the cached
   252       // card_ptr value even though all the refine threads share
   253       // the same epoch value.
   255       cas_res = (julong) Atomic::cmpxchg((jlong) new_epoch_entry,
   256                                          (volatile jlong*)&epoch_ptr->_value,
   257                                          (jlong) prev_epoch_entry);
   259       if (cas_res == prev_epoch_entry) {
   260         // We have successfully won the race to update the
   261         // epoch and card_num value. Make it look like the
   262         // count and eviction count were previously cleared.
   263         count_ptr->_count = 1;
   264         count_ptr->_evict_count = 0;
   265         *count = 0;
   266         // We can defer the processing of card_ptr
   267         *defer = true;
   268         return card_ptr;
   269       }
   270       // We did not win the race to update the epoch field, so some other
   271       // thread must have done it. The value that gets returned by CAS
   272       // should be the new epoch value.
   273       assert(extract_epoch(cas_res) == new_epoch, "unexpected epoch");
   274       // We could 'continue' here or just re-read the previous epoch value
   275       prev_epoch_entry = epoch_ptr->_value;
   276     }
   278     // The epoch entry for card_ptr has been updated during this period.
   279     unsigned old_card_num = extract_card_num(prev_epoch_entry);
   281     // The card count that will be returned to caller
   282     *count = count_ptr->_count;
   284     // Are we updating the count for the same card?
   285     if (new_card_num == old_card_num) {
   286       // Same card - just update the count. We could have more than one
   287       // thread racing to update count for the current card. It should be
   288       // OK not to use a CAS as the only penalty should be some missed
   289       // increments of the count which delays identifying the card as "hot".
   291       if (*count < max_jubyte) count_ptr->_count++;
   292       // We can defer the processing of card_ptr
   293       *defer = true;
   294       return card_ptr;
   295     }
   297     // Different card - evict old card info
   298     if (count_ptr->_evict_count < max_jubyte) count_ptr->_evict_count++;
   299     if (count_ptr->_evict_count > G1CardCountCacheExpandThreshold) {
   300       // Trigger a resize the next time we clear
   301       _expand_card_counts = true;
   302     }
   304     cas_res = (julong) Atomic::cmpxchg((jlong) new_epoch_entry,
   305                                        (volatile jlong*)&epoch_ptr->_value,
   306                                        (jlong) prev_epoch_entry);
   308     if (cas_res == prev_epoch_entry) {
   309       // We successfully updated the card num value in the epoch entry
   310       count_ptr->_count = 0; // initialize counter for new card num
   311       jbyte* old_card_ptr = card_num_2_ptr(old_card_num);
    313       // Even though the region containing the card at old_card_num was not
   314       // in the young list when old_card_num was recorded in the epoch
   315       // cache it could have been added to the free list and subsequently
   316       // added to the young list in the intervening time. See CR 6817995.
   317       // We do not deal with this case here - it will be handled in
   318       // HeapRegion::oops_on_card_seq_iterate_careful after it has been
   319       // determined that the region containing the card has been allocated
   320       // to, and it's safe to check the young type of the region.
   322       // We do not want to defer processing of card_ptr in this case
   323       // (we need to refine old_card_ptr and card_ptr)
   324       *defer = false;
   325       return old_card_ptr;
   326     }
   327     // Someone else beat us - try again.
   328   }
   329 }
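// --- Illustrative aside (not part of the original file) --------------------
// The epoch entries manipulated above pack a card number and a period
// ("epoch") counter into a single julong so that both fields can be updated
// together with one Atomic::cmpxchg. The real make_epoch_entry /
// extract_epoch / extract_card_num helpers are defined in
// concurrentG1Refine.hpp; the layout below (card number in the high word,
// epoch in the low word) is an assumption made purely for illustration.
inline julong example_make_epoch_entry(unsigned card_num, unsigned epoch) {
  return (((julong) card_num) << 32) | (julong) epoch;
}
inline unsigned example_extract_epoch(julong entry)    { return (unsigned) entry;         }
inline unsigned example_extract_card_num(julong entry) { return (unsigned) (entry >> 32); }
// ----------------------------------------------------------------------------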
   331 jbyte* ConcurrentG1Refine::cache_insert(jbyte* card_ptr, bool* defer) {
   332   int count;
   333   jbyte* cached_ptr = add_card_count(card_ptr, &count, defer);
   334   assert(cached_ptr != NULL, "bad cached card ptr");
   336   // We've just inserted a card pointer into the card count cache
    337   // and got back either the card that we just inserted or the (evicted)
    338   // previous contents of that count slot.
   340   // The card we got back could be in a young region. When the
   341   // returned card (if evicted) was originally inserted, we had
   342   // determined that its containing region was not young. However
   343   // it is possible for the region to be freed during a cleanup
   344   // pause, then reallocated and tagged as young which will result
   345   // in the returned card residing in a young region.
   346   //
   347   // We do not deal with this case here - the change from non-young
   348   // to young could be observed at any time - it will be handled in
   349   // HeapRegion::oops_on_card_seq_iterate_careful after it has been
   350   // determined that the region containing the card has been allocated
   351   // to.
    353   // If the card pointer we obtained from the card count cache is not
    354   // hot, do not store it in the hot card cache; return it for
    355   // immediate refining.
   356   if (count < G1ConcRSHotCardLimit) {
   357     return cached_ptr;
   358   }
   360   // Otherwise, the pointer we got from the _card_counts cache is hot.
   361   jbyte* res = NULL;
   362   MutexLockerEx x(HotCardCache_lock, Mutex::_no_safepoint_check_flag);
   363   if (_n_hot == _hot_cache_size) {
   364     res = _hot_cache[_hot_cache_idx];
   365     _n_hot--;
   366   }
   367   // Now _n_hot < _hot_cache_size, and we can insert at _hot_cache_idx.
   368   _hot_cache[_hot_cache_idx] = cached_ptr;
   369   _hot_cache_idx++;
   370   if (_hot_cache_idx == _hot_cache_size) _hot_cache_idx = 0;
   371   _n_hot++;
   373   // The card obtained from the hot card cache could be in a young
   374   // region. See above on how this can happen.
   376   return res;
   377 }
   379 void ConcurrentG1Refine::clean_up_cache(int worker_i,
   380                                         G1RemSet* g1rs,
   381                                         DirtyCardQueue* into_cset_dcq) {
   382   assert(!use_cache(), "cache should be disabled");
   383   int start_idx;
   385   while ((start_idx = _hot_cache_par_claimed_idx) < _n_hot) { // read once
   386     int end_idx = start_idx + _hot_cache_par_chunk_size;
   388     if (start_idx ==
   389         Atomic::cmpxchg(end_idx, &_hot_cache_par_claimed_idx, start_idx)) {
   390       // The current worker has successfully claimed the chunk [start_idx..end_idx)
   391       end_idx = MIN2(end_idx, _n_hot);
   392       for (int i = start_idx; i < end_idx; i++) {
   393         jbyte* entry = _hot_cache[i];
   394         if (entry != NULL) {
   395           if (g1rs->concurrentRefineOneCard(entry, worker_i, true)) {
   396             // 'entry' contains references that point into the current
   397             // collection set. We need to record 'entry' in the DCQS
   398             // that's used for that purpose.
   399             //
   400             // The only time we care about recording cards that contain
   401             // references that point into the collection set is during
   402             // RSet updating while within an evacuation pause.
   403             // In this case worker_i should be the id of a GC worker thread
   404             assert(SafepointSynchronize::is_at_safepoint(), "not during an evacuation pause");
   405             assert(worker_i < (int) (ParallelGCThreads == 0 ? 1 : ParallelGCThreads), "incorrect worker id");
   406             into_cset_dcq->enqueue(entry);
   407           }
   408         }
   409       }
   410     }
   411   }
   412 }
   414 // The arrays used to hold the card counts and the epochs must have
    415 // a 1:1 correspondence. Hence they are allocated and freed together.
   416 // Returns true if the allocations of both the counts and epochs
   417 // were successful; false otherwise.
   418 bool ConcurrentG1Refine::allocate_card_count_cache(size_t n,
   419                                                    CardCountCacheEntry** counts,
   420                                                    CardEpochCacheEntry** epochs) {
   421   // We call the allocation/free routines directly for the counts
   422   // and epochs arrays. The NEW_C_HEAP_ARRAY/FREE_C_HEAP_ARRAY
   423   // macros call AllocateHeap and FreeHeap respectively.
   424   // AllocateHeap will call vm_exit_out_of_memory in the event
   425   // of an allocation failure and abort the JVM. With the
   426   // _counts/epochs arrays we only need to abort the JVM if the
   427   // initial allocation of these arrays fails.
   428   //
   429   // Additionally AllocateHeap/FreeHeap do some tracing of
   430   // allocate/free calls so calling one without calling the
   431   // other can cause inconsistencies in the tracing. So we
   432   // call neither.
   434   assert(*counts == NULL, "out param");
   435   assert(*epochs == NULL, "out param");
   437   size_t counts_size = n * sizeof(CardCountCacheEntry);
   438   size_t epochs_size = n * sizeof(CardEpochCacheEntry);
   440   *counts = (CardCountCacheEntry*) os::malloc(counts_size, mtGC);
   441   if (*counts == NULL) {
   442     // allocation was unsuccessful
   443     return false;
   444   }
   446   *epochs = (CardEpochCacheEntry*) os::malloc(epochs_size, mtGC);
   447   if (*epochs == NULL) {
   448     // allocation was unsuccessful - free counts array
   449     assert(*counts != NULL, "must be");
   450     os::free(*counts, mtGC);
   451     *counts = NULL;
   452     return false;
   453   }
   455   // We successfully allocated both counts and epochs
   456   return true;
   457 }
   459 // Returns true if the card counts/epochs cache was
   460 // successfully expanded; false otherwise.
   461 bool ConcurrentG1Refine::expand_card_count_cache(int cache_size_idx) {
   462   // Can we expand the card count and epoch tables?
   463   if (_n_card_counts < _max_n_card_counts) {
   464     assert(cache_size_idx >= 0 && cache_size_idx  < MAX_CC_CACHE_INDEX, "oob");
   466     size_t cache_size = _cc_cache_sizes[cache_size_idx];
   467     // Make sure we don't go bigger than we will ever need
   468     cache_size = MIN2(cache_size, _max_n_card_counts);
   470     // Should we expand the card count and card epoch tables?
   471     if (cache_size > _n_card_counts) {
   472       // We have been asked to allocate new, larger, arrays for
   473       // the card counts and the epochs. Attempt the allocation
   474       // of both before we free the existing arrays in case
   475       // the allocation is unsuccessful...
   476       CardCountCacheEntry* counts = NULL;
   477       CardEpochCacheEntry* epochs = NULL;
   479       if (allocate_card_count_cache(cache_size, &counts, &epochs)) {
   480         // Allocation was successful.
   481         // We can just free the old arrays; we're
   482         // not interested in preserving the contents
   483         if (_card_counts != NULL) os::free(_card_counts, mtGC);
   484         if (_card_epochs != NULL) os::free(_card_epochs, mtGC);
   486         // Cache the size of the arrays and the index that got us there.
   487         _n_card_counts = cache_size;
   488         _cache_size_index = cache_size_idx;
   490         _card_counts = counts;
   491         _card_epochs = epochs;
   493         // We successfully allocated/expanded the caches.
   494         return true;
   495       }
   496     }
   497   }
   499   // We did not successfully expand the caches.
   500   return false;
   501 }
   503 void ConcurrentG1Refine::clear_and_record_card_counts() {
   504   if (G1ConcRSLogCacheSize == 0) {
   505     return;
   506   }
   508   double start = os::elapsedTime();
   510   if (_expand_card_counts) {
   511     int new_idx = _cache_size_index + 1;
   513     if (expand_card_count_cache(new_idx)) {
   514       // Allocation was successful and  _n_card_counts has
   515       // been updated to the new size. We only need to clear
   516       // the epochs so we don't read a bogus epoch value
   517       // when inserting a card into the hot card cache.
   518       Copy::fill_to_bytes(&_card_epochs[0], _n_card_counts * sizeof(CardEpochCacheEntry));
   519     }
   520     _expand_card_counts = false;
   521   }
   523   int this_epoch = (int) _n_periods;
    524   assert((this_epoch+1) <= max_jint, "too many periods");
   525   // Update epoch
   526   _n_periods++;
   527   double cc_clear_time_ms = (os::elapsedTime() - start) * 1000;
   528   _g1h->g1_policy()->phase_times()->record_cc_clear_time_ms(cc_clear_time_ms);
   529 }
   531 void ConcurrentG1Refine::print_worker_threads_on(outputStream* st) const {
   532   for (int i = 0; i < _n_threads; ++i) {
   533     _threads[i]->print_on(st);
   534     st->cr();
   535   }
   536 }
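
As a standalone illustration of the hot card cache behaviour implemented in cache_insert() above (a fixed-size ring buffer that starts returning the evicted, oldest entry once it is full), here is a simplified single-threaded model. It is a sketch only: it uses plain C++ containers instead of the jbyte* array and HotCardCache_lock machinery used by the real code.

  #include <cstddef>
  #include <vector>

  // Simplified model of the hot card cache: insert() returns NULL while the
  // buffer is filling up; once full, each insert evicts and returns the
  // oldest cached entry, which the caller must then refine immediately.
  // The buffer size must be greater than zero.
  class HotCacheModel {
    std::vector<const void*> _cache;   // fixed-size ring buffer
    size_t _idx;                       // next slot to write
    size_t _n_hot;                     // number of live entries
   public:
    explicit HotCacheModel(size_t size) : _cache(size, NULL), _idx(0), _n_hot(0) { }

    const void* insert(const void* card) {
      const void* evicted = NULL;
      if (_n_hot == _cache.size()) {
        evicted = _cache[_idx];        // the slot about to be overwritten holds the oldest entry
        _n_hot--;
      }
      _cache[_idx] = card;
      _idx = (_idx + 1) % _cache.size();
      _n_hot++;
      return evicted;
    }
  };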
