src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp

author:      stefank
date:        Tue, 23 Nov 2010 13:22:55 -0800
changeset:   2314:f95d63e2154a
parent:      2060:2d160770d2e5
child:       2504:c33825b68624
permissions: -rw-r--r--

6989984: Use standard include model for Hotspot
Summary: Replaced MakeDeps and the includeDB files with more standardized solutions.
Reviewed-by: coleenp, kvn, kamg

/*
 * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "gc_implementation/g1/concurrentG1Refine.hpp"
#include "gc_implementation/g1/concurrentG1RefineThread.hpp"
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1CollectorPolicy.hpp"
#include "gc_implementation/g1/g1RemSet.hpp"
#include "gc_implementation/g1/heapRegionSeq.inline.hpp"
#include "memory/space.inline.hpp"
#include "runtime/atomic.hpp"
#include "utilities/copy.hpp"

// Possible sizes for the card counts cache: odd primes that roughly double in size.
// (See jvmtiTagMap.cpp).
int ConcurrentG1Refine::_cc_cache_sizes[] = {
        16381,    32771,    76831,    150001,   307261,
       614563,  1228891,  2457733,   4915219,  9830479,
     19660831, 39321619, 78643219, 157286461,       -1
  };

ConcurrentG1Refine::ConcurrentG1Refine() :
  _card_counts(NULL), _card_epochs(NULL),
  _n_card_counts(0), _max_n_card_counts(0),
  _cache_size_index(0), _expand_card_counts(false),
  _hot_cache(NULL),
  _def_use_cache(false), _use_cache(false),
  _n_periods(0),
  _threads(NULL), _n_threads(0)
{

  // Ergonomically select initial concurrent refinement parameters
  if (FLAG_IS_DEFAULT(G1ConcRefinementGreenZone)) {
    FLAG_SET_DEFAULT(G1ConcRefinementGreenZone, MAX2<int>(ParallelGCThreads, 1));
  }
  set_green_zone(G1ConcRefinementGreenZone);

  if (FLAG_IS_DEFAULT(G1ConcRefinementYellowZone)) {
    FLAG_SET_DEFAULT(G1ConcRefinementYellowZone, green_zone() * 3);
  }
  set_yellow_zone(MAX2<int>(G1ConcRefinementYellowZone, green_zone()));

  if (FLAG_IS_DEFAULT(G1ConcRefinementRedZone)) {
    FLAG_SET_DEFAULT(G1ConcRefinementRedZone, yellow_zone() * 2);
  }
  set_red_zone(MAX2<int>(G1ConcRefinementRedZone, yellow_zone()));
  _n_worker_threads = thread_num();
  // We need one extra thread to do the young gen rset size sampling.
  _n_threads = _n_worker_threads + 1;
  reset_threshold_step();

  _threads = NEW_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _n_threads);
  int worker_id_offset = (int)DirtyCardQueueSet::num_par_ids();
  ConcurrentG1RefineThread *next = NULL;
  for (int i = _n_threads - 1; i >= 0; i--) {
    ConcurrentG1RefineThread* t = new ConcurrentG1RefineThread(this, next, worker_id_offset, i);
    assert(t != NULL, "Conc refine should have been created");
    assert(t->cg1r() == this, "Conc refine thread should refer to this");
    _threads[i] = t;
    next = t;
  }
}

void ConcurrentG1Refine::reset_threshold_step() {
  if (FLAG_IS_DEFAULT(G1ConcRefinementThresholdStep)) {
    _thread_threshold_step = (yellow_zone() - green_zone()) / (worker_thread_num() + 1);
  } else {
    _thread_threshold_step = G1ConcRefinementThresholdStep;
  }
}

int ConcurrentG1Refine::thread_num() {
  return MAX2<int>((G1ConcRefinementThreads > 0) ? G1ConcRefinementThreads : ParallelGCThreads, 1);
}

void ConcurrentG1Refine::init() {
  if (G1ConcRSLogCacheSize > 0) {
    _g1h = G1CollectedHeap::heap();
    _max_n_card_counts =
      (unsigned) (_g1h->g1_reserved_obj_bytes() >> CardTableModRefBS::card_shift);

    size_t max_card_num = ((size_t)1 << (sizeof(unsigned)*BitsPerByte-1)) - 1;
    guarantee(_max_n_card_counts < max_card_num, "card_num representation");

    int desired = _max_n_card_counts / InitialCacheFraction;
    for (_cache_size_index = 0;
              _cc_cache_sizes[_cache_size_index] >= 0; _cache_size_index++) {
      if (_cc_cache_sizes[_cache_size_index] >= desired) break;
    }
    _cache_size_index = MAX2(0, (_cache_size_index - 1));

    int initial_size = _cc_cache_sizes[_cache_size_index];
    if (initial_size < 0) initial_size = _max_n_card_counts;

    // Make sure we don't go bigger than we will ever need
    _n_card_counts = MIN2((unsigned) initial_size, _max_n_card_counts);

    _card_counts = NEW_C_HEAP_ARRAY(CardCountCacheEntry, _n_card_counts);
    _card_epochs = NEW_C_HEAP_ARRAY(CardEpochCacheEntry, _n_card_counts);

    Copy::fill_to_bytes(&_card_counts[0],
                        _n_card_counts * sizeof(CardCountCacheEntry));
    Copy::fill_to_bytes(&_card_epochs[0], _n_card_counts * sizeof(CardEpochCacheEntry));

    ModRefBarrierSet* bs = _g1h->mr_bs();
    guarantee(bs->is_a(BarrierSet::CardTableModRef), "Precondition");
    _ct_bs = (CardTableModRefBS*)bs;
    _ct_bot = _ct_bs->byte_for_const(_g1h->reserved_region().start());

    _def_use_cache = true;
    _use_cache = true;
    _hot_cache_size = (1 << G1ConcRSLogCacheSize);
    _hot_cache = NEW_C_HEAP_ARRAY(jbyte*, _hot_cache_size);
    _n_hot = 0;
    _hot_cache_idx = 0;

    // For refining the cards in the hot cache in parallel
    int n_workers = (ParallelGCThreads > 0 ?
                        _g1h->workers()->total_workers() : 1);
    _hot_cache_par_chunk_size = MAX2(1, _hot_cache_size / n_workers);
    _hot_cache_par_claimed_idx = 0;
  }
}

void ConcurrentG1Refine::stop() {
  if (_threads != NULL) {
    for (int i = 0; i < _n_threads; i++) {
      _threads[i]->stop();
    }
  }
}

void ConcurrentG1Refine::reinitialize_threads() {
  reset_threshold_step();
  if (_threads != NULL) {
    for (int i = 0; i < _n_threads; i++) {
      _threads[i]->initialize();
    }
  }
}

ConcurrentG1Refine::~ConcurrentG1Refine() {
  if (G1ConcRSLogCacheSize > 0) {
    assert(_card_counts != NULL, "Logic");
    FREE_C_HEAP_ARRAY(CardCountCacheEntry, _card_counts);
    assert(_card_epochs != NULL, "Logic");
    FREE_C_HEAP_ARRAY(CardEpochCacheEntry, _card_epochs);
    assert(_hot_cache != NULL, "Logic");
    FREE_C_HEAP_ARRAY(jbyte*, _hot_cache);
  }
  if (_threads != NULL) {
    for (int i = 0; i < _n_threads; i++) {
      delete _threads[i];
    }
    FREE_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _threads);
  }
}

void ConcurrentG1Refine::threads_do(ThreadClosure *tc) {
  if (_threads != NULL) {
    for (int i = 0; i < _n_threads; i++) {
      tc->do_thread(_threads[i]);
    }
  }
}

bool ConcurrentG1Refine::is_young_card(jbyte* card_ptr) {
  HeapWord* start = _ct_bs->addr_for(card_ptr);
  HeapRegion* r = _g1h->heap_region_containing(start);
  if (r != NULL && r->is_young()) {
    return true;
  }
  // This card is not associated with a heap region
  // so can't be young.
  return false;
}

jbyte* ConcurrentG1Refine::add_card_count(jbyte* card_ptr, int* count, bool* defer) {
  unsigned new_card_num = ptr_2_card_num(card_ptr);
  unsigned bucket = hash(new_card_num);
  assert(0 <= bucket && bucket < _n_card_counts, "Bounds");

  CardCountCacheEntry* count_ptr = &_card_counts[bucket];
  CardEpochCacheEntry* epoch_ptr = &_card_epochs[bucket];

  // We have to construct a new entry if we haven't updated the counts
  // during the current period, or if the count was updated for a
  // different card number.
  unsigned int new_epoch = (unsigned int) _n_periods;
  julong new_epoch_entry = make_epoch_entry(new_card_num, new_epoch);

  while (true) {
    // Fetch the previous epoch value
    julong prev_epoch_entry = epoch_ptr->_value;
    julong cas_res;

    if (extract_epoch(prev_epoch_entry) != new_epoch) {
      // This entry has not yet been updated during this period.
      // Note: we update the epoch value atomically to ensure
      // that there is only one winner that updates the cached
      // card_ptr value even though all the refine threads share
      // the same epoch value.

      cas_res = (julong) Atomic::cmpxchg((jlong) new_epoch_entry,
                                         (volatile jlong*)&epoch_ptr->_value,
                                         (jlong) prev_epoch_entry);

      if (cas_res == prev_epoch_entry) {
        // We have successfully won the race to update the
        // epoch and card_num value. Make it look like the
        // count and eviction count were previously cleared.
        count_ptr->_count = 1;
        count_ptr->_evict_count = 0;
        *count = 0;
        // We can defer the processing of card_ptr
        *defer = true;
        return card_ptr;
      }
      // We did not win the race to update the epoch field, so some other
      // thread must have done it. The value that gets returned by CAS
      // should be the new epoch value.
      assert(extract_epoch(cas_res) == new_epoch, "unexpected epoch");
      // We could 'continue' here or just re-read the previous epoch value
      prev_epoch_entry = epoch_ptr->_value;
    }

    // The epoch entry for card_ptr has been updated during this period.
    unsigned old_card_num = extract_card_num(prev_epoch_entry);

    // The card count that will be returned to caller
    *count = count_ptr->_count;

    // Are we updating the count for the same card?
    if (new_card_num == old_card_num) {
      // Same card - just update the count. We could have more than one
      // thread racing to update count for the current card. It should be
      // OK not to use a CAS as the only penalty should be some missed
      // increments of the count which delays identifying the card as "hot".

      if (*count < max_jubyte) count_ptr->_count++;
      // We can defer the processing of card_ptr
      *defer = true;
      return card_ptr;
    }

    // Different card - evict old card info
    if (count_ptr->_evict_count < max_jubyte) count_ptr->_evict_count++;
    if (count_ptr->_evict_count > G1CardCountCacheExpandThreshold) {
      // Trigger a resize the next time we clear
      _expand_card_counts = true;
    }

    cas_res = (julong) Atomic::cmpxchg((jlong) new_epoch_entry,
                                       (volatile jlong*)&epoch_ptr->_value,
                                       (jlong) prev_epoch_entry);

    if (cas_res == prev_epoch_entry) {
      // We successfully updated the card num value in the epoch entry
      count_ptr->_count = 0; // initialize counter for new card num
      jbyte* old_card_ptr = card_num_2_ptr(old_card_num);

      // Even though the region containing the card at old_card_num was not
      // in the young list when old_card_num was recorded in the epoch
      // cache it could have been added to the free list and subsequently
      // added to the young list in the intervening time. See CR 6817995.
      // We do not deal with this case here - it will be handled in
      // HeapRegion::oops_on_card_seq_iterate_careful after it has been
      // determined that the region containing the card has been allocated
      // to, and it's safe to check the young type of the region.

      // We do not want to defer processing of card_ptr in this case
      // (we need to refine old_card_ptr and card_ptr)
      *defer = false;
      return old_card_ptr;
    }
    // Someone else beat us - try again.
  }
}

jbyte* ConcurrentG1Refine::cache_insert(jbyte* card_ptr, bool* defer) {
  int count;
  jbyte* cached_ptr = add_card_count(card_ptr, &count, defer);
  assert(cached_ptr != NULL, "bad cached card ptr");

  // We've just inserted a card pointer into the card count cache
  // and got back either the card that we just inserted or the
  // (evicted) previous contents of that count slot.

  // The card we got back could be in a young region. When the
  // returned card (if evicted) was originally inserted, we had
  // determined that its containing region was not young. However
  // it is possible for the region to be freed during a cleanup
  // pause, then reallocated and tagged as young which will result
  // in the returned card residing in a young region.
  //
  // We do not deal with this case here - the change from non-young
  // to young could be observed at any time - it will be handled in
  // HeapRegion::oops_on_card_seq_iterate_careful after it has been
  // determined that the region containing the card has been allocated
  // to.

  // The card pointer we obtained from the card count cache is not hot
  // so do not store it in the cache; return it for immediate
  // refining.
  if (count < G1ConcRSHotCardLimit) {
    return cached_ptr;
  }

  // Otherwise, the pointer we got from the _card_counts cache is hot.
  jbyte* res = NULL;
  MutexLockerEx x(HotCardCache_lock, Mutex::_no_safepoint_check_flag);
  if (_n_hot == _hot_cache_size) {
    res = _hot_cache[_hot_cache_idx];
    _n_hot--;
  }
  // Now _n_hot < _hot_cache_size, and we can insert at _hot_cache_idx.
  _hot_cache[_hot_cache_idx] = cached_ptr;
  _hot_cache_idx++;
  if (_hot_cache_idx == _hot_cache_size) _hot_cache_idx = 0;
  _n_hot++;

  // The card obtained from the hot card cache could be in a young
  // region. See above on how this can happen.

  return res;
}

void ConcurrentG1Refine::clean_up_cache(int worker_i,
                                        G1RemSet* g1rs,
                                        DirtyCardQueue* into_cset_dcq) {
  assert(!use_cache(), "cache should be disabled");
  int start_idx;

  while ((start_idx = _hot_cache_par_claimed_idx) < _n_hot) { // read once
    int end_idx = start_idx + _hot_cache_par_chunk_size;

    if (start_idx ==
        Atomic::cmpxchg(end_idx, &_hot_cache_par_claimed_idx, start_idx)) {
      // The current worker has successfully claimed the chunk [start_idx..end_idx)
      end_idx = MIN2(end_idx, _n_hot);
      for (int i = start_idx; i < end_idx; i++) {
        jbyte* entry = _hot_cache[i];
        if (entry != NULL) {
          if (g1rs->concurrentRefineOneCard(entry, worker_i, true)) {
            // 'entry' contains references that point into the current
            // collection set. We need to record 'entry' in the DCQS
            // that's used for that purpose.
            //
            // The only time we care about recording cards that contain
            // references that point into the collection set is during
            // RSet updating while within an evacuation pause.
            // In this case worker_i should be the id of a GC worker thread.
            assert(SafepointSynchronize::is_at_safepoint(), "not during an evacuation pause");
            assert(worker_i < (int) DirtyCardQueueSet::num_par_ids(), "incorrect worker id");
            into_cset_dcq->enqueue(entry);
          }
        }
      }
    }
  }
}

void ConcurrentG1Refine::expand_card_count_cache() {
  if (_n_card_counts < _max_n_card_counts) {
    int new_idx = _cache_size_index+1;
    int new_size = _cc_cache_sizes[new_idx];
    if (new_size < 0) new_size = _max_n_card_counts;

    // Make sure we don't go bigger than we will ever need
    new_size = MIN2((unsigned) new_size, _max_n_card_counts);

    // Expand the card count and card epoch tables
    if (new_size > (int)_n_card_counts) {
      // We can just free and allocate a new array as we're
      // not interested in preserving the contents
      assert(_card_counts != NULL, "Logic!");
      assert(_card_epochs != NULL, "Logic!");
      FREE_C_HEAP_ARRAY(CardCountCacheEntry, _card_counts);
      FREE_C_HEAP_ARRAY(CardEpochCacheEntry, _card_epochs);
      _n_card_counts = new_size;
      _card_counts = NEW_C_HEAP_ARRAY(CardCountCacheEntry, _n_card_counts);
      _card_epochs = NEW_C_HEAP_ARRAY(CardEpochCacheEntry, _n_card_counts);
      _cache_size_index = new_idx;
    }
  }
}

void ConcurrentG1Refine::clear_and_record_card_counts() {
  if (G1ConcRSLogCacheSize == 0) return;

#ifndef PRODUCT
  double start = os::elapsedTime();
#endif

  if (_expand_card_counts) {
    expand_card_count_cache();
    _expand_card_counts = false;
    // Only need to clear the epochs.
    Copy::fill_to_bytes(&_card_epochs[0], _n_card_counts * sizeof(CardEpochCacheEntry));
  }

  int this_epoch = (int) _n_periods;
  assert((this_epoch+1) <= max_jint, "too many periods");
  // Update epoch
  _n_periods++;

#ifndef PRODUCT
  double elapsed = os::elapsedTime() - start;
  _g1h->g1_policy()->record_cc_clear_time(elapsed * 1000.0);
#endif
}

void ConcurrentG1Refine::print_worker_threads_on(outputStream* st) const {
  for (int i = 0; i < _n_threads; ++i) {
    _threads[i]->print_on(st);
    st->cr();
  }
}
