src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp

author       johnc
date         Thu, 22 Apr 2010 10:02:38 -0700
changeset    1829:1316cec51b4d
parent       1717:b81f3572f355
child        1907:c18cbe5936b8
permissions  -rw-r--r--

6819061: G1: eliminate serial Other times that are proportional to the collection set length
6871109: G1: remove the concept of the scan only prefix
Summary: Removed scan only regions and associated code. The young portion of the collection set is now constructed incrementally - when a young region is retired as the current allocation region, it is added to the collection set.
Reviewed-by: apetrusenko, iveresov, tonyp

/*
 * Copyright 2001-2009 Sun Microsystems, Inc.  All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 */

#include "incls/_precompiled.incl"
#include "incls/_concurrentG1Refine.cpp.incl"

// Possible sizes for the card counts cache: odd primes that roughly double in size.
// (See jvmtiTagMap.cpp).
int ConcurrentG1Refine::_cc_cache_sizes[] = {
        16381,    32771,    76831,    150001,   307261,
       614563,  1228891,  2457733,   4915219,  9830479,
     19660831, 39321619, 78643219, 157286461,       -1
  };
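
// Note: using odd prime table sizes helps the hash function (see the
// hpp file) spread clustered card numbers evenly across the buckets.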

ConcurrentG1Refine::ConcurrentG1Refine() :
  _card_counts(NULL), _card_epochs(NULL),
  _n_card_counts(0), _max_n_card_counts(0),
  _cache_size_index(0), _expand_card_counts(false),
  _hot_cache(NULL),
  _def_use_cache(false), _use_cache(false),
  _n_periods(0),
  _threads(NULL), _n_threads(0)
{

  // Ergonomically select initial concurrent refinement parameters
  if (FLAG_IS_DEFAULT(G1ConcRefinementGreenZone)) {
    FLAG_SET_DEFAULT(G1ConcRefinementGreenZone, MAX2<int>(ParallelGCThreads, 1));
  }
  set_green_zone(G1ConcRefinementGreenZone);

  if (FLAG_IS_DEFAULT(G1ConcRefinementYellowZone)) {
    FLAG_SET_DEFAULT(G1ConcRefinementYellowZone, green_zone() * 3);
  }
  set_yellow_zone(MAX2<int>(G1ConcRefinementYellowZone, green_zone()));

  if (FLAG_IS_DEFAULT(G1ConcRefinementRedZone)) {
    FLAG_SET_DEFAULT(G1ConcRefinementRedZone, yellow_zone() * 2);
  }
  set_red_zone(MAX2<int>(G1ConcRefinementRedZone, yellow_zone()));
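
  // For example, with ParallelGCThreads == 4 and all three flags left at
  // their defaults, the zones come out as green == 4, yellow == 12
  // (green * 3) and red == 24 (yellow * 2). Roughly speaking, the zones
  // refer to the number of enqueued dirty card buffers: below the green
  // zone the refinement threads stay inactive and leave buffers for the
  // GC to process, between green and yellow progressively more refinement
  // threads are activated, and past the red zone mutator threads help
  // process buffers themselves.
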
  _n_worker_threads = thread_num();
  // We need one extra thread to do the young gen rset size sampling.
  _n_threads = _n_worker_threads + 1;
  reset_threshold_step();

  _threads = NEW_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _n_threads);
  int worker_id_offset = (int)DirtyCardQueueSet::num_par_ids();
  ConcurrentG1RefineThread *next = NULL;
  for (int i = _n_threads - 1; i >= 0; i--) {
    ConcurrentG1RefineThread* t = new ConcurrentG1RefineThread(this, next, worker_id_offset, i);
    assert(t != NULL, "Conc refine should have been created");
    assert(t->cg1r() == this, "Conc refine thread should refer to this");
    _threads[i] = t;
    next = t;
  }
}

void ConcurrentG1Refine::reset_threshold_step() {
  if (FLAG_IS_DEFAULT(G1ConcRefinementThresholdStep)) {
    _thread_threshold_step = (yellow_zone() - green_zone()) / (worker_thread_num() + 1);
  } else {
    _thread_threshold_step = G1ConcRefinementThresholdStep;
  }
}
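
// With the example zone values above (green == 4, yellow == 12) and 4
// worker threads, the default step is (12 - 4) / (4 + 1) == 1, so each
// successive refinement thread is activated at a slightly higher number
// of enqueued buffers than its predecessor.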

int ConcurrentG1Refine::thread_num() {
  return MAX2<int>((G1ConcRefinementThreads > 0) ? G1ConcRefinementThreads : ParallelGCThreads, 1);
}

void ConcurrentG1Refine::init() {
  if (G1ConcRSLogCacheSize > 0) {
    _g1h = G1CollectedHeap::heap();
    _max_n_card_counts =
      (unsigned) (_g1h->g1_reserved_obj_bytes() >> CardTableModRefBS::card_shift);

    size_t max_card_num = ((size_t)1 << (sizeof(unsigned)*BitsPerByte-1)) - 1;
    guarantee(_max_n_card_counts < max_card_num, "card_num representation");
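
    // Note: max_card_num is 2^31 - 1. Card numbers are packed, together
    // with an epoch value, into a 64-bit epoch entry by add_card_count()
    // below, so a card number needs to fit comfortably in 32 bits.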

    int desired = _max_n_card_counts / InitialCacheFraction;
    for (_cache_size_index = 0;
         _cc_cache_sizes[_cache_size_index] >= 0; _cache_size_index++) {
      if (_cc_cache_sizes[_cache_size_index] >= desired) break;
    }
    _cache_size_index = MAX2(0, (_cache_size_index - 1));
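
    // The loop above stops at the first table size that is at least
    // "desired"; backing the index off by one then selects the largest
    // table size below it. For example, if desired were 100000 the loop
    // would stop at 150001 (index 3) and we would select 76831 (index 2).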

    int initial_size = _cc_cache_sizes[_cache_size_index];
    if (initial_size < 0) initial_size = _max_n_card_counts;

    // Make sure we don't go bigger than we will ever need
    _n_card_counts = MIN2((unsigned) initial_size, _max_n_card_counts);

    _card_counts = NEW_C_HEAP_ARRAY(CardCountCacheEntry, _n_card_counts);
    _card_epochs = NEW_C_HEAP_ARRAY(CardEpochCacheEntry, _n_card_counts);

    Copy::fill_to_bytes(&_card_counts[0],
                        _n_card_counts * sizeof(CardCountCacheEntry));
    Copy::fill_to_bytes(&_card_epochs[0], _n_card_counts * sizeof(CardEpochCacheEntry));

    ModRefBarrierSet* bs = _g1h->mr_bs();
    guarantee(bs->is_a(BarrierSet::CardTableModRef), "Precondition");
    _ct_bs = (CardTableModRefBS*)bs;
    _ct_bot = _ct_bs->byte_for_const(_g1h->reserved_region().start());

    _def_use_cache = true;
    _use_cache = true;
    _hot_cache_size = (1 << G1ConcRSLogCacheSize);
    _hot_cache = NEW_C_HEAP_ARRAY(jbyte*, _hot_cache_size);
    _n_hot = 0;
    _hot_cache_idx = 0;

    // For refining the cards in the hot cache in parallel
    int n_workers = (ParallelGCThreads > 0 ?
                        _g1h->workers()->total_workers() : 1);
    _hot_cache_par_chunk_size = MAX2(1, _hot_cache_size / n_workers);
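    // For example, with G1ConcRSLogCacheSize == 10 (a 1024 entry hot
    // cache) and 8 parallel workers, each claimed chunk covers 128
    // entries of the hot cache.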
    _hot_cache_par_claimed_idx = 0;
  }
}

void ConcurrentG1Refine::stop() {
  if (_threads != NULL) {
    for (int i = 0; i < _n_threads; i++) {
      _threads[i]->stop();
    }
  }
}

void ConcurrentG1Refine::reinitialize_threads() {
  reset_threshold_step();
  if (_threads != NULL) {
    for (int i = 0; i < _n_threads; i++) {
      _threads[i]->initialize();
    }
  }
}

ConcurrentG1Refine::~ConcurrentG1Refine() {
  if (G1ConcRSLogCacheSize > 0) {
    assert(_card_counts != NULL, "Logic");
    FREE_C_HEAP_ARRAY(CardCountCacheEntry, _card_counts);
    assert(_card_epochs != NULL, "Logic");
    FREE_C_HEAP_ARRAY(CardEpochCacheEntry, _card_epochs);
    assert(_hot_cache != NULL, "Logic");
    FREE_C_HEAP_ARRAY(jbyte*, _hot_cache);
  }
  if (_threads != NULL) {
    for (int i = 0; i < _n_threads; i++) {
      delete _threads[i];
    }
    FREE_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _threads);
  }
}

void ConcurrentG1Refine::threads_do(ThreadClosure *tc) {
  if (_threads != NULL) {
    for (int i = 0; i < _n_threads; i++) {
      tc->do_thread(_threads[i]);
    }
  }
}

bool ConcurrentG1Refine::is_young_card(jbyte* card_ptr) {
  HeapWord* start = _ct_bs->addr_for(card_ptr);
  HeapRegion* r = _g1h->heap_region_containing(start);
  if (r != NULL && r->is_young()) {
    return true;
  }
  // This card is not associated with a heap region
  // so can't be young.
  return false;
}
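
// Helper used by cache_insert: looks up and updates the count for
// card_ptr in the card counts cache. It returns either card_ptr itself,
// with *defer set to true when its processing may safely be postponed,
// or the card that was evicted from card_ptr's bucket, with *defer set
// to false when both the evicted card and card_ptr still need refining.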
jbyte* ConcurrentG1Refine::add_card_count(jbyte* card_ptr, int* count, bool* defer) {
  unsigned new_card_num = ptr_2_card_num(card_ptr);
  unsigned bucket = hash(new_card_num);
  assert(0 <= bucket && bucket < _n_card_counts, "Bounds");

  CardCountCacheEntry* count_ptr = &_card_counts[bucket];
  CardEpochCacheEntry* epoch_ptr = &_card_epochs[bucket];

  // We have to construct a new entry if we haven't updated the counts
  // during the current period, or if the count was updated for a
  // different card number.
  unsigned int new_epoch = (unsigned int) _n_periods;
  julong new_epoch_entry = make_epoch_entry(new_card_num, new_epoch);
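
  // The epoch entry packs the card number and the epoch into a single
  // julong so that both can be claimed together with one 64-bit CAS. An
  // entry whose epoch differs from the current period is treated as
  // vacant, which is what lets clear_and_record_card_counts() empty the
  // cache by simply bumping _n_periods.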

  while (true) {
    // Fetch the previous epoch value
    julong prev_epoch_entry = epoch_ptr->_value;
    julong cas_res;

    if (extract_epoch(prev_epoch_entry) != new_epoch) {
      // This entry has not yet been updated during this period.
      // Note: we update the epoch value atomically to ensure
      // that there is only one winner that updates the cached
      // card_ptr value even though all the refine threads share
      // the same epoch value.

      cas_res = (julong) Atomic::cmpxchg((jlong) new_epoch_entry,
                                         (volatile jlong*)&epoch_ptr->_value,
                                         (jlong) prev_epoch_entry);

      if (cas_res == prev_epoch_entry) {
        // We have successfully won the race to update the
        // epoch and card_num value. Make it look like the
        // count and eviction count were previously cleared.
        count_ptr->_count = 1;
        count_ptr->_evict_count = 0;
        *count = 0;
        // We can defer the processing of card_ptr
        *defer = true;
        return card_ptr;
      }
      // We did not win the race to update the epoch field, so some other
      // thread must have done it. The value that gets returned by CAS
      // should be the new epoch value.
      assert(extract_epoch(cas_res) == new_epoch, "unexpected epoch");
      // We could 'continue' here or just re-read the previous epoch value
      prev_epoch_entry = epoch_ptr->_value;
    }

    // The epoch entry for card_ptr has been updated during this period.
    unsigned old_card_num = extract_card_num(prev_epoch_entry);

    // The card count that will be returned to the caller
    *count = count_ptr->_count;

    // Are we updating the count for the same card?
    if (new_card_num == old_card_num) {
      // Same card - just update the count. We could have more than one
      // thread racing to update count for the current card. It should be
      // OK not to use a CAS as the only penalty should be some missed
      // increments of the count which delays identifying the card as "hot".

      if (*count < max_jubyte) count_ptr->_count++;
      // We can defer the processing of card_ptr
      *defer = true;
      return card_ptr;
    }

    // Different card - evict old card info
    if (count_ptr->_evict_count < max_jubyte) count_ptr->_evict_count++;
    if (count_ptr->_evict_count > G1CardCountCacheExpandThreshold) {
      // Trigger a resize the next time we clear
      _expand_card_counts = true;
    }

    cas_res = (julong) Atomic::cmpxchg((jlong) new_epoch_entry,
                                       (volatile jlong*)&epoch_ptr->_value,
                                       (jlong) prev_epoch_entry);

    if (cas_res == prev_epoch_entry) {
      // We successfully updated the card num value in the epoch entry
      count_ptr->_count = 0; // initialize counter for new card num

      // Even though the region containing the card at old_card_num was not
      // in the young list when old_card_num was recorded in the epoch
      // cache it could have been added to the free list and subsequently
      // added to the young list in the intervening time. If the evicted
      // card is in a young region just return the card_ptr and the evicted
      // card will not be cleaned. See CR 6817995.

      jbyte* old_card_ptr = card_num_2_ptr(old_card_num);
      if (is_young_card(old_card_ptr)) {
        *count = 0;
        // We can defer the processing of card_ptr
        *defer = true;
        return card_ptr;
      }

      // We do not want to defer processing of card_ptr in this case
      // (we need to refine old_card_ptr and card_ptr)
      *defer = false;
      return old_card_ptr;
    }
    // Someone else beat us - try again.
  }
}

jbyte* ConcurrentG1Refine::cache_insert(jbyte* card_ptr, bool* defer) {
  int count;
  jbyte* cached_ptr = add_card_count(card_ptr, &count, defer);
  assert(cached_ptr != NULL, "bad cached card ptr");

  if (is_young_card(cached_ptr)) {
    // The region containing cached_ptr has been freed during a clean up
    // pause, reallocated, and tagged as young.
    assert(cached_ptr != card_ptr, "shouldn't be");

    // We've just inserted a new old-gen card pointer into the card count
    // cache and evicted the previous contents of that count slot.
    // The evicted card pointer has been determined to be in a young region
    // and so cannot be the newly inserted card pointer (that will be
    // in an old region).
    // The count for the newly inserted card will be set to zero during the
    // insertion, so we don't want to defer the cleaning of the newly
    // inserted card pointer.
    assert(*defer == false, "deferring non-hot card");
    return NULL;
  }

  // The card pointer we obtained from the card count cache is not hot
  // so do not store it in the cache; return it for immediate
  // refining.
  if (count < G1ConcRSHotCardLimit) {
    return cached_ptr;
  }

  // Otherwise, the pointer we got from _card_counts is hot.
  jbyte* res = NULL;
  MutexLockerEx x(HotCardCache_lock, Mutex::_no_safepoint_check_flag);
  if (_n_hot == _hot_cache_size) {
    res = _hot_cache[_hot_cache_idx];
    _n_hot--;
  }
  // Now _n_hot < _hot_cache_size, and we can insert at _hot_cache_idx.
  _hot_cache[_hot_cache_idx] = cached_ptr;
  _hot_cache_idx++;
  if (_hot_cache_idx == _hot_cache_size) _hot_cache_idx = 0;
  _n_hot++;

  if (res != NULL) {
    // Even though the region containing res was not in the young list
    // when it was recorded in the hot cache it could have been added
    // to the free list and subsequently added to the young list in
    // the intervening time. If res is in a young region, return NULL
    // so that res is not cleaned. See CR 6817995.

    if (is_young_card(res)) {
      res = NULL;
    }
  }

  return res;
}
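
// Note: the hot cache is a circular buffer guarded by HotCardCache_lock.
// Once it fills up, inserting a newly hot card evicts the oldest cached
// card (the one at _hot_cache_idx), so refinement of a hot card is not
// skipped, just delayed by up to _hot_cache_size insertions.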

void ConcurrentG1Refine::clean_up_cache(int worker_i, G1RemSet* g1rs) {
  assert(!use_cache(), "cache should be disabled");
  int start_idx;

  while ((start_idx = _hot_cache_par_claimed_idx) < _n_hot) { // read once
    int end_idx = start_idx + _hot_cache_par_chunk_size;

    if (start_idx ==
        Atomic::cmpxchg(end_idx, &_hot_cache_par_claimed_idx, start_idx)) {
      // The current worker has successfully claimed the chunk [start_idx..end_idx)
      end_idx = MIN2(end_idx, _n_hot);
      for (int i = start_idx; i < end_idx; i++) {
        jbyte* entry = _hot_cache[i];
        if (entry != NULL) {
          g1rs->concurrentRefineOneCard(entry, worker_i);
        }
      }
    }
  }
}
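
// Note: the workers race to advance _hot_cache_par_claimed_idx with a
// CAS, so each chunk of the hot cache is claimed and processed by exactly
// one worker; a worker that loses the race simply re-reads the claim
// index and tries for the next unclaimed chunk. No locking is required.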

void ConcurrentG1Refine::expand_card_count_cache() {
  if (_n_card_counts < _max_n_card_counts) {
    int new_idx = _cache_size_index+1;
    int new_size = _cc_cache_sizes[new_idx];
    if (new_size < 0) new_size = _max_n_card_counts;

    // Make sure we don't go bigger than we will ever need
    new_size = MIN2((unsigned) new_size, _max_n_card_counts);

    // Expand the card count and card epoch tables
    if (new_size > (int)_n_card_counts) {
      // We can just free and allocate a new array as we're
      // not interested in preserving the contents
      assert(_card_counts != NULL, "Logic!");
      assert(_card_epochs != NULL, "Logic!");
      FREE_C_HEAP_ARRAY(CardCountCacheEntry, _card_counts);
      FREE_C_HEAP_ARRAY(CardEpochCacheEntry, _card_epochs);
      _n_card_counts = new_size;
      _card_counts = NEW_C_HEAP_ARRAY(CardCountCacheEntry, _n_card_counts);
      _card_epochs = NEW_C_HEAP_ARRAY(CardEpochCacheEntry, _n_card_counts);
      _cache_size_index = new_idx;
    }
  }
}

void ConcurrentG1Refine::clear_and_record_card_counts() {
  if (G1ConcRSLogCacheSize == 0) return;

#ifndef PRODUCT
  double start = os::elapsedTime();
#endif

  if (_expand_card_counts) {
    expand_card_count_cache();
    _expand_card_counts = false;
    // Only need to clear the epochs.
    Copy::fill_to_bytes(&_card_epochs[0], _n_card_counts * sizeof(CardEpochCacheEntry));
  }

  int this_epoch = (int) _n_periods;
  assert((this_epoch+1) <= max_jint, "too many periods");
  // Update epoch
  _n_periods++;
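
  // Note: bumping _n_periods logically clears the card counts cache:
  // every cached entry now carries a stale epoch, and add_card_count()
  // treats stale entries as vacant buckets. This keeps the "clear" cheap
  // regardless of the size of the cache.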

#ifndef PRODUCT
  double elapsed = os::elapsedTime() - start;
  _g1h->g1_policy()->record_cc_clear_time(elapsed * 1000.0);
#endif
}

void ConcurrentG1Refine::print_worker_threads_on(outputStream* st) const {
  for (int i = 0; i < _n_threads; ++i) {
    _threads[i]->print_on(st);
    st->cr();
  }
}
