src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp

author:      johnc
date:        Mon, 19 Jul 2010 11:06:34 -0700
changeset:   2021:5cbac8938c4c
parent:      1907:c18cbe5936b8
child:       2060:2d160770d2e5
permissions: -rw-r--r--

6956639: G1: assert(cached_ptr != card_ptr) failed: shouldn't be, concurrentG1Refine.cpp:307
Summary: During concurrent refinement, filter out cards in young regions only after it has been determined that the region has been allocated from and the region's young type has been set.
Reviewed-by: iveresov, tonyp, jcoomes
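
The comments in cache_insert() and add_card_count() below describe the race
this changeset addresses: a heap region can be freed and reallocated as
young at any time, so a card's young-ness can only be trusted once it is
known that the region has been allocated from. A minimal sketch of that
ordering, using hypothetical names (the actual filtering is performed in
HeapRegion::oops_on_card_seq_iterate_careful, which is not part of this
file):

    // Sketch only - the helpers below are assumed, not this changeset's code.
    HeapRegion* r = g1h->heap_region_containing(card_start);
    if (r == NULL || !region_allocated_to(r)) {  // hypothetical predicate
      return;  // too early to trust any of the region's state
    }
    // The young type is set while the region is being allocated from,
    // so it is now safe to test it and skip refining young cards:
    if (r->is_young()) {
      return;
    }
    refine_card(r, card_ptr);  // hypothetical helper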

/*
 * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "incls/_precompiled.incl"
#include "incls/_concurrentG1Refine.cpp.incl"

// Possible sizes for the card counts cache: odd primes that roughly double in size.
// (See jvmtiTagMap.cpp).
int ConcurrentG1Refine::_cc_cache_sizes[] = {
        16381,    32771,    76831,    150001,   307261,
       614563,  1228891,  2457733,   4915219,  9830479,
     19660831, 39321619, 78643219, 157286461,       -1
  };
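
// The hash function that maps a card number to a slot in this table is
// defined in concurrentG1Refine.hpp, not here. The prime table sizes
// suggest it is a simple modulus (an assumption, not shown in this file):
//
//   unsigned hash(unsigned card_num) { return card_num % _n_card_counts; }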

ConcurrentG1Refine::ConcurrentG1Refine() :
  _card_counts(NULL), _card_epochs(NULL),
  _n_card_counts(0), _max_n_card_counts(0),
  _cache_size_index(0), _expand_card_counts(false),
  _hot_cache(NULL),
  _def_use_cache(false), _use_cache(false),
  _n_periods(0),
  _threads(NULL), _n_threads(0)
{

  // Ergonomically select initial concurrent refinement parameters
  if (FLAG_IS_DEFAULT(G1ConcRefinementGreenZone)) {
    FLAG_SET_DEFAULT(G1ConcRefinementGreenZone, MAX2<int>(ParallelGCThreads, 1));
  }
  set_green_zone(G1ConcRefinementGreenZone);

  if (FLAG_IS_DEFAULT(G1ConcRefinementYellowZone)) {
    FLAG_SET_DEFAULT(G1ConcRefinementYellowZone, green_zone() * 3);
  }
  set_yellow_zone(MAX2<int>(G1ConcRefinementYellowZone, green_zone()));

  if (FLAG_IS_DEFAULT(G1ConcRefinementRedZone)) {
    FLAG_SET_DEFAULT(G1ConcRefinementRedZone, yellow_zone() * 2);
  }
  set_red_zone(MAX2<int>(G1ConcRefinementRedZone, yellow_zone()));
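
  // Worked example of the defaults above, assuming ParallelGCThreads is 4
  // and all three zone flags are left at their defaults:
  //   green  = MAX2(4, 1)  = 4 buffers
  //   yellow = green * 3   = 12
  //   red    = yellow * 2  = 24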
  _n_worker_threads = thread_num();
  // We need one extra thread to do the young gen rset size sampling.
  _n_threads = _n_worker_threads + 1;
  reset_threshold_step();

  _threads = NEW_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _n_threads);
  int worker_id_offset = (int)DirtyCardQueueSet::num_par_ids();
  ConcurrentG1RefineThread *next = NULL;
  // Create the threads in reverse order so that each thread can be given
  // a pointer to its successor (the next-higher id) at construction time.
  for (int i = _n_threads - 1; i >= 0; i--) {
    ConcurrentG1RefineThread* t = new ConcurrentG1RefineThread(this, next, worker_id_offset, i);
    assert(t != NULL, "Conc refine should have been created");
    assert(t->cg1r() == this, "Conc refine thread should refer to this");
    _threads[i] = t;
    next = t;
  }
}

void ConcurrentG1Refine::reset_threshold_step() {
  if (FLAG_IS_DEFAULT(G1ConcRefinementThresholdStep)) {
    _thread_threshold_step = (yellow_zone() - green_zone()) / (worker_thread_num() + 1);
  } else {
    _thread_threshold_step = G1ConcRefinementThresholdStep;
  }
}

int ConcurrentG1Refine::thread_num() {
  return MAX2<int>((G1ConcRefinementThreads > 0) ? G1ConcRefinementThreads : ParallelGCThreads, 1);
}

void ConcurrentG1Refine::init() {
  if (G1ConcRSLogCacheSize > 0) {
    _g1h = G1CollectedHeap::heap();
    _max_n_card_counts =
      (unsigned) (_g1h->g1_reserved_obj_bytes() >> CardTableModRefBS::card_shift);

    size_t max_card_num = ((size_t)1 << (sizeof(unsigned)*BitsPerByte-1)) - 1;
    guarantee(_max_n_card_counts < max_card_num, "card_num representation");
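
    // max_card_num here is 2^31 - 1: card numbers are packed together
    // with an epoch into a julong in the epoch cache (see add_card_count)
    // and are also handled as signed ints in places (e.g. 'desired'
    // below), so they must remain representable in 31 bits.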

    int desired = _max_n_card_counts / InitialCacheFraction;
    for (_cache_size_index = 0;
              _cc_cache_sizes[_cache_size_index] >= 0; _cache_size_index++) {
      if (_cc_cache_sizes[_cache_size_index] >= desired) break;
    }
    _cache_size_index = MAX2(0, (_cache_size_index - 1));

    int initial_size = _cc_cache_sizes[_cache_size_index];
    if (initial_size < 0) initial_size = _max_n_card_counts;

    // Make sure we don't go bigger than we will ever need
    _n_card_counts = MIN2((unsigned) initial_size, _max_n_card_counts);

    _card_counts = NEW_C_HEAP_ARRAY(CardCountCacheEntry, _n_card_counts);
    _card_epochs = NEW_C_HEAP_ARRAY(CardEpochCacheEntry, _n_card_counts);

    Copy::fill_to_bytes(&_card_counts[0],
                        _n_card_counts * sizeof(CardCountCacheEntry));
    Copy::fill_to_bytes(&_card_epochs[0], _n_card_counts * sizeof(CardEpochCacheEntry));

    ModRefBarrierSet* bs = _g1h->mr_bs();
    guarantee(bs->is_a(BarrierSet::CardTableModRef), "Precondition");
    _ct_bs = (CardTableModRefBS*)bs;
    _ct_bot = _ct_bs->byte_for_const(_g1h->reserved_region().start());

    _def_use_cache = true;
    _use_cache = true;
    _hot_cache_size = (1 << G1ConcRSLogCacheSize);
    _hot_cache = NEW_C_HEAP_ARRAY(jbyte*, _hot_cache_size);
    _n_hot = 0;
    _hot_cache_idx = 0;

    // For refining the cards in the hot cache in parallel
    int n_workers = (ParallelGCThreads > 0 ?
                        _g1h->workers()->total_workers() : 1);
    _hot_cache_par_chunk_size = MAX2(1, _hot_cache_size / n_workers);
    _hot_cache_par_claimed_idx = 0;
  }
}

void ConcurrentG1Refine::stop() {
  if (_threads != NULL) {
    for (int i = 0; i < _n_threads; i++) {
      _threads[i]->stop();
    }
  }
}

void ConcurrentG1Refine::reinitialize_threads() {
  reset_threshold_step();
  if (_threads != NULL) {
    for (int i = 0; i < _n_threads; i++) {
      _threads[i]->initialize();
    }
  }
}

ConcurrentG1Refine::~ConcurrentG1Refine() {
  if (G1ConcRSLogCacheSize > 0) {
    assert(_card_counts != NULL, "Logic");
    FREE_C_HEAP_ARRAY(CardCountCacheEntry, _card_counts);
    assert(_card_epochs != NULL, "Logic");
    FREE_C_HEAP_ARRAY(CardEpochCacheEntry, _card_epochs);
    assert(_hot_cache != NULL, "Logic");
    FREE_C_HEAP_ARRAY(jbyte*, _hot_cache);
  }
  if (_threads != NULL) {
    for (int i = 0; i < _n_threads; i++) {
      delete _threads[i];
    }
    FREE_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _threads);
  }
}

void ConcurrentG1Refine::threads_do(ThreadClosure *tc) {
  if (_threads != NULL) {
    for (int i = 0; i < _n_threads; i++) {
      tc->do_thread(_threads[i]);
    }
  }
}

bool ConcurrentG1Refine::is_young_card(jbyte* card_ptr) {
  HeapWord* start = _ct_bs->addr_for(card_ptr);
  HeapRegion* r = _g1h->heap_region_containing(start);
  if (r != NULL && r->is_young()) {
    return true;
  }
  // Either the card is not associated with a heap region
  // (and so can't be young), or the region is not young.
  return false;
}

jbyte* ConcurrentG1Refine::add_card_count(jbyte* card_ptr, int* count, bool* defer) {
  unsigned new_card_num = ptr_2_card_num(card_ptr);
  unsigned bucket = hash(new_card_num);
  assert(0 <= bucket && bucket < _n_card_counts, "Bounds");

  CardCountCacheEntry* count_ptr = &_card_counts[bucket];
  CardEpochCacheEntry* epoch_ptr = &_card_epochs[bucket];

  // We have to construct a new entry if we haven't updated the counts
  // during the current period, or if the count was updated for a
  // different card number.
  unsigned int new_epoch = (unsigned int) _n_periods;
  julong new_epoch_entry = make_epoch_entry(new_card_num, new_epoch);
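
  // A card's number and the current epoch are packed into a single julong
  // so that both can be published with one 64-bit CAS. The bit layout is
  // defined by make_epoch_entry/extract_card_num/extract_epoch in
  // concurrentG1Refine.hpp; a plausible layout, assumed here rather than
  // shown, is the card number in the high 32 bits and the epoch in the
  // low 32 bits.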

  while (true) {
    // Fetch the previous epoch value
    julong prev_epoch_entry = epoch_ptr->_value;
    julong cas_res;

    if (extract_epoch(prev_epoch_entry) != new_epoch) {
      // This entry has not yet been updated during this period.
      // Note: we update the epoch value atomically to ensure
      // that there is only one winner that updates the cached
      // card_ptr value even though all the refine threads share
      // the same epoch value.

      cas_res = (julong) Atomic::cmpxchg((jlong) new_epoch_entry,
                                         (volatile jlong*)&epoch_ptr->_value,
                                         (jlong) prev_epoch_entry);

      if (cas_res == prev_epoch_entry) {
        // We have successfully won the race to update the
        // epoch and card_num value. Make it look like the
        // count and eviction count were previously cleared.
        count_ptr->_count = 1;
        count_ptr->_evict_count = 0;
        *count = 0;
        // We can defer the processing of card_ptr
        *defer = true;
        return card_ptr;
      }
      // We did not win the race to update the epoch field, so some other
      // thread must have done it. The value that gets returned by CAS
      // should be the new epoch value.
      assert(extract_epoch(cas_res) == new_epoch, "unexpected epoch");
      // We could 'continue' here or just re-read the previous epoch value
      prev_epoch_entry = epoch_ptr->_value;
    }

    // The epoch entry for card_ptr has been updated during this period.
    unsigned old_card_num = extract_card_num(prev_epoch_entry);

    // The card count that will be returned to caller
    *count = count_ptr->_count;

    // Are we updating the count for the same card?
    if (new_card_num == old_card_num) {
      // Same card - just update the count. We could have more than one
      // thread racing to update count for the current card. It should be
      // OK not to use a CAS as the only penalty should be some missed
      // increments of the count which delays identifying the card as "hot".

      if (*count < max_jubyte) count_ptr->_count++;
      // We can defer the processing of card_ptr
      *defer = true;
      return card_ptr;
    }

    // Different card - evict old card info
    if (count_ptr->_evict_count < max_jubyte) count_ptr->_evict_count++;
    if (count_ptr->_evict_count > G1CardCountCacheExpandThreshold) {
      // Trigger a resize the next time we clear
      _expand_card_counts = true;
    }

    cas_res = (julong) Atomic::cmpxchg((jlong) new_epoch_entry,
                                       (volatile jlong*)&epoch_ptr->_value,
                                       (jlong) prev_epoch_entry);

    if (cas_res == prev_epoch_entry) {
      // We successfully updated the card num value in the epoch entry
      count_ptr->_count = 0; // initialize counter for new card num
      jbyte* old_card_ptr = card_num_2_ptr(old_card_num);

      // Even though the region containing the card at old_card_num was not
      // in the young list when old_card_num was recorded in the epoch
      // cache it could have been added to the free list and subsequently
      // added to the young list in the intervening time. See CR 6817995.
      // We do not deal with this case here - it will be handled in
      // HeapRegion::oops_on_card_seq_iterate_careful after it has been
      // determined that the region containing the card has been allocated
      // to, and it's safe to check the young type of the region.

      // We do not want to defer processing of card_ptr in this case
      // (we need to refine old_card_ptr and card_ptr)
      *defer = false;
      return old_card_ptr;
    }
    // Someone else beat us - try again.
  }
}

jbyte* ConcurrentG1Refine::cache_insert(jbyte* card_ptr, bool* defer) {
  int count;
  jbyte* cached_ptr = add_card_count(card_ptr, &count, defer);
  assert(cached_ptr != NULL, "bad cached card ptr");

  // We've just inserted a card pointer into the card count cache
  // and got back the card that we just inserted or (evicted) the
  // previous contents of that count slot.

  // The card we got back could be in a young region. When the
  // returned card (if evicted) was originally inserted, we had
  // determined that its containing region was not young. However
  // it is possible for the region to be freed during a cleanup
  // pause, then reallocated and tagged as young which will result
  // in the returned card residing in a young region.
  //
  // We do not deal with this case here - the change from non-young
  // to young could be observed at any time - it will be handled in
  // HeapRegion::oops_on_card_seq_iterate_careful after it has been
  // determined that the region containing the card has been allocated
  // to.

  // The card pointer we obtained from card count cache is not hot
  // so do not store it in the cache; return it for immediate
  // refining.
  if (count < G1ConcRSHotCardLimit) {
    return cached_ptr;
  }

  // Otherwise, the pointer we got from the _card_counts cache is hot.
  jbyte* res = NULL;
  MutexLockerEx x(HotCardCache_lock, Mutex::_no_safepoint_check_flag);
  if (_n_hot == _hot_cache_size) {
    res = _hot_cache[_hot_cache_idx];
    _n_hot--;
  }
  // Now _n_hot < _hot_cache_size, and we can insert at _hot_cache_idx.
  _hot_cache[_hot_cache_idx] = cached_ptr;
  _hot_cache_idx++;
  if (_hot_cache_idx == _hot_cache_size) _hot_cache_idx = 0;
  _n_hot++;
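
  // The hot cache acts as a circular buffer: when it was full on entry,
  // the slot at _hot_cache_idx held the oldest entry, so the store above
  // replaced the oldest card and handed it back in res for immediate
  // refinement - FIFO eviction.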

  // The card obtained from the hot card cache could be in a young
  // region. See above on how this can happen.

  return res;
}
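
// Illustrative caller pattern (a sketch with assumed names, not code from
// this file): the remembered set code is expected to consult the cache
// before refining a card, along these lines:
//
//   bool defer = false;
//   jbyte* to_refine = cg1r->cache_insert(card_ptr, &defer);
//   if (to_refine != NULL && !defer) {
//     refine_card(to_refine);  // hypothetical helper
//   }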

void ConcurrentG1Refine::clean_up_cache(int worker_i, G1RemSet* g1rs) {
  assert(!use_cache(), "cache should be disabled");
  int start_idx;
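
  // Multiple worker threads may call this concurrently. Each claims a
  // chunk of [0.._n_hot) by advancing _hot_cache_par_claimed_idx with a
  // CAS; only the thread whose CAS succeeds processes that chunk.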
  while ((start_idx = _hot_cache_par_claimed_idx) < _n_hot) { // read once
    int end_idx = start_idx + _hot_cache_par_chunk_size;

    if (start_idx ==
        Atomic::cmpxchg(end_idx, &_hot_cache_par_claimed_idx, start_idx)) {
      // The current worker has successfully claimed the chunk [start_idx..end_idx)
      end_idx = MIN2(end_idx, _n_hot);
      for (int i = start_idx; i < end_idx; i++) {
        jbyte* entry = _hot_cache[i];
        if (entry != NULL) {
          g1rs->concurrentRefineOneCard(entry, worker_i);
        }
      }
    }
  }
}

void ConcurrentG1Refine::expand_card_count_cache() {
  if (_n_card_counts < _max_n_card_counts) {
    int new_idx = _cache_size_index+1;
    int new_size = _cc_cache_sizes[new_idx];
    if (new_size < 0) new_size = _max_n_card_counts;

    // Make sure we don't go bigger than we will ever need
    new_size = MIN2((unsigned) new_size, _max_n_card_counts);

    // Expand the card count and card epoch tables
    if (new_size > (int)_n_card_counts) {
      // We can just free and allocate a new array as we're
      // not interested in preserving the contents: the counts
      // are heuristic hints, so losing them merely delays
      // identifying some cards as hot.
      assert(_card_counts != NULL, "Logic!");
      assert(_card_epochs != NULL, "Logic!");
      FREE_C_HEAP_ARRAY(CardCountCacheEntry, _card_counts);
      FREE_C_HEAP_ARRAY(CardEpochCacheEntry, _card_epochs);
      _n_card_counts = new_size;
      _card_counts = NEW_C_HEAP_ARRAY(CardCountCacheEntry, _n_card_counts);
      _card_epochs = NEW_C_HEAP_ARRAY(CardEpochCacheEntry, _n_card_counts);
      _cache_size_index = new_idx;
    }
  }
}

void ConcurrentG1Refine::clear_and_record_card_counts() {
  if (G1ConcRSLogCacheSize == 0) return;

#ifndef PRODUCT
  double start = os::elapsedTime();
#endif

  if (_expand_card_counts) {
    expand_card_count_cache();
    _expand_card_counts = false;
    // Only need to clear the epochs.
    Copy::fill_to_bytes(&_card_epochs[0], _n_card_counts * sizeof(CardEpochCacheEntry));
  }

  int this_epoch = (int) _n_periods;
  assert((this_epoch+1) <= max_jint, "too many periods");
  // Update epoch
  _n_periods++;
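
  // Bumping _n_periods is what logically clears the count cache:
  // add_card_count treats any entry whose stored epoch differs from the
  // current period as empty, so no per-entry clearing is needed.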

#ifndef PRODUCT
  double elapsed = os::elapsedTime() - start;
  _g1h->g1_policy()->record_cc_clear_time(elapsed * 1000.0);
#endif
}

void ConcurrentG1Refine::print_worker_threads_on(outputStream* st) const {
  for (int i = 0; i < _n_threads; ++i) {
    _threads[i]->print_on(st);
    st->cr();
  }
}
