6819077: G1: first GC thread coming late into the GC.

Tue, 04 Aug 2009 16:00:17 -0700

author
johnc
date
Tue, 04 Aug 2009 16:00:17 -0700
changeset 1325
6cb8e9df7174
parent 1324
15c5903cf9e1
child 1326
703065c670fa

6819077: G1: first GC thread coming late into the GC.
Summary: The first worker thread is delayed when entering the GC because it clears the card count table that is used in identifying hot cards. Replace the card count table with a dynamically sized evicting hash table that includes an epoch based counter.
Reviewed-by: iveresov, tonyp

src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp file | annotate | diff | comparison | revisions
src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp file | annotate | diff | comparison | revisions
src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp file | annotate | diff | comparison | revisions
src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp file | annotate | diff | comparison | revisions
src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp file | annotate | diff | comparison | revisions
src/share/vm/gc_implementation/g1/g1RemSet.cpp file | annotate | diff | comparison | revisions
src/share/vm/gc_implementation/g1/g1RemSet.hpp file | annotate | diff | comparison | revisions
src/share/vm/gc_implementation/g1/g1_globals.hpp file | annotate | diff | comparison | revisions
src/share/vm/gc_implementation/includeDB_gc_g1 file | annotate | diff | comparison | revisions
     1.1 --- a/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp	Mon Aug 03 12:59:30 2009 -0700
     1.2 +++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp	Tue Aug 04 16:00:17 2009 -0700
     1.3 @@ -25,11 +25,21 @@
     1.4  #include "incls/_precompiled.incl"
     1.5  #include "incls/_concurrentG1Refine.cpp.incl"
     1.6  
     1.7 +// Possible sizes for the card counts cache: odd primes that roughly double in size.
     1.8 +// (See jvmtiTagMap.cpp).
     1.9 +int ConcurrentG1Refine::_cc_cache_sizes[] = {
    1.10 +        16381,    32771,    76831,    150001,   307261,
    1.11 +       614563,  1228891,  2457733,   4915219,  9830479,
    1.12 +     19660831, 39321619, 78643219, 157286461,       -1
    1.13 +  };
    1.14 +
    1.15  ConcurrentG1Refine::ConcurrentG1Refine() :
    1.16 -  _card_counts(NULL), _cur_card_count_histo(NULL), _cum_card_count_histo(NULL),
    1.17 +  _card_counts(NULL), _card_epochs(NULL),
    1.18 +  _n_card_counts(0), _max_n_card_counts(0),
    1.19 +  _cache_size_index(0), _expand_card_counts(false),
    1.20    _hot_cache(NULL),
    1.21    _def_use_cache(false), _use_cache(false),
    1.22 -  _n_periods(0), _total_cards(0), _total_travs(0),
    1.23 +  _n_periods(0),
    1.24    _threads(NULL), _n_threads(0)
    1.25  {
    1.26    if (G1ConcRefine) {
    1.27 @@ -57,26 +67,39 @@
    1.28  }
    1.29  
    1.30  void ConcurrentG1Refine::init() {
    1.31 -  G1CollectedHeap* g1h = G1CollectedHeap::heap();
    1.32 -  if (G1ConcRSLogCacheSize > 0 || G1ConcRSCountTraversals) {
    1.33 -    _n_card_counts =
    1.34 -      (unsigned) (g1h->g1_reserved_obj_bytes() >> CardTableModRefBS::card_shift);
    1.35 -    _card_counts = NEW_C_HEAP_ARRAY(unsigned char, _n_card_counts);
    1.36 -    for (size_t i = 0; i < _n_card_counts; i++) _card_counts[i] = 0;
    1.37 -    ModRefBarrierSet* bs = g1h->mr_bs();
    1.38 +  if (G1ConcRSLogCacheSize > 0) {
    1.39 +    _g1h = G1CollectedHeap::heap();
    1.40 +    _max_n_card_counts =
    1.41 +      (unsigned) (_g1h->g1_reserved_obj_bytes() >> CardTableModRefBS::card_shift);
    1.42 +
    1.43 +    size_t max_card_num = ((size_t)1 << (sizeof(unsigned)*BitsPerByte-1)) - 1;
    1.44 +    guarantee(_max_n_card_counts < max_card_num, "card_num representation");
    1.45 +
    1.46 +    int desired = _max_n_card_counts / InitialCacheFraction;
    1.47 +    for (_cache_size_index = 0;
    1.48 +              _cc_cache_sizes[_cache_size_index] >= 0; _cache_size_index++) {
    1.49 +      if (_cc_cache_sizes[_cache_size_index] >= desired) break;
    1.50 +    }
    1.51 +    _cache_size_index = MAX2(0, (_cache_size_index - 1));
    1.52 +
    1.53 +    int initial_size = _cc_cache_sizes[_cache_size_index];
    1.54 +    if (initial_size < 0) initial_size = _max_n_card_counts;
    1.55 +
    1.56 +    // Make sure we don't go bigger than we will ever need
    1.57 +    _n_card_counts = MIN2((unsigned) initial_size, _max_n_card_counts);
    1.58 +
    1.59 +    _card_counts = NEW_C_HEAP_ARRAY(CardCountCacheEntry, _n_card_counts);
    1.60 +    _card_epochs = NEW_C_HEAP_ARRAY(CardEpochCacheEntry, _n_card_counts);
    1.61 +
    1.62 +    Copy::fill_to_bytes(&_card_counts[0],
    1.63 +                        _n_card_counts * sizeof(CardCountCacheEntry));
    1.64 +    Copy::fill_to_bytes(&_card_epochs[0], _n_card_counts * sizeof(CardEpochCacheEntry));
    1.65 +
    1.66 +    ModRefBarrierSet* bs = _g1h->mr_bs();
    1.67      guarantee(bs->is_a(BarrierSet::CardTableModRef), "Precondition");
    1.68 -    CardTableModRefBS* ctbs = (CardTableModRefBS*)bs;
    1.69 -    _ct_bot = ctbs->byte_for_const(g1h->reserved_region().start());
    1.70 -    if (G1ConcRSCountTraversals) {
    1.71 -      _cur_card_count_histo = NEW_C_HEAP_ARRAY(unsigned, 256);
    1.72 -      _cum_card_count_histo = NEW_C_HEAP_ARRAY(unsigned, 256);
    1.73 -      for (int i = 0; i < 256; i++) {
    1.74 -        _cur_card_count_histo[i] = 0;
    1.75 -        _cum_card_count_histo[i] = 0;
    1.76 -      }
    1.77 -    }
    1.78 -  }
    1.79 -  if (G1ConcRSLogCacheSize > 0) {
    1.80 +    _ct_bs = (CardTableModRefBS*)bs;
    1.81 +    _ct_bot = _ct_bs->byte_for_const(_g1h->reserved_region().start());
    1.82 +
    1.83      _def_use_cache = true;
    1.84      _use_cache = true;
    1.85      _hot_cache_size = (1 << G1ConcRSLogCacheSize);
    1.86 @@ -86,7 +109,7 @@
    1.87  
    1.88      // For refining the cards in the hot cache in parallel
    1.89      int n_workers = (ParallelGCThreads > 0 ?
    1.90 -                        g1h->workers()->total_workers() : 1);
    1.91 +                        _g1h->workers()->total_workers() : 1);
    1.92      _hot_cache_par_chunk_size = MAX2(1, _hot_cache_size / n_workers);
    1.93      _hot_cache_par_claimed_idx = 0;
    1.94    }
    1.95 @@ -101,15 +124,11 @@
    1.96  }
    1.97  
    1.98  ConcurrentG1Refine::~ConcurrentG1Refine() {
    1.99 -  if (G1ConcRSLogCacheSize > 0 || G1ConcRSCountTraversals) {
   1.100 +  if (G1ConcRSLogCacheSize > 0) {
   1.101      assert(_card_counts != NULL, "Logic");
   1.102 -    FREE_C_HEAP_ARRAY(unsigned char, _card_counts);
   1.103 -    assert(_cur_card_count_histo != NULL, "Logic");
   1.104 -    FREE_C_HEAP_ARRAY(unsigned, _cur_card_count_histo);
   1.105 -    assert(_cum_card_count_histo != NULL, "Logic");
   1.106 -    FREE_C_HEAP_ARRAY(unsigned, _cum_card_count_histo);
   1.107 -  }
   1.108 -  if (G1ConcRSLogCacheSize > 0) {
   1.109 +    FREE_C_HEAP_ARRAY(CardCountCacheEntry, _card_counts);
   1.110 +    assert(_card_epochs != NULL, "Logic");
   1.111 +    FREE_C_HEAP_ARRAY(CardEpochCacheEntry, _card_epochs);
   1.112      assert(_hot_cache != NULL, "Logic");
   1.113      FREE_C_HEAP_ARRAY(jbyte*, _hot_cache);
   1.114    }
   1.115 @@ -129,42 +148,165 @@
   1.116    }
   1.117  }
   1.118  
   1.119 -
   1.120 -int ConcurrentG1Refine::add_card_count(jbyte* card_ptr) {
   1.121 -  size_t card_num = (card_ptr - _ct_bot);
   1.122 -  guarantee(0 <= card_num && card_num < _n_card_counts, "Bounds");
   1.123 -  unsigned char cnt = _card_counts[card_num];
   1.124 -  if (cnt < 255) _card_counts[card_num]++;
   1.125 -  return cnt;
   1.126 -  _total_travs++;
   1.127 +bool ConcurrentG1Refine::is_young_card(jbyte* card_ptr) {
   1.128 +  HeapWord* start = _ct_bs->addr_for(card_ptr);
   1.129 +  HeapRegion* r = _g1h->heap_region_containing(start);
   1.130 +  if (r != NULL && r->is_young()) {
   1.131 +    return true;
   1.132 +  }
   1.133 +  // This card is not associated with a heap region
   1.134 +  // so can't be young.
   1.135 +  return false;
   1.136  }
   1.137  
   1.138 -jbyte* ConcurrentG1Refine::cache_insert(jbyte* card_ptr) {
   1.139 -  int count = add_card_count(card_ptr);
   1.140 -  // Count previously unvisited cards.
   1.141 -  if (count == 0) _total_cards++;
   1.142 -  // We'll assume a traversal unless we store it in the cache.
   1.143 +jbyte* ConcurrentG1Refine::add_card_count(jbyte* card_ptr, int* count, bool* defer) {
   1.144 +  unsigned new_card_num = ptr_2_card_num(card_ptr);
   1.145 +  unsigned bucket = hash(new_card_num);
   1.146 +  assert(0 <= bucket && bucket < _n_card_counts, "Bounds");
   1.147 +
   1.148 +  CardCountCacheEntry* count_ptr = &_card_counts[bucket];
   1.149 +  CardEpochCacheEntry* epoch_ptr = &_card_epochs[bucket];
   1.150 +
   1.151 +  // We have to construct a new entry if we haven't updated the counts
   1.152 +  // during the current period, or if the count was updated for a
   1.153 +  // different card number.
   1.154 +  unsigned int new_epoch = (unsigned int) _n_periods;
   1.155 +  julong new_epoch_entry = make_epoch_entry(new_card_num, new_epoch);
   1.156 +
   1.157 +  while (true) {
   1.158 +    // Fetch the previous epoch value
   1.159 +    julong prev_epoch_entry = epoch_ptr->_value;
   1.160 +    julong cas_res;
   1.161 +
   1.162 +    if (extract_epoch(prev_epoch_entry) != new_epoch) {
   1.163 +      // This entry has not yet been updated during this period.
   1.164 +      // Note: we update the epoch value atomically to ensure
   1.165 +      // that there is only one winner that updates the cached
   1.166 +      // card_ptr value even though all the refine threads share
   1.167 +      // the same epoch value.
   1.168 +
   1.169 +      cas_res = (julong) Atomic::cmpxchg((jlong) new_epoch_entry,
   1.170 +                                         (volatile jlong*)&epoch_ptr->_value,
   1.171 +                                         (jlong) prev_epoch_entry);
   1.172 +
   1.173 +      if (cas_res == prev_epoch_entry) {
   1.174 +        // We have successfully won the race to update the
   1.175 +        // epoch and card_num value. Make it look like the
   1.176 +        // count and eviction count were previously cleared.
   1.177 +        count_ptr->_count = 1;
   1.178 +        count_ptr->_evict_count = 0;
   1.179 +        *count = 0;
   1.180 +        // We can defer the processing of card_ptr
   1.181 +        *defer = true;
   1.182 +        return card_ptr;
   1.183 +      }
   1.184 +      // We did not win the race to update the epoch field, so some other
   1.185 +      // thread must have done it. The value that gets returned by CAS
   1.186 +      // should be the new epoch value.
   1.187 +      assert(extract_epoch(cas_res) == new_epoch, "unexpected epoch");
   1.188 +      // We could 'continue' here or just re-read the previous epoch value
   1.189 +      prev_epoch_entry = epoch_ptr->_value;
   1.190 +    }
   1.191 +
   1.192 +    // The epoch entry for card_ptr has been updated during this period.
   1.193 +    unsigned old_card_num = extract_card_num(prev_epoch_entry);
   1.194 +
   1.195 +    // The card count that will be returned to caller
   1.196 +    *count = count_ptr->_count;
   1.197 +
   1.198 +    // Are we updating the count for the same card?
   1.199 +    if (new_card_num == old_card_num) {
   1.200 +      // Same card - just update the count. We could have more than one
   1.201 +      // thread racing to update count for the current card. It should be
   1.202 +      // OK not to use a CAS as the only penalty should be some missed
   1.203 +      // increments of the count which delays identifying the card as "hot".
   1.204 +
   1.205 +      if (*count < max_jubyte) count_ptr->_count++;
   1.206 +      // We can defer the processing of card_ptr
   1.207 +      *defer = true;
   1.208 +      return card_ptr;
   1.209 +    }
   1.210 +
   1.211 +    // Different card - evict old card info
   1.212 +    if (count_ptr->_evict_count < max_jubyte) count_ptr->_evict_count++;
   1.213 +    if (count_ptr->_evict_count > G1CardCountCacheExpandThreshold) {
   1.214 +      // Trigger a resize the next time we clear
   1.215 +      _expand_card_counts = true;
   1.216 +    }
   1.217 +
   1.218 +    cas_res = (julong) Atomic::cmpxchg((jlong) new_epoch_entry,
   1.219 +                                       (volatile jlong*)&epoch_ptr->_value,
   1.220 +                                       (jlong) prev_epoch_entry);
   1.221 +
   1.222 +    if (cas_res == prev_epoch_entry) {
   1.223 +      // We successfully updated the card num value in the epoch entry
   1.224 +      count_ptr->_count = 0; // initialize counter for new card num
   1.225 +
   1.226 +      // Even though the region containg the card at old_card_num was not
   1.227 +      // in the young list when old_card_num was recorded in the epoch
   1.228 +      // cache it could have been added to the free list and subsequently
   1.229 +      // added to the young list in the intervening time. If the evicted
   1.230 +      // card is in a young region just return the card_ptr and the evicted
   1.231 +      // card will not be cleaned. See CR 6817995.
   1.232 +
   1.233 +      jbyte* old_card_ptr = card_num_2_ptr(old_card_num);
   1.234 +      if (is_young_card(old_card_ptr)) {
   1.235 +        *count = 0;
   1.236 +        // We can defer the processing of card_ptr
   1.237 +        *defer = true;
   1.238 +        return card_ptr;
   1.239 +      }
   1.240 +
   1.241 +      // We do not want to defer processing of card_ptr in this case
   1.242 +      // (we need to refine old_card_ptr and card_ptr)
   1.243 +      *defer = false;
   1.244 +      return old_card_ptr;
   1.245 +    }
   1.246 +    // Someone else beat us - try again.
   1.247 +  }
   1.248 +}
   1.249 +
   1.250 +jbyte* ConcurrentG1Refine::cache_insert(jbyte* card_ptr, bool* defer) {
   1.251 +  int count;
   1.252 +  jbyte* cached_ptr = add_card_count(card_ptr, &count, defer);
   1.253 +  assert(cached_ptr != NULL, "bad cached card ptr");
   1.254 +  assert(!is_young_card(cached_ptr), "shouldn't get a card in young region");
   1.255 +
   1.256 +  // The card pointer we obtained from card count cache is not hot
   1.257 +  // so do not store it in the cache; return it for immediate
   1.258 +  // refining.
   1.259    if (count < G1ConcRSHotCardLimit) {
   1.260 -    _total_travs++;
   1.261 -    return card_ptr;
   1.262 +    return cached_ptr;
   1.263    }
   1.264 -  // Otherwise, it's hot.
   1.265 +
   1.266 +  // Otherwise, the pointer we got from the _card_counts is hot.
   1.267    jbyte* res = NULL;
   1.268    MutexLockerEx x(HotCardCache_lock, Mutex::_no_safepoint_check_flag);
   1.269    if (_n_hot == _hot_cache_size) {
   1.270 -    _total_travs++;
   1.271      res = _hot_cache[_hot_cache_idx];
   1.272      _n_hot--;
   1.273    }
   1.274    // Now _n_hot < _hot_cache_size, and we can insert at _hot_cache_idx.
   1.275 -  _hot_cache[_hot_cache_idx] = card_ptr;
   1.276 +  _hot_cache[_hot_cache_idx] = cached_ptr;
   1.277    _hot_cache_idx++;
   1.278    if (_hot_cache_idx == _hot_cache_size) _hot_cache_idx = 0;
   1.279    _n_hot++;
   1.280 +
   1.281 +  if (res != NULL) {
   1.282 +    // Even though the region containg res was not in the young list
   1.283 +    // when it was recorded in the hot cache it could have been added
   1.284 +    // to the free list and subsequently added to the young list in
   1.285 +    // the intervening time. If res is in a young region, return NULL
   1.286 +    // so that res is not cleaned. See CR 6817995.
   1.287 +
   1.288 +    if (is_young_card(res)) {
   1.289 +      res = NULL;
   1.290 +    }
   1.291 +  }
   1.292 +
   1.293    return res;
   1.294  }
   1.295  
   1.296 -
   1.297  void ConcurrentG1Refine::clean_up_cache(int worker_i, G1RemSet* g1rs) {
   1.298    assert(!use_cache(), "cache should be disabled");
   1.299    int start_idx;
   1.300 @@ -186,114 +328,52 @@
   1.301    }
   1.302  }
   1.303  
   1.304 -void ConcurrentG1Refine::clear_and_record_card_counts() {
   1.305 -  if (G1ConcRSLogCacheSize == 0 && !G1ConcRSCountTraversals) return;
   1.306 -  _n_periods++;
   1.307 -  if (G1ConcRSCountTraversals) {
   1.308 -    for (size_t i = 0; i < _n_card_counts; i++) {
   1.309 -      unsigned char bucket = _card_counts[i];
   1.310 -      _cur_card_count_histo[bucket]++;
   1.311 -      _card_counts[i] = 0;
   1.312 +void ConcurrentG1Refine::expand_card_count_cache() {
   1.313 +  if (_n_card_counts < _max_n_card_counts) {
   1.314 +    int new_idx = _cache_size_index+1;
   1.315 +    int new_size = _cc_cache_sizes[new_idx];
   1.316 +    if (new_size < 0) new_size = _max_n_card_counts;
   1.317 +
   1.318 +    // Make sure we don't go bigger than we will ever need
   1.319 +    new_size = MIN2((unsigned) new_size, _max_n_card_counts);
   1.320 +
   1.321 +    // Expand the card count and card epoch tables
   1.322 +    if (new_size > (int)_n_card_counts) {
   1.323 +      // We can just free and allocate a new array as we're
   1.324 +      // not interested in preserving the contents
   1.325 +      assert(_card_counts != NULL, "Logic!");
   1.326 +      assert(_card_epochs != NULL, "Logic!");
   1.327 +      FREE_C_HEAP_ARRAY(CardCountCacheEntry, _card_counts);
   1.328 +      FREE_C_HEAP_ARRAY(CardEpochCacheEntry, _card_epochs);
   1.329 +      _n_card_counts = new_size;
   1.330 +      _card_counts = NEW_C_HEAP_ARRAY(CardCountCacheEntry, _n_card_counts);
   1.331 +      _card_epochs = NEW_C_HEAP_ARRAY(CardEpochCacheEntry, _n_card_counts);
   1.332 +      _cache_size_index = new_idx;
   1.333      }
   1.334 -    gclog_or_tty->print_cr("Card counts:");
   1.335 -    for (int i = 0; i < 256; i++) {
   1.336 -      if (_cur_card_count_histo[i] > 0) {
   1.337 -        gclog_or_tty->print_cr("  %3d: %9d", i, _cur_card_count_histo[i]);
   1.338 -        _cum_card_count_histo[i] += _cur_card_count_histo[i];
   1.339 -        _cur_card_count_histo[i] = 0;
   1.340 -      }
   1.341 -    }
   1.342 -  } else {
   1.343 -    assert(G1ConcRSLogCacheSize > 0, "Logic");
   1.344 -    Copy::fill_to_words((HeapWord*)(&_card_counts[0]),
   1.345 -                        _n_card_counts / HeapWordSize);
   1.346    }
   1.347  }
   1.348  
   1.349 -void
   1.350 -ConcurrentG1Refine::
   1.351 -print_card_count_histo_range(unsigned* histo, int from, int to,
   1.352 -                             float& cum_card_pct,
   1.353 -                             float& cum_travs_pct) {
   1.354 -  unsigned cards = 0;
   1.355 -  unsigned travs = 0;
   1.356 -  guarantee(to <= 256, "Precondition");
   1.357 -  for (int i = from; i < to-1; i++) {
   1.358 -    cards += histo[i];
   1.359 -    travs += histo[i] * i;
   1.360 +void ConcurrentG1Refine::clear_and_record_card_counts() {
   1.361 +  if (G1ConcRSLogCacheSize == 0) return;
   1.362 +
   1.363 +#ifndef PRODUCT
   1.364 +  double start = os::elapsedTime();
   1.365 +#endif
   1.366 +
   1.367 +  if (_expand_card_counts) {
   1.368 +    expand_card_count_cache();
   1.369 +    _expand_card_counts = false;
   1.370 +    // Only need to clear the epochs.
   1.371 +    Copy::fill_to_bytes(&_card_epochs[0], _n_card_counts * sizeof(CardEpochCacheEntry));
   1.372    }
   1.373 -  if (to == 256) {
   1.374 -    unsigned histo_card_sum = 0;
   1.375 -    unsigned histo_trav_sum = 0;
   1.376 -    for (int i = 1; i < 255; i++) {
   1.377 -      histo_trav_sum += histo[i] * i;
   1.378 -    }
   1.379 -    cards += histo[255];
   1.380 -    // correct traversals for the last one.
   1.381 -    unsigned travs_255 = (unsigned) (_total_travs - histo_trav_sum);
   1.382 -    travs += travs_255;
   1.383  
   1.384 -  } else {
   1.385 -    cards += histo[to-1];
   1.386 -    travs += histo[to-1] * (to-1);
   1.387 -  }
   1.388 -  float fperiods = (float)_n_periods;
   1.389 -  float f_tot_cards = (float)_total_cards/fperiods;
   1.390 -  float f_tot_travs = (float)_total_travs/fperiods;
   1.391 -  if (cards > 0) {
   1.392 -    float fcards = (float)cards/fperiods;
   1.393 -    float ftravs = (float)travs/fperiods;
   1.394 -    if (to == 256) {
   1.395 -      gclog_or_tty->print(" %4d-       %10.2f%10.2f", from, fcards, ftravs);
   1.396 -    } else {
   1.397 -      gclog_or_tty->print(" %4d-%4d   %10.2f%10.2f", from, to-1, fcards, ftravs);
   1.398 -    }
   1.399 -    float pct_cards = fcards*100.0/f_tot_cards;
   1.400 -    cum_card_pct += pct_cards;
   1.401 -    float pct_travs = ftravs*100.0/f_tot_travs;
   1.402 -    cum_travs_pct += pct_travs;
   1.403 -    gclog_or_tty->print_cr("%10.2f%10.2f%10.2f%10.2f",
   1.404 -                  pct_cards, cum_card_pct,
   1.405 -                  pct_travs, cum_travs_pct);
   1.406 -  }
   1.407 +  int this_epoch = (int) _n_periods;
   1.408 +  assert((this_epoch+1) <= max_jint, "to many periods");
   1.409 +  // Update epoch
   1.410 +  _n_periods++;
   1.411 +
   1.412 +#ifndef PRODUCT
   1.413 +  double elapsed = os::elapsedTime() - start;
   1.414 +  _g1h->g1_policy()->record_cc_clear_time(elapsed * 1000.0);
   1.415 +#endif
   1.416  }
   1.417 -
   1.418 -void ConcurrentG1Refine::print_final_card_counts() {
   1.419 -  if (!G1ConcRSCountTraversals) return;
   1.420 -
   1.421 -  gclog_or_tty->print_cr("Did %d total traversals of %d distinct cards.",
   1.422 -                _total_travs, _total_cards);
   1.423 -  float fperiods = (float)_n_periods;
   1.424 -  gclog_or_tty->print_cr("  This is an average of %8.2f traversals, %8.2f cards, "
   1.425 -                "per collection.", (float)_total_travs/fperiods,
   1.426 -                (float)_total_cards/fperiods);
   1.427 -  gclog_or_tty->print_cr("  This is an average of %8.2f traversals/distinct "
   1.428 -                "dirty card.\n",
   1.429 -                _total_cards > 0 ?
   1.430 -                (float)_total_travs/(float)_total_cards : 0.0);
   1.431 -
   1.432 -
   1.433 -  gclog_or_tty->print_cr("Histogram:\n\n%10s   %10s%10s%10s%10s%10s%10s",
   1.434 -                "range", "# cards", "# travs", "% cards", "(cum)",
   1.435 -                "% travs", "(cum)");
   1.436 -  gclog_or_tty->print_cr("------------------------------------------------------------"
   1.437 -                "-------------");
   1.438 -  float cum_cards_pct = 0.0;
   1.439 -  float cum_travs_pct = 0.0;
   1.440 -  for (int i = 1; i < 10; i++) {
   1.441 -    print_card_count_histo_range(_cum_card_count_histo, i, i+1,
   1.442 -                                 cum_cards_pct, cum_travs_pct);
   1.443 -  }
   1.444 -  for (int i = 10; i < 100; i += 10) {
   1.445 -    print_card_count_histo_range(_cum_card_count_histo, i, i+10,
   1.446 -                                 cum_cards_pct, cum_travs_pct);
   1.447 -  }
   1.448 -  print_card_count_histo_range(_cum_card_count_histo, 100, 150,
   1.449 -                               cum_cards_pct, cum_travs_pct);
   1.450 -  print_card_count_histo_range(_cum_card_count_histo, 150, 200,
   1.451 -                               cum_cards_pct, cum_travs_pct);
   1.452 -  print_card_count_histo_range(_cum_card_count_histo, 150, 255,
   1.453 -                               cum_cards_pct, cum_travs_pct);
   1.454 -  print_card_count_histo_range(_cum_card_count_histo, 255, 256,
   1.455 -                               cum_cards_pct, cum_travs_pct);
   1.456 -}
     2.1 --- a/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp	Mon Aug 03 12:59:30 2009 -0700
     2.2 +++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp	Tue Aug 04 16:00:17 2009 -0700
     2.3 @@ -29,18 +29,77 @@
     2.4  class ConcurrentG1Refine: public CHeapObj {
     2.5    ConcurrentG1RefineThread** _threads;
     2.6    int _n_threads;
     2.7 +
     2.8    // The cache for card refinement.
     2.9 -  bool     _use_cache;
    2.10 -  bool     _def_use_cache;
    2.11 -  size_t _n_periods;
    2.12 -  size_t _total_cards;
    2.13 -  size_t _total_travs;
    2.14 +  bool   _use_cache;
    2.15 +  bool   _def_use_cache;
    2.16  
    2.17 -  unsigned char* _card_counts;
    2.18 -  unsigned       _n_card_counts;
    2.19 -  const jbyte*   _ct_bot;
    2.20 -  unsigned*      _cur_card_count_histo;
    2.21 -  unsigned*      _cum_card_count_histo;
    2.22 +  size_t _n_periods;    // Used as clearing epoch
    2.23 +
    2.24 +  // An evicting cache of the number of times each card
    2.25 +  // is accessed. Reduces, but does not eliminate, the amount
    2.26 +  // of duplicated processing of dirty cards.
    2.27 +
    2.28 +  enum SomePrivateConstants {
    2.29 +    epoch_bits           = 32,
    2.30 +    card_num_shift       = epoch_bits,
    2.31 +    epoch_mask           = AllBits,
    2.32 +    card_num_mask        = AllBits,
    2.33 +
    2.34 +    // The initial cache size is approximately this fraction
    2.35 +    // of a maximal cache (i.e. the size needed for all cards
    2.36 +    // in the heap)
    2.37 +    InitialCacheFraction = 512
    2.38 +  };
    2.39 +
    2.40 +  const static julong card_num_mask_in_place =
    2.41 +                        (julong) card_num_mask << card_num_shift;
    2.42 +
    2.43 +  typedef struct {
    2.44 +    julong _value;      // |  card_num   |  epoch   |
    2.45 +  } CardEpochCacheEntry;
    2.46 +
    2.47 +  julong make_epoch_entry(unsigned int card_num, unsigned int epoch) {
    2.48 +    assert(0 <= card_num && card_num < _max_n_card_counts, "Bounds");
    2.49 +    assert(0 <= epoch && epoch <= _n_periods, "must be");
    2.50 +
    2.51 +    return ((julong) card_num << card_num_shift) | epoch;
    2.52 +  }
    2.53 +
    2.54 +  unsigned int extract_epoch(julong v) {
    2.55 +    return (v & epoch_mask);
    2.56 +  }
    2.57 +
    2.58 +  unsigned int extract_card_num(julong v) {
    2.59 +    return (v & card_num_mask_in_place) >> card_num_shift;
    2.60 +  }
    2.61 +
    2.62 +  typedef struct {
    2.63 +    unsigned char _count;
    2.64 +    unsigned char _evict_count;
    2.65 +  } CardCountCacheEntry;
    2.66 +
    2.67 +  CardCountCacheEntry* _card_counts;
    2.68 +  CardEpochCacheEntry* _card_epochs;
    2.69 +
    2.70 +  // The current number of buckets in the card count cache
    2.71 +  unsigned _n_card_counts;
    2.72 +
    2.73 +  // The max number of buckets required for the number of
    2.74 +  // cards for the entire reserved heap
    2.75 +  unsigned _max_n_card_counts;
    2.76 +
    2.77 +  // Possible sizes of the cache: odd primes that roughly double in size.
    2.78 +  // (See jvmtiTagMap.cpp).
    2.79 +  static int _cc_cache_sizes[];
    2.80 +
    2.81 +  // The index in _cc_cache_sizes corresponding to the size of
    2.82 +  // _card_counts.
    2.83 +  int _cache_size_index;
    2.84 +
    2.85 +  bool _expand_card_counts;
    2.86 +
    2.87 +  const jbyte* _ct_bot;
    2.88  
    2.89    jbyte**      _hot_cache;
    2.90    int          _hot_cache_size;
    2.91 @@ -50,12 +109,37 @@
    2.92    int          _hot_cache_par_chunk_size;
    2.93    volatile int _hot_cache_par_claimed_idx;
    2.94  
    2.95 +  // Needed to workaround 6817995
    2.96 +  CardTableModRefBS* _ct_bs;
    2.97 +  G1CollectedHeap*   _g1h;
    2.98 +
    2.99 +  // Expands the array that holds the card counts to the next size up
   2.100 +  void expand_card_count_cache();
   2.101 +
   2.102 +  // hash a given key (index of card_ptr) with the specified size
   2.103 +  static unsigned int hash(size_t key, int size) {
   2.104 +    return (unsigned int) key % size;
   2.105 +  }
   2.106 +
   2.107 +  // hash a given key (index of card_ptr)
   2.108 +  unsigned int hash(size_t key) {
   2.109 +    return hash(key, _n_card_counts);
   2.110 +  }
   2.111 +
   2.112 +  unsigned ptr_2_card_num(jbyte* card_ptr) {
   2.113 +    return (unsigned) (card_ptr - _ct_bot);
   2.114 +  }
   2.115 +
   2.116 +  jbyte* card_num_2_ptr(unsigned card_num) {
   2.117 +    return (jbyte*) (_ct_bot + card_num);
   2.118 +  }
   2.119 +
   2.120    // Returns the count of this card after incrementing it.
   2.121 -  int add_card_count(jbyte* card_ptr);
   2.122 +  jbyte* add_card_count(jbyte* card_ptr, int* count, bool* defer);
   2.123  
   2.124 -  void print_card_count_histo_range(unsigned* histo, int from, int to,
   2.125 -                                    float& cum_card_pct,
   2.126 -                                    float& cum_travs_pct);
   2.127 +  // Returns true if this card is in a young region
   2.128 +  bool is_young_card(jbyte* card_ptr);
   2.129 +
   2.130   public:
   2.131    ConcurrentG1Refine();
   2.132    ~ConcurrentG1Refine();
   2.133 @@ -69,7 +153,7 @@
   2.134    // If this is the first entry for the slot, writes into the cache and
   2.135    // returns NULL.  If it causes an eviction, returns the evicted pointer.
   2.136    // Otherwise, its a cache hit, and returns NULL.
   2.137 -  jbyte* cache_insert(jbyte* card_ptr);
   2.138 +  jbyte* cache_insert(jbyte* card_ptr, bool* defer);
   2.139  
   2.140    // Process the cached entries.
   2.141    void clean_up_cache(int worker_i, G1RemSet* g1rs);
   2.142 @@ -93,7 +177,6 @@
   2.143    }
   2.144  
   2.145    void clear_and_record_card_counts();
   2.146 -  void print_final_card_counts();
   2.147  
   2.148    static size_t thread_num();
   2.149  };
     3.1 --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Mon Aug 03 12:59:30 2009 -0700
     3.2 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Tue Aug 04 16:00:17 2009 -0700
     3.3 @@ -2414,8 +2414,6 @@
     3.4  }
     3.5  
     3.6  void G1CollectedHeap::print_tracing_info() const {
     3.7 -  concurrent_g1_refine()->print_final_card_counts();
     3.8 -
     3.9    // We'll overload this to mean "trace GC pause statistics."
    3.10    if (TraceGen0Time || TraceGen1Time) {
    3.11      // The "G1CollectorPolicy" is keeping track of these stats, so delegate
     4.1 --- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Mon Aug 03 12:59:30 2009 -0700
     4.2 +++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Tue Aug 04 16:00:17 2009 -0700
     4.3 @@ -94,7 +94,14 @@
     4.4    _summary(new Summary()),
     4.5    _abandoned_summary(new AbandonedSummary()),
     4.6  
     4.7 +#ifndef PRODUCT
     4.8    _cur_clear_ct_time_ms(0.0),
     4.9 +  _min_clear_cc_time_ms(-1.0),
    4.10 +  _max_clear_cc_time_ms(-1.0),
    4.11 +  _cur_clear_cc_time_ms(0.0),
    4.12 +  _cum_clear_cc_time_ms(0.0),
    4.13 +  _num_cc_clears(0L),
    4.14 +#endif
    4.15  
    4.16    _region_num_young(0),
    4.17    _region_num_tenured(0),
    4.18 @@ -1648,6 +1655,15 @@
    4.19          print_stats(1, "Object Copying", obj_copy_time);
    4.20        }
    4.21      }
    4.22 +#ifndef PRODUCT
    4.23 +    print_stats(1, "Cur Clear CC", _cur_clear_cc_time_ms);
    4.24 +    print_stats(1, "Cum Clear CC", _cum_clear_cc_time_ms);
    4.25 +    print_stats(1, "Min Clear CC", _min_clear_cc_time_ms);
    4.26 +    print_stats(1, "Max Clear CC", _max_clear_cc_time_ms);
    4.27 +    if (_num_cc_clears > 0) {
    4.28 +      print_stats(1, "Avg Clear CC", _cum_clear_cc_time_ms / ((double)_num_cc_clears));
    4.29 +    }
    4.30 +#endif
    4.31      print_stats(1, "Other", other_time_ms);
    4.32      for (int i = 0; i < _aux_num; ++i) {
    4.33        if (_cur_aux_times_set[i]) {
     5.1 --- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Mon Aug 03 12:59:30 2009 -0700
     5.2 +++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Tue Aug 04 16:00:17 2009 -0700
     5.3 @@ -112,7 +112,6 @@
     5.4      return 8*M;
     5.5    }
     5.6  
     5.7 -
     5.8    double _cur_collection_start_sec;
     5.9    size_t _cur_collection_pause_used_at_start_bytes;
    5.10    size_t _cur_collection_pause_used_regions_at_start;
    5.11 @@ -122,6 +121,15 @@
    5.12    double _cur_clear_ct_time_ms;
    5.13    bool   _satb_drain_time_set;
    5.14  
    5.15 +#ifndef PRODUCT
    5.16 +  // Card Table Count Cache stats
    5.17 +  double _min_clear_cc_time_ms;         // min
    5.18 +  double _max_clear_cc_time_ms;         // max
    5.19 +  double _cur_clear_cc_time_ms;         // clearing time during current pause
     5.20 +  double _cum_clear_cc_time_ms;         // cumulative clearing time
    5.21 +  jlong  _num_cc_clears;                // number of times the card count cache has been cleared
    5.22 +#endif
    5.23 +
    5.24    double _cur_CH_strong_roots_end_sec;
    5.25    double _cur_CH_strong_roots_dur_ms;
    5.26    double _cur_G1_strong_roots_end_sec;
    5.27 @@ -931,6 +939,18 @@
    5.28      _cur_aux_times_ms[i] += ms;
    5.29    }
    5.30  
    5.31 +#ifndef PRODUCT
    5.32 +  void record_cc_clear_time(double ms) {
    5.33 +    if (_min_clear_cc_time_ms < 0.0 || ms <= _min_clear_cc_time_ms)
    5.34 +      _min_clear_cc_time_ms = ms;
    5.35 +    if (_max_clear_cc_time_ms < 0.0 || ms >= _max_clear_cc_time_ms)
    5.36 +      _max_clear_cc_time_ms = ms;
    5.37 +    _cur_clear_cc_time_ms = ms;
    5.38 +    _cum_clear_cc_time_ms += ms;
    5.39 +    _num_cc_clears++;
    5.40 +  }
    5.41 +#endif
    5.42 +
    5.43    // Record the fact that "bytes" bytes allocated in a region.
    5.44    void record_before_bytes(size_t bytes);
    5.45    void record_after_bytes(size_t bytes);
     6.1 --- a/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Mon Aug 03 12:59:30 2009 -0700
     6.2 +++ b/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Tue Aug 04 16:00:17 2009 -0700
     6.3 @@ -676,6 +676,55 @@
     6.4  
     6.5  static IntHistogram out_of_histo(50, 50);
     6.6  
     6.7 +void HRInto_G1RemSet::concurrentRefineOneCard_impl(jbyte* card_ptr, int worker_i) {
     6.8 +  // Construct the region representing the card.
     6.9 +  HeapWord* start = _ct_bs->addr_for(card_ptr);
    6.10 +  // And find the region containing it.
    6.11 +  HeapRegion* r = _g1->heap_region_containing(start);
    6.12 +  assert(r != NULL, "unexpected null");
    6.13 +
    6.14 +  HeapWord* end   = _ct_bs->addr_for(card_ptr + 1);
    6.15 +  MemRegion dirtyRegion(start, end);
    6.16 +
    6.17 +#if CARD_REPEAT_HISTO
    6.18 +  init_ct_freq_table(_g1->g1_reserved_obj_bytes());
    6.19 +  ct_freq_note_card(_ct_bs->index_for(start));
    6.20 +#endif
    6.21 +
    6.22 +  UpdateRSOopClosure update_rs_oop_cl(this, worker_i);
    6.23 +  update_rs_oop_cl.set_from(r);
    6.24 +  FilterOutOfRegionClosure filter_then_update_rs_oop_cl(r, &update_rs_oop_cl);
    6.25 +
    6.26 +  // Undirty the card.
    6.27 +  *card_ptr = CardTableModRefBS::clean_card_val();
    6.28 +  // We must complete this write before we do any of the reads below.
    6.29 +  OrderAccess::storeload();
    6.30 +  // And process it, being careful of unallocated portions of TLAB's.
    6.31 +  HeapWord* stop_point =
    6.32 +    r->oops_on_card_seq_iterate_careful(dirtyRegion,
    6.33 +                                        &filter_then_update_rs_oop_cl);
    6.34 +  // If stop_point is non-null, then we encountered an unallocated region
    6.35 +  // (perhaps the unfilled portion of a TLAB.)  For now, we'll dirty the
    6.36 +  // card and re-enqueue: if we put off the card until a GC pause, then the
    6.37 +  // unallocated portion will be filled in.  Alternatively, we might try
    6.38 +  // the full complexity of the technique used in "regular" precleaning.
    6.39 +  if (stop_point != NULL) {
    6.40 +    // The card might have gotten re-dirtied and re-enqueued while we
    6.41 +    // worked.  (In fact, it's pretty likely.)
    6.42 +    if (*card_ptr != CardTableModRefBS::dirty_card_val()) {
    6.43 +      *card_ptr = CardTableModRefBS::dirty_card_val();
    6.44 +      MutexLockerEx x(Shared_DirtyCardQ_lock,
    6.45 +                      Mutex::_no_safepoint_check_flag);
    6.46 +      DirtyCardQueue* sdcq =
    6.47 +        JavaThread::dirty_card_queue_set().shared_dirty_card_queue();
    6.48 +      sdcq->enqueue(card_ptr);
    6.49 +    }
    6.50 +  } else {
    6.51 +    out_of_histo.add_entry(filter_then_update_rs_oop_cl.out_of_region());
    6.52 +    _conc_refine_cards++;
    6.53 +  }
    6.54 +}
    6.55 +
    6.56  void HRInto_G1RemSet::concurrentRefineOneCard(jbyte* card_ptr, int worker_i) {
    6.57    // If the card is no longer dirty, nothing to do.
    6.58    if (*card_ptr != CardTableModRefBS::dirty_card_val()) return;
    6.59 @@ -716,61 +765,63 @@
    6.60      return;
    6.61    }
    6.62  
    6.63 -  // Should we defer it?
    6.64 +  // Should we defer processing the card?
    6.65 +  //
    6.66 +  // Previously the result from the insert_cache call would be
    6.67 +  // either card_ptr (implying that card_ptr was currently "cold"),
    6.68 +  // null (meaning we had inserted the card ptr into the "hot"
    6.69 +  // cache, which had some headroom), or a "hot" card ptr
    6.70 +  // extracted from the "hot" cache.
    6.71 +  //
    6.72 +  // Now that the _card_counts cache in the ConcurrentG1Refine
    6.73 +  // instance is an evicting hash table, the result we get back
    6.74 +  // could be from evicting the card ptr in an already occupied
    6.75 +  // bucket (in which case we have replaced the card ptr in the
    6.76 +  // bucket with card_ptr and "defer" is set to false). To avoid
    6.77 +  // having a data structure (updates to which would need a lock)
    6.78 +  // to hold these unprocessed dirty cards, we need to immediately
    6.79 +  // process card_ptr. The actions needed to be taken on return
    6.80 +  // from cache_insert are summarized in the following table:
    6.81 +  //
    6.82 +  // res      defer   action
    6.83 +  // --------------------------------------------------------------
    6.84 +  // null     false   card evicted from _card_counts & replaced with
    6.85 +  //                  card_ptr; evicted ptr added to hot cache.
    6.86 +  //                  No need to process res; immediately process card_ptr
    6.87 +  //
    6.88 +  // null     true    card not evicted from _card_counts; card_ptr added
    6.89 +  //                  to hot cache.
    6.90 +  //                  Nothing to do.
    6.91 +  //
    6.92 +  // non-null false   card evicted from _card_counts & replaced with
    6.93 +  //                  card_ptr; evicted ptr is currently "cold" or
    6.94 +  //                  caused an eviction from the hot cache.
    6.95 +  //                  Immediately process res; process card_ptr.
    6.96 +  //
    6.97 +  // non-null true    card not evicted from _card_counts; card_ptr is
    6.98 +  //                  currently cold, or caused an eviction from hot
    6.99 +  //                  cache.
   6.100 +  //                  Immediately process res; no need to process card_ptr.
   6.101 +
   6.102 +  jbyte* res = card_ptr;
   6.103 +  bool defer = false;
   6.104    if (_cg1r->use_cache()) {
   6.105 -    card_ptr = _cg1r->cache_insert(card_ptr);
   6.106 -    // If it was not an eviction, nothing to do.
   6.107 -    if (card_ptr == NULL) return;
   6.108 -
   6.109 -    // OK, we have to reset the card start, region, etc.
   6.110 -    start = _ct_bs->addr_for(card_ptr);
   6.111 -    r = _g1->heap_region_containing(start);
   6.112 -    if (r == NULL) {
   6.113 -      guarantee(_g1->is_in_permanent(start), "Or else where?");
   6.114 -      return;  // Not in the G1 heap (might be in perm, for example.)
   6.115 +    jbyte* res = _cg1r->cache_insert(card_ptr, &defer);
   6.116 +    if (res != NULL && (res != card_ptr || defer)) {
   6.117 +      start = _ct_bs->addr_for(res);
   6.118 +      r = _g1->heap_region_containing(start);
   6.119 +      if (r == NULL) {
   6.120 +        assert(_g1->is_in_permanent(start), "Or else where?");
   6.121 +      } else {
   6.122 +        guarantee(!r->is_young(), "It was evicted in the current minor cycle.");
   6.123 +        // Process card pointer we get back from the hot card cache
   6.124 +        concurrentRefineOneCard_impl(res, worker_i);
   6.125 +      }
   6.126      }
   6.127 -    guarantee(!r->is_young(), "It was evicted in the current minor cycle.");
   6.128    }
   6.129  
   6.130 -  HeapWord* end   = _ct_bs->addr_for(card_ptr + 1);
   6.131 -  MemRegion dirtyRegion(start, end);
   6.132 -
   6.133 -#if CARD_REPEAT_HISTO
   6.134 -  init_ct_freq_table(_g1->g1_reserved_obj_bytes());
   6.135 -  ct_freq_note_card(_ct_bs->index_for(start));
   6.136 -#endif
   6.137 -
   6.138 -  UpdateRSOopClosure update_rs_oop_cl(this, worker_i);
   6.139 -  update_rs_oop_cl.set_from(r);
   6.140 -  FilterOutOfRegionClosure filter_then_update_rs_oop_cl(r, &update_rs_oop_cl);
   6.141 -
   6.142 -  // Undirty the card.
   6.143 -  *card_ptr = CardTableModRefBS::clean_card_val();
   6.144 -  // We must complete this write before we do any of the reads below.
   6.145 -  OrderAccess::storeload();
   6.146 -  // And process it, being careful of unallocated portions of TLAB's.
   6.147 -  HeapWord* stop_point =
   6.148 -    r->oops_on_card_seq_iterate_careful(dirtyRegion,
   6.149 -                                        &filter_then_update_rs_oop_cl);
   6.150 -  // If stop_point is non-null, then we encountered an unallocated region
   6.151 -  // (perhaps the unfilled portion of a TLAB.)  For now, we'll dirty the
   6.152 -  // card and re-enqueue: if we put off the card until a GC pause, then the
   6.153 -  // unallocated portion will be filled in.  Alternatively, we might try
   6.154 -  // the full complexity of the technique used in "regular" precleaning.
   6.155 -  if (stop_point != NULL) {
   6.156 -    // The card might have gotten re-dirtied and re-enqueued while we
   6.157 -    // worked.  (In fact, it's pretty likely.)
   6.158 -    if (*card_ptr != CardTableModRefBS::dirty_card_val()) {
   6.159 -      *card_ptr = CardTableModRefBS::dirty_card_val();
   6.160 -      MutexLockerEx x(Shared_DirtyCardQ_lock,
   6.161 -                      Mutex::_no_safepoint_check_flag);
   6.162 -      DirtyCardQueue* sdcq =
   6.163 -        JavaThread::dirty_card_queue_set().shared_dirty_card_queue();
   6.164 -      sdcq->enqueue(card_ptr);
   6.165 -    }
   6.166 -  } else {
   6.167 -    out_of_histo.add_entry(filter_then_update_rs_oop_cl.out_of_region());
   6.168 -    _conc_refine_cards++;
   6.169 +  if (!defer) {
   6.170 +    concurrentRefineOneCard_impl(card_ptr, worker_i);
   6.171    }
   6.172  }
   6.173  
     7.1 --- a/src/share/vm/gc_implementation/g1/g1RemSet.hpp	Mon Aug 03 12:59:30 2009 -0700
     7.2 +++ b/src/share/vm/gc_implementation/g1/g1RemSet.hpp	Tue Aug 04 16:00:17 2009 -0700
     7.3 @@ -157,6 +157,10 @@
     7.4      }
     7.5    }
     7.6  
     7.7 +  // The routine that performs the actual work of refining a dirty
     7.8 +  // card.
     7.9 +  void concurrentRefineOneCard_impl(jbyte* card_ptr, int worker_i);
    7.10 +
    7.11  protected:
    7.12    template <class T> void write_ref_nv(HeapRegion* from, T* p);
    7.13    template <class T> void par_write_ref_nv(HeapRegion* from, T* p, int tid);
     8.1 --- a/src/share/vm/gc_implementation/g1/g1_globals.hpp	Mon Aug 03 12:59:30 2009 -0700
     8.2 +++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp	Tue Aug 04 16:00:17 2009 -0700
     8.3 @@ -187,10 +187,6 @@
     8.4    develop(intx, G1ConcRSLogCacheSize, 10,                                   \
     8.5            "Log base 2 of the length of conc RS hot-card cache.")            \
     8.6                                                                              \
     8.7 -  develop(bool, G1ConcRSCountTraversals, false,                             \
     8.8 -          "If true, gather data about the number of times CR traverses "    \
     8.9 -          "cards ")                                                         \
    8.10 -                                                                            \
    8.11    develop(intx, G1ConcRSHotCardLimit, 4,                                    \
    8.12            "The threshold that defines (>=) a hot card.")                    \
    8.13                                                                              \
    8.14 @@ -264,6 +260,10 @@
    8.15                                                                              \
    8.16    product(uintx, G1ParallelRSetThreads, 0,                                  \
    8.17            "If non-0 is the number of parallel rem set update threads, "     \
    8.18 -          "otherwise the value is determined ergonomically.")
    8.19 +          "otherwise the value is determined ergonomically.")               \
    8.20 +                                                                            \
    8.21 +  develop(intx, G1CardCountCacheExpandThreshold, 16,                        \
    8.22 +          "Expand the card count cache if the number of collisions for "    \
    8.23 +          "a particular entry exceeds this value.")
    8.24  
    8.25  G1_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_EXPERIMENTAL_FLAG, DECLARE_NOTPRODUCT_FLAG, DECLARE_MANAGEABLE_FLAG, DECLARE_PRODUCT_RW_FLAG)
     9.1 --- a/src/share/vm/gc_implementation/includeDB_gc_g1	Mon Aug 03 12:59:30 2009 -0700
     9.2 +++ b/src/share/vm/gc_implementation/includeDB_gc_g1	Tue Aug 04 16:00:17 2009 -0700
     9.3 @@ -45,11 +45,14 @@
     9.4  concurrentG1Refine.cpp			concurrentG1RefineThread.hpp
     9.5  concurrentG1Refine.cpp			copy.hpp
     9.6  concurrentG1Refine.cpp			g1CollectedHeap.inline.hpp
     9.7 +concurrentG1Refine.cpp                  g1CollectorPolicy.hpp
     9.8  concurrentG1Refine.cpp			g1RemSet.hpp
     9.9  concurrentG1Refine.cpp			space.inline.hpp
    9.10 +concurrentG1Refine.cpp                  heapRegionSeq.inline.hpp
    9.11  
    9.12  concurrentG1Refine.hpp			globalDefinitions.hpp
    9.13  concurrentG1Refine.hpp			allocation.hpp
    9.14 +concurrentG1Refine.hpp                  cardTableModRefBS.hpp
    9.15  concurrentG1Refine.hpp			thread.hpp
    9.16  
    9.17  concurrentG1RefineThread.cpp		concurrentG1Refine.hpp

mercurial