src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp

Mon, 03 Aug 2009 12:59:30 -0700

author
johnc
date
Mon, 03 Aug 2009 12:59:30 -0700
changeset 1324
15c5903cf9e1
parent 1279
bd02caa94611
child 1325
6cb8e9df7174
permissions
-rw-r--r--

6865703: G1: Parallelize hot card cache cleanup
Summary: Have the GC worker threads clear the hot card cache in parallel by having each worker thread claim a chunk of the card cache and process the cards in that chunk. The size of the chunks that each thread will claim is determined at VM initialization from the size of the card cache and the number of worker threads.
Reviewed-by: jmasa, tonyp

     1 /*
     2  * Copyright 2001-2009 Sun Microsystems, Inc.  All Rights Reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.
     8  *
     9  * This code is distributed in the hope that it will be useful, but WITHOUT
    10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    12  * version 2 for more details (a copy is included in the LICENSE file that
    13  * accompanied this code).
    14  *
    15  * You should have received a copy of the GNU General Public License version
    16  * 2 along with this work; if not, write to the Free Software Foundation,
    17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    18  *
    19  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
    20  * CA 95054 USA or visit www.sun.com if you need additional information or
    21  * have any questions.
    22  *
    23  */
    25 #include "incls/_precompiled.incl"
    26 #include "incls/_concurrentG1Refine.cpp.incl"
    28 ConcurrentG1Refine::ConcurrentG1Refine() :
    29   _card_counts(NULL), _cur_card_count_histo(NULL), _cum_card_count_histo(NULL),
    30   _hot_cache(NULL),
    31   _def_use_cache(false), _use_cache(false),
    32   _n_periods(0), _total_cards(0), _total_travs(0),
    33   _threads(NULL), _n_threads(0)
    34 {
    35   if (G1ConcRefine) {
    36     _n_threads = (int)thread_num();
    37     if (_n_threads > 0) {
    38       _threads = NEW_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _n_threads);
    39       int worker_id_offset = (int)DirtyCardQueueSet::num_par_ids();
    40       ConcurrentG1RefineThread *next = NULL;
    41       for (int i = _n_threads - 1; i >= 0; i--) {
    42         ConcurrentG1RefineThread* t = new ConcurrentG1RefineThread(this, next, worker_id_offset, i);
    43         assert(t != NULL, "Conc refine should have been created");
    44         assert(t->cg1r() == this, "Conc refine thread should refer to this");
    45         _threads[i] = t;
    46         next = t;
    47       }
    48     }
    49   }
    50 }
    52 size_t ConcurrentG1Refine::thread_num() {
    53   if (G1ConcRefine) {
    54     return (G1ParallelRSetThreads > 0) ? G1ParallelRSetThreads : ParallelGCThreads;
    55   }
    56   return 0;
    57 }
    59 void ConcurrentG1Refine::init() {
    60   G1CollectedHeap* g1h = G1CollectedHeap::heap();
    61   if (G1ConcRSLogCacheSize > 0 || G1ConcRSCountTraversals) {
    62     _n_card_counts =
    63       (unsigned) (g1h->g1_reserved_obj_bytes() >> CardTableModRefBS::card_shift);
    64     _card_counts = NEW_C_HEAP_ARRAY(unsigned char, _n_card_counts);
    65     for (size_t i = 0; i < _n_card_counts; i++) _card_counts[i] = 0;
    66     ModRefBarrierSet* bs = g1h->mr_bs();
    67     guarantee(bs->is_a(BarrierSet::CardTableModRef), "Precondition");
    68     CardTableModRefBS* ctbs = (CardTableModRefBS*)bs;
    69     _ct_bot = ctbs->byte_for_const(g1h->reserved_region().start());
    70     if (G1ConcRSCountTraversals) {
    71       _cur_card_count_histo = NEW_C_HEAP_ARRAY(unsigned, 256);
    72       _cum_card_count_histo = NEW_C_HEAP_ARRAY(unsigned, 256);
    73       for (int i = 0; i < 256; i++) {
    74         _cur_card_count_histo[i] = 0;
    75         _cum_card_count_histo[i] = 0;
    76       }
    77     }
    78   }
    79   if (G1ConcRSLogCacheSize > 0) {
    80     _def_use_cache = true;
    81     _use_cache = true;
    82     _hot_cache_size = (1 << G1ConcRSLogCacheSize);
    83     _hot_cache = NEW_C_HEAP_ARRAY(jbyte*, _hot_cache_size);
    84     _n_hot = 0;
    85     _hot_cache_idx = 0;
    87     // For refining the cards in the hot cache in parallel
    88     int n_workers = (ParallelGCThreads > 0 ?
    89                         g1h->workers()->total_workers() : 1);
    90     _hot_cache_par_chunk_size = MAX2(1, _hot_cache_size / n_workers);
    91     _hot_cache_par_claimed_idx = 0;
    92   }
    93 }
    95 void ConcurrentG1Refine::stop() {
    96   if (_threads != NULL) {
    97     for (int i = 0; i < _n_threads; i++) {
    98       _threads[i]->stop();
    99     }
   100   }
   101 }
   103 ConcurrentG1Refine::~ConcurrentG1Refine() {
   104   if (G1ConcRSLogCacheSize > 0 || G1ConcRSCountTraversals) {
   105     assert(_card_counts != NULL, "Logic");
   106     FREE_C_HEAP_ARRAY(unsigned char, _card_counts);
   107     assert(_cur_card_count_histo != NULL, "Logic");
   108     FREE_C_HEAP_ARRAY(unsigned, _cur_card_count_histo);
   109     assert(_cum_card_count_histo != NULL, "Logic");
   110     FREE_C_HEAP_ARRAY(unsigned, _cum_card_count_histo);
   111   }
   112   if (G1ConcRSLogCacheSize > 0) {
   113     assert(_hot_cache != NULL, "Logic");
   114     FREE_C_HEAP_ARRAY(jbyte*, _hot_cache);
   115   }
   116   if (_threads != NULL) {
   117     for (int i = 0; i < _n_threads; i++) {
   118       delete _threads[i];
   119     }
   120     FREE_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _threads);
   121   }
   122 }
   124 void ConcurrentG1Refine::threads_do(ThreadClosure *tc) {
   125   if (_threads != NULL) {
   126     for (int i = 0; i < _n_threads; i++) {
   127       tc->do_thread(_threads[i]);
   128     }
   129   }
   130 }
   133 int ConcurrentG1Refine::add_card_count(jbyte* card_ptr) {
   134   size_t card_num = (card_ptr - _ct_bot);
   135   guarantee(0 <= card_num && card_num < _n_card_counts, "Bounds");
   136   unsigned char cnt = _card_counts[card_num];
   137   if (cnt < 255) _card_counts[card_num]++;
   138   return cnt;
   139   _total_travs++;
   140 }
   142 jbyte* ConcurrentG1Refine::cache_insert(jbyte* card_ptr) {
   143   int count = add_card_count(card_ptr);
   144   // Count previously unvisited cards.
   145   if (count == 0) _total_cards++;
   146   // We'll assume a traversal unless we store it in the cache.
   147   if (count < G1ConcRSHotCardLimit) {
   148     _total_travs++;
   149     return card_ptr;
   150   }
   151   // Otherwise, it's hot.
   152   jbyte* res = NULL;
   153   MutexLockerEx x(HotCardCache_lock, Mutex::_no_safepoint_check_flag);
   154   if (_n_hot == _hot_cache_size) {
   155     _total_travs++;
   156     res = _hot_cache[_hot_cache_idx];
   157     _n_hot--;
   158   }
   159   // Now _n_hot < _hot_cache_size, and we can insert at _hot_cache_idx.
   160   _hot_cache[_hot_cache_idx] = card_ptr;
   161   _hot_cache_idx++;
   162   if (_hot_cache_idx == _hot_cache_size) _hot_cache_idx = 0;
   163   _n_hot++;
   164   return res;
   165 }
   168 void ConcurrentG1Refine::clean_up_cache(int worker_i, G1RemSet* g1rs) {
   169   assert(!use_cache(), "cache should be disabled");
   170   int start_idx;
   172   while ((start_idx = _hot_cache_par_claimed_idx) < _n_hot) { // read once
   173     int end_idx = start_idx + _hot_cache_par_chunk_size;
   175     if (start_idx ==
   176         Atomic::cmpxchg(end_idx, &_hot_cache_par_claimed_idx, start_idx)) {
   177       // The current worker has successfully claimed the chunk [start_idx..end_idx)
   178       end_idx = MIN2(end_idx, _n_hot);
   179       for (int i = start_idx; i < end_idx; i++) {
   180         jbyte* entry = _hot_cache[i];
   181         if (entry != NULL) {
   182           g1rs->concurrentRefineOneCard(entry, worker_i);
   183         }
   184       }
   185     }
   186   }
   187 }
   189 void ConcurrentG1Refine::clear_and_record_card_counts() {
   190   if (G1ConcRSLogCacheSize == 0 && !G1ConcRSCountTraversals) return;
   191   _n_periods++;
   192   if (G1ConcRSCountTraversals) {
   193     for (size_t i = 0; i < _n_card_counts; i++) {
   194       unsigned char bucket = _card_counts[i];
   195       _cur_card_count_histo[bucket]++;
   196       _card_counts[i] = 0;
   197     }
   198     gclog_or_tty->print_cr("Card counts:");
   199     for (int i = 0; i < 256; i++) {
   200       if (_cur_card_count_histo[i] > 0) {
   201         gclog_or_tty->print_cr("  %3d: %9d", i, _cur_card_count_histo[i]);
   202         _cum_card_count_histo[i] += _cur_card_count_histo[i];
   203         _cur_card_count_histo[i] = 0;
   204       }
   205     }
   206   } else {
   207     assert(G1ConcRSLogCacheSize > 0, "Logic");
   208     Copy::fill_to_words((HeapWord*)(&_card_counts[0]),
   209                         _n_card_counts / HeapWordSize);
   210   }
   211 }
   213 void
   214 ConcurrentG1Refine::
   215 print_card_count_histo_range(unsigned* histo, int from, int to,
   216                              float& cum_card_pct,
   217                              float& cum_travs_pct) {
   218   unsigned cards = 0;
   219   unsigned travs = 0;
   220   guarantee(to <= 256, "Precondition");
   221   for (int i = from; i < to-1; i++) {
   222     cards += histo[i];
   223     travs += histo[i] * i;
   224   }
   225   if (to == 256) {
   226     unsigned histo_card_sum = 0;
   227     unsigned histo_trav_sum = 0;
   228     for (int i = 1; i < 255; i++) {
   229       histo_trav_sum += histo[i] * i;
   230     }
   231     cards += histo[255];
   232     // correct traversals for the last one.
   233     unsigned travs_255 = (unsigned) (_total_travs - histo_trav_sum);
   234     travs += travs_255;
   236   } else {
   237     cards += histo[to-1];
   238     travs += histo[to-1] * (to-1);
   239   }
   240   float fperiods = (float)_n_periods;
   241   float f_tot_cards = (float)_total_cards/fperiods;
   242   float f_tot_travs = (float)_total_travs/fperiods;
   243   if (cards > 0) {
   244     float fcards = (float)cards/fperiods;
   245     float ftravs = (float)travs/fperiods;
   246     if (to == 256) {
   247       gclog_or_tty->print(" %4d-       %10.2f%10.2f", from, fcards, ftravs);
   248     } else {
   249       gclog_or_tty->print(" %4d-%4d   %10.2f%10.2f", from, to-1, fcards, ftravs);
   250     }
   251     float pct_cards = fcards*100.0/f_tot_cards;
   252     cum_card_pct += pct_cards;
   253     float pct_travs = ftravs*100.0/f_tot_travs;
   254     cum_travs_pct += pct_travs;
   255     gclog_or_tty->print_cr("%10.2f%10.2f%10.2f%10.2f",
   256                   pct_cards, cum_card_pct,
   257                   pct_travs, cum_travs_pct);
   258   }
   259 }
   261 void ConcurrentG1Refine::print_final_card_counts() {
   262   if (!G1ConcRSCountTraversals) return;
   264   gclog_or_tty->print_cr("Did %d total traversals of %d distinct cards.",
   265                 _total_travs, _total_cards);
   266   float fperiods = (float)_n_periods;
   267   gclog_or_tty->print_cr("  This is an average of %8.2f traversals, %8.2f cards, "
   268                 "per collection.", (float)_total_travs/fperiods,
   269                 (float)_total_cards/fperiods);
   270   gclog_or_tty->print_cr("  This is an average of %8.2f traversals/distinct "
   271                 "dirty card.\n",
   272                 _total_cards > 0 ?
   273                 (float)_total_travs/(float)_total_cards : 0.0);
   276   gclog_or_tty->print_cr("Histogram:\n\n%10s   %10s%10s%10s%10s%10s%10s",
   277                 "range", "# cards", "# travs", "% cards", "(cum)",
   278                 "% travs", "(cum)");
   279   gclog_or_tty->print_cr("------------------------------------------------------------"
   280                 "-------------");
   281   float cum_cards_pct = 0.0;
   282   float cum_travs_pct = 0.0;
   283   for (int i = 1; i < 10; i++) {
   284     print_card_count_histo_range(_cum_card_count_histo, i, i+1,
   285                                  cum_cards_pct, cum_travs_pct);
   286   }
   287   for (int i = 10; i < 100; i += 10) {
   288     print_card_count_histo_range(_cum_card_count_histo, i, i+10,
   289                                  cum_cards_pct, cum_travs_pct);
   290   }
   291   print_card_count_histo_range(_cum_card_count_histo, 100, 150,
   292                                cum_cards_pct, cum_travs_pct);
   293   print_card_count_histo_range(_cum_card_count_histo, 150, 200,
   294                                cum_cards_pct, cum_travs_pct);
   295   print_card_count_histo_range(_cum_card_count_histo, 150, 255,
   296                                cum_cards_pct, cum_travs_pct);
   297   print_card_count_histo_range(_cum_card_count_histo, 255, 256,
   298                                cum_cards_pct, cum_travs_pct);
   299 }

mercurial