src/share/vm/gc_implementation/g1/g1RemSet.cpp

author:      johnc
date:        Mon, 03 Aug 2009 12:59:30 -0700
changeset:   1324 (15c5903cf9e1)
parent:      1280 (df6caf649ff7)
child:       1325 (6cb8e9df7174)
permissions: -rw-r--r--

6865703: G1: Parallelize hot card cache cleanup
Summary: Clear the hot card cache in parallel: each GC worker thread claims a chunk of the card cache and processes the cards in that chunk. The size of the chunks each thread claims is determined at VM initialization from the size of the card cache and the number of worker threads.
Reviewed-by: jmasa, tonyp
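
The following is a minimal sketch of the chunk-claiming scheme the summary describes, not the actual ConcurrentG1Refine code introduced by this changeset; the type and member names (HotCardCacheSketch, _next_chunk, _chunk_size, claim_and_clear_chunk) are hypothetical.

#include <atomic>
#include <cstddef>

struct HotCardCacheSketch {
  unsigned char**     _cards;        // cached card pointers
  size_t              _n_cards;      // total entries in the cache
  size_t              _chunk_size;   // fixed at VM init: cache size / worker count
  std::atomic<size_t> _next_chunk;   // next unclaimed chunk index

  // Called repeatedly by each GC worker thread.  Because the chunk index is
  // claimed with an atomic add, every chunk is cleared by exactly one worker.
  bool claim_and_clear_chunk() {
    size_t chunk = _next_chunk.fetch_add(1);
    size_t start = chunk * _chunk_size;
    if (start >= _n_cards) return false;   // cache fully claimed
    size_t end = start + _chunk_size;
    if (end > _n_cards) end = _n_cards;
    for (size_t i = start; i < end; i++) {
      _cards[i] = NULL;                    // drop the cached card
    }
    return true;
  }
};

// Each worker drains the cache with:  while (cache->claim_and_clear_chunk()) { }

Since the chunk size is computed once at startup, the workers need no coordination beyond the single atomic counter.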

     1 /*
     2  * Copyright 2001-2009 Sun Microsystems, Inc.  All Rights Reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.
     8  *
     9  * This code is distributed in the hope that it will be useful, but WITHOUT
    10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    12  * version 2 for more details (a copy is included in the LICENSE file that
    13  * accompanied this code).
    14  *
    15  * You should have received a copy of the GNU General Public License version
    16  * 2 along with this work; if not, write to the Free Software Foundation,
    17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    18  *
    19  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
    20  * CA 95054 USA or visit www.sun.com if you need additional information or
    21  * have any questions.
    22  *
    23  */
    25 #include "incls/_precompiled.incl"
    26 #include "incls/_g1RemSet.cpp.incl"
    28 #define CARD_REPEAT_HISTO 0
    30 #if CARD_REPEAT_HISTO
    31 static size_t ct_freq_sz;
    32 static jbyte* ct_freq = NULL;
    34 void init_ct_freq_table(size_t heap_sz_bytes) {
    35   if (ct_freq == NULL) {
    36     ct_freq_sz = heap_sz_bytes/CardTableModRefBS::card_size;
    37     ct_freq = new jbyte[ct_freq_sz];
    38     for (size_t j = 0; j < ct_freq_sz; j++) ct_freq[j] = 0;
    39   }
    40 }
    42 void ct_freq_note_card(size_t index) {
    43   assert(0 <= index && index < ct_freq_sz, "Bounds error.");
    44   if (ct_freq[index] < 100) { ct_freq[index]++; }
    45 }
    47 static IntHistogram card_repeat_count(10, 10);
    49 void ct_freq_update_histo_and_reset() {
    50   for (size_t j = 0; j < ct_freq_sz; j++) {
    51     card_repeat_count.add_entry(ct_freq[j]);
    52     ct_freq[j] = 0;
    53   }
    55 }
    56 #endif
    59 class IntoCSOopClosure: public OopsInHeapRegionClosure {
    60   OopsInHeapRegionClosure* _blk;
    61   G1CollectedHeap* _g1;
    62 public:
    63   IntoCSOopClosure(G1CollectedHeap* g1, OopsInHeapRegionClosure* blk) :
    64     _g1(g1), _blk(blk) {}
    65   void set_region(HeapRegion* from) {
    66     _blk->set_region(from);
    67   }
    68   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
    69   virtual void do_oop(      oop* p) { do_oop_work(p); }
    70   template <class T> void do_oop_work(T* p) {
    71     oop obj = oopDesc::load_decode_heap_oop(p);
    72     if (_g1->obj_in_cs(obj)) _blk->do_oop(p);
    73   }
    74   bool apply_to_weak_ref_discovered_field() { return true; }
    75   bool idempotent() { return true; }
    76 };
    78 class IntoCSRegionClosure: public HeapRegionClosure {
    79   IntoCSOopClosure _blk;
    80   G1CollectedHeap* _g1;
    81 public:
    82   IntoCSRegionClosure(G1CollectedHeap* g1, OopsInHeapRegionClosure* blk) :
    83     _g1(g1), _blk(g1, blk) {}
    84   bool doHeapRegion(HeapRegion* r) {
    85     if (!r->in_collection_set()) {
    86       _blk.set_region(r);
    87       if (r->isHumongous()) {
    88         if (r->startsHumongous()) {
    89           oop obj = oop(r->bottom());
    90           obj->oop_iterate(&_blk);
    91         }
    92       } else {
    93         r->oop_before_save_marks_iterate(&_blk);
    94       }
    95     }
    96     return false;
    97   }
    98 };
   100 void
   101 StupidG1RemSet::oops_into_collection_set_do(OopsInHeapRegionClosure* oc,
   102                                             int worker_i) {
   103   IntoCSRegionClosure rc(_g1, oc);
   104   _g1->heap_region_iterate(&rc);
   105 }
   107 class VerifyRSCleanCardOopClosure: public OopClosure {
   108   G1CollectedHeap* _g1;
   109 public:
   110   VerifyRSCleanCardOopClosure(G1CollectedHeap* g1) : _g1(g1) {}
   112   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
   113   virtual void do_oop(      oop* p) { do_oop_work(p); }
   114   template <class T> void do_oop_work(T* p) {
   115     oop obj = oopDesc::load_decode_heap_oop(p);
   116     HeapRegion* to = _g1->heap_region_containing(obj);
   117     guarantee(to == NULL || !to->in_collection_set(),
   118               "Missed a rem set member.");
   119   }
   120 };
   122 HRInto_G1RemSet::HRInto_G1RemSet(G1CollectedHeap* g1, CardTableModRefBS* ct_bs)
   123   : G1RemSet(g1), _ct_bs(ct_bs), _g1p(_g1->g1_policy()),
   124     _cg1r(g1->concurrent_g1_refine()),
   125     _par_traversal_in_progress(false), _new_refs(NULL),
   126     _cards_scanned(NULL), _total_cards_scanned(0)
   127 {
   128   _seq_task = new SubTasksDone(NumSeqTasks);
   129   guarantee(n_workers() > 0, "There should be some workers");
   130   _new_refs = NEW_C_HEAP_ARRAY(GrowableArray<OopOrNarrowOopStar>*, n_workers());
   131   for (uint i = 0; i < n_workers(); i++) {
   132     _new_refs[i] = new (ResourceObj::C_HEAP) GrowableArray<OopOrNarrowOopStar>(8192,true);
   133   }
   134 }
   136 HRInto_G1RemSet::~HRInto_G1RemSet() {
   137   delete _seq_task;
   138   for (uint i = 0; i < n_workers(); i++) {
   139     delete _new_refs[i];
   140   }
   141   FREE_C_HEAP_ARRAY(GrowableArray<OopOrNarrowOopStar>*, _new_refs);
   142 }
   144 void CountNonCleanMemRegionClosure::do_MemRegion(MemRegion mr) {
   145   if (_g1->is_in_g1_reserved(mr.start())) {
   146     _n += (int) ((mr.byte_size() / CardTableModRefBS::card_size));
   147     if (_start_first == NULL) _start_first = mr.start();
   148   }
   149 }
   151 class ScanRSClosure : public HeapRegionClosure {
   152   size_t _cards_done, _cards;
   153   G1CollectedHeap* _g1h;
   154   OopsInHeapRegionClosure* _oc;
   155   G1BlockOffsetSharedArray* _bot_shared;
   156   CardTableModRefBS *_ct_bs;
   157   int _worker_i;
   158   bool _try_claimed;
   159   size_t _min_skip_distance, _max_skip_distance;
   160 public:
   161   ScanRSClosure(OopsInHeapRegionClosure* oc, int worker_i) :
   162     _oc(oc),
   163     _cards(0),
   164     _cards_done(0),
   165     _worker_i(worker_i),
   166     _try_claimed(false)
   167   {
   168     _g1h = G1CollectedHeap::heap();
   169     _bot_shared = _g1h->bot_shared();
   170     _ct_bs = (CardTableModRefBS*) (_g1h->barrier_set());
   171     _min_skip_distance = 16;
   172     _max_skip_distance = 2 * _g1h->n_par_threads() * _min_skip_distance;
   173   }
   175   void set_try_claimed() { _try_claimed = true; }
   177   void scanCard(size_t index, HeapRegion *r) {
   178     _cards_done++;
   179     DirtyCardToOopClosure* cl =
   180       r->new_dcto_closure(_oc,
   181                          CardTableModRefBS::Precise,
   182                          HeapRegionDCTOC::IntoCSFilterKind);
   184     // Set the "from" region in the closure.
   185     _oc->set_region(r);
   186     HeapWord* card_start = _bot_shared->address_for_index(index);
   187     HeapWord* card_end = card_start + G1BlockOffsetSharedArray::N_words;
   188     Space *sp = SharedHeap::heap()->space_containing(card_start);
   189     MemRegion sm_region;
   190     if (ParallelGCThreads > 0) {
   191       // first find the used area
   192       sm_region = sp->used_region_at_save_marks();
   193     } else {
   194       // The closure is not idempotent.  We shouldn't look at objects
   195       // allocated during the GC.
   196       sm_region = sp->used_region_at_save_marks();
   197     }
   198     MemRegion mr = sm_region.intersection(MemRegion(card_start,card_end));
   199     if (!mr.is_empty()) {
   200       cl->do_MemRegion(mr);
   201     }
   202   }
   204   void printCard(HeapRegion* card_region, size_t card_index,
   205                  HeapWord* card_start) {
   206     gclog_or_tty->print_cr("T %d Region [" PTR_FORMAT ", " PTR_FORMAT ") "
   207                            "RS names card %p: "
   208                            "[" PTR_FORMAT ", " PTR_FORMAT ")",
   209                            _worker_i,
   210                            card_region->bottom(), card_region->end(),
   211                            card_index,
   212                            card_start, card_start + G1BlockOffsetSharedArray::N_words);
   213   }
   215   bool doHeapRegion(HeapRegion* r) {
   216     assert(r->in_collection_set(), "should only be called on elements of CS.");
   217     HeapRegionRemSet* hrrs = r->rem_set();
   218     if (hrrs->iter_is_complete()) return false; // All done.
   219     if (!_try_claimed && !hrrs->claim_iter()) return false;
   220     _g1h->push_dirty_cards_region(r);
   221     // If we didn't return above, then
   222     //   _try_claimed || r->claim_iter()
   223     // is true: either we're supposed to work on claimed-but-not-complete
   224     // regions, or we successfully claimed the region.
   225     HeapRegionRemSetIterator* iter = _g1h->rem_set_iterator(_worker_i);
   226     hrrs->init_iterator(iter);
   227     size_t card_index;
   228     size_t skip_distance = 0, current_card = 0, jump_to_card = 0;
   229     while (iter->has_next(card_index)) {
   230       if (current_card < jump_to_card) {
   231         ++current_card;
   232         continue;
   233       }
   234       HeapWord* card_start = _g1h->bot_shared()->address_for_index(card_index);
   235 #if 0
   236       gclog_or_tty->print("Rem set iteration yielded card [" PTR_FORMAT ", " PTR_FORMAT ").\n",
   237                           card_start, card_start + CardTableModRefBS::card_size_in_words);
   238 #endif
   240       HeapRegion* card_region = _g1h->heap_region_containing(card_start);
   241       assert(card_region != NULL, "Yielding cards not in the heap?");
   242       _cards++;
   244       if (!card_region->is_on_dirty_cards_region_list()) {
   245         _g1h->push_dirty_cards_region(card_region);
   246       }
   248        // If the card is dirty, then we will scan it during updateRS.
   249       if (!card_region->in_collection_set() && !_ct_bs->is_card_dirty(card_index)) {
   250           if (!_ct_bs->is_card_claimed(card_index) && _ct_bs->claim_card(card_index)) {
   251             scanCard(card_index, card_region);
   252           } else if (_try_claimed) {
   253             if (jump_to_card == 0 || jump_to_card != current_card) {
   254               // We did some useful work in the previous iteration.
   255               // Decrease the distance.
   256               skip_distance = MAX2(skip_distance >> 1, _min_skip_distance);
   257             } else {
   258               // Previous iteration resulted in a claim failure.
   259               // Increase the distance.
   260               skip_distance = MIN2(skip_distance << 1, _max_skip_distance);
   261             }
   262             jump_to_card = current_card + skip_distance;
   263           }
   264       }
   265       ++current_card;
   266     }
   267     if (!_try_claimed) {
   268       hrrs->set_iter_complete();
   269     }
   270     return false;
   271   }
   272   // Set all cards back to clean.
   273   void cleanup() {_g1h->cleanUpCardTable();}
   274   size_t cards_done() { return _cards_done;}
   275   size_t cards_looked_up() { return _cards;}
   276 };
   278 // We want the parallel threads to start their scanning at
   279 // different collection set regions to avoid contention.
   280 // If we have:
   281 //          n collection set regions
   282 //          p threads
   283 // Then thread t will start at region t * floor (n/p)
   285 HeapRegion* HRInto_G1RemSet::calculateStartRegion(int worker_i) {
   286   HeapRegion* result = _g1p->collection_set();
   287   if (ParallelGCThreads > 0) {
   288     size_t cs_size = _g1p->collection_set_size();
   289     int n_workers = _g1->workers()->total_workers();
   290     size_t cs_spans = cs_size / n_workers;
   291     size_t ind      = cs_spans * worker_i;
   292     for (size_t i = 0; i < ind; i++)
   293       result = result->next_in_collection_set();
   294   }
   295   return result;
   296 }
   298 void HRInto_G1RemSet::scanRS(OopsInHeapRegionClosure* oc, int worker_i) {
   299   double rs_time_start = os::elapsedTime();
   300   HeapRegion *startRegion = calculateStartRegion(worker_i);
   302   BufferingOopsInHeapRegionClosure boc(oc);
   303   ScanRSClosure scanRScl(&boc, worker_i);
   304   _g1->collection_set_iterate_from(startRegion, &scanRScl);
   305   scanRScl.set_try_claimed();
   306   _g1->collection_set_iterate_from(startRegion, &scanRScl);
   308   boc.done();
   309   double closure_app_time_sec = boc.closure_app_seconds();
   310   double scan_rs_time_sec = (os::elapsedTime() - rs_time_start) -
   311     closure_app_time_sec;
   312   double closure_app_time_ms = closure_app_time_sec * 1000.0;
   314   assert( _cards_scanned != NULL, "invariant" );
   315   _cards_scanned[worker_i] = scanRScl.cards_done();
   317   _g1p->record_scan_rs_start_time(worker_i, rs_time_start * 1000.0);
   318   _g1p->record_scan_rs_time(worker_i, scan_rs_time_sec * 1000.0);
   320   double scan_new_refs_time_ms = _g1p->get_scan_new_refs_time(worker_i);
   321   if (scan_new_refs_time_ms > 0.0) {
   322     closure_app_time_ms += scan_new_refs_time_ms;
   323   }
   325   _g1p->record_obj_copy_time(worker_i, closure_app_time_ms);
   326 }
   328 void HRInto_G1RemSet::updateRS(int worker_i) {
   329   ConcurrentG1Refine* cg1r = _g1->concurrent_g1_refine();
   331   double start = os::elapsedTime();
   332   _g1p->record_update_rs_start_time(worker_i, start * 1000.0);
   334   // Apply the appropriate closure to all remaining log entries.
   335   _g1->iterate_dirty_card_closure(false, worker_i);
   336   // Now there should be no dirty cards.
   337   if (G1RSLogCheckCardTable) {
   338     CountNonCleanMemRegionClosure cl(_g1);
   339     _ct_bs->mod_card_iterate(&cl);
   340     // XXX This isn't true any more: keeping cards of young regions
   341     // marked dirty broke it.  Need some reasonable fix.
   342     guarantee(cl.n() == 0, "Card table should be clean.");
   343   }
   345   _g1p->record_update_rs_time(worker_i, (os::elapsedTime() - start) * 1000.0);
   346 }
   348 #ifndef PRODUCT
   349 class PrintRSClosure : public HeapRegionClosure {
   350   int _count;
   351 public:
   352   PrintRSClosure() : _count(0) {}
   353   bool doHeapRegion(HeapRegion* r) {
   354     HeapRegionRemSet* hrrs = r->rem_set();
   355     _count += (int) hrrs->occupied();
   356     if (hrrs->occupied() == 0) {
   357       gclog_or_tty->print("Heap Region [" PTR_FORMAT ", " PTR_FORMAT ") "
   358                           "has no remset entries\n",
   359                           r->bottom(), r->end());
   360     } else {
   361       gclog_or_tty->print("Printing rem set for heap region [" PTR_FORMAT ", " PTR_FORMAT ")\n",
   362                           r->bottom(), r->end());
   363       r->print();
   364       hrrs->print();
   365       gclog_or_tty->print("\nDone printing rem set\n");
   366     }
   367     return false;
   368   }
   369   int occupied() {return _count;}
   370 };
   371 #endif
   373 class CountRSSizeClosure: public HeapRegionClosure {
   374   size_t _n;
   375   size_t _tot;
   376   size_t _max;
   377   HeapRegion* _max_r;
   378   enum {
   379     N = 20,
   380     MIN = 6
   381   };
   382   int _histo[N];
   383 public:
   384   CountRSSizeClosure() : _n(0), _tot(0), _max(0), _max_r(NULL) {
   385     for (int i = 0; i < N; i++) _histo[i] = 0;
   386   }
   387   bool doHeapRegion(HeapRegion* r) {
   388     if (!r->continuesHumongous()) {
   389       size_t occ = r->rem_set()->occupied();
   390       _n++;
   391       _tot += occ;
   392       if (occ > _max) {
   393         _max = occ;
   394         _max_r = r;
   395       }
   396       // Fit it into a histo bin.
   397       int s = 1 << MIN;
   398       int i = 0;
   399       while (occ > (size_t) s && i < (N-1)) {
   400         s = s << 1;
   401         i++;
   402       }
   403       _histo[i]++;
   404     }
   405     return false;
   406   }
   407   size_t n() { return _n; }
   408   size_t tot() { return _tot; }
   409   size_t mx() { return _max; }
   410   HeapRegion* mxr() { return _max_r; }
   411   void print_histo() {
   412     int mx = N;
   413     while (mx > 0) {
   414       if (_histo[mx-1] > 0) break;
   415       mx--;
   416     }
   417     gclog_or_tty->print_cr("Number of regions with given RS sizes:");
   418     gclog_or_tty->print_cr("           <= %8d   %8d", 1 << MIN, _histo[0]);
   419     for (int i = 1; i < mx-1; i++) {
   420       gclog_or_tty->print_cr("  %8d  - %8d   %8d",
   421                     (1 << (MIN + i - 1)) + 1,
   422                     1 << (MIN + i),
   423                     _histo[i]);
   424     }
   425     gclog_or_tty->print_cr("            > %8d   %8d", (1 << (MIN+mx-2))+1, _histo[mx-1]);
   426   }
   427 };
   429 template <class T> void
   430 HRInto_G1RemSet::scanNewRefsRS_work(OopsInHeapRegionClosure* oc,
   431                                     int worker_i) {
   432   double scan_new_refs_start_sec = os::elapsedTime();
   433   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   434   CardTableModRefBS* ct_bs = (CardTableModRefBS*) (g1h->barrier_set());
   435   for (int i = 0; i < _new_refs[worker_i]->length(); i++) {
   436     T* p = (T*) _new_refs[worker_i]->at(i);
   437     oop obj = oopDesc::load_decode_heap_oop(p);
   438     // *p was in the collection set when p was pushed on "_new_refs", but
   439     // another thread may have processed this location from an RS, so it
   440     // might not point into the CS any longer.  If so, it's obviously been
   441     // processed, and we don't need to do anything further.
   442     if (g1h->obj_in_cs(obj)) {
   443       HeapRegion* r = g1h->heap_region_containing(p);
   445       DEBUG_ONLY(HeapRegion* to = g1h->heap_region_containing(obj));
   446       oc->set_region(r);
   447       // If "p" has already been processed concurrently, this is
   448       // idempotent.
   449       oc->do_oop(p);
   450     }
   451   }
   452   _g1p->record_scan_new_refs_time(worker_i,
   453                                   (os::elapsedTime() - scan_new_refs_start_sec)
   454                                   * 1000.0);
   455 }
   457 void HRInto_G1RemSet::cleanupHRRS() {
   458   HeapRegionRemSet::cleanup();
   459 }
   461 void
   462 HRInto_G1RemSet::oops_into_collection_set_do(OopsInHeapRegionClosure* oc,
   463                                              int worker_i) {
   464 #if CARD_REPEAT_HISTO
   465   ct_freq_update_histo_and_reset();
   466 #endif
   467   if (worker_i == 0) {
   468     _cg1r->clear_and_record_card_counts();
   469   }
   471   // Make this into a command-line flag...
   472   if (G1RSCountHisto && (ParallelGCThreads == 0 || worker_i == 0)) {
   473     CountRSSizeClosure count_cl;
   474     _g1->heap_region_iterate(&count_cl);
   475     gclog_or_tty->print_cr("Avg of %d RS counts is %f, max is %d, "
   476                   "max region is " PTR_FORMAT,
   477                   count_cl.n(), (float)count_cl.tot()/(float)count_cl.n(),
   478                   count_cl.mx(), count_cl.mxr());
   479     count_cl.print_histo();
   480   }
   482   if (ParallelGCThreads > 0) {
   483     // The two flags below were introduced temporarily to serialize
   484     // the updating and scanning of remembered sets. There are some
   485     // race conditions when these two operations are done in parallel
   486     // and they are causing failures. When we resolve said race
   487     // conditions, we'll revert back to parallel remembered set
   488     // updating and scanning. See CRs 6677707 and 6677708.
   489     if (G1ParallelRSetUpdatingEnabled || (worker_i == 0)) {
   490       updateRS(worker_i);
   491       scanNewRefsRS(oc, worker_i);
   492     } else {
   493       _g1p->record_update_rs_start_time(worker_i, os::elapsedTime() * 1000.0);
   494       _g1p->record_update_rs_processed_buffers(worker_i, 0.0);
   495       _g1p->record_update_rs_time(worker_i, 0.0);
   496       _g1p->record_scan_new_refs_time(worker_i, 0.0);
   497     }
   498     if (G1ParallelRSetScanningEnabled || (worker_i == 0)) {
   499       scanRS(oc, worker_i);
   500     } else {
   501       _g1p->record_scan_rs_start_time(worker_i, os::elapsedTime() * 1000.0);
   502       _g1p->record_scan_rs_time(worker_i, 0.0);
   503     }
   504   } else {
   505     assert(worker_i == 0, "invariant");
   506     updateRS(0);
   507     scanNewRefsRS(oc, 0);
   508     scanRS(oc, 0);
   509   }
   510 }
   512 void HRInto_G1RemSet::
   513 prepare_for_oops_into_collection_set_do() {
   514 #if G1_REM_SET_LOGGING
   515   PrintRSClosure cl;
   516   _g1->collection_set_iterate(&cl);
   517 #endif
   518   cleanupHRRS();
   519   ConcurrentG1Refine* cg1r = _g1->concurrent_g1_refine();
   520   _g1->set_refine_cte_cl_concurrency(false);
   521   DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
   522   dcqs.concatenate_logs();
   524   assert(!_par_traversal_in_progress, "Invariant between iterations.");
   525   if (ParallelGCThreads > 0) {
   526     set_par_traversal(true);
   527     _seq_task->set_par_threads((int)n_workers());
   528   }
   529   guarantee( _cards_scanned == NULL, "invariant" );
   530   _cards_scanned = NEW_C_HEAP_ARRAY(size_t, n_workers());
   531   for (uint i = 0; i < n_workers(); ++i) {
   532     _cards_scanned[i] = 0;
   533   }
   534   _total_cards_scanned = 0;
   535 }
   538 class cleanUpIteratorsClosure : public HeapRegionClosure {
   539   bool doHeapRegion(HeapRegion *r) {
   540     HeapRegionRemSet* hrrs = r->rem_set();
   541     hrrs->init_for_par_iteration();
   542     return false;
   543   }
   544 };
   546 class UpdateRSetOopsIntoCSImmediate : public OopClosure {
   547   G1CollectedHeap* _g1;
   548 public:
   549   UpdateRSetOopsIntoCSImmediate(G1CollectedHeap* g1) : _g1(g1) { }
   550   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
   551   virtual void do_oop(      oop* p) { do_oop_work(p); }
   552   template <class T> void do_oop_work(T* p) {
   553     HeapRegion* to = _g1->heap_region_containing(oopDesc::load_decode_heap_oop(p));
   554     if (to->in_collection_set()) {
   555       to->rem_set()->add_reference(p, 0);
   556     }
   557   }
   558 };
   560 class UpdateRSetOopsIntoCSDeferred : public OopClosure {
   561   G1CollectedHeap* _g1;
   562   CardTableModRefBS* _ct_bs;
   563   DirtyCardQueue* _dcq;
   564 public:
   565   UpdateRSetOopsIntoCSDeferred(G1CollectedHeap* g1, DirtyCardQueue* dcq) :
   566     _g1(g1), _ct_bs((CardTableModRefBS*)_g1->barrier_set()), _dcq(dcq) { }
   567   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
   568   virtual void do_oop(      oop* p) { do_oop_work(p); }
   569   template <class T> void do_oop_work(T* p) {
   570     oop obj = oopDesc::load_decode_heap_oop(p);
   571     if (_g1->obj_in_cs(obj)) {
   572       size_t card_index = _ct_bs->index_for(p);
   573       if (_ct_bs->mark_card_deferred(card_index)) {
   574         _dcq->enqueue((jbyte*)_ct_bs->byte_for_index(card_index));
   575       }
   576     }
   577   }
   578 };
   580 template <class T> void HRInto_G1RemSet::new_refs_iterate_work(OopClosure* cl) {
   581   for (size_t i = 0; i < n_workers(); i++) {
   582     for (int j = 0; j < _new_refs[i]->length(); j++) {
   583       T* p = (T*) _new_refs[i]->at(j);
   584       cl->do_oop(p);
   585     }
   586   }
   587 }
   589 void HRInto_G1RemSet::cleanup_after_oops_into_collection_set_do() {
   590   guarantee( _cards_scanned != NULL, "invariant" );
   591   _total_cards_scanned = 0;
   592   for (uint i = 0; i < n_workers(); ++i)
   593     _total_cards_scanned += _cards_scanned[i];
   594   FREE_C_HEAP_ARRAY(size_t, _cards_scanned);
   595   _cards_scanned = NULL;
   596   // Cleanup after copy
   597 #if G1_REM_SET_LOGGING
   598   PrintRSClosure cl;
   599   _g1->heap_region_iterate(&cl);
   600 #endif
   601   _g1->set_refine_cte_cl_concurrency(true);
   602   cleanUpIteratorsClosure iterClosure;
   603   _g1->collection_set_iterate(&iterClosure);
   604   // Set all cards back to clean.
   605   _g1->cleanUpCardTable();
   607   if (ParallelGCThreads > 0) {
   608     set_par_traversal(false);
   609   }
   611   if (_g1->evacuation_failed()) {
   612     // Restore remembered sets for the regions pointing into
   613     // the collection set.
   614     if (G1DeferredRSUpdate) {
   615       DirtyCardQueue dcq(&_g1->dirty_card_queue_set());
   616       UpdateRSetOopsIntoCSDeferred deferred_update(_g1, &dcq);
   617       new_refs_iterate(&deferred_update);
   618     } else {
   619       UpdateRSetOopsIntoCSImmediate immediate_update(_g1);
   620       new_refs_iterate(&immediate_update);
   621     }
   622   }
   623   for (uint i = 0; i < n_workers(); i++) {
   624     _new_refs[i]->clear();
   625   }
   627   assert(!_par_traversal_in_progress, "Invariant between iterations.");
   628 }
   630 class UpdateRSObjectClosure: public ObjectClosure {
   631   UpdateRSOopClosure* _update_rs_oop_cl;
   632 public:
   633   UpdateRSObjectClosure(UpdateRSOopClosure* update_rs_oop_cl) :
   634     _update_rs_oop_cl(update_rs_oop_cl) {}
   635   void do_object(oop obj) {
   636     obj->oop_iterate(_update_rs_oop_cl);
   637   }
   639 };
   641 class ScrubRSClosure: public HeapRegionClosure {
   642   G1CollectedHeap* _g1h;
   643   BitMap* _region_bm;
   644   BitMap* _card_bm;
   645   CardTableModRefBS* _ctbs;
   646 public:
   647   ScrubRSClosure(BitMap* region_bm, BitMap* card_bm) :
   648     _g1h(G1CollectedHeap::heap()),
   649     _region_bm(region_bm), _card_bm(card_bm),
   650     _ctbs(NULL)
   651   {
   652     ModRefBarrierSet* bs = _g1h->mr_bs();
   653     guarantee(bs->is_a(BarrierSet::CardTableModRef), "Precondition");
   654     _ctbs = (CardTableModRefBS*)bs;
   655   }
   657   bool doHeapRegion(HeapRegion* r) {
   658     if (!r->continuesHumongous()) {
   659       r->rem_set()->scrub(_ctbs, _region_bm, _card_bm);
   660     }
   661     return false;
   662   }
   663 };
   665 void HRInto_G1RemSet::scrub(BitMap* region_bm, BitMap* card_bm) {
   666   ScrubRSClosure scrub_cl(region_bm, card_bm);
   667   _g1->heap_region_iterate(&scrub_cl);
   668 }
   670 void HRInto_G1RemSet::scrub_par(BitMap* region_bm, BitMap* card_bm,
   671                                 int worker_num, int claim_val) {
   672   ScrubRSClosure scrub_cl(region_bm, card_bm);
   673   _g1->heap_region_par_iterate_chunked(&scrub_cl, worker_num, claim_val);
   674 }
   677 static IntHistogram out_of_histo(50, 50);
   679 void HRInto_G1RemSet::concurrentRefineOneCard(jbyte* card_ptr, int worker_i) {
   680   // If the card is no longer dirty, nothing to do.
   681   if (*card_ptr != CardTableModRefBS::dirty_card_val()) return;
   683   // Construct the region representing the card.
   684   HeapWord* start = _ct_bs->addr_for(card_ptr);
   685   // And find the region containing it.
   686   HeapRegion* r = _g1->heap_region_containing(start);
   687   if (r == NULL) {
   688     guarantee(_g1->is_in_permanent(start), "Or else where?");
   689     return;  // Not in the G1 heap (might be in perm, for example.)
   690   }
   691   // Why do we have to check here whether a card is on a young region,
   692   // given that we dirty young regions and, as a result, the
   693   // post-barrier is supposed to filter them out and never to enqueue
   694   // them? When we allocate a new region as the "allocation region" we
   695   // actually dirty its cards after we release the lock, since card
   696   // dirtying while holding the lock was a performance bottleneck. So,
   697   // as a result, it is possible for other threads to actually
   698 // allocate objects in the region (after they acquire the lock)
   699   // before all the cards on the region are dirtied. This is unlikely,
   700   // and it doesn't happen often, but it can happen. So, the extra
   701   // check below filters out those cards.
   702   if (r->is_young()) {
   703     return;
   704   }
   705   // While we are processing RSet buffers during the collection, we
   706   // actually don't want to scan any cards on the collection set,
   707 // since we don't want to update remembered sets with entries that
   708   // point into the collection set, given that live objects from the
   709   // collection set are about to move and such entries will be stale
   710   // very soon. This change also deals with a reliability issue which
   711   // involves scanning a card in the collection set and coming across
   712   // an array that was being chunked and looking malformed. Note,
   713   // however, that if evacuation fails, we have to scan any objects
   714   // that were not moved and create any missing entries.
   715   if (r->in_collection_set()) {
   716     return;
   717   }
   719   // Should we defer it?
   720   if (_cg1r->use_cache()) {
   721     card_ptr = _cg1r->cache_insert(card_ptr);
   722     // If it was not an eviction, nothing to do.
   723     if (card_ptr == NULL) return;
   725     // OK, we have to reset the card start, region, etc.
   726     start = _ct_bs->addr_for(card_ptr);
   727     r = _g1->heap_region_containing(start);
   728     if (r == NULL) {
   729       guarantee(_g1->is_in_permanent(start), "Or else where?");
   730       return;  // Not in the G1 heap (might be in perm, for example.)
   731     }
   732     guarantee(!r->is_young(), "It was evicted in the current minor cycle.");
   733   }
   735   HeapWord* end   = _ct_bs->addr_for(card_ptr + 1);
   736   MemRegion dirtyRegion(start, end);
   738 #if CARD_REPEAT_HISTO
   739   init_ct_freq_table(_g1->g1_reserved_obj_bytes());
   740   ct_freq_note_card(_ct_bs->index_for(start));
   741 #endif
   743   UpdateRSOopClosure update_rs_oop_cl(this, worker_i);
   744   update_rs_oop_cl.set_from(r);
   745   FilterOutOfRegionClosure filter_then_update_rs_oop_cl(r, &update_rs_oop_cl);
   747   // Undirty the card.
   748   *card_ptr = CardTableModRefBS::clean_card_val();
   749   // We must complete this write before we do any of the reads below.
   750   OrderAccess::storeload();
   751   // And process it, being careful of unallocated portions of TLAB's.
   752   HeapWord* stop_point =
   753     r->oops_on_card_seq_iterate_careful(dirtyRegion,
   754                                         &filter_then_update_rs_oop_cl);
   755   // If stop_point is non-null, then we encountered an unallocated region
   756   // (perhaps the unfilled portion of a TLAB.)  For now, we'll dirty the
   757   // card and re-enqueue: if we put off the card until a GC pause, then the
   758   // unallocated portion will be filled in.  Alternatively, we might try
   759   // the full complexity of the technique used in "regular" precleaning.
   760   if (stop_point != NULL) {
   761     // The card might have gotten re-dirtied and re-enqueued while we
   762     // worked.  (In fact, it's pretty likely.)
   763     if (*card_ptr != CardTableModRefBS::dirty_card_val()) {
   764       *card_ptr = CardTableModRefBS::dirty_card_val();
   765       MutexLockerEx x(Shared_DirtyCardQ_lock,
   766                       Mutex::_no_safepoint_check_flag);
   767       DirtyCardQueue* sdcq =
   768         JavaThread::dirty_card_queue_set().shared_dirty_card_queue();
   769       sdcq->enqueue(card_ptr);
   770     }
   771   } else {
   772     out_of_histo.add_entry(filter_then_update_rs_oop_cl.out_of_region());
   773     _conc_refine_cards++;
   774   }
   775 }
   777 class HRRSStatsIter: public HeapRegionClosure {
   778   size_t _occupied;
   779   size_t _total_mem_sz;
   780   size_t _max_mem_sz;
   781   HeapRegion* _max_mem_sz_region;
   782 public:
   783   HRRSStatsIter() :
   784     _occupied(0),
   785     _total_mem_sz(0),
   786     _max_mem_sz(0),
   787     _max_mem_sz_region(NULL)
   788   {}
   790   bool doHeapRegion(HeapRegion* r) {
   791     if (r->continuesHumongous()) return false;
   792     size_t mem_sz = r->rem_set()->mem_size();
   793     if (mem_sz > _max_mem_sz) {
   794       _max_mem_sz = mem_sz;
   795       _max_mem_sz_region = r;
   796     }
   797     _total_mem_sz += mem_sz;
   798     size_t occ = r->rem_set()->occupied();
   799     _occupied += occ;
   800     return false;
   801   }
   802   size_t total_mem_sz() { return _total_mem_sz; }
   803   size_t max_mem_sz() { return _max_mem_sz; }
   804   size_t occupied() { return _occupied; }
   805   HeapRegion* max_mem_sz_region() { return _max_mem_sz_region; }
   806 };
   808 class PrintRSThreadVTimeClosure : public ThreadClosure {
   809 public:
   810   virtual void do_thread(Thread *t) {
   811     ConcurrentG1RefineThread* crt = (ConcurrentG1RefineThread*) t;
   812     gclog_or_tty->print("    %5.2f", crt->vtime_accum());
   813   }
   814 };
   816 void HRInto_G1RemSet::print_summary_info() {
   817   G1CollectedHeap* g1 = G1CollectedHeap::heap();
   819 #if CARD_REPEAT_HISTO
   820   gclog_or_tty->print_cr("\nG1 card_repeat count histogram: ");
   821   gclog_or_tty->print_cr("  # of repeats --> # of cards with that number.");
   822   card_repeat_count.print_on(gclog_or_tty);
   823 #endif
   825   if (FILTEROUTOFREGIONCLOSURE_DOHISTOGRAMCOUNT) {
   826     gclog_or_tty->print_cr("\nG1 rem-set out-of-region histogram: ");
   827     gclog_or_tty->print_cr("  # of CS ptrs --> # of cards with that number.");
   828     out_of_histo.print_on(gclog_or_tty);
   829   }
   830   gclog_or_tty->print_cr("\n Concurrent RS processed %d cards",
   831                          _conc_refine_cards);
   832   DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
   833   jint tot_processed_buffers =
   834     dcqs.processed_buffers_mut() + dcqs.processed_buffers_rs_thread();
   835   gclog_or_tty->print_cr("  Of %d completed buffers:", tot_processed_buffers);
   836   gclog_or_tty->print_cr("     %8d (%5.1f%%) by conc RS threads.",
   837                 dcqs.processed_buffers_rs_thread(),
   838                 100.0*(float)dcqs.processed_buffers_rs_thread()/
   839                 (float)tot_processed_buffers);
   840   gclog_or_tty->print_cr("     %8d (%5.1f%%) by mutator threads.",
   841                 dcqs.processed_buffers_mut(),
   842                 100.0*(float)dcqs.processed_buffers_mut()/
   843                 (float)tot_processed_buffers);
   844   gclog_or_tty->print_cr("  Conc RS threads times(s)");
   845   PrintRSThreadVTimeClosure p;
   846   gclog_or_tty->print("     ");
   847   g1->concurrent_g1_refine()->threads_do(&p);
   848   gclog_or_tty->print_cr("");
   850   if (G1UseHRIntoRS) {
   851     HRRSStatsIter blk;
   852     g1->heap_region_iterate(&blk);
   853     gclog_or_tty->print_cr("  Total heap region rem set sizes = " SIZE_FORMAT "K."
   854                            "  Max = " SIZE_FORMAT "K.",
   855                            blk.total_mem_sz()/K, blk.max_mem_sz()/K);
   856     gclog_or_tty->print_cr("  Static structures = " SIZE_FORMAT "K,"
   857                            " free_lists = " SIZE_FORMAT "K.",
   858                            HeapRegionRemSet::static_mem_size()/K,
   859                            HeapRegionRemSet::fl_mem_size()/K);
   860     gclog_or_tty->print_cr("    %d occupied cards represented.",
   861                            blk.occupied());
   862     gclog_or_tty->print_cr("    Max sz region = [" PTR_FORMAT ", " PTR_FORMAT " )"
   863                            ", cap = " SIZE_FORMAT "K, occ = " SIZE_FORMAT "K.",
   864                            blk.max_mem_sz_region()->bottom(), blk.max_mem_sz_region()->end(),
   865                            (blk.max_mem_sz_region()->rem_set()->mem_size() + K - 1)/K,
   866                            (blk.max_mem_sz_region()->rem_set()->occupied() + K - 1)/K);
   867     gclog_or_tty->print_cr("    Did %d coarsenings.",
   868                   HeapRegionRemSet::n_coarsenings());
   870   }
   871 }
   872 void HRInto_G1RemSet::prepare_for_verify() {
   873   if (G1HRRSFlushLogBuffersOnVerify &&
   874       (VerifyBeforeGC || VerifyAfterGC)
   875       &&  !_g1->full_collection()) {
   876     cleanupHRRS();
   877     _g1->set_refine_cte_cl_concurrency(false);
   878     if (SafepointSynchronize::is_at_safepoint()) {
   879       DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
   880       dcqs.concatenate_logs();
   881     }
   882     bool cg1r_use_cache = _cg1r->use_cache();
   883     _cg1r->set_use_cache(false);
   884     updateRS(0);
   885     _cg1r->set_use_cache(cg1r_use_cache);
   887     assert(JavaThread::dirty_card_queue_set().completed_buffers_num() == 0, "All should be consumed");
   888   }
   889 }
