src/share/vm/gc_implementation/g1/g1RemSet.cpp

Tue, 04 Aug 2009 16:00:17 -0700

author
johnc
date
Tue, 04 Aug 2009 16:00:17 -0700
changeset 1325
6cb8e9df7174
parent 1280
df6caf649ff7
child 1696
0414c1049f15
permissions
-rw-r--r--

6819077: G1: first GC thread coming late into the GC.
Summary: The first worker thread is delayed when entering the GC because it clears the card count table that is used in identifying hot cards. Replace the card count table with a dynamically sized evicting hash table that includes an epoch based counter.
Reviewed-by: iveresov, tonyp

     1 /*
     2  * Copyright 2001-2009 Sun Microsystems, Inc.  All Rights Reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.
     8  *
     9  * This code is distributed in the hope that it will be useful, but WITHOUT
    10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    12  * version 2 for more details (a copy is included in the LICENSE file that
    13  * accompanied this code).
    14  *
    15  * You should have received a copy of the GNU General Public License version
    16  * 2 along with this work; if not, write to the Free Software Foundation,
    17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    18  *
    19  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
    20  * CA 95054 USA or visit www.sun.com if you need additional information or
    21  * have any questions.
    22  *
    23  */
    25 #include "incls/_precompiled.incl"
    26 #include "incls/_g1RemSet.cpp.incl"
    28 #define CARD_REPEAT_HISTO 0
    30 #if CARD_REPEAT_HISTO
    31 static size_t ct_freq_sz;
    32 static jbyte* ct_freq = NULL;
    34 void init_ct_freq_table(size_t heap_sz_bytes) {
    35   if (ct_freq == NULL) {
    36     ct_freq_sz = heap_sz_bytes/CardTableModRefBS::card_size;
    37     ct_freq = new jbyte[ct_freq_sz];
    38     for (size_t j = 0; j < ct_freq_sz; j++) ct_freq[j] = 0;
    39   }
    40 }
    42 void ct_freq_note_card(size_t index) {
    43   assert(0 <= index && index < ct_freq_sz, "Bounds error.");
    44   if (ct_freq[index] < 100) { ct_freq[index]++; }
    45 }
    47 static IntHistogram card_repeat_count(10, 10);
    49 void ct_freq_update_histo_and_reset() {
    50   for (size_t j = 0; j < ct_freq_sz; j++) {
    51     card_repeat_count.add_entry(ct_freq[j]);
    52     ct_freq[j] = 0;
    53   }
    55 }
    56 #endif
    59 class IntoCSOopClosure: public OopsInHeapRegionClosure {
    60   OopsInHeapRegionClosure* _blk;
    61   G1CollectedHeap* _g1;
    62 public:
    63   IntoCSOopClosure(G1CollectedHeap* g1, OopsInHeapRegionClosure* blk) :
    64     _g1(g1), _blk(blk) {}
    65   void set_region(HeapRegion* from) {
    66     _blk->set_region(from);
    67   }
    68   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
    69   virtual void do_oop(      oop* p) { do_oop_work(p); }
    70   template <class T> void do_oop_work(T* p) {
    71     oop obj = oopDesc::load_decode_heap_oop(p);
    72     if (_g1->obj_in_cs(obj)) _blk->do_oop(p);
    73   }
    74   bool apply_to_weak_ref_discovered_field() { return true; }
    75   bool idempotent() { return true; }
    76 };
    78 class IntoCSRegionClosure: public HeapRegionClosure {
    79   IntoCSOopClosure _blk;
    80   G1CollectedHeap* _g1;
    81 public:
    82   IntoCSRegionClosure(G1CollectedHeap* g1, OopsInHeapRegionClosure* blk) :
    83     _g1(g1), _blk(g1, blk) {}
    84   bool doHeapRegion(HeapRegion* r) {
    85     if (!r->in_collection_set()) {
    86       _blk.set_region(r);
    87       if (r->isHumongous()) {
    88         if (r->startsHumongous()) {
    89           oop obj = oop(r->bottom());
    90           obj->oop_iterate(&_blk);
    91         }
    92       } else {
    93         r->oop_before_save_marks_iterate(&_blk);
    94       }
    95     }
    96     return false;
    97   }
    98 };
   100 void
   101 StupidG1RemSet::oops_into_collection_set_do(OopsInHeapRegionClosure* oc,
   102                                             int worker_i) {
   103   IntoCSRegionClosure rc(_g1, oc);
   104   _g1->heap_region_iterate(&rc);
   105 }
   107 class VerifyRSCleanCardOopClosure: public OopClosure {
   108   G1CollectedHeap* _g1;
   109 public:
   110   VerifyRSCleanCardOopClosure(G1CollectedHeap* g1) : _g1(g1) {}
   112   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
   113   virtual void do_oop(      oop* p) { do_oop_work(p); }
   114   template <class T> void do_oop_work(T* p) {
   115     oop obj = oopDesc::load_decode_heap_oop(p);
   116     HeapRegion* to = _g1->heap_region_containing(obj);
   117     guarantee(to == NULL || !to->in_collection_set(),
   118               "Missed a rem set member.");
   119   }
   120 };
   122 HRInto_G1RemSet::HRInto_G1RemSet(G1CollectedHeap* g1, CardTableModRefBS* ct_bs)
   123   : G1RemSet(g1), _ct_bs(ct_bs), _g1p(_g1->g1_policy()),
   124     _cg1r(g1->concurrent_g1_refine()),
   125     _par_traversal_in_progress(false), _new_refs(NULL),
   126     _cards_scanned(NULL), _total_cards_scanned(0)
   127 {
   128   _seq_task = new SubTasksDone(NumSeqTasks);
   129   guarantee(n_workers() > 0, "There should be some workers");
   130   _new_refs = NEW_C_HEAP_ARRAY(GrowableArray<OopOrNarrowOopStar>*, n_workers());
   131   for (uint i = 0; i < n_workers(); i++) {
   132     _new_refs[i] = new (ResourceObj::C_HEAP) GrowableArray<OopOrNarrowOopStar>(8192,true);
   133   }
   134 }
   136 HRInto_G1RemSet::~HRInto_G1RemSet() {
   137   delete _seq_task;
   138   for (uint i = 0; i < n_workers(); i++) {
   139     delete _new_refs[i];
   140   }
   141   FREE_C_HEAP_ARRAY(GrowableArray<OopOrNarrowOopStar>*, _new_refs);
   142 }
   144 void CountNonCleanMemRegionClosure::do_MemRegion(MemRegion mr) {
   145   if (_g1->is_in_g1_reserved(mr.start())) {
   146     _n += (int) ((mr.byte_size() / CardTableModRefBS::card_size));
   147     if (_start_first == NULL) _start_first = mr.start();
   148   }
   149 }
   151 class ScanRSClosure : public HeapRegionClosure {
   152   size_t _cards_done, _cards;
   153   G1CollectedHeap* _g1h;
   154   OopsInHeapRegionClosure* _oc;
   155   G1BlockOffsetSharedArray* _bot_shared;
   156   CardTableModRefBS *_ct_bs;
   157   int _worker_i;
   158   bool _try_claimed;
   159   size_t _min_skip_distance, _max_skip_distance;
   160 public:
   161   ScanRSClosure(OopsInHeapRegionClosure* oc, int worker_i) :
   162     _oc(oc),
   163     _cards(0),
   164     _cards_done(0),
   165     _worker_i(worker_i),
   166     _try_claimed(false)
   167   {
   168     _g1h = G1CollectedHeap::heap();
   169     _bot_shared = _g1h->bot_shared();
   170     _ct_bs = (CardTableModRefBS*) (_g1h->barrier_set());
   171     _min_skip_distance = 16;
   172     _max_skip_distance = 2 * _g1h->n_par_threads() * _min_skip_distance;
   173   }
   175   void set_try_claimed() { _try_claimed = true; }
   177   void scanCard(size_t index, HeapRegion *r) {
   178     _cards_done++;
   179     DirtyCardToOopClosure* cl =
   180       r->new_dcto_closure(_oc,
   181                          CardTableModRefBS::Precise,
   182                          HeapRegionDCTOC::IntoCSFilterKind);
   184     // Set the "from" region in the closure.
   185     _oc->set_region(r);
   186     HeapWord* card_start = _bot_shared->address_for_index(index);
   187     HeapWord* card_end = card_start + G1BlockOffsetSharedArray::N_words;
   188     Space *sp = SharedHeap::heap()->space_containing(card_start);
   189     MemRegion sm_region;
   190     if (ParallelGCThreads > 0) {
   191       // first find the used area
   192       sm_region = sp->used_region_at_save_marks();
   193     } else {
   194       // The closure is not idempotent.  We shouldn't look at objects
   195       // allocated during the GC.
   196       sm_region = sp->used_region_at_save_marks();
   197     }
   198     MemRegion mr = sm_region.intersection(MemRegion(card_start,card_end));
   199     if (!mr.is_empty()) {
   200       cl->do_MemRegion(mr);
   201     }
   202   }
   204   void printCard(HeapRegion* card_region, size_t card_index,
   205                  HeapWord* card_start) {
   206     gclog_or_tty->print_cr("T %d Region [" PTR_FORMAT ", " PTR_FORMAT ") "
   207                            "RS names card %p: "
   208                            "[" PTR_FORMAT ", " PTR_FORMAT ")",
   209                            _worker_i,
   210                            card_region->bottom(), card_region->end(),
   211                            card_index,
   212                            card_start, card_start + G1BlockOffsetSharedArray::N_words);
   213   }
   215   bool doHeapRegion(HeapRegion* r) {
   216     assert(r->in_collection_set(), "should only be called on elements of CS.");
   217     HeapRegionRemSet* hrrs = r->rem_set();
   218     if (hrrs->iter_is_complete()) return false; // All done.
   219     if (!_try_claimed && !hrrs->claim_iter()) return false;
   220     _g1h->push_dirty_cards_region(r);
   221     // If we didn't return above, then
   222     //   _try_claimed || r->claim_iter()
   223     // is true: either we're supposed to work on claimed-but-not-complete
   224     // regions, or we successfully claimed the region.
   225     HeapRegionRemSetIterator* iter = _g1h->rem_set_iterator(_worker_i);
   226     hrrs->init_iterator(iter);
   227     size_t card_index;
   228     size_t skip_distance = 0, current_card = 0, jump_to_card = 0;
   229     while (iter->has_next(card_index)) {
   230       if (current_card < jump_to_card) {
   231         ++current_card;
   232         continue;
   233       }
   234       HeapWord* card_start = _g1h->bot_shared()->address_for_index(card_index);
   235 #if 0
   236       gclog_or_tty->print("Rem set iteration yielded card [" PTR_FORMAT ", " PTR_FORMAT ").\n",
   237                           card_start, card_start + CardTableModRefBS::card_size_in_words);
   238 #endif
   240       HeapRegion* card_region = _g1h->heap_region_containing(card_start);
   241       assert(card_region != NULL, "Yielding cards not in the heap?");
   242       _cards++;
   244       if (!card_region->is_on_dirty_cards_region_list()) {
   245         _g1h->push_dirty_cards_region(card_region);
   246       }
   248        // If the card is dirty, then we will scan it during updateRS.
   249       if (!card_region->in_collection_set() && !_ct_bs->is_card_dirty(card_index)) {
   250           if (!_ct_bs->is_card_claimed(card_index) && _ct_bs->claim_card(card_index)) {
   251             scanCard(card_index, card_region);
   252           } else if (_try_claimed) {
   253             if (jump_to_card == 0 || jump_to_card != current_card) {
   254               // We did some useful work in the previous iteration.
   255               // Decrease the distance.
   256               skip_distance = MAX2(skip_distance >> 1, _min_skip_distance);
   257             } else {
   258               // Previous iteration resulted in a claim failure.
   259               // Increase the distance.
   260               skip_distance = MIN2(skip_distance << 1, _max_skip_distance);
   261             }
   262             jump_to_card = current_card + skip_distance;
   263           }
   264       }
   265       ++current_card;
   266     }
   267     if (!_try_claimed) {
   268       hrrs->set_iter_complete();
   269     }
   270     return false;
   271   }
   272   // Set all cards back to clean.
   273   void cleanup() {_g1h->cleanUpCardTable();}
   274   size_t cards_done() { return _cards_done;}
   275   size_t cards_looked_up() { return _cards;}
   276 };
   278 // We want the parallel threads to start their scanning at
   279 // different collection set regions to avoid contention.
   280 // If we have:
   281 //          n collection set regions
   282 //          p threads
   283 // Then thread t will start at region t * floor (n/p)
   285 HeapRegion* HRInto_G1RemSet::calculateStartRegion(int worker_i) {
   286   HeapRegion* result = _g1p->collection_set();
   287   if (ParallelGCThreads > 0) {
   288     size_t cs_size = _g1p->collection_set_size();
   289     int n_workers = _g1->workers()->total_workers();
   290     size_t cs_spans = cs_size / n_workers;
   291     size_t ind      = cs_spans * worker_i;
   292     for (size_t i = 0; i < ind; i++)
   293       result = result->next_in_collection_set();
   294   }
   295   return result;
   296 }
   298 void HRInto_G1RemSet::scanRS(OopsInHeapRegionClosure* oc, int worker_i) {
   299   double rs_time_start = os::elapsedTime();
   300   HeapRegion *startRegion = calculateStartRegion(worker_i);
   302   BufferingOopsInHeapRegionClosure boc(oc);
   303   ScanRSClosure scanRScl(&boc, worker_i);
   304   _g1->collection_set_iterate_from(startRegion, &scanRScl);
   305   scanRScl.set_try_claimed();
   306   _g1->collection_set_iterate_from(startRegion, &scanRScl);
   308   boc.done();
   309   double closure_app_time_sec = boc.closure_app_seconds();
   310   double scan_rs_time_sec = (os::elapsedTime() - rs_time_start) -
   311     closure_app_time_sec;
   312   double closure_app_time_ms = closure_app_time_sec * 1000.0;
   314   assert( _cards_scanned != NULL, "invariant" );
   315   _cards_scanned[worker_i] = scanRScl.cards_done();
   317   _g1p->record_scan_rs_start_time(worker_i, rs_time_start * 1000.0);
   318   _g1p->record_scan_rs_time(worker_i, scan_rs_time_sec * 1000.0);
   320   double scan_new_refs_time_ms = _g1p->get_scan_new_refs_time(worker_i);
   321   if (scan_new_refs_time_ms > 0.0) {
   322     closure_app_time_ms += scan_new_refs_time_ms;
   323   }
   325   _g1p->record_obj_copy_time(worker_i, closure_app_time_ms);
   326 }
   328 void HRInto_G1RemSet::updateRS(int worker_i) {
   329   ConcurrentG1Refine* cg1r = _g1->concurrent_g1_refine();
   331   double start = os::elapsedTime();
   332   _g1p->record_update_rs_start_time(worker_i, start * 1000.0);
   334   // Apply the appropriate closure to all remaining log entries.
   335   _g1->iterate_dirty_card_closure(false, worker_i);
   336   // Now there should be no dirty cards.
   337   if (G1RSLogCheckCardTable) {
   338     CountNonCleanMemRegionClosure cl(_g1);
   339     _ct_bs->mod_card_iterate(&cl);
   340     // XXX This isn't true any more: keeping cards of young regions
   341     // marked dirty broke it.  Need some reasonable fix.
   342     guarantee(cl.n() == 0, "Card table should be clean.");
   343   }
   345   _g1p->record_update_rs_time(worker_i, (os::elapsedTime() - start) * 1000.0);
   346 }
   348 #ifndef PRODUCT
   349 class PrintRSClosure : public HeapRegionClosure {
   350   int _count;
   351 public:
   352   PrintRSClosure() : _count(0) {}
   353   bool doHeapRegion(HeapRegion* r) {
   354     HeapRegionRemSet* hrrs = r->rem_set();
   355     _count += (int) hrrs->occupied();
   356     if (hrrs->occupied() == 0) {
   357       gclog_or_tty->print("Heap Region [" PTR_FORMAT ", " PTR_FORMAT ") "
   358                           "has no remset entries\n",
   359                           r->bottom(), r->end());
   360     } else {
   361       gclog_or_tty->print("Printing rem set for heap region [" PTR_FORMAT ", " PTR_FORMAT ")\n",
   362                           r->bottom(), r->end());
   363       r->print();
   364       hrrs->print();
   365       gclog_or_tty->print("\nDone printing rem set\n");
   366     }
   367     return false;
   368   }
   369   int occupied() {return _count;}
   370 };
   371 #endif
   373 class CountRSSizeClosure: public HeapRegionClosure {
   374   size_t _n;
   375   size_t _tot;
   376   size_t _max;
   377   HeapRegion* _max_r;
   378   enum {
   379     N = 20,
   380     MIN = 6
   381   };
   382   int _histo[N];
   383 public:
   384   CountRSSizeClosure() : _n(0), _tot(0), _max(0), _max_r(NULL) {
   385     for (int i = 0; i < N; i++) _histo[i] = 0;
   386   }
   387   bool doHeapRegion(HeapRegion* r) {
   388     if (!r->continuesHumongous()) {
   389       size_t occ = r->rem_set()->occupied();
   390       _n++;
   391       _tot += occ;
   392       if (occ > _max) {
   393         _max = occ;
   394         _max_r = r;
   395       }
   396       // Fit it into a histo bin.
   397       int s = 1 << MIN;
   398       int i = 0;
   399       while (occ > (size_t) s && i < (N-1)) {
   400         s = s << 1;
   401         i++;
   402       }
   403       _histo[i]++;
   404     }
   405     return false;
   406   }
   407   size_t n() { return _n; }
   408   size_t tot() { return _tot; }
   409   size_t mx() { return _max; }
   410   HeapRegion* mxr() { return _max_r; }
   411   void print_histo() {
   412     int mx = N;
   413     while (mx >= 0) {
   414       if (_histo[mx-1] > 0) break;
   415       mx--;
   416     }
   417     gclog_or_tty->print_cr("Number of regions with given RS sizes:");
   418     gclog_or_tty->print_cr("           <= %8d   %8d", 1 << MIN, _histo[0]);
   419     for (int i = 1; i < mx-1; i++) {
   420       gclog_or_tty->print_cr("  %8d  - %8d   %8d",
   421                     (1 << (MIN + i - 1)) + 1,
   422                     1 << (MIN + i),
   423                     _histo[i]);
   424     }
   425     gclog_or_tty->print_cr("            > %8d   %8d", (1 << (MIN+mx-2))+1, _histo[mx-1]);
   426   }
   427 };
   429 template <class T> void
   430 HRInto_G1RemSet::scanNewRefsRS_work(OopsInHeapRegionClosure* oc,
   431                                     int worker_i) {
   432   double scan_new_refs_start_sec = os::elapsedTime();
   433   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   434   CardTableModRefBS* ct_bs = (CardTableModRefBS*) (g1h->barrier_set());
   435   for (int i = 0; i < _new_refs[worker_i]->length(); i++) {
   436     T* p = (T*) _new_refs[worker_i]->at(i);
   437     oop obj = oopDesc::load_decode_heap_oop(p);
   438     // *p was in the collection set when p was pushed on "_new_refs", but
   439     // another thread may have processed this location from an RS, so it
   440     // might not point into the CS any longer.  If so, it's obviously been
   441     // processed, and we don't need to do anything further.
   442     if (g1h->obj_in_cs(obj)) {
   443       HeapRegion* r = g1h->heap_region_containing(p);
   445       DEBUG_ONLY(HeapRegion* to = g1h->heap_region_containing(obj));
   446       oc->set_region(r);
   447       // If "p" has already been processed concurrently, this is
   448       // idempotent.
   449       oc->do_oop(p);
   450     }
   451   }
   452   _g1p->record_scan_new_refs_time(worker_i,
   453                                   (os::elapsedTime() - scan_new_refs_start_sec)
   454                                   * 1000.0);
   455 }
   457 void HRInto_G1RemSet::cleanupHRRS() {
   458   HeapRegionRemSet::cleanup();
   459 }
   461 void
   462 HRInto_G1RemSet::oops_into_collection_set_do(OopsInHeapRegionClosure* oc,
   463                                              int worker_i) {
   464 #if CARD_REPEAT_HISTO
   465   ct_freq_update_histo_and_reset();
   466 #endif
   467   if (worker_i == 0) {
   468     _cg1r->clear_and_record_card_counts();
   469   }
   471   // Make this into a command-line flag...
   472   if (G1RSCountHisto && (ParallelGCThreads == 0 || worker_i == 0)) {
   473     CountRSSizeClosure count_cl;
   474     _g1->heap_region_iterate(&count_cl);
   475     gclog_or_tty->print_cr("Avg of %d RS counts is %f, max is %d, "
   476                   "max region is " PTR_FORMAT,
   477                   count_cl.n(), (float)count_cl.tot()/(float)count_cl.n(),
   478                   count_cl.mx(), count_cl.mxr());
   479     count_cl.print_histo();
   480   }
   482   if (ParallelGCThreads > 0) {
   483     // The two flags below were introduced temporarily to serialize
   484     // the updating and scanning of remembered sets. There are some
   485     // race conditions when these two operations are done in parallel
   486     // and they are causing failures. When we resolve said race
   487     // conditions, we'll revert back to parallel remembered set
   488     // updating and scanning. See CRs 6677707 and 6677708.
   489     if (G1ParallelRSetUpdatingEnabled || (worker_i == 0)) {
   490       updateRS(worker_i);
   491       scanNewRefsRS(oc, worker_i);
   492     } else {
   493       _g1p->record_update_rs_start_time(worker_i, os::elapsedTime() * 1000.0);
   494       _g1p->record_update_rs_processed_buffers(worker_i, 0.0);
   495       _g1p->record_update_rs_time(worker_i, 0.0);
   496       _g1p->record_scan_new_refs_time(worker_i, 0.0);
   497     }
   498     if (G1ParallelRSetScanningEnabled || (worker_i == 0)) {
   499       scanRS(oc, worker_i);
   500     } else {
   501       _g1p->record_scan_rs_start_time(worker_i, os::elapsedTime() * 1000.0);
   502       _g1p->record_scan_rs_time(worker_i, 0.0);
   503     }
   504   } else {
   505     assert(worker_i == 0, "invariant");
   506     updateRS(0);
   507     scanNewRefsRS(oc, 0);
   508     scanRS(oc, 0);
   509   }
   510 }
   512 void HRInto_G1RemSet::
   513 prepare_for_oops_into_collection_set_do() {
   514 #if G1_REM_SET_LOGGING
   515   PrintRSClosure cl;
   516   _g1->collection_set_iterate(&cl);
   517 #endif
   518   cleanupHRRS();
   519   ConcurrentG1Refine* cg1r = _g1->concurrent_g1_refine();
   520   _g1->set_refine_cte_cl_concurrency(false);
   521   DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
   522   dcqs.concatenate_logs();
   524   assert(!_par_traversal_in_progress, "Invariant between iterations.");
   525   if (ParallelGCThreads > 0) {
   526     set_par_traversal(true);
   527     _seq_task->set_par_threads((int)n_workers());
   528   }
   529   guarantee( _cards_scanned == NULL, "invariant" );
   530   _cards_scanned = NEW_C_HEAP_ARRAY(size_t, n_workers());
   531   for (uint i = 0; i < n_workers(); ++i) {
   532     _cards_scanned[i] = 0;
   533   }
   534   _total_cards_scanned = 0;
   535 }
   538 class cleanUpIteratorsClosure : public HeapRegionClosure {
   539   bool doHeapRegion(HeapRegion *r) {
   540     HeapRegionRemSet* hrrs = r->rem_set();
   541     hrrs->init_for_par_iteration();
   542     return false;
   543   }
   544 };
   546 class UpdateRSetOopsIntoCSImmediate : public OopClosure {
   547   G1CollectedHeap* _g1;
   548 public:
   549   UpdateRSetOopsIntoCSImmediate(G1CollectedHeap* g1) : _g1(g1) { }
   550   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
   551   virtual void do_oop(      oop* p) { do_oop_work(p); }
   552   template <class T> void do_oop_work(T* p) {
   553     HeapRegion* to = _g1->heap_region_containing(oopDesc::load_decode_heap_oop(p));
   554     if (to->in_collection_set()) {
   555       to->rem_set()->add_reference(p, 0);
   556     }
   557   }
   558 };
   560 class UpdateRSetOopsIntoCSDeferred : public OopClosure {
   561   G1CollectedHeap* _g1;
   562   CardTableModRefBS* _ct_bs;
   563   DirtyCardQueue* _dcq;
   564 public:
   565   UpdateRSetOopsIntoCSDeferred(G1CollectedHeap* g1, DirtyCardQueue* dcq) :
   566     _g1(g1), _ct_bs((CardTableModRefBS*)_g1->barrier_set()), _dcq(dcq) { }
   567   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
   568   virtual void do_oop(      oop* p) { do_oop_work(p); }
   569   template <class T> void do_oop_work(T* p) {
   570     oop obj = oopDesc::load_decode_heap_oop(p);
   571     if (_g1->obj_in_cs(obj)) {
   572       size_t card_index = _ct_bs->index_for(p);
   573       if (_ct_bs->mark_card_deferred(card_index)) {
   574         _dcq->enqueue((jbyte*)_ct_bs->byte_for_index(card_index));
   575       }
   576     }
   577   }
   578 };
   580 template <class T> void HRInto_G1RemSet::new_refs_iterate_work(OopClosure* cl) {
   581   for (size_t i = 0; i < n_workers(); i++) {
   582     for (int j = 0; j < _new_refs[i]->length(); j++) {
   583       T* p = (T*) _new_refs[i]->at(j);
   584       cl->do_oop(p);
   585     }
   586   }
   587 }
   589 void HRInto_G1RemSet::cleanup_after_oops_into_collection_set_do() {
   590   guarantee( _cards_scanned != NULL, "invariant" );
   591   _total_cards_scanned = 0;
   592   for (uint i = 0; i < n_workers(); ++i)
   593     _total_cards_scanned += _cards_scanned[i];
   594   FREE_C_HEAP_ARRAY(size_t, _cards_scanned);
   595   _cards_scanned = NULL;
   596   // Cleanup after copy
   597 #if G1_REM_SET_LOGGING
   598   PrintRSClosure cl;
   599   _g1->heap_region_iterate(&cl);
   600 #endif
   601   _g1->set_refine_cte_cl_concurrency(true);
   602   cleanUpIteratorsClosure iterClosure;
   603   _g1->collection_set_iterate(&iterClosure);
   604   // Set all cards back to clean.
   605   _g1->cleanUpCardTable();
   607   if (ParallelGCThreads > 0) {
   608     set_par_traversal(false);
   609   }
   611   if (_g1->evacuation_failed()) {
   612     // Restore remembered sets for the regions pointing into
   613     // the collection set.
   614     if (G1DeferredRSUpdate) {
   615       DirtyCardQueue dcq(&_g1->dirty_card_queue_set());
   616       UpdateRSetOopsIntoCSDeferred deferred_update(_g1, &dcq);
   617       new_refs_iterate(&deferred_update);
   618     } else {
   619       UpdateRSetOopsIntoCSImmediate immediate_update(_g1);
   620       new_refs_iterate(&immediate_update);
   621     }
   622   }
   623   for (uint i = 0; i < n_workers(); i++) {
   624     _new_refs[i]->clear();
   625   }
   627   assert(!_par_traversal_in_progress, "Invariant between iterations.");
   628 }
   630 class UpdateRSObjectClosure: public ObjectClosure {
   631   UpdateRSOopClosure* _update_rs_oop_cl;
   632 public:
   633   UpdateRSObjectClosure(UpdateRSOopClosure* update_rs_oop_cl) :
   634     _update_rs_oop_cl(update_rs_oop_cl) {}
   635   void do_object(oop obj) {
   636     obj->oop_iterate(_update_rs_oop_cl);
   637   }
   639 };
   641 class ScrubRSClosure: public HeapRegionClosure {
   642   G1CollectedHeap* _g1h;
   643   BitMap* _region_bm;
   644   BitMap* _card_bm;
   645   CardTableModRefBS* _ctbs;
   646 public:
   647   ScrubRSClosure(BitMap* region_bm, BitMap* card_bm) :
   648     _g1h(G1CollectedHeap::heap()),
   649     _region_bm(region_bm), _card_bm(card_bm),
   650     _ctbs(NULL)
   651   {
   652     ModRefBarrierSet* bs = _g1h->mr_bs();
   653     guarantee(bs->is_a(BarrierSet::CardTableModRef), "Precondition");
   654     _ctbs = (CardTableModRefBS*)bs;
   655   }
   657   bool doHeapRegion(HeapRegion* r) {
   658     if (!r->continuesHumongous()) {
   659       r->rem_set()->scrub(_ctbs, _region_bm, _card_bm);
   660     }
   661     return false;
   662   }
   663 };
   665 void HRInto_G1RemSet::scrub(BitMap* region_bm, BitMap* card_bm) {
   666   ScrubRSClosure scrub_cl(region_bm, card_bm);
   667   _g1->heap_region_iterate(&scrub_cl);
   668 }
   670 void HRInto_G1RemSet::scrub_par(BitMap* region_bm, BitMap* card_bm,
   671                                 int worker_num, int claim_val) {
   672   ScrubRSClosure scrub_cl(region_bm, card_bm);
   673   _g1->heap_region_par_iterate_chunked(&scrub_cl, worker_num, claim_val);
   674 }
   677 static IntHistogram out_of_histo(50, 50);
   679 void HRInto_G1RemSet::concurrentRefineOneCard_impl(jbyte* card_ptr, int worker_i) {
   680   // Construct the region representing the card.
   681   HeapWord* start = _ct_bs->addr_for(card_ptr);
   682   // And find the region containing it.
   683   HeapRegion* r = _g1->heap_region_containing(start);
   684   assert(r != NULL, "unexpected null");
   686   HeapWord* end   = _ct_bs->addr_for(card_ptr + 1);
   687   MemRegion dirtyRegion(start, end);
   689 #if CARD_REPEAT_HISTO
   690   init_ct_freq_table(_g1->g1_reserved_obj_bytes());
   691   ct_freq_note_card(_ct_bs->index_for(start));
   692 #endif
   694   UpdateRSOopClosure update_rs_oop_cl(this, worker_i);
   695   update_rs_oop_cl.set_from(r);
   696   FilterOutOfRegionClosure filter_then_update_rs_oop_cl(r, &update_rs_oop_cl);
   698   // Undirty the card.
   699   *card_ptr = CardTableModRefBS::clean_card_val();
   700   // We must complete this write before we do any of the reads below.
   701   OrderAccess::storeload();
   702   // And process it, being careful of unallocated portions of TLAB's.
   703   HeapWord* stop_point =
   704     r->oops_on_card_seq_iterate_careful(dirtyRegion,
   705                                         &filter_then_update_rs_oop_cl);
   706   // If stop_point is non-null, then we encountered an unallocated region
   707   // (perhaps the unfilled portion of a TLAB.)  For now, we'll dirty the
   708   // card and re-enqueue: if we put off the card until a GC pause, then the
   709   // unallocated portion will be filled in.  Alternatively, we might try
   710   // the full complexity of the technique used in "regular" precleaning.
   711   if (stop_point != NULL) {
   712     // The card might have gotten re-dirtied and re-enqueued while we
   713     // worked.  (In fact, it's pretty likely.)
   714     if (*card_ptr != CardTableModRefBS::dirty_card_val()) {
   715       *card_ptr = CardTableModRefBS::dirty_card_val();
   716       MutexLockerEx x(Shared_DirtyCardQ_lock,
   717                       Mutex::_no_safepoint_check_flag);
   718       DirtyCardQueue* sdcq =
   719         JavaThread::dirty_card_queue_set().shared_dirty_card_queue();
   720       sdcq->enqueue(card_ptr);
   721     }
   722   } else {
   723     out_of_histo.add_entry(filter_then_update_rs_oop_cl.out_of_region());
   724     _conc_refine_cards++;
   725   }
   726 }
   728 void HRInto_G1RemSet::concurrentRefineOneCard(jbyte* card_ptr, int worker_i) {
   729   // If the card is no longer dirty, nothing to do.
   730   if (*card_ptr != CardTableModRefBS::dirty_card_val()) return;
   732   // Construct the region representing the card.
   733   HeapWord* start = _ct_bs->addr_for(card_ptr);
   734   // And find the region containing it.
   735   HeapRegion* r = _g1->heap_region_containing(start);
   736   if (r == NULL) {
   737     guarantee(_g1->is_in_permanent(start), "Or else where?");
   738     return;  // Not in the G1 heap (might be in perm, for example.)
   739   }
   740   // Why do we have to check here whether a card is on a young region,
   741   // given that we dirty young regions and, as a result, the
   742   // post-barrier is supposed to filter them out and never to enqueue
   743   // them? When we allocate a new region as the "allocation region" we
   744   // actually dirty its cards after we release the lock, since card
   745   // dirtying while holding the lock was a performance bottleneck. So,
   746   // as a result, it is possible for other threads to actually
   747   // allocate objects in the region (after they acquire the lock)
   748   // before all the cards on the region are dirtied. This is unlikely,
   749   // and it doesn't happen often, but it can happen. So, the extra
   750   // check below filters out those cards.
   751   if (r->is_young()) {
   752     return;
   753   }
   754   // While we are processing RSet buffers during the collection, we
   755   // actually don't want to scan any cards on the collection set,
   756   // since we don't want to update remembered sets with entries that
   757   // point into the collection set, given that live objects from the
   758   // collection set are about to move and such entries will be stale
   759   // very soon. This change also deals with a reliability issue which
   760   // involves scanning a card in the collection set and coming across
   761   // an array that was being chunked and looking malformed. Note,
   762   // however, that if evacuation fails, we have to scan any objects
   763   // that were not moved and create any missing entries.
   764   if (r->in_collection_set()) {
   765     return;
   766   }
   768   // Should we defer processing the card?
   769   //
   770   // Previously the result from the insert_cache call would be
   771   // either card_ptr (implying that card_ptr was currently "cold"),
   772   // null (meaning we had inserted the card ptr into the "hot"
   773   // cache, which had some headroom), or a "hot" card ptr
   774   // extracted from the "hot" cache.
   775   //
   776   // Now that the _card_counts cache in the ConcurrentG1Refine
   777   // instance is an evicting hash table, the result we get back
   778   // could be from evicting the card ptr in an already occupied
   779   // bucket (in which case we have replaced the card ptr in the
   780   // bucket with card_ptr and "defer" is set to false). To avoid
   781   // having a data structure (updates to which would need a lock)
   782   // to hold these unprocessed dirty cards, we need to immediately
   783   // process card_ptr. The actions needed to be taken on return
   784   // from cache_insert are summarized in the following table:
   785   //
   786   // res      defer   action
   787   // --------------------------------------------------------------
   788   // null     false   card evicted from _card_counts & replaced with
   789   //                  card_ptr; evicted ptr added to hot cache.
   790   //                  No need to process res; immediately process card_ptr
   791   //
   792   // null     true    card not evicted from _card_counts; card_ptr added
   793   //                  to hot cache.
   794   //                  Nothing to do.
   795   //
   796   // non-null false   card evicted from _card_counts & replaced with
   797   //                  card_ptr; evicted ptr is currently "cold" or
   798   //                  caused an eviction from the hot cache.
   799   //                  Immediately process res; process card_ptr.
   800   //
   801   // non-null true    card not evicted from _card_counts; card_ptr is
   802   //                  currently cold, or caused an eviction from hot
   803   //                  cache.
   804   //                  Immediately process res; no need to process card_ptr.
   806   jbyte* res = card_ptr;
   807   bool defer = false;
   808   if (_cg1r->use_cache()) {
   809     jbyte* res = _cg1r->cache_insert(card_ptr, &defer);
   810     if (res != NULL && (res != card_ptr || defer)) {
   811       start = _ct_bs->addr_for(res);
   812       r = _g1->heap_region_containing(start);
   813       if (r == NULL) {
   814         assert(_g1->is_in_permanent(start), "Or else where?");
   815       } else {
   816         guarantee(!r->is_young(), "It was evicted in the current minor cycle.");
   817         // Process card pointer we get back from the hot card cache
   818         concurrentRefineOneCard_impl(res, worker_i);
   819       }
   820     }
   821   }
   823   if (!defer) {
   824     concurrentRefineOneCard_impl(card_ptr, worker_i);
   825   }
   826 }
   828 class HRRSStatsIter: public HeapRegionClosure {
   829   size_t _occupied;
   830   size_t _total_mem_sz;
   831   size_t _max_mem_sz;
   832   HeapRegion* _max_mem_sz_region;
   833 public:
   834   HRRSStatsIter() :
   835     _occupied(0),
   836     _total_mem_sz(0),
   837     _max_mem_sz(0),
   838     _max_mem_sz_region(NULL)
   839   {}
   841   bool doHeapRegion(HeapRegion* r) {
   842     if (r->continuesHumongous()) return false;
   843     size_t mem_sz = r->rem_set()->mem_size();
   844     if (mem_sz > _max_mem_sz) {
   845       _max_mem_sz = mem_sz;
   846       _max_mem_sz_region = r;
   847     }
   848     _total_mem_sz += mem_sz;
   849     size_t occ = r->rem_set()->occupied();
   850     _occupied += occ;
   851     return false;
   852   }
   853   size_t total_mem_sz() { return _total_mem_sz; }
   854   size_t max_mem_sz() { return _max_mem_sz; }
   855   size_t occupied() { return _occupied; }
   856   HeapRegion* max_mem_sz_region() { return _max_mem_sz_region; }
   857 };
   859 class PrintRSThreadVTimeClosure : public ThreadClosure {
   860 public:
   861   virtual void do_thread(Thread *t) {
   862     ConcurrentG1RefineThread* crt = (ConcurrentG1RefineThread*) t;
   863     gclog_or_tty->print("    %5.2f", crt->vtime_accum());
   864   }
   865 };
   867 void HRInto_G1RemSet::print_summary_info() {
   868   G1CollectedHeap* g1 = G1CollectedHeap::heap();
   870 #if CARD_REPEAT_HISTO
   871   gclog_or_tty->print_cr("\nG1 card_repeat count histogram: ");
   872   gclog_or_tty->print_cr("  # of repeats --> # of cards with that number.");
   873   card_repeat_count.print_on(gclog_or_tty);
   874 #endif
   876   if (FILTEROUTOFREGIONCLOSURE_DOHISTOGRAMCOUNT) {
   877     gclog_or_tty->print_cr("\nG1 rem-set out-of-region histogram: ");
   878     gclog_or_tty->print_cr("  # of CS ptrs --> # of cards with that number.");
   879     out_of_histo.print_on(gclog_or_tty);
   880   }
   881   gclog_or_tty->print_cr("\n Concurrent RS processed %d cards",
   882                          _conc_refine_cards);
   883   DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
   884   jint tot_processed_buffers =
   885     dcqs.processed_buffers_mut() + dcqs.processed_buffers_rs_thread();
   886   gclog_or_tty->print_cr("  Of %d completed buffers:", tot_processed_buffers);
   887   gclog_or_tty->print_cr("     %8d (%5.1f%%) by conc RS threads.",
   888                 dcqs.processed_buffers_rs_thread(),
   889                 100.0*(float)dcqs.processed_buffers_rs_thread()/
   890                 (float)tot_processed_buffers);
   891   gclog_or_tty->print_cr("     %8d (%5.1f%%) by mutator threads.",
   892                 dcqs.processed_buffers_mut(),
   893                 100.0*(float)dcqs.processed_buffers_mut()/
   894                 (float)tot_processed_buffers);
   895   gclog_or_tty->print_cr("  Conc RS threads times(s)");
   896   PrintRSThreadVTimeClosure p;
   897   gclog_or_tty->print("     ");
   898   g1->concurrent_g1_refine()->threads_do(&p);
   899   gclog_or_tty->print_cr("");
   901   if (G1UseHRIntoRS) {
   902     HRRSStatsIter blk;
   903     g1->heap_region_iterate(&blk);
   904     gclog_or_tty->print_cr("  Total heap region rem set sizes = " SIZE_FORMAT "K."
   905                            "  Max = " SIZE_FORMAT "K.",
   906                            blk.total_mem_sz()/K, blk.max_mem_sz()/K);
   907     gclog_or_tty->print_cr("  Static structures = " SIZE_FORMAT "K,"
   908                            " free_lists = " SIZE_FORMAT "K.",
   909                            HeapRegionRemSet::static_mem_size()/K,
   910                            HeapRegionRemSet::fl_mem_size()/K);
   911     gclog_or_tty->print_cr("    %d occupied cards represented.",
   912                            blk.occupied());
   913     gclog_or_tty->print_cr("    Max sz region = [" PTR_FORMAT ", " PTR_FORMAT " )"
   914                            ", cap = " SIZE_FORMAT "K, occ = " SIZE_FORMAT "K.",
   915                            blk.max_mem_sz_region()->bottom(), blk.max_mem_sz_region()->end(),
   916                            (blk.max_mem_sz_region()->rem_set()->mem_size() + K - 1)/K,
   917                            (blk.max_mem_sz_region()->rem_set()->occupied() + K - 1)/K);
   918     gclog_or_tty->print_cr("    Did %d coarsenings.",
   919                   HeapRegionRemSet::n_coarsenings());
   921   }
   922 }
   923 void HRInto_G1RemSet::prepare_for_verify() {
   924   if (G1HRRSFlushLogBuffersOnVerify &&
   925       (VerifyBeforeGC || VerifyAfterGC)
   926       &&  !_g1->full_collection()) {
   927     cleanupHRRS();
   928     _g1->set_refine_cte_cl_concurrency(false);
   929     if (SafepointSynchronize::is_at_safepoint()) {
   930       DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
   931       dcqs.concatenate_logs();
   932     }
   933     bool cg1r_use_cache = _cg1r->use_cache();
   934     _cg1r->set_use_cache(false);
   935     updateRS(0);
   936     _cg1r->set_use_cache(cg1r_use_cache);
   938     assert(JavaThread::dirty_card_queue_set().completed_buffers_num() == 0, "All should be consumed");
   939   }
   940 }

mercurial