src/share/vm/gc_implementation/g1/g1RemSet.cpp

Sun, 23 Oct 2011 23:06:06 -0700

author
johnc
date
Sun, 23 Oct 2011 23:06:06 -0700
changeset 3219
c6a6e936dc68
parent 3179
811ec3d0833b
child 3267
ed80554efa25
permissions
-rw-r--r--

7096030: G1: PrintGCDetails enhancements
7102445: G1: Unnecessary Resource allocations during RSet scanning
Summary: Add a new per-worker thread line in the PrintGCDetails output. GC Worker Other is the difference between the elapsed time for the parallel phase of the evacuation pause and the sum of the times of the sub-phases (external root scanning, mark stack scanning, RSet updating, RSet scanning, object copying, and termination) for that worker. During RSet scanning, stack allocate DirtyCardToOopClosure objects; allocating these in a resource area was causing abnormally high GC Worker Other times while the worker thread freed ResourceArea chunks.
Reviewed-by: tonyp, jwilhelm, brutisso

     1 /*
     2  * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.
     8  *
     9  * This code is distributed in the hope that it will be useful, but WITHOUT
    10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    12  * version 2 for more details (a copy is included in the LICENSE file that
    13  * accompanied this code).
    14  *
    15  * You should have received a copy of the GNU General Public License version
    16  * 2 along with this work; if not, write to the Free Software Foundation,
    17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    18  *
    19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    20  * or visit www.oracle.com if you need additional information or have any
    21  * questions.
    22  *
    23  */
    25 #include "precompiled.hpp"
    26 #include "gc_implementation/g1/bufferingOopClosure.hpp"
    27 #include "gc_implementation/g1/concurrentG1Refine.hpp"
    28 #include "gc_implementation/g1/concurrentG1RefineThread.hpp"
    29 #include "gc_implementation/g1/g1BlockOffsetTable.inline.hpp"
    30 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
    31 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
    32 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
    33 #include "gc_implementation/g1/g1RemSet.inline.hpp"
    34 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
    35 #include "memory/iterator.hpp"
    36 #include "oops/oop.inline.hpp"
    37 #include "utilities/intHisto.hpp"
    39 #define CARD_REPEAT_HISTO 0
    41 #if CARD_REPEAT_HISTO
    42 static size_t ct_freq_sz;
    43 static jbyte* ct_freq = NULL;
    45 void init_ct_freq_table(size_t heap_sz_bytes) {
    46   if (ct_freq == NULL) {
    47     ct_freq_sz = heap_sz_bytes/CardTableModRefBS::card_size;
    48     ct_freq = new jbyte[ct_freq_sz];
    49     for (size_t j = 0; j < ct_freq_sz; j++) ct_freq[j] = 0;
    50   }
    51 }
    53 void ct_freq_note_card(size_t index) {
    54   assert(0 <= index && index < ct_freq_sz, "Bounds error.");
    55   if (ct_freq[index] < 100) { ct_freq[index]++; }
    56 }
    58 static IntHistogram card_repeat_count(10, 10);
    60 void ct_freq_update_histo_and_reset() {
    61   for (size_t j = 0; j < ct_freq_sz; j++) {
    62     card_repeat_count.add_entry(ct_freq[j]);
    63     ct_freq[j] = 0;
    64   }
    66 }
    67 #endif
    69 G1RemSet::G1RemSet(G1CollectedHeap* g1, CardTableModRefBS* ct_bs)
    70   : _g1(g1), _conc_refine_cards(0),
    71     _ct_bs(ct_bs), _g1p(_g1->g1_policy()),
    72     _cg1r(g1->concurrent_g1_refine()),
    73     _cset_rs_update_cl(NULL),
    74     _cards_scanned(NULL), _total_cards_scanned(0)
    75 {
    76   _seq_task = new SubTasksDone(NumSeqTasks);
    77   guarantee(n_workers() > 0, "There should be some workers");
    78   _cset_rs_update_cl = NEW_C_HEAP_ARRAY(OopsInHeapRegionClosure*, n_workers());
    79   for (uint i = 0; i < n_workers(); i++) {
    80     _cset_rs_update_cl[i] = NULL;
    81   }
    82 }
    84 G1RemSet::~G1RemSet() {
    85   delete _seq_task;
    86   for (uint i = 0; i < n_workers(); i++) {
    87     assert(_cset_rs_update_cl[i] == NULL, "it should be");
    88   }
    89   FREE_C_HEAP_ARRAY(OopsInHeapRegionClosure*, _cset_rs_update_cl);
    90 }
    92 void CountNonCleanMemRegionClosure::do_MemRegion(MemRegion mr) {
    93   if (_g1->is_in_g1_reserved(mr.start())) {
    94     _n += (int) ((mr.byte_size() / CardTableModRefBS::card_size));
    95     if (_start_first == NULL) _start_first = mr.start();
    96   }
    97 }
    99 class ScanRSClosure : public HeapRegionClosure {
   100   size_t _cards_done, _cards;
   101   G1CollectedHeap* _g1h;
   102   OopsInHeapRegionClosure* _oc;
   103   G1BlockOffsetSharedArray* _bot_shared;
   104   CardTableModRefBS *_ct_bs;
   105   int _worker_i;
   106   int _block_size;
   107   bool _try_claimed;
   108 public:
   109   ScanRSClosure(OopsInHeapRegionClosure* oc, int worker_i) :
   110     _oc(oc),
   111     _cards(0),
   112     _cards_done(0),
   113     _worker_i(worker_i),
   114     _try_claimed(false)
   115   {
   116     _g1h = G1CollectedHeap::heap();
   117     _bot_shared = _g1h->bot_shared();
   118     _ct_bs = (CardTableModRefBS*) (_g1h->barrier_set());
   119     _block_size = MAX2<int>(G1RSetScanBlockSize, 1);
   120   }
   122   void set_try_claimed() { _try_claimed = true; }
   124   void scanCard(size_t index, HeapRegion *r) {
   125     // Stack allocate the DirtyCardToOopClosure instance
   126     HeapRegionDCTOC cl(_g1h, r, _oc,
   127                        CardTableModRefBS::Precise,
   128                        HeapRegionDCTOC::IntoCSFilterKind);
   130     // Set the "from" region in the closure.
   131     _oc->set_region(r);
   132     HeapWord* card_start = _bot_shared->address_for_index(index);
   133     HeapWord* card_end = card_start + G1BlockOffsetSharedArray::N_words;
   134     Space *sp = SharedHeap::heap()->space_containing(card_start);
   135     MemRegion sm_region = sp->used_region_at_save_marks();
   136     MemRegion mr = sm_region.intersection(MemRegion(card_start,card_end));
   137     if (!mr.is_empty() && !_ct_bs->is_card_claimed(index)) {
   138       // We make the card as "claimed" lazily (so races are possible
   139       // but they're benign), which reduces the number of duplicate
   140       // scans (the rsets of the regions in the cset can intersect).
   141       _ct_bs->set_card_claimed(index);
   142       _cards_done++;
   143       cl.do_MemRegion(mr);
   144     }
   145   }
   147   void printCard(HeapRegion* card_region, size_t card_index,
   148                  HeapWord* card_start) {
   149     gclog_or_tty->print_cr("T %d Region [" PTR_FORMAT ", " PTR_FORMAT ") "
   150                            "RS names card %p: "
   151                            "[" PTR_FORMAT ", " PTR_FORMAT ")",
   152                            _worker_i,
   153                            card_region->bottom(), card_region->end(),
   154                            card_index,
   155                            card_start, card_start + G1BlockOffsetSharedArray::N_words);
   156   }
   158   bool doHeapRegion(HeapRegion* r) {
   159     assert(r->in_collection_set(), "should only be called on elements of CS.");
   160     HeapRegionRemSet* hrrs = r->rem_set();
   161     if (hrrs->iter_is_complete()) return false; // All done.
   162     if (!_try_claimed && !hrrs->claim_iter()) return false;
   163     // If we ever free the collection set concurrently, we should also
   164     // clear the card table concurrently therefore we won't need to
   165     // add regions of the collection set to the dirty cards region.
   166     _g1h->push_dirty_cards_region(r);
   167     // If we didn't return above, then
   168     //   _try_claimed || r->claim_iter()
   169     // is true: either we're supposed to work on claimed-but-not-complete
   170     // regions, or we successfully claimed the region.
   171     HeapRegionRemSetIterator* iter = _g1h->rem_set_iterator(_worker_i);
   172     hrrs->init_iterator(iter);
   173     size_t card_index;
   175     // We claim cards in block so as to recude the contention. The block size is determined by
   176     // the G1RSetScanBlockSize parameter.
   177     size_t jump_to_card = hrrs->iter_claimed_next(_block_size);
   178     for (size_t current_card = 0; iter->has_next(card_index); current_card++) {
   179       if (current_card >= jump_to_card + _block_size) {
   180         jump_to_card = hrrs->iter_claimed_next(_block_size);
   181       }
   182       if (current_card < jump_to_card) continue;
   183       HeapWord* card_start = _g1h->bot_shared()->address_for_index(card_index);
   184 #if 0
   185       gclog_or_tty->print("Rem set iteration yielded card [" PTR_FORMAT ", " PTR_FORMAT ").\n",
   186                           card_start, card_start + CardTableModRefBS::card_size_in_words);
   187 #endif
   189       HeapRegion* card_region = _g1h->heap_region_containing(card_start);
   190       assert(card_region != NULL, "Yielding cards not in the heap?");
   191       _cards++;
   193       if (!card_region->is_on_dirty_cards_region_list()) {
   194         _g1h->push_dirty_cards_region(card_region);
   195       }
   197       // If the card is dirty, then we will scan it during updateRS.
   198       if (!card_region->in_collection_set() &&
   199           !_ct_bs->is_card_dirty(card_index)) {
   200         scanCard(card_index, card_region);
   201       }
   202     }
   203     if (!_try_claimed) {
   204       hrrs->set_iter_complete();
   205     }
   206     return false;
   207   }
   208   size_t cards_done() { return _cards_done;}
   209   size_t cards_looked_up() { return _cards;}
   210 };
   212 // We want the parallel threads to start their scanning at
   213 // different collection set regions to avoid contention.
   214 // If we have:
   215 //          n collection set regions
   216 //          p threads
   217 // Then thread t will start at region t * floor (n/p)
   219 HeapRegion* G1RemSet::calculateStartRegion(int worker_i) {
   220   HeapRegion* result = _g1p->collection_set();
   221   if (ParallelGCThreads > 0) {
   222     size_t cs_size = _g1p->collection_set_size();
   223     int n_workers = _g1->workers()->total_workers();
   224     size_t cs_spans = cs_size / n_workers;
   225     size_t ind      = cs_spans * worker_i;
   226     for (size_t i = 0; i < ind; i++)
   227       result = result->next_in_collection_set();
   228   }
   229   return result;
   230 }
   232 void G1RemSet::scanRS(OopsInHeapRegionClosure* oc, int worker_i) {
   233   double rs_time_start = os::elapsedTime();
   234   HeapRegion *startRegion = calculateStartRegion(worker_i);
   236   ScanRSClosure scanRScl(oc, worker_i);
   238   _g1->collection_set_iterate_from(startRegion, &scanRScl);
   239   scanRScl.set_try_claimed();
   240   _g1->collection_set_iterate_from(startRegion, &scanRScl);
   242   double scan_rs_time_sec = os::elapsedTime() - rs_time_start;
   244   assert( _cards_scanned != NULL, "invariant" );
   245   _cards_scanned[worker_i] = scanRScl.cards_done();
   247   _g1p->record_scan_rs_time(worker_i, scan_rs_time_sec * 1000.0);
   248 }
   250 // Closure used for updating RSets and recording references that
   251 // point into the collection set. Only called during an
   252 // evacuation pause.
   254 class RefineRecordRefsIntoCSCardTableEntryClosure: public CardTableEntryClosure {
   255   G1RemSet* _g1rs;
   256   DirtyCardQueue* _into_cset_dcq;
   257 public:
   258   RefineRecordRefsIntoCSCardTableEntryClosure(G1CollectedHeap* g1h,
   259                                               DirtyCardQueue* into_cset_dcq) :
   260     _g1rs(g1h->g1_rem_set()), _into_cset_dcq(into_cset_dcq)
   261   {}
   262   bool do_card_ptr(jbyte* card_ptr, int worker_i) {
   263     // The only time we care about recording cards that
   264     // contain references that point into the collection set
   265     // is during RSet updating within an evacuation pause.
   266     // In this case worker_i should be the id of a GC worker thread.
   267     assert(SafepointSynchronize::is_at_safepoint(), "not during an evacuation pause");
   268     assert(worker_i < (int) (ParallelGCThreads == 0 ? 1 : ParallelGCThreads), "should be a GC worker");
   270     if (_g1rs->concurrentRefineOneCard(card_ptr, worker_i, true)) {
   271       // 'card_ptr' contains references that point into the collection
   272       // set. We need to record the card in the DCQS
   273       // (G1CollectedHeap::into_cset_dirty_card_queue_set())
   274       // that's used for that purpose.
   275       //
   276       // Enqueue the card
   277       _into_cset_dcq->enqueue(card_ptr);
   278     }
   279     return true;
   280   }
   281 };
   283 void G1RemSet::updateRS(DirtyCardQueue* into_cset_dcq, int worker_i) {
   284   double start = os::elapsedTime();
   285   // Apply the given closure to all remaining log entries.
   286   RefineRecordRefsIntoCSCardTableEntryClosure into_cset_update_rs_cl(_g1, into_cset_dcq);
   288   _g1->iterate_dirty_card_closure(&into_cset_update_rs_cl, into_cset_dcq, false, worker_i);
   290   // Now there should be no dirty cards.
   291   if (G1RSLogCheckCardTable) {
   292     CountNonCleanMemRegionClosure cl(_g1);
   293     _ct_bs->mod_card_iterate(&cl);
   294     // XXX This isn't true any more: keeping cards of young regions
   295     // marked dirty broke it.  Need some reasonable fix.
   296     guarantee(cl.n() == 0, "Card table should be clean.");
   297   }
   299   _g1p->record_update_rs_time(worker_i, (os::elapsedTime() - start) * 1000.0);
   300 }
   302 class CountRSSizeClosure: public HeapRegionClosure {
   303   size_t _n;
   304   size_t _tot;
   305   size_t _max;
   306   HeapRegion* _max_r;
   307   enum {
   308     N = 20,
   309     MIN = 6
   310   };
   311   int _histo[N];
   312 public:
   313   CountRSSizeClosure() : _n(0), _tot(0), _max(0), _max_r(NULL) {
   314     for (int i = 0; i < N; i++) _histo[i] = 0;
   315   }
   316   bool doHeapRegion(HeapRegion* r) {
   317     if (!r->continuesHumongous()) {
   318       size_t occ = r->rem_set()->occupied();
   319       _n++;
   320       _tot += occ;
   321       if (occ > _max) {
   322         _max = occ;
   323         _max_r = r;
   324       }
   325       // Fit it into a histo bin.
   326       int s = 1 << MIN;
   327       int i = 0;
   328       while (occ > (size_t) s && i < (N-1)) {
   329         s = s << 1;
   330         i++;
   331       }
   332       _histo[i]++;
   333     }
   334     return false;
   335   }
   336   size_t n() { return _n; }
   337   size_t tot() { return _tot; }
   338   size_t mx() { return _max; }
   339   HeapRegion* mxr() { return _max_r; }
   340   void print_histo() {
   341     int mx = N;
   342     while (mx >= 0) {
   343       if (_histo[mx-1] > 0) break;
   344       mx--;
   345     }
   346     gclog_or_tty->print_cr("Number of regions with given RS sizes:");
   347     gclog_or_tty->print_cr("           <= %8d   %8d", 1 << MIN, _histo[0]);
   348     for (int i = 1; i < mx-1; i++) {
   349       gclog_or_tty->print_cr("  %8d  - %8d   %8d",
   350                     (1 << (MIN + i - 1)) + 1,
   351                     1 << (MIN + i),
   352                     _histo[i]);
   353     }
   354     gclog_or_tty->print_cr("            > %8d   %8d", (1 << (MIN+mx-2))+1, _histo[mx-1]);
   355   }
   356 };
   358 void G1RemSet::cleanupHRRS() {
   359   HeapRegionRemSet::cleanup();
   360 }
   362 void G1RemSet::oops_into_collection_set_do(OopsInHeapRegionClosure* oc,
   363                                              int worker_i) {
   364 #if CARD_REPEAT_HISTO
   365   ct_freq_update_histo_and_reset();
   366 #endif
   367   if (worker_i == 0) {
   368     _cg1r->clear_and_record_card_counts();
   369   }
   371   // Make this into a command-line flag...
   372   if (G1RSCountHisto && (ParallelGCThreads == 0 || worker_i == 0)) {
   373     CountRSSizeClosure count_cl;
   374     _g1->heap_region_iterate(&count_cl);
   375     gclog_or_tty->print_cr("Avg of %d RS counts is %f, max is %d, "
   376                   "max region is " PTR_FORMAT,
   377                   count_cl.n(), (float)count_cl.tot()/(float)count_cl.n(),
   378                   count_cl.mx(), count_cl.mxr());
   379     count_cl.print_histo();
   380   }
   382   // We cache the value of 'oc' closure into the appropriate slot in the
   383   // _cset_rs_update_cl for this worker
   384   assert(worker_i < (int)n_workers(), "sanity");
   385   _cset_rs_update_cl[worker_i] = oc;
   387   // A DirtyCardQueue that is used to hold cards containing references
   388   // that point into the collection set. This DCQ is associated with a
   389   // special DirtyCardQueueSet (see g1CollectedHeap.hpp).  Under normal
   390   // circumstances (i.e. the pause successfully completes), these cards
   391   // are just discarded (there's no need to update the RSets of regions
   392   // that were in the collection set - after the pause these regions
   393   // are wholly 'free' of live objects. In the event of an evacuation
   394   // failure the cards/buffers in this queue set are:
   395   // * passed to the DirtyCardQueueSet that is used to manage deferred
   396   //   RSet updates, or
   397   // * scanned for references that point into the collection set
   398   //   and the RSet of the corresponding region in the collection set
   399   //   is updated immediately.
   400   DirtyCardQueue into_cset_dcq(&_g1->into_cset_dirty_card_queue_set());
   402   assert((ParallelGCThreads > 0) || worker_i == 0, "invariant");
   404   // The two flags below were introduced temporarily to serialize
   405   // the updating and scanning of remembered sets. There are some
   406   // race conditions when these two operations are done in parallel
   407   // and they are causing failures. When we resolve said race
   408   // conditions, we'll revert back to parallel remembered set
   409   // updating and scanning. See CRs 6677707 and 6677708.
   410   if (G1UseParallelRSetUpdating || (worker_i == 0)) {
   411     updateRS(&into_cset_dcq, worker_i);
   412   } else {
   413     _g1p->record_update_rs_processed_buffers(worker_i, 0.0);
   414     _g1p->record_update_rs_time(worker_i, 0.0);
   415   }
   416   if (G1UseParallelRSetScanning || (worker_i == 0)) {
   417     scanRS(oc, worker_i);
   418   } else {
   419     _g1p->record_scan_rs_time(worker_i, 0.0);
   420   }
   422   // We now clear the cached values of _cset_rs_update_cl for this worker
   423   _cset_rs_update_cl[worker_i] = NULL;
   424 }
   426 void G1RemSet::prepare_for_oops_into_collection_set_do() {
   427   cleanupHRRS();
   428   ConcurrentG1Refine* cg1r = _g1->concurrent_g1_refine();
   429   _g1->set_refine_cte_cl_concurrency(false);
   430   DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
   431   dcqs.concatenate_logs();
   433   if (ParallelGCThreads > 0) {
   434     _seq_task->set_n_threads((int)n_workers());
   435   }
   436   guarantee( _cards_scanned == NULL, "invariant" );
   437   _cards_scanned = NEW_C_HEAP_ARRAY(size_t, n_workers());
   438   for (uint i = 0; i < n_workers(); ++i) {
   439     _cards_scanned[i] = 0;
   440   }
   441   _total_cards_scanned = 0;
   442 }
   445 // This closure, applied to a DirtyCardQueueSet, is used to immediately
   446 // update the RSets for the regions in the CSet. For each card it iterates
   447 // through the oops which coincide with that card. It scans the reference
   448 // fields in each oop; when it finds an oop that points into the collection
   449 // set, the RSet for the region containing the referenced object is updated.
   450 class UpdateRSetCardTableEntryIntoCSetClosure: public CardTableEntryClosure {
   451   G1CollectedHeap* _g1;
   452   CardTableModRefBS* _ct_bs;
   453 public:
   454   UpdateRSetCardTableEntryIntoCSetClosure(G1CollectedHeap* g1,
   455                                           CardTableModRefBS* bs):
   456     _g1(g1), _ct_bs(bs)
   457   { }
   459   bool do_card_ptr(jbyte* card_ptr, int worker_i) {
   460     // Construct the region representing the card.
   461     HeapWord* start = _ct_bs->addr_for(card_ptr);
   462     // And find the region containing it.
   463     HeapRegion* r = _g1->heap_region_containing(start);
   464     assert(r != NULL, "unexpected null");
   466     // Scan oops in the card looking for references into the collection set
   467     HeapWord* end   = _ct_bs->addr_for(card_ptr + 1);
   468     MemRegion scanRegion(start, end);
   470     UpdateRSetImmediate update_rs_cl(_g1->g1_rem_set());
   471     FilterIntoCSClosure update_rs_cset_oop_cl(NULL, _g1, &update_rs_cl);
   472     FilterOutOfRegionClosure filter_then_update_rs_cset_oop_cl(r, &update_rs_cset_oop_cl);
   474     // We can pass false as the "filter_young" parameter here as:
   475     // * we should be in a STW pause,
   476     // * the DCQS to which this closure is applied is used to hold
   477     //   references that point into the collection set from the prior
   478     //   RSet updating,
   479     // * the post-write barrier shouldn't be logging updates to young
   480     //   regions (but there is a situation where this can happen - see
   481     //   the comment in G1RemSet::concurrentRefineOneCard below -
   482     //   that should not be applicable here), and
   483     // * during actual RSet updating, the filtering of cards in young
   484     //   regions in HeapRegion::oops_on_card_seq_iterate_careful is
   485     //   employed.
   486     // As a result, when this closure is applied to "refs into cset"
   487     // DCQS, we shouldn't see any cards in young regions.
   488     update_rs_cl.set_region(r);
   489     HeapWord* stop_point =
   490       r->oops_on_card_seq_iterate_careful(scanRegion,
   491                                           &filter_then_update_rs_cset_oop_cl,
   492                                           false /* filter_young */,
   493                                           NULL  /* card_ptr */);
   495     // Since this is performed in the event of an evacuation failure, we
   496     // we shouldn't see a non-null stop point
   497     assert(stop_point == NULL, "saw an unallocated region");
   498     return true;
   499   }
   500 };
   502 void G1RemSet::cleanup_after_oops_into_collection_set_do() {
   503   guarantee( _cards_scanned != NULL, "invariant" );
   504   _total_cards_scanned = 0;
   505   for (uint i = 0; i < n_workers(); ++i) {
   506     _total_cards_scanned += _cards_scanned[i];
   507   }
   508   FREE_C_HEAP_ARRAY(size_t, _cards_scanned);
   509   _cards_scanned = NULL;
   510   // Cleanup after copy
   511   _g1->set_refine_cte_cl_concurrency(true);
   512   // Set all cards back to clean.
   513   _g1->cleanUpCardTable();
   515   DirtyCardQueueSet& into_cset_dcqs = _g1->into_cset_dirty_card_queue_set();
   516   int into_cset_n_buffers = into_cset_dcqs.completed_buffers_num();
   518   if (_g1->evacuation_failed()) {
   519     // Restore remembered sets for the regions pointing into the collection set.
   521     if (G1DeferredRSUpdate) {
   522       // If deferred RS updates are enabled then we just need to transfer
   523       // the completed buffers from (a) the DirtyCardQueueSet used to hold
   524       // cards that contain references that point into the collection set
   525       // to (b) the DCQS used to hold the deferred RS updates
   526       _g1->dirty_card_queue_set().merge_bufferlists(&into_cset_dcqs);
   527     } else {
   529       CardTableModRefBS* bs = (CardTableModRefBS*)_g1->barrier_set();
   530       UpdateRSetCardTableEntryIntoCSetClosure update_rs_cset_immediate(_g1, bs);
   532       int n_completed_buffers = 0;
   533       while (into_cset_dcqs.apply_closure_to_completed_buffer(&update_rs_cset_immediate,
   534                                                     0, 0, true)) {
   535         n_completed_buffers++;
   536       }
   537       assert(n_completed_buffers == into_cset_n_buffers, "missed some buffers");
   538     }
   539   }
   541   // Free any completed buffers in the DirtyCardQueueSet used to hold cards
   542   // which contain references that point into the collection.
   543   _g1->into_cset_dirty_card_queue_set().clear();
   544   assert(_g1->into_cset_dirty_card_queue_set().completed_buffers_num() == 0,
   545          "all buffers should be freed");
   546   _g1->into_cset_dirty_card_queue_set().clear_n_completed_buffers();
   547 }
   549 class ScrubRSClosure: public HeapRegionClosure {
   550   G1CollectedHeap* _g1h;
   551   BitMap* _region_bm;
   552   BitMap* _card_bm;
   553   CardTableModRefBS* _ctbs;
   554 public:
   555   ScrubRSClosure(BitMap* region_bm, BitMap* card_bm) :
   556     _g1h(G1CollectedHeap::heap()),
   557     _region_bm(region_bm), _card_bm(card_bm),
   558     _ctbs(NULL)
   559   {
   560     ModRefBarrierSet* bs = _g1h->mr_bs();
   561     guarantee(bs->is_a(BarrierSet::CardTableModRef), "Precondition");
   562     _ctbs = (CardTableModRefBS*)bs;
   563   }
   565   bool doHeapRegion(HeapRegion* r) {
   566     if (!r->continuesHumongous()) {
   567       r->rem_set()->scrub(_ctbs, _region_bm, _card_bm);
   568     }
   569     return false;
   570   }
   571 };
   573 void G1RemSet::scrub(BitMap* region_bm, BitMap* card_bm) {
   574   ScrubRSClosure scrub_cl(region_bm, card_bm);
   575   _g1->heap_region_iterate(&scrub_cl);
   576 }
   578 void G1RemSet::scrub_par(BitMap* region_bm, BitMap* card_bm,
   579                                 int worker_num, int claim_val) {
   580   ScrubRSClosure scrub_cl(region_bm, card_bm);
   581   _g1->heap_region_par_iterate_chunked(&scrub_cl, worker_num, claim_val);
   582 }
   585 static IntHistogram out_of_histo(50, 50);
   587 class TriggerClosure : public OopClosure {
   588   bool _trigger;
   589 public:
   590   TriggerClosure() : _trigger(false) { }
   591   bool value() const { return _trigger; }
   592   template <class T> void do_oop_nv(T* p) { _trigger = true; }
   593   virtual void do_oop(oop* p)        { do_oop_nv(p); }
   594   virtual void do_oop(narrowOop* p)  { do_oop_nv(p); }
   595 };
   597 class InvokeIfNotTriggeredClosure: public OopClosure {
   598   TriggerClosure* _t;
   599   OopClosure* _oc;
   600 public:
   601   InvokeIfNotTriggeredClosure(TriggerClosure* t, OopClosure* oc):
   602     _t(t), _oc(oc) { }
   603   template <class T> void do_oop_nv(T* p) {
   604     if (!_t->value()) _oc->do_oop(p);
   605   }
   606   virtual void do_oop(oop* p)        { do_oop_nv(p); }
   607   virtual void do_oop(narrowOop* p)  { do_oop_nv(p); }
   608 };
   610 class Mux2Closure : public OopClosure {
   611   OopClosure* _c1;
   612   OopClosure* _c2;
   613 public:
   614   Mux2Closure(OopClosure *c1, OopClosure *c2) : _c1(c1), _c2(c2) { }
   615   template <class T> void do_oop_nv(T* p) {
   616     _c1->do_oop(p); _c2->do_oop(p);
   617   }
   618   virtual void do_oop(oop* p)        { do_oop_nv(p); }
   619   virtual void do_oop(narrowOop* p)  { do_oop_nv(p); }
   620 };
   622 bool G1RemSet::concurrentRefineOneCard_impl(jbyte* card_ptr, int worker_i,
   623                                                    bool check_for_refs_into_cset) {
   624   // Construct the region representing the card.
   625   HeapWord* start = _ct_bs->addr_for(card_ptr);
   626   // And find the region containing it.
   627   HeapRegion* r = _g1->heap_region_containing(start);
   628   assert(r != NULL, "unexpected null");
   630   HeapWord* end   = _ct_bs->addr_for(card_ptr + 1);
   631   MemRegion dirtyRegion(start, end);
   633 #if CARD_REPEAT_HISTO
   634   init_ct_freq_table(_g1->max_capacity());
   635   ct_freq_note_card(_ct_bs->index_for(start));
   636 #endif
   638   assert(!check_for_refs_into_cset || _cset_rs_update_cl[worker_i] != NULL, "sanity");
   639   UpdateRSOrPushRefOopClosure update_rs_oop_cl(_g1,
   640                                                _g1->g1_rem_set(),
   641                                                _cset_rs_update_cl[worker_i],
   642                                                check_for_refs_into_cset,
   643                                                worker_i);
   644   update_rs_oop_cl.set_from(r);
   646   TriggerClosure trigger_cl;
   647   FilterIntoCSClosure into_cs_cl(NULL, _g1, &trigger_cl);
   648   InvokeIfNotTriggeredClosure invoke_cl(&trigger_cl, &into_cs_cl);
   649   Mux2Closure mux(&invoke_cl, &update_rs_oop_cl);
   651   FilterOutOfRegionClosure filter_then_update_rs_oop_cl(r,
   652                         (check_for_refs_into_cset ?
   653                                 (OopClosure*)&mux :
   654                                 (OopClosure*)&update_rs_oop_cl));
   656   // The region for the current card may be a young region. The
   657   // current card may have been a card that was evicted from the
   658   // card cache. When the card was inserted into the cache, we had
   659   // determined that its region was non-young. While in the cache,
   660   // the region may have been freed during a cleanup pause, reallocated
   661   // and tagged as young.
   662   //
   663   // We wish to filter out cards for such a region but the current
   664   // thread, if we're running concurrently, may "see" the young type
   665   // change at any time (so an earlier "is_young" check may pass or
   666   // fail arbitrarily). We tell the iteration code to perform this
   667   // filtering when it has been determined that there has been an actual
   668   // allocation in this region and making it safe to check the young type.
   669   bool filter_young = true;
   671   HeapWord* stop_point =
   672     r->oops_on_card_seq_iterate_careful(dirtyRegion,
   673                                         &filter_then_update_rs_oop_cl,
   674                                         filter_young,
   675                                         card_ptr);
   677   // If stop_point is non-null, then we encountered an unallocated region
   678   // (perhaps the unfilled portion of a TLAB.)  For now, we'll dirty the
   679   // card and re-enqueue: if we put off the card until a GC pause, then the
   680   // unallocated portion will be filled in.  Alternatively, we might try
   681   // the full complexity of the technique used in "regular" precleaning.
   682   if (stop_point != NULL) {
   683     // The card might have gotten re-dirtied and re-enqueued while we
   684     // worked.  (In fact, it's pretty likely.)
   685     if (*card_ptr != CardTableModRefBS::dirty_card_val()) {
   686       *card_ptr = CardTableModRefBS::dirty_card_val();
   687       MutexLockerEx x(Shared_DirtyCardQ_lock,
   688                       Mutex::_no_safepoint_check_flag);
   689       DirtyCardQueue* sdcq =
   690         JavaThread::dirty_card_queue_set().shared_dirty_card_queue();
   691       sdcq->enqueue(card_ptr);
   692     }
   693   } else {
   694     out_of_histo.add_entry(filter_then_update_rs_oop_cl.out_of_region());
   695     _conc_refine_cards++;
   696   }
   698   return trigger_cl.value();
   699 }
   701 bool G1RemSet::concurrentRefineOneCard(jbyte* card_ptr, int worker_i,
   702                                               bool check_for_refs_into_cset) {
   703   // If the card is no longer dirty, nothing to do.
   704   if (*card_ptr != CardTableModRefBS::dirty_card_val()) {
   705     // No need to return that this card contains refs that point
   706     // into the collection set.
   707     return false;
   708   }
   710   // Construct the region representing the card.
   711   HeapWord* start = _ct_bs->addr_for(card_ptr);
   712   // And find the region containing it.
   713   HeapRegion* r = _g1->heap_region_containing(start);
   714   if (r == NULL) {
   715     guarantee(_g1->is_in_permanent(start), "Or else where?");
   716     // Again no need to return that this card contains refs that
   717     // point into the collection set.
   718     return false;  // Not in the G1 heap (might be in perm, for example.)
   719   }
   720   // Why do we have to check here whether a card is on a young region,
   721   // given that we dirty young regions and, as a result, the
   722   // post-barrier is supposed to filter them out and never to enqueue
   723   // them? When we allocate a new region as the "allocation region" we
   724   // actually dirty its cards after we release the lock, since card
   725   // dirtying while holding the lock was a performance bottleneck. So,
   726   // as a result, it is possible for other threads to actually
   727   // allocate objects in the region (after the acquire the lock)
   728   // before all the cards on the region are dirtied. This is unlikely,
   729   // and it doesn't happen often, but it can happen. So, the extra
   730   // check below filters out those cards.
   731   if (r->is_young()) {
   732     return false;
   733   }
   734   // While we are processing RSet buffers during the collection, we
   735   // actually don't want to scan any cards on the collection set,
   736   // since we don't want to update remebered sets with entries that
   737   // point into the collection set, given that live objects from the
   738   // collection set are about to move and such entries will be stale
   739   // very soon. This change also deals with a reliability issue which
   740   // involves scanning a card in the collection set and coming across
   741   // an array that was being chunked and looking malformed. Note,
   742   // however, that if evacuation fails, we have to scan any objects
   743   // that were not moved and create any missing entries.
   744   if (r->in_collection_set()) {
   745     return false;
   746   }
   748   // Should we defer processing the card?
   749   //
   750   // Previously the result from the insert_cache call would be
   751   // either card_ptr (implying that card_ptr was currently "cold"),
   752   // null (meaning we had inserted the card ptr into the "hot"
   753   // cache, which had some headroom), or a "hot" card ptr
   754   // extracted from the "hot" cache.
   755   //
   756   // Now that the _card_counts cache in the ConcurrentG1Refine
   757   // instance is an evicting hash table, the result we get back
   758   // could be from evicting the card ptr in an already occupied
   759   // bucket (in which case we have replaced the card ptr in the
   760   // bucket with card_ptr and "defer" is set to false). To avoid
   761   // having a data structure (updates to which would need a lock)
   762   // to hold these unprocessed dirty cards, we need to immediately
   763   // process card_ptr. The actions needed to be taken on return
   764   // from cache_insert are summarized in the following table:
   765   //
   766   // res      defer   action
   767   // --------------------------------------------------------------
   768   // null     false   card evicted from _card_counts & replaced with
   769   //                  card_ptr; evicted ptr added to hot cache.
   770   //                  No need to process res; immediately process card_ptr
   771   //
   772   // null     true    card not evicted from _card_counts; card_ptr added
   773   //                  to hot cache.
   774   //                  Nothing to do.
   775   //
   776   // non-null false   card evicted from _card_counts & replaced with
   777   //                  card_ptr; evicted ptr is currently "cold" or
   778   //                  caused an eviction from the hot cache.
   779   //                  Immediately process res; process card_ptr.
   780   //
   781   // non-null true    card not evicted from _card_counts; card_ptr is
   782   //                  currently cold, or caused an eviction from hot
   783   //                  cache.
   784   //                  Immediately process res; no need to process card_ptr.
   787   jbyte* res = card_ptr;
   788   bool defer = false;
   790   // This gets set to true if the card being refined has references
   791   // that point into the collection set.
   792   bool oops_into_cset = false;
   794   if (_cg1r->use_cache()) {
   795     jbyte* res = _cg1r->cache_insert(card_ptr, &defer);
   796     if (res != NULL && (res != card_ptr || defer)) {
   797       start = _ct_bs->addr_for(res);
   798       r = _g1->heap_region_containing(start);
   799       if (r == NULL) {
   800         assert(_g1->is_in_permanent(start), "Or else where?");
   801       } else {
   802         // Checking whether the region we got back from the cache
   803         // is young here is inappropriate. The region could have been
   804         // freed, reallocated and tagged as young while in the cache.
   805         // Hence we could see its young type change at any time.
   806         //
   807         // Process card pointer we get back from the hot card cache. This
   808         // will check whether the region containing the card is young
   809         // _after_ checking that the region has been allocated from.
   810         oops_into_cset = concurrentRefineOneCard_impl(res, worker_i,
   811                                                       false /* check_for_refs_into_cset */);
   812         // The above call to concurrentRefineOneCard_impl is only
   813         // performed if the hot card cache is enabled. This cache is
   814         // disabled during an evacuation pause - which is the only
   815         // time when we need know if the card contains references
   816         // that point into the collection set. Also when the hot card
   817         // cache is enabled, this code is executed by the concurrent
   818         // refine threads - rather than the GC worker threads - and
   819         // concurrentRefineOneCard_impl will return false.
   820         assert(!oops_into_cset, "should not see true here");
   821       }
   822     }
   823   }
   825   if (!defer) {
   826     oops_into_cset =
   827       concurrentRefineOneCard_impl(card_ptr, worker_i, check_for_refs_into_cset);
   828     // We should only be detecting that the card contains references
   829     // that point into the collection set if the current thread is
   830     // a GC worker thread.
   831     assert(!oops_into_cset || SafepointSynchronize::is_at_safepoint(),
   832            "invalid result at non safepoint");
   833   }
   834   return oops_into_cset;
   835 }
   837 class HRRSStatsIter: public HeapRegionClosure {
   838   size_t _occupied;
   839   size_t _total_mem_sz;
   840   size_t _max_mem_sz;
   841   HeapRegion* _max_mem_sz_region;
   842 public:
   843   HRRSStatsIter() :
   844     _occupied(0),
   845     _total_mem_sz(0),
   846     _max_mem_sz(0),
   847     _max_mem_sz_region(NULL)
   848   {}
   850   bool doHeapRegion(HeapRegion* r) {
   851     if (r->continuesHumongous()) return false;
   852     size_t mem_sz = r->rem_set()->mem_size();
   853     if (mem_sz > _max_mem_sz) {
   854       _max_mem_sz = mem_sz;
   855       _max_mem_sz_region = r;
   856     }
   857     _total_mem_sz += mem_sz;
   858     size_t occ = r->rem_set()->occupied();
   859     _occupied += occ;
   860     return false;
   861   }
   862   size_t total_mem_sz() { return _total_mem_sz; }
   863   size_t max_mem_sz() { return _max_mem_sz; }
   864   size_t occupied() { return _occupied; }
   865   HeapRegion* max_mem_sz_region() { return _max_mem_sz_region; }
   866 };
   868 class PrintRSThreadVTimeClosure : public ThreadClosure {
   869 public:
   870   virtual void do_thread(Thread *t) {
   871     ConcurrentG1RefineThread* crt = (ConcurrentG1RefineThread*) t;
   872     gclog_or_tty->print("    %5.2f", crt->vtime_accum());
   873   }
   874 };
   876 void G1RemSet::print_summary_info() {
   877   G1CollectedHeap* g1 = G1CollectedHeap::heap();
   879 #if CARD_REPEAT_HISTO
   880   gclog_or_tty->print_cr("\nG1 card_repeat count histogram: ");
   881   gclog_or_tty->print_cr("  # of repeats --> # of cards with that number.");
   882   card_repeat_count.print_on(gclog_or_tty);
   883 #endif
   885   if (FILTEROUTOFREGIONCLOSURE_DOHISTOGRAMCOUNT) {
   886     gclog_or_tty->print_cr("\nG1 rem-set out-of-region histogram: ");
   887     gclog_or_tty->print_cr("  # of CS ptrs --> # of cards with that number.");
   888     out_of_histo.print_on(gclog_or_tty);
   889   }
   890   gclog_or_tty->print_cr("\n Concurrent RS processed %d cards",
   891                          _conc_refine_cards);
   892   DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
   893   jint tot_processed_buffers =
   894     dcqs.processed_buffers_mut() + dcqs.processed_buffers_rs_thread();
   895   gclog_or_tty->print_cr("  Of %d completed buffers:", tot_processed_buffers);
   896   gclog_or_tty->print_cr("     %8d (%5.1f%%) by conc RS threads.",
   897                 dcqs.processed_buffers_rs_thread(),
   898                 100.0*(float)dcqs.processed_buffers_rs_thread()/
   899                 (float)tot_processed_buffers);
   900   gclog_or_tty->print_cr("     %8d (%5.1f%%) by mutator threads.",
   901                 dcqs.processed_buffers_mut(),
   902                 100.0*(float)dcqs.processed_buffers_mut()/
   903                 (float)tot_processed_buffers);
   904   gclog_or_tty->print_cr("  Conc RS threads times(s)");
   905   PrintRSThreadVTimeClosure p;
   906   gclog_or_tty->print("     ");
   907   g1->concurrent_g1_refine()->threads_do(&p);
   908   gclog_or_tty->print_cr("");
   910   HRRSStatsIter blk;
   911   g1->heap_region_iterate(&blk);
   912   gclog_or_tty->print_cr("  Total heap region rem set sizes = " SIZE_FORMAT "K."
   913                          "  Max = " SIZE_FORMAT "K.",
   914                          blk.total_mem_sz()/K, blk.max_mem_sz()/K);
   915   gclog_or_tty->print_cr("  Static structures = " SIZE_FORMAT "K,"
   916                          " free_lists = " SIZE_FORMAT "K.",
   917                          HeapRegionRemSet::static_mem_size()/K,
   918                          HeapRegionRemSet::fl_mem_size()/K);
   919   gclog_or_tty->print_cr("    %d occupied cards represented.",
   920                          blk.occupied());
   921   gclog_or_tty->print_cr("    Max sz region = [" PTR_FORMAT ", " PTR_FORMAT " )"
   922                          ", cap = " SIZE_FORMAT "K, occ = " SIZE_FORMAT "K.",
   923                          blk.max_mem_sz_region()->bottom(), blk.max_mem_sz_region()->end(),
   924                          (blk.max_mem_sz_region()->rem_set()->mem_size() + K - 1)/K,
   925                          (blk.max_mem_sz_region()->rem_set()->occupied() + K - 1)/K);
   926   gclog_or_tty->print_cr("    Did %d coarsenings.", HeapRegionRemSet::n_coarsenings());
   927 }
   929 void G1RemSet::prepare_for_verify() {
   930   if (G1HRRSFlushLogBuffersOnVerify &&
   931       (VerifyBeforeGC || VerifyAfterGC)
   932       &&  !_g1->full_collection()) {
   933     cleanupHRRS();
   934     _g1->set_refine_cte_cl_concurrency(false);
   935     if (SafepointSynchronize::is_at_safepoint()) {
   936       DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
   937       dcqs.concatenate_logs();
   938     }
   939     bool cg1r_use_cache = _cg1r->use_cache();
   940     _cg1r->set_use_cache(false);
   941     DirtyCardQueue into_cset_dcq(&_g1->into_cset_dirty_card_queue_set());
   942     updateRS(&into_cset_dcq, 0);
   943     _g1->into_cset_dirty_card_queue_set().clear();
   944     _cg1r->set_use_cache(cg1r_use_cache);
   946     assert(JavaThread::dirty_card_queue_set().completed_buffers_num() == 0, "All should be consumed");
   947   }
   948 }

mercurial