Sun, 23 Oct 2011 23:06:06 -0700
7096030: G1: PrintGCDetails enhancements
7102445: G1: Unnecessary Resource allocations during RSet scanning
Summary: Add a new per-worker thread line in the PrintGCDetails output. GC Worker Other is the difference between the elapsed time for the parallel phase of the evacuation pause and the sum of the times of the sub-phases (external root scanning, mark stack scanning, RSet updating, RSet scanning, object copying, and termination) for that worker. During RSet scanning, stack allocate DirtyCardToOopClosure objects; allocating these in a resource area was causing abnormally high GC Worker Other times while the worker thread freed ResourceArea chunks.
Reviewed-by: tonyp, jwilhelm, brutisso
1 /*
2 * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
25 #include "precompiled.hpp"
26 #include "gc_implementation/g1/bufferingOopClosure.hpp"
27 #include "gc_implementation/g1/concurrentG1Refine.hpp"
28 #include "gc_implementation/g1/concurrentG1RefineThread.hpp"
29 #include "gc_implementation/g1/g1BlockOffsetTable.inline.hpp"
30 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
31 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
32 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
33 #include "gc_implementation/g1/g1RemSet.inline.hpp"
34 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
35 #include "memory/iterator.hpp"
36 #include "oops/oop.inline.hpp"
37 #include "utilities/intHisto.hpp"
39 #define CARD_REPEAT_HISTO 0
41 #if CARD_REPEAT_HISTO
42 static size_t ct_freq_sz;
43 static jbyte* ct_freq = NULL;
45 void init_ct_freq_table(size_t heap_sz_bytes) {
46 if (ct_freq == NULL) {
47 ct_freq_sz = heap_sz_bytes/CardTableModRefBS::card_size;
48 ct_freq = new jbyte[ct_freq_sz];
49 for (size_t j = 0; j < ct_freq_sz; j++) ct_freq[j] = 0;
50 }
51 }
53 void ct_freq_note_card(size_t index) {
54 assert(0 <= index && index < ct_freq_sz, "Bounds error.");
55 if (ct_freq[index] < 100) { ct_freq[index]++; }
56 }
58 static IntHistogram card_repeat_count(10, 10);
60 void ct_freq_update_histo_and_reset() {
61 for (size_t j = 0; j < ct_freq_sz; j++) {
62 card_repeat_count.add_entry(ct_freq[j]);
63 ct_freq[j] = 0;
64 }
66 }
67 #endif
69 G1RemSet::G1RemSet(G1CollectedHeap* g1, CardTableModRefBS* ct_bs)
70 : _g1(g1), _conc_refine_cards(0),
71 _ct_bs(ct_bs), _g1p(_g1->g1_policy()),
72 _cg1r(g1->concurrent_g1_refine()),
73 _cset_rs_update_cl(NULL),
74 _cards_scanned(NULL), _total_cards_scanned(0)
75 {
76 _seq_task = new SubTasksDone(NumSeqTasks);
77 guarantee(n_workers() > 0, "There should be some workers");
78 _cset_rs_update_cl = NEW_C_HEAP_ARRAY(OopsInHeapRegionClosure*, n_workers());
79 for (uint i = 0; i < n_workers(); i++) {
80 _cset_rs_update_cl[i] = NULL;
81 }
82 }
84 G1RemSet::~G1RemSet() {
85 delete _seq_task;
86 for (uint i = 0; i < n_workers(); i++) {
87 assert(_cset_rs_update_cl[i] == NULL, "it should be");
88 }
89 FREE_C_HEAP_ARRAY(OopsInHeapRegionClosure*, _cset_rs_update_cl);
90 }
92 void CountNonCleanMemRegionClosure::do_MemRegion(MemRegion mr) {
93 if (_g1->is_in_g1_reserved(mr.start())) {
94 _n += (int) ((mr.byte_size() / CardTableModRefBS::card_size));
95 if (_start_first == NULL) _start_first = mr.start();
96 }
97 }
99 class ScanRSClosure : public HeapRegionClosure {
100 size_t _cards_done, _cards;
101 G1CollectedHeap* _g1h;
102 OopsInHeapRegionClosure* _oc;
103 G1BlockOffsetSharedArray* _bot_shared;
104 CardTableModRefBS *_ct_bs;
105 int _worker_i;
106 int _block_size;
107 bool _try_claimed;
108 public:
109 ScanRSClosure(OopsInHeapRegionClosure* oc, int worker_i) :
110 _oc(oc),
111 _cards(0),
112 _cards_done(0),
113 _worker_i(worker_i),
114 _try_claimed(false)
115 {
116 _g1h = G1CollectedHeap::heap();
117 _bot_shared = _g1h->bot_shared();
118 _ct_bs = (CardTableModRefBS*) (_g1h->barrier_set());
119 _block_size = MAX2<int>(G1RSetScanBlockSize, 1);
120 }
122 void set_try_claimed() { _try_claimed = true; }
124 void scanCard(size_t index, HeapRegion *r) {
125 // Stack allocate the DirtyCardToOopClosure instance
126 HeapRegionDCTOC cl(_g1h, r, _oc,
127 CardTableModRefBS::Precise,
128 HeapRegionDCTOC::IntoCSFilterKind);
130 // Set the "from" region in the closure.
131 _oc->set_region(r);
132 HeapWord* card_start = _bot_shared->address_for_index(index);
133 HeapWord* card_end = card_start + G1BlockOffsetSharedArray::N_words;
134 Space *sp = SharedHeap::heap()->space_containing(card_start);
135 MemRegion sm_region = sp->used_region_at_save_marks();
136 MemRegion mr = sm_region.intersection(MemRegion(card_start,card_end));
137 if (!mr.is_empty() && !_ct_bs->is_card_claimed(index)) {
138 // We make the card as "claimed" lazily (so races are possible
139 // but they're benign), which reduces the number of duplicate
140 // scans (the rsets of the regions in the cset can intersect).
141 _ct_bs->set_card_claimed(index);
142 _cards_done++;
143 cl.do_MemRegion(mr);
144 }
145 }
147 void printCard(HeapRegion* card_region, size_t card_index,
148 HeapWord* card_start) {
149 gclog_or_tty->print_cr("T %d Region [" PTR_FORMAT ", " PTR_FORMAT ") "
150 "RS names card %p: "
151 "[" PTR_FORMAT ", " PTR_FORMAT ")",
152 _worker_i,
153 card_region->bottom(), card_region->end(),
154 card_index,
155 card_start, card_start + G1BlockOffsetSharedArray::N_words);
156 }
158 bool doHeapRegion(HeapRegion* r) {
159 assert(r->in_collection_set(), "should only be called on elements of CS.");
160 HeapRegionRemSet* hrrs = r->rem_set();
161 if (hrrs->iter_is_complete()) return false; // All done.
162 if (!_try_claimed && !hrrs->claim_iter()) return false;
163 // If we ever free the collection set concurrently, we should also
164 // clear the card table concurrently therefore we won't need to
165 // add regions of the collection set to the dirty cards region.
166 _g1h->push_dirty_cards_region(r);
167 // If we didn't return above, then
168 // _try_claimed || r->claim_iter()
169 // is true: either we're supposed to work on claimed-but-not-complete
170 // regions, or we successfully claimed the region.
171 HeapRegionRemSetIterator* iter = _g1h->rem_set_iterator(_worker_i);
172 hrrs->init_iterator(iter);
173 size_t card_index;
175 // We claim cards in block so as to recude the contention. The block size is determined by
176 // the G1RSetScanBlockSize parameter.
177 size_t jump_to_card = hrrs->iter_claimed_next(_block_size);
178 for (size_t current_card = 0; iter->has_next(card_index); current_card++) {
179 if (current_card >= jump_to_card + _block_size) {
180 jump_to_card = hrrs->iter_claimed_next(_block_size);
181 }
182 if (current_card < jump_to_card) continue;
183 HeapWord* card_start = _g1h->bot_shared()->address_for_index(card_index);
184 #if 0
185 gclog_or_tty->print("Rem set iteration yielded card [" PTR_FORMAT ", " PTR_FORMAT ").\n",
186 card_start, card_start + CardTableModRefBS::card_size_in_words);
187 #endif
189 HeapRegion* card_region = _g1h->heap_region_containing(card_start);
190 assert(card_region != NULL, "Yielding cards not in the heap?");
191 _cards++;
193 if (!card_region->is_on_dirty_cards_region_list()) {
194 _g1h->push_dirty_cards_region(card_region);
195 }
197 // If the card is dirty, then we will scan it during updateRS.
198 if (!card_region->in_collection_set() &&
199 !_ct_bs->is_card_dirty(card_index)) {
200 scanCard(card_index, card_region);
201 }
202 }
203 if (!_try_claimed) {
204 hrrs->set_iter_complete();
205 }
206 return false;
207 }
208 size_t cards_done() { return _cards_done;}
209 size_t cards_looked_up() { return _cards;}
210 };
212 // We want the parallel threads to start their scanning at
213 // different collection set regions to avoid contention.
214 // If we have:
215 // n collection set regions
216 // p threads
217 // Then thread t will start at region t * floor (n/p)
219 HeapRegion* G1RemSet::calculateStartRegion(int worker_i) {
220 HeapRegion* result = _g1p->collection_set();
221 if (ParallelGCThreads > 0) {
222 size_t cs_size = _g1p->collection_set_size();
223 int n_workers = _g1->workers()->total_workers();
224 size_t cs_spans = cs_size / n_workers;
225 size_t ind = cs_spans * worker_i;
226 for (size_t i = 0; i < ind; i++)
227 result = result->next_in_collection_set();
228 }
229 return result;
230 }
232 void G1RemSet::scanRS(OopsInHeapRegionClosure* oc, int worker_i) {
233 double rs_time_start = os::elapsedTime();
234 HeapRegion *startRegion = calculateStartRegion(worker_i);
236 ScanRSClosure scanRScl(oc, worker_i);
238 _g1->collection_set_iterate_from(startRegion, &scanRScl);
239 scanRScl.set_try_claimed();
240 _g1->collection_set_iterate_from(startRegion, &scanRScl);
242 double scan_rs_time_sec = os::elapsedTime() - rs_time_start;
244 assert( _cards_scanned != NULL, "invariant" );
245 _cards_scanned[worker_i] = scanRScl.cards_done();
247 _g1p->record_scan_rs_time(worker_i, scan_rs_time_sec * 1000.0);
248 }
250 // Closure used for updating RSets and recording references that
251 // point into the collection set. Only called during an
252 // evacuation pause.
254 class RefineRecordRefsIntoCSCardTableEntryClosure: public CardTableEntryClosure {
255 G1RemSet* _g1rs;
256 DirtyCardQueue* _into_cset_dcq;
257 public:
258 RefineRecordRefsIntoCSCardTableEntryClosure(G1CollectedHeap* g1h,
259 DirtyCardQueue* into_cset_dcq) :
260 _g1rs(g1h->g1_rem_set()), _into_cset_dcq(into_cset_dcq)
261 {}
262 bool do_card_ptr(jbyte* card_ptr, int worker_i) {
263 // The only time we care about recording cards that
264 // contain references that point into the collection set
265 // is during RSet updating within an evacuation pause.
266 // In this case worker_i should be the id of a GC worker thread.
267 assert(SafepointSynchronize::is_at_safepoint(), "not during an evacuation pause");
268 assert(worker_i < (int) (ParallelGCThreads == 0 ? 1 : ParallelGCThreads), "should be a GC worker");
270 if (_g1rs->concurrentRefineOneCard(card_ptr, worker_i, true)) {
271 // 'card_ptr' contains references that point into the collection
272 // set. We need to record the card in the DCQS
273 // (G1CollectedHeap::into_cset_dirty_card_queue_set())
274 // that's used for that purpose.
275 //
276 // Enqueue the card
277 _into_cset_dcq->enqueue(card_ptr);
278 }
279 return true;
280 }
281 };
283 void G1RemSet::updateRS(DirtyCardQueue* into_cset_dcq, int worker_i) {
284 double start = os::elapsedTime();
285 // Apply the given closure to all remaining log entries.
286 RefineRecordRefsIntoCSCardTableEntryClosure into_cset_update_rs_cl(_g1, into_cset_dcq);
288 _g1->iterate_dirty_card_closure(&into_cset_update_rs_cl, into_cset_dcq, false, worker_i);
290 // Now there should be no dirty cards.
291 if (G1RSLogCheckCardTable) {
292 CountNonCleanMemRegionClosure cl(_g1);
293 _ct_bs->mod_card_iterate(&cl);
294 // XXX This isn't true any more: keeping cards of young regions
295 // marked dirty broke it. Need some reasonable fix.
296 guarantee(cl.n() == 0, "Card table should be clean.");
297 }
299 _g1p->record_update_rs_time(worker_i, (os::elapsedTime() - start) * 1000.0);
300 }
302 class CountRSSizeClosure: public HeapRegionClosure {
303 size_t _n;
304 size_t _tot;
305 size_t _max;
306 HeapRegion* _max_r;
307 enum {
308 N = 20,
309 MIN = 6
310 };
311 int _histo[N];
312 public:
313 CountRSSizeClosure() : _n(0), _tot(0), _max(0), _max_r(NULL) {
314 for (int i = 0; i < N; i++) _histo[i] = 0;
315 }
316 bool doHeapRegion(HeapRegion* r) {
317 if (!r->continuesHumongous()) {
318 size_t occ = r->rem_set()->occupied();
319 _n++;
320 _tot += occ;
321 if (occ > _max) {
322 _max = occ;
323 _max_r = r;
324 }
325 // Fit it into a histo bin.
326 int s = 1 << MIN;
327 int i = 0;
328 while (occ > (size_t) s && i < (N-1)) {
329 s = s << 1;
330 i++;
331 }
332 _histo[i]++;
333 }
334 return false;
335 }
336 size_t n() { return _n; }
337 size_t tot() { return _tot; }
338 size_t mx() { return _max; }
339 HeapRegion* mxr() { return _max_r; }
340 void print_histo() {
341 int mx = N;
342 while (mx >= 0) {
343 if (_histo[mx-1] > 0) break;
344 mx--;
345 }
346 gclog_or_tty->print_cr("Number of regions with given RS sizes:");
347 gclog_or_tty->print_cr(" <= %8d %8d", 1 << MIN, _histo[0]);
348 for (int i = 1; i < mx-1; i++) {
349 gclog_or_tty->print_cr(" %8d - %8d %8d",
350 (1 << (MIN + i - 1)) + 1,
351 1 << (MIN + i),
352 _histo[i]);
353 }
354 gclog_or_tty->print_cr(" > %8d %8d", (1 << (MIN+mx-2))+1, _histo[mx-1]);
355 }
356 };
358 void G1RemSet::cleanupHRRS() {
359 HeapRegionRemSet::cleanup();
360 }
362 void G1RemSet::oops_into_collection_set_do(OopsInHeapRegionClosure* oc,
363 int worker_i) {
364 #if CARD_REPEAT_HISTO
365 ct_freq_update_histo_and_reset();
366 #endif
367 if (worker_i == 0) {
368 _cg1r->clear_and_record_card_counts();
369 }
371 // Make this into a command-line flag...
372 if (G1RSCountHisto && (ParallelGCThreads == 0 || worker_i == 0)) {
373 CountRSSizeClosure count_cl;
374 _g1->heap_region_iterate(&count_cl);
375 gclog_or_tty->print_cr("Avg of %d RS counts is %f, max is %d, "
376 "max region is " PTR_FORMAT,
377 count_cl.n(), (float)count_cl.tot()/(float)count_cl.n(),
378 count_cl.mx(), count_cl.mxr());
379 count_cl.print_histo();
380 }
382 // We cache the value of 'oc' closure into the appropriate slot in the
383 // _cset_rs_update_cl for this worker
384 assert(worker_i < (int)n_workers(), "sanity");
385 _cset_rs_update_cl[worker_i] = oc;
387 // A DirtyCardQueue that is used to hold cards containing references
388 // that point into the collection set. This DCQ is associated with a
389 // special DirtyCardQueueSet (see g1CollectedHeap.hpp). Under normal
390 // circumstances (i.e. the pause successfully completes), these cards
391 // are just discarded (there's no need to update the RSets of regions
392 // that were in the collection set - after the pause these regions
393 // are wholly 'free' of live objects. In the event of an evacuation
394 // failure the cards/buffers in this queue set are:
395 // * passed to the DirtyCardQueueSet that is used to manage deferred
396 // RSet updates, or
397 // * scanned for references that point into the collection set
398 // and the RSet of the corresponding region in the collection set
399 // is updated immediately.
400 DirtyCardQueue into_cset_dcq(&_g1->into_cset_dirty_card_queue_set());
402 assert((ParallelGCThreads > 0) || worker_i == 0, "invariant");
404 // The two flags below were introduced temporarily to serialize
405 // the updating and scanning of remembered sets. There are some
406 // race conditions when these two operations are done in parallel
407 // and they are causing failures. When we resolve said race
408 // conditions, we'll revert back to parallel remembered set
409 // updating and scanning. See CRs 6677707 and 6677708.
410 if (G1UseParallelRSetUpdating || (worker_i == 0)) {
411 updateRS(&into_cset_dcq, worker_i);
412 } else {
413 _g1p->record_update_rs_processed_buffers(worker_i, 0.0);
414 _g1p->record_update_rs_time(worker_i, 0.0);
415 }
416 if (G1UseParallelRSetScanning || (worker_i == 0)) {
417 scanRS(oc, worker_i);
418 } else {
419 _g1p->record_scan_rs_time(worker_i, 0.0);
420 }
422 // We now clear the cached values of _cset_rs_update_cl for this worker
423 _cset_rs_update_cl[worker_i] = NULL;
424 }
426 void G1RemSet::prepare_for_oops_into_collection_set_do() {
427 cleanupHRRS();
428 ConcurrentG1Refine* cg1r = _g1->concurrent_g1_refine();
429 _g1->set_refine_cte_cl_concurrency(false);
430 DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
431 dcqs.concatenate_logs();
433 if (ParallelGCThreads > 0) {
434 _seq_task->set_n_threads((int)n_workers());
435 }
436 guarantee( _cards_scanned == NULL, "invariant" );
437 _cards_scanned = NEW_C_HEAP_ARRAY(size_t, n_workers());
438 for (uint i = 0; i < n_workers(); ++i) {
439 _cards_scanned[i] = 0;
440 }
441 _total_cards_scanned = 0;
442 }
445 // This closure, applied to a DirtyCardQueueSet, is used to immediately
446 // update the RSets for the regions in the CSet. For each card it iterates
447 // through the oops which coincide with that card. It scans the reference
448 // fields in each oop; when it finds an oop that points into the collection
449 // set, the RSet for the region containing the referenced object is updated.
450 class UpdateRSetCardTableEntryIntoCSetClosure: public CardTableEntryClosure {
451 G1CollectedHeap* _g1;
452 CardTableModRefBS* _ct_bs;
453 public:
454 UpdateRSetCardTableEntryIntoCSetClosure(G1CollectedHeap* g1,
455 CardTableModRefBS* bs):
456 _g1(g1), _ct_bs(bs)
457 { }
459 bool do_card_ptr(jbyte* card_ptr, int worker_i) {
460 // Construct the region representing the card.
461 HeapWord* start = _ct_bs->addr_for(card_ptr);
462 // And find the region containing it.
463 HeapRegion* r = _g1->heap_region_containing(start);
464 assert(r != NULL, "unexpected null");
466 // Scan oops in the card looking for references into the collection set
467 HeapWord* end = _ct_bs->addr_for(card_ptr + 1);
468 MemRegion scanRegion(start, end);
470 UpdateRSetImmediate update_rs_cl(_g1->g1_rem_set());
471 FilterIntoCSClosure update_rs_cset_oop_cl(NULL, _g1, &update_rs_cl);
472 FilterOutOfRegionClosure filter_then_update_rs_cset_oop_cl(r, &update_rs_cset_oop_cl);
474 // We can pass false as the "filter_young" parameter here as:
475 // * we should be in a STW pause,
476 // * the DCQS to which this closure is applied is used to hold
477 // references that point into the collection set from the prior
478 // RSet updating,
479 // * the post-write barrier shouldn't be logging updates to young
480 // regions (but there is a situation where this can happen - see
481 // the comment in G1RemSet::concurrentRefineOneCard below -
482 // that should not be applicable here), and
483 // * during actual RSet updating, the filtering of cards in young
484 // regions in HeapRegion::oops_on_card_seq_iterate_careful is
485 // employed.
486 // As a result, when this closure is applied to "refs into cset"
487 // DCQS, we shouldn't see any cards in young regions.
488 update_rs_cl.set_region(r);
489 HeapWord* stop_point =
490 r->oops_on_card_seq_iterate_careful(scanRegion,
491 &filter_then_update_rs_cset_oop_cl,
492 false /* filter_young */,
493 NULL /* card_ptr */);
495 // Since this is performed in the event of an evacuation failure, we
496 // we shouldn't see a non-null stop point
497 assert(stop_point == NULL, "saw an unallocated region");
498 return true;
499 }
500 };
502 void G1RemSet::cleanup_after_oops_into_collection_set_do() {
503 guarantee( _cards_scanned != NULL, "invariant" );
504 _total_cards_scanned = 0;
505 for (uint i = 0; i < n_workers(); ++i) {
506 _total_cards_scanned += _cards_scanned[i];
507 }
508 FREE_C_HEAP_ARRAY(size_t, _cards_scanned);
509 _cards_scanned = NULL;
510 // Cleanup after copy
511 _g1->set_refine_cte_cl_concurrency(true);
512 // Set all cards back to clean.
513 _g1->cleanUpCardTable();
515 DirtyCardQueueSet& into_cset_dcqs = _g1->into_cset_dirty_card_queue_set();
516 int into_cset_n_buffers = into_cset_dcqs.completed_buffers_num();
518 if (_g1->evacuation_failed()) {
519 // Restore remembered sets for the regions pointing into the collection set.
521 if (G1DeferredRSUpdate) {
522 // If deferred RS updates are enabled then we just need to transfer
523 // the completed buffers from (a) the DirtyCardQueueSet used to hold
524 // cards that contain references that point into the collection set
525 // to (b) the DCQS used to hold the deferred RS updates
526 _g1->dirty_card_queue_set().merge_bufferlists(&into_cset_dcqs);
527 } else {
529 CardTableModRefBS* bs = (CardTableModRefBS*)_g1->barrier_set();
530 UpdateRSetCardTableEntryIntoCSetClosure update_rs_cset_immediate(_g1, bs);
532 int n_completed_buffers = 0;
533 while (into_cset_dcqs.apply_closure_to_completed_buffer(&update_rs_cset_immediate,
534 0, 0, true)) {
535 n_completed_buffers++;
536 }
537 assert(n_completed_buffers == into_cset_n_buffers, "missed some buffers");
538 }
539 }
541 // Free any completed buffers in the DirtyCardQueueSet used to hold cards
542 // which contain references that point into the collection.
543 _g1->into_cset_dirty_card_queue_set().clear();
544 assert(_g1->into_cset_dirty_card_queue_set().completed_buffers_num() == 0,
545 "all buffers should be freed");
546 _g1->into_cset_dirty_card_queue_set().clear_n_completed_buffers();
547 }
549 class ScrubRSClosure: public HeapRegionClosure {
550 G1CollectedHeap* _g1h;
551 BitMap* _region_bm;
552 BitMap* _card_bm;
553 CardTableModRefBS* _ctbs;
554 public:
555 ScrubRSClosure(BitMap* region_bm, BitMap* card_bm) :
556 _g1h(G1CollectedHeap::heap()),
557 _region_bm(region_bm), _card_bm(card_bm),
558 _ctbs(NULL)
559 {
560 ModRefBarrierSet* bs = _g1h->mr_bs();
561 guarantee(bs->is_a(BarrierSet::CardTableModRef), "Precondition");
562 _ctbs = (CardTableModRefBS*)bs;
563 }
565 bool doHeapRegion(HeapRegion* r) {
566 if (!r->continuesHumongous()) {
567 r->rem_set()->scrub(_ctbs, _region_bm, _card_bm);
568 }
569 return false;
570 }
571 };
573 void G1RemSet::scrub(BitMap* region_bm, BitMap* card_bm) {
574 ScrubRSClosure scrub_cl(region_bm, card_bm);
575 _g1->heap_region_iterate(&scrub_cl);
576 }
578 void G1RemSet::scrub_par(BitMap* region_bm, BitMap* card_bm,
579 int worker_num, int claim_val) {
580 ScrubRSClosure scrub_cl(region_bm, card_bm);
581 _g1->heap_region_par_iterate_chunked(&scrub_cl, worker_num, claim_val);
582 }
585 static IntHistogram out_of_histo(50, 50);
587 class TriggerClosure : public OopClosure {
588 bool _trigger;
589 public:
590 TriggerClosure() : _trigger(false) { }
591 bool value() const { return _trigger; }
592 template <class T> void do_oop_nv(T* p) { _trigger = true; }
593 virtual void do_oop(oop* p) { do_oop_nv(p); }
594 virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
595 };
597 class InvokeIfNotTriggeredClosure: public OopClosure {
598 TriggerClosure* _t;
599 OopClosure* _oc;
600 public:
601 InvokeIfNotTriggeredClosure(TriggerClosure* t, OopClosure* oc):
602 _t(t), _oc(oc) { }
603 template <class T> void do_oop_nv(T* p) {
604 if (!_t->value()) _oc->do_oop(p);
605 }
606 virtual void do_oop(oop* p) { do_oop_nv(p); }
607 virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
608 };
610 class Mux2Closure : public OopClosure {
611 OopClosure* _c1;
612 OopClosure* _c2;
613 public:
614 Mux2Closure(OopClosure *c1, OopClosure *c2) : _c1(c1), _c2(c2) { }
615 template <class T> void do_oop_nv(T* p) {
616 _c1->do_oop(p); _c2->do_oop(p);
617 }
618 virtual void do_oop(oop* p) { do_oop_nv(p); }
619 virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
620 };
622 bool G1RemSet::concurrentRefineOneCard_impl(jbyte* card_ptr, int worker_i,
623 bool check_for_refs_into_cset) {
624 // Construct the region representing the card.
625 HeapWord* start = _ct_bs->addr_for(card_ptr);
626 // And find the region containing it.
627 HeapRegion* r = _g1->heap_region_containing(start);
628 assert(r != NULL, "unexpected null");
630 HeapWord* end = _ct_bs->addr_for(card_ptr + 1);
631 MemRegion dirtyRegion(start, end);
633 #if CARD_REPEAT_HISTO
634 init_ct_freq_table(_g1->max_capacity());
635 ct_freq_note_card(_ct_bs->index_for(start));
636 #endif
638 assert(!check_for_refs_into_cset || _cset_rs_update_cl[worker_i] != NULL, "sanity");
639 UpdateRSOrPushRefOopClosure update_rs_oop_cl(_g1,
640 _g1->g1_rem_set(),
641 _cset_rs_update_cl[worker_i],
642 check_for_refs_into_cset,
643 worker_i);
644 update_rs_oop_cl.set_from(r);
646 TriggerClosure trigger_cl;
647 FilterIntoCSClosure into_cs_cl(NULL, _g1, &trigger_cl);
648 InvokeIfNotTriggeredClosure invoke_cl(&trigger_cl, &into_cs_cl);
649 Mux2Closure mux(&invoke_cl, &update_rs_oop_cl);
651 FilterOutOfRegionClosure filter_then_update_rs_oop_cl(r,
652 (check_for_refs_into_cset ?
653 (OopClosure*)&mux :
654 (OopClosure*)&update_rs_oop_cl));
656 // The region for the current card may be a young region. The
657 // current card may have been a card that was evicted from the
658 // card cache. When the card was inserted into the cache, we had
659 // determined that its region was non-young. While in the cache,
660 // the region may have been freed during a cleanup pause, reallocated
661 // and tagged as young.
662 //
663 // We wish to filter out cards for such a region but the current
664 // thread, if we're running concurrently, may "see" the young type
665 // change at any time (so an earlier "is_young" check may pass or
666 // fail arbitrarily). We tell the iteration code to perform this
667 // filtering when it has been determined that there has been an actual
668 // allocation in this region and making it safe to check the young type.
669 bool filter_young = true;
671 HeapWord* stop_point =
672 r->oops_on_card_seq_iterate_careful(dirtyRegion,
673 &filter_then_update_rs_oop_cl,
674 filter_young,
675 card_ptr);
677 // If stop_point is non-null, then we encountered an unallocated region
678 // (perhaps the unfilled portion of a TLAB.) For now, we'll dirty the
679 // card and re-enqueue: if we put off the card until a GC pause, then the
680 // unallocated portion will be filled in. Alternatively, we might try
681 // the full complexity of the technique used in "regular" precleaning.
682 if (stop_point != NULL) {
683 // The card might have gotten re-dirtied and re-enqueued while we
684 // worked. (In fact, it's pretty likely.)
685 if (*card_ptr != CardTableModRefBS::dirty_card_val()) {
686 *card_ptr = CardTableModRefBS::dirty_card_val();
687 MutexLockerEx x(Shared_DirtyCardQ_lock,
688 Mutex::_no_safepoint_check_flag);
689 DirtyCardQueue* sdcq =
690 JavaThread::dirty_card_queue_set().shared_dirty_card_queue();
691 sdcq->enqueue(card_ptr);
692 }
693 } else {
694 out_of_histo.add_entry(filter_then_update_rs_oop_cl.out_of_region());
695 _conc_refine_cards++;
696 }
698 return trigger_cl.value();
699 }
701 bool G1RemSet::concurrentRefineOneCard(jbyte* card_ptr, int worker_i,
702 bool check_for_refs_into_cset) {
703 // If the card is no longer dirty, nothing to do.
704 if (*card_ptr != CardTableModRefBS::dirty_card_val()) {
705 // No need to return that this card contains refs that point
706 // into the collection set.
707 return false;
708 }
710 // Construct the region representing the card.
711 HeapWord* start = _ct_bs->addr_for(card_ptr);
712 // And find the region containing it.
713 HeapRegion* r = _g1->heap_region_containing(start);
714 if (r == NULL) {
715 guarantee(_g1->is_in_permanent(start), "Or else where?");
716 // Again no need to return that this card contains refs that
717 // point into the collection set.
718 return false; // Not in the G1 heap (might be in perm, for example.)
719 }
720 // Why do we have to check here whether a card is on a young region,
721 // given that we dirty young regions and, as a result, the
722 // post-barrier is supposed to filter them out and never to enqueue
723 // them? When we allocate a new region as the "allocation region" we
724 // actually dirty its cards after we release the lock, since card
725 // dirtying while holding the lock was a performance bottleneck. So,
726 // as a result, it is possible for other threads to actually
727 // allocate objects in the region (after the acquire the lock)
728 // before all the cards on the region are dirtied. This is unlikely,
729 // and it doesn't happen often, but it can happen. So, the extra
730 // check below filters out those cards.
731 if (r->is_young()) {
732 return false;
733 }
734 // While we are processing RSet buffers during the collection, we
735 // actually don't want to scan any cards on the collection set,
736 // since we don't want to update remebered sets with entries that
737 // point into the collection set, given that live objects from the
738 // collection set are about to move and such entries will be stale
739 // very soon. This change also deals with a reliability issue which
740 // involves scanning a card in the collection set and coming across
741 // an array that was being chunked and looking malformed. Note,
742 // however, that if evacuation fails, we have to scan any objects
743 // that were not moved and create any missing entries.
744 if (r->in_collection_set()) {
745 return false;
746 }
748 // Should we defer processing the card?
749 //
750 // Previously the result from the insert_cache call would be
751 // either card_ptr (implying that card_ptr was currently "cold"),
752 // null (meaning we had inserted the card ptr into the "hot"
753 // cache, which had some headroom), or a "hot" card ptr
754 // extracted from the "hot" cache.
755 //
756 // Now that the _card_counts cache in the ConcurrentG1Refine
757 // instance is an evicting hash table, the result we get back
758 // could be from evicting the card ptr in an already occupied
759 // bucket (in which case we have replaced the card ptr in the
760 // bucket with card_ptr and "defer" is set to false). To avoid
761 // having a data structure (updates to which would need a lock)
762 // to hold these unprocessed dirty cards, we need to immediately
763 // process card_ptr. The actions needed to be taken on return
764 // from cache_insert are summarized in the following table:
765 //
766 // res defer action
767 // --------------------------------------------------------------
768 // null false card evicted from _card_counts & replaced with
769 // card_ptr; evicted ptr added to hot cache.
770 // No need to process res; immediately process card_ptr
771 //
772 // null true card not evicted from _card_counts; card_ptr added
773 // to hot cache.
774 // Nothing to do.
775 //
776 // non-null false card evicted from _card_counts & replaced with
777 // card_ptr; evicted ptr is currently "cold" or
778 // caused an eviction from the hot cache.
779 // Immediately process res; process card_ptr.
780 //
781 // non-null true card not evicted from _card_counts; card_ptr is
782 // currently cold, or caused an eviction from hot
783 // cache.
784 // Immediately process res; no need to process card_ptr.
787 jbyte* res = card_ptr;
788 bool defer = false;
790 // This gets set to true if the card being refined has references
791 // that point into the collection set.
792 bool oops_into_cset = false;
794 if (_cg1r->use_cache()) {
795 jbyte* res = _cg1r->cache_insert(card_ptr, &defer);
796 if (res != NULL && (res != card_ptr || defer)) {
797 start = _ct_bs->addr_for(res);
798 r = _g1->heap_region_containing(start);
799 if (r == NULL) {
800 assert(_g1->is_in_permanent(start), "Or else where?");
801 } else {
802 // Checking whether the region we got back from the cache
803 // is young here is inappropriate. The region could have been
804 // freed, reallocated and tagged as young while in the cache.
805 // Hence we could see its young type change at any time.
806 //
807 // Process card pointer we get back from the hot card cache. This
808 // will check whether the region containing the card is young
809 // _after_ checking that the region has been allocated from.
810 oops_into_cset = concurrentRefineOneCard_impl(res, worker_i,
811 false /* check_for_refs_into_cset */);
812 // The above call to concurrentRefineOneCard_impl is only
813 // performed if the hot card cache is enabled. This cache is
814 // disabled during an evacuation pause - which is the only
815 // time when we need know if the card contains references
816 // that point into the collection set. Also when the hot card
817 // cache is enabled, this code is executed by the concurrent
818 // refine threads - rather than the GC worker threads - and
819 // concurrentRefineOneCard_impl will return false.
820 assert(!oops_into_cset, "should not see true here");
821 }
822 }
823 }
825 if (!defer) {
826 oops_into_cset =
827 concurrentRefineOneCard_impl(card_ptr, worker_i, check_for_refs_into_cset);
828 // We should only be detecting that the card contains references
829 // that point into the collection set if the current thread is
830 // a GC worker thread.
831 assert(!oops_into_cset || SafepointSynchronize::is_at_safepoint(),
832 "invalid result at non safepoint");
833 }
834 return oops_into_cset;
835 }
837 class HRRSStatsIter: public HeapRegionClosure {
838 size_t _occupied;
839 size_t _total_mem_sz;
840 size_t _max_mem_sz;
841 HeapRegion* _max_mem_sz_region;
842 public:
843 HRRSStatsIter() :
844 _occupied(0),
845 _total_mem_sz(0),
846 _max_mem_sz(0),
847 _max_mem_sz_region(NULL)
848 {}
850 bool doHeapRegion(HeapRegion* r) {
851 if (r->continuesHumongous()) return false;
852 size_t mem_sz = r->rem_set()->mem_size();
853 if (mem_sz > _max_mem_sz) {
854 _max_mem_sz = mem_sz;
855 _max_mem_sz_region = r;
856 }
857 _total_mem_sz += mem_sz;
858 size_t occ = r->rem_set()->occupied();
859 _occupied += occ;
860 return false;
861 }
862 size_t total_mem_sz() { return _total_mem_sz; }
863 size_t max_mem_sz() { return _max_mem_sz; }
864 size_t occupied() { return _occupied; }
865 HeapRegion* max_mem_sz_region() { return _max_mem_sz_region; }
866 };
868 class PrintRSThreadVTimeClosure : public ThreadClosure {
869 public:
870 virtual void do_thread(Thread *t) {
871 ConcurrentG1RefineThread* crt = (ConcurrentG1RefineThread*) t;
872 gclog_or_tty->print(" %5.2f", crt->vtime_accum());
873 }
874 };
876 void G1RemSet::print_summary_info() {
877 G1CollectedHeap* g1 = G1CollectedHeap::heap();
879 #if CARD_REPEAT_HISTO
880 gclog_or_tty->print_cr("\nG1 card_repeat count histogram: ");
881 gclog_or_tty->print_cr(" # of repeats --> # of cards with that number.");
882 card_repeat_count.print_on(gclog_or_tty);
883 #endif
885 if (FILTEROUTOFREGIONCLOSURE_DOHISTOGRAMCOUNT) {
886 gclog_or_tty->print_cr("\nG1 rem-set out-of-region histogram: ");
887 gclog_or_tty->print_cr(" # of CS ptrs --> # of cards with that number.");
888 out_of_histo.print_on(gclog_or_tty);
889 }
890 gclog_or_tty->print_cr("\n Concurrent RS processed %d cards",
891 _conc_refine_cards);
892 DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
893 jint tot_processed_buffers =
894 dcqs.processed_buffers_mut() + dcqs.processed_buffers_rs_thread();
895 gclog_or_tty->print_cr(" Of %d completed buffers:", tot_processed_buffers);
896 gclog_or_tty->print_cr(" %8d (%5.1f%%) by conc RS threads.",
897 dcqs.processed_buffers_rs_thread(),
898 100.0*(float)dcqs.processed_buffers_rs_thread()/
899 (float)tot_processed_buffers);
900 gclog_or_tty->print_cr(" %8d (%5.1f%%) by mutator threads.",
901 dcqs.processed_buffers_mut(),
902 100.0*(float)dcqs.processed_buffers_mut()/
903 (float)tot_processed_buffers);
904 gclog_or_tty->print_cr(" Conc RS threads times(s)");
905 PrintRSThreadVTimeClosure p;
906 gclog_or_tty->print(" ");
907 g1->concurrent_g1_refine()->threads_do(&p);
908 gclog_or_tty->print_cr("");
910 HRRSStatsIter blk;
911 g1->heap_region_iterate(&blk);
912 gclog_or_tty->print_cr(" Total heap region rem set sizes = " SIZE_FORMAT "K."
913 " Max = " SIZE_FORMAT "K.",
914 blk.total_mem_sz()/K, blk.max_mem_sz()/K);
915 gclog_or_tty->print_cr(" Static structures = " SIZE_FORMAT "K,"
916 " free_lists = " SIZE_FORMAT "K.",
917 HeapRegionRemSet::static_mem_size()/K,
918 HeapRegionRemSet::fl_mem_size()/K);
919 gclog_or_tty->print_cr(" %d occupied cards represented.",
920 blk.occupied());
921 gclog_or_tty->print_cr(" Max sz region = [" PTR_FORMAT ", " PTR_FORMAT " )"
922 ", cap = " SIZE_FORMAT "K, occ = " SIZE_FORMAT "K.",
923 blk.max_mem_sz_region()->bottom(), blk.max_mem_sz_region()->end(),
924 (blk.max_mem_sz_region()->rem_set()->mem_size() + K - 1)/K,
925 (blk.max_mem_sz_region()->rem_set()->occupied() + K - 1)/K);
926 gclog_or_tty->print_cr(" Did %d coarsenings.", HeapRegionRemSet::n_coarsenings());
927 }
929 void G1RemSet::prepare_for_verify() {
930 if (G1HRRSFlushLogBuffersOnVerify &&
931 (VerifyBeforeGC || VerifyAfterGC)
932 && !_g1->full_collection()) {
933 cleanupHRRS();
934 _g1->set_refine_cte_cl_concurrency(false);
935 if (SafepointSynchronize::is_at_safepoint()) {
936 DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
937 dcqs.concatenate_logs();
938 }
939 bool cg1r_use_cache = _cg1r->use_cache();
940 _cg1r->set_use_cache(false);
941 DirtyCardQueue into_cset_dcq(&_g1->into_cset_dirty_card_queue_set());
942 updateRS(&into_cset_dcq, 0);
943 _g1->into_cset_dirty_card_queue_set().clear();
944 _cg1r->set_use_cache(cg1r_use_cache);
946 assert(JavaThread::dirty_card_queue_set().completed_buffers_num() == 0, "All should be consumed");
947 }
948 }