Mon, 02 Aug 2010 12:51:43 -0700
6814437: G1: remove the _new_refs array
Summary: The per-worker _new_refs array is used to hold references that point into the collection set. It is populated during RSet updating and subsequently processed. In the event of an evacuation failure it is processed again to recreate the RSets of regions in the collection set. Remove the per-worker _new_refs array by processing the references directly. Use a DirtyCardQueue to hold the cards containing the references so that the RSets of regions in the collection set can be recreated when handling an evacuation failure (see the sketch below).
Reviewed-by: iveresov, jmasa, tonyp
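
The essence of the new scheme, as an illustrative sketch (names follow the
code below; this is a summary, not part of the change itself): during RSet
updating each worker refines cards through a closure that enqueues, on a
dedicated DirtyCardQueue, any card found to contain a reference into the
collection set:

    // Sketch only - see RefineRecordRefsIntoCSCardTableEntryClosure below.
    bool do_card_ptr(jbyte* card_ptr, int worker_i) {
      if (_g1rs->concurrentRefineOneCard(card_ptr, worker_i, true)) {
        // 'card_ptr' holds at least one reference into the collection set.
        _into_cset_dcq->enqueue(card_ptr);
      }
      return true;
    }

On a successful pause the queued cards are simply discarded. On an
evacuation failure they are either merged into the deferred-update DCQS
(when G1DeferredRSUpdate is set) or rescanned immediately to rebuild the
RSets of the regions in the collection set.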
/*
 * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "incls/_precompiled.incl"
#include "incls/_g1RemSet.cpp.incl"

#define CARD_REPEAT_HISTO 0

#if CARD_REPEAT_HISTO
static size_t ct_freq_sz;
static jbyte* ct_freq = NULL;

void init_ct_freq_table(size_t heap_sz_bytes) {
  if (ct_freq == NULL) {
    ct_freq_sz = heap_sz_bytes/CardTableModRefBS::card_size;
    ct_freq = new jbyte[ct_freq_sz];
    for (size_t j = 0; j < ct_freq_sz; j++) ct_freq[j] = 0;
  }
}

void ct_freq_note_card(size_t index) {
  // 'index' is unsigned, so only the upper bound needs checking.
  assert(index < ct_freq_sz, "Bounds error.");
  if (ct_freq[index] < 100) { ct_freq[index]++; }
}

static IntHistogram card_repeat_count(10, 10);

void ct_freq_update_histo_and_reset() {
  for (size_t j = 0; j < ct_freq_sz; j++) {
    card_repeat_count.add_entry(ct_freq[j]);
    ct_freq[j] = 0;
  }
}
#endif

class IntoCSOopClosure: public OopsInHeapRegionClosure {
  OopsInHeapRegionClosure* _blk;
  G1CollectedHeap* _g1;
public:
  IntoCSOopClosure(G1CollectedHeap* g1, OopsInHeapRegionClosure* blk) :
    _g1(g1), _blk(blk) {}
  void set_region(HeapRegion* from) {
    _blk->set_region(from);
  }
  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }
  template <class T> void do_oop_work(T* p) {
    oop obj = oopDesc::load_decode_heap_oop(p);
    if (_g1->obj_in_cs(obj)) _blk->do_oop(p);
  }
  bool apply_to_weak_ref_discovered_field() { return true; }
  bool idempotent() { return true; }
};

class IntoCSRegionClosure: public HeapRegionClosure {
  IntoCSOopClosure _blk;
  G1CollectedHeap* _g1;
public:
  IntoCSRegionClosure(G1CollectedHeap* g1, OopsInHeapRegionClosure* blk) :
    _g1(g1), _blk(g1, blk) {}
  bool doHeapRegion(HeapRegion* r) {
    if (!r->in_collection_set()) {
      _blk.set_region(r);
      if (r->isHumongous()) {
        if (r->startsHumongous()) {
          oop obj = oop(r->bottom());
          obj->oop_iterate(&_blk);
        }
      } else {
        r->oop_before_save_marks_iterate(&_blk);
      }
    }
    return false;
  }
};

void
StupidG1RemSet::oops_into_collection_set_do(OopsInHeapRegionClosure* oc,
                                            int worker_i) {
  IntoCSRegionClosure rc(_g1, oc);
  _g1->heap_region_iterate(&rc);
}

class VerifyRSCleanCardOopClosure: public OopClosure {
  G1CollectedHeap* _g1;
public:
  VerifyRSCleanCardOopClosure(G1CollectedHeap* g1) : _g1(g1) {}

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }
  template <class T> void do_oop_work(T* p) {
    oop obj = oopDesc::load_decode_heap_oop(p);
    HeapRegion* to = _g1->heap_region_containing(obj);
    guarantee(to == NULL || !to->in_collection_set(),
              "Missed a rem set member.");
  }
};

HRInto_G1RemSet::HRInto_G1RemSet(G1CollectedHeap* g1, CardTableModRefBS* ct_bs)
  : G1RemSet(g1), _ct_bs(ct_bs), _g1p(_g1->g1_policy()),
    _cg1r(g1->concurrent_g1_refine()),
    _par_traversal_in_progress(false),
    _cset_rs_update_cl(NULL),
    _cards_scanned(NULL), _total_cards_scanned(0)
{
  _seq_task = new SubTasksDone(NumSeqTasks);
  guarantee(n_workers() > 0, "There should be some workers");
  _cset_rs_update_cl = NEW_C_HEAP_ARRAY(OopsInHeapRegionClosure*, n_workers());
  for (uint i = 0; i < n_workers(); i++) {
    _cset_rs_update_cl[i] = NULL;
  }
}

HRInto_G1RemSet::~HRInto_G1RemSet() {
  delete _seq_task;
  for (uint i = 0; i < n_workers(); i++) {
    assert(_cset_rs_update_cl[i] == NULL, "it should be");
  }
  FREE_C_HEAP_ARRAY(OopsInHeapRegionClosure*, _cset_rs_update_cl);
}

void CountNonCleanMemRegionClosure::do_MemRegion(MemRegion mr) {
  if (_g1->is_in_g1_reserved(mr.start())) {
    _n += (int) ((mr.byte_size() / CardTableModRefBS::card_size));
    if (_start_first == NULL) _start_first = mr.start();
  }
}

class ScanRSClosure : public HeapRegionClosure {
  size_t _cards_done, _cards;
  G1CollectedHeap* _g1h;
  OopsInHeapRegionClosure* _oc;
  G1BlockOffsetSharedArray* _bot_shared;
  CardTableModRefBS *_ct_bs;
  int _worker_i;
  int _block_size;
  bool _try_claimed;
public:
  ScanRSClosure(OopsInHeapRegionClosure* oc, int worker_i) :
    _oc(oc),
    _cards(0),
    _cards_done(0),
    _worker_i(worker_i),
    _try_claimed(false)
  {
    _g1h = G1CollectedHeap::heap();
    _bot_shared = _g1h->bot_shared();
    _ct_bs = (CardTableModRefBS*) (_g1h->barrier_set());
    _block_size = MAX2<int>(G1RSetScanBlockSize, 1);
  }

  void set_try_claimed() { _try_claimed = true; }

  void scanCard(size_t index, HeapRegion *r) {
    _cards_done++;
    DirtyCardToOopClosure* cl =
      r->new_dcto_closure(_oc,
                          CardTableModRefBS::Precise,
                          HeapRegionDCTOC::IntoCSFilterKind);

    // Set the "from" region in the closure.
    _oc->set_region(r);
    HeapWord* card_start = _bot_shared->address_for_index(index);
    HeapWord* card_end = card_start + G1BlockOffsetSharedArray::N_words;
    Space *sp = SharedHeap::heap()->space_containing(card_start);
    // First find the used area. (The closure is not idempotent; we
    // shouldn't look at objects allocated during the GC.)
    MemRegion sm_region = sp->used_region_at_save_marks();
    MemRegion mr = sm_region.intersection(MemRegion(card_start, card_end));
    if (!mr.is_empty()) {
      cl->do_MemRegion(mr);
    }
  }

  void printCard(HeapRegion* card_region, size_t card_index,
                 HeapWord* card_start) {
    gclog_or_tty->print_cr("T %d Region [" PTR_FORMAT ", " PTR_FORMAT ") "
                           "RS names card %p: "
                           "[" PTR_FORMAT ", " PTR_FORMAT ")",
                           _worker_i,
                           card_region->bottom(), card_region->end(),
                           card_index,
                           card_start, card_start + G1BlockOffsetSharedArray::N_words);
  }

  bool doHeapRegion(HeapRegion* r) {
    assert(r->in_collection_set(), "should only be called on elements of CS.");
    HeapRegionRemSet* hrrs = r->rem_set();
    if (hrrs->iter_is_complete()) return false; // All done.
    if (!_try_claimed && !hrrs->claim_iter()) return false;
    _g1h->push_dirty_cards_region(r);
    // If we didn't return above, then
    //   _try_claimed || hrrs->claim_iter()
    // is true: either we're supposed to work on claimed-but-not-complete
    // regions, or we successfully claimed the region.
    HeapRegionRemSetIterator* iter = _g1h->rem_set_iterator(_worker_i);
    hrrs->init_iterator(iter);
    size_t card_index;

    // We claim cards in blocks so as to reduce contention. The block size
    // is determined by the G1RSetScanBlockSize parameter.
    size_t jump_to_card = hrrs->iter_claimed_next(_block_size);
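    // Cards with index in [jump_to_card, jump_to_card + _block_size) belong
    // to this thread; earlier cards are skipped below, and reaching the end
    // of the block triggers a claim of the next one.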
    for (size_t current_card = 0; iter->has_next(card_index); current_card++) {
      if (current_card >= jump_to_card + _block_size) {
        jump_to_card = hrrs->iter_claimed_next(_block_size);
      }
      if (current_card < jump_to_card) continue;
      HeapWord* card_start = _g1h->bot_shared()->address_for_index(card_index);
#if 0
      gclog_or_tty->print("Rem set iteration yielded card [" PTR_FORMAT ", " PTR_FORMAT ").\n",
                          card_start, card_start + CardTableModRefBS::card_size_in_words);
#endif

      HeapRegion* card_region = _g1h->heap_region_containing(card_start);
      assert(card_region != NULL, "Yielding cards not in the heap?");
      _cards++;

      if (!card_region->is_on_dirty_cards_region_list()) {
        _g1h->push_dirty_cards_region(card_region);
      }

      // If the card is dirty, then we will scan it during updateRS.
      if (!card_region->in_collection_set() && !_ct_bs->is_card_dirty(card_index)) {
        // We mark the card as "claimed" lazily (so races are possible but they're benign),
        // which reduces the number of duplicate scans (the rsets of the regions in the cset
        // can intersect).
        if (!_ct_bs->is_card_claimed(card_index)) {
          _ct_bs->set_card_claimed(card_index);
          scanCard(card_index, card_region);
        }
      }
    }
    if (!_try_claimed) {
      hrrs->set_iter_complete();
    }
    return false;
  }
  // Set all cards back to clean.
  void cleanup() { _g1h->cleanUpCardTable(); }
  size_t cards_done() { return _cards_done; }
  size_t cards_looked_up() { return _cards; }
};

// We want the parallel threads to start their scanning at
// different collection set regions to avoid contention.
// If we have:
//   n collection set regions
//   p threads
// Then thread t will start at region t * floor(n/p).
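// (For example, with n = 8 regions and p = 4 threads, threads 0..3 start
// at regions 0, 2, 4 and 6 respectively.)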

HeapRegion* HRInto_G1RemSet::calculateStartRegion(int worker_i) {
  HeapRegion* result = _g1p->collection_set();
  if (ParallelGCThreads > 0) {
    size_t cs_size = _g1p->collection_set_size();
    int n_workers = _g1->workers()->total_workers();
    size_t cs_spans = cs_size / n_workers;
    size_t ind      = cs_spans * worker_i;
    for (size_t i = 0; i < ind; i++)
      result = result->next_in_collection_set();
  }
  return result;
}

void HRInto_G1RemSet::scanRS(OopsInHeapRegionClosure* oc, int worker_i) {
  double rs_time_start = os::elapsedTime();
  HeapRegion *startRegion = calculateStartRegion(worker_i);

  ScanRSClosure scanRScl(oc, worker_i);
  _g1->collection_set_iterate_from(startRegion, &scanRScl);
  scanRScl.set_try_claimed();
  _g1->collection_set_iterate_from(startRegion, &scanRScl);

  double scan_rs_time_sec = os::elapsedTime() - rs_time_start;

  assert(_cards_scanned != NULL, "invariant");
  _cards_scanned[worker_i] = scanRScl.cards_done();

  _g1p->record_scan_rs_time(worker_i, scan_rs_time_sec * 1000.0);
}

// Closure used for updating RSets and recording references that
// point into the collection set. Only called during an
// evacuation pause.

class RefineRecordRefsIntoCSCardTableEntryClosure: public CardTableEntryClosure {
  G1RemSet* _g1rs;
  DirtyCardQueue* _into_cset_dcq;
public:
  RefineRecordRefsIntoCSCardTableEntryClosure(G1CollectedHeap* g1h,
                                              DirtyCardQueue* into_cset_dcq) :
    _g1rs(g1h->g1_rem_set()), _into_cset_dcq(into_cset_dcq)
  {}
  bool do_card_ptr(jbyte* card_ptr, int worker_i) {
    // The only time we care about recording cards that
    // contain references that point into the collection set
    // is during RSet updating within an evacuation pause.
    // In this case worker_i should be the id of a GC worker thread.
    assert(SafepointSynchronize::is_at_safepoint(), "not during an evacuation pause");
    assert(worker_i < (int) DirtyCardQueueSet::num_par_ids(), "should be a GC worker");

    if (_g1rs->concurrentRefineOneCard(card_ptr, worker_i, true)) {
      // 'card_ptr' contains references that point into the collection
      // set. We need to record the card in the DCQS
      // (G1CollectedHeap::into_cset_dirty_card_queue_set())
      // that's used for that purpose.
      //
      // Enqueue the card
      _into_cset_dcq->enqueue(card_ptr);
    }
    return true;
  }
};

void HRInto_G1RemSet::updateRS(DirtyCardQueue* into_cset_dcq, int worker_i) {
  double start = os::elapsedTime();
  // Apply the given closure to all remaining log entries.
  RefineRecordRefsIntoCSCardTableEntryClosure into_cset_update_rs_cl(_g1, into_cset_dcq);
  _g1->iterate_dirty_card_closure(&into_cset_update_rs_cl, into_cset_dcq, false, worker_i);

  // Now there should be no dirty cards.
  if (G1RSLogCheckCardTable) {
    CountNonCleanMemRegionClosure cl(_g1);
    _ct_bs->mod_card_iterate(&cl);
    // XXX This isn't true any more: keeping cards of young regions
    // marked dirty broke it. Need some reasonable fix.
    guarantee(cl.n() == 0, "Card table should be clean.");
  }

  _g1p->record_update_rs_time(worker_i, (os::elapsedTime() - start) * 1000.0);
}

#ifndef PRODUCT
class PrintRSClosure : public HeapRegionClosure {
  int _count;
public:
  PrintRSClosure() : _count(0) {}
  bool doHeapRegion(HeapRegion* r) {
    HeapRegionRemSet* hrrs = r->rem_set();
    _count += (int) hrrs->occupied();
    if (hrrs->occupied() == 0) {
      gclog_or_tty->print("Heap Region [" PTR_FORMAT ", " PTR_FORMAT ") "
                          "has no remset entries\n",
                          r->bottom(), r->end());
    } else {
      gclog_or_tty->print("Printing rem set for heap region [" PTR_FORMAT ", " PTR_FORMAT ")\n",
                          r->bottom(), r->end());
      r->print();
      hrrs->print();
      gclog_or_tty->print("\nDone printing rem set\n");
    }
    return false;
  }
  int occupied() { return _count; }
};
#endif

class CountRSSizeClosure: public HeapRegionClosure {
  size_t _n;
  size_t _tot;
  size_t _max;
  HeapRegion* _max_r;
  enum {
    N = 20,
    MIN = 6
  };
  int _histo[N];
public:
  CountRSSizeClosure() : _n(0), _tot(0), _max(0), _max_r(NULL) {
    for (int i = 0; i < N; i++) _histo[i] = 0;
  }
  bool doHeapRegion(HeapRegion* r) {
    if (!r->continuesHumongous()) {
      size_t occ = r->rem_set()->occupied();
      _n++;
      _tot += occ;
      if (occ > _max) {
        _max = occ;
        _max_r = r;
      }
      // Fit it into a histo bin.
      int s = 1 << MIN;
      int i = 0;
      while (occ > (size_t) s && i < (N-1)) {
        s = s << 1;
        i++;
      }
      _histo[i]++;
    }
    return false;
  }
  size_t n() { return _n; }
  size_t tot() { return _tot; }
  size_t mx() { return _max; }
  HeapRegion* mxr() { return _max_r; }
  void print_histo() {
    // Stop at 0 so that we never read _histo[-1] when the histogram is empty.
    int mx = N;
    while (mx > 0) {
      if (_histo[mx-1] > 0) break;
      mx--;
    }
    gclog_or_tty->print_cr("Number of regions with given RS sizes:");
    gclog_or_tty->print_cr(" <= %8d %8d", 1 << MIN, _histo[0]);
    for (int i = 1; i < mx-1; i++) {
      gclog_or_tty->print_cr(" %8d - %8d %8d",
                             (1 << (MIN + i - 1)) + 1,
                             1 << (MIN + i),
                             _histo[i]);
    }
    gclog_or_tty->print_cr(" > %8d %8d", (1 << (MIN+mx-2))+1, _histo[mx-1]);
  }
};

void HRInto_G1RemSet::cleanupHRRS() {
  HeapRegionRemSet::cleanup();
}

void
HRInto_G1RemSet::oops_into_collection_set_do(OopsInHeapRegionClosure* oc,
                                             int worker_i) {
#if CARD_REPEAT_HISTO
  ct_freq_update_histo_and_reset();
#endif
  if (worker_i == 0) {
    _cg1r->clear_and_record_card_counts();
  }

  // Make this into a command-line flag...
  if (G1RSCountHisto && (ParallelGCThreads == 0 || worker_i == 0)) {
    CountRSSizeClosure count_cl;
    _g1->heap_region_iterate(&count_cl);
    gclog_or_tty->print_cr("Avg of %d RS counts is %f, max is %d, "
                           "max region is " PTR_FORMAT,
                           count_cl.n(), (float)count_cl.tot()/(float)count_cl.n(),
                           count_cl.mx(), count_cl.mxr());
    count_cl.print_histo();
  }

  // We cache the 'oc' closure in the appropriate slot of
  // _cset_rs_update_cl for this worker.
  assert(worker_i < (int)n_workers(), "sanity");
  _cset_rs_update_cl[worker_i] = oc;

  // A DirtyCardQueue that is used to hold cards containing references
  // that point into the collection set. This DCQ is associated with a
  // special DirtyCardQueueSet (see g1CollectedHeap.hpp). Under normal
  // circumstances (i.e. the pause successfully completes), these cards
  // are just discarded (there's no need to update the RSets of regions
  // that were in the collection set - after the pause these regions
  // are wholly 'free' of live objects). In the event of an evacuation
  // failure the cards/buffers in this queue set are:
  //   * passed to the DirtyCardQueueSet that is used to manage deferred
  //     RSet updates, or
  //   * scanned for references that point into the collection set
  //     and the RSet of the corresponding region in the collection set
  //     is updated immediately.
  DirtyCardQueue into_cset_dcq(&_g1->into_cset_dirty_card_queue_set());

  if (ParallelGCThreads > 0) {
    // The two flags below were introduced temporarily to serialize
    // the updating and scanning of remembered sets. There are some
    // race conditions when these two operations are done in parallel
    // and they are causing failures. When we resolve said race
    // conditions, we'll revert back to parallel remembered set
    // updating and scanning. See CRs 6677707 and 6677708.
    if (G1UseParallelRSetUpdating || (worker_i == 0)) {
      updateRS(&into_cset_dcq, worker_i);
    } else {
      _g1p->record_update_rs_processed_buffers(worker_i, 0.0);
      _g1p->record_update_rs_time(worker_i, 0.0);
    }
    if (G1UseParallelRSetScanning || (worker_i == 0)) {
      scanRS(oc, worker_i);
    } else {
      _g1p->record_scan_rs_time(worker_i, 0.0);
    }
  } else {
    assert(worker_i == 0, "invariant");
    updateRS(&into_cset_dcq, 0);
    scanRS(oc, 0);
  }

  // We now clear the cached value of _cset_rs_update_cl for this worker.
  _cset_rs_update_cl[worker_i] = NULL;
}

void HRInto_G1RemSet::
prepare_for_oops_into_collection_set_do() {
#if G1_REM_SET_LOGGING
  PrintRSClosure cl;
  _g1->collection_set_iterate(&cl);
#endif
  cleanupHRRS();
  ConcurrentG1Refine* cg1r = _g1->concurrent_g1_refine();
  _g1->set_refine_cte_cl_concurrency(false);
  DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
  dcqs.concatenate_logs();

  assert(!_par_traversal_in_progress, "Invariant between iterations.");
  if (ParallelGCThreads > 0) {
    set_par_traversal(true);
    _seq_task->set_par_threads((int)n_workers());
  }
  guarantee(_cards_scanned == NULL, "invariant");
  _cards_scanned = NEW_C_HEAP_ARRAY(size_t, n_workers());
  for (uint i = 0; i < n_workers(); ++i) {
    _cards_scanned[i] = 0;
  }
  _total_cards_scanned = 0;
}

class cleanUpIteratorsClosure : public HeapRegionClosure {
  bool doHeapRegion(HeapRegion *r) {
    HeapRegionRemSet* hrrs = r->rem_set();
    hrrs->init_for_par_iteration();
    return false;
  }
};

// This closure, applied to a DirtyCardQueueSet, is used to immediately
// update the RSets for the regions in the CSet. For each card it iterates
// through the oops which coincide with that card. It scans the reference
// fields in each oop; when it finds an oop that points into the collection
// set, the RSet for the region containing the referenced object is updated.
// Note: _par_traversal_in_progress in the G1RemSet must be FALSE; otherwise
// the UpdateRSetImmediate closure will cause cards to be enqueued onto
// the DCQS that we're iterating over, causing an infinite loop.
class UpdateRSetCardTableEntryIntoCSetClosure: public CardTableEntryClosure {
  G1CollectedHeap* _g1;
  CardTableModRefBS* _ct_bs;
public:
  UpdateRSetCardTableEntryIntoCSetClosure(G1CollectedHeap* g1,
                                          CardTableModRefBS* bs):
    _g1(g1), _ct_bs(bs)
  { }

  bool do_card_ptr(jbyte* card_ptr, int worker_i) {
    // Construct the region representing the card.
    HeapWord* start = _ct_bs->addr_for(card_ptr);
    // And find the region containing it.
    HeapRegion* r = _g1->heap_region_containing(start);
    assert(r != NULL, "unexpected null");

    // Scan oops in the card looking for references into the collection set.
    HeapWord* end = _ct_bs->addr_for(card_ptr + 1);
    MemRegion scanRegion(start, end);

    UpdateRSetImmediate update_rs_cl(_g1->g1_rem_set());
    FilterIntoCSClosure update_rs_cset_oop_cl(NULL, _g1, &update_rs_cl);
    FilterOutOfRegionClosure filter_then_update_rs_cset_oop_cl(r, &update_rs_cset_oop_cl);

    // We can pass false as the "filter_young" parameter here as:
    //   * we should be in a STW pause,
    //   * the DCQS to which this closure is applied is used to hold
    //     references that point into the collection set from the prior
    //     RSet updating,
    //   * the post-write barrier shouldn't be logging updates to young
    //     regions (but there is a situation where this can happen - see
    //     the comment in HRInto_G1RemSet::concurrentRefineOneCard below -
    //     that should not be applicable here), and
    //   * during actual RSet updating, the filtering of cards in young
    //     regions in HeapRegion::oops_on_card_seq_iterate_careful is
    //     employed.
    // As a result, when this closure is applied to "refs into cset"
    // DCQS, we shouldn't see any cards in young regions.
    update_rs_cl.set_region(r);
    HeapWord* stop_point =
      r->oops_on_card_seq_iterate_careful(scanRegion,
                                          &filter_then_update_rs_cset_oop_cl,
                                          false /* filter_young */);

    // Since this is performed in the event of an evacuation failure,
    // we shouldn't see a non-null stop point.
    assert(stop_point == NULL, "saw an unallocated region");
    return true;
  }
};

void HRInto_G1RemSet::cleanup_after_oops_into_collection_set_do() {
  guarantee(_cards_scanned != NULL, "invariant");
  _total_cards_scanned = 0;
  for (uint i = 0; i < n_workers(); ++i)
    _total_cards_scanned += _cards_scanned[i];
  FREE_C_HEAP_ARRAY(size_t, _cards_scanned);
  _cards_scanned = NULL;
  // Cleanup after copy
#if G1_REM_SET_LOGGING
  PrintRSClosure cl;
  _g1->heap_region_iterate(&cl);
#endif
  _g1->set_refine_cte_cl_concurrency(true);
  cleanUpIteratorsClosure iterClosure;
  _g1->collection_set_iterate(&iterClosure);
  // Set all cards back to clean.
  _g1->cleanUpCardTable();

  if (ParallelGCThreads > 0) {
    set_par_traversal(false);
  }

  DirtyCardQueueSet& into_cset_dcqs = _g1->into_cset_dirty_card_queue_set();
  int into_cset_n_buffers = into_cset_dcqs.completed_buffers_num();

  if (_g1->evacuation_failed()) {
    // Restore remembered sets for the regions pointing into the collection set.

    if (G1DeferredRSUpdate) {
      // If deferred RS updates are enabled then we just need to transfer
      // the completed buffers from (a) the DirtyCardQueueSet used to hold
      // cards that contain references that point into the collection set
      // to (b) the DCQS used to hold the deferred RS updates.
      _g1->dirty_card_queue_set().merge_bufferlists(&into_cset_dcqs);
    } else {

      CardTableModRefBS* bs = (CardTableModRefBS*)_g1->barrier_set();
      UpdateRSetCardTableEntryIntoCSetClosure update_rs_cset_immediate(_g1, bs);

      int n_completed_buffers = 0;
      while (into_cset_dcqs.apply_closure_to_completed_buffer(&update_rs_cset_immediate,
                                                              0, 0, true)) {
        n_completed_buffers++;
      }
      assert(n_completed_buffers == into_cset_n_buffers, "missed some buffers");
    }
  }

  // Free any completed buffers in the DirtyCardQueueSet used to hold cards
  // which contain references that point into the collection set.
  _g1->into_cset_dirty_card_queue_set().clear();
  assert(_g1->into_cset_dirty_card_queue_set().completed_buffers_num() == 0,
         "all buffers should be freed");
  _g1->into_cset_dirty_card_queue_set().clear_n_completed_buffers();

  assert(!_par_traversal_in_progress, "Invariant between iterations.");
}

class UpdateRSObjectClosure: public ObjectClosure {
  UpdateRSOopClosure* _update_rs_oop_cl;
public:
  UpdateRSObjectClosure(UpdateRSOopClosure* update_rs_oop_cl) :
    _update_rs_oop_cl(update_rs_oop_cl) {}
  void do_object(oop obj) {
    obj->oop_iterate(_update_rs_oop_cl);
  }
};

class ScrubRSClosure: public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  BitMap* _region_bm;
  BitMap* _card_bm;
  CardTableModRefBS* _ctbs;
public:
  ScrubRSClosure(BitMap* region_bm, BitMap* card_bm) :
    _g1h(G1CollectedHeap::heap()),
    _region_bm(region_bm), _card_bm(card_bm),
    _ctbs(NULL)
  {
    ModRefBarrierSet* bs = _g1h->mr_bs();
    guarantee(bs->is_a(BarrierSet::CardTableModRef), "Precondition");
    _ctbs = (CardTableModRefBS*)bs;
  }

  bool doHeapRegion(HeapRegion* r) {
    if (!r->continuesHumongous()) {
      r->rem_set()->scrub(_ctbs, _region_bm, _card_bm);
    }
    return false;
  }
};

void HRInto_G1RemSet::scrub(BitMap* region_bm, BitMap* card_bm) {
  ScrubRSClosure scrub_cl(region_bm, card_bm);
  _g1->heap_region_iterate(&scrub_cl);
}

void HRInto_G1RemSet::scrub_par(BitMap* region_bm, BitMap* card_bm,
                                int worker_num, int claim_val) {
  ScrubRSClosure scrub_cl(region_bm, card_bm);
  _g1->heap_region_par_iterate_chunked(&scrub_cl, worker_num, claim_val);
}

static IntHistogram out_of_histo(50, 50);

class TriggerClosure : public OopClosure {
  bool _trigger;
public:
  TriggerClosure() : _trigger(false) { }
  bool value() const { return _trigger; }
  template <class T> void do_oop_nv(T* p) { _trigger = true; }
  virtual void do_oop(oop* p)       { do_oop_nv(p); }
  virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
};

class InvokeIfNotTriggeredClosure: public OopClosure {
  TriggerClosure* _t;
  OopClosure* _oc;
public:
  InvokeIfNotTriggeredClosure(TriggerClosure* t, OopClosure* oc):
    _t(t), _oc(oc) { }
  template <class T> void do_oop_nv(T* p) {
    if (!_t->value()) _oc->do_oop(p);
  }
  virtual void do_oop(oop* p)       { do_oop_nv(p); }
  virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
};

class Mux2Closure : public OopClosure {
  OopClosure* _c1;
  OopClosure* _c2;
public:
  Mux2Closure(OopClosure *c1, OopClosure *c2) : _c1(c1), _c2(c2) { }
  template <class T> void do_oop_nv(T* p) {
    _c1->do_oop(p); _c2->do_oop(p);
  }
  virtual void do_oop(oop* p)       { do_oop_nv(p); }
  virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
};

bool HRInto_G1RemSet::concurrentRefineOneCard_impl(jbyte* card_ptr, int worker_i,
                                                   bool check_for_refs_into_cset) {
  // Construct the region representing the card.
  HeapWord* start = _ct_bs->addr_for(card_ptr);
  // And find the region containing it.
  HeapRegion* r = _g1->heap_region_containing(start);
  assert(r != NULL, "unexpected null");

  HeapWord* end = _ct_bs->addr_for(card_ptr + 1);
  MemRegion dirtyRegion(start, end);

#if CARD_REPEAT_HISTO
  init_ct_freq_table(_g1->g1_reserved_obj_bytes());
  ct_freq_note_card(_ct_bs->index_for(start));
#endif

  UpdateRSOopClosure update_rs_oop_cl(this, worker_i);
  update_rs_oop_cl.set_from(r);
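
  // The closures below are composed so that a single pass over the card
  // both updates the RSets (update_rs_oop_cl) and, when requested, checks
  // whether any reference points into the collection set: trigger_cl
  // latches on the first such reference and invoke_cl suppresses the
  // (now redundant) check for subsequent ones.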
  TriggerClosure trigger_cl;
  FilterIntoCSClosure into_cs_cl(NULL, _g1, &trigger_cl);
  InvokeIfNotTriggeredClosure invoke_cl(&trigger_cl, &into_cs_cl);
  Mux2Closure mux(&invoke_cl, &update_rs_oop_cl);

  FilterOutOfRegionClosure filter_then_update_rs_oop_cl(r,
                        (check_for_refs_into_cset ?
                                (OopClosure*)&mux :
                                (OopClosure*)&update_rs_oop_cl));

  // Undirty the card.
  *card_ptr = CardTableModRefBS::clean_card_val();
  // We must complete this write before we do any of the reads below.
  OrderAccess::storeload();
  // And process it, being careful of unallocated portions of TLAB's.

  // The region for the current card may be a young region. The
  // current card may have been a card that was evicted from the
  // card cache. When the card was inserted into the cache, we had
  // determined that its region was non-young. While in the cache,
  // the region may have been freed during a cleanup pause, reallocated
  // and tagged as young.
  //
  // We wish to filter out cards for such a region but the current
  // thread, if we're running concurrently, may "see" the young type
  // change at any time (so an earlier "is_young" check may pass or
  // fail arbitrarily). We tell the iteration code to perform this
  // filtering when it has been determined that there has been an actual
  // allocation in this region, making it safe to check the young type.
  bool filter_young = true;

  HeapWord* stop_point =
    r->oops_on_card_seq_iterate_careful(dirtyRegion,
                                        &filter_then_update_rs_oop_cl,
                                        filter_young);

  // If stop_point is non-null, then we encountered an unallocated region
  // (perhaps the unfilled portion of a TLAB.) For now, we'll dirty the
  // card and re-enqueue: if we put off the card until a GC pause, then the
  // unallocated portion will be filled in. Alternatively, we might try
  // the full complexity of the technique used in "regular" precleaning.
  if (stop_point != NULL) {
    // The card might have gotten re-dirtied and re-enqueued while we
    // worked. (In fact, it's pretty likely.)
    if (*card_ptr != CardTableModRefBS::dirty_card_val()) {
      *card_ptr = CardTableModRefBS::dirty_card_val();
      MutexLockerEx x(Shared_DirtyCardQ_lock,
                      Mutex::_no_safepoint_check_flag);
      DirtyCardQueue* sdcq =
        JavaThread::dirty_card_queue_set().shared_dirty_card_queue();
      sdcq->enqueue(card_ptr);
    }
  } else {
    out_of_histo.add_entry(filter_then_update_rs_oop_cl.out_of_region());
    _conc_refine_cards++;
  }

  return trigger_cl.value();
}

bool HRInto_G1RemSet::concurrentRefineOneCard(jbyte* card_ptr, int worker_i,
                                              bool check_for_refs_into_cset) {
  // If the card is no longer dirty, nothing to do.
  if (*card_ptr != CardTableModRefBS::dirty_card_val()) {
    // No need to return that this card contains refs that point
    // into the collection set.
    return false;
  }

  // Construct the region representing the card.
  HeapWord* start = _ct_bs->addr_for(card_ptr);
  // And find the region containing it.
  HeapRegion* r = _g1->heap_region_containing(start);
  if (r == NULL) {
    guarantee(_g1->is_in_permanent(start), "Or else where?");
    // Again no need to return that this card contains refs that
    // point into the collection set.
    return false; // Not in the G1 heap (might be in perm, for example.)
  }
  // Why do we have to check here whether a card is on a young region,
  // given that we dirty young regions and, as a result, the
  // post-barrier is supposed to filter them out and never to enqueue
  // them? When we allocate a new region as the "allocation region" we
  // actually dirty its cards after we release the lock, since card
  // dirtying while holding the lock was a performance bottleneck. So,
  // as a result, it is possible for other threads to actually
  // allocate objects in the region (after they acquire the lock)
  // before all the cards on the region are dirtied. This is unlikely,
  // and it doesn't happen often, but it can happen. So, the extra
  // check below filters out those cards.
  if (r->is_young()) {
    return false;
  }
  // While we are processing RSet buffers during the collection, we
  // actually don't want to scan any cards on the collection set,
  // since we don't want to update remembered sets with entries that
  // point into the collection set, given that live objects from the
  // collection set are about to move and such entries will be stale
  // very soon. This change also deals with a reliability issue which
  // involves scanning a card in the collection set and coming across
  // an array that was being chunked and looking malformed. Note,
  // however, that if evacuation fails, we have to scan any objects
  // that were not moved and create any missing entries.
  if (r->in_collection_set()) {
    return false;
  }

  // Should we defer processing the card?
  //
  // Previously the result from the insert_cache call would be
  // either card_ptr (implying that card_ptr was currently "cold"),
  // null (meaning we had inserted the card ptr into the "hot"
  // cache, which had some headroom), or a "hot" card ptr
  // extracted from the "hot" cache.
  //
  // Now that the _card_counts cache in the ConcurrentG1Refine
  // instance is an evicting hash table, the result we get back
  // could be from evicting the card ptr in an already occupied
  // bucket (in which case we have replaced the card ptr in the
  // bucket with card_ptr and "defer" is set to false). To avoid
  // having a data structure (updates to which would need a lock)
  // to hold these unprocessed dirty cards, we need to immediately
  // process card_ptr. The actions needed to be taken on return
  // from cache_insert are summarized in the following table:
  //
  // res      defer   action
  // --------------------------------------------------------------
  // null     false   card evicted from _card_counts & replaced with
  //                  card_ptr; evicted ptr added to hot cache.
  //                  No need to process res; immediately process card_ptr
  //
  // null     true    card not evicted from _card_counts; card_ptr added
  //                  to hot cache.
  //                  Nothing to do.
  //
  // non-null false   card evicted from _card_counts & replaced with
  //                  card_ptr; evicted ptr is currently "cold" or
  //                  caused an eviction from the hot cache.
  //                  Immediately process res; process card_ptr.
  //
  // non-null true    card not evicted from _card_counts; card_ptr is
  //                  currently cold, or caused an eviction from hot
  //                  cache.
  //                  Immediately process res; no need to process card_ptr.
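
  // Restating the table in terms of the code below: the cache-insert
  // result 'res' is processed immediately when it is non-null and either
  // differs from 'card_ptr' or 'defer' is set; 'card_ptr' itself is
  // processed afterwards only when 'defer' is false.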
  jbyte* res = card_ptr;
  bool defer = false;

  // This gets set to true if the card being refined has references
  // that point into the collection set.
  bool oops_into_cset = false;

  if (_cg1r->use_cache()) {
    jbyte* res = _cg1r->cache_insert(card_ptr, &defer);
    if (res != NULL && (res != card_ptr || defer)) {
      start = _ct_bs->addr_for(res);
      r = _g1->heap_region_containing(start);
      if (r == NULL) {
        assert(_g1->is_in_permanent(start), "Or else where?");
      } else {
        // Checking whether the region we got back from the cache
        // is young here is inappropriate. The region could have been
        // freed, reallocated and tagged as young while in the cache.
        // Hence we could see its young type change at any time.
        //
        // Process card pointer we get back from the hot card cache. This
        // will check whether the region containing the card is young
        // _after_ checking that the region has been allocated from.
        oops_into_cset = concurrentRefineOneCard_impl(res, worker_i,
                                                      false /* check_for_refs_into_cset */);
        // The above call to concurrentRefineOneCard_impl is only
        // performed if the hot card cache is enabled. This cache is
        // disabled during an evacuation pause - which is the only
        // time when we need to know if the card contains references
        // that point into the collection set. Also when the hot card
        // cache is enabled, this code is executed by the concurrent
        // refine threads - rather than the GC worker threads - and
        // concurrentRefineOneCard_impl will return false.
        assert(!oops_into_cset, "should not see true here");
      }
    }
  }

  if (!defer) {
    oops_into_cset =
      concurrentRefineOneCard_impl(card_ptr, worker_i, check_for_refs_into_cset);
    // We should only be detecting that the card contains references
    // that point into the collection set if the current thread is
    // a GC worker thread.
    assert(!oops_into_cset || SafepointSynchronize::is_at_safepoint(),
           "invalid result at non safepoint");
  }
  return oops_into_cset;
}

class HRRSStatsIter: public HeapRegionClosure {
  size_t _occupied;
  size_t _total_mem_sz;
  size_t _max_mem_sz;
  HeapRegion* _max_mem_sz_region;
public:
  HRRSStatsIter() :
    _occupied(0),
    _total_mem_sz(0),
    _max_mem_sz(0),
    _max_mem_sz_region(NULL)
  {}

  bool doHeapRegion(HeapRegion* r) {
    if (r->continuesHumongous()) return false;
    size_t mem_sz = r->rem_set()->mem_size();
    if (mem_sz > _max_mem_sz) {
      _max_mem_sz = mem_sz;
      _max_mem_sz_region = r;
    }
    _total_mem_sz += mem_sz;
    size_t occ = r->rem_set()->occupied();
    _occupied += occ;
    return false;
  }
  size_t total_mem_sz() { return _total_mem_sz; }
  size_t max_mem_sz() { return _max_mem_sz; }
  size_t occupied() { return _occupied; }
  HeapRegion* max_mem_sz_region() { return _max_mem_sz_region; }
};

class PrintRSThreadVTimeClosure : public ThreadClosure {
public:
  virtual void do_thread(Thread *t) {
    ConcurrentG1RefineThread* crt = (ConcurrentG1RefineThread*) t;
    gclog_or_tty->print(" %5.2f", crt->vtime_accum());
  }
};

void HRInto_G1RemSet::print_summary_info() {
  G1CollectedHeap* g1 = G1CollectedHeap::heap();

#if CARD_REPEAT_HISTO
  gclog_or_tty->print_cr("\nG1 card_repeat count histogram: ");
  gclog_or_tty->print_cr(" # of repeats --> # of cards with that number.");
  card_repeat_count.print_on(gclog_or_tty);
#endif

  if (FILTEROUTOFREGIONCLOSURE_DOHISTOGRAMCOUNT) {
    gclog_or_tty->print_cr("\nG1 rem-set out-of-region histogram: ");
    gclog_or_tty->print_cr(" # of CS ptrs --> # of cards with that number.");
    out_of_histo.print_on(gclog_or_tty);
  }
  gclog_or_tty->print_cr("\n Concurrent RS processed %d cards",
                         _conc_refine_cards);
  DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
  jint tot_processed_buffers =
    dcqs.processed_buffers_mut() + dcqs.processed_buffers_rs_thread();
  gclog_or_tty->print_cr(" Of %d completed buffers:", tot_processed_buffers);
  gclog_or_tty->print_cr(" %8d (%5.1f%%) by conc RS threads.",
                         dcqs.processed_buffers_rs_thread(),
                         100.0*(float)dcqs.processed_buffers_rs_thread()/
                         (float)tot_processed_buffers);
  gclog_or_tty->print_cr(" %8d (%5.1f%%) by mutator threads.",
                         dcqs.processed_buffers_mut(),
                         100.0*(float)dcqs.processed_buffers_mut()/
                         (float)tot_processed_buffers);
  gclog_or_tty->print_cr(" Conc RS threads times(s)");
  PrintRSThreadVTimeClosure p;
  gclog_or_tty->print(" ");
  g1->concurrent_g1_refine()->threads_do(&p);
  gclog_or_tty->print_cr("");

  if (G1UseHRIntoRS) {
    HRRSStatsIter blk;
    g1->heap_region_iterate(&blk);
    gclog_or_tty->print_cr(" Total heap region rem set sizes = " SIZE_FORMAT "K."
                           " Max = " SIZE_FORMAT "K.",
                           blk.total_mem_sz()/K, blk.max_mem_sz()/K);
    gclog_or_tty->print_cr(" Static structures = " SIZE_FORMAT "K,"
                           " free_lists = " SIZE_FORMAT "K.",
                           HeapRegionRemSet::static_mem_size()/K,
                           HeapRegionRemSet::fl_mem_size()/K);
    gclog_or_tty->print_cr(" %d occupied cards represented.",
                           blk.occupied());
    gclog_or_tty->print_cr(" Max sz region = [" PTR_FORMAT ", " PTR_FORMAT " )"
                           ", cap = " SIZE_FORMAT "K, occ = " SIZE_FORMAT "K.",
                           blk.max_mem_sz_region()->bottom(), blk.max_mem_sz_region()->end(),
                           (blk.max_mem_sz_region()->rem_set()->mem_size() + K - 1)/K,
                           (blk.max_mem_sz_region()->rem_set()->occupied() + K - 1)/K);
    gclog_or_tty->print_cr(" Did %d coarsenings.",
                           HeapRegionRemSet::n_coarsenings());
  }
}

void HRInto_G1RemSet::prepare_for_verify() {
  if (G1HRRSFlushLogBuffersOnVerify &&
      (VerifyBeforeGC || VerifyAfterGC)
      && !_g1->full_collection()) {
    cleanupHRRS();
    _g1->set_refine_cte_cl_concurrency(false);
    if (SafepointSynchronize::is_at_safepoint()) {
      DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
      dcqs.concatenate_logs();
    }
    bool cg1r_use_cache = _cg1r->use_cache();
    _cg1r->set_use_cache(false);
    DirtyCardQueue into_cset_dcq(&_g1->into_cset_dirty_card_queue_set());
    updateRS(&into_cset_dcq, 0);
    _g1->into_cset_dirty_card_queue_set().clear();
    _cg1r->set_use_cache(cg1r_use_cache);

    assert(JavaThread::dirty_card_queue_set().completed_buffers_num() == 0, "All should be consumed");
  }
}