Mon, 03 Aug 2009 12:59:30 -0700
6865703: G1: Parallelize hot card cache cleanup
Summary: Have the GC worker threads clear the hot card cache in parallel by having each worker thread claim a chunk of the card cache and process the cards in that chunk. The size of the chunks that each thread will claim is determined at VM initialization from the size of the card cache and the number of worker threads.
Reviewed-by: jmasa, tonyp
1 /*
2 * Copyright 2001-2009 Sun Microsystems, Inc. All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
20 * CA 95054 USA or visit www.sun.com if you need additional information or
21 * have any questions.
22 *
23 */
25 #include "incls/_precompiled.incl"
26 #include "incls/_g1RemSet.cpp.incl"
// Compile-time switch: when non-zero, builds a per-card histogram of how
// often each card is refined (diagnostic instrumentation only).
#define CARD_REPEAT_HISTO 0

#if CARD_REPEAT_HISTO
// Number of entries in the card-frequency table (one per card in the heap).
static size_t ct_freq_sz;
// Per-card refinement counters; each saturates at 100 (fits in a jbyte).
static jbyte* ct_freq = NULL;

// Lazily allocate and zero the card-frequency table, sized from the heap
// size in bytes.  Idempotent: only the first call allocates.
void init_ct_freq_table(size_t heap_sz_bytes) {
  if (ct_freq == NULL) {
    ct_freq_sz = heap_sz_bytes/CardTableModRefBS::card_size;
    ct_freq = new jbyte[ct_freq_sz];
    for (size_t j = 0; j < ct_freq_sz; j++) ct_freq[j] = 0;
  }
}

// Record one refinement of the card at "index".
// Note: "0 <= index" is vacuously true for an unsigned type; the
// upper-bound check is the meaningful one.
void ct_freq_note_card(size_t index) {
  assert(0 <= index && index < ct_freq_sz, "Bounds error.");
  if (ct_freq[index] < 100) { ct_freq[index]++; }
}

// Histogram of per-card repeat counts, bucketed in steps of 10.
static IntHistogram card_repeat_count(10, 10);

// Fold the current per-card counters into the histogram and reset the
// counters for the next cycle.
void ct_freq_update_histo_and_reset() {
  for (size_t j = 0; j < ct_freq_sz; j++) {
    card_repeat_count.add_entry(ct_freq[j]);
    ct_freq[j] = 0;
  }
}
#endif
59 class IntoCSOopClosure: public OopsInHeapRegionClosure {
60 OopsInHeapRegionClosure* _blk;
61 G1CollectedHeap* _g1;
62 public:
63 IntoCSOopClosure(G1CollectedHeap* g1, OopsInHeapRegionClosure* blk) :
64 _g1(g1), _blk(blk) {}
65 void set_region(HeapRegion* from) {
66 _blk->set_region(from);
67 }
68 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
69 virtual void do_oop( oop* p) { do_oop_work(p); }
70 template <class T> void do_oop_work(T* p) {
71 oop obj = oopDesc::load_decode_heap_oop(p);
72 if (_g1->obj_in_cs(obj)) _blk->do_oop(p);
73 }
74 bool apply_to_weak_ref_discovered_field() { return true; }
75 bool idempotent() { return true; }
76 };
78 class IntoCSRegionClosure: public HeapRegionClosure {
79 IntoCSOopClosure _blk;
80 G1CollectedHeap* _g1;
81 public:
82 IntoCSRegionClosure(G1CollectedHeap* g1, OopsInHeapRegionClosure* blk) :
83 _g1(g1), _blk(g1, blk) {}
84 bool doHeapRegion(HeapRegion* r) {
85 if (!r->in_collection_set()) {
86 _blk.set_region(r);
87 if (r->isHumongous()) {
88 if (r->startsHumongous()) {
89 oop obj = oop(r->bottom());
90 obj->oop_iterate(&_blk);
91 }
92 } else {
93 r->oop_before_save_marks_iterate(&_blk);
94 }
95 }
96 return false;
97 }
98 };
100 void
101 StupidG1RemSet::oops_into_collection_set_do(OopsInHeapRegionClosure* oc,
102 int worker_i) {
103 IntoCSRegionClosure rc(_g1, oc);
104 _g1->heap_region_iterate(&rc);
105 }
// Verification closure: asserts that no reference reachable from a
// clean card points into the collection set (i.e. that no remembered-set
// entry was missed).
class VerifyRSCleanCardOopClosure: public OopClosure {
  G1CollectedHeap* _g1;
public:
  VerifyRSCleanCardOopClosure(G1CollectedHeap* g1) : _g1(g1) {}

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop( oop* p) { do_oop_work(p); }
  template <class T> void do_oop_work(T* p) {
    oop obj = oopDesc::load_decode_heap_oop(p);
    // "to" may be NULL when obj is not in the G1 heap; that case is fine.
    HeapRegion* to = _g1->heap_region_containing(obj);
    guarantee(to == NULL || !to->in_collection_set(),
              "Missed a rem set member.");
  }
};
// Construct the remembered-set machinery: a sequential task set plus one
// per-worker buffer of "new references" discovered during evacuation.
HRInto_G1RemSet::HRInto_G1RemSet(G1CollectedHeap* g1, CardTableModRefBS* ct_bs)
  : G1RemSet(g1), _ct_bs(ct_bs), _g1p(_g1->g1_policy()),
    _cg1r(g1->concurrent_g1_refine()),
    _par_traversal_in_progress(false), _new_refs(NULL),
    _cards_scanned(NULL), _total_cards_scanned(0)
{
  _seq_task = new SubTasksDone(NumSeqTasks);
  guarantee(n_workers() > 0, "There should be some workers");
  // One growable array per worker so workers can record new refs without
  // synchronization; C-heap allocated with an initial capacity of 8192.
  _new_refs = NEW_C_HEAP_ARRAY(GrowableArray<OopOrNarrowOopStar>*, n_workers());
  for (uint i = 0; i < n_workers(); i++) {
    _new_refs[i] = new (ResourceObj::C_HEAP) GrowableArray<OopOrNarrowOopStar>(8192,true);
  }
}
// Release the sequential task set and all per-worker new-reference
// buffers allocated in the constructor.
HRInto_G1RemSet::~HRInto_G1RemSet() {
  delete _seq_task;
  for (uint i = 0; i < n_workers(); i++) {
    delete _new_refs[i];
  }
  FREE_C_HEAP_ARRAY(GrowableArray<OopOrNarrowOopStar>*, _new_refs);
}
144 void CountNonCleanMemRegionClosure::do_MemRegion(MemRegion mr) {
145 if (_g1->is_in_g1_reserved(mr.start())) {
146 _n += (int) ((mr.byte_size() / CardTableModRefBS::card_size));
147 if (_start_first == NULL) _start_first = mr.start();
148 }
149 }
// Scans the remembered sets of collection-set regions.  Each worker
// claims regions (and, within them, individual cards) so that work can
// proceed in parallel; a second "try claimed" pass lets workers steal
// unfinished claimed regions, using an adaptive skip distance to reduce
// contention on card claims.
class ScanRSClosure : public HeapRegionClosure {
  size_t _cards_done, _cards;      // cards scanned / cards examined
  G1CollectedHeap* _g1h;
  OopsInHeapRegionClosure* _oc;    // closure applied to each card's oops
  G1BlockOffsetSharedArray* _bot_shared;
  CardTableModRefBS *_ct_bs;
  int _worker_i;
  bool _try_claimed;               // second-pass (work-stealing) mode
  size_t _min_skip_distance, _max_skip_distance;
public:
  ScanRSClosure(OopsInHeapRegionClosure* oc, int worker_i) :
    _oc(oc),
    _cards(0),
    _cards_done(0),
    _worker_i(worker_i),
    _try_claimed(false)
  {
    _g1h = G1CollectedHeap::heap();
    _bot_shared = _g1h->bot_shared();
    _ct_bs = (CardTableModRefBS*) (_g1h->barrier_set());
    // Skip distances (in cards) used when card claims fail in the
    // try-claimed pass; bounded by the number of parallel threads.
    _min_skip_distance = 16;
    _max_skip_distance = 2 * _g1h->n_par_threads() * _min_skip_distance;
  }

  // Switch to the second pass: also work on claimed-but-incomplete regions.
  void set_try_claimed() { _try_claimed = true; }

  // Scan the single card at "index" inside region "r", applying _oc to
  // the oops in the card's intersection with the used-at-save-marks area.
  void scanCard(size_t index, HeapRegion *r) {
    _cards_done++;
    DirtyCardToOopClosure* cl =
      r->new_dcto_closure(_oc,
                         CardTableModRefBS::Precise,
                         HeapRegionDCTOC::IntoCSFilterKind);

    // Set the "from" region in the closure.
    _oc->set_region(r);
    HeapWord* card_start = _bot_shared->address_for_index(index);
    HeapWord* card_end = card_start + G1BlockOffsetSharedArray::N_words;
    Space *sp = SharedHeap::heap()->space_containing(card_start);
    MemRegion sm_region;
    // NOTE(review): both branches below are identical; the distinction
    // the comments describe is not reflected in the code — confirm
    // whether the parallel case was meant to use a different bound.
    if (ParallelGCThreads > 0) {
      // first find the used area
      sm_region = sp->used_region_at_save_marks();
    } else {
      // The closure is not idempotent.  We shouldn't look at objects
      // allocated during the GC.
      sm_region = sp->used_region_at_save_marks();
    }
    MemRegion mr = sm_region.intersection(MemRegion(card_start,card_end));
    if (!mr.is_empty()) {
      cl->do_MemRegion(mr);
    }
  }

  // Debug helper: print which card of which region is being processed.
  void printCard(HeapRegion* card_region, size_t card_index,
                 HeapWord* card_start) {
    gclog_or_tty->print_cr("T %d Region [" PTR_FORMAT ", " PTR_FORMAT ") "
                           "RS names card %p: "
                           "[" PTR_FORMAT ", " PTR_FORMAT ")",
                           _worker_i,
                           card_region->bottom(), card_region->end(),
                           card_index,
                           card_start, card_start + G1BlockOffsetSharedArray::N_words);
  }

  // Process one collection-set region: iterate its remembered set and
  // scan each referenced card (unless dirty — those are handled by
  // updateRS — or already claimed by another worker).
  bool doHeapRegion(HeapRegion* r) {
    assert(r->in_collection_set(), "should only be called on elements of CS.");
    HeapRegionRemSet* hrrs = r->rem_set();
    if (hrrs->iter_is_complete()) return false; // All done.
    if (!_try_claimed && !hrrs->claim_iter()) return false;
    _g1h->push_dirty_cards_region(r);
    // If we didn't return above, then
    //   _try_claimed || r->claim_iter()
    // is true: either we're supposed to work on claimed-but-not-complete
    // regions, or we successfully claimed the region.
    HeapRegionRemSetIterator* iter = _g1h->rem_set_iterator(_worker_i);
    hrrs->init_iterator(iter);
    size_t card_index;
    // skip_distance/jump_to_card implement adaptive skipping in the
    // try-claimed pass: on a claim failure, jump ahead to reduce
    // contention; on useful work, shrink the jump.
    size_t skip_distance = 0, current_card = 0, jump_to_card = 0;
    while (iter->has_next(card_index)) {
      if (current_card < jump_to_card) {
        ++current_card;
        continue;
      }
      HeapWord* card_start = _g1h->bot_shared()->address_for_index(card_index);
#if 0
      gclog_or_tty->print("Rem set iteration yielded card [" PTR_FORMAT ", " PTR_FORMAT ").\n",
                          card_start, card_start + CardTableModRefBS::card_size_in_words);
#endif

      HeapRegion* card_region = _g1h->heap_region_containing(card_start);
      assert(card_region != NULL, "Yielding cards not in the heap?");
      _cards++;

      if (!card_region->is_on_dirty_cards_region_list()) {
        _g1h->push_dirty_cards_region(card_region);
      }

      // If the card is dirty, then we will scan it during updateRS.
      if (!card_region->in_collection_set() && !_ct_bs->is_card_dirty(card_index)) {
        if (!_ct_bs->is_card_claimed(card_index) && _ct_bs->claim_card(card_index)) {
          scanCard(card_index, card_region);
        } else if (_try_claimed) {
          if (jump_to_card == 0 || jump_to_card != current_card) {
            // We did some useful work in the previous iteration.
            // Decrease the distance.
            skip_distance = MAX2(skip_distance >> 1, _min_skip_distance);
          } else {
            // Previous iteration resulted in a claim failure.
            // Increase the distance.
            skip_distance = MIN2(skip_distance << 1, _max_skip_distance);
          }
          jump_to_card = current_card + skip_distance;
        }
      }
      ++current_card;
    }
    // Only the first (claiming) pass marks the iteration complete.
    if (!_try_claimed) {
      hrrs->set_iter_complete();
    }
    return false;
  }
  // Set all cards back to clean.
  void cleanup() {_g1h->cleanUpCardTable();}
  size_t cards_done() { return _cards_done;}
  size_t cards_looked_up() { return _cards;}
};
278 // We want the parallel threads to start their scanning at
279 // different collection set regions to avoid contention.
280 // If we have:
281 // n collection set regions
282 // p threads
283 // Then thread t will start at region t * floor (n/p)
285 HeapRegion* HRInto_G1RemSet::calculateStartRegion(int worker_i) {
286 HeapRegion* result = _g1p->collection_set();
287 if (ParallelGCThreads > 0) {
288 size_t cs_size = _g1p->collection_set_size();
289 int n_workers = _g1->workers()->total_workers();
290 size_t cs_spans = cs_size / n_workers;
291 size_t ind = cs_spans * worker_i;
292 for (size_t i = 0; i < ind; i++)
293 result = result->next_in_collection_set();
294 }
295 return result;
296 }
// Scan the remembered sets of the collection set for this worker:
// a claiming pass followed by a try-claimed (work-stealing) pass, with
// buffering so that closure-application time can be measured separately
// from scan time and reported to the policy.
void HRInto_G1RemSet::scanRS(OopsInHeapRegionClosure* oc, int worker_i) {
  double rs_time_start = os::elapsedTime();
  // Stagger workers across the collection set to reduce contention.
  HeapRegion *startRegion = calculateStartRegion(worker_i);

  BufferingOopsInHeapRegionClosure boc(oc);
  ScanRSClosure scanRScl(&boc, worker_i);
  _g1->collection_set_iterate_from(startRegion, &scanRScl);
  scanRScl.set_try_claimed();
  _g1->collection_set_iterate_from(startRegion, &scanRScl);

  boc.done();
  double closure_app_time_sec = boc.closure_app_seconds();
  // Scan time excludes the time spent applying the buffered closures.
  double scan_rs_time_sec = (os::elapsedTime() - rs_time_start) -
    closure_app_time_sec;
  double closure_app_time_ms = closure_app_time_sec * 1000.0;

  assert( _cards_scanned != NULL, "invariant" );
  _cards_scanned[worker_i] = scanRScl.cards_done();

  _g1p->record_scan_rs_start_time(worker_i, rs_time_start * 1000.0);
  _g1p->record_scan_rs_time(worker_i, scan_rs_time_sec * 1000.0);

  // Closure-application time accrued during scanNewRefsRS is accounted
  // as object-copy time as well.
  double scan_new_refs_time_ms = _g1p->get_scan_new_refs_time(worker_i);
  if (scan_new_refs_time_ms > 0.0) {
    closure_app_time_ms += scan_new_refs_time_ms;
  }

  _g1p->record_obj_copy_time(worker_i, closure_app_time_ms);
}
328 void HRInto_G1RemSet::updateRS(int worker_i) {
329 ConcurrentG1Refine* cg1r = _g1->concurrent_g1_refine();
331 double start = os::elapsedTime();
332 _g1p->record_update_rs_start_time(worker_i, start * 1000.0);
334 // Apply the appropriate closure to all remaining log entries.
335 _g1->iterate_dirty_card_closure(false, worker_i);
336 // Now there should be no dirty cards.
337 if (G1RSLogCheckCardTable) {
338 CountNonCleanMemRegionClosure cl(_g1);
339 _ct_bs->mod_card_iterate(&cl);
340 // XXX This isn't true any more: keeping cards of young regions
341 // marked dirty broke it. Need some reasonable fix.
342 guarantee(cl.n() == 0, "Card table should be clean.");
343 }
345 _g1p->record_update_rs_time(worker_i, (os::elapsedTime() - start) * 1000.0);
346 }
#ifndef PRODUCT
// Debug-only closure: prints the remembered set of every region visited
// and tallies the total number of occupied rem-set entries.
class PrintRSClosure : public HeapRegionClosure {
  int _count;
public:
  PrintRSClosure() : _count(0) {}
  bool doHeapRegion(HeapRegion* r) {
    HeapRegionRemSet* hrrs = r->rem_set();
    _count += (int) hrrs->occupied();
    if (hrrs->occupied() == 0) {
      gclog_or_tty->print("Heap Region [" PTR_FORMAT ", " PTR_FORMAT ") "
                          "has no remset entries\n",
                          r->bottom(), r->end());
    } else {
      gclog_or_tty->print("Printing rem set for heap region [" PTR_FORMAT ", " PTR_FORMAT ")\n",
                          r->bottom(), r->end());
      r->print();
      hrrs->print();
      gclog_or_tty->print("\nDone printing rem set\n");
    }
    // false => keep iterating over all regions.
    return false;
  }
  // Total occupied rem-set entries accumulated so far.
  int occupied() {return _count;}
};
#endif
373 class CountRSSizeClosure: public HeapRegionClosure {
374 size_t _n;
375 size_t _tot;
376 size_t _max;
377 HeapRegion* _max_r;
378 enum {
379 N = 20,
380 MIN = 6
381 };
382 int _histo[N];
383 public:
384 CountRSSizeClosure() : _n(0), _tot(0), _max(0), _max_r(NULL) {
385 for (int i = 0; i < N; i++) _histo[i] = 0;
386 }
387 bool doHeapRegion(HeapRegion* r) {
388 if (!r->continuesHumongous()) {
389 size_t occ = r->rem_set()->occupied();
390 _n++;
391 _tot += occ;
392 if (occ > _max) {
393 _max = occ;
394 _max_r = r;
395 }
396 // Fit it into a histo bin.
397 int s = 1 << MIN;
398 int i = 0;
399 while (occ > (size_t) s && i < (N-1)) {
400 s = s << 1;
401 i++;
402 }
403 _histo[i]++;
404 }
405 return false;
406 }
407 size_t n() { return _n; }
408 size_t tot() { return _tot; }
409 size_t mx() { return _max; }
410 HeapRegion* mxr() { return _max_r; }
411 void print_histo() {
412 int mx = N;
413 while (mx >= 0) {
414 if (_histo[mx-1] > 0) break;
415 mx--;
416 }
417 gclog_or_tty->print_cr("Number of regions with given RS sizes:");
418 gclog_or_tty->print_cr(" <= %8d %8d", 1 << MIN, _histo[0]);
419 for (int i = 1; i < mx-1; i++) {
420 gclog_or_tty->print_cr(" %8d - %8d %8d",
421 (1 << (MIN + i - 1)) + 1,
422 1 << (MIN + i),
423 _histo[i]);
424 }
425 gclog_or_tty->print_cr(" > %8d %8d", (1 << (MIN+mx-2))+1, _histo[mx-1]);
426 }
427 };
429 template <class T> void
430 HRInto_G1RemSet::scanNewRefsRS_work(OopsInHeapRegionClosure* oc,
431 int worker_i) {
432 double scan_new_refs_start_sec = os::elapsedTime();
433 G1CollectedHeap* g1h = G1CollectedHeap::heap();
434 CardTableModRefBS* ct_bs = (CardTableModRefBS*) (g1h->barrier_set());
435 for (int i = 0; i < _new_refs[worker_i]->length(); i++) {
436 T* p = (T*) _new_refs[worker_i]->at(i);
437 oop obj = oopDesc::load_decode_heap_oop(p);
438 // *p was in the collection set when p was pushed on "_new_refs", but
439 // another thread may have processed this location from an RS, so it
440 // might not point into the CS any longer. If so, it's obviously been
441 // processed, and we don't need to do anything further.
442 if (g1h->obj_in_cs(obj)) {
443 HeapRegion* r = g1h->heap_region_containing(p);
445 DEBUG_ONLY(HeapRegion* to = g1h->heap_region_containing(obj));
446 oc->set_region(r);
447 // If "p" has already been processed concurrently, this is
448 // idempotent.
449 oc->do_oop(p);
450 }
451 }
452 _g1p->record_scan_new_refs_time(worker_i,
453 (os::elapsedTime() - scan_new_refs_start_sec)
454 * 1000.0);
455 }
// Delegate to HeapRegionRemSet's static cleanup of heap-region
// remembered-set auxiliary state.
void HRInto_G1RemSet::cleanupHRRS() {
  HeapRegionRemSet::cleanup();
}
// Main per-worker entry point for a pause: update remembered sets from
// the dirty-card logs, scan newly recorded refs, then scan the
// collection set's remembered sets.  Two temporary flags serialize
// updating/scanning on worker 0 unless parallel modes are enabled.
void
HRInto_G1RemSet::oops_into_collection_set_do(OopsInHeapRegionClosure* oc,
                                             int worker_i) {
#if CARD_REPEAT_HISTO
  ct_freq_update_histo_and_reset();
#endif
  // Only one worker clears the hot-card-cache counters.
  if (worker_i == 0) {
    _cg1r->clear_and_record_card_counts();
  }

  // Make this into a command-line flag...
  if (G1RSCountHisto && (ParallelGCThreads == 0 || worker_i == 0)) {
    CountRSSizeClosure count_cl;
    _g1->heap_region_iterate(&count_cl);
    // NOTE(review): if count_cl.n() is 0 the average below divides by
    // zero in float (prints inf/nan) — confirm acceptable for this
    // diagnostic path.
    gclog_or_tty->print_cr("Avg of %d RS counts is %f, max is %d, "
                  "max region is " PTR_FORMAT,
                  count_cl.n(), (float)count_cl.tot()/(float)count_cl.n(),
                  count_cl.mx(), count_cl.mxr());
    count_cl.print_histo();
  }

  if (ParallelGCThreads > 0) {
    // The two flags below were introduced temporarily to serialize
    // the updating and scanning of remembered sets. There are some
    // race conditions when these two operations are done in parallel
    // and they are causing failures. When we resolve said race
    // conditions, we'll revert back to parallel remembered set
    // updating and scanning. See CRs 6677707 and 6677708.
    if (G1ParallelRSetUpdatingEnabled || (worker_i == 0)) {
      updateRS(worker_i);
      scanNewRefsRS(oc, worker_i);
    } else {
      // This worker is idle for the update phase; record zero times so
      // the policy's per-worker accounting stays consistent.
      _g1p->record_update_rs_start_time(worker_i, os::elapsedTime() * 1000.0);
      _g1p->record_update_rs_processed_buffers(worker_i, 0.0);
      _g1p->record_update_rs_time(worker_i, 0.0);
      _g1p->record_scan_new_refs_time(worker_i, 0.0);
    }
    if (G1ParallelRSetScanningEnabled || (worker_i == 0)) {
      scanRS(oc, worker_i);
    } else {
      _g1p->record_scan_rs_start_time(worker_i, os::elapsedTime() * 1000.0);
      _g1p->record_scan_rs_time(worker_i, 0.0);
    }
  } else {
    // Serial mode: worker 0 does everything.
    assert(worker_i == 0, "invariant");
    updateRS(0);
    scanNewRefsRS(oc, 0);
    scanRS(oc, 0);
  }
}
512 void HRInto_G1RemSet::
513 prepare_for_oops_into_collection_set_do() {
514 #if G1_REM_SET_LOGGING
515 PrintRSClosure cl;
516 _g1->collection_set_iterate(&cl);
517 #endif
518 cleanupHRRS();
519 ConcurrentG1Refine* cg1r = _g1->concurrent_g1_refine();
520 _g1->set_refine_cte_cl_concurrency(false);
521 DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
522 dcqs.concatenate_logs();
524 assert(!_par_traversal_in_progress, "Invariant between iterations.");
525 if (ParallelGCThreads > 0) {
526 set_par_traversal(true);
527 _seq_task->set_par_threads((int)n_workers());
528 }
529 guarantee( _cards_scanned == NULL, "invariant" );
530 _cards_scanned = NEW_C_HEAP_ARRAY(size_t, n_workers());
531 for (uint i = 0; i < n_workers(); ++i) {
532 _cards_scanned[i] = 0;
533 }
534 _total_cards_scanned = 0;
535 }
538 class cleanUpIteratorsClosure : public HeapRegionClosure {
539 bool doHeapRegion(HeapRegion *r) {
540 HeapRegionRemSet* hrrs = r->rem_set();
541 hrrs->init_for_par_iteration();
542 return false;
543 }
544 };
546 class UpdateRSetOopsIntoCSImmediate : public OopClosure {
547 G1CollectedHeap* _g1;
548 public:
549 UpdateRSetOopsIntoCSImmediate(G1CollectedHeap* g1) : _g1(g1) { }
550 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
551 virtual void do_oop( oop* p) { do_oop_work(p); }
552 template <class T> void do_oop_work(T* p) {
553 HeapRegion* to = _g1->heap_region_containing(oopDesc::load_decode_heap_oop(p));
554 if (to->in_collection_set()) {
555 to->rem_set()->add_reference(p, 0);
556 }
557 }
558 };
560 class UpdateRSetOopsIntoCSDeferred : public OopClosure {
561 G1CollectedHeap* _g1;
562 CardTableModRefBS* _ct_bs;
563 DirtyCardQueue* _dcq;
564 public:
565 UpdateRSetOopsIntoCSDeferred(G1CollectedHeap* g1, DirtyCardQueue* dcq) :
566 _g1(g1), _ct_bs((CardTableModRefBS*)_g1->barrier_set()), _dcq(dcq) { }
567 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
568 virtual void do_oop( oop* p) { do_oop_work(p); }
569 template <class T> void do_oop_work(T* p) {
570 oop obj = oopDesc::load_decode_heap_oop(p);
571 if (_g1->obj_in_cs(obj)) {
572 size_t card_index = _ct_bs->index_for(p);
573 if (_ct_bs->mark_card_deferred(card_index)) {
574 _dcq->enqueue((jbyte*)_ct_bs->byte_for_index(card_index));
575 }
576 }
577 }
578 };
580 template <class T> void HRInto_G1RemSet::new_refs_iterate_work(OopClosure* cl) {
581 for (size_t i = 0; i < n_workers(); i++) {
582 for (int j = 0; j < _new_refs[i]->length(); j++) {
583 T* p = (T*) _new_refs[i]->at(j);
584 cl->do_oop(p);
585 }
586 }
587 }
// Tear down per-pause state: total up the scanned-card counters, reset
// rem-set iterators, clean the card table, and — if evacuation failed —
// restore remembered-set entries for regions pointing into the CS.
void HRInto_G1RemSet::cleanup_after_oops_into_collection_set_do() {
  guarantee( _cards_scanned != NULL, "invariant" );
  _total_cards_scanned = 0;
  for (uint i = 0; i < n_workers(); ++i)
    _total_cards_scanned += _cards_scanned[i];
  FREE_C_HEAP_ARRAY(size_t, _cards_scanned);
  _cards_scanned = NULL;
  // Cleanup after copy
#if G1_REM_SET_LOGGING
  PrintRSClosure cl;
  _g1->heap_region_iterate(&cl);
#endif
  // Re-enable concurrent refinement, disabled in prepare_for_...().
  _g1->set_refine_cte_cl_concurrency(true);
  // Reset the remembered-set iterators claimed during scanRS.
  cleanUpIteratorsClosure iterClosure;
  _g1->collection_set_iterate(&iterClosure);
  // Set all cards back to clean.
  _g1->cleanUpCardTable();

  if (ParallelGCThreads > 0) {
    set_par_traversal(false);
  }

  if (_g1->evacuation_failed()) {
    // Restore remembered sets for the regions pointing into
    // the collection set.
    if (G1DeferredRSUpdate) {
      DirtyCardQueue dcq(&_g1->dirty_card_queue_set());
      UpdateRSetOopsIntoCSDeferred deferred_update(_g1, &dcq);
      new_refs_iterate(&deferred_update);
    } else {
      UpdateRSetOopsIntoCSImmediate immediate_update(_g1);
      new_refs_iterate(&immediate_update);
    }
  }
  // Empty the per-worker new-reference buffers for the next pause.
  for (uint i = 0; i < n_workers(); i++) {
    _new_refs[i]->clear();
  }

  assert(!_par_traversal_in_progress, "Invariant between iterations.");
}
// Object closure that iterates an object's references with the wrapped
// UpdateRSOopClosure (used during remembered-set updating).
class UpdateRSObjectClosure: public ObjectClosure {
  UpdateRSOopClosure* _update_rs_oop_cl;
public:
  UpdateRSObjectClosure(UpdateRSOopClosure* update_rs_oop_cl) :
    _update_rs_oop_cl(update_rs_oop_cl) {}
  void do_object(oop obj) {
    obj->oop_iterate(_update_rs_oop_cl);
  }

};
// Region closure that delegates per-region scrubbing of remembered sets
// to HeapRegionRemSet::scrub, using the supplied region/card bitmaps.
class ScrubRSClosure: public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  BitMap* _region_bm;
  BitMap* _card_bm;
  CardTableModRefBS* _ctbs;
public:
  ScrubRSClosure(BitMap* region_bm, BitMap* card_bm) :
    _g1h(G1CollectedHeap::heap()),
    _region_bm(region_bm), _card_bm(card_bm),
    _ctbs(NULL)
  {
    // The barrier set is known to be a card-table mod-ref BS.
    ModRefBarrierSet* bs = _g1h->mr_bs();
    guarantee(bs->is_a(BarrierSet::CardTableModRef), "Precondition");
    _ctbs = (CardTableModRefBS*)bs;
  }

  bool doHeapRegion(HeapRegion* r) {
    // "Continues humongous" regions are skipped.
    if (!r->continuesHumongous()) {
      r->rem_set()->scrub(_ctbs, _region_bm, _card_bm);
    }
    return false;
  }
};
665 void HRInto_G1RemSet::scrub(BitMap* region_bm, BitMap* card_bm) {
666 ScrubRSClosure scrub_cl(region_bm, card_bm);
667 _g1->heap_region_iterate(&scrub_cl);
668 }
// Parallel scrub: each worker processes chunks of regions claimed with
// "claim_val".
void HRInto_G1RemSet::scrub_par(BitMap* region_bm, BitMap* card_bm,
                                int worker_num, int claim_val) {
  ScrubRSClosure scrub_cl(region_bm, card_bm);
  _g1->heap_region_par_iterate_chunked(&scrub_cl, worker_num, claim_val);
}
// Histogram (buckets of 50) of per-card out-of-region reference counts
// recorded by concurrentRefineOneCard below.
static IntHistogram out_of_histo(50, 50);
// Refine a single card: if it is still dirty, clean it and scan its
// contents, updating remembered sets for the references found.  The
// card may instead be absorbed by the hot card cache; an eviction from
// the cache is then processed in its place.
void HRInto_G1RemSet::concurrentRefineOneCard(jbyte* card_ptr, int worker_i) {
  // If the card is no longer dirty, nothing to do.
  if (*card_ptr != CardTableModRefBS::dirty_card_val()) return;

  // Construct the region representing the card.
  HeapWord* start = _ct_bs->addr_for(card_ptr);
  // And find the region containing it.
  HeapRegion* r = _g1->heap_region_containing(start);
  if (r == NULL) {
    guarantee(_g1->is_in_permanent(start), "Or else where?");
    return;  // Not in the G1 heap (might be in perm, for example.)
  }
  // Why do we have to check here whether a card is on a young region,
  // given that we dirty young regions and, as a result, the
  // post-barrier is supposed to filter them out and never to enqueue
  // them? When we allocate a new region as the "allocation region" we
  // actually dirty its cards after we release the lock, since card
  // dirtying while holding the lock was a performance bottleneck. So,
  // as a result, it is possible for other threads to actually
  // allocate objects in the region (after they acquire the lock)
  // before all the cards on the region are dirtied. This is unlikely,
  // and it doesn't happen often, but it can happen. So, the extra
  // check below filters out those cards.
  if (r->is_young()) {
    return;
  }
  // While we are processing RSet buffers during the collection, we
  // actually don't want to scan any cards on the collection set,
  // since we don't want to update remembered sets with entries that
  // point into the collection set, given that live objects from the
  // collection set are about to move and such entries will be stale
  // very soon. This change also deals with a reliability issue which
  // involves scanning a card in the collection set and coming across
  // an array that was being chunked and looking malformed. Note,
  // however, that if evacuation fails, we have to scan any objects
  // that were not moved and create any missing entries.
  if (r->in_collection_set()) {
    return;
  }

  // Should we defer it?
  if (_cg1r->use_cache()) {
    card_ptr = _cg1r->cache_insert(card_ptr);
    // If it was not an eviction, nothing to do.
    if (card_ptr == NULL) return;

    // OK, we have to reset the card start, region, etc.
    start = _ct_bs->addr_for(card_ptr);
    r = _g1->heap_region_containing(start);
    if (r == NULL) {
      guarantee(_g1->is_in_permanent(start), "Or else where?");
      return;  // Not in the G1 heap (might be in perm, for example.)
    }
    guarantee(!r->is_young(), "It was evicted in the current minor cycle.");
  }

  HeapWord* end = _ct_bs->addr_for(card_ptr + 1);
  MemRegion dirtyRegion(start, end);

#if CARD_REPEAT_HISTO
  init_ct_freq_table(_g1->g1_reserved_obj_bytes());
  ct_freq_note_card(_ct_bs->index_for(start));
#endif

  UpdateRSOopClosure update_rs_oop_cl(this, worker_i);
  update_rs_oop_cl.set_from(r);
  FilterOutOfRegionClosure filter_then_update_rs_oop_cl(r, &update_rs_oop_cl);

  // Undirty the card.
  *card_ptr = CardTableModRefBS::clean_card_val();
  // We must complete this write before we do any of the reads below.
  OrderAccess::storeload();
  // And process it, being careful of unallocated portions of TLAB's.
  HeapWord* stop_point =
    r->oops_on_card_seq_iterate_careful(dirtyRegion,
                                        &filter_then_update_rs_oop_cl);
  // If stop_point is non-null, then we encountered an unallocated region
  // (perhaps the unfilled portion of a TLAB.)  For now, we'll dirty the
  // card and re-enqueue: if we put off the card until a GC pause, then the
  // unallocated portion will be filled in.  Alternatively, we might try
  // the full complexity of the technique used in "regular" precleaning.
  if (stop_point != NULL) {
    // The card might have gotten re-dirtied and re-enqueued while we
    // worked.  (In fact, it's pretty likely.)
    if (*card_ptr != CardTableModRefBS::dirty_card_val()) {
      *card_ptr = CardTableModRefBS::dirty_card_val();
      MutexLockerEx x(Shared_DirtyCardQ_lock,
                      Mutex::_no_safepoint_check_flag);
      DirtyCardQueue* sdcq =
        JavaThread::dirty_card_queue_set().shared_dirty_card_queue();
      sdcq->enqueue(card_ptr);
    }
  } else {
    // Fully processed: record the out-of-region count and bump the
    // concurrent-refinement statistic.
    out_of_histo.add_entry(filter_then_update_rs_oop_cl.out_of_region());
    _conc_refine_cards++;
  }
}
777 class HRRSStatsIter: public HeapRegionClosure {
778 size_t _occupied;
779 size_t _total_mem_sz;
780 size_t _max_mem_sz;
781 HeapRegion* _max_mem_sz_region;
782 public:
783 HRRSStatsIter() :
784 _occupied(0),
785 _total_mem_sz(0),
786 _max_mem_sz(0),
787 _max_mem_sz_region(NULL)
788 {}
790 bool doHeapRegion(HeapRegion* r) {
791 if (r->continuesHumongous()) return false;
792 size_t mem_sz = r->rem_set()->mem_size();
793 if (mem_sz > _max_mem_sz) {
794 _max_mem_sz = mem_sz;
795 _max_mem_sz_region = r;
796 }
797 _total_mem_sz += mem_sz;
798 size_t occ = r->rem_set()->occupied();
799 _occupied += occ;
800 return false;
801 }
802 size_t total_mem_sz() { return _total_mem_sz; }
803 size_t max_mem_sz() { return _max_mem_sz; }
804 size_t occupied() { return _occupied; }
805 HeapRegion* max_mem_sz_region() { return _max_mem_sz_region; }
806 };
// Thread closure: prints each thread's accumulated refinement vtime.
class PrintRSThreadVTimeClosure : public ThreadClosure {
public:
  virtual void do_thread(Thread *t) {
    // The cast relies on this closure being applied only to concurrent
    // G1 refinement threads (via ConcurrentG1Refine::threads_do).
    ConcurrentG1RefineThread* crt = (ConcurrentG1RefineThread*) t;
    gclog_or_tty->print("    %5.2f", crt->vtime_accum());
  }
};
// Print cumulative remembered-set statistics: refinement counts, the
// processed-buffer breakdown, refinement-thread vtimes, and (under
// G1UseHRIntoRS) per-region rem-set footprint numbers.
void HRInto_G1RemSet::print_summary_info() {
  G1CollectedHeap* g1 = G1CollectedHeap::heap();

#if CARD_REPEAT_HISTO
  gclog_or_tty->print_cr("\nG1 card_repeat count histogram: ");
  gclog_or_tty->print_cr(" # of repeats --> # of cards with that number.");
  card_repeat_count.print_on(gclog_or_tty);
#endif

  if (FILTEROUTOFREGIONCLOSURE_DOHISTOGRAMCOUNT) {
    gclog_or_tty->print_cr("\nG1 rem-set out-of-region histogram: ");
    gclog_or_tty->print_cr(" # of CS ptrs --> # of cards with that number.");
    out_of_histo.print_on(gclog_or_tty);
  }
  gclog_or_tty->print_cr("\n Concurrent RS processed %d cards",
                         _conc_refine_cards);
  DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
  jint tot_processed_buffers =
    dcqs.processed_buffers_mut() + dcqs.processed_buffers_rs_thread();
  // NOTE(review): if tot_processed_buffers is 0, the float divisions
  // below yield inf/nan in the printed percentages — confirm acceptable
  // for this diagnostic output.
  gclog_or_tty->print_cr(" Of %d completed buffers:", tot_processed_buffers);
  gclog_or_tty->print_cr(" %8d (%5.1f%%) by conc RS threads.",
                dcqs.processed_buffers_rs_thread(),
                100.0*(float)dcqs.processed_buffers_rs_thread()/
                (float)tot_processed_buffers);
  gclog_or_tty->print_cr(" %8d (%5.1f%%) by mutator threads.",
                dcqs.processed_buffers_mut(),
                100.0*(float)dcqs.processed_buffers_mut()/
                (float)tot_processed_buffers);
  gclog_or_tty->print_cr(" Conc RS threads times(s)");
  PrintRSThreadVTimeClosure p;
  gclog_or_tty->print(" ");
  g1->concurrent_g1_refine()->threads_do(&p);
  gclog_or_tty->print_cr("");
  if (G1UseHRIntoRS) {
    HRRSStatsIter blk;
    g1->heap_region_iterate(&blk);
    gclog_or_tty->print_cr(" Total heap region rem set sizes = " SIZE_FORMAT "K."
                           " Max = " SIZE_FORMAT "K.",
                           blk.total_mem_sz()/K, blk.max_mem_sz()/K);
    gclog_or_tty->print_cr(" Static structures = " SIZE_FORMAT "K,"
                           " free_lists = " SIZE_FORMAT "K.",
                           HeapRegionRemSet::static_mem_size()/K,
                           HeapRegionRemSet::fl_mem_size()/K);
    gclog_or_tty->print_cr(" %d occupied cards represented.",
                           blk.occupied());
    // NOTE(review): max_mem_sz_region() is NULL if no region was visited
    // (empty heap) — the dereferences below would then crash; presumably
    // unreachable in practice, but worth confirming.
    gclog_or_tty->print_cr(" Max sz region = [" PTR_FORMAT ", " PTR_FORMAT " )"
                           ", cap = " SIZE_FORMAT "K, occ = " SIZE_FORMAT "K.",
                           blk.max_mem_sz_region()->bottom(), blk.max_mem_sz_region()->end(),
                           (blk.max_mem_sz_region()->rem_set()->mem_size() + K - 1)/K,
                           (blk.max_mem_sz_region()->rem_set()->occupied() + K - 1)/K);
    gclog_or_tty->print_cr(" Did %d coarsenings.",
                  HeapRegionRemSet::n_coarsenings());

  }
}
// Flush refinement state so remembered sets are complete before heap
// verification (only when G1HRRSFlushLogBuffersOnVerify and a verify
// option are enabled, and not during a full collection).
void HRInto_G1RemSet::prepare_for_verify() {
  if (G1HRRSFlushLogBuffersOnVerify &&
      (VerifyBeforeGC || VerifyAfterGC)
      &&  !_g1->full_collection()) {
    cleanupHRRS();
    _g1->set_refine_cte_cl_concurrency(false);
    if (SafepointSynchronize::is_at_safepoint()) {
      DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
      dcqs.concatenate_logs();
    }
    // Temporarily disable the hot card cache so updateRS processes
    // cards directly instead of deferring them, then restore it.
    bool cg1r_use_cache = _cg1r->use_cache();
    _cg1r->set_use_cache(false);
    updateRS(0);
    _cg1r->set_use_cache(cg1r_use_cache);

    assert(JavaThread::dirty_card_queue_set().completed_buffers_num() == 0, "All should be consumed");
  }
}
889 }