Mon, 03 Aug 2009 12:59:30 -0700
6865703: G1: Parallelize hot card cache cleanup
Summary: Have the GC worker threads clear the hot card cache in parallel by having each worker thread claim a chunk of the card cache and process the cards in that chunk. The size of the chunks that each thread will claim is determined at VM initialization from the size of the card cache and the number of worker threads.
Reviewed-by: jmasa, tonyp
1 /*
2 * Copyright 2001-2009 Sun Microsystems, Inc. All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
20 * CA 95054 USA or visit www.sun.com if you need additional information or
21 * have any questions.
22 *
23 */
25 // A G1RemSet provides ways of iterating over pointers into a selected
26 // collection set.
// Forward declarations of types that this header only names in pointer
// declarations or in other forward references.
// NOTE(review): "CardTableModRefBarrierSet" is not referenced anywhere in
// the visible declarations (they use "CardTableModRefBS") -- confirm
// whether this forward declaration is still needed.
class CardTableModRefBarrierSet;
class ConcurrentG1Refine;
class G1CollectedHeap;
class HRInto_G1RemSet;
33 class G1RemSet: public CHeapObj {
34 protected:
35 G1CollectedHeap* _g1;
36 unsigned _conc_refine_cards;
37 size_t n_workers();
39 public:
40 G1RemSet(G1CollectedHeap* g1) :
41 _g1(g1), _conc_refine_cards(0)
42 {}
44 // Invoke "blk->do_oop" on all pointers into the CS in object in regions
45 // outside the CS (having invoked "blk->set_region" to set the "from"
46 // region correctly beforehand.) The "worker_i" param is for the
47 // parallel case where the number of the worker thread calling this
48 // function can be helpful in partitioning the work to be done. It
49 // should be the same as the "i" passed to the calling thread's
50 // work(i) function. In the sequential case this param will be ingored.
51 virtual void oops_into_collection_set_do(OopsInHeapRegionClosure* blk,
52 int worker_i) = 0;
54 // Prepare for and cleanup after an oops_into_collection_set_do
55 // call. Must call each of these once before and after (in sequential
56 // code) any threads call oops into collection set do. (This offers an
57 // opportunity to sequential setup and teardown of structures needed by a
58 // parallel iteration over the CS's RS.)
59 virtual void prepare_for_oops_into_collection_set_do() = 0;
60 virtual void cleanup_after_oops_into_collection_set_do() = 0;
62 // If "this" is of the given subtype, return "this", else "NULL".
63 virtual HRInto_G1RemSet* as_HRInto_G1RemSet() { return NULL; }
65 // Record, if necessary, the fact that *p (where "p" is in region "from",
66 // and is, a fortiori, required to be non-NULL) has changed to its new value.
67 virtual void write_ref(HeapRegion* from, oop* p) = 0;
68 virtual void write_ref(HeapRegion* from, narrowOop* p) = 0;
69 virtual void par_write_ref(HeapRegion* from, oop* p, int tid) = 0;
70 virtual void par_write_ref(HeapRegion* from, narrowOop* p, int tid) = 0;
72 // Requires "region_bm" and "card_bm" to be bitmaps with 1 bit per region
73 // or card, respectively, such that a region or card with a corresponding
74 // 0 bit contains no part of any live object. Eliminates any remembered
75 // set entries that correspond to dead heap ranges.
76 virtual void scrub(BitMap* region_bm, BitMap* card_bm) = 0;
77 // Like the above, but assumes is called in parallel: "worker_num" is the
78 // parallel thread id of the current thread, and "claim_val" is the
79 // value that should be used to claim heap regions.
80 virtual void scrub_par(BitMap* region_bm, BitMap* card_bm,
81 int worker_num, int claim_val) = 0;
83 // Refine the card corresponding to "card_ptr". If "sts" is non-NULL,
84 // join and leave around parts that must be atomic wrt GC. (NULL means
85 // being done at a safepoint.)
86 virtual void concurrentRefineOneCard(jbyte* card_ptr, int worker_i) {}
88 // Print any relevant summary info.
89 virtual void print_summary_info() {}
91 // Prepare remebered set for verification.
92 virtual void prepare_for_verify() {};
93 };
96 // The simplest possible G1RemSet: iterates over all objects in non-CS
97 // regions, searching for pointers into the CS.
98 class StupidG1RemSet: public G1RemSet {
99 public:
100 StupidG1RemSet(G1CollectedHeap* g1) : G1RemSet(g1) {}
102 void oops_into_collection_set_do(OopsInHeapRegionClosure* blk,
103 int worker_i);
105 void prepare_for_oops_into_collection_set_do() {}
106 void cleanup_after_oops_into_collection_set_do() {}
108 // Nothing is necessary in the version below.
109 void write_ref(HeapRegion* from, oop* p) {}
110 void write_ref(HeapRegion* from, narrowOop* p) {}
111 void par_write_ref(HeapRegion* from, oop* p, int tid) {}
112 void par_write_ref(HeapRegion* from, narrowOop* p, int tid) {}
114 void scrub(BitMap* region_bm, BitMap* card_bm) {}
115 void scrub_par(BitMap* region_bm, BitMap* card_bm,
116 int worker_num, int claim_val) {}
118 };
120 // A G1RemSet in which each heap region has a rem set that records the
121 // external heap references into it. Uses a mod ref bs to track updates,
122 // so that they can be used to update the individual region remsets.
124 class HRInto_G1RemSet: public G1RemSet {
125 protected:
126 enum SomePrivateConstants {
127 UpdateRStoMergeSync = 0,
128 MergeRStoDoDirtySync = 1,
129 DoDirtySync = 2,
130 LastSync = 3,
132 SeqTask = 0,
133 NumSeqTasks = 1
134 };
136 CardTableModRefBS* _ct_bs;
137 SubTasksDone* _seq_task;
138 G1CollectorPolicy* _g1p;
140 ConcurrentG1Refine* _cg1r;
142 size_t* _cards_scanned;
143 size_t _total_cards_scanned;
145 // _par_traversal_in_progress is "true" iff a parallel traversal is in
146 // progress. If so, then cards added to remembered sets should also have
147 // their references into the collection summarized in "_new_refs".
148 bool _par_traversal_in_progress;
149 void set_par_traversal(bool b) { _par_traversal_in_progress = b; }
150 GrowableArray<OopOrNarrowOopStar>** _new_refs;
151 template <class T> void new_refs_iterate_work(OopClosure* cl);
152 void new_refs_iterate(OopClosure* cl) {
153 if (UseCompressedOops) {
154 new_refs_iterate_work<narrowOop>(cl);
155 } else {
156 new_refs_iterate_work<oop>(cl);
157 }
158 }
160 protected:
161 template <class T> void write_ref_nv(HeapRegion* from, T* p);
162 template <class T> void par_write_ref_nv(HeapRegion* from, T* p, int tid);
164 public:
165 // This is called to reset dual hash tables after the gc pause
166 // is finished and the initial hash table is no longer being
167 // scanned.
168 void cleanupHRRS();
170 HRInto_G1RemSet(G1CollectedHeap* g1, CardTableModRefBS* ct_bs);
171 ~HRInto_G1RemSet();
173 void oops_into_collection_set_do(OopsInHeapRegionClosure* blk,
174 int worker_i);
176 void prepare_for_oops_into_collection_set_do();
177 void cleanup_after_oops_into_collection_set_do();
178 void scanRS(OopsInHeapRegionClosure* oc, int worker_i);
179 template <class T> void scanNewRefsRS_work(OopsInHeapRegionClosure* oc, int worker_i);
180 void scanNewRefsRS(OopsInHeapRegionClosure* oc, int worker_i) {
181 if (UseCompressedOops) {
182 scanNewRefsRS_work<narrowOop>(oc, worker_i);
183 } else {
184 scanNewRefsRS_work<oop>(oc, worker_i);
185 }
186 }
187 void updateRS(int worker_i);
188 HeapRegion* calculateStartRegion(int i);
190 HRInto_G1RemSet* as_HRInto_G1RemSet() { return this; }
192 CardTableModRefBS* ct_bs() { return _ct_bs; }
193 size_t cardsScanned() { return _total_cards_scanned; }
195 // Record, if necessary, the fact that *p (where "p" is in region "from",
196 // which is required to be non-NULL) has changed to a new non-NULL value.
197 // [Below the virtual version calls a non-virtual protected
198 // workhorse that is templatified for narrow vs wide oop.]
199 inline void write_ref(HeapRegion* from, oop* p) {
200 write_ref_nv(from, p);
201 }
202 inline void write_ref(HeapRegion* from, narrowOop* p) {
203 write_ref_nv(from, p);
204 }
205 inline void par_write_ref(HeapRegion* from, oop* p, int tid) {
206 par_write_ref_nv(from, p, tid);
207 }
208 inline void par_write_ref(HeapRegion* from, narrowOop* p, int tid) {
209 par_write_ref_nv(from, p, tid);
210 }
212 bool self_forwarded(oop obj);
214 void scrub(BitMap* region_bm, BitMap* card_bm);
215 void scrub_par(BitMap* region_bm, BitMap* card_bm,
216 int worker_num, int claim_val);
218 virtual void concurrentRefineOneCard(jbyte* card_ptr, int worker_i);
220 virtual void print_summary_info();
221 virtual void prepare_for_verify();
222 };
// Compile-time switch, defined as 0 here.
// NOTE(review): presumably guards remembered-set debug logging in the
// implementation file -- confirm where it is tested.
#define G1_REM_SET_LOGGING 0
226 class CountNonCleanMemRegionClosure: public MemRegionClosure {
227 G1CollectedHeap* _g1;
228 int _n;
229 HeapWord* _start_first;
230 public:
231 CountNonCleanMemRegionClosure(G1CollectedHeap* g1) :
232 _g1(g1), _n(0), _start_first(NULL)
233 {}
234 void do_MemRegion(MemRegion mr);
235 int n() { return _n; };
236 HeapWord* start_first() { return _start_first; }
237 };
239 class UpdateRSOopClosure: public OopClosure {
240 HeapRegion* _from;
241 HRInto_G1RemSet* _rs;
242 int _worker_i;
244 template <class T> void do_oop_work(T* p);
246 public:
247 UpdateRSOopClosure(HRInto_G1RemSet* rs, int worker_i = 0) :
248 _from(NULL), _rs(rs), _worker_i(worker_i) {
249 guarantee(_rs != NULL, "Requires an HRIntoG1RemSet");
250 }
252 void set_from(HeapRegion* from) {
253 assert(from != NULL, "from region must be non-NULL");
254 _from = from;
255 }
257 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
258 virtual void do_oop(oop* p) { do_oop_work(p); }
260 // Override: this closure is idempotent.
261 // bool idempotent() { return true; }
262 bool apply_to_weak_ref_discovered_field() { return true; }
263 };