Tue, 19 May 2009 04:05:31 -0700
6819065: G1: eliminate high serial card table clearing time
Reviewed-by: iveresov, tonyp
1 /*
2 * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
20 * CA 95054 USA or visit www.sun.com if you need additional information or
21 * have any questions.
22 *
23 */
25 #include "incls/_precompiled.incl"
26 #include "incls/_concurrentG1Refine.cpp.incl"
28 ConcurrentG1Refine::ConcurrentG1Refine() :
29 _card_counts(NULL), _cur_card_count_histo(NULL), _cum_card_count_histo(NULL),
30 _hot_cache(NULL),
31 _def_use_cache(false), _use_cache(false),
32 _n_periods(0), _total_cards(0), _total_travs(0),
33 _threads(NULL), _n_threads(0)
34 {
35 if (G1ConcRefine) {
36 _n_threads = (int)thread_num();
37 if (_n_threads > 0) {
38 _threads = NEW_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _n_threads);
39 int worker_id_offset = (int)DirtyCardQueueSet::num_par_ids();
40 ConcurrentG1RefineThread *next = NULL;
41 for (int i = _n_threads - 1; i >= 0; i--) {
42 ConcurrentG1RefineThread* t = new ConcurrentG1RefineThread(this, next, worker_id_offset, i);
43 assert(t != NULL, "Conc refine should have been created");
44 assert(t->cg1r() == this, "Conc refine thread should refer to this");
45 _threads[i] = t;
46 next = t;
47 }
48 }
49 }
50 }
52 size_t ConcurrentG1Refine::thread_num() {
53 if (G1ConcRefine) {
54 return (G1ParallelRSetThreads > 0) ? G1ParallelRSetThreads : ParallelGCThreads;
55 }
56 return 0;
57 }
59 void ConcurrentG1Refine::init() {
60 if (G1ConcRSLogCacheSize > 0 || G1ConcRSCountTraversals) {
61 G1CollectedHeap* g1h = G1CollectedHeap::heap();
62 _n_card_counts =
63 (unsigned) (g1h->g1_reserved_obj_bytes() >> CardTableModRefBS::card_shift);
64 _card_counts = NEW_C_HEAP_ARRAY(unsigned char, _n_card_counts);
65 for (size_t i = 0; i < _n_card_counts; i++) _card_counts[i] = 0;
66 ModRefBarrierSet* bs = g1h->mr_bs();
67 guarantee(bs->is_a(BarrierSet::CardTableModRef), "Precondition");
68 CardTableModRefBS* ctbs = (CardTableModRefBS*)bs;
69 _ct_bot = ctbs->byte_for_const(g1h->reserved_region().start());
70 if (G1ConcRSCountTraversals) {
71 _cur_card_count_histo = NEW_C_HEAP_ARRAY(unsigned, 256);
72 _cum_card_count_histo = NEW_C_HEAP_ARRAY(unsigned, 256);
73 for (int i = 0; i < 256; i++) {
74 _cur_card_count_histo[i] = 0;
75 _cum_card_count_histo[i] = 0;
76 }
77 }
78 }
79 if (G1ConcRSLogCacheSize > 0) {
80 _def_use_cache = true;
81 _use_cache = true;
82 _hot_cache_size = (1 << G1ConcRSLogCacheSize);
83 _hot_cache = NEW_C_HEAP_ARRAY(jbyte*, _hot_cache_size);
84 _n_hot = 0;
85 _hot_cache_idx = 0;
86 }
87 }
89 void ConcurrentG1Refine::stop() {
90 if (_threads != NULL) {
91 for (int i = 0; i < _n_threads; i++) {
92 _threads[i]->stop();
93 }
94 }
95 }
97 ConcurrentG1Refine::~ConcurrentG1Refine() {
98 if (G1ConcRSLogCacheSize > 0 || G1ConcRSCountTraversals) {
99 assert(_card_counts != NULL, "Logic");
100 FREE_C_HEAP_ARRAY(unsigned char, _card_counts);
101 assert(_cur_card_count_histo != NULL, "Logic");
102 FREE_C_HEAP_ARRAY(unsigned, _cur_card_count_histo);
103 assert(_cum_card_count_histo != NULL, "Logic");
104 FREE_C_HEAP_ARRAY(unsigned, _cum_card_count_histo);
105 }
106 if (G1ConcRSLogCacheSize > 0) {
107 assert(_hot_cache != NULL, "Logic");
108 FREE_C_HEAP_ARRAY(jbyte*, _hot_cache);
109 }
110 if (_threads != NULL) {
111 for (int i = 0; i < _n_threads; i++) {
112 delete _threads[i];
113 }
114 FREE_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _n_threads);
115 }
116 }
118 void ConcurrentG1Refine::threads_do(ThreadClosure *tc) {
119 if (_threads != NULL) {
120 for (int i = 0; i < _n_threads; i++) {
121 tc->do_thread(_threads[i]);
122 }
123 }
124 }
127 int ConcurrentG1Refine::add_card_count(jbyte* card_ptr) {
128 size_t card_num = (card_ptr - _ct_bot);
129 guarantee(0 <= card_num && card_num < _n_card_counts, "Bounds");
130 unsigned char cnt = _card_counts[card_num];
131 if (cnt < 255) _card_counts[card_num]++;
132 return cnt;
133 _total_travs++;
134 }
136 jbyte* ConcurrentG1Refine::cache_insert(jbyte* card_ptr) {
137 int count = add_card_count(card_ptr);
138 // Count previously unvisited cards.
139 if (count == 0) _total_cards++;
140 // We'll assume a traversal unless we store it in the cache.
141 if (count < G1ConcRSHotCardLimit) {
142 _total_travs++;
143 return card_ptr;
144 }
145 // Otherwise, it's hot.
146 jbyte* res = NULL;
147 MutexLockerEx x(HotCardCache_lock, Mutex::_no_safepoint_check_flag);
148 if (_n_hot == _hot_cache_size) {
149 _total_travs++;
150 res = _hot_cache[_hot_cache_idx];
151 _n_hot--;
152 }
153 // Now _n_hot < _hot_cache_size, and we can insert at _hot_cache_idx.
154 _hot_cache[_hot_cache_idx] = card_ptr;
155 _hot_cache_idx++;
156 if (_hot_cache_idx == _hot_cache_size) _hot_cache_idx = 0;
157 _n_hot++;
158 return res;
159 }
162 void ConcurrentG1Refine::clean_up_cache(int worker_i, G1RemSet* g1rs) {
163 assert(!use_cache(), "cache should be disabled");
164 int start_ind = _hot_cache_idx-1;
165 for (int i = 0; i < _n_hot; i++) {
166 int ind = start_ind - i;
167 if (ind < 0) ind = ind + _hot_cache_size;
168 jbyte* entry = _hot_cache[ind];
169 if (entry != NULL) {
170 g1rs->concurrentRefineOneCard(entry, worker_i);
171 }
172 }
173 _n_hot = 0;
174 _hot_cache_idx = 0;
175 }
177 void ConcurrentG1Refine::clear_and_record_card_counts() {
178 if (G1ConcRSLogCacheSize == 0 && !G1ConcRSCountTraversals) return;
179 _n_periods++;
180 if (G1ConcRSCountTraversals) {
181 for (size_t i = 0; i < _n_card_counts; i++) {
182 unsigned char bucket = _card_counts[i];
183 _cur_card_count_histo[bucket]++;
184 _card_counts[i] = 0;
185 }
186 gclog_or_tty->print_cr("Card counts:");
187 for (int i = 0; i < 256; i++) {
188 if (_cur_card_count_histo[i] > 0) {
189 gclog_or_tty->print_cr(" %3d: %9d", i, _cur_card_count_histo[i]);
190 _cum_card_count_histo[i] += _cur_card_count_histo[i];
191 _cur_card_count_histo[i] = 0;
192 }
193 }
194 } else {
195 assert(G1ConcRSLogCacheSize > 0, "Logic");
196 Copy::fill_to_words((HeapWord*)(&_card_counts[0]),
197 _n_card_counts / HeapWordSize);
198 }
199 }
201 void
202 ConcurrentG1Refine::
203 print_card_count_histo_range(unsigned* histo, int from, int to,
204 float& cum_card_pct,
205 float& cum_travs_pct) {
206 unsigned cards = 0;
207 unsigned travs = 0;
208 guarantee(to <= 256, "Precondition");
209 for (int i = from; i < to-1; i++) {
210 cards += histo[i];
211 travs += histo[i] * i;
212 }
213 if (to == 256) {
214 unsigned histo_card_sum = 0;
215 unsigned histo_trav_sum = 0;
216 for (int i = 1; i < 255; i++) {
217 histo_trav_sum += histo[i] * i;
218 }
219 cards += histo[255];
220 // correct traversals for the last one.
221 unsigned travs_255 = (unsigned) (_total_travs - histo_trav_sum);
222 travs += travs_255;
224 } else {
225 cards += histo[to-1];
226 travs += histo[to-1] * (to-1);
227 }
228 float fperiods = (float)_n_periods;
229 float f_tot_cards = (float)_total_cards/fperiods;
230 float f_tot_travs = (float)_total_travs/fperiods;
231 if (cards > 0) {
232 float fcards = (float)cards/fperiods;
233 float ftravs = (float)travs/fperiods;
234 if (to == 256) {
235 gclog_or_tty->print(" %4d- %10.2f%10.2f", from, fcards, ftravs);
236 } else {
237 gclog_or_tty->print(" %4d-%4d %10.2f%10.2f", from, to-1, fcards, ftravs);
238 }
239 float pct_cards = fcards*100.0/f_tot_cards;
240 cum_card_pct += pct_cards;
241 float pct_travs = ftravs*100.0/f_tot_travs;
242 cum_travs_pct += pct_travs;
243 gclog_or_tty->print_cr("%10.2f%10.2f%10.2f%10.2f",
244 pct_cards, cum_card_pct,
245 pct_travs, cum_travs_pct);
246 }
247 }
249 void ConcurrentG1Refine::print_final_card_counts() {
250 if (!G1ConcRSCountTraversals) return;
252 gclog_or_tty->print_cr("Did %d total traversals of %d distinct cards.",
253 _total_travs, _total_cards);
254 float fperiods = (float)_n_periods;
255 gclog_or_tty->print_cr(" This is an average of %8.2f traversals, %8.2f cards, "
256 "per collection.", (float)_total_travs/fperiods,
257 (float)_total_cards/fperiods);
258 gclog_or_tty->print_cr(" This is an average of %8.2f traversals/distinct "
259 "dirty card.\n",
260 _total_cards > 0 ?
261 (float)_total_travs/(float)_total_cards : 0.0);
264 gclog_or_tty->print_cr("Histogram:\n\n%10s %10s%10s%10s%10s%10s%10s",
265 "range", "# cards", "# travs", "% cards", "(cum)",
266 "% travs", "(cum)");
267 gclog_or_tty->print_cr("------------------------------------------------------------"
268 "-------------");
269 float cum_cards_pct = 0.0;
270 float cum_travs_pct = 0.0;
271 for (int i = 1; i < 10; i++) {
272 print_card_count_histo_range(_cum_card_count_histo, i, i+1,
273 cum_cards_pct, cum_travs_pct);
274 }
275 for (int i = 10; i < 100; i += 10) {
276 print_card_count_histo_range(_cum_card_count_histo, i, i+10,
277 cum_cards_pct, cum_travs_pct);
278 }
279 print_card_count_histo_range(_cum_card_count_histo, 100, 150,
280 cum_cards_pct, cum_travs_pct);
281 print_card_count_histo_range(_cum_card_count_histo, 150, 200,
282 cum_cards_pct, cum_travs_pct);
283 print_card_count_histo_range(_cum_card_count_histo, 150, 255,
284 cum_cards_pct, cum_travs_pct);
285 print_card_count_histo_range(_cum_card_count_histo, 255, 256,
286 cum_cards_pct, cum_travs_pct);
287 }