Thu, 15 Aug 2013 10:52:18 +0200
7145569: G1: optimize nmethods scanning
Summary: Add a list of nmethods to the RSet for a region that contain references into the region. Skip scanning the code cache during root scanning and scan the nmethod lists during RSet scanning instead.
Reviewed-by: tschatzl, brutisso, mgerdin, twisti, kvn
1 /*
2 * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
25 #include "precompiled.hpp"
26 #include "gc_implementation/g1/concurrentG1Refine.hpp"
27 #include "gc_implementation/g1/concurrentG1RefineThread.hpp"
28 #include "gc_implementation/g1/heapRegion.hpp"
29 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
30 #include "gc_implementation/g1/g1RemSet.inline.hpp"
31 #include "gc_implementation/g1/g1RemSetSummary.hpp"
32 #include "gc_implementation/g1/heapRegionRemSet.hpp"
33 #include "runtime/thread.inline.hpp"
35 class GetRSThreadVTimeClosure : public ThreadClosure {
36 private:
37 G1RemSetSummary* _summary;
38 uint _counter;
40 public:
41 GetRSThreadVTimeClosure(G1RemSetSummary * summary) : ThreadClosure(), _summary(summary), _counter(0) {
42 assert(_summary != NULL, "just checking");
43 }
45 virtual void do_thread(Thread* t) {
46 ConcurrentG1RefineThread* crt = (ConcurrentG1RefineThread*) t;
47 _summary->set_rs_thread_vtime(_counter, crt->vtime_accum());
48 _counter++;
49 }
50 };
52 void G1RemSetSummary::update() {
53 _num_refined_cards = remset()->conc_refine_cards();
54 DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
55 _num_processed_buf_mutator = dcqs.processed_buffers_mut();
56 _num_processed_buf_rs_threads = dcqs.processed_buffers_rs_thread();
58 _num_coarsenings = HeapRegionRemSet::n_coarsenings();
60 ConcurrentG1Refine * cg1r = G1CollectedHeap::heap()->concurrent_g1_refine();
61 if (_rs_threads_vtimes != NULL) {
62 GetRSThreadVTimeClosure p(this);
63 cg1r->worker_threads_do(&p);
64 }
65 set_sampling_thread_vtime(cg1r->sampling_thread()->vtime_accum());
66 }
68 void G1RemSetSummary::set_rs_thread_vtime(uint thread, double value) {
69 assert(_rs_threads_vtimes != NULL, "just checking");
70 assert(thread < _num_vtimes, "just checking");
71 _rs_threads_vtimes[thread] = value;
72 }
74 double G1RemSetSummary::rs_thread_vtime(uint thread) const {
75 assert(_rs_threads_vtimes != NULL, "just checking");
76 assert(thread < _num_vtimes, "just checking");
77 return _rs_threads_vtimes[thread];
78 }
80 void G1RemSetSummary::initialize(G1RemSet* remset, uint num_workers) {
81 assert(_rs_threads_vtimes == NULL, "just checking");
82 assert(remset != NULL, "just checking");
84 _remset = remset;
85 _num_vtimes = num_workers;
86 _rs_threads_vtimes = NEW_C_HEAP_ARRAY(double, _num_vtimes, mtGC);
87 memset(_rs_threads_vtimes, 0, sizeof(double) * _num_vtimes);
89 update();
90 }
92 void G1RemSetSummary::set(G1RemSetSummary* other) {
93 assert(other != NULL, "just checking");
94 assert(remset() == other->remset(), "just checking");
95 assert(_num_vtimes == other->_num_vtimes, "just checking");
97 _num_refined_cards = other->num_concurrent_refined_cards();
99 _num_processed_buf_mutator = other->num_processed_buf_mutator();
100 _num_processed_buf_rs_threads = other->num_processed_buf_rs_threads();
102 _num_coarsenings = other->_num_coarsenings;
104 memcpy(_rs_threads_vtimes, other->_rs_threads_vtimes, sizeof(double) * _num_vtimes);
106 set_sampling_thread_vtime(other->sampling_thread_vtime());
107 }
109 void G1RemSetSummary::subtract_from(G1RemSetSummary* other) {
110 assert(other != NULL, "just checking");
111 assert(remset() == other->remset(), "just checking");
112 assert(_num_vtimes == other->_num_vtimes, "just checking");
114 _num_refined_cards = other->num_concurrent_refined_cards() - _num_refined_cards;
116 _num_processed_buf_mutator = other->num_processed_buf_mutator() - _num_processed_buf_mutator;
117 _num_processed_buf_rs_threads = other->num_processed_buf_rs_threads() - _num_processed_buf_rs_threads;
119 _num_coarsenings = other->num_coarsenings() - _num_coarsenings;
121 for (uint i = 0; i < _num_vtimes; i++) {
122 set_rs_thread_vtime(i, other->rs_thread_vtime(i) - rs_thread_vtime(i));
123 }
125 _sampling_thread_vtime = other->sampling_thread_vtime() - _sampling_thread_vtime;
126 }
128 class HRRSStatsIter: public HeapRegionClosure {
129 size_t _occupied;
131 size_t _total_rs_mem_sz;
132 size_t _max_rs_mem_sz;
133 HeapRegion* _max_rs_mem_sz_region;
135 size_t _total_code_root_mem_sz;
136 size_t _max_code_root_mem_sz;
137 HeapRegion* _max_code_root_mem_sz_region;
138 public:
139 HRRSStatsIter() :
140 _occupied(0),
141 _total_rs_mem_sz(0),
142 _max_rs_mem_sz(0),
143 _max_rs_mem_sz_region(NULL),
144 _total_code_root_mem_sz(0),
145 _max_code_root_mem_sz(0),
146 _max_code_root_mem_sz_region(NULL)
147 {}
149 bool doHeapRegion(HeapRegion* r) {
150 HeapRegionRemSet* hrrs = r->rem_set();
152 // HeapRegionRemSet::mem_size() includes the
153 // size of the strong code roots
154 size_t rs_mem_sz = hrrs->mem_size();
155 if (rs_mem_sz > _max_rs_mem_sz) {
156 _max_rs_mem_sz = rs_mem_sz;
157 _max_rs_mem_sz_region = r;
158 }
159 _total_rs_mem_sz += rs_mem_sz;
161 size_t code_root_mem_sz = hrrs->strong_code_roots_mem_size();
162 if (code_root_mem_sz > _max_code_root_mem_sz) {
163 _max_code_root_mem_sz = code_root_mem_sz;
164 _max_code_root_mem_sz_region = r;
165 }
166 _total_code_root_mem_sz += code_root_mem_sz;
168 size_t occ = hrrs->occupied();
169 _occupied += occ;
170 return false;
171 }
172 size_t total_rs_mem_sz() { return _total_rs_mem_sz; }
173 size_t max_rs_mem_sz() { return _max_rs_mem_sz; }
174 HeapRegion* max_rs_mem_sz_region() { return _max_rs_mem_sz_region; }
175 size_t total_code_root_mem_sz() { return _total_code_root_mem_sz; }
176 size_t max_code_root_mem_sz() { return _max_code_root_mem_sz; }
177 HeapRegion* max_code_root_mem_sz_region() { return _max_code_root_mem_sz_region; }
178 size_t occupied() { return _occupied; }
179 };
// Returns numerator / denominator expressed as a percentage.
// A zero denominator yields 0.0 instead of dividing by zero.
double calc_percentage(size_t numerator, size_t denominator) {
  if (denominator == 0) {
    return 0.0;
  }
  return (double)numerator / denominator * 100.0;
}
189 void G1RemSetSummary::print_on(outputStream* out) {
190 out->print_cr("\n Concurrent RS processed "SIZE_FORMAT" cards",
191 num_concurrent_refined_cards());
192 out->print_cr(" Of %d completed buffers:", num_processed_buf_total());
193 out->print_cr(" %8d (%5.1f%%) by concurrent RS threads.",
194 num_processed_buf_total(),
195 calc_percentage(num_processed_buf_rs_threads(), num_processed_buf_total()));
196 out->print_cr(" %8d (%5.1f%%) by mutator threads.",
197 num_processed_buf_mutator(),
198 calc_percentage(num_processed_buf_mutator(), num_processed_buf_total()));
199 out->print_cr(" Concurrent RS threads times (s)");
200 out->print(" ");
201 for (uint i = 0; i < _num_vtimes; i++) {
202 out->print(" %5.2f", rs_thread_vtime(i));
203 }
204 out->cr();
205 out->print_cr(" Concurrent sampling threads times (s)");
206 out->print_cr(" %5.2f", sampling_thread_vtime());
208 HRRSStatsIter blk;
209 G1CollectedHeap::heap()->heap_region_iterate(&blk);
210 // RemSet stats
211 out->print_cr(" Total heap region rem set sizes = "SIZE_FORMAT"K."
212 " Max = "SIZE_FORMAT"K.",
213 blk.total_rs_mem_sz()/K, blk.max_rs_mem_sz()/K);
214 out->print_cr(" Static structures = "SIZE_FORMAT"K,"
215 " free_lists = "SIZE_FORMAT"K.",
216 HeapRegionRemSet::static_mem_size() / K,
217 HeapRegionRemSet::fl_mem_size() / K);
218 out->print_cr(" "SIZE_FORMAT" occupied cards represented.",
219 blk.occupied());
220 HeapRegion* max_rs_mem_sz_region = blk.max_rs_mem_sz_region();
221 HeapRegionRemSet* max_rs_rem_set = max_rs_mem_sz_region->rem_set();
222 out->print_cr(" Max size region = "HR_FORMAT", "
223 "size = "SIZE_FORMAT "K, occupied = "SIZE_FORMAT"K.",
224 HR_FORMAT_PARAMS(max_rs_mem_sz_region),
225 (max_rs_rem_set->mem_size() + K - 1)/K,
226 (max_rs_rem_set->occupied() + K - 1)/K);
227 out->print_cr(" Did %d coarsenings.", num_coarsenings());
228 // Strong code root stats
229 out->print_cr(" Total heap region code-root set sizes = "SIZE_FORMAT"K."
230 " Max = "SIZE_FORMAT"K.",
231 blk.total_code_root_mem_sz()/K, blk.max_code_root_mem_sz()/K);
232 HeapRegion* max_code_root_mem_sz_region = blk.max_code_root_mem_sz_region();
233 HeapRegionRemSet* max_code_root_rem_set = max_code_root_mem_sz_region->rem_set();
234 out->print_cr(" Max size region = "HR_FORMAT", "
235 "size = "SIZE_FORMAT "K, num_elems = "SIZE_FORMAT".",
236 HR_FORMAT_PARAMS(max_code_root_mem_sz_region),
237 (max_code_root_rem_set->strong_code_roots_mem_size() + K - 1)/K,
238 (max_code_root_rem_set->strong_code_roots_list_length()));
239 }