Thu, 19 Mar 2015 15:25:54 +0100
8027962: Per-phase timing measurements for strong roots processing
Reviewed-by: tschatzl, ecaspole
1 /*
2 * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
25 #include "precompiled.hpp"
27 #include "classfile/symbolTable.hpp"
28 #include "classfile/systemDictionary.hpp"
29 #include "code/codeCache.hpp"
30 #include "gc_implementation/g1/bufferingOopClosure.hpp"
31 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
32 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
33 #include "gc_implementation/g1/g1GCPhaseTimes.hpp"
34 #include "gc_implementation/g1/g1RemSet.inline.hpp"
35 #include "gc_implementation/g1/g1RootProcessor.hpp"
36 #include "memory/allocation.inline.hpp"
37 #include "runtime/fprofiler.hpp"
38 #include "runtime/mutex.hpp"
39 #include "services/management.hpp"
41 class G1CodeBlobClosure : public CodeBlobClosure {
42 class HeapRegionGatheringOopClosure : public OopClosure {
43 G1CollectedHeap* _g1h;
44 OopClosure* _work;
45 nmethod* _nm;
47 template <typename T>
48 void do_oop_work(T* p) {
49 _work->do_oop(p);
50 T oop_or_narrowoop = oopDesc::load_heap_oop(p);
51 if (!oopDesc::is_null(oop_or_narrowoop)) {
52 oop o = oopDesc::decode_heap_oop_not_null(oop_or_narrowoop);
53 HeapRegion* hr = _g1h->heap_region_containing_raw(o);
54 assert(!_g1h->obj_in_cs(o) || hr->rem_set()->strong_code_roots_list_contains(_nm), "if o still in CS then evacuation failed and nm must already be in the remset");
55 hr->add_strong_code_root(_nm);
56 }
57 }
59 public:
60 HeapRegionGatheringOopClosure(OopClosure* oc) : _g1h(G1CollectedHeap::heap()), _work(oc), _nm(NULL) {}
62 void do_oop(oop* o) {
63 do_oop_work(o);
64 }
66 void do_oop(narrowOop* o) {
67 do_oop_work(o);
68 }
70 void set_nm(nmethod* nm) {
71 _nm = nm;
72 }
73 };
75 HeapRegionGatheringOopClosure _oc;
76 public:
77 G1CodeBlobClosure(OopClosure* oc) : _oc(oc) {}
79 void do_code_blob(CodeBlob* cb) {
80 nmethod* nm = cb->as_nmethod_or_null();
81 if (nm != NULL) {
82 if (!nm->test_set_oops_do_mark()) {
83 _oc.set_nm(nm);
84 nm->oops_do(&_oc);
85 nm->fix_oop_relocations();
86 }
87 }
88 }
89 };
92 void G1RootProcessor::worker_has_discovered_all_strong_classes() {
93 uint n_workers = _g1h->n_par_threads();
94 assert(ClassUnloadingWithConcurrentMark, "Currently only needed when doing G1 Class Unloading");
96 uint new_value = (uint)Atomic::add(1, &_n_workers_discovered_strong_classes);
97 if (new_value == n_workers) {
98 // This thread is last. Notify the others.
99 MonitorLockerEx ml(&_lock, Mutex::_no_safepoint_check_flag);
100 _lock.notify_all();
101 }
102 }
104 void G1RootProcessor::wait_until_all_strong_classes_discovered() {
105 uint n_workers = _g1h->n_par_threads();
106 assert(ClassUnloadingWithConcurrentMark, "Currently only needed when doing G1 Class Unloading");
108 if ((uint)_n_workers_discovered_strong_classes != n_workers) {
109 MonitorLockerEx ml(&_lock, Mutex::_no_safepoint_check_flag);
110 while ((uint)_n_workers_discovered_strong_classes != n_workers) {
111 _lock.wait(Mutex::_no_safepoint_check_flag, 0, false);
112 }
113 }
114 }
116 G1RootProcessor::G1RootProcessor(G1CollectedHeap* g1h) :
117 _g1h(g1h),
118 _process_strong_tasks(new SubTasksDone(G1RP_PS_NumElements)),
119 _srs(g1h),
120 _lock(Mutex::leaf, "G1 Root Scanning barrier lock", false),
121 _n_workers_discovered_strong_classes(0) {}
123 void G1RootProcessor::evacuate_roots(OopClosure* scan_non_heap_roots,
124 OopClosure* scan_non_heap_weak_roots,
125 CLDClosure* scan_strong_clds,
126 CLDClosure* scan_weak_clds,
127 bool trace_metadata,
128 uint worker_i) {
129 // First scan the shared roots.
130 double ext_roots_start = os::elapsedTime();
131 G1GCPhaseTimes* phase_times = _g1h->g1_policy()->phase_times();
133 BufferingOopClosure buf_scan_non_heap_roots(scan_non_heap_roots);
134 BufferingOopClosure buf_scan_non_heap_weak_roots(scan_non_heap_weak_roots);
136 OopClosure* const weak_roots = &buf_scan_non_heap_weak_roots;
137 OopClosure* const strong_roots = &buf_scan_non_heap_roots;
139 // CodeBlobClosures are not interoperable with BufferingOopClosures
140 G1CodeBlobClosure root_code_blobs(scan_non_heap_roots);
142 process_java_roots(strong_roots,
143 trace_metadata ? scan_strong_clds : NULL,
144 scan_strong_clds,
145 trace_metadata ? NULL : scan_weak_clds,
146 &root_code_blobs,
147 phase_times,
148 worker_i);
150 // This is the point where this worker thread will not find more strong CLDs/nmethods.
151 // Report this so G1 can synchronize the strong and weak CLDs/nmethods processing.
152 if (trace_metadata) {
153 worker_has_discovered_all_strong_classes();
154 }
156 process_vm_roots(strong_roots, weak_roots, phase_times, worker_i);
158 {
159 // Now the CM ref_processor roots.
160 G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::CMRefRoots, worker_i);
161 if (!_process_strong_tasks->is_task_claimed(G1RP_PS_refProcessor_oops_do)) {
162 // We need to treat the discovered reference lists of the
163 // concurrent mark ref processor as roots and keep entries
164 // (which are added by the marking threads) on them live
165 // until they can be processed at the end of marking.
166 _g1h->ref_processor_cm()->weak_oops_do(&buf_scan_non_heap_roots);
167 }
168 }
170 if (trace_metadata) {
171 {
172 G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::WaitForStrongCLD, worker_i);
173 // Barrier to make sure all workers passed
174 // the strong CLD and strong nmethods phases.
175 wait_until_all_strong_classes_discovered();
176 }
178 // Now take the complement of the strong CLDs.
179 G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::WeakCLDRoots, worker_i);
180 ClassLoaderDataGraph::roots_cld_do(NULL, scan_weak_clds);
181 } else {
182 phase_times->record_time_secs(G1GCPhaseTimes::WaitForStrongCLD, worker_i, 0.0);
183 phase_times->record_time_secs(G1GCPhaseTimes::WeakCLDRoots, worker_i, 0.0);
184 }
186 // Finish up any enqueued closure apps (attributed as object copy time).
187 buf_scan_non_heap_roots.done();
188 buf_scan_non_heap_weak_roots.done();
190 double obj_copy_time_sec = buf_scan_non_heap_roots.closure_app_seconds()
191 + buf_scan_non_heap_weak_roots.closure_app_seconds();
193 phase_times->record_time_secs(G1GCPhaseTimes::ObjCopy, worker_i, obj_copy_time_sec);
195 double ext_root_time_sec = os::elapsedTime() - ext_roots_start - obj_copy_time_sec;
197 phase_times->record_time_secs(G1GCPhaseTimes::ExtRootScan, worker_i, ext_root_time_sec);
199 // During conc marking we have to filter the per-thread SATB buffers
200 // to make sure we remove any oops into the CSet (which will show up
201 // as implicitly live).
202 {
203 G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::SATBFiltering, worker_i);
204 if (!_process_strong_tasks->is_task_claimed(G1RP_PS_filter_satb_buffers) && _g1h->mark_in_progress()) {
205 JavaThread::satb_mark_queue_set().filter_thread_buffers();
206 }
207 }
209 _process_strong_tasks->all_tasks_completed();
210 }
212 void G1RootProcessor::process_strong_roots(OopClosure* oops,
213 CLDClosure* clds,
214 CodeBlobClosure* blobs) {
216 process_java_roots(oops, clds, clds, NULL, blobs, NULL, 0);
217 process_vm_roots(oops, NULL, NULL, 0);
219 _process_strong_tasks->all_tasks_completed();
220 }
222 void G1RootProcessor::process_all_roots(OopClosure* oops,
223 CLDClosure* clds,
224 CodeBlobClosure* blobs) {
226 process_java_roots(oops, NULL, clds, clds, NULL, NULL, 0);
227 process_vm_roots(oops, oops, NULL, 0);
229 if (!_process_strong_tasks->is_task_claimed(G1RP_PS_CodeCache_oops_do)) {
230 CodeCache::blobs_do(blobs);
231 }
233 _process_strong_tasks->all_tasks_completed();
234 }
236 void G1RootProcessor::process_java_roots(OopClosure* strong_roots,
237 CLDClosure* thread_stack_clds,
238 CLDClosure* strong_clds,
239 CLDClosure* weak_clds,
240 CodeBlobClosure* strong_code,
241 G1GCPhaseTimes* phase_times,
242 uint worker_i) {
243 assert(thread_stack_clds == NULL || weak_clds == NULL, "There is overlap between those, only one may be set");
244 // Iterating over the CLDG and the Threads are done early to allow us to
245 // first process the strong CLDs and nmethods and then, after a barrier,
246 // let the thread process the weak CLDs and nmethods.
247 {
248 G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::CLDGRoots, worker_i);
249 if (!_process_strong_tasks->is_task_claimed(G1RP_PS_ClassLoaderDataGraph_oops_do)) {
250 ClassLoaderDataGraph::roots_cld_do(strong_clds, weak_clds);
251 }
252 }
254 {
255 G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::ThreadRoots, worker_i);
256 Threads::possibly_parallel_oops_do(strong_roots, thread_stack_clds, strong_code);
257 }
258 }
260 void G1RootProcessor::process_vm_roots(OopClosure* strong_roots,
261 OopClosure* weak_roots,
262 G1GCPhaseTimes* phase_times,
263 uint worker_i) {
264 {
265 G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::UniverseRoots, worker_i);
266 if (!_process_strong_tasks->is_task_claimed(G1RP_PS_Universe_oops_do)) {
267 Universe::oops_do(strong_roots);
268 }
269 }
271 {
272 G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::JNIRoots, worker_i);
273 if (!_process_strong_tasks->is_task_claimed(G1RP_PS_JNIHandles_oops_do)) {
274 JNIHandles::oops_do(strong_roots);
275 }
276 }
278 {
279 G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::ObjectSynchronizerRoots, worker_i);
280 if (!_process_strong_tasks-> is_task_claimed(G1RP_PS_ObjectSynchronizer_oops_do)) {
281 ObjectSynchronizer::oops_do(strong_roots);
282 }
283 }
285 {
286 G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::FlatProfilerRoots, worker_i);
287 if (!_process_strong_tasks->is_task_claimed(G1RP_PS_FlatProfiler_oops_do)) {
288 FlatProfiler::oops_do(strong_roots);
289 }
290 }
292 {
293 G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::ManagementRoots, worker_i);
294 if (!_process_strong_tasks->is_task_claimed(G1RP_PS_Management_oops_do)) {
295 Management::oops_do(strong_roots);
296 }
297 }
299 {
300 G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::JVMTIRoots, worker_i);
301 if (!_process_strong_tasks->is_task_claimed(G1RP_PS_jvmti_oops_do)) {
302 JvmtiExport::oops_do(strong_roots);
303 }
304 }
306 {
307 G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::SystemDictionaryRoots, worker_i);
308 if (!_process_strong_tasks->is_task_claimed(G1RP_PS_SystemDictionary_oops_do)) {
309 SystemDictionary::roots_oops_do(strong_roots, weak_roots);
310 }
311 }
313 {
314 G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::StringTableRoots, worker_i);
315 // All threads execute the following. A specific chunk of buckets
316 // from the StringTable are the individual tasks.
317 if (weak_roots != NULL) {
318 StringTable::possibly_parallel_oops_do(weak_roots);
319 }
320 }
321 }
323 void G1RootProcessor::scan_remembered_sets(G1ParPushHeapRSClosure* scan_rs,
324 OopClosure* scan_non_heap_weak_roots,
325 uint worker_i) {
326 G1GCPhaseTimes* phase_times = _g1h->g1_policy()->phase_times();
327 G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::CodeCacheRoots, worker_i);
329 // Now scan the complement of the collection set.
330 G1CodeBlobClosure scavenge_cs_nmethods(scan_non_heap_weak_roots);
332 _g1h->g1_rem_set()->oops_into_collection_set_do(scan_rs, &scavenge_cs_nmethods, worker_i);
333 }
335 void G1RootProcessor::set_num_workers(int active_workers) {
336 _process_strong_tasks->set_n_threads(active_workers);
337 }