1.1 --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Thu Mar 26 13:19:32 2015 +0100 1.2 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Mon Dec 01 15:24:56 2014 +0100 1.3 @@ -46,6 +46,7 @@ 1.4 #include "gc_implementation/g1/g1ParScanThreadState.inline.hpp" 1.5 #include "gc_implementation/g1/g1RegionToSpaceMapper.hpp" 1.6 #include "gc_implementation/g1/g1RemSet.inline.hpp" 1.7 +#include "gc_implementation/g1/g1RootProcessor.hpp" 1.8 #include "gc_implementation/g1/g1StringDedup.hpp" 1.9 #include "gc_implementation/g1/g1YCTypes.hpp" 1.10 #include "gc_implementation/g1/heapRegion.inline.hpp" 1.11 @@ -85,18 +86,6 @@ 1.12 // apply to TLAB allocation, which is not part of this interface: it 1.13 // is done by clients of this interface.) 1.14 1.15 -// Notes on implementation of parallelism in different tasks. 1.16 -// 1.17 -// G1ParVerifyTask uses heap_region_par_iterate_chunked() for parallelism. 1.18 -// The number of GC workers is passed to heap_region_par_iterate_chunked(). 1.19 -// It does use run_task() which sets _n_workers in the task. 1.20 -// G1ParTask executes g1_process_roots() -> 1.21 -// SharedHeap::process_roots() which calls eventually to 1.22 -// CardTableModRefBS::par_non_clean_card_iterate_work() which uses 1.23 -// SequentialSubTasksDone. SharedHeap::process_roots() also 1.24 -// directly uses SubTasksDone (_process_strong_tasks field in SharedHeap). 1.25 -// 1.26 - 1.27 // Local to this file. 1.28 1.29 class RefineCardTableEntryClosure: public CardTableEntryClosure { 1.30 @@ -1854,7 +1843,6 @@ 1.31 _is_alive_closure_stw(this), 1.32 _ref_processor_cm(NULL), 1.33 _ref_processor_stw(NULL), 1.34 - _process_strong_tasks(new SubTasksDone(G1H_PS_NumElements)), 1.35 _bot_shared(NULL), 1.36 _evac_failure_scan_stack(NULL), 1.37 _mark_in_progress(false), 1.38 @@ -1888,9 +1876,6 @@ 1.39 _gc_tracer_cm(new (ResourceObj::C_HEAP, mtGC) G1OldTracer()) { 1.40 1.41 _g1h = this; 1.42 - if (_process_strong_tasks == NULL || !_process_strong_tasks->valid()) { 1.43 - vm_exit_during_initialization("Failed necessary allocation."); 1.44 - } 1.45 1.46 _allocator = G1Allocator::create_allocator(_g1h); 1.47 _humongous_object_threshold_in_words = HeapRegion::GrainWords / 2; 1.48 @@ -3303,11 +3288,12 @@ 1.49 G1VerifyCodeRootOopClosure codeRootsCl(this, &rootsCl, vo); 1.50 G1VerifyCodeRootBlobClosure blobsCl(&codeRootsCl); 1.51 1.52 - process_all_roots(true, // activate StrongRootsScope 1.53 - SO_AllCodeCache, // roots scanning options 1.54 - &rootsCl, 1.55 - &cldCl, 1.56 - &blobsCl); 1.57 + { 1.58 + G1RootProcessor root_processor(this); 1.59 + root_processor.process_all_roots(&rootsCl, 1.60 + &cldCl, 1.61 + &blobsCl); 1.62 + } 1.63 1.64 bool failures = rootsCl.failures() || codeRootsCl.failures(); 1.65 1.66 @@ -4543,60 +4529,11 @@ 1.67 } 1.68 }; 1.69 1.70 -class G1CodeBlobClosure : public CodeBlobClosure { 1.71 - class HeapRegionGatheringOopClosure : public OopClosure { 1.72 - G1CollectedHeap* _g1h; 1.73 - OopClosure* _work; 1.74 - nmethod* _nm; 1.75 - 1.76 - template <typename T> 1.77 - void do_oop_work(T* p) { 1.78 - _work->do_oop(p); 1.79 - T oop_or_narrowoop = oopDesc::load_heap_oop(p); 1.80 - if (!oopDesc::is_null(oop_or_narrowoop)) { 1.81 - oop o = oopDesc::decode_heap_oop_not_null(oop_or_narrowoop); 1.82 - HeapRegion* hr = _g1h->heap_region_containing_raw(o); 1.83 - assert(!_g1h->obj_in_cs(o) || hr->rem_set()->strong_code_roots_list_contains(_nm), "if o still in CS then evacuation failed and nm must already be in the remset"); 1.84 - hr->add_strong_code_root(_nm); 1.85 - } 1.86 - } 1.87 - 1.88 - public: 1.89 - HeapRegionGatheringOopClosure(OopClosure* oc) : _g1h(G1CollectedHeap::heap()), _work(oc), _nm(NULL) {} 1.90 - 1.91 - void do_oop(oop* o) { 1.92 - do_oop_work(o); 1.93 - } 1.94 - 1.95 - void do_oop(narrowOop* o) { 1.96 - do_oop_work(o); 1.97 - } 1.98 - 1.99 - void set_nm(nmethod* nm) { 1.100 - _nm = nm; 1.101 - } 1.102 - }; 1.103 - 1.104 - HeapRegionGatheringOopClosure _oc; 1.105 -public: 1.106 - G1CodeBlobClosure(OopClosure* oc) : _oc(oc) {} 1.107 - 1.108 - void do_code_blob(CodeBlob* cb) { 1.109 - nmethod* nm = cb->as_nmethod_or_null(); 1.110 - if (nm != NULL) { 1.111 - if (!nm->test_set_oops_do_mark()) { 1.112 - _oc.set_nm(nm); 1.113 - nm->oops_do(&_oc); 1.114 - nm->fix_oop_relocations(); 1.115 - } 1.116 - } 1.117 - } 1.118 -}; 1.119 - 1.120 class G1ParTask : public AbstractGangTask { 1.121 protected: 1.122 G1CollectedHeap* _g1h; 1.123 RefToScanQueueSet *_queues; 1.124 + G1RootProcessor* _root_processor; 1.125 ParallelTaskTerminator _terminator; 1.126 uint _n_workers; 1.127 1.128 @@ -4604,10 +4541,11 @@ 1.129 Mutex* stats_lock() { return &_stats_lock; } 1.130 1.131 public: 1.132 - G1ParTask(G1CollectedHeap* g1h, RefToScanQueueSet *task_queues) 1.133 + G1ParTask(G1CollectedHeap* g1h, RefToScanQueueSet *task_queues, G1RootProcessor* root_processor) 1.134 : AbstractGangTask("G1 collection"), 1.135 _g1h(g1h), 1.136 _queues(task_queues), 1.137 + _root_processor(root_processor), 1.138 _terminator(0, _queues), 1.139 _stats_lock(Mutex::leaf, "parallel G1 stats lock", true) 1.140 {} 1.141 @@ -4621,13 +4559,7 @@ 1.142 ParallelTaskTerminator* terminator() { return &_terminator; } 1.143 1.144 virtual void set_for_termination(int active_workers) { 1.145 - // This task calls set_n_termination() in par_non_clean_card_iterate_work() 1.146 - // in the young space (_par_seq_tasks) in the G1 heap 1.147 - // for SequentialSubTasksDone. 1.148 - // This task also uses SubTasksDone in SharedHeap and G1CollectedHeap 1.149 - // both of which need setting by set_n_termination(). 1.150 - _g1h->SharedHeap::set_n_termination(active_workers); 1.151 - _g1h->set_n_termination(active_workers); 1.152 + _root_processor->set_num_workers(active_workers); 1.153 terminator()->reset_for_reuse(active_workers); 1.154 _n_workers = active_workers; 1.155 } 1.156 @@ -4696,24 +4628,21 @@ 1.157 false, // Process all klasses. 1.158 true); // Need to claim CLDs. 1.159 1.160 - G1CodeBlobClosure scan_only_code_cl(&scan_only_root_cl); 1.161 - G1CodeBlobClosure scan_mark_code_cl(&scan_mark_root_cl); 1.162 - // IM Weak code roots are handled later. 1.163 - 1.164 OopClosure* strong_root_cl; 1.165 OopClosure* weak_root_cl; 1.166 CLDClosure* strong_cld_cl; 1.167 CLDClosure* weak_cld_cl; 1.168 - CodeBlobClosure* strong_code_cl; 1.169 + 1.170 + bool trace_metadata = false; 1.171 1.172 if (_g1h->g1_policy()->during_initial_mark_pause()) { 1.173 // We also need to mark copied objects. 1.174 strong_root_cl = &scan_mark_root_cl; 1.175 strong_cld_cl = &scan_mark_cld_cl; 1.176 - strong_code_cl = &scan_mark_code_cl; 1.177 if (ClassUnloadingWithConcurrentMark) { 1.178 weak_root_cl = &scan_mark_weak_root_cl; 1.179 weak_cld_cl = &scan_mark_weak_cld_cl; 1.180 + trace_metadata = true; 1.181 } else { 1.182 weak_root_cl = &scan_mark_root_cl; 1.183 weak_cld_cl = &scan_mark_cld_cl; 1.184 @@ -4723,21 +4652,21 @@ 1.185 weak_root_cl = &scan_only_root_cl; 1.186 strong_cld_cl = &scan_only_cld_cl; 1.187 weak_cld_cl = &scan_only_cld_cl; 1.188 - strong_code_cl = &scan_only_code_cl; 1.189 } 1.190 1.191 - 1.192 - G1ParPushHeapRSClosure push_heap_rs_cl(_g1h, &pss); 1.193 - 1.194 pss.start_strong_roots(); 1.195 - _g1h->g1_process_roots(strong_root_cl, 1.196 - weak_root_cl, 1.197 - &push_heap_rs_cl, 1.198 - strong_cld_cl, 1.199 - weak_cld_cl, 1.200 - strong_code_cl, 1.201 - worker_id); 1.202 - 1.203 + 1.204 + _root_processor->evacuate_roots(strong_root_cl, 1.205 + weak_root_cl, 1.206 + strong_cld_cl, 1.207 + weak_cld_cl, 1.208 + trace_metadata, 1.209 + worker_id); 1.210 + 1.211 + G1ParPushHeapRSClosure push_heap_rs_cl(_g1h, &pss); 1.212 + _root_processor->scan_remembered_sets(&push_heap_rs_cl, 1.213 + weak_root_cl, 1.214 + worker_id); 1.215 pss.end_strong_roots(); 1.216 1.217 { 1.218 @@ -4768,87 +4697,6 @@ 1.219 } 1.220 }; 1.221 1.222 -// *** Common G1 Evacuation Stuff 1.223 - 1.224 -// This method is run in a GC worker. 1.225 - 1.226 -void 1.227 -G1CollectedHeap:: 1.228 -g1_process_roots(OopClosure* scan_non_heap_roots, 1.229 - OopClosure* scan_non_heap_weak_roots, 1.230 - G1ParPushHeapRSClosure* scan_rs, 1.231 - CLDClosure* scan_strong_clds, 1.232 - CLDClosure* scan_weak_clds, 1.233 - CodeBlobClosure* scan_strong_code, 1.234 - uint worker_i) { 1.235 - 1.236 - // First scan the shared roots. 1.237 - double ext_roots_start = os::elapsedTime(); 1.238 - double closure_app_time_sec = 0.0; 1.239 - 1.240 - bool during_im = _g1h->g1_policy()->during_initial_mark_pause(); 1.241 - bool trace_metadata = during_im && ClassUnloadingWithConcurrentMark; 1.242 - 1.243 - BufferingOopClosure buf_scan_non_heap_roots(scan_non_heap_roots); 1.244 - BufferingOopClosure buf_scan_non_heap_weak_roots(scan_non_heap_weak_roots); 1.245 - 1.246 - process_roots(false, // no scoping; this is parallel code 1.247 - SharedHeap::SO_None, 1.248 - &buf_scan_non_heap_roots, 1.249 - &buf_scan_non_heap_weak_roots, 1.250 - scan_strong_clds, 1.251 - // Unloading Initial Marks handle the weak CLDs separately. 1.252 - (trace_metadata ? NULL : scan_weak_clds), 1.253 - scan_strong_code); 1.254 - 1.255 - // Now the CM ref_processor roots. 1.256 - if (!_process_strong_tasks->is_task_claimed(G1H_PS_refProcessor_oops_do)) { 1.257 - // We need to treat the discovered reference lists of the 1.258 - // concurrent mark ref processor as roots and keep entries 1.259 - // (which are added by the marking threads) on them live 1.260 - // until they can be processed at the end of marking. 1.261 - ref_processor_cm()->weak_oops_do(&buf_scan_non_heap_roots); 1.262 - } 1.263 - 1.264 - if (trace_metadata) { 1.265 - // Barrier to make sure all workers passed 1.266 - // the strong CLD and strong nmethods phases. 1.267 - active_strong_roots_scope()->wait_until_all_workers_done_with_threads(n_par_threads()); 1.268 - 1.269 - // Now take the complement of the strong CLDs. 1.270 - ClassLoaderDataGraph::roots_cld_do(NULL, scan_weak_clds); 1.271 - } 1.272 - 1.273 - // Finish up any enqueued closure apps (attributed as object copy time). 1.274 - buf_scan_non_heap_roots.done(); 1.275 - buf_scan_non_heap_weak_roots.done(); 1.276 - 1.277 - double obj_copy_time_sec = buf_scan_non_heap_roots.closure_app_seconds() 1.278 - + buf_scan_non_heap_weak_roots.closure_app_seconds(); 1.279 - 1.280 - g1_policy()->phase_times()->record_time_secs(G1GCPhaseTimes::ObjCopy, worker_i, obj_copy_time_sec); 1.281 - 1.282 - double ext_root_time_sec = os::elapsedTime() - ext_roots_start - obj_copy_time_sec; 1.283 - g1_policy()->phase_times()->record_time_secs(G1GCPhaseTimes::ExtRootScan, worker_i, ext_root_time_sec); 1.284 - 1.285 - // During conc marking we have to filter the per-thread SATB buffers 1.286 - // to make sure we remove any oops into the CSet (which will show up 1.287 - // as implicitly live). 1.288 - { 1.289 - G1GCParPhaseTimesTracker x(g1_policy()->phase_times(), G1GCPhaseTimes::SATBFiltering, worker_i); 1.290 - if (!_process_strong_tasks->is_task_claimed(G1H_PS_filter_satb_buffers) && mark_in_progress()) { 1.291 - JavaThread::satb_mark_queue_set().filter_thread_buffers(); 1.292 - } 1.293 - } 1.294 - 1.295 - // Now scan the complement of the collection set. 1.296 - G1CodeBlobClosure scavenge_cs_nmethods(scan_non_heap_weak_roots); 1.297 - 1.298 - g1_rem_set()->oops_into_collection_set_do(scan_rs, &scavenge_cs_nmethods, worker_i); 1.299 - 1.300 - _process_strong_tasks->all_tasks_completed(); 1.301 -} 1.302 - 1.303 class G1StringSymbolTableUnlinkTask : public AbstractGangTask { 1.304 private: 1.305 BoolObjectClosure* _is_alive; 1.306 @@ -5836,7 +5684,6 @@ 1.307 n_workers = 1; 1.308 } 1.309 1.310 - G1ParTask g1_par_task(this, _task_queues); 1.311 1.312 init_for_evac_failure(NULL); 1.313 1.314 @@ -5847,7 +5694,8 @@ 1.315 double end_par_time_sec; 1.316 1.317 { 1.318 - StrongRootsScope srs(this); 1.319 + G1RootProcessor root_processor(this); 1.320 + G1ParTask g1_par_task(this, _task_queues, &root_processor); 1.321 // InitialMark needs claim bits to keep track of the marked-through CLDs. 1.322 if (g1_policy()->during_initial_mark_pause()) { 1.323 ClassLoaderDataGraph::clear_claimed_marks(); 1.324 @@ -5868,9 +5716,9 @@ 1.325 end_par_time_sec = os::elapsedTime(); 1.326 1.327 // Closing the inner scope will execute the destructor 1.328 - // for the StrongRootsScope object. We record the current 1.329 + // for the G1RootProcessor object. We record the current 1.330 // elapsed time before closing the scope so that time 1.331 - // taken for the SRS destructor is NOT included in the 1.332 + // taken for the destructor is NOT included in the 1.333 // reported parallel time. 1.334 } 1.335