1.1 --- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp Fri Jan 21 11:30:22 2011 -0500 1.2 +++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp Tue Jan 25 10:56:22 2011 -0800 1.3 @@ -1055,7 +1055,12 @@ 1.4 do { 1.5 double start_vtime_sec = os::elapsedVTime(); 1.6 double start_time_sec = os::elapsedTime(); 1.7 - the_task->do_marking_step(10.0); 1.8 + double mark_step_duration_ms = G1ConcMarkStepDurationMillis; 1.9 + 1.10 + the_task->do_marking_step(mark_step_duration_ms, 1.11 + true /* do_stealing */, 1.12 + true /* do_termination */); 1.13 + 1.14 double end_time_sec = os::elapsedTime(); 1.15 double end_vtime_sec = os::elapsedVTime(); 1.16 double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec; 1.17 @@ -1111,7 +1116,8 @@ 1.18 1.19 _restart_for_overflow = false; 1.20 1.21 - set_phase(MAX2((size_t) 1, parallel_marking_threads()), true); 1.22 + size_t active_workers = MAX2((size_t) 1, parallel_marking_threads()); 1.23 + set_phase(active_workers, true /* concurrent */); 1.24 1.25 CMConcurrentMarkingTask markingTask(this, cmThread()); 1.26 if (parallel_marking_threads() > 0) 1.27 @@ -1176,6 +1182,12 @@ 1.28 /* silent */ false, 1.29 /* use_prev_marking */ false); 1.30 } 1.31 + assert(!restart_for_overflow(), "sanity"); 1.32 + } 1.33 + 1.34 + // Reset the marking state if marking completed 1.35 + if (!restart_for_overflow()) { 1.36 + set_non_marking_state(); 1.37 } 1.38 1.39 #if VERIFY_OBJS_PROCESSED 1.40 @@ -1853,6 +1865,8 @@ 1.41 assert(local_free_list.is_empty(), "post-condition"); 1.42 } 1.43 1.44 +// Support closures for reference processing in G1 1.45 + 1.46 bool G1CMIsAliveClosure::do_object_b(oop obj) { 1.47 HeapWord* addr = (HeapWord*)obj; 1.48 return addr != NULL && 1.49 @@ -1873,11 +1887,17 @@ 1.50 virtual void do_oop( oop* p) { do_oop_work(p); } 1.51 1.52 template <class T> void do_oop_work(T* p) { 1.53 - oop thisOop = oopDesc::load_decode_heap_oop(p); 1.54 - HeapWord* addr = (HeapWord*)thisOop; 1.55 - if (_g1->is_in_g1_reserved(addr) && 
_g1->is_obj_ill(thisOop)) { 1.56 + oop obj = oopDesc::load_decode_heap_oop(p); 1.57 + HeapWord* addr = (HeapWord*)obj; 1.58 + 1.59 + if (_cm->verbose_high()) 1.60 + gclog_or_tty->print_cr("\t[0] we're looking at location " 1.61 + "*"PTR_FORMAT" = "PTR_FORMAT, 1.62 + p, (void*) obj); 1.63 + 1.64 + if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) { 1.65 _bitMap->mark(addr); 1.66 - _cm->mark_stack_push(thisOop); 1.67 + _cm->mark_stack_push(obj); 1.68 } 1.69 } 1.70 }; 1.71 @@ -1899,6 +1919,199 @@ 1.72 } 1.73 }; 1.74 1.75 +// 'Keep Alive' closure used by parallel reference processing. 1.76 +// An instance of this closure is used in the parallel reference processing 1.77 +// code rather than an instance of G1CMKeepAliveClosure. We could have used 1.78 +// the G1CMKeepAliveClosure as it is MT-safe. Also reference objects are 1.79 +// placed on to discovered ref lists once so we can mark and push with no 1.80 +// need to check whether the object has already been marked. Using the 1.81 +// G1CMKeepAliveClosure would mean, however, having all the worker threads 1.82 +// operating on the global mark stack. This means that an individual 1.83 +// worker would be doing lock-free pushes while it processes its own 1.84 +// discovered ref list followed by drain call. If the discovered ref lists 1.85 +// are unbalanced then this could cause interference with the other 1.86 +// workers. Using a CMTask (and its embedded local data structures) 1.87 +// avoids that potential interference. 
1.88 +class G1CMParKeepAliveAndDrainClosure: public OopClosure { 1.89 + ConcurrentMark* _cm; 1.90 + CMTask* _task; 1.91 + CMBitMap* _bitMap; 1.92 + int _ref_counter_limit; 1.93 + int _ref_counter; 1.94 + public: 1.95 + G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm, 1.96 + CMTask* task, 1.97 + CMBitMap* bitMap) : 1.98 + _cm(cm), _task(task), _bitMap(bitMap), 1.99 + _ref_counter_limit(G1RefProcDrainInterval) 1.100 + { 1.101 + assert(_ref_counter_limit > 0, "sanity"); 1.102 + _ref_counter = _ref_counter_limit; 1.103 + } 1.104 + 1.105 + virtual void do_oop(narrowOop* p) { do_oop_work(p); } 1.106 + virtual void do_oop( oop* p) { do_oop_work(p); } 1.107 + 1.108 + template <class T> void do_oop_work(T* p) { 1.109 + if (!_cm->has_overflown()) { 1.110 + oop obj = oopDesc::load_decode_heap_oop(p); 1.111 + if (_cm->verbose_high()) 1.112 + gclog_or_tty->print_cr("\t[%d] we're looking at location " 1.113 + "*"PTR_FORMAT" = "PTR_FORMAT, 1.114 + _task->task_id(), p, (void*) obj); 1.115 + 1.116 + _task->deal_with_reference(obj); 1.117 + _ref_counter--; 1.118 + 1.119 + if (_ref_counter == 0) { 1.120 + // We have dealt with _ref_counter_limit references, pushing them and objects 1.121 + // reachable from them on to the local stack (and possibly the global stack). 1.122 + // Call do_marking_step() to process these entries. We call the routine in a 1.123 + // loop, which we'll exit if there's nothing more to do (i.e. we're done 1.124 + // with the entries that we've pushed as a result of the deal_with_reference 1.125 + // calls above) or we overflow. 1.126 + // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag 1.127 + // while there may still be some work to do. (See the comment at the 1.128 + // beginning of CMTask::do_marking_step() for those conditions - one of which 1.129 + // is reaching the specified time target.) 
It is only when 1.130 + // CMTask::do_marking_step() returns without setting the has_aborted() flag 1.131 + // that the marking has completed. 1.132 + do { 1.133 + double mark_step_duration_ms = G1ConcMarkStepDurationMillis; 1.134 + _task->do_marking_step(mark_step_duration_ms, 1.135 + false /* do_stealing */, 1.136 + false /* do_termination */); 1.137 + } while (_task->has_aborted() && !_cm->has_overflown()); 1.138 + _ref_counter = _ref_counter_limit; 1.139 + } 1.140 + } else { 1.141 + if (_cm->verbose_high()) 1.142 + gclog_or_tty->print_cr("\t[%d] CM Overflow", _task->task_id()); 1.143 + } 1.144 + } 1.145 +}; 1.146 + 1.147 +class G1CMParDrainMarkingStackClosure: public VoidClosure { 1.148 + ConcurrentMark* _cm; 1.149 + CMTask* _task; 1.150 + public: 1.151 + G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) : 1.152 + _cm(cm), _task(task) 1.153 + {} 1.154 + 1.155 + void do_void() { 1.156 + do { 1.157 + if (_cm->verbose_high()) 1.158 + gclog_or_tty->print_cr("\t[%d] Drain: Calling do marking_step", _task->task_id()); 1.159 + 1.160 + // We call CMTask::do_marking_step() to completely drain the local and 1.161 + // global marking stacks. The routine is called in a loop, which we'll 1.162 + // exit if there's nothing more to do (i.e. we've completely drained the 1.163 + // entries that were pushed as a result of applying the 1.164 + // G1CMParKeepAliveAndDrainClosure to the entries on the discovered ref 1.165 + // lists above) or we overflow the global marking stack. 1.166 + // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag 1.167 + // while there may still be some work to do. (See the comment at the 1.168 + // beginning of CMTask::do_marking_step() for those conditions - one of which 1.169 + // is reaching the specified time target.) It is only when 1.170 + // CMTask::do_marking_step() returns without setting the has_aborted() flag 1.171 + // that the marking has completed. 
1.172 + 1.173 + _task->do_marking_step(1000000000.0 /* something very large */, 1.174 + true /* do_stealing */, 1.175 + true /* do_termination */); 1.176 + } while (_task->has_aborted() && !_cm->has_overflown()); 1.177 + } 1.178 +}; 1.179 + 1.180 +// Implementation of AbstractRefProcTaskExecutor for G1 1.181 +class G1RefProcTaskExecutor: public AbstractRefProcTaskExecutor { 1.182 +private: 1.183 + G1CollectedHeap* _g1h; 1.184 + ConcurrentMark* _cm; 1.185 + CMBitMap* _bitmap; 1.186 + WorkGang* _workers; 1.187 + int _active_workers; 1.188 + 1.189 +public: 1.190 + G1RefProcTaskExecutor(G1CollectedHeap* g1h, 1.191 + ConcurrentMark* cm, 1.192 + CMBitMap* bitmap, 1.193 + WorkGang* workers, 1.194 + int n_workers) : 1.195 + _g1h(g1h), _cm(cm), _bitmap(bitmap), 1.196 + _workers(workers), _active_workers(n_workers) 1.197 + { } 1.198 + 1.199 + // Executes the given task using concurrent marking worker threads. 1.200 + virtual void execute(ProcessTask& task); 1.201 + virtual void execute(EnqueueTask& task); 1.202 +}; 1.203 + 1.204 +class G1RefProcTaskProxy: public AbstractGangTask { 1.205 + typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; 1.206 + ProcessTask& _proc_task; 1.207 + G1CollectedHeap* _g1h; 1.208 + ConcurrentMark* _cm; 1.209 + CMBitMap* _bitmap; 1.210 + 1.211 +public: 1.212 + G1RefProcTaskProxy(ProcessTask& proc_task, 1.213 + G1CollectedHeap* g1h, 1.214 + ConcurrentMark* cm, 1.215 + CMBitMap* bitmap) : 1.216 + AbstractGangTask("Process reference objects in parallel"), 1.217 + _proc_task(proc_task), _g1h(g1h), _cm(cm), _bitmap(bitmap) 1.218 + {} 1.219 + 1.220 + virtual void work(int i) { 1.221 + CMTask* marking_task = _cm->task(i); 1.222 + G1CMIsAliveClosure g1_is_alive(_g1h); 1.223 + G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task, _bitmap); 1.224 + G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task); 1.225 + 1.226 + _proc_task.work(i, g1_is_alive, g1_par_keep_alive, g1_par_drain); 1.227 + } 1.228 +}; 1.229 + 1.230 +void 
G1RefProcTaskExecutor::execute(ProcessTask& proc_task) { 1.231 + assert(_workers != NULL, "Need parallel worker threads."); 1.232 + 1.233 + G1RefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm, _bitmap); 1.234 + 1.235 + // We need to reset the phase for each task execution so that 1.236 + // the termination protocol of CMTask::do_marking_step works. 1.237 + _cm->set_phase(_active_workers, false /* concurrent */); 1.238 + _g1h->set_par_threads(_active_workers); 1.239 + _workers->run_task(&proc_task_proxy); 1.240 + _g1h->set_par_threads(0); 1.241 +} 1.242 + 1.243 +class G1RefEnqueueTaskProxy: public AbstractGangTask { 1.244 + typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask; 1.245 + EnqueueTask& _enq_task; 1.246 + 1.247 +public: 1.248 + G1RefEnqueueTaskProxy(EnqueueTask& enq_task) : 1.249 + AbstractGangTask("Enqueue reference objects in parallel"), 1.250 + _enq_task(enq_task) 1.251 + { } 1.252 + 1.253 + virtual void work(int i) { 1.254 + _enq_task.work(i); 1.255 + } 1.256 +}; 1.257 + 1.258 +void G1RefProcTaskExecutor::execute(EnqueueTask& enq_task) { 1.259 + assert(_workers != NULL, "Need parallel worker threads."); 1.260 + 1.261 + G1RefEnqueueTaskProxy enq_task_proxy(enq_task); 1.262 + 1.263 + _g1h->set_par_threads(_active_workers); 1.264 + _workers->run_task(&enq_task_proxy); 1.265 + _g1h->set_par_threads(0); 1.266 +} 1.267 + 1.268 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) { 1.269 ResourceMark rm; 1.270 HandleMark hm; 1.271 @@ -1917,18 +2130,52 @@ 1.272 G1CMDrainMarkingStackClosure 1.273 g1_drain_mark_stack(nextMarkBitMap(), &_markStack, &g1_keep_alive); 1.274 1.275 - // XXXYYY Also: copy the parallel ref processing code from CMS. 1.276 - rp->process_discovered_references(&g1_is_alive, 1.277 - &g1_keep_alive, 1.278 - &g1_drain_mark_stack, 1.279 - NULL); 1.280 + // We use the work gang from the G1CollectedHeap and we utilize all 1.281 + // the worker threads. 
1.282 + int active_workers = MAX2(MIN2(g1h->workers()->total_workers(), (int)_max_task_num), 1); 1.283 + 1.284 + G1RefProcTaskExecutor par_task_executor(g1h, this, nextMarkBitMap(), 1.285 + g1h->workers(), active_workers); 1.286 + 1.287 + if (rp->processing_is_mt()) { 1.288 + // Set the degree of MT here. If the discovery is done MT, there 1.289 + // may have been a different number of threads doing the discovery 1.290 + // and a different number of discovered lists may have Ref objects. 1.291 + // That is OK as long as the Reference lists are balanced (see 1.292 + // balance_all_queues() and balance_queues()). 1.293 + rp->set_mt_degree(active_workers); 1.294 + 1.295 + rp->process_discovered_references(&g1_is_alive, 1.296 + &g1_keep_alive, 1.297 + &g1_drain_mark_stack, 1.298 + &par_task_executor); 1.299 + 1.300 + // The work routines of the parallel keep_alive and drain_marking_stack 1.301 + // will set the has_overflown flag if we overflow the global marking 1.302 + // stack. 1.303 + } else { 1.304 + rp->process_discovered_references(&g1_is_alive, 1.305 + &g1_keep_alive, 1.306 + &g1_drain_mark_stack, 1.307 + NULL); 1.308 + 1.309 + } 1.310 + 1.311 assert(_markStack.overflow() || _markStack.isEmpty(), 1.312 - "mark stack should be empty (unless it overflowed)"); 1.313 + "mark stack should be empty (unless it overflowed)"); 1.314 if (_markStack.overflow()) { 1.315 + // Should have been done already when we tried to push an 1.316 + // entry on to the global mark stack. But let's do it again. 
1.317 set_has_overflown(); 1.318 } 1.319 1.320 - rp->enqueue_discovered_references(); 1.321 + if (rp->processing_is_mt()) { 1.322 + assert(rp->num_q() == active_workers, "why not"); 1.323 + rp->enqueue_discovered_references(&par_task_executor); 1.324 + } else { 1.325 + rp->enqueue_discovered_references(); 1.326 + } 1.327 + 1.328 rp->verify_no_references_recorded(); 1.329 assert(!rp->discovery_enabled(), "should have been disabled"); 1.330 1.331 @@ -1955,7 +2202,9 @@ 1.332 CMTask* task = _cm->task(worker_i); 1.333 task->record_start_time(); 1.334 do { 1.335 - task->do_marking_step(1000000000.0 /* something very large */); 1.336 + task->do_marking_step(1000000000.0 /* something very large */, 1.337 + true /* do_stealing */, 1.338 + true /* do_termination */); 1.339 } while (task->has_aborted() && !_cm->has_overflown()); 1.340 // If we overflow, then we do not want to restart. We instead 1.341 // want to abort remark and do concurrent marking again. 1.342 @@ -1978,7 +2227,7 @@ 1.343 G1CollectedHeap::StrongRootsScope srs(g1h); 1.344 // this is remark, so we'll use up all available threads 1.345 int active_workers = ParallelGCThreads; 1.346 - set_phase(active_workers, false); 1.347 + set_phase(active_workers, false /* concurrent */); 1.348 1.349 CMRemarkTask remarkTask(this); 1.350 // We will start all available threads, even if we decide that the 1.351 @@ -1992,7 +2241,7 @@ 1.352 G1CollectedHeap::StrongRootsScope srs(g1h); 1.353 // this is remark, so we'll use up all available threads 1.354 int active_workers = 1; 1.355 - set_phase(active_workers, false); 1.356 + set_phase(active_workers, false /* concurrent */); 1.357 1.358 CMRemarkTask remarkTask(this); 1.359 // We will start all available threads, even if we decide that the 1.360 @@ -2005,9 +2254,6 @@ 1.361 1.362 print_stats(); 1.363 1.364 - if (!restart_for_overflow()) 1.365 - set_non_marking_state(); 1.366 - 1.367 #if VERIFY_OBJS_PROCESSED 1.368 if (_scan_obj_cl.objs_processed != 
ThreadLocalObjQueue::objs_enqueued) { 1.369 gclog_or_tty->print_cr("Processed = %d, enqueued = %d.", 1.370 @@ -3124,7 +3370,7 @@ 1.371 // do nothing 1.372 } 1.373 #else // _CHECK_BOTH_FINGERS_ 1.374 - // we will only check the global finger 1.375 + // we will only check the global finger 1.376 1.377 if (objAddr < global_finger) { 1.378 // see long comment above 1.379 @@ -3249,7 +3495,7 @@ 1.380 double elapsed_time_ms = curr_time_ms - _start_time_ms; 1.381 if (elapsed_time_ms > _time_target_ms) { 1.382 set_has_aborted(); 1.383 - _has_aborted_timed_out = true; 1.384 + _has_timed_out = true; 1.385 statsOnly( ++_aborted_timed_out ); 1.386 return; 1.387 } 1.388 @@ -3754,7 +4000,9 @@ 1.389 1.390 *****************************************************************************/ 1.391 1.392 -void CMTask::do_marking_step(double time_target_ms) { 1.393 +void CMTask::do_marking_step(double time_target_ms, 1.394 + bool do_stealing, 1.395 + bool do_termination) { 1.396 assert(time_target_ms >= 1.0, "minimum granularity is 1ms"); 1.397 assert(concurrent() == _cm->concurrent(), "they should be the same"); 1.398 1.399 @@ -3794,7 +4042,7 @@ 1.400 1.401 // clear all flags 1.402 clear_has_aborted(); 1.403 - _has_aborted_timed_out = false; 1.404 + _has_timed_out = false; 1.405 _draining_satb_buffers = false; 1.406 1.407 ++_calls; 1.408 @@ -3970,7 +4218,7 @@ 1.409 drain_global_stack(false); 1.410 1.411 // Attempt at work stealing from other task's queues. 1.412 - if (!has_aborted()) { 1.413 + if (do_stealing && !has_aborted()) { 1.414 // We have not aborted. This means that we have finished all that 1.415 // we could. Let's try to do some stealing... 1.416 1.417 @@ -4011,7 +4259,7 @@ 1.418 1.419 // We still haven't aborted. Now, let's try to get into the 1.420 // termination protocol. 
1.421 - if (!has_aborted()) { 1.422 + if (do_termination && !has_aborted()) { 1.423 // We cannot check whether the global stack is empty, since other 1.424 // tasks might be concurrently pushing objects on it. We also cannot 1.425 // check if the region stack is empty because if a thread is aborting 1.426 @@ -4087,7 +4335,7 @@ 1.427 1.428 statsOnly( ++_aborted ); 1.429 1.430 - if (_has_aborted_timed_out) { 1.431 + if (_has_timed_out) { 1.432 double diff_ms = elapsed_time_ms - _time_target_ms; 1.433 // Keep statistics of how well we did with respect to hitting 1.434 // our target only if we actually timed out (if we aborted for