Mon, 09 Aug 2010 18:03:50 -0700
6970376: ParNew: shared TaskQueue statistics
Reviewed-by: ysr
1.1 --- a/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp Mon Aug 09 05:41:05 2010 -0700 1.2 +++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp Mon Aug 09 18:03:50 2010 -0700 1.3 @@ -51,9 +51,14 @@ 1.4 _is_alive_closure(gen_), _scan_weak_ref_closure(gen_, this), 1.5 _keep_alive_closure(&_scan_weak_ref_closure), 1.6 _promotion_failure_size(0), 1.7 - _pushes(0), _pops(0), _steals(0), _steal_attempts(0), _term_attempts(0), 1.8 _strong_roots_time(0.0), _term_time(0.0) 1.9 { 1.10 + #if TASKQUEUE_STATS 1.11 + _term_attempts = 0; 1.12 + _overflow_refills = 0; 1.13 + _overflow_refill_objs = 0; 1.14 + #endif // TASKQUEUE_STATS 1.15 + 1.16 _survivor_chunk_array = 1.17 (ChunkArray*) old_gen()->get_data_recorder(thread_num()); 1.18 _hash_seed = 17; // Might want to take time-based random value. 1.19 @@ -100,7 +105,6 @@ 1.20 // Push remainder. 1.21 bool ok = work_queue()->push(old); 1.22 assert(ok, "just popped, push must be okay"); 1.23 - note_push(); 1.24 } else { 1.25 // Restore length so that it can be used if there 1.26 // is a promotion failure and forwarding pointers 1.27 @@ -126,7 +130,6 @@ 1.28 while (queue->size() > (juint)max_size) { 1.29 oop obj_to_scan; 1.30 if (queue->pop_local(obj_to_scan)) { 1.31 - note_pop(); 1.32 if ((HeapWord *)obj_to_scan < young_old_boundary()) { 1.33 if (obj_to_scan->is_objArray() && 1.34 obj_to_scan->is_forwarded() && 1.35 @@ -271,20 +274,28 @@ 1.36 GrowableArray<oop>** overflow_stacks_, 1.37 size_t desired_plab_sz, 1.38 ParallelTaskTerminator& term); 1.39 + 1.40 + ~ParScanThreadStateSet() { TASKQUEUE_STATS_ONLY(reset_stats()); } 1.41 + 1.42 inline ParScanThreadState& thread_state(int i); 1.43 - int pushes() { return _pushes; } 1.44 - int pops() { return _pops; } 1.45 - int steals() { return _steals; } 1.46 + 1.47 void reset(bool promotion_failed); 1.48 void flush(); 1.49 + 1.50 + #if TASKQUEUE_STATS 1.51 + static void 1.52 + print_termination_stats_hdr(outputStream* const st = gclog_or_tty); 1.53 + void print_termination_stats(outputStream* const st = gclog_or_tty); 1.54 + static void 1.55 + print_taskqueue_stats_hdr(outputStream* const st = gclog_or_tty); 1.56 + void print_taskqueue_stats(outputStream* const st = gclog_or_tty); 1.57 + void reset_stats(); 1.58 + #endif // TASKQUEUE_STATS 1.59 + 1.60 private: 1.61 ParallelTaskTerminator& _term; 1.62 ParNewGeneration& _gen; 1.63 Generation& _next_gen; 1.64 - // staticstics 1.65 - int _pushes; 1.66 - int _pops; 1.67 - int _steals; 1.68 }; 1.69 1.70 1.71 @@ -294,8 +305,7 @@ 1.72 GrowableArray<oop>** overflow_stack_set_, 1.73 size_t desired_plab_sz, ParallelTaskTerminator& term) 1.74 : ResourceArray(sizeof(ParScanThreadState), num_threads), 1.75 - _gen(gen), _next_gen(old_gen), _term(term), 1.76 - _pushes(0), _pops(0), _steals(0) 1.77 + _gen(gen), _next_gen(old_gen), _term(term) 1.78 { 1.79 assert(num_threads > 0, "sanity check!"); 1.80 // Initialize states. 1.81 @@ -323,6 +333,82 @@ 1.82 } 1.83 } 1.84 1.85 +#if TASKQUEUE_STATS 1.86 +void 1.87 +ParScanThreadState::reset_stats() 1.88 +{ 1.89 + taskqueue_stats().reset(); 1.90 + _term_attempts = 0; 1.91 + _overflow_refills = 0; 1.92 + _overflow_refill_objs = 0; 1.93 +} 1.94 + 1.95 +void ParScanThreadStateSet::reset_stats() 1.96 +{ 1.97 + for (int i = 0; i < length(); ++i) { 1.98 + thread_state(i).reset_stats(); 1.99 + } 1.100 +} 1.101 + 1.102 +void 1.103 +ParScanThreadStateSet::print_termination_stats_hdr(outputStream* const st) 1.104 +{ 1.105 + st->print_raw_cr("GC Termination Stats"); 1.106 + st->print_raw_cr(" elapsed --strong roots-- " 1.107 + "-------termination-------"); 1.108 + st->print_raw_cr("thr ms ms % " 1.109 + " ms % attempts"); 1.110 + st->print_raw_cr("--- --------- --------- ------ " 1.111 + "--------- ------ --------"); 1.112 +} 1.113 + 1.114 +void ParScanThreadStateSet::print_termination_stats(outputStream* const st) 1.115 +{ 1.116 + print_termination_stats_hdr(st); 1.117 + 1.118 + for (int i = 0; i < length(); ++i) { 1.119 + const ParScanThreadState & pss = thread_state(i); 1.120 + const double elapsed_ms = pss.elapsed_time() * 1000.0; 1.121 + const double s_roots_ms = pss.strong_roots_time() * 1000.0; 1.122 + const double term_ms = pss.term_time() * 1000.0; 1.123 + st->print_cr("%3d %9.2f %9.2f %6.2f " 1.124 + "%9.2f %6.2f " SIZE_FORMAT_W(8), 1.125 + i, elapsed_ms, s_roots_ms, s_roots_ms * 100 / elapsed_ms, 1.126 + term_ms, term_ms * 100 / elapsed_ms, pss.term_attempts()); 1.127 + } 1.128 +} 1.129 + 1.130 +// Print stats related to work queue activity. 1.131 +void ParScanThreadStateSet::print_taskqueue_stats_hdr(outputStream* const st) 1.132 +{ 1.133 + st->print_raw_cr("GC Task Stats"); 1.134 + st->print_raw("thr "); TaskQueueStats::print_header(1, st); st->cr(); 1.135 + st->print_raw("--- "); TaskQueueStats::print_header(2, st); st->cr(); 1.136 +} 1.137 + 1.138 +void ParScanThreadStateSet::print_taskqueue_stats(outputStream* const st) 1.139 +{ 1.140 + print_taskqueue_stats_hdr(st); 1.141 + 1.142 + TaskQueueStats totals; 1.143 + for (int i = 0; i < length(); ++i) { 1.144 + const ParScanThreadState & pss = thread_state(i); 1.145 + const TaskQueueStats & stats = pss.taskqueue_stats(); 1.146 + st->print("%3d ", i); stats.print(st); st->cr(); 1.147 + totals += stats; 1.148 + 1.149 + if (pss.overflow_refills() > 0) { 1.150 + st->print_cr(" " SIZE_FORMAT_W(10) " overflow refills " 1.151 + SIZE_FORMAT_W(10) " overflow objects", 1.152 + pss.overflow_refills(), pss.overflow_refill_objs()); 1.153 + } 1.154 + } 1.155 + st->print("tot "); totals.print(st); st->cr(); 1.156 + 1.157 + DEBUG_ONLY(totals.verify()); 1.158 +} 1.159 +#endif // TASKQUEUE_STATS 1.160 + 1.161 void ParScanThreadStateSet::flush() 1.162 { 1.163 // Work in this loop should be kept as lightweight as 1.164 @@ -346,42 +432,8 @@ 1.165 // Inform old gen that we're done. 1.166 _next_gen.par_promote_alloc_done(i); 1.167 _next_gen.par_oop_since_save_marks_iterate_done(i); 1.168 + } 1.169 1.170 - // Flush stats related to work queue activity (push/pop/steal) 1.171 - // This could conceivably become a bottleneck; if so, we'll put the 1.172 - // stat's gathering under the flag. 1.173 - if (PAR_STATS_ENABLED) { 1.174 - _pushes += par_scan_state.pushes(); 1.175 - _pops += par_scan_state.pops(); 1.176 - _steals += par_scan_state.steals(); 1.177 - if (ParallelGCVerbose) { 1.178 - gclog_or_tty->print("Thread %d complete:\n" 1.179 - " Pushes: %7d Pops: %7d Steals %7d (in %d attempts)\n", 1.180 - i, par_scan_state.pushes(), par_scan_state.pops(), 1.181 - par_scan_state.steals(), par_scan_state.steal_attempts()); 1.182 - if (par_scan_state.overflow_pushes() > 0 || 1.183 - par_scan_state.overflow_refills() > 0) { 1.184 - gclog_or_tty->print(" Overflow pushes: %7d " 1.185 - "Overflow refills: %7d for %d objs.\n", 1.186 - par_scan_state.overflow_pushes(), 1.187 - par_scan_state.overflow_refills(), 1.188 - par_scan_state.overflow_refill_objs()); 1.189 - } 1.190 - 1.191 - double elapsed = par_scan_state.elapsed(); 1.192 - double strong_roots = par_scan_state.strong_roots_time(); 1.193 - double term = par_scan_state.term_time(); 1.194 - gclog_or_tty->print( 1.195 - " Elapsed: %7.2f ms.\n" 1.196 - " Strong roots: %7.2f ms (%6.2f%%)\n" 1.197 - " Termination: %7.2f ms (%6.2f%%) (in %d entries)\n", 1.198 - elapsed * 1000.0, 1.199 - strong_roots * 1000.0, (strong_roots*100.0/elapsed), 1.200 - term * 1000.0, (term*100.0/elapsed), 1.201 - par_scan_state.term_attempts()); 1.202 - } 1.203 - } 1.204 - } 1.205 if (UseConcMarkSweepGC && ParallelGCThreads > 0) { 1.206 // We need to call this even when ResizeOldPLAB is disabled 1.207 // so as to avoid breaking some asserts. While we may be able 1.208 @@ -456,15 +508,12 @@ 1.209 // We have no local work, attempt to steal from other threads. 1.210 1.211 // attempt to steal work from promoted. 1.212 - par_scan_state()->note_steal_attempt(); 1.213 if (task_queues()->steal(par_scan_state()->thread_num(), 1.214 par_scan_state()->hash_seed(), 1.215 obj_to_scan)) { 1.216 - par_scan_state()->note_steal(); 1.217 bool res = work_q->push(obj_to_scan); 1.218 assert(res, "Empty queue should have room for a push."); 1.219 1.220 - par_scan_state()->note_push(); 1.221 // if successful, goto Start. 1.222 continue; 1.223 1.224 @@ -842,17 +891,6 @@ 1.225 } 1.226 thread_state_set.reset(promotion_failed()); 1.227 1.228 - if (PAR_STATS_ENABLED && ParallelGCVerbose) { 1.229 - gclog_or_tty->print("Thread totals:\n" 1.230 - " Pushes: %7d Pops: %7d Steals %7d (sum = %7d).\n", 1.231 - thread_state_set.pushes(), thread_state_set.pops(), 1.232 - thread_state_set.steals(), 1.233 - thread_state_set.pops()+thread_state_set.steals()); 1.234 - } 1.235 - assert(thread_state_set.pushes() == thread_state_set.pops() 1.236 - + thread_state_set.steals(), 1.237 - "Or else the queues are leaky."); 1.238 - 1.239 // Process (weak) reference objects found during scavenge. 1.240 ReferenceProcessor* rp = ref_processor(); 1.241 IsAliveClosure is_alive(this); 1.242 @@ -932,6 +970,9 @@ 1.243 gch->print_heap_change(gch_prev_used); 1.244 } 1.245 1.246 + TASKQUEUE_STATS_ONLY(thread_state_set.print_termination_stats()); 1.247 + TASKQUEUE_STATS_ONLY(thread_state_set.print_taskqueue_stats()); 1.248 + 1.249 if (UseAdaptiveSizePolicy) { 1.250 size_policy->minor_collection_end(gch->gc_cause()); 1.251 size_policy->avg_survived()->sample(from()->used()); 1.252 @@ -1104,9 +1145,8 @@ 1.253 gclog_or_tty->print("queue overflow!\n"); 1.254 } 1.255 push_on_overflow_list(old, par_scan_state); 1.256 - par_scan_state->note_overflow_push(); 1.257 + TASKQUEUE_STATS_ONLY(par_scan_state->taskqueue_stats().record_overflow(0)); 1.258 } 1.259 - par_scan_state->note_push(); 1.260 1.261 return new_obj; 1.262 } 1.263 @@ -1227,9 +1267,8 @@ 1.264 if (simulate_overflow || !par_scan_state->work_queue()->push(obj_to_push)) { 1.265 // Add stats for overflow pushes. 1.266 push_on_overflow_list(old, par_scan_state); 1.267 - par_scan_state->note_overflow_push(); 1.268 + TASKQUEUE_STATS_ONLY(par_scan_state->taskqueue_stats().record_overflow(0)); 1.269 } 1.270 - par_scan_state->note_push(); 1.271 1.272 return new_obj; 1.273 } 1.274 @@ -1466,7 +1505,7 @@ 1.275 cur = next; 1.276 n++; 1.277 } 1.278 - par_scan_state->note_overflow_refill(n); 1.279 + TASKQUEUE_STATS_ONLY(par_scan_state->note_overflow_refill(n)); 1.280 #ifndef PRODUCT 1.281 assert(_num_par_pushes >= n, "Too many pops?"); 1.282 Atomic::add_ptr(-(intptr_t)n, &_num_par_pushes);
2.1 --- a/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp Mon Aug 09 05:41:05 2010 -0700 2.2 +++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp Mon Aug 09 18:03:50 2010 -0700 2.3 @@ -36,9 +36,6 @@ 2.4 typedef Padded<OopTaskQueue> ObjToScanQueue; 2.5 typedef GenericTaskQueueSet<ObjToScanQueue> ObjToScanQueueSet; 2.6 2.7 -// Enable this to get push/pop/steal stats. 2.8 -const int PAR_STATS_ENABLED = 0; 2.9 - 2.10 class ParKeepAliveClosure: public DefNewGeneration::KeepAliveClosure { 2.11 private: 2.12 ParScanWeakRefClosure* _par_cl; 2.13 @@ -94,8 +91,11 @@ 2.14 2.15 bool _to_space_full; 2.16 2.17 - int _pushes, _pops, _steals, _steal_attempts, _term_attempts; 2.18 - int _overflow_pushes, _overflow_refills, _overflow_refill_objs; 2.19 +#if TASKQUEUE_STATS 2.20 + size_t _term_attempts; 2.21 + size_t _overflow_refills; 2.22 + size_t _overflow_refill_objs; 2.23 +#endif // TASKQUEUE_STATS 2.24 2.25 // Stats for promotion failure 2.26 size_t _promotion_failure_size; 2.27 @@ -181,45 +181,38 @@ 2.28 } 2.29 void print_and_clear_promotion_failure_size(); 2.30 2.31 - int pushes() { return _pushes; } 2.32 - int pops() { return _pops; } 2.33 - int steals() { return _steals; } 2.34 - int steal_attempts() { return _steal_attempts; } 2.35 - int term_attempts() { return _term_attempts; } 2.36 - int overflow_pushes() { return _overflow_pushes; } 2.37 - int overflow_refills() { return _overflow_refills; } 2.38 - int overflow_refill_objs() { return _overflow_refill_objs; } 2.39 +#if TASKQUEUE_STATS 2.40 + TaskQueueStats & taskqueue_stats() const { return _work_queue->stats; } 2.41 2.42 - void note_push() { if (PAR_STATS_ENABLED) _pushes++; } 2.43 - void note_pop() { if (PAR_STATS_ENABLED) _pops++; } 2.44 - void note_steal() { if (PAR_STATS_ENABLED) _steals++; } 2.45 - void note_steal_attempt() { if (PAR_STATS_ENABLED) _steal_attempts++; } 2.46 - void note_term_attempt() { if (PAR_STATS_ENABLED) _term_attempts++; } 2.47 - void note_overflow_push() { if (PAR_STATS_ENABLED) _overflow_pushes++; } 2.48 - void note_overflow_refill(int objs) { 2.49 - if (PAR_STATS_ENABLED) { 2.50 - _overflow_refills++; 2.51 - _overflow_refill_objs += objs; 2.52 - } 2.53 + size_t term_attempts() const { return _term_attempts; } 2.54 + size_t overflow_refills() const { return _overflow_refills; } 2.55 + size_t overflow_refill_objs() const { return _overflow_refill_objs; } 2.56 + 2.57 + void note_term_attempt() { ++_term_attempts; } 2.58 + void note_overflow_refill(size_t objs) { 2.59 + ++_overflow_refills; _overflow_refill_objs += objs; 2.60 } 2.61 2.62 + void reset_stats(); 2.63 +#endif // TASKQUEUE_STATS 2.64 + 2.65 void start_strong_roots() { 2.66 _start_strong_roots = os::elapsedTime(); 2.67 } 2.68 void end_strong_roots() { 2.69 _strong_roots_time += (os::elapsedTime() - _start_strong_roots); 2.70 } 2.71 - double strong_roots_time() { return _strong_roots_time; } 2.72 + double strong_roots_time() const { return _strong_roots_time; } 2.73 void start_term_time() { 2.74 - note_term_attempt(); 2.75 + TASKQUEUE_STATS_ONLY(note_term_attempt()); 2.76 _start_term = os::elapsedTime(); 2.77 } 2.78 void end_term_time() { 2.79 _term_time += (os::elapsedTime() - _start_term); 2.80 } 2.81 - double term_time() { return _term_time; } 2.82 + double term_time() const { return _term_time; } 2.83 2.84 - double elapsed() { 2.85 + double elapsed_time() const { 2.86 return os::elapsedTime() - _start; 2.87 } 2.88 };