Mon, 09 Aug 2010 05:41:05 -0700
6966222: G1: simplify TaskQueue overflow handling
Reviewed-by: tonyp, ysr
1.1 --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Fri Aug 06 10:17:21 2010 -0700 1.2 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Mon Aug 09 05:41:05 2010 -0700 1.3 @@ -2710,6 +2710,35 @@ 1.4 } 1.5 }; 1.6 1.7 +#if TASKQUEUE_STATS 1.8 +void G1CollectedHeap::print_taskqueue_stats_hdr(outputStream* const st) { 1.9 + st->print_raw_cr("GC Task Stats"); 1.10 + st->print_raw("thr "); TaskQueueStats::print_header(1, st); st->cr(); 1.11 + st->print_raw("--- "); TaskQueueStats::print_header(2, st); st->cr(); 1.12 +} 1.13 + 1.14 +void G1CollectedHeap::print_taskqueue_stats(outputStream* const st) const { 1.15 + print_taskqueue_stats_hdr(st); 1.16 + 1.17 + TaskQueueStats totals; 1.18 + const int n = MAX2(workers()->total_workers(), 1); 1.19 + for (int i = 0; i < n; ++i) { 1.20 + st->print("%3d ", i); task_queue(i)->stats.print(st); st->cr(); 1.21 + totals += task_queue(i)->stats; 1.22 + } 1.23 + st->print_raw("tot "); totals.print(st); st->cr(); 1.24 + 1.25 + DEBUG_ONLY(totals.verify()); 1.26 +} 1.27 + 1.28 +void G1CollectedHeap::reset_taskqueue_stats() { 1.29 + const int n = MAX2(workers()->total_workers(), 1); 1.30 + for (int i = 0; i < n; ++i) { 1.31 + task_queue(i)->stats.reset(); 1.32 + } 1.33 +} 1.34 +#endif // TASKQUEUE_STATS 1.35 + 1.36 void 1.37 G1CollectedHeap::do_collection_pause_at_safepoint(double target_pause_time_ms) { 1.38 if (GC_locker::check_active_before_gc()) { 1.39 @@ -2970,6 +2999,9 @@ 1.40 } 1.41 } 1.42 1.43 + TASKQUEUE_STATS_ONLY(if (ParallelGCVerbose) print_taskqueue_stats()); 1.44 + TASKQUEUE_STATS_ONLY(reset_taskqueue_stats()); 1.45 + 1.46 if (PrintHeapAtGC) { 1.47 Universe::print_heap_after_gc(); 1.48 } 1.49 @@ -3715,10 +3747,6 @@ 1.50 _surviving_alloc_buffer(g1h->desired_plab_sz(GCAllocForSurvived)), 1.51 _tenured_alloc_buffer(g1h->desired_plab_sz(GCAllocForTenured)), 1.52 _age_table(false), 1.53 -#if G1_DETAILED_STATS 1.54 - _pushes(0), _pops(0), _steals(0), 1.55 - _steal_attempts(0), _overflow_pushes(0), 1.56 -#endif 1.57 _strong_roots_time(0), _term_time(0), 1.58 _alloc_buffer_waste(0), _undo_waste(0) 1.59 { 1.60 @@ -3738,14 +3766,41 @@ 1.61 _surviving_young_words = _surviving_young_words_base + PADDING_ELEM_NUM; 1.62 memset(_surviving_young_words, 0, real_length * sizeof(size_t)); 1.63 1.64 - _overflowed_refs = new OverflowQueue(10); 1.65 - 1.66 _alloc_buffers[GCAllocForSurvived] = &_surviving_alloc_buffer; 1.67 _alloc_buffers[GCAllocForTenured] = &_tenured_alloc_buffer; 1.68 1.69 _start = os::elapsedTime(); 1.70 } 1.71 1.72 +void 1.73 +G1ParScanThreadState::print_termination_stats_hdr(outputStream* const st) 1.74 +{ 1.75 + st->print_raw_cr("GC Termination Stats"); 1.76 + st->print_raw_cr(" elapsed --strong roots-- -------termination-------" 1.77 + " ------waste (KiB)------"); 1.78 + st->print_raw_cr("thr ms ms % ms % attempts" 1.79 + " total alloc undo"); 1.80 + st->print_raw_cr("--- --------- --------- ------ --------- ------ --------" 1.81 + " ------- ------- -------"); 1.82 +} 1.83 + 1.84 +void 1.85 +G1ParScanThreadState::print_termination_stats(int i, 1.86 + outputStream* const st) const 1.87 +{ 1.88 + const double elapsed_ms = elapsed_time() * 1000.0; 1.89 + const double s_roots_ms = strong_roots_time() * 1000.0; 1.90 + const double term_ms = term_time() * 1000.0; 1.91 + st->print_cr("%3d %9.2f %9.2f %6.2f " 1.92 + "%9.2f %6.2f " SIZE_FORMAT_W(8) " " 1.93 + SIZE_FORMAT_W(7) " " SIZE_FORMAT_W(7) " " SIZE_FORMAT_W(7), 1.94 + i, elapsed_ms, s_roots_ms, s_roots_ms * 100 / elapsed_ms, 1.95 + term_ms, term_ms * 100 / elapsed_ms, term_attempts(), 1.96 + (alloc_buffer_waste() + undo_waste()) * HeapWordSize / K, 1.97 + alloc_buffer_waste() * HeapWordSize / K, 1.98 + undo_waste() * HeapWordSize / K); 1.99 +} 1.100 + 1.101 G1ParClosureSuper::G1ParClosureSuper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) : 1.102 _g1(g1), _g1_rem(_g1->g1_rem_set()), _cm(_g1->concurrent_mark()), 1.103 _par_scan_state(par_scan_state) { } 1.104 @@ -3952,12 +4007,9 @@ 1.105 G1ParScanThreadState* pss = par_scan_state(); 1.106 while (true) { 1.107 pss->trim_queue(); 1.108 - IF_G1_DETAILED_STATS(pss->note_steal_attempt()); 1.109 1.110 StarTask stolen_task; 1.111 if (queues()->steal(pss->queue_num(), pss->hash_seed(), stolen_task)) { 1.112 - IF_G1_DETAILED_STATS(pss->note_steal()); 1.113 - 1.114 // slightly paranoid tests; I'm trying to catch potential 1.115 // problems before we go into push_on_queue to know where the 1.116 // problem is coming from 1.117 @@ -4076,35 +4128,9 @@ 1.118 // Clean up any par-expanded rem sets. 1.119 HeapRegionRemSet::par_cleanup(); 1.120 1.121 - MutexLocker x(stats_lock()); 1.122 if (ParallelGCVerbose) { 1.123 - gclog_or_tty->print("Thread %d complete:\n", i); 1.124 -#if G1_DETAILED_STATS 1.125 - gclog_or_tty->print(" Pushes: %7d Pops: %7d Overflows: %7d Steals %7d (in %d attempts)\n", 1.126 - pss.pushes(), 1.127 - pss.pops(), 1.128 - pss.overflow_pushes(), 1.129 - pss.steals(), 1.130 - pss.steal_attempts()); 1.131 -#endif 1.132 - double elapsed = pss.elapsed(); 1.133 - double strong_roots = pss.strong_roots_time(); 1.134 - double term = pss.term_time(); 1.135 - gclog_or_tty->print(" Elapsed: %7.2f ms.\n" 1.136 - " Strong roots: %7.2f ms (%6.2f%%)\n" 1.137 - " Termination: %7.2f ms (%6.2f%%) " 1.138 - "(in "SIZE_FORMAT" entries)\n", 1.139 - elapsed * 1000.0, 1.140 - strong_roots * 1000.0, (strong_roots*100.0/elapsed), 1.141 - term * 1000.0, (term*100.0/elapsed), 1.142 - pss.term_attempts()); 1.143 - size_t total_waste = pss.alloc_buffer_waste() + pss.undo_waste(); 1.144 - gclog_or_tty->print(" Waste: %8dK\n" 1.145 - " Alloc Buffer: %8dK\n" 1.146 - " Undo: %8dK\n", 1.147 - (total_waste * HeapWordSize) / K, 1.148 - (pss.alloc_buffer_waste() * HeapWordSize) / K, 1.149 - (pss.undo_waste() * HeapWordSize) / K); 1.150 + MutexLocker x(stats_lock()); 1.151 + pss.print_termination_stats(i); 1.152 } 1.153 1.154 assert(pss.refs_to_scan() == 0, "Task queue should be empty"); 1.155 @@ -4221,6 +4247,7 @@ 1.156 if (ParallelGCThreads > 0) { 1.157 // The individual threads will set their evac-failure closures. 1.158 StrongRootsScope srs(this); 1.159 + if (ParallelGCVerbose) G1ParScanThreadState::print_termination_stats_hdr(); 1.160 workers()->run_task(&g1_par_task); 1.161 } else { 1.162 StrongRootsScope srs(this);
2.1 --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp Fri Aug 06 10:17:21 2010 -0700 2.2 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp Mon Aug 09 05:41:05 2010 -0700 2.3 @@ -46,17 +46,7 @@ 2.4 class ConcurrentG1Refine; 2.5 class ConcurrentZFThread; 2.6 2.7 -// If want to accumulate detailed statistics on work queues 2.8 -// turn this on. 2.9 -#define G1_DETAILED_STATS 0 2.10 - 2.11 -#if G1_DETAILED_STATS 2.12 -# define IF_G1_DETAILED_STATS(code) code 2.13 -#else 2.14 -# define IF_G1_DETAILED_STATS(code) 2.15 -#endif 2.16 - 2.17 -typedef GenericTaskQueue<StarTask> RefToScanQueue; 2.18 +typedef OverflowTaskQueue<StarTask> RefToScanQueue; 2.19 typedef GenericTaskQueueSet<RefToScanQueue> RefToScanQueueSet; 2.20 2.21 typedef int RegionIdx_t; // needs to hold [ 0..max_regions() ) 2.22 @@ -471,6 +461,12 @@ 2.23 virtual void shrink(size_t expand_bytes); 2.24 void shrink_helper(size_t expand_bytes); 2.25 2.26 + #if TASKQUEUE_STATS 2.27 + static void print_taskqueue_stats_hdr(outputStream* const st = gclog_or_tty); 2.28 + void print_taskqueue_stats(outputStream* const st = gclog_or_tty) const; 2.29 + void reset_taskqueue_stats(); 2.30 + #endif // TASKQUEUE_STATS 2.31 + 2.32 // Do an incremental collection: identify a collection set, and evacuate 2.33 // its live objects elsewhere. 2.34 virtual void do_collection_pause(); 2.35 @@ -662,7 +658,7 @@ 2.36 public: 2.37 void set_refine_cte_cl_concurrency(bool concurrent); 2.38 2.39 - RefToScanQueue *task_queue(int i); 2.40 + RefToScanQueue *task_queue(int i) const; 2.41 2.42 // A set of cards where updates happened during the GC 2.43 DirtyCardQueueSet& dirty_card_queue_set() { return _dirty_card_queue_set; } 2.44 @@ -1579,9 +1575,6 @@ 2.45 CardTableModRefBS* _ct_bs; 2.46 G1RemSet* _g1_rem; 2.47 2.48 - typedef GrowableArray<StarTask> OverflowQueue; 2.49 - OverflowQueue* _overflowed_refs; 2.50 - 2.51 G1ParGCAllocBuffer _surviving_alloc_buffer; 2.52 G1ParGCAllocBuffer _tenured_alloc_buffer; 2.53 G1ParGCAllocBuffer* _alloc_buffers[GCAllocPurposeCount]; 2.54 @@ -1598,10 +1591,6 @@ 2.55 int _queue_num; 2.56 2.57 size_t _term_attempts; 2.58 -#if G1_DETAILED_STATS 2.59 - int _pushes, _pops, _steals, _steal_attempts; 2.60 - int _overflow_pushes; 2.61 -#endif 2.62 2.63 double _start; 2.64 double _start_strong_roots; 2.65 @@ -1615,7 +1604,7 @@ 2.66 // this points into the array, as we use the first few entries for padding 2.67 size_t* _surviving_young_words; 2.68 2.69 -#define PADDING_ELEM_NUM (64 / sizeof(size_t)) 2.70 +#define PADDING_ELEM_NUM (DEFAULT_CACHE_LINE_SIZE / sizeof(size_t)) 2.71 2.72 void add_to_alloc_buffer_waste(size_t waste) { _alloc_buffer_waste += waste; } 2.73 2.74 @@ -1650,15 +1639,14 @@ 2.75 } 2.76 2.77 RefToScanQueue* refs() { return _refs; } 2.78 - OverflowQueue* overflowed_refs() { return _overflowed_refs; } 2.79 ageTable* age_table() { return &_age_table; } 2.80 2.81 G1ParGCAllocBuffer* alloc_buffer(GCAllocPurpose purpose) { 2.82 return _alloc_buffers[purpose]; 2.83 } 2.84 2.85 - size_t alloc_buffer_waste() { return _alloc_buffer_waste; } 2.86 - size_t undo_waste() { return _undo_waste; } 2.87 + size_t alloc_buffer_waste() const { return _alloc_buffer_waste; } 2.88 + size_t undo_waste() const { return _undo_waste; } 2.89 2.90 template <class T> void push_on_queue(T* ref) { 2.91 assert(ref != NULL, "invariant"); 2.92 @@ -1671,12 +1659,7 @@ 2.93 assert(_g1h->obj_in_cs(p), "Should be in CS"); 2.94 } 2.95 #endif 2.96 - if (!refs()->push(ref)) { 2.97 - overflowed_refs()->push(ref); 2.98 - IF_G1_DETAILED_STATS(note_overflow_push()); 2.99 - } else { 2.100 - IF_G1_DETAILED_STATS(note_push()); 2.101 - } 2.102 + refs()->push(ref); 2.103 } 2.104 2.105 void pop_from_queue(StarTask& ref) { 2.106 @@ -1687,7 +1670,6 @@ 2.107 _g1h->is_in_g1_reserved(ref.is_narrow() ? oopDesc::load_decode_heap_oop((narrowOop*)ref) 2.108 : oopDesc::load_decode_heap_oop((oop*)ref)), 2.109 "invariant"); 2.110 - IF_G1_DETAILED_STATS(note_pop()); 2.111 } else { 2.112 StarTask null_task; 2.113 ref = null_task; 2.114 @@ -1695,7 +1677,8 @@ 2.115 } 2.116 2.117 void pop_from_overflow_queue(StarTask& ref) { 2.118 - StarTask new_ref = overflowed_refs()->pop(); 2.119 + StarTask new_ref; 2.120 + refs()->pop_overflow(new_ref); 2.121 assert((oop*)new_ref != NULL, "pop() from a local non-empty stack"); 2.122 assert(UseCompressedOops || !new_ref.is_narrow(), "Error"); 2.123 assert(has_partial_array_mask((oop*)new_ref) || 2.124 @@ -1705,8 +1688,8 @@ 2.125 ref = new_ref; 2.126 } 2.127 2.128 - int refs_to_scan() { return refs()->size(); } 2.129 - int overflowed_refs_to_scan() { return overflowed_refs()->length(); } 2.130 + int refs_to_scan() { return refs()->size(); } 2.131 + int overflowed_refs_to_scan() { return refs()->overflow_stack()->length(); } 2.132 2.133 template <class T> void update_rs(HeapRegion* from, T* p, int tid) { 2.134 if (G1DeferredRSUpdate) { 2.135 @@ -1775,30 +1758,16 @@ 2.136 int* hash_seed() { return &_hash_seed; } 2.137 int queue_num() { return _queue_num; } 2.138 2.139 - size_t term_attempts() { return _term_attempts; } 2.140 + size_t term_attempts() const { return _term_attempts; } 2.141 void note_term_attempt() { _term_attempts++; } 2.142 2.143 -#if G1_DETAILED_STATS 2.144 - int pushes() { return _pushes; } 2.145 - int pops() { return _pops; } 2.146 - int steals() { return _steals; } 2.147 - int steal_attempts() { return _steal_attempts; } 2.148 - int overflow_pushes() { return _overflow_pushes; } 2.149 - 2.150 - void note_push() { _pushes++; } 2.151 - void note_pop() { _pops++; } 2.152 - void note_steal() { _steals++; } 2.153 - void note_steal_attempt() { _steal_attempts++; } 2.154 - void note_overflow_push() { _overflow_pushes++; } 2.155 -#endif 2.156 - 2.157 void start_strong_roots() { 2.158 _start_strong_roots = os::elapsedTime(); 2.159 } 2.160 void end_strong_roots() { 2.161 _strong_roots_time += (os::elapsedTime() - _start_strong_roots); 2.162 } 2.163 - double strong_roots_time() { return _strong_roots_time; } 2.164 + double strong_roots_time() const { return _strong_roots_time; } 2.165 2.166 void start_term_time() { 2.167 note_term_attempt(); 2.168 @@ -1807,12 +1776,17 @@ 2.169 void end_term_time() { 2.170 _term_time += (os::elapsedTime() - _start_term); 2.171 } 2.172 - double term_time() { return _term_time; } 2.173 + double term_time() const { return _term_time; } 2.174 2.175 - double elapsed() { 2.176 + double elapsed_time() const { 2.177 return os::elapsedTime() - _start; 2.178 } 2.179 2.180 + static void 2.181 + print_termination_stats_hdr(outputStream* const st = gclog_or_tty); 2.182 + void 2.183 + print_termination_stats(int i, outputStream* const st = gclog_or_tty) const; 2.184 + 2.185 size_t* surviving_young_words() { 2.186 // We add on to hide entry 0 which accumulates surviving words for 2.187 // age -1 regions (i.e. non-young ones)
3.1 --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp Fri Aug 06 10:17:21 2010 -0700 3.2 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp Mon Aug 09 05:41:05 2010 -0700 3.3 @@ -81,11 +81,10 @@ 3.4 return attempt_allocation_slow(word_size, permit_collection_pause); 3.5 } 3.6 3.7 -inline RefToScanQueue* G1CollectedHeap::task_queue(int i) { 3.8 +inline RefToScanQueue* G1CollectedHeap::task_queue(int i) const { 3.9 return _task_queues->queue(i); 3.10 } 3.11 3.12 - 3.13 inline bool G1CollectedHeap::isMarkedPrev(oop obj) const { 3.14 return _cm->prevMarkBitMap()->isMarked((HeapWord *)obj); 3.15 }
4.1 --- a/src/share/vm/utilities/taskqueue.cpp Fri Aug 06 10:17:21 2010 -0700 4.2 +++ b/src/share/vm/utilities/taskqueue.cpp Mon Aug 09 05:41:05 2010 -0700 4.3 @@ -36,6 +36,14 @@ 4.4 "qpush", "qpop", "qpop-s", "qattempt", "qsteal", "opush", "omax" 4.5 }; 4.6 4.7 +TaskQueueStats & TaskQueueStats::operator +=(const TaskQueueStats & addend) 4.8 +{ 4.9 + for (unsigned int i = 0; i < last_stat_id; ++i) { 4.10 + _stats[i] += addend._stats[i]; 4.11 + } 4.12 + return *this; 4.13 +} 4.14 + 4.15 void TaskQueueStats::print_header(unsigned int line, outputStream* const stream, 4.16 unsigned int width) 4.17 { 4.18 @@ -71,6 +79,29 @@ 4.19 } 4.20 #undef FMT 4.21 } 4.22 + 4.23 +#ifdef ASSERT 4.24 +// Invariants which should hold after a TaskQueue has been emptied and is 4.25 +// quiescent; they do not hold at arbitrary times. 4.26 +void TaskQueueStats::verify() const 4.27 +{ 4.28 + assert(get(push) == get(pop) + get(steal), 4.29 + err_msg("push=" SIZE_FORMAT " pop=" SIZE_FORMAT " steal=" SIZE_FORMAT, 4.30 + get(push), get(pop), get(steal))); 4.31 + assert(get(pop_slow) <= get(pop), 4.32 + err_msg("pop_slow=" SIZE_FORMAT " pop=" SIZE_FORMAT, 4.33 + get(pop_slow), get(pop))); 4.34 + assert(get(steal) <= get(steal_attempt), 4.35 + err_msg("steal=" SIZE_FORMAT " steal_attempt=" SIZE_FORMAT, 4.36 + get(steal), get(steal_attempt))); 4.37 + assert(get(overflow) == 0 || get(push) != 0, 4.38 + err_msg("overflow=" SIZE_FORMAT " push=" SIZE_FORMAT, 4.39 + get(overflow), get(push))); 4.40 + assert(get(overflow_max_len) == 0 || get(overflow) != 0, 4.41 + err_msg("overflow_max_len=" SIZE_FORMAT " overflow=" SIZE_FORMAT, 4.42 + get(overflow_max_len), get(overflow))); 4.43 +} 4.44 +#endif // ASSERT 4.45 #endif // TASKQUEUE_STATS 4.46 4.47 int TaskQueueSetSuper::randomParkAndMiller(int *seed0) {
5.1 --- a/src/share/vm/utilities/taskqueue.hpp Fri Aug 06 10:17:21 2010 -0700 5.2 +++ b/src/share/vm/utilities/taskqueue.hpp Mon Aug 09 05:41:05 2010 -0700 5.3 @@ -59,15 +59,21 @@ 5.4 inline void record_steal(bool success); 5.5 inline void record_overflow(size_t new_length); 5.6 5.7 + TaskQueueStats & operator +=(const TaskQueueStats & addend); 5.8 + 5.9 inline size_t get(StatId id) const { return _stats[id]; } 5.10 inline const size_t* get() const { return _stats; } 5.11 5.12 inline void reset(); 5.13 5.14 + // Print the specified line of the header (does not include a line separator). 5.15 static void print_header(unsigned int line, outputStream* const stream = tty, 5.16 unsigned int width = 10); 5.17 + // Print the statistics (does not include a line separator). 5.18 void print(outputStream* const stream = tty, unsigned int width = 10) const; 5.19 5.20 + DEBUG_ONLY(void verify() const;) 5.21 + 5.22 private: 5.23 size_t _stats[last_stat_id]; 5.24 static const char * const _names[last_stat_id];