Wed, 05 Aug 2009 12:33:29 -0700
6821693: 64-bit TaskQueue capacity still too small
6821507: Alignment problem in GC taskqueue
Reviewed-by: tonyp, apetrusenko
src/share/vm/utilities/taskqueue.hpp | file | annotate | diff | comparison | revisions |
1.1 --- a/src/share/vm/utilities/taskqueue.hpp Wed Aug 05 18:54:12 2009 -0700 1.2 +++ b/src/share/vm/utilities/taskqueue.hpp Wed Aug 05 12:33:29 2009 -0700 1.3 @@ -22,94 +22,90 @@ 1.4 * 1.5 */ 1.6 1.7 -#ifdef LP64 1.8 -typedef juint TAG_TYPE; 1.9 -// for a taskqueue size of 4M 1.10 -#define LOG_TASKQ_SIZE 22 1.11 -#else 1.12 -typedef jushort TAG_TYPE; 1.13 -// for a taskqueue size of 16K 1.14 -#define LOG_TASKQ_SIZE 14 1.15 -#endif 1.16 - 1.17 class TaskQueueSuper: public CHeapObj { 1.18 protected: 1.19 - // The first free element after the last one pushed (mod _n). 1.20 + // Internal type for indexing the queue; also used for the tag. 1.21 + typedef NOT_LP64(uint16_t) LP64_ONLY(uint32_t) idx_t; 1.22 + 1.23 + // The first free element after the last one pushed (mod N). 1.24 volatile uint _bottom; 1.25 1.26 - // log2 of the size of the queue. 1.27 - enum SomeProtectedConstants { 1.28 - Log_n = LOG_TASKQ_SIZE 1.29 + enum { 1.30 + N = 1 << NOT_LP64(14) LP64_ONLY(17), // Queue size: 16K or 128K 1.31 + MOD_N_MASK = N - 1 // To compute x mod N efficiently. 1.32 }; 1.33 -#undef LOG_TASKQ_SIZE 1.34 1.35 - // Size of the queue. 1.36 - uint n() { return (1 << Log_n); } 1.37 - // For computing "x mod n" efficiently. 1.38 - uint n_mod_mask() { return n() - 1; } 1.39 + class Age { 1.40 + public: 1.41 + Age(size_t data = 0) { _data = data; } 1.42 + Age(const Age& age) { _data = age._data; } 1.43 + Age(idx_t top, idx_t tag) { _fields._top = top; _fields._tag = tag; } 1.44 1.45 - struct Age { 1.46 - TAG_TYPE _top; 1.47 - TAG_TYPE _tag; 1.48 + Age get() const volatile { return _data; } 1.49 + void set(Age age) volatile { _data = age._data; } 1.50 1.51 - TAG_TYPE tag() const { return _tag; } 1.52 - TAG_TYPE top() const { return _top; } 1.53 + idx_t top() const volatile { return _fields._top; } 1.54 + idx_t tag() const volatile { return _fields._tag; } 1.55 1.56 - Age() { _tag = 0; _top = 0; } 1.57 + // Increment top; if it wraps, increment tag also. 1.58 + void increment() { 1.59 + _fields._top = increment_index(_fields._top); 1.60 + if (_fields._top == 0) ++_fields._tag; 1.61 + } 1.62 1.63 - friend bool operator ==(const Age& a1, const Age& a2) { 1.64 - return a1.tag() == a2.tag() && a1.top() == a2.top(); 1.65 + Age cmpxchg(const Age new_age, const Age old_age) volatile { 1.66 + return (size_t) Atomic::cmpxchg_ptr((intptr_t)new_age._data, 1.67 + (volatile intptr_t *)&_data, 1.68 + (intptr_t)old_age._data); 1.69 } 1.70 + 1.71 + bool operator ==(const Age& other) const { return _data == other._data; } 1.72 + 1.73 + private: 1.74 + struct fields { 1.75 + idx_t _top; 1.76 + idx_t _tag; 1.77 + }; 1.78 + union { 1.79 + size_t _data; 1.80 + fields _fields; 1.81 + }; 1.82 }; 1.83 - Age _age; 1.84 - // These make sure we do single atomic reads and writes. 1.85 - Age get_age() { 1.86 - uint res = *(volatile uint*)(&_age); 1.87 - return *(Age*)(&res); 1.88 + 1.89 + volatile Age _age; 1.90 + 1.91 + // These both operate mod N. 1.92 + static uint increment_index(uint ind) { 1.93 + return (ind + 1) & MOD_N_MASK; 1.94 } 1.95 - void set_age(Age a) { 1.96 - *(volatile uint*)(&_age) = *(uint*)(&a); 1.97 + static uint decrement_index(uint ind) { 1.98 + return (ind - 1) & MOD_N_MASK; 1.99 } 1.100 1.101 - TAG_TYPE get_top() { 1.102 - return get_age().top(); 1.103 - } 1.104 - 1.105 - // These both operate mod _n. 1.106 - uint increment_index(uint ind) { 1.107 - return (ind + 1) & n_mod_mask(); 1.108 - } 1.109 - uint decrement_index(uint ind) { 1.110 - return (ind - 1) & n_mod_mask(); 1.111 - } 1.112 - 1.113 - // Returns a number in the range [0.._n). If the result is "n-1", it 1.114 - // should be interpreted as 0. 1.115 + // Returns a number in the range [0..N). If the result is "N-1", it should be 1.116 + // interpreted as 0. 1.117 uint dirty_size(uint bot, uint top) { 1.118 - return ((int)bot - (int)top) & n_mod_mask(); 1.119 + return (bot - top) & MOD_N_MASK; 1.120 } 1.121 1.122 // Returns the size corresponding to the given "bot" and "top". 1.123 uint size(uint bot, uint top) { 1.124 uint sz = dirty_size(bot, top); 1.125 - // Has the queue "wrapped", so that bottom is less than top? 1.126 - // There's a complicated special case here. A pair of threads could 1.127 - // perform pop_local and pop_global operations concurrently, starting 1.128 - // from a state in which _bottom == _top+1. The pop_local could 1.129 - // succeed in decrementing _bottom, and the pop_global in incrementing 1.130 - // _top (in which case the pop_global will be awarded the contested 1.131 - // queue element.) The resulting state must be interpreted as an empty 1.132 - // queue. (We only need to worry about one such event: only the queue 1.133 - // owner performs pop_local's, and several concurrent threads 1.134 - // attempting to perform the pop_global will all perform the same CAS, 1.135 - // and only one can succeed. Any stealing thread that reads after 1.136 - // either the increment or decrement will see an empty queue, and will 1.137 - // not join the competitors. The "sz == -1 || sz == _n-1" state will 1.138 - // not be modified by concurrent queues, so the owner thread can reset 1.139 - // the state to _bottom == top so subsequent pushes will be performed 1.140 - // normally. 1.141 - if (sz == (n()-1)) return 0; 1.142 - else return sz; 1.143 + // Has the queue "wrapped", so that bottom is less than top? There's a 1.144 + // complicated special case here. A pair of threads could perform pop_local 1.145 + // and pop_global operations concurrently, starting from a state in which 1.146 + // _bottom == _top+1. The pop_local could succeed in decrementing _bottom, 1.147 + // and the pop_global in incrementing _top (in which case the pop_global 1.148 + // will be awarded the contested queue element.) The resulting state must 1.149 + // be interpreted as an empty queue. (We only need to worry about one such 1.150 + // event: only the queue owner performs pop_local's, and several concurrent 1.151 + // threads attempting to perform the pop_global will all perform the same 1.152 + // CAS, and only one can succeed.) Any stealing thread that reads after 1.153 + // either the increment or decrement will see an empty queue, and will not 1.154 + // join the competitors. The "sz == -1 || sz == N-1" state will not be 1.155 + // modified by concurrent queues, so the owner thread can reset the state to 1.156 + // _bottom == top so subsequent pushes will be performed normally. 1.157 + return (sz == N - 1) ? 0 : sz; 1.158 } 1.159 1.160 public: 1.161 @@ -122,22 +118,21 @@ 1.162 // The "careful" version admits the possibility of pop_local/pop_global 1.163 // races. 1.164 uint size() { 1.165 - return size(_bottom, get_top()); 1.166 + return size(_bottom, _age.top()); 1.167 } 1.168 1.169 uint dirty_size() { 1.170 - return dirty_size(_bottom, get_top()); 1.171 + return dirty_size(_bottom, _age.top()); 1.172 } 1.173 1.174 void set_empty() { 1.175 _bottom = 0; 1.176 - _age = Age(); 1.177 + _age.set(0); 1.178 } 1.179 1.180 // Maximum number of elements allowed in the queue. This is two less 1.181 // than the actual queue size, for somewhat complicated reasons. 1.182 - uint max_elems() { return n() - 2; } 1.183 - 1.184 + uint max_elems() { return N - 2; } 1.185 }; 1.186 1.187 template<class E> class GenericTaskQueue: public TaskQueueSuper { 1.188 @@ -179,12 +174,12 @@ 1.189 1.190 template<class E> 1.191 GenericTaskQueue<E>::GenericTaskQueue():TaskQueueSuper() { 1.192 - assert(sizeof(Age) == sizeof(int), "Depends on this."); 1.193 + assert(sizeof(Age) == sizeof(size_t), "Depends on this."); 1.194 } 1.195 1.196 template<class E> 1.197 void GenericTaskQueue<E>::initialize() { 1.198 - _elems = NEW_C_HEAP_ARRAY(E, n()); 1.199 + _elems = NEW_C_HEAP_ARRAY(E, N); 1.200 guarantee(_elems != NULL, "Allocation failed."); 1.201 } 1.202 1.203 @@ -208,14 +203,14 @@ 1.204 1.205 template<class E> 1.206 bool GenericTaskQueue<E>::push_slow(E t, uint dirty_n_elems) { 1.207 - if (dirty_n_elems == n() - 1) { 1.208 + if (dirty_n_elems == N - 1) { 1.209 // Actually means 0, so do the push. 1.210 uint localBot = _bottom; 1.211 _elems[localBot] = t; 1.212 _bottom = increment_index(localBot); 1.213 return true; 1.214 - } else 1.215 - return false; 1.216 + } 1.217 + return false; 1.218 } 1.219 1.220 template<class E> 1.221 @@ -230,53 +225,45 @@ 1.222 // then have the owner thread do a pop followed by another push. Without 1.223 // the incrementing of "tag", the pop_global's CAS could succeed, 1.224 // allowing it to believe it has claimed the stale element.) 1.225 - Age newAge; 1.226 - newAge._top = localBot; 1.227 - newAge._tag = oldAge.tag() + 1; 1.228 + Age newAge((idx_t)localBot, oldAge.tag() + 1); 1.229 // Perhaps a competing pop_global has already incremented "top", in which 1.230 // case it wins the element. 1.231 if (localBot == oldAge.top()) { 1.232 - Age tempAge; 1.233 // No competing pop_global has yet incremented "top"; we'll try to 1.234 // install new_age, thus claiming the element. 1.235 - assert(sizeof(Age) == sizeof(int), "Assumption about CAS unit."); 1.236 - *(uint*)&tempAge = Atomic::cmpxchg(*(uint*)&newAge, (volatile uint*)&_age, *(uint*)&oldAge); 1.237 + Age tempAge = _age.cmpxchg(newAge, oldAge); 1.238 if (tempAge == oldAge) { 1.239 // We win. 1.240 - assert(dirty_size(localBot, get_top()) != n() - 1, 1.241 - "Shouldn't be possible..."); 1.242 + assert(dirty_size(localBot, _age.top()) != N - 1, "sanity"); 1.243 return true; 1.244 } 1.245 } 1.246 - // We fail; a completing pop_global gets the element. But the queue is 1.247 - // empty (and top is greater than bottom.) Fix this representation of 1.248 - // the empty queue to become the canonical one. 1.249 - set_age(newAge); 1.250 - assert(dirty_size(localBot, get_top()) != n() - 1, 1.251 - "Shouldn't be possible..."); 1.252 + // We lose; a completing pop_global gets the element. But the queue is empty 1.253 + // and top is greater than bottom. Fix this representation of the empty queue 1.254 + // to become the canonical one. 1.255 + _age.set(newAge); 1.256 + assert(dirty_size(localBot, _age.top()) != N - 1, "sanity"); 1.257 return false; 1.258 } 1.259 1.260 template<class E> 1.261 bool GenericTaskQueue<E>::pop_global(E& t) { 1.262 - Age newAge; 1.263 - Age oldAge = get_age(); 1.264 + Age oldAge = _age.get(); 1.265 uint localBot = _bottom; 1.266 uint n_elems = size(localBot, oldAge.top()); 1.267 if (n_elems == 0) { 1.268 return false; 1.269 } 1.270 + 1.271 t = _elems[oldAge.top()]; 1.272 - newAge = oldAge; 1.273 - newAge._top = increment_index(newAge.top()); 1.274 - if ( newAge._top == 0 ) newAge._tag++; 1.275 - Age resAge; 1.276 - *(uint*)&resAge = Atomic::cmpxchg(*(uint*)&newAge, (volatile uint*)&_age, *(uint*)&oldAge); 1.277 + Age newAge(oldAge); 1.278 + newAge.increment(); 1.279 + Age resAge = _age.cmpxchg(newAge, oldAge); 1.280 + 1.281 // Note that using "_bottom" here might fail, since a pop_local might 1.282 // have decremented it. 1.283 - assert(dirty_size(localBot, newAge._top) != n() - 1, 1.284 - "Shouldn't be possible..."); 1.285 - return (resAge == oldAge); 1.286 + assert(dirty_size(localBot, newAge.top()) != N - 1, "sanity"); 1.287 + return resAge == oldAge; 1.288 } 1.289 1.290 template<class E> 1.291 @@ -459,7 +446,7 @@ 1.292 return offer_termination(NULL); 1.293 } 1.294 1.295 - // As above, but it also terminates of the should_exit_termination() 1.296 + // As above, but it also terminates if the should_exit_termination() 1.297 // method of the terminator parameter returns true. If terminator is 1.298 // NULL, then it is ignored. 1.299 bool offer_termination(TerminatorTerminator* terminator); 1.300 @@ -492,11 +479,10 @@ 1.301 } 1.302 #else 1.303 uint localBot = _bottom; 1.304 - assert((localBot >= 0) && (localBot < n()), "_bottom out of range."); 1.305 - TAG_TYPE top = get_top(); 1.306 + assert((localBot >= 0) && (localBot < N), "_bottom out of range."); 1.307 + idx_t top = _age.top(); 1.308 uint dirty_n_elems = dirty_size(localBot, top); 1.309 - assert((dirty_n_elems >= 0) && (dirty_n_elems < n()), 1.310 - "n_elems out of range."); 1.311 + assert((dirty_n_elems >= 0) && (dirty_n_elems < N), "n_elems out of range."); 1.312 if (dirty_n_elems < max_elems()) { 1.313 _elems[localBot] = t; 1.314 _bottom = increment_index(localBot); 1.315 @@ -517,12 +503,12 @@ 1.316 return true; 1.317 #else 1.318 uint localBot = _bottom; 1.319 - // This value cannot be n-1. That can only occur as a result of 1.320 + // This value cannot be N-1. That can only occur as a result of 1.321 // the assignment to bottom in this method. If it does, this method 1.322 // resets the size( to 0 before the next call (which is sequential, 1.323 // since this is pop_local.) 1.324 - uint dirty_n_elems = dirty_size(localBot, get_top()); 1.325 - assert(dirty_n_elems != n() - 1, "Shouldn't be possible..."); 1.326 + uint dirty_n_elems = dirty_size(localBot, _age.top()); 1.327 + assert(dirty_n_elems != N - 1, "Shouldn't be possible..."); 1.328 if (dirty_n_elems == 0) return false; 1.329 localBot = decrement_index(localBot); 1.330 _bottom = localBot; 1.331 @@ -534,15 +520,14 @@ 1.332 // If there's still at least one element in the queue, based on the 1.333 // "_bottom" and "age" we've read, then there can be no interference with 1.334 // a "pop_global" operation, and we're done. 1.335 - TAG_TYPE tp = get_top(); // XXX 1.336 + idx_t tp = _age.top(); // XXX 1.337 if (size(localBot, tp) > 0) { 1.338 - assert(dirty_size(localBot, tp) != n() - 1, 1.339 - "Shouldn't be possible..."); 1.340 + assert(dirty_size(localBot, tp) != N - 1, "sanity"); 1.341 return true; 1.342 } else { 1.343 // Otherwise, the queue contained exactly one element; we take the slow 1.344 // path. 1.345 - return pop_local_slow(localBot, get_age()); 1.346 + return pop_local_slow(localBot, _age.get()); 1.347 } 1.348 #endif 1.349 }