1.1 --- a/src/share/vm/utilities/taskqueue.hpp Thu Jan 29 21:25:42 2009 -0800 1.2 +++ b/src/share/vm/utilities/taskqueue.hpp Fri Jan 30 14:17:52 2009 -0800 1.3 @@ -22,67 +22,76 @@ 1.4 * 1.5 */ 1.6 1.7 +#ifdef LP64 1.8 +typedef juint TAG_TYPE; 1.9 +// for a taskqueue size of 4M 1.10 +#define LOG_TASKQ_SIZE 22 1.11 +#else 1.12 +typedef jushort TAG_TYPE; 1.13 +// for a taskqueue size of 16K 1.14 +#define LOG_TASKQ_SIZE 14 1.15 +#endif 1.16 + 1.17 class TaskQueueSuper: public CHeapObj { 1.18 protected: 1.19 // The first free element after the last one pushed (mod _n). 1.20 - // (For now we'll assume only 32-bit CAS). 1.21 - volatile juint _bottom; 1.22 + volatile uint _bottom; 1.23 1.24 // log2 of the size of the queue. 1.25 enum SomeProtectedConstants { 1.26 - Log_n = 14 1.27 + Log_n = LOG_TASKQ_SIZE 1.28 }; 1.29 +#undef LOG_TASKQ_SIZE 1.30 1.31 // Size of the queue. 1.32 - juint n() { return (1 << Log_n); } 1.33 + uint n() { return (1 << Log_n); } 1.34 // For computing "x mod n" efficiently. 1.35 - juint n_mod_mask() { return n() - 1; } 1.36 + uint n_mod_mask() { return n() - 1; } 1.37 1.38 struct Age { 1.39 - jushort _top; 1.40 - jushort _tag; 1.41 + TAG_TYPE _top; 1.42 + TAG_TYPE _tag; 1.43 1.44 - jushort tag() const { return _tag; } 1.45 - jushort top() const { return _top; } 1.46 + TAG_TYPE tag() const { return _tag; } 1.47 + TAG_TYPE top() const { return _top; } 1.48 1.49 Age() { _tag = 0; _top = 0; } 1.50 1.51 friend bool operator ==(const Age& a1, const Age& a2) { 1.52 return a1.tag() == a2.tag() && a1.top() == a2.top(); 1.53 } 1.54 - 1.55 }; 1.56 Age _age; 1.57 // These make sure we do single atomic reads and writes. 1.58 Age get_age() { 1.59 - jint res = *(volatile jint*)(&_age); 1.60 + uint res = *(volatile uint*)(&_age); 1.61 return *(Age*)(&res); 1.62 } 1.63 void set_age(Age a) { 1.64 - *(volatile jint*)(&_age) = *(int*)(&a); 1.65 + *(volatile uint*)(&_age) = *(uint*)(&a); 1.66 } 1.67 1.68 - jushort get_top() { 1.69 + TAG_TYPE get_top() { 1.70 return get_age().top(); 1.71 } 1.72 1.73 // These both operate mod _n. 1.74 - juint increment_index(juint ind) { 1.75 + uint increment_index(uint ind) { 1.76 return (ind + 1) & n_mod_mask(); 1.77 } 1.78 - juint decrement_index(juint ind) { 1.79 + uint decrement_index(uint ind) { 1.80 return (ind - 1) & n_mod_mask(); 1.81 } 1.82 1.83 // Returns a number in the range [0.._n). If the result is "n-1", it 1.84 // should be interpreted as 0. 1.85 - juint dirty_size(juint bot, juint top) { 1.86 - return ((jint)bot - (jint)top) & n_mod_mask(); 1.87 + uint dirty_size(uint bot, uint top) { 1.88 + return ((int)bot - (int)top) & n_mod_mask(); 1.89 } 1.90 1.91 // Returns the size corresponding to the given "bot" and "top". 1.92 - juint size(juint bot, juint top) { 1.93 - juint sz = dirty_size(bot, top); 1.94 + uint size(uint bot, uint top) { 1.95 + uint sz = dirty_size(bot, top); 1.96 // Has the queue "wrapped", so that bottom is less than top? 1.97 // There's a complicated special case here. A pair of threads could 1.98 // perform pop_local and pop_global operations concurrently, starting 1.99 @@ -94,7 +103,7 @@ 1.100 // owner performs pop_local's, and several concurrent threads 1.101 // attempting to perform the pop_global will all perform the same CAS, 1.102 // and only one can succeed. Any stealing thread that reads after 1.103 - // either the increment or decrement will seen an empty queue, and will 1.104 + // either the increment or decrement will see an empty queue, and will 1.105 // not join the competitors. The "sz == -1 || sz == _n-1" state will 1.106 // not be modified by concurrent queues, so the owner thread can reset 1.107 // the state to _bottom == top so subsequent pushes will be performed 1.108 @@ -112,11 +121,11 @@ 1.109 // Return an estimate of the number of elements in the queue. 1.110 // The "careful" version admits the possibility of pop_local/pop_global 1.111 // races. 1.112 - juint size() { 1.113 + uint size() { 1.114 return size(_bottom, get_top()); 1.115 } 1.116 1.117 - juint dirty_size() { 1.118 + uint dirty_size() { 1.119 return dirty_size(_bottom, get_top()); 1.120 } 1.121 1.122 @@ -127,15 +136,15 @@ 1.123 1.124 // Maximum number of elements allowed in the queue. This is two less 1.125 // than the actual queue size, for somewhat complicated reasons. 1.126 - juint max_elems() { return n() - 2; } 1.127 + uint max_elems() { return n() - 2; } 1.128 1.129 }; 1.130 1.131 template<class E> class GenericTaskQueue: public TaskQueueSuper { 1.132 private: 1.133 // Slow paths for push, pop_local. (pop_global has no fast path.) 1.134 - bool push_slow(E t, juint dirty_n_elems); 1.135 - bool pop_local_slow(juint localBot, Age oldAge); 1.136 + bool push_slow(E t, uint dirty_n_elems); 1.137 + bool pop_local_slow(uint localBot, Age oldAge); 1.138 1.139 public: 1.140 // Initializes the queue to empty. 1.141 @@ -170,7 +179,7 @@ 1.142 1.143 template<class E> 1.144 GenericTaskQueue<E>::GenericTaskQueue():TaskQueueSuper() { 1.145 - assert(sizeof(Age) == sizeof(jint), "Depends on this."); 1.146 + assert(sizeof(Age) == sizeof(int), "Depends on this."); 1.147 } 1.148 1.149 template<class E> 1.150 @@ -182,9 +191,9 @@ 1.151 template<class E> 1.152 void GenericTaskQueue<E>::oops_do(OopClosure* f) { 1.153 // tty->print_cr("START OopTaskQueue::oops_do"); 1.154 - int iters = size(); 1.155 - juint index = _bottom; 1.156 - for (int i = 0; i < iters; ++i) { 1.157 + uint iters = size(); 1.158 + uint index = _bottom; 1.159 + for (uint i = 0; i < iters; ++i) { 1.160 index = decrement_index(index); 1.161 // tty->print_cr(" doing entry %d," INTPTR_T " -> " INTPTR_T, 1.162 // index, &_elems[index], _elems[index]); 1.163 @@ -198,10 +207,10 @@ 1.164 1.165 1.166 template<class E> 1.167 -bool GenericTaskQueue<E>::push_slow(E t, juint dirty_n_elems) { 1.168 +bool GenericTaskQueue<E>::push_slow(E t, uint dirty_n_elems) { 1.169 if (dirty_n_elems == n() - 1) { 1.170 // Actually means 0, so do the push. 1.171 - juint localBot = _bottom; 1.172 + uint localBot = _bottom; 1.173 _elems[localBot] = t; 1.174 _bottom = increment_index(localBot); 1.175 return true; 1.176 @@ -211,7 +220,7 @@ 1.177 1.178 template<class E> 1.179 bool GenericTaskQueue<E>:: 1.180 -pop_local_slow(juint localBot, Age oldAge) { 1.181 +pop_local_slow(uint localBot, Age oldAge) { 1.182 // This queue was observed to contain exactly one element; either this 1.183 // thread will claim it, or a competing "pop_global". In either case, 1.184 // the queue will be logically empty afterwards. Create a new Age value 1.185 @@ -230,9 +239,8 @@ 1.186 Age tempAge; 1.187 // No competing pop_global has yet incremented "top"; we'll try to 1.188 // install new_age, thus claiming the element. 1.189 - assert(sizeof(Age) == sizeof(jint) && sizeof(jint) == sizeof(juint), 1.190 - "Assumption about CAS unit."); 1.191 - *(jint*)&tempAge = Atomic::cmpxchg(*(jint*)&newAge, (volatile jint*)&_age, *(jint*)&oldAge); 1.192 + assert(sizeof(Age) == sizeof(int), "Assumption about CAS unit."); 1.193 + *(uint*)&tempAge = Atomic::cmpxchg(*(uint*)&newAge, (volatile uint*)&_age, *(uint*)&oldAge); 1.194 if (tempAge == oldAge) { 1.195 // We win. 1.196 assert(dirty_size(localBot, get_top()) != n() - 1, 1.197 @@ -253,8 +261,8 @@ 1.198 bool GenericTaskQueue<E>::pop_global(E& t) { 1.199 Age newAge; 1.200 Age oldAge = get_age(); 1.201 - juint localBot = _bottom; 1.202 - juint n_elems = size(localBot, oldAge.top()); 1.203 + uint localBot = _bottom; 1.204 + uint n_elems = size(localBot, oldAge.top()); 1.205 if (n_elems == 0) { 1.206 return false; 1.207 } 1.208 @@ -263,7 +271,7 @@ 1.209 newAge._top = increment_index(newAge.top()); 1.210 if ( newAge._top == 0 ) newAge._tag++; 1.211 Age resAge; 1.212 - *(jint*)&resAge = Atomic::cmpxchg(*(jint*)&newAge, (volatile jint*)&_age, *(jint*)&oldAge); 1.213 + *(uint*)&resAge = Atomic::cmpxchg(*(uint*)&newAge, (volatile uint*)&_age, *(uint*)&oldAge); 1.214 // Note that using "_bottom" here might fail, since a pop_local might 1.215 // have decremented it. 1.216 assert(dirty_size(localBot, newAge._top) != n() - 1, 1.217 @@ -287,7 +295,7 @@ 1.218 1.219 template<class E> class GenericTaskQueueSet: public TaskQueueSetSuper { 1.220 private: 1.221 - int _n; 1.222 + uint _n; 1.223 GenericTaskQueue<E>** _queues; 1.224 1.225 public: 1.226 @@ -300,51 +308,51 @@ 1.227 } 1.228 } 1.229 1.230 - bool steal_1_random(int queue_num, int* seed, E& t); 1.231 - bool steal_best_of_2(int queue_num, int* seed, E& t); 1.232 - bool steal_best_of_all(int queue_num, int* seed, E& t); 1.233 + bool steal_1_random(uint queue_num, int* seed, E& t); 1.234 + bool steal_best_of_2(uint queue_num, int* seed, E& t); 1.235 + bool steal_best_of_all(uint queue_num, int* seed, E& t); 1.236 1.237 - void register_queue(int i, GenericTaskQueue<E>* q); 1.238 + void register_queue(uint i, GenericTaskQueue<E>* q); 1.239 1.240 - GenericTaskQueue<E>* queue(int n); 1.241 + GenericTaskQueue<E>* queue(uint n); 1.242 1.243 // The thread with queue number "queue_num" (and whose random number seed 1.244 // is at "seed") is trying to steal a task from some other queue. (It 1.245 // may try several queues, according to some configuration parameter.) 1.246 // If some steal succeeds, returns "true" and sets "t" the stolen task, 1.247 // otherwise returns false. 1.248 - bool steal(int queue_num, int* seed, E& t); 1.249 + bool steal(uint queue_num, int* seed, E& t); 1.250 1.251 bool peek(); 1.252 }; 1.253 1.254 template<class E> 1.255 -void GenericTaskQueueSet<E>::register_queue(int i, GenericTaskQueue<E>* q) { 1.256 - assert(0 <= i && i < _n, "index out of range."); 1.257 +void GenericTaskQueueSet<E>::register_queue(uint i, GenericTaskQueue<E>* q) { 1.258 + assert(i < _n, "index out of range."); 1.259 _queues[i] = q; 1.260 } 1.261 1.262 template<class E> 1.263 -GenericTaskQueue<E>* GenericTaskQueueSet<E>::queue(int i) { 1.264 +GenericTaskQueue<E>* GenericTaskQueueSet<E>::queue(uint i) { 1.265 return _queues[i]; 1.266 } 1.267 1.268 template<class E> 1.269 -bool GenericTaskQueueSet<E>::steal(int queue_num, int* seed, E& t) { 1.270 - for (int i = 0; i < 2 * _n; i++) 1.271 +bool GenericTaskQueueSet<E>::steal(uint queue_num, int* seed, E& t) { 1.272 + for (uint i = 0; i < 2 * _n; i++) 1.273 if (steal_best_of_2(queue_num, seed, t)) 1.274 return true; 1.275 return false; 1.276 } 1.277 1.278 template<class E> 1.279 -bool GenericTaskQueueSet<E>::steal_best_of_all(int queue_num, int* seed, E& t) { 1.280 +bool GenericTaskQueueSet<E>::steal_best_of_all(uint queue_num, int* seed, E& t) { 1.281 if (_n > 2) { 1.282 int best_k; 1.283 - jint best_sz = 0; 1.284 - for (int k = 0; k < _n; k++) { 1.285 + uint best_sz = 0; 1.286 + for (uint k = 0; k < _n; k++) { 1.287 if (k == queue_num) continue; 1.288 - jint sz = _queues[k]->size(); 1.289 + uint sz = _queues[k]->size(); 1.290 if (sz > best_sz) { 1.291 best_sz = sz; 1.292 best_k = k; 1.293 @@ -362,9 +370,9 @@ 1.294 } 1.295 1.296 template<class E> 1.297 -bool GenericTaskQueueSet<E>::steal_1_random(int queue_num, int* seed, E& t) { 1.298 +bool GenericTaskQueueSet<E>::steal_1_random(uint queue_num, int* seed, E& t) { 1.299 if (_n > 2) { 1.300 - int k = queue_num; 1.301 + uint k = queue_num; 1.302 while (k == queue_num) k = randomParkAndMiller(seed) % _n; 1.303 return _queues[2]->pop_global(t); 1.304 } else if (_n == 2) { 1.305 @@ -378,20 +386,20 @@ 1.306 } 1.307 1.308 template<class E> 1.309 -bool GenericTaskQueueSet<E>::steal_best_of_2(int queue_num, int* seed, E& t) { 1.310 +bool GenericTaskQueueSet<E>::steal_best_of_2(uint queue_num, int* seed, E& t) { 1.311 if (_n > 2) { 1.312 - int k1 = queue_num; 1.313 + uint k1 = queue_num; 1.314 while (k1 == queue_num) k1 = randomParkAndMiller(seed) % _n; 1.315 - int k2 = queue_num; 1.316 + uint k2 = queue_num; 1.317 while (k2 == queue_num || k2 == k1) k2 = randomParkAndMiller(seed) % _n; 1.318 // Sample both and try the larger. 1.319 - juint sz1 = _queues[k1]->size(); 1.320 - juint sz2 = _queues[k2]->size(); 1.321 + uint sz1 = _queues[k1]->size(); 1.322 + uint sz2 = _queues[k2]->size(); 1.323 if (sz2 > sz1) return _queues[k2]->pop_global(t); 1.324 else return _queues[k1]->pop_global(t); 1.325 } else if (_n == 2) { 1.326 // Just try the other one. 1.327 - int k = (queue_num + 1) % 2; 1.328 + uint k = (queue_num + 1) % 2; 1.329 return _queues[k]->pop_global(t); 1.330 } else { 1.331 assert(_n == 1, "can't be zero."); 1.332 @@ -402,7 +410,7 @@ 1.333 template<class E> 1.334 bool GenericTaskQueueSet<E>::peek() { 1.335 // Try all the queues. 1.336 - for (int j = 0; j < _n; j++) { 1.337 + for (uint j = 0; j < _n; j++) { 1.338 if (_queues[j]->peek()) 1.339 return true; 1.340 } 1.341 @@ -422,7 +430,7 @@ 1.342 private: 1.343 int _n_threads; 1.344 TaskQueueSetSuper* _queue_set; 1.345 - jint _offered_termination; 1.346 + int _offered_termination; 1.347 1.348 bool peek_in_queue_set(); 1.349 protected: 1.350 @@ -460,7 +468,7 @@ 1.351 1.352 template<class E> inline bool GenericTaskQueue<E>::push(E t) { 1.353 #if SIMPLE_STACK 1.354 - juint localBot = _bottom; 1.355 + uint localBot = _bottom; 1.356 if (_bottom < max_elems()) { 1.357 _elems[localBot] = t; 1.358 _bottom = localBot + 1; 1.359 @@ -469,10 +477,10 @@ 1.360 return false; 1.361 } 1.362 #else 1.363 - juint localBot = _bottom; 1.364 + uint localBot = _bottom; 1.365 assert((localBot >= 0) && (localBot < n()), "_bottom out of range."); 1.366 - jushort top = get_top(); 1.367 - juint dirty_n_elems = dirty_size(localBot, top); 1.368 + TAG_TYPE top = get_top(); 1.369 + uint dirty_n_elems = dirty_size(localBot, top); 1.370 assert((dirty_n_elems >= 0) && (dirty_n_elems < n()), 1.371 "n_elems out of range."); 1.372 if (dirty_n_elems < max_elems()) { 1.373 @@ -487,19 +495,19 @@ 1.374 1.375 template<class E> inline bool GenericTaskQueue<E>::pop_local(E& t) { 1.376 #if SIMPLE_STACK 1.377 - juint localBot = _bottom; 1.378 + uint localBot = _bottom; 1.379 assert(localBot > 0, "precondition."); 1.380 localBot--; 1.381 t = _elems[localBot]; 1.382 _bottom = localBot; 1.383 return true; 1.384 #else 1.385 - juint localBot = _bottom; 1.386 + uint localBot = _bottom; 1.387 // This value cannot be n-1. That can only occur as a result of 1.388 // the assignment to bottom in this method. If it does, this method 1.389 // resets the size( to 0 before the next call (which is sequential, 1.390 // since this is pop_local.) 1.391 - juint dirty_n_elems = dirty_size(localBot, get_top()); 1.392 + uint dirty_n_elems = dirty_size(localBot, get_top()); 1.393 assert(dirty_n_elems != n() - 1, "Shouldn't be possible..."); 1.394 if (dirty_n_elems == 0) return false; 1.395 localBot = decrement_index(localBot); 1.396 @@ -512,7 +520,7 @@ 1.397 // If there's still at least one element in the queue, based on the 1.398 // "_bottom" and "age" we've read, then there can be no interference with 1.399 // a "pop_global" operation, and we're done. 1.400 - juint tp = get_top(); 1.401 + TAG_TYPE tp = get_top(); // XXX 1.402 if (size(localBot, tp) > 0) { 1.403 assert(dirty_size(localBot, tp) != n() - 1, 1.404 "Shouldn't be possible..."); 1.405 @@ -581,7 +589,7 @@ 1.406 bool is_empty(); 1.407 bool stealable_is_empty(); 1.408 bool overflow_is_empty(); 1.409 - juint stealable_size() { return _region_queue.size(); } 1.410 + uint stealable_size() { return _region_queue.size(); } 1.411 RegionTaskQueue* task_queue() { return &_region_queue; } 1.412 }; 1.413