20 * CA 95054 USA or visit www.sun.com if you need additional information or |
20 * CA 95054 USA or visit www.sun.com if you need additional information or |
21 * have any questions. |
21 * have any questions. |
22 * |
22 * |
23 */ |
23 */ |
24 |
24 |
25 #ifdef LP64 |
|
26 typedef juint TAG_TYPE; |
|
27 // for a taskqueue size of 4M |
|
28 #define LOG_TASKQ_SIZE 22 |
|
29 #else |
|
30 typedef jushort TAG_TYPE; |
|
31 // for a taskqueue size of 16K |
|
32 #define LOG_TASKQ_SIZE 14 |
|
33 #endif |
|
34 |
|
35 class TaskQueueSuper: public CHeapObj { |
25 class TaskQueueSuper: public CHeapObj { |
36 protected: |
26 protected: |
37 // The first free element after the last one pushed (mod _n). |
27 // Internal type for indexing the queue; also used for the tag. |
|
28 typedef NOT_LP64(uint16_t) LP64_ONLY(uint32_t) idx_t; |
|
29 |
|
30 // The first free element after the last one pushed (mod N). |
38 volatile uint _bottom; |
31 volatile uint _bottom; |
39 |
32 |
40 // log2 of the size of the queue. |
33 enum { |
41 enum SomeProtectedConstants { |
34 N = 1 << NOT_LP64(14) LP64_ONLY(17), // Queue size: 16K or 128K |
42 Log_n = LOG_TASKQ_SIZE |
35 MOD_N_MASK = N - 1 // To compute x mod N efficiently. |
43 }; |
36 }; |
44 #undef LOG_TASKQ_SIZE |
37 |
45 |
38 class Age { |
46 // Size of the queue. |
39 public: |
47 uint n() { return (1 << Log_n); } |
40 Age(size_t data = 0) { _data = data; } |
48 // For computing "x mod n" efficiently. |
41 Age(const Age& age) { _data = age._data; } |
49 uint n_mod_mask() { return n() - 1; } |
42 Age(idx_t top, idx_t tag) { _fields._top = top; _fields._tag = tag; } |
50 |
43 |
51 struct Age { |
44 Age get() const volatile { return _data; } |
52 TAG_TYPE _top; |
45 void set(Age age) volatile { _data = age._data; } |
53 TAG_TYPE _tag; |
46 |
54 |
47 idx_t top() const volatile { return _fields._top; } |
55 TAG_TYPE tag() const { return _tag; } |
48 idx_t tag() const volatile { return _fields._tag; } |
56 TAG_TYPE top() const { return _top; } |
49 |
57 |
50 // Increment top; if it wraps, increment tag also. |
58 Age() { _tag = 0; _top = 0; } |
51 void increment() { |
59 |
52 _fields._top = increment_index(_fields._top); |
60 friend bool operator ==(const Age& a1, const Age& a2) { |
53 if (_fields._top == 0) ++_fields._tag; |
61 return a1.tag() == a2.tag() && a1.top() == a2.top(); |
|
62 } |
54 } |
|
55 |
|
56 Age cmpxchg(const Age new_age, const Age old_age) volatile { |
|
57 return (size_t) Atomic::cmpxchg_ptr((intptr_t)new_age._data, |
|
58 (volatile intptr_t *)&_data, |
|
59 (intptr_t)old_age._data); |
|
60 } |
|
61 |
|
62 bool operator ==(const Age& other) const { return _data == other._data; } |
|
63 |
|
64 private: |
|
65 struct fields { |
|
66 idx_t _top; |
|
67 idx_t _tag; |
|
68 }; |
|
69 union { |
|
70 size_t _data; |
|
71 fields _fields; |
|
72 }; |
63 }; |
73 }; |
64 Age _age; |
74 |
65 // These make sure we do single atomic reads and writes. |
75 volatile Age _age; |
66 Age get_age() { |
76 |
67 uint res = *(volatile uint*)(&_age); |
77 // These both operate mod N. |
68 return *(Age*)(&res); |
78 static uint increment_index(uint ind) { |
69 } |
79 return (ind + 1) & MOD_N_MASK; |
70 void set_age(Age a) { |
80 } |
71 *(volatile uint*)(&_age) = *(uint*)(&a); |
81 static uint decrement_index(uint ind) { |
72 } |
82 return (ind - 1) & MOD_N_MASK; |
73 |
83 } |
74 TAG_TYPE get_top() { |
84 |
75 return get_age().top(); |
85 // Returns a number in the range [0..N). If the result is "N-1", it should be |
76 } |
86 // interpreted as 0. |
77 |
|
78 // These both operate mod _n. |
|
79 uint increment_index(uint ind) { |
|
80 return (ind + 1) & n_mod_mask(); |
|
81 } |
|
82 uint decrement_index(uint ind) { |
|
83 return (ind - 1) & n_mod_mask(); |
|
84 } |
|
85 |
|
86 // Returns a number in the range [0.._n). If the result is "n-1", it |
|
87 // should be interpreted as 0. |
|
88 uint dirty_size(uint bot, uint top) { |
87 uint dirty_size(uint bot, uint top) { |
89 return ((int)bot - (int)top) & n_mod_mask(); |
88 return (bot - top) & MOD_N_MASK; |
90 } |
89 } |
91 |
90 |
92 // Returns the size corresponding to the given "bot" and "top". |
91 // Returns the size corresponding to the given "bot" and "top". |
93 uint size(uint bot, uint top) { |
92 uint size(uint bot, uint top) { |
94 uint sz = dirty_size(bot, top); |
93 uint sz = dirty_size(bot, top); |
95 // Has the queue "wrapped", so that bottom is less than top? |
94 // Has the queue "wrapped", so that bottom is less than top? There's a |
96 // There's a complicated special case here. A pair of threads could |
95 // complicated special case here. A pair of threads could perform pop_local |
97 // perform pop_local and pop_global operations concurrently, starting |
96 // and pop_global operations concurrently, starting from a state in which |
98 // from a state in which _bottom == _top+1. The pop_local could |
97 // _bottom == _top+1. The pop_local could succeed in decrementing _bottom, |
99 // succeed in decrementing _bottom, and the pop_global in incrementing |
98 // and the pop_global in incrementing _top (in which case the pop_global |
100 // _top (in which case the pop_global will be awarded the contested |
99 // will be awarded the contested queue element.) The resulting state must |
101 // queue element.) The resulting state must be interpreted as an empty |
100 // be interpreted as an empty queue. (We only need to worry about one such |
102 // queue. (We only need to worry about one such event: only the queue |
101 // event: only the queue owner performs pop_local's, and several concurrent |
103 // owner performs pop_local's, and several concurrent threads |
102 // threads attempting to perform the pop_global will all perform the same |
104 // attempting to perform the pop_global will all perform the same CAS, |
103 // CAS, and only one can succeed.) Any stealing thread that reads after |
105 // and only one can succeed. Any stealing thread that reads after |
104 // either the increment or decrement will see an empty queue, and will not |
106 // either the increment or decrement will see an empty queue, and will |
105 // join the competitors. The "sz == -1 || sz == N-1" state will not be |
107 // not join the competitors. The "sz == -1 || sz == _n-1" state will |
106 // modified by concurrent queues, so the owner thread can reset the state to |
108 // not be modified by concurrent queues, so the owner thread can reset |
107 // _bottom == top so subsequent pushes will be performed normally. |
109 // the state to _bottom == top so subsequent pushes will be performed |
108 return (sz == N - 1) ? 0 : sz; |
110 // normally. |
|
111 if (sz == (n()-1)) return 0; |
|
112 else return sz; |
|
113 } |
109 } |
114 |
110 |
115 public: |
111 public: |
116 TaskQueueSuper() : _bottom(0), _age() {} |
112 TaskQueueSuper() : _bottom(0), _age() {} |
117 |
113 |
228 // must also increment "tag" because of the case where "bottom == 1", |
223 // must also increment "tag" because of the case where "bottom == 1", |
229 // "top == 0". A pop_global could read the queue element in that case, |
224 // "top == 0". A pop_global could read the queue element in that case, |
230 // then have the owner thread do a pop followed by another push. Without |
225 // then have the owner thread do a pop followed by another push. Without |
231 // the incrementing of "tag", the pop_global's CAS could succeed, |
226 // the incrementing of "tag", the pop_global's CAS could succeed, |
232 // allowing it to believe it has claimed the stale element.) |
227 // allowing it to believe it has claimed the stale element.) |
233 Age newAge; |
228 Age newAge((idx_t)localBot, oldAge.tag() + 1); |
234 newAge._top = localBot; |
|
235 newAge._tag = oldAge.tag() + 1; |
|
236 // Perhaps a competing pop_global has already incremented "top", in which |
229 // Perhaps a competing pop_global has already incremented "top", in which |
237 // case it wins the element. |
230 // case it wins the element. |
238 if (localBot == oldAge.top()) { |
231 if (localBot == oldAge.top()) { |
239 Age tempAge; |
|
240 // No competing pop_global has yet incremented "top"; we'll try to |
232 // No competing pop_global has yet incremented "top"; we'll try to |
241 // install new_age, thus claiming the element. |
233 // install new_age, thus claiming the element. |
242 assert(sizeof(Age) == sizeof(int), "Assumption about CAS unit."); |
234 Age tempAge = _age.cmpxchg(newAge, oldAge); |
243 *(uint*)&tempAge = Atomic::cmpxchg(*(uint*)&newAge, (volatile uint*)&_age, *(uint*)&oldAge); |
|
244 if (tempAge == oldAge) { |
235 if (tempAge == oldAge) { |
245 // We win. |
236 // We win. |
246 assert(dirty_size(localBot, get_top()) != n() - 1, |
237 assert(dirty_size(localBot, _age.top()) != N - 1, "sanity"); |
247 "Shouldn't be possible..."); |
|
248 return true; |
238 return true; |
249 } |
239 } |
250 } |
240 } |
251 // We fail; a completing pop_global gets the element. But the queue is |
241 // We lose; a completing pop_global gets the element. But the queue is empty |
252 // empty (and top is greater than bottom.) Fix this representation of |
242 // and top is greater than bottom. Fix this representation of the empty queue |
253 // the empty queue to become the canonical one. |
243 // to become the canonical one. |
254 set_age(newAge); |
244 _age.set(newAge); |
255 assert(dirty_size(localBot, get_top()) != n() - 1, |
245 assert(dirty_size(localBot, _age.top()) != N - 1, "sanity"); |
256 "Shouldn't be possible..."); |
|
257 return false; |
246 return false; |
258 } |
247 } |
259 |
248 |
260 template<class E> |
249 template<class E> |
261 bool GenericTaskQueue<E>::pop_global(E& t) { |
250 bool GenericTaskQueue<E>::pop_global(E& t) { |
262 Age newAge; |
251 Age oldAge = _age.get(); |
263 Age oldAge = get_age(); |
|
264 uint localBot = _bottom; |
252 uint localBot = _bottom; |
265 uint n_elems = size(localBot, oldAge.top()); |
253 uint n_elems = size(localBot, oldAge.top()); |
266 if (n_elems == 0) { |
254 if (n_elems == 0) { |
267 return false; |
255 return false; |
268 } |
256 } |
|
257 |
269 t = _elems[oldAge.top()]; |
258 t = _elems[oldAge.top()]; |
270 newAge = oldAge; |
259 Age newAge(oldAge); |
271 newAge._top = increment_index(newAge.top()); |
260 newAge.increment(); |
272 if ( newAge._top == 0 ) newAge._tag++; |
261 Age resAge = _age.cmpxchg(newAge, oldAge); |
273 Age resAge; |
262 |
274 *(uint*)&resAge = Atomic::cmpxchg(*(uint*)&newAge, (volatile uint*)&_age, *(uint*)&oldAge); |
|
275 // Note that using "_bottom" here might fail, since a pop_local might |
263 // Note that using "_bottom" here might fail, since a pop_local might |
276 // have decremented it. |
264 // have decremented it. |
277 assert(dirty_size(localBot, newAge._top) != n() - 1, |
265 assert(dirty_size(localBot, newAge.top()) != N - 1, "sanity"); |
278 "Shouldn't be possible..."); |
266 return resAge == oldAge; |
279 return (resAge == oldAge); |
|
280 } |
267 } |
281 |
268 |
282 template<class E> |
269 template<class E> |
283 GenericTaskQueue<E>::~GenericTaskQueue() { |
270 GenericTaskQueue<E>::~GenericTaskQueue() { |
284 FREE_C_HEAP_ARRAY(E, _elems); |
271 FREE_C_HEAP_ARRAY(E, _elems); |
515 t = _elems[localBot]; |
501 t = _elems[localBot]; |
516 _bottom = localBot; |
502 _bottom = localBot; |
517 return true; |
503 return true; |
518 #else |
504 #else |
519 uint localBot = _bottom; |
505 uint localBot = _bottom; |
520 // This value cannot be n-1. That can only occur as a result of |
506 // This value cannot be N-1. That can only occur as a result of |
521 // the assignment to bottom in this method. If it does, this method |
507 // the assignment to bottom in this method. If it does, this method |
522 // resets the size( to 0 before the next call (which is sequential, |
508 // resets the size( to 0 before the next call (which is sequential, |
523 // since this is pop_local.) |
509 // since this is pop_local.) |
524 uint dirty_n_elems = dirty_size(localBot, get_top()); |
510 uint dirty_n_elems = dirty_size(localBot, _age.top()); |
525 assert(dirty_n_elems != n() - 1, "Shouldn't be possible..."); |
511 assert(dirty_n_elems != N - 1, "Shouldn't be possible..."); |
526 if (dirty_n_elems == 0) return false; |
512 if (dirty_n_elems == 0) return false; |
527 localBot = decrement_index(localBot); |
513 localBot = decrement_index(localBot); |
528 _bottom = localBot; |
514 _bottom = localBot; |
529 // This is necessary to prevent any read below from being reordered |
515 // This is necessary to prevent any read below from being reordered |
530 // before the store just above. |
516 // before the store just above. |
532 t = _elems[localBot]; |
518 t = _elems[localBot]; |
533 // This is a second read of "age"; the "size()" above is the first. |
519 // This is a second read of "age"; the "size()" above is the first. |
534 // If there's still at least one element in the queue, based on the |
520 // If there's still at least one element in the queue, based on the |
535 // "_bottom" and "age" we've read, then there can be no interference with |
521 // "_bottom" and "age" we've read, then there can be no interference with |
536 // a "pop_global" operation, and we're done. |
522 // a "pop_global" operation, and we're done. |
537 TAG_TYPE tp = get_top(); // XXX |
523 idx_t tp = _age.top(); // XXX |
538 if (size(localBot, tp) > 0) { |
524 if (size(localBot, tp) > 0) { |
539 assert(dirty_size(localBot, tp) != n() - 1, |
525 assert(dirty_size(localBot, tp) != N - 1, "sanity"); |
540 "Shouldn't be possible..."); |
|
541 return true; |
526 return true; |
542 } else { |
527 } else { |
543 // Otherwise, the queue contained exactly one element; we take the slow |
528 // Otherwise, the queue contained exactly one element; we take the slow |
544 // path. |
529 // path. |
545 return pop_local_slow(localBot, get_age()); |
530 return pop_local_slow(localBot, _age.get()); |
546 } |
531 } |
547 #endif |
532 #endif |
548 } |
533 } |
549 |
534 |
550 typedef oop Task; |
535 typedef oop Task; |