Thu, 05 Jun 2008 15:57:56 -0700
6711316: Open source the Garbage-First garbage collector
Summary: First mercurial integration of the code for the Garbage-First garbage collector.
Reviewed-by: apetrusenko, iveresov, jmasa, sgoldman, tonyp, ysr
1 /*
2 * Copyright 2001-2006 Sun Microsystems, Inc. All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
20 * CA 95054 USA or visit www.sun.com if you need additional information or
21 * have any questions.
22 *
23 */
25 class TaskQueueSuper: public CHeapObj {
26 protected:
27 // The first free element after the last one pushed (mod _n).
28 // (For now we'll assume only 32-bit CAS).
29 volatile juint _bottom;
31 // log2 of the size of the queue.
32 enum SomeProtectedConstants {
33 Log_n = 14
34 };
36 // Size of the queue.
37 juint n() { return (1 << Log_n); }
38 // For computing "x mod n" efficiently.
39 juint n_mod_mask() { return n() - 1; }
41 struct Age {
42 jushort _top;
43 jushort _tag;
45 jushort tag() const { return _tag; }
46 jushort top() const { return _top; }
48 Age() { _tag = 0; _top = 0; }
50 friend bool operator ==(const Age& a1, const Age& a2) {
51 return a1.tag() == a2.tag() && a1.top() == a2.top();
52 }
54 };
55 Age _age;
56 // These make sure we do single atomic reads and writes.
57 Age get_age() {
58 jint res = *(volatile jint*)(&_age);
59 return *(Age*)(&res);
60 }
61 void set_age(Age a) {
62 *(volatile jint*)(&_age) = *(int*)(&a);
63 }
65 jushort get_top() {
66 return get_age().top();
67 }
69 // These both operate mod _n.
70 juint increment_index(juint ind) {
71 return (ind + 1) & n_mod_mask();
72 }
73 juint decrement_index(juint ind) {
74 return (ind - 1) & n_mod_mask();
75 }
77 // Returns a number in the range [0.._n). If the result is "n-1", it
78 // should be interpreted as 0.
79 juint dirty_size(juint bot, juint top) {
80 return ((jint)bot - (jint)top) & n_mod_mask();
81 }
83 // Returns the size corresponding to the given "bot" and "top".
84 juint size(juint bot, juint top) {
85 juint sz = dirty_size(bot, top);
86 // Has the queue "wrapped", so that bottom is less than top?
87 // There's a complicated special case here. A pair of threads could
88 // perform pop_local and pop_global operations concurrently, starting
89 // from a state in which _bottom == _top+1. The pop_local could
90 // succeed in decrementing _bottom, and the pop_global in incrementing
91 // _top (in which case the pop_global will be awarded the contested
92 // queue element.) The resulting state must be interpreted as an empty
93 // queue. (We only need to worry about one such event: only the queue
94 // owner performs pop_local's, and several concurrent threads
95 // attempting to perform the pop_global will all perform the same CAS,
96 // and only one can succeed. Any stealing thread that reads after
97 // either the increment or decrement will seen an empty queue, and will
98 // not join the competitors. The "sz == -1 || sz == _n-1" state will
99 // not be modified by concurrent queues, so the owner thread can reset
100 // the state to _bottom == top so subsequent pushes will be performed
101 // normally.
102 if (sz == (n()-1)) return 0;
103 else return sz;
104 }
106 public:
107 TaskQueueSuper() : _bottom(0), _age() {}
109 // Return "true" if the TaskQueue contains any tasks.
110 bool peek();
112 // Return an estimate of the number of elements in the queue.
113 // The "careful" version admits the possibility of pop_local/pop_global
114 // races.
115 juint size() {
116 return size(_bottom, get_top());
117 }
119 juint dirty_size() {
120 return dirty_size(_bottom, get_top());
121 }
123 void set_empty() {
124 _bottom = 0;
125 _age = Age();
126 }
128 // Maximum number of elements allowed in the queue. This is two less
129 // than the actual queue size, for somewhat complicated reasons.
130 juint max_elems() { return n() - 2; }
132 };
134 template<class E> class GenericTaskQueue: public TaskQueueSuper {
135 private:
136 // Slow paths for push, pop_local. (pop_global has no fast path.)
137 bool push_slow(E t, juint dirty_n_elems);
138 bool pop_local_slow(juint localBot, Age oldAge);
140 public:
141 // Initializes the queue to empty.
142 GenericTaskQueue();
144 void initialize();
146 // Push the task "t" on the queue. Returns "false" iff the queue is
147 // full.
148 inline bool push(E t);
150 // If succeeds in claiming a task (from the 'local' end, that is, the
151 // most recently pushed task), returns "true" and sets "t" to that task.
152 // Otherwise, the queue is empty and returns false.
153 inline bool pop_local(E& t);
155 // If succeeds in claiming a task (from the 'global' end, that is, the
156 // least recently pushed task), returns "true" and sets "t" to that task.
157 // Otherwise, the queue is empty and returns false.
158 bool pop_global(E& t);
160 // Delete any resource associated with the queue.
161 ~GenericTaskQueue();
163 // apply the closure to all elements in the task queue
164 void oops_do(OopClosure* f);
166 private:
167 // Element array.
168 volatile E* _elems;
169 };
171 template<class E>
172 GenericTaskQueue<E>::GenericTaskQueue():TaskQueueSuper() {
173 assert(sizeof(Age) == sizeof(jint), "Depends on this.");
174 }
176 template<class E>
177 void GenericTaskQueue<E>::initialize() {
178 _elems = NEW_C_HEAP_ARRAY(E, n());
179 guarantee(_elems != NULL, "Allocation failed.");
180 }
182 template<class E>
183 void GenericTaskQueue<E>::oops_do(OopClosure* f) {
184 // tty->print_cr("START OopTaskQueue::oops_do");
185 int iters = size();
186 juint index = _bottom;
187 for (int i = 0; i < iters; ++i) {
188 index = decrement_index(index);
189 // tty->print_cr(" doing entry %d," INTPTR_T " -> " INTPTR_T,
190 // index, &_elems[index], _elems[index]);
191 E* t = (E*)&_elems[index]; // cast away volatility
192 oop* p = (oop*)t;
193 assert((*t)->is_oop_or_null(), "Not an oop or null");
194 f->do_oop(p);
195 }
196 // tty->print_cr("END OopTaskQueue::oops_do");
197 }
200 template<class E>
201 bool GenericTaskQueue<E>::push_slow(E t, juint dirty_n_elems) {
202 if (dirty_n_elems == n() - 1) {
203 // Actually means 0, so do the push.
204 juint localBot = _bottom;
205 _elems[localBot] = t;
206 _bottom = increment_index(localBot);
207 return true;
208 } else
209 return false;
210 }
212 template<class E>
213 bool GenericTaskQueue<E>::
214 pop_local_slow(juint localBot, Age oldAge) {
215 // This queue was observed to contain exactly one element; either this
216 // thread will claim it, or a competing "pop_global". In either case,
217 // the queue will be logically empty afterwards. Create a new Age value
218 // that represents the empty queue for the given value of "_bottom". (We
219 // must also increment "tag" because of the case where "bottom == 1",
220 // "top == 0". A pop_global could read the queue element in that case,
221 // then have the owner thread do a pop followed by another push. Without
222 // the incrementing of "tag", the pop_global's CAS could succeed,
223 // allowing it to believe it has claimed the stale element.)
224 Age newAge;
225 newAge._top = localBot;
226 newAge._tag = oldAge.tag() + 1;
227 // Perhaps a competing pop_global has already incremented "top", in which
228 // case it wins the element.
229 if (localBot == oldAge.top()) {
230 Age tempAge;
231 // No competing pop_global has yet incremented "top"; we'll try to
232 // install new_age, thus claiming the element.
233 assert(sizeof(Age) == sizeof(jint) && sizeof(jint) == sizeof(juint),
234 "Assumption about CAS unit.");
235 *(jint*)&tempAge = Atomic::cmpxchg(*(jint*)&newAge, (volatile jint*)&_age, *(jint*)&oldAge);
236 if (tempAge == oldAge) {
237 // We win.
238 assert(dirty_size(localBot, get_top()) != n() - 1,
239 "Shouldn't be possible...");
240 return true;
241 }
242 }
243 // We fail; a completing pop_global gets the element. But the queue is
244 // empty (and top is greater than bottom.) Fix this representation of
245 // the empty queue to become the canonical one.
246 set_age(newAge);
247 assert(dirty_size(localBot, get_top()) != n() - 1,
248 "Shouldn't be possible...");
249 return false;
250 }
252 template<class E>
253 bool GenericTaskQueue<E>::pop_global(E& t) {
254 Age newAge;
255 Age oldAge = get_age();
256 juint localBot = _bottom;
257 juint n_elems = size(localBot, oldAge.top());
258 if (n_elems == 0) {
259 return false;
260 }
261 t = _elems[oldAge.top()];
262 newAge = oldAge;
263 newAge._top = increment_index(newAge.top());
264 if ( newAge._top == 0 ) newAge._tag++;
265 Age resAge;
266 *(jint*)&resAge = Atomic::cmpxchg(*(jint*)&newAge, (volatile jint*)&_age, *(jint*)&oldAge);
267 // Note that using "_bottom" here might fail, since a pop_local might
268 // have decremented it.
269 assert(dirty_size(localBot, newAge._top) != n() - 1,
270 "Shouldn't be possible...");
271 return (resAge == oldAge);
272 }
274 template<class E>
275 GenericTaskQueue<E>::~GenericTaskQueue() {
276 FREE_C_HEAP_ARRAY(E, _elems);
277 }
279 // Inherits the typedef of "Task" from above.
280 class TaskQueueSetSuper: public CHeapObj {
281 protected:
282 static int randomParkAndMiller(int* seed0);
283 public:
284 // Returns "true" if some TaskQueue in the set contains a task.
285 virtual bool peek() = 0;
286 };
288 template<class E> class GenericTaskQueueSet: public TaskQueueSetSuper {
289 private:
290 int _n;
291 GenericTaskQueue<E>** _queues;
293 public:
294 GenericTaskQueueSet(int n) : _n(n) {
295 typedef GenericTaskQueue<E>* GenericTaskQueuePtr;
296 _queues = NEW_C_HEAP_ARRAY(GenericTaskQueuePtr, n);
297 guarantee(_queues != NULL, "Allocation failure.");
298 for (int i = 0; i < n; i++) {
299 _queues[i] = NULL;
300 }
301 }
303 bool steal_1_random(int queue_num, int* seed, E& t);
304 bool steal_best_of_2(int queue_num, int* seed, E& t);
305 bool steal_best_of_all(int queue_num, int* seed, E& t);
307 void register_queue(int i, GenericTaskQueue<E>* q);
309 GenericTaskQueue<E>* queue(int n);
311 // The thread with queue number "queue_num" (and whose random number seed
312 // is at "seed") is trying to steal a task from some other queue. (It
313 // may try several queues, according to some configuration parameter.)
314 // If some steal succeeds, returns "true" and sets "t" the stolen task,
315 // otherwise returns false.
316 bool steal(int queue_num, int* seed, E& t);
318 bool peek();
319 };
321 template<class E>
322 void GenericTaskQueueSet<E>::register_queue(int i, GenericTaskQueue<E>* q) {
323 assert(0 <= i && i < _n, "index out of range.");
324 _queues[i] = q;
325 }
327 template<class E>
328 GenericTaskQueue<E>* GenericTaskQueueSet<E>::queue(int i) {
329 return _queues[i];
330 }
332 template<class E>
333 bool GenericTaskQueueSet<E>::steal(int queue_num, int* seed, E& t) {
334 for (int i = 0; i < 2 * _n; i++)
335 if (steal_best_of_2(queue_num, seed, t))
336 return true;
337 return false;
338 }
340 template<class E>
341 bool GenericTaskQueueSet<E>::steal_best_of_all(int queue_num, int* seed, E& t) {
342 if (_n > 2) {
343 int best_k;
344 jint best_sz = 0;
345 for (int k = 0; k < _n; k++) {
346 if (k == queue_num) continue;
347 jint sz = _queues[k]->size();
348 if (sz > best_sz) {
349 best_sz = sz;
350 best_k = k;
351 }
352 }
353 return best_sz > 0 && _queues[best_k]->pop_global(t);
354 } else if (_n == 2) {
355 // Just try the other one.
356 int k = (queue_num + 1) % 2;
357 return _queues[k]->pop_global(t);
358 } else {
359 assert(_n == 1, "can't be zero.");
360 return false;
361 }
362 }
364 template<class E>
365 bool GenericTaskQueueSet<E>::steal_1_random(int queue_num, int* seed, E& t) {
366 if (_n > 2) {
367 int k = queue_num;
368 while (k == queue_num) k = randomParkAndMiller(seed) % _n;
369 return _queues[2]->pop_global(t);
370 } else if (_n == 2) {
371 // Just try the other one.
372 int k = (queue_num + 1) % 2;
373 return _queues[k]->pop_global(t);
374 } else {
375 assert(_n == 1, "can't be zero.");
376 return false;
377 }
378 }
380 template<class E>
381 bool GenericTaskQueueSet<E>::steal_best_of_2(int queue_num, int* seed, E& t) {
382 if (_n > 2) {
383 int k1 = queue_num;
384 while (k1 == queue_num) k1 = randomParkAndMiller(seed) % _n;
385 int k2 = queue_num;
386 while (k2 == queue_num || k2 == k1) k2 = randomParkAndMiller(seed) % _n;
387 // Sample both and try the larger.
388 juint sz1 = _queues[k1]->size();
389 juint sz2 = _queues[k2]->size();
390 if (sz2 > sz1) return _queues[k2]->pop_global(t);
391 else return _queues[k1]->pop_global(t);
392 } else if (_n == 2) {
393 // Just try the other one.
394 int k = (queue_num + 1) % 2;
395 return _queues[k]->pop_global(t);
396 } else {
397 assert(_n == 1, "can't be zero.");
398 return false;
399 }
400 }
402 template<class E>
403 bool GenericTaskQueueSet<E>::peek() {
404 // Try all the queues.
405 for (int j = 0; j < _n; j++) {
406 if (_queues[j]->peek())
407 return true;
408 }
409 return false;
410 }
412 // When to terminate from the termination protocol.
413 class TerminatorTerminator: public CHeapObj {
414 public:
415 virtual bool should_exit_termination() = 0;
416 };
418 // A class to aid in the termination of a set of parallel tasks using
419 // TaskQueueSet's for work stealing.
421 class ParallelTaskTerminator: public StackObj {
422 private:
423 int _n_threads;
424 TaskQueueSetSuper* _queue_set;
425 jint _offered_termination;
427 bool peek_in_queue_set();
428 protected:
429 virtual void yield();
430 void sleep(uint millis);
432 public:
434 // "n_threads" is the number of threads to be terminated. "queue_set" is a
435 // queue sets of work queues of other threads.
436 ParallelTaskTerminator(int n_threads, TaskQueueSetSuper* queue_set);
438 // The current thread has no work, and is ready to terminate if everyone
439 // else is. If returns "true", all threads are terminated. If returns
440 // "false", available work has been observed in one of the task queues,
441 // so the global task is not complete.
442 bool offer_termination() {
443 return offer_termination(NULL);
444 }
446 // As above, but it also terminates of the should_exit_termination()
447 // method of the terminator parameter returns true. If terminator is
448 // NULL, then it is ignored.
449 bool offer_termination(TerminatorTerminator* terminator);
451 // Reset the terminator, so that it may be reused again.
452 // The caller is responsible for ensuring that this is done
453 // in an MT-safe manner, once the previous round of use of
454 // the terminator is finished.
455 void reset_for_reuse();
457 };
459 #define SIMPLE_STACK 0
461 template<class E> inline bool GenericTaskQueue<E>::push(E t) {
462 #if SIMPLE_STACK
463 juint localBot = _bottom;
464 if (_bottom < max_elems()) {
465 _elems[localBot] = t;
466 _bottom = localBot + 1;
467 return true;
468 } else {
469 return false;
470 }
471 #else
472 juint localBot = _bottom;
473 assert((localBot >= 0) && (localBot < n()), "_bottom out of range.");
474 jushort top = get_top();
475 juint dirty_n_elems = dirty_size(localBot, top);
476 assert((dirty_n_elems >= 0) && (dirty_n_elems < n()),
477 "n_elems out of range.");
478 if (dirty_n_elems < max_elems()) {
479 _elems[localBot] = t;
480 _bottom = increment_index(localBot);
481 return true;
482 } else {
483 return push_slow(t, dirty_n_elems);
484 }
485 #endif
486 }
488 template<class E> inline bool GenericTaskQueue<E>::pop_local(E& t) {
489 #if SIMPLE_STACK
490 juint localBot = _bottom;
491 assert(localBot > 0, "precondition.");
492 localBot--;
493 t = _elems[localBot];
494 _bottom = localBot;
495 return true;
496 #else
497 juint localBot = _bottom;
498 // This value cannot be n-1. That can only occur as a result of
499 // the assignment to bottom in this method. If it does, this method
500 // resets the size( to 0 before the next call (which is sequential,
501 // since this is pop_local.)
502 juint dirty_n_elems = dirty_size(localBot, get_top());
503 assert(dirty_n_elems != n() - 1, "Shouldn't be possible...");
504 if (dirty_n_elems == 0) return false;
505 localBot = decrement_index(localBot);
506 _bottom = localBot;
507 // This is necessary to prevent any read below from being reordered
508 // before the store just above.
509 OrderAccess::fence();
510 t = _elems[localBot];
511 // This is a second read of "age"; the "size()" above is the first.
512 // If there's still at least one element in the queue, based on the
513 // "_bottom" and "age" we've read, then there can be no interference with
514 // a "pop_global" operation, and we're done.
515 juint tp = get_top();
516 if (size(localBot, tp) > 0) {
517 assert(dirty_size(localBot, tp) != n() - 1,
518 "Shouldn't be possible...");
519 return true;
520 } else {
521 // Otherwise, the queue contained exactly one element; we take the slow
522 // path.
523 return pop_local_slow(localBot, get_age());
524 }
525 #endif
526 }
528 typedef oop Task;
529 typedef GenericTaskQueue<Task> OopTaskQueue;
530 typedef GenericTaskQueueSet<Task> OopTaskQueueSet;
533 #define COMPRESSED_OOP_MASK 1
535 // This is a container class for either an oop* or a narrowOop*.
536 // Both are pushed onto a task queue and the consumer will test is_narrow()
537 // to determine which should be processed.
538 class StarTask {
539 void* _holder; // either union oop* or narrowOop*
540 public:
541 StarTask(narrowOop *p) { _holder = (void *)((uintptr_t)p | COMPRESSED_OOP_MASK); }
542 StarTask(oop *p) { _holder = (void*)p; }
543 StarTask() { _holder = NULL; }
544 operator oop*() { return (oop*)_holder; }
545 operator narrowOop*() {
546 return (narrowOop*)((uintptr_t)_holder & ~COMPRESSED_OOP_MASK);
547 }
549 // Operators to preserve const/volatile in assignments required by gcc
550 void operator=(const volatile StarTask& t) volatile { _holder = t._holder; }
552 bool is_narrow() const {
553 return (((uintptr_t)_holder & COMPRESSED_OOP_MASK) != 0);
554 }
555 };
557 typedef GenericTaskQueue<StarTask> OopStarTaskQueue;
558 typedef GenericTaskQueueSet<StarTask> OopStarTaskQueueSet;
560 typedef size_t ChunkTask; // index for chunk
561 typedef GenericTaskQueue<ChunkTask> ChunkTaskQueue;
562 typedef GenericTaskQueueSet<ChunkTask> ChunkTaskQueueSet;
564 class ChunkTaskQueueWithOverflow: public CHeapObj {
565 protected:
566 ChunkTaskQueue _chunk_queue;
567 GrowableArray<ChunkTask>* _overflow_stack;
569 public:
570 ChunkTaskQueueWithOverflow() : _overflow_stack(NULL) {}
571 // Initialize both stealable queue and overflow
572 void initialize();
573 // Save first to stealable queue and then to overflow
574 void save(ChunkTask t);
575 // Retrieve first from overflow and then from stealable queue
576 bool retrieve(ChunkTask& chunk_index);
577 // Retrieve from stealable queue
578 bool retrieve_from_stealable_queue(ChunkTask& chunk_index);
579 // Retrieve from overflow
580 bool retrieve_from_overflow(ChunkTask& chunk_index);
581 bool is_empty();
582 bool stealable_is_empty();
583 bool overflow_is_empty();
584 juint stealable_size() { return _chunk_queue.size(); }
585 ChunkTaskQueue* task_queue() { return &_chunk_queue; }
586 };
588 #define USE_ChunkTaskQueueWithOverflow