Wed, 03 Mar 2010 14:48:26 -0800
4396719: Mark Sweep stack overflow on deeply nested Object arrays
Summary: Use an explicit stack for object arrays and process them in chunks.
Reviewed-by: iveresov, apetrusenko
1.1 --- a/src/share/vm/gc_implementation/g1/concurrentMark.hpp Wed Mar 03 08:10:41 2010 -0800 1.2 +++ b/src/share/vm/gc_implementation/g1/concurrentMark.hpp Wed Mar 03 14:48:26 2010 -0800 1.3 @@ -24,8 +24,8 @@ 1.4 1.5 class G1CollectedHeap; 1.6 class CMTask; 1.7 -typedef GenericTaskQueue<oop> CMTaskQueue; 1.8 -typedef GenericTaskQueueSet<oop> CMTaskQueueSet; 1.9 +typedef GenericTaskQueue<oop> CMTaskQueue; 1.10 +typedef GenericTaskQueueSet<CMTaskQueue> CMTaskQueueSet; 1.11 1.12 // A generic CM bit map. This is essentially a wrapper around the BitMap 1.13 // class, with one bit per (1<<_shifter) HeapWords.
2.1 --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp Wed Mar 03 08:10:41 2010 -0800 2.2 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp Wed Mar 03 14:48:26 2010 -0800 2.3 @@ -56,8 +56,8 @@ 2.4 # define IF_G1_DETAILED_STATS(code) 2.5 #endif 2.6 2.7 -typedef GenericTaskQueue<StarTask> RefToScanQueue; 2.8 -typedef GenericTaskQueueSet<StarTask> RefToScanQueueSet; 2.9 +typedef GenericTaskQueue<StarTask> RefToScanQueue; 2.10 +typedef GenericTaskQueueSet<RefToScanQueue> RefToScanQueueSet; 2.11 2.12 typedef int RegionIdx_t; // needs to hold [ 0..max_regions() ) 2.13 typedef int CardIdx_t; // needs to hold [ 0..CardsPerRegion )
3.1 --- a/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp Wed Mar 03 08:10:41 2010 -0800 3.2 +++ b/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp Wed Mar 03 14:48:26 2010 -0800 3.3 @@ -101,6 +101,8 @@ 3.4 3.5 GenMarkSweep::_marking_stack = 3.6 new (ResourceObj::C_HEAP) GrowableArray<oop>(4000, true); 3.7 + GenMarkSweep::_objarray_stack = 3.8 + new (ResourceObj::C_HEAP) GrowableArray<ObjArrayTask>(50, true); 3.9 3.10 int size = SystemDictionary::number_of_classes() * 2; 3.11 GenMarkSweep::_revisit_klass_stack =
4.1 --- a/src/share/vm/gc_implementation/includeDB_gc_parallelScavenge Wed Mar 03 08:10:41 2010 -0800 4.2 +++ b/src/share/vm/gc_implementation/includeDB_gc_parallelScavenge Wed Mar 03 14:48:26 2010 -0800 4.3 @@ -175,6 +175,7 @@ 4.4 psAdaptiveSizePolicy.hpp adaptiveSizePolicy.hpp 4.5 4.6 psCompactionManager.cpp gcTaskManager.hpp 4.7 +psCompactionManager.cpp objArrayKlass.inline.hpp 4.8 psCompactionManager.cpp objectStartArray.hpp 4.9 psCompactionManager.cpp oop.hpp 4.10 psCompactionManager.cpp oop.inline.hpp 4.11 @@ -189,6 +190,9 @@ 4.12 psCompactionManager.hpp allocation.hpp 4.13 psCompactionManager.hpp taskqueue.hpp 4.14 4.15 +psCompactionManager.inline.hpp psCompactionManager.hpp 4.16 +psCompactionManager.inline.hpp psParallelCompact.hpp 4.17 + 4.18 psGCAdaptivePolicyCounters.hpp gcAdaptivePolicyCounters.hpp 4.19 psGCAdaptivePolicyCounters.hpp gcPolicyCounters.hpp 4.20 psGCAdaptivePolicyCounters.hpp psAdaptiveSizePolicy.hpp 4.21 @@ -379,12 +383,12 @@ 4.22 pcTasks.cpp jniHandles.hpp 4.23 pcTasks.cpp jvmtiExport.hpp 4.24 pcTasks.cpp management.hpp 4.25 +pcTasks.cpp objArrayKlass.inline.hpp 4.26 pcTasks.cpp psParallelCompact.hpp 4.27 pcTasks.cpp pcTasks.hpp 4.28 pcTasks.cpp oop.inline.hpp 4.29 pcTasks.cpp oop.pcgc.inline.hpp 4.30 pcTasks.cpp systemDictionary.hpp 4.31 -pcTasks.cpp taskqueue.hpp 4.32 pcTasks.cpp thread.hpp 4.33 pcTasks.cpp universe.hpp 4.34 pcTasks.cpp vmThread.hpp
5.1 --- a/src/share/vm/gc_implementation/parallelScavenge/pcTasks.cpp Wed Mar 03 08:10:41 2010 -0800 5.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/pcTasks.cpp Wed Mar 03 14:48:26 2010 -0800 5.3 @@ -48,7 +48,7 @@ 5.4 _vm_thread->oops_do(&mark_and_push_closure, &mark_and_push_in_blobs); 5.5 5.6 // Do the real work 5.7 - cm->drain_marking_stacks(&mark_and_push_closure); 5.8 + cm->follow_marking_stacks(); 5.9 } 5.10 5.11 5.12 @@ -118,7 +118,7 @@ 5.13 } 5.14 5.15 // Do the real work 5.16 - cm->drain_marking_stacks(&mark_and_push_closure); 5.17 + cm->follow_marking_stacks(); 5.18 // cm->deallocate_stacks(); 5.19 } 5.20 5.21 @@ -196,17 +196,19 @@ 5.22 PSParallelCompact::MarkAndPushClosure mark_and_push_closure(cm); 5.23 5.24 oop obj = NULL; 5.25 + ObjArrayTask task; 5.26 int random_seed = 17; 5.27 - while(true) { 5.28 - if (ParCompactionManager::steal(which, &random_seed, obj)) { 5.29 + do { 5.30 + while (ParCompactionManager::steal_objarray(which, &random_seed, task)) { 5.31 + objArrayKlass* const k = (objArrayKlass*)task.obj()->blueprint(); 5.32 + k->oop_follow_contents(cm, task.obj(), task.index()); 5.33 + cm->follow_marking_stacks(); 5.34 + } 5.35 + while (ParCompactionManager::steal(which, &random_seed, obj)) { 5.36 obj->follow_contents(cm); 5.37 - cm->drain_marking_stacks(&mark_and_push_closure); 5.38 - } else { 5.39 - if (terminator()->offer_termination()) { 5.40 - break; 5.41 - } 5.42 + cm->follow_marking_stacks(); 5.43 } 5.44 - } 5.45 + } while (!terminator()->offer_termination()); 5.46 } 5.47 5.48 //
6.1 --- a/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.cpp Wed Mar 03 08:10:41 2010 -0800 6.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.cpp Wed Mar 03 14:48:26 2010 -0800 6.3 @@ -28,6 +28,8 @@ 6.4 PSOldGen* ParCompactionManager::_old_gen = NULL; 6.5 ParCompactionManager** ParCompactionManager::_manager_array = NULL; 6.6 OopTaskQueueSet* ParCompactionManager::_stack_array = NULL; 6.7 +ParCompactionManager::ObjArrayTaskQueueSet* 6.8 + ParCompactionManager::_objarray_queues = NULL; 6.9 ObjectStartArray* ParCompactionManager::_start_array = NULL; 6.10 ParMarkBitMap* ParCompactionManager::_mark_bitmap = NULL; 6.11 RegionTaskQueueSet* ParCompactionManager::_region_array = NULL; 6.12 @@ -46,6 +48,11 @@ 6.13 6.14 // We want the overflow stack to be permanent 6.15 _overflow_stack = new (ResourceObj::C_HEAP) GrowableArray<oop>(10, true); 6.16 + 6.17 + _objarray_queue.initialize(); 6.18 + _objarray_overflow_stack = 6.19 + new (ResourceObj::C_HEAP) ObjArrayOverflowStack(10, true); 6.20 + 6.21 #ifdef USE_RegionTaskQueueWithOverflow 6.22 region_stack()->initialize(); 6.23 #else 6.24 @@ -69,6 +76,7 @@ 6.25 6.26 ParCompactionManager::~ParCompactionManager() { 6.27 delete _overflow_stack; 6.28 + delete _objarray_overflow_stack; 6.29 delete _revisit_klass_stack; 6.30 delete _revisit_mdo_stack; 6.31 // _manager_array and _stack_array are statics 6.32 @@ -86,18 +94,21 @@ 6.33 6.34 assert(_manager_array == NULL, "Attempt to initialize twice"); 6.35 _manager_array = NEW_C_HEAP_ARRAY(ParCompactionManager*, parallel_gc_threads+1 ); 6.36 - guarantee(_manager_array != NULL, "Could not initialize promotion manager"); 6.37 + guarantee(_manager_array != NULL, "Could not allocate manager_array"); 6.38 6.39 _stack_array = new OopTaskQueueSet(parallel_gc_threads); 6.40 - guarantee(_stack_array != NULL, "Count not initialize promotion manager"); 6.41 + guarantee(_stack_array != NULL, "Could not allocate stack_array"); 6.42 + _objarray_queues = 
new ObjArrayTaskQueueSet(parallel_gc_threads); 6.43 + guarantee(_objarray_queues != NULL, "Could not allocate objarray_queues"); 6.44 _region_array = new RegionTaskQueueSet(parallel_gc_threads); 6.45 - guarantee(_region_array != NULL, "Count not initialize promotion manager"); 6.46 + guarantee(_region_array != NULL, "Could not allocate region_array"); 6.47 6.48 // Create and register the ParCompactionManager(s) for the worker threads. 6.49 for(uint i=0; i<parallel_gc_threads; i++) { 6.50 _manager_array[i] = new ParCompactionManager(); 6.51 guarantee(_manager_array[i] != NULL, "Could not create ParCompactionManager"); 6.52 stack_array()->register_queue(i, _manager_array[i]->marking_stack()); 6.53 + _objarray_queues->register_queue(i, &_manager_array[i]->_objarray_queue); 6.54 #ifdef USE_RegionTaskQueueWithOverflow 6.55 region_array()->register_queue(i, _manager_array[i]->region_stack()->task_queue()); 6.56 #else 6.57 @@ -203,36 +214,30 @@ 6.58 } 6.59 } 6.60 6.61 -void ParCompactionManager::drain_marking_stacks(OopClosure* blk) { 6.62 -#ifdef ASSERT 6.63 - ParallelScavengeHeap* heap = (ParallelScavengeHeap*)Universe::heap(); 6.64 - assert(heap->kind() == CollectedHeap::ParallelScavengeHeap, "Sanity"); 6.65 - MutableSpace* to_space = heap->young_gen()->to_space(); 6.66 - MutableSpace* old_space = heap->old_gen()->object_space(); 6.67 - MutableSpace* perm_space = heap->perm_gen()->object_space(); 6.68 -#endif /* ASSERT */ 6.69 - 6.70 - 6.71 +void ParCompactionManager::follow_marking_stacks() { 6.72 do { 6.73 - 6.74 - // Drain overflow stack first, so other threads can steal from 6.75 - // claimed stack while we work. 6.76 - while(!overflow_stack()->is_empty()) { 6.77 - oop obj = overflow_stack()->pop(); 6.78 + // Drain the overflow stack first, to allow stealing from the marking stack. 
6.79 + while (!overflow_stack()->is_empty()) { 6.80 + overflow_stack()->pop()->follow_contents(this); 6.81 + } 6.82 + oop obj; 6.83 + while (marking_stack()->pop_local(obj)) { 6.84 obj->follow_contents(this); 6.85 } 6.86 6.87 - oop obj; 6.88 - // obj is a reference!!! 6.89 - while (marking_stack()->pop_local(obj)) { 6.90 - // It would be nice to assert about the type of objects we might 6.91 - // pop, but they can come from anywhere, unfortunately. 6.92 - obj->follow_contents(this); 6.93 + ObjArrayTask task; 6.94 + while (!_objarray_overflow_stack->is_empty()) { 6.95 + task = _objarray_overflow_stack->pop(); 6.96 + objArrayKlass* const k = (objArrayKlass*)task.obj()->blueprint(); 6.97 + k->oop_follow_contents(this, task.obj(), task.index()); 6.98 } 6.99 - } while((marking_stack()->size() != 0) || (overflow_stack()->length() != 0)); 6.100 + while (_objarray_queue.pop_local(task)) { 6.101 + objArrayKlass* const k = (objArrayKlass*)task.obj()->blueprint(); 6.102 + k->oop_follow_contents(this, task.obj(), task.index()); 6.103 + } 6.104 + } while (!marking_stacks_empty()); 6.105 6.106 - assert(marking_stack()->size() == 0, "Sanity"); 6.107 - assert(overflow_stack()->length() == 0, "Sanity"); 6.108 + assert(marking_stacks_empty(), "Sanity"); 6.109 } 6.110 6.111 void ParCompactionManager::drain_region_overflow_stack() {
7.1 --- a/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.hpp Wed Mar 03 08:10:41 2010 -0800 7.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.hpp Wed Mar 03 14:48:26 2010 -0800 7.3 @@ -22,18 +22,6 @@ 7.4 * 7.5 */ 7.6 7.7 -// 7.8 -// psPromotionManager is used by a single thread to manage object survival 7.9 -// during a scavenge. The promotion manager contains thread local data only. 7.10 -// 7.11 -// NOTE! Be carefull when allocating the stacks on cheap. If you are going 7.12 -// to use a promotion manager in more than one thread, the stacks MUST be 7.13 -// on cheap. This can lead to memory leaks, though, as they are not auto 7.14 -// deallocated. 7.15 -// 7.16 -// FIX ME FIX ME Add a destructor, and don't rely on the user to drain/flush/deallocate! 7.17 -// 7.18 - 7.19 // Move to some global location 7.20 #define HAS_BEEN_MOVED 0x1501d01d 7.21 // End move to some global location 7.22 @@ -46,8 +34,6 @@ 7.23 class ParallelCompactData; 7.24 class ParMarkBitMap; 7.25 7.26 -// Move to it's own file if this works out. 
7.27 - 7.28 class ParCompactionManager : public CHeapObj { 7.29 friend class ParallelTaskTerminator; 7.30 friend class ParMarkBitMap; 7.31 @@ -72,14 +58,27 @@ 7.32 // ------------------------ End don't putback if not needed 7.33 7.34 private: 7.35 + // 32-bit: 4K * 8 = 32KiB; 64-bit: 8K * 16 = 128KiB 7.36 + #define OBJARRAY_QUEUE_SIZE (1 << NOT_LP64(12) LP64_ONLY(13)) 7.37 + typedef GenericTaskQueue<ObjArrayTask, OBJARRAY_QUEUE_SIZE> ObjArrayTaskQueue; 7.38 + typedef GenericTaskQueueSet<ObjArrayTaskQueue> ObjArrayTaskQueueSet; 7.39 + #undef OBJARRAY_QUEUE_SIZE 7.40 + 7.41 static ParCompactionManager** _manager_array; 7.42 static OopTaskQueueSet* _stack_array; 7.43 + static ObjArrayTaskQueueSet* _objarray_queues; 7.44 static ObjectStartArray* _start_array; 7.45 static RegionTaskQueueSet* _region_array; 7.46 static PSOldGen* _old_gen; 7.47 7.48 +private: 7.49 OopTaskQueue _marking_stack; 7.50 GrowableArray<oop>* _overflow_stack; 7.51 + 7.52 + typedef GrowableArray<ObjArrayTask> ObjArrayOverflowStack; 7.53 + ObjArrayTaskQueue _objarray_queue; 7.54 + ObjArrayOverflowStack* _objarray_overflow_stack; 7.55 + 7.56 // Is there a way to reuse the _marking_stack for the 7.57 // saving empty regions? For now just create a different 7.58 // type of TaskQueue. 7.59 @@ -128,8 +127,8 @@ 7.60 // Pushes onto the region stack. If the region stack is full, 7.61 // pushes onto the region overflow stack. 7.62 void region_stack_push(size_t region_index); 7.63 - public: 7.64 7.65 +public: 7.66 Action action() { return _action; } 7.67 void set_action(Action v) { _action = v; } 7.68 7.69 @@ -163,6 +162,8 @@ 7.70 // Get a oop for scanning. If returns null, no oop were found. 7.71 oop retrieve_for_scanning(); 7.72 7.73 + inline void push_objarray(oop obj, size_t index); 7.74 + 7.75 // Save region for later processing. Must not fail. 7.76 void save_for_processing(size_t region_index); 7.77 // Get a region for processing. If returns null, no region were found. 
7.78 @@ -175,12 +176,17 @@ 7.79 return stack_array()->steal(queue_num, seed, t); 7.80 } 7.81 7.82 + static bool steal_objarray(int queue_num, int* seed, ObjArrayTask& t) { 7.83 + return _objarray_queues->steal(queue_num, seed, t); 7.84 + } 7.85 + 7.86 static bool steal(int queue_num, int* seed, RegionTask& t) { 7.87 return region_array()->steal(queue_num, seed, t); 7.88 } 7.89 7.90 - // Process tasks remaining on any stack 7.91 - void drain_marking_stacks(OopClosure *blk); 7.92 + // Process tasks remaining on any marking stack 7.93 + void follow_marking_stacks(); 7.94 + inline bool marking_stacks_empty() const; 7.95 7.96 // Process tasks remaining on any stack 7.97 void drain_region_stacks(); 7.98 @@ -200,3 +206,8 @@ 7.99 "out of range manager_array access"); 7.100 return _manager_array[index]; 7.101 } 7.102 + 7.103 +bool ParCompactionManager::marking_stacks_empty() const { 7.104 + return _marking_stack.size() == 0 && _overflow_stack->is_empty() && 7.105 + _objarray_queue.size() == 0 && _objarray_overflow_stack->is_empty(); 7.106 +}
8.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 8.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.inline.hpp Wed Mar 03 14:48:26 2010 -0800 8.3 @@ -0,0 +1,32 @@ 8.4 +/* 8.5 + * Copyright 2010 Sun Microsystems, Inc. All Rights Reserved. 8.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 8.7 + * 8.8 + * This code is free software; you can redistribute it and/or modify it 8.9 + * under the terms of the GNU General Public License version 2 only, as 8.10 + * published by the Free Software Foundation. 8.11 + * 8.12 + * This code is distributed in the hope that it will be useful, but WITHOUT 8.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 8.14 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 8.15 + * version 2 for more details (a copy is included in the LICENSE file that 8.16 + * accompanied this code). 8.17 + * 8.18 + * You should have received a copy of the GNU General Public License version 8.19 + * 2 along with this work; if not, write to the Free Software Foundation, 8.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 8.21 + * 8.22 + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, 8.23 + * CA 95054 USA or visit www.sun.com if you need additional information or 8.24 + * have any questions. 8.25 + * 8.26 + */ 8.27 + 8.28 +void ParCompactionManager::push_objarray(oop obj, size_t index) 8.29 +{ 8.30 + ObjArrayTask task(obj, index); 8.31 + assert(task.is_valid(), "bad ObjArrayTask"); 8.32 + if (!_objarray_queue.push(task)) { 8.33 + _objarray_overflow_stack->push(task); 8.34 + } 8.35 +}
9.1 --- a/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp Wed Mar 03 08:10:41 2010 -0800 9.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp Wed Mar 03 14:48:26 2010 -0800 9.3 @@ -479,6 +479,7 @@ 9.4 _preserved_oop_stack = NULL; 9.5 9.6 _marking_stack = new (ResourceObj::C_HEAP) GrowableArray<oop>(4000, true); 9.7 + _objarray_stack = new (ResourceObj::C_HEAP) GrowableArray<ObjArrayTask>(50, true); 9.8 9.9 int size = SystemDictionary::number_of_classes() * 2; 9.10 _revisit_klass_stack = new (ResourceObj::C_HEAP) GrowableArray<Klass*>(size, true); 9.11 @@ -497,6 +498,7 @@ 9.12 } 9.13 9.14 delete _marking_stack; 9.15 + delete _objarray_stack; 9.16 delete _revisit_klass_stack; 9.17 delete _revisit_mdo_stack; 9.18 }
10.1 --- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp Wed Mar 03 08:10:41 2010 -0800 10.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp Wed Mar 03 14:48:26 2010 -0800 10.3 @@ -785,7 +785,7 @@ 10.4 void PSParallelCompact::AdjustPointerClosure::do_oop(oop* p) { adjust_pointer(p, _is_root); } 10.5 void PSParallelCompact::AdjustPointerClosure::do_oop(narrowOop* p) { adjust_pointer(p, _is_root); } 10.6 10.7 -void PSParallelCompact::FollowStackClosure::do_void() { follow_stack(_compaction_manager); } 10.8 +void PSParallelCompact::FollowStackClosure::do_void() { _compaction_manager->follow_marking_stacks(); } 10.9 10.10 void PSParallelCompact::MarkAndPushClosure::do_oop(oop* p) { mark_and_push(_compaction_manager, p); } 10.11 void PSParallelCompact::MarkAndPushClosure::do_oop(narrowOop* p) { mark_and_push(_compaction_manager, p); } 10.12 @@ -2376,7 +2376,7 @@ 10.13 // Follow code cache roots. 10.14 CodeCache::do_unloading(is_alive_closure(), &mark_and_push_closure, 10.15 purged_class); 10.16 - follow_stack(cm); // Flush marking stack. 10.17 + cm->follow_marking_stacks(); // Flush marking stack. 10.18 10.19 // Update subklass/sibling/implementor links of live klasses 10.20 // revisit_klass_stack is used in follow_weak_klass_links(). 10.21 @@ -2389,8 +2389,7 @@ 10.22 SymbolTable::unlink(is_alive_closure()); 10.23 StringTable::unlink(is_alive_closure()); 10.24 10.25 - assert(cm->marking_stack()->size() == 0, "stack should be empty by now"); 10.26 - assert(cm->overflow_stack()->is_empty(), "stack should be empty by now"); 10.27 + assert(cm->marking_stacks_empty(), "marking stacks should be empty"); 10.28 } 10.29 10.30 // This should be moved to the shared markSweep code! 
10.31 @@ -2709,22 +2708,6 @@ 10.32 young_gen->move_and_update(cm); 10.33 } 10.34 10.35 - 10.36 -void PSParallelCompact::follow_stack(ParCompactionManager* cm) { 10.37 - while(!cm->overflow_stack()->is_empty()) { 10.38 - oop obj = cm->overflow_stack()->pop(); 10.39 - obj->follow_contents(cm); 10.40 - } 10.41 - 10.42 - oop obj; 10.43 - // obj is a reference!!! 10.44 - while (cm->marking_stack()->pop_local(obj)) { 10.45 - // It would be nice to assert about the type of objects we might 10.46 - // pop, but they can come from anywhere, unfortunately. 10.47 - obj->follow_contents(cm); 10.48 - } 10.49 -} 10.50 - 10.51 void 10.52 PSParallelCompact::follow_weak_klass_links() { 10.53 // All klasses on the revisit stack are marked at this point. 10.54 @@ -2745,7 +2728,7 @@ 10.55 &keep_alive_closure); 10.56 } 10.57 // revisit_klass_stack is cleared in reset() 10.58 - follow_stack(cm); 10.59 + cm->follow_marking_stacks(); 10.60 } 10.61 } 10.62 10.63 @@ -2776,7 +2759,7 @@ 10.64 rms->at(j)->follow_weak_refs(is_alive_closure()); 10.65 } 10.66 // revisit_mdo_stack is cleared in reset() 10.67 - follow_stack(cm); 10.68 + cm->follow_marking_stacks(); 10.69 } 10.70 } 10.71
11.1 --- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp Wed Mar 03 08:10:41 2010 -0800 11.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp Wed Mar 03 14:48:26 2010 -0800 11.3 @@ -901,7 +901,6 @@ 11.4 // Mark live objects 11.5 static void marking_phase(ParCompactionManager* cm, 11.6 bool maximum_heap_compaction); 11.7 - static void follow_stack(ParCompactionManager* cm); 11.8 static void follow_weak_klass_links(); 11.9 static void follow_mdo_weak_refs(); 11.10 11.11 @@ -1276,7 +1275,7 @@ 11.12 } 11.13 } 11.14 } 11.15 - follow_stack(cm); 11.16 + cm->follow_marking_stacks(); 11.17 } 11.18 11.19 template <class T>
12.1 --- a/src/share/vm/gc_implementation/shared/markSweep.cpp Wed Mar 03 08:10:41 2010 -0800 12.2 +++ b/src/share/vm/gc_implementation/shared/markSweep.cpp Wed Mar 03 14:48:26 2010 -0800 12.3 @@ -25,8 +25,9 @@ 12.4 #include "incls/_precompiled.incl" 12.5 #include "incls/_markSweep.cpp.incl" 12.6 12.7 -GrowableArray<oop>* MarkSweep::_marking_stack = NULL; 12.8 -GrowableArray<Klass*>* MarkSweep::_revisit_klass_stack = NULL; 12.9 +GrowableArray<oop>* MarkSweep::_marking_stack = NULL; 12.10 +GrowableArray<ObjArrayTask>* MarkSweep::_objarray_stack = NULL; 12.11 +GrowableArray<Klass*>* MarkSweep::_revisit_klass_stack = NULL; 12.12 GrowableArray<DataLayout*>* MarkSweep::_revisit_mdo_stack = NULL; 12.13 12.14 GrowableArray<oop>* MarkSweep::_preserved_oop_stack = NULL; 12.15 @@ -104,11 +105,18 @@ 12.16 void MarkSweep::MarkAndPushClosure::do_oop(narrowOop* p) { mark_and_push(p); } 12.17 12.18 void MarkSweep::follow_stack() { 12.19 - while (!_marking_stack->is_empty()) { 12.20 - oop obj = _marking_stack->pop(); 12.21 - assert (obj->is_gc_marked(), "p must be marked"); 12.22 - obj->follow_contents(); 12.23 - } 12.24 + do { 12.25 + while (!_marking_stack->is_empty()) { 12.26 + oop obj = _marking_stack->pop(); 12.27 + assert (obj->is_gc_marked(), "p must be marked"); 12.28 + obj->follow_contents(); 12.29 + } 12.30 + while (!_objarray_stack->is_empty()) { 12.31 + ObjArrayTask task = _objarray_stack->pop(); 12.32 + objArrayKlass* const k = (objArrayKlass*)task.obj()->blueprint(); 12.33 + k->oop_follow_contents(task.obj(), task.index()); 12.34 + } 12.35 + } while (!_marking_stack->is_empty() || !_objarray_stack->is_empty()); 12.36 } 12.37 12.38 MarkSweep::FollowStackClosure MarkSweep::follow_stack_closure;
13.1 --- a/src/share/vm/gc_implementation/shared/markSweep.hpp Wed Mar 03 08:10:41 2010 -0800 13.2 +++ b/src/share/vm/gc_implementation/shared/markSweep.hpp Wed Mar 03 14:48:26 2010 -0800 13.3 @@ -110,8 +110,9 @@ 13.4 // Vars 13.5 // 13.6 protected: 13.7 - // Traversal stack used during phase1 13.8 + // Traversal stacks used during phase1 13.9 static GrowableArray<oop>* _marking_stack; 13.10 + static GrowableArray<ObjArrayTask>* _objarray_stack; 13.11 // Stack for live klasses to revisit at end of marking phase 13.12 static GrowableArray<Klass*>* _revisit_klass_stack; 13.13 // Set (stack) of MDO's to revisit at end of marking phase 13.14 @@ -188,6 +189,7 @@ 13.15 template <class T> static inline void mark_and_follow(T* p); 13.16 // Check mark and maybe push on marking stack 13.17 template <class T> static inline void mark_and_push(T* p); 13.18 + static inline void push_objarray(oop obj, size_t index); 13.19 13.20 static void follow_stack(); // Empty marking stack. 13.21
14.1 --- a/src/share/vm/gc_implementation/shared/markSweep.inline.hpp Wed Mar 03 08:10:41 2010 -0800 14.2 +++ b/src/share/vm/gc_implementation/shared/markSweep.inline.hpp Wed Mar 03 14:48:26 2010 -0800 14.3 @@ -77,6 +77,12 @@ 14.4 } 14.5 } 14.6 14.7 +void MarkSweep::push_objarray(oop obj, size_t index) { 14.8 + ObjArrayTask task(obj, index); 14.9 + assert(task.is_valid(), "bad ObjArrayTask"); 14.10 + _objarray_stack->push(task); 14.11 +} 14.12 + 14.13 template <class T> inline void MarkSweep::adjust_pointer(T* p, bool isroot) { 14.14 T heap_oop = oopDesc::load_heap_oop(p); 14.15 if (!oopDesc::is_null(heap_oop)) {
15.1 --- a/src/share/vm/includeDB_core Wed Mar 03 08:10:41 2010 -0800 15.2 +++ b/src/share/vm/includeDB_core Wed Mar 03 14:48:26 2010 -0800 15.3 @@ -2724,8 +2724,10 @@ 15.4 15.5 markSweep.cpp compileBroker.hpp 15.6 markSweep.cpp methodDataOop.hpp 15.7 +markSweep.cpp objArrayKlass.inline.hpp 15.8 15.9 markSweep.hpp collectedHeap.hpp 15.10 +markSweep.hpp taskqueue.hpp 15.11 15.12 memRegion.cpp globals.hpp 15.13 memRegion.cpp memRegion.hpp 15.14 @@ -3054,8 +3056,10 @@ 15.15 objArrayKlass.cpp genOopClosures.inline.hpp 15.16 objArrayKlass.cpp handles.inline.hpp 15.17 objArrayKlass.cpp instanceKlass.hpp 15.18 +objArrayKlass.cpp markSweep.inline.hpp 15.19 objArrayKlass.cpp mutexLocker.hpp 15.20 objArrayKlass.cpp objArrayKlass.hpp 15.21 +objArrayKlass.cpp objArrayKlass.inline.hpp 15.22 objArrayKlass.cpp objArrayKlassKlass.hpp 15.23 objArrayKlass.cpp objArrayOop.hpp 15.24 objArrayKlass.cpp oop.inline.hpp 15.25 @@ -3066,11 +3070,12 @@ 15.26 objArrayKlass.cpp universe.inline.hpp 15.27 objArrayKlass.cpp vmSymbols.hpp 15.28 15.29 - 15.30 objArrayKlass.hpp arrayKlass.hpp 15.31 objArrayKlass.hpp instanceKlass.hpp 15.32 objArrayKlass.hpp specialized_oop_closures.hpp 15.33 15.34 +objArrayKlass.inline.hpp objArrayKlass.hpp 15.35 + 15.36 objArrayKlassKlass.cpp collectedHeap.inline.hpp 15.37 objArrayKlassKlass.cpp instanceKlass.hpp 15.38 objArrayKlassKlass.cpp javaClasses.hpp 15.39 @@ -4096,6 +4101,7 @@ 15.40 task.hpp top.hpp 15.41 15.42 taskqueue.cpp debug.hpp 15.43 +taskqueue.cpp oop.inline.hpp 15.44 taskqueue.cpp os.hpp 15.45 taskqueue.cpp taskqueue.hpp 15.46 taskqueue.cpp thread_<os_family>.inline.hpp
16.1 --- a/src/share/vm/includeDB_gc_parallel Wed Mar 03 08:10:41 2010 -0800 16.2 +++ b/src/share/vm/includeDB_gc_parallel Wed Mar 03 14:48:26 2010 -0800 16.3 @@ -115,10 +115,14 @@ 16.4 objArrayKlass.cpp g1CollectedHeap.inline.hpp 16.5 objArrayKlass.cpp g1OopClosures.inline.hpp 16.6 objArrayKlass.cpp oop.pcgc.inline.hpp 16.7 +objArrayKlass.cpp psCompactionManager.hpp 16.8 objArrayKlass.cpp psPromotionManager.inline.hpp 16.9 objArrayKlass.cpp psScavenge.inline.hpp 16.10 objArrayKlass.cpp parOopClosures.inline.hpp 16.11 16.12 +objArrayKlass.inline.hpp psCompactionManager.inline.hpp 16.13 +objArrayKlass.inline.hpp psParallelCompact.hpp 16.14 + 16.15 oop.pcgc.inline.hpp parNewGeneration.hpp 16.16 oop.pcgc.inline.hpp parallelScavengeHeap.hpp 16.17 oop.pcgc.inline.hpp psCompactionManager.hpp
17.1 --- a/src/share/vm/memory/genMarkSweep.cpp Wed Mar 03 08:10:41 2010 -0800 17.2 +++ b/src/share/vm/memory/genMarkSweep.cpp Wed Mar 03 14:48:26 2010 -0800 17.3 @@ -159,6 +159,7 @@ 17.4 _preserved_oop_stack = NULL; 17.5 17.6 _marking_stack = new (ResourceObj::C_HEAP) GrowableArray<oop>(4000, true); 17.7 + _objarray_stack = new (ResourceObj::C_HEAP) GrowableArray<ObjArrayTask>(50, true); 17.8 17.9 int size = SystemDictionary::number_of_classes() * 2; 17.10 _revisit_klass_stack = new (ResourceObj::C_HEAP) GrowableArray<Klass*>(size, true); 17.11 @@ -194,7 +195,6 @@ 17.12 17.13 17.14 void GenMarkSweep::deallocate_stacks() { 17.15 - 17.16 if (!UseG1GC) { 17.17 GenCollectedHeap* gch = GenCollectedHeap::heap(); 17.18 gch->release_scratch(); 17.19 @@ -208,6 +208,7 @@ 17.20 } 17.21 17.22 delete _marking_stack; 17.23 + delete _objarray_stack; 17.24 delete _revisit_klass_stack; 17.25 delete _revisit_mdo_stack; 17.26
18.1 --- a/src/share/vm/memory/genOopClosures.hpp Wed Mar 03 08:10:41 2010 -0800 18.2 +++ b/src/share/vm/memory/genOopClosures.hpp Wed Mar 03 14:48:26 2010 -0800 18.3 @@ -28,10 +28,10 @@ 18.4 class CardTableModRefBS; 18.5 class DefNewGeneration; 18.6 18.7 -template<class E> class GenericTaskQueue; 18.8 -typedef GenericTaskQueue<oop> OopTaskQueue; 18.9 -template<class E> class GenericTaskQueueSet; 18.10 -typedef GenericTaskQueueSet<oop> OopTaskQueueSet; 18.11 +template<class E, unsigned int N> class GenericTaskQueue; 18.12 +typedef GenericTaskQueue<oop, TASKQUEUE_SIZE> OopTaskQueue; 18.13 +template<class T> class GenericTaskQueueSet; 18.14 +typedef GenericTaskQueueSet<OopTaskQueue> OopTaskQueueSet; 18.15 18.16 // Closure for iterating roots from a particular generation 18.17 // Note: all classes deriving from this MUST call this do_barrier
19.1 --- a/src/share/vm/oops/objArrayKlass.cpp Wed Mar 03 08:10:41 2010 -0800 19.2 +++ b/src/share/vm/oops/objArrayKlass.cpp Wed Mar 03 14:48:26 2010 -0800 19.3 @@ -314,24 +314,24 @@ 19.4 19.5 void objArrayKlass::oop_follow_contents(oop obj) { 19.6 assert (obj->is_array(), "obj must be array"); 19.7 - objArrayOop a = objArrayOop(obj); 19.8 - a->follow_header(); 19.9 - ObjArrayKlass_OOP_ITERATE( \ 19.10 - a, p, \ 19.11 - /* we call mark_and_follow here to avoid excessive marking stack usage */ \ 19.12 - MarkSweep::mark_and_follow(p)) 19.13 + objArrayOop(obj)->follow_header(); 19.14 + if (UseCompressedOops) { 19.15 + objarray_follow_contents<narrowOop>(obj, 0); 19.16 + } else { 19.17 + objarray_follow_contents<oop>(obj, 0); 19.18 + } 19.19 } 19.20 19.21 #ifndef SERIALGC 19.22 void objArrayKlass::oop_follow_contents(ParCompactionManager* cm, 19.23 oop obj) { 19.24 - assert (obj->is_array(), "obj must be array"); 19.25 - objArrayOop a = objArrayOop(obj); 19.26 - a->follow_header(cm); 19.27 - ObjArrayKlass_OOP_ITERATE( \ 19.28 - a, p, \ 19.29 - /* we call mark_and_follow here to avoid excessive marking stack usage */ \ 19.30 - PSParallelCompact::mark_and_follow(cm, p)) 19.31 + assert(obj->is_array(), "obj must be array"); 19.32 + objArrayOop(obj)->follow_header(cm); 19.33 + if (UseCompressedOops) { 19.34 + objarray_follow_contents<narrowOop>(cm, obj, 0); 19.35 + } else { 19.36 + objarray_follow_contents<oop>(cm, obj, 0); 19.37 + } 19.38 } 19.39 #endif // SERIALGC 19.40
20.1 --- a/src/share/vm/oops/objArrayKlass.hpp Wed Mar 03 08:10:41 2010 -0800 20.2 +++ b/src/share/vm/oops/objArrayKlass.hpp Wed Mar 03 14:48:26 2010 -0800 20.3 @@ -91,10 +91,18 @@ 20.4 20.5 // Garbage collection 20.6 void oop_follow_contents(oop obj); 20.7 + inline void oop_follow_contents(oop obj, int index); 20.8 + template <class T> inline void objarray_follow_contents(oop obj, int index); 20.9 + 20.10 int oop_adjust_pointers(oop obj); 20.11 20.12 // Parallel Scavenge and Parallel Old 20.13 PARALLEL_GC_DECLS 20.14 +#ifndef SERIALGC 20.15 + inline void oop_follow_contents(ParCompactionManager* cm, oop obj, int index); 20.16 + template <class T> inline void 20.17 + objarray_follow_contents(ParCompactionManager* cm, oop obj, int index); 20.18 +#endif // !SERIALGC 20.19 20.20 // Iterators 20.21 int oop_oop_iterate(oop obj, OopClosure* blk) { 20.22 @@ -131,5 +139,4 @@ 20.23 void oop_verify_on(oop obj, outputStream* st); 20.24 void oop_verify_old_oop(oop obj, oop* p, bool allow_dirty); 20.25 void oop_verify_old_oop(oop obj, narrowOop* p, bool allow_dirty); 20.26 - 20.27 };
21.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 21.2 +++ b/src/share/vm/oops/objArrayKlass.inline.hpp Wed Mar 03 14:48:26 2010 -0800 21.3 @@ -0,0 +1,89 @@ 21.4 +/* 21.5 + * Copyright 2010 Sun Microsystems, Inc. All Rights Reserved. 21.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 21.7 + * 21.8 + * This code is free software; you can redistribute it and/or modify it 21.9 + * under the terms of the GNU General Public License version 2 only, as 21.10 + * published by the Free Software Foundation. 21.11 + * 21.12 + * This code is distributed in the hope that it will be useful, but WITHOUT 21.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 21.14 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 21.15 + * version 2 for more details (a copy is included in the LICENSE file that 21.16 + * accompanied this code). 21.17 + * 21.18 + * You should have received a copy of the GNU General Public License version 21.19 + * 2 along with this work; if not, write to the Free Software Foundation, 21.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 21.21 + * 21.22 + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, 21.23 + * CA 95054 USA or visit www.sun.com if you need additional information or 21.24 + * have any questions. 
21.25 + * 21.26 + */ 21.27 + 21.28 +void objArrayKlass::oop_follow_contents(oop obj, int index) { 21.29 + if (UseCompressedOops) { 21.30 + objarray_follow_contents<narrowOop>(obj, index); 21.31 + } else { 21.32 + objarray_follow_contents<oop>(obj, index); 21.33 + } 21.34 +} 21.35 + 21.36 +template <class T> 21.37 +void objArrayKlass::objarray_follow_contents(oop obj, int index) { 21.38 + objArrayOop a = objArrayOop(obj); 21.39 + const size_t len = size_t(a->length()); 21.40 + const size_t beg_index = size_t(index); 21.41 + assert(beg_index < len || len == 0, "index too large"); 21.42 + 21.43 + const size_t stride = MIN2(len - beg_index, ObjArrayMarkingStride); 21.44 + const size_t end_index = beg_index + stride; 21.45 + T* const base = (T*)a->base(); 21.46 + T* const beg = base + beg_index; 21.47 + T* const end = base + end_index; 21.48 + 21.49 + // Push the non-NULL elements of the next stride on the marking stack. 21.50 + for (T* e = beg; e < end; e++) { 21.51 + MarkSweep::mark_and_push<T>(e); 21.52 + } 21.53 + 21.54 + if (end_index < len) { 21.55 + MarkSweep::push_objarray(a, end_index); // Push the continuation. 
21.56 + } 21.57 +} 21.58 + 21.59 +#ifndef SERIALGC 21.60 +void objArrayKlass::oop_follow_contents(ParCompactionManager* cm, oop obj, 21.61 + int index) { 21.62 + if (UseCompressedOops) { 21.63 + objarray_follow_contents<narrowOop>(cm, obj, index); 21.64 + } else { 21.65 + objarray_follow_contents<oop>(cm, obj, index); 21.66 + } 21.67 +} 21.68 + 21.69 +template <class T> 21.70 +void objArrayKlass::objarray_follow_contents(ParCompactionManager* cm, oop obj, 21.71 + int index) { 21.72 + objArrayOop a = objArrayOop(obj); 21.73 + const size_t len = size_t(a->length()); 21.74 + const size_t beg_index = size_t(index); 21.75 + assert(beg_index < len || len == 0, "index too large"); 21.76 + 21.77 + const size_t stride = MIN2(len - beg_index, ObjArrayMarkingStride); 21.78 + const size_t end_index = beg_index + stride; 21.79 + T* const base = (T*)a->base(); 21.80 + T* const beg = base + beg_index; 21.81 + T* const end = base + end_index; 21.82 + 21.83 + // Push the non-NULL elements of the next stride on the marking stack. 21.84 + for (T* e = beg; e < end; e++) { 21.85 + PSParallelCompact::mark_and_push<T>(cm, e); 21.86 + } 21.87 + 21.88 + if (end_index < len) { 21.89 + cm->push_objarray(a, end_index); // Push the continuation. 21.90 + } 21.91 +} 21.92 +#endif // #ifndef SERIALGC
22.1 --- a/src/share/vm/runtime/arguments.cpp Wed Mar 03 08:10:41 2010 -0800 22.2 +++ b/src/share/vm/runtime/arguments.cpp Wed Mar 03 14:48:26 2010 -0800 22.3 @@ -1346,9 +1346,7 @@ 22.4 } 22.5 22.6 if (FLAG_IS_DEFAULT(MarkStackSize)) { 22.7 - // Size as a multiple of TaskQueueSuper::N which is larger 22.8 - // for 64-bit. 22.9 - FLAG_SET_DEFAULT(MarkStackSize, 128 * TaskQueueSuper::total_size()); 22.10 + FLAG_SET_DEFAULT(MarkStackSize, 128 * TASKQUEUE_SIZE); 22.11 } 22.12 if (PrintGCDetails && Verbose) { 22.13 tty->print_cr("MarkStackSize: %uk MarkStackSizeMax: %uk",
23.1 --- a/src/share/vm/runtime/globals.hpp Wed Mar 03 08:10:41 2010 -0800 23.2 +++ b/src/share/vm/runtime/globals.hpp Wed Mar 03 14:48:26 2010 -0800 23.3 @@ -1795,6 +1795,10 @@ 23.4 product(uintx, PreserveMarkStackSize, 1024, \ 23.5 "Size for stack used in promotion failure handling") \ 23.6 \ 23.7 + develop(uintx, ObjArrayMarkingStride, 512, \ 23.8 + "Number of ObjArray elements to push onto the marking stack " \ 23.9 + "before pushing a continuation entry") \ 23.10 + \ 23.11 product_pd(bool, UseTLAB, "Use thread-local object allocation") \ 23.12 \ 23.13 product_pd(bool, ResizeTLAB, \
24.1 --- a/src/share/vm/utilities/globalDefinitions.hpp Wed Mar 03 08:10:41 2010 -0800 24.2 +++ b/src/share/vm/utilities/globalDefinitions.hpp Wed Mar 03 14:48:26 2010 -0800 24.3 @@ -827,6 +827,8 @@ 24.4 #define badHeapWord (::badHeapWordVal) 24.5 #define badJNIHandle ((oop)::badJNIHandleVal) 24.6 24.7 +// Default TaskQueue size is 16K (32-bit) or 128K (64-bit) 24.8 +#define TASKQUEUE_SIZE (NOT_LP64(1<<14) LP64_ONLY(1<<17)) 24.9 24.10 //---------------------------------------------------------------------------------------------------- 24.11 // Utility functions for bitfield manipulations
25.1 --- a/src/share/vm/utilities/taskqueue.cpp Wed Mar 03 08:10:41 2010 -0800 25.2 +++ b/src/share/vm/utilities/taskqueue.cpp Wed Mar 03 14:48:26 2010 -0800 25.3 @@ -31,10 +31,6 @@ 25.4 uint ParallelTaskTerminator::_total_peeks = 0; 25.5 #endif 25.6 25.7 -bool TaskQueueSuper::peek() { 25.8 - return _bottom != _age.top(); 25.9 -} 25.10 - 25.11 int TaskQueueSetSuper::randomParkAndMiller(int *seed0) { 25.12 const int a = 16807; 25.13 const int m = 2147483647; 25.14 @@ -180,6 +176,13 @@ 25.15 } 25.16 } 25.17 25.18 +#ifdef ASSERT 25.19 +bool ObjArrayTask::is_valid() const { 25.20 + return _obj != NULL && _obj->is_objArray() && _index > 0 && 25.21 + _index < objArrayOop(_obj)->length(); 25.22 +} 25.23 +#endif // ASSERT 25.24 + 25.25 bool RegionTaskQueueWithOverflow::is_empty() { 25.26 return (_region_queue.size() == 0) && 25.27 (_overflow_stack->length() == 0);
26.1 --- a/src/share/vm/utilities/taskqueue.hpp Wed Mar 03 08:10:41 2010 -0800 26.2 +++ b/src/share/vm/utilities/taskqueue.hpp Wed Mar 03 14:48:26 2010 -0800 26.3 @@ -22,6 +22,7 @@ 26.4 * 26.5 */ 26.6 26.7 +template <unsigned int N> 26.8 class TaskQueueSuper: public CHeapObj { 26.9 protected: 26.10 // Internal type for indexing the queue; also used for the tag. 26.11 @@ -30,10 +31,7 @@ 26.12 // The first free element after the last one pushed (mod N). 26.13 volatile uint _bottom; 26.14 26.15 - enum { 26.16 - N = 1 << NOT_LP64(14) LP64_ONLY(17), // Queue size: 16K or 128K 26.17 - MOD_N_MASK = N - 1 // To compute x mod N efficiently. 26.18 - }; 26.19 + enum { MOD_N_MASK = N - 1 }; 26.20 26.21 class Age { 26.22 public: 26.23 @@ -84,12 +82,12 @@ 26.24 26.25 // Returns a number in the range [0..N). If the result is "N-1", it should be 26.26 // interpreted as 0. 26.27 - uint dirty_size(uint bot, uint top) { 26.28 + uint dirty_size(uint bot, uint top) const { 26.29 return (bot - top) & MOD_N_MASK; 26.30 } 26.31 26.32 // Returns the size corresponding to the given "bot" and "top". 26.33 - uint size(uint bot, uint top) { 26.34 + uint size(uint bot, uint top) const { 26.35 uint sz = dirty_size(bot, top); 26.36 // Has the queue "wrapped", so that bottom is less than top? There's a 26.37 // complicated special case here. A pair of threads could perform pop_local 26.38 @@ -111,17 +109,17 @@ 26.39 public: 26.40 TaskQueueSuper() : _bottom(0), _age() {} 26.41 26.42 - // Return "true" if the TaskQueue contains any tasks. 26.43 - bool peek(); 26.44 + // Return true if the TaskQueue contains any tasks. 26.45 + bool peek() { return _bottom != _age.top(); } 26.46 26.47 // Return an estimate of the number of elements in the queue. 26.48 // The "careful" version admits the possibility of pop_local/pop_global 26.49 // races. 
26.50 - uint size() { 26.51 + uint size() const { 26.52 return size(_bottom, _age.top()); 26.53 } 26.54 26.55 - uint dirty_size() { 26.56 + uint dirty_size() const { 26.57 return dirty_size(_bottom, _age.top()); 26.58 } 26.59 26.60 @@ -132,19 +130,36 @@ 26.61 26.62 // Maximum number of elements allowed in the queue. This is two less 26.63 // than the actual queue size, for somewhat complicated reasons. 26.64 - uint max_elems() { return N - 2; } 26.65 + uint max_elems() const { return N - 2; } 26.66 26.67 // Total size of queue. 26.68 static const uint total_size() { return N; } 26.69 }; 26.70 26.71 -template<class E> class GenericTaskQueue: public TaskQueueSuper { 26.72 +template<class E, unsigned int N = TASKQUEUE_SIZE> 26.73 +class GenericTaskQueue: public TaskQueueSuper<N> { 26.74 +protected: 26.75 + typedef typename TaskQueueSuper<N>::Age Age; 26.76 + typedef typename TaskQueueSuper<N>::idx_t idx_t; 26.77 + 26.78 + using TaskQueueSuper<N>::_bottom; 26.79 + using TaskQueueSuper<N>::_age; 26.80 + using TaskQueueSuper<N>::increment_index; 26.81 + using TaskQueueSuper<N>::decrement_index; 26.82 + using TaskQueueSuper<N>::dirty_size; 26.83 + 26.84 +public: 26.85 + using TaskQueueSuper<N>::max_elems; 26.86 + using TaskQueueSuper<N>::size; 26.87 + 26.88 private: 26.89 // Slow paths for push, pop_local. (pop_global has no fast path.) 26.90 bool push_slow(E t, uint dirty_n_elems); 26.91 bool pop_local_slow(uint localBot, Age oldAge); 26.92 26.93 public: 26.94 + typedef E element_type; 26.95 + 26.96 // Initializes the queue to empty. 
26.97 GenericTaskQueue(); 26.98 26.99 @@ -175,19 +190,19 @@ 26.100 volatile E* _elems; 26.101 }; 26.102 26.103 -template<class E> 26.104 -GenericTaskQueue<E>::GenericTaskQueue():TaskQueueSuper() { 26.105 +template<class E, unsigned int N> 26.106 +GenericTaskQueue<E, N>::GenericTaskQueue() { 26.107 assert(sizeof(Age) == sizeof(size_t), "Depends on this."); 26.108 } 26.109 26.110 -template<class E> 26.111 -void GenericTaskQueue<E>::initialize() { 26.112 +template<class E, unsigned int N> 26.113 +void GenericTaskQueue<E, N>::initialize() { 26.114 _elems = NEW_C_HEAP_ARRAY(E, N); 26.115 guarantee(_elems != NULL, "Allocation failed."); 26.116 } 26.117 26.118 -template<class E> 26.119 -void GenericTaskQueue<E>::oops_do(OopClosure* f) { 26.120 +template<class E, unsigned int N> 26.121 +void GenericTaskQueue<E, N>::oops_do(OopClosure* f) { 26.122 // tty->print_cr("START OopTaskQueue::oops_do"); 26.123 uint iters = size(); 26.124 uint index = _bottom; 26.125 @@ -203,21 +218,21 @@ 26.126 // tty->print_cr("END OopTaskQueue::oops_do"); 26.127 } 26.128 26.129 - 26.130 -template<class E> 26.131 -bool GenericTaskQueue<E>::push_slow(E t, uint dirty_n_elems) { 26.132 +template<class E, unsigned int N> 26.133 +bool GenericTaskQueue<E, N>::push_slow(E t, uint dirty_n_elems) { 26.134 if (dirty_n_elems == N - 1) { 26.135 // Actually means 0, so do the push. 26.136 uint localBot = _bottom; 26.137 - _elems[localBot] = t; 26.138 + // g++ complains if the volatile result of the assignment is unused. 
26.139 + const_cast<E&>(_elems[localBot] = t); 26.140 OrderAccess::release_store(&_bottom, increment_index(localBot)); 26.141 return true; 26.142 } 26.143 return false; 26.144 } 26.145 26.146 -template<class E> 26.147 -bool GenericTaskQueue<E>:: 26.148 +template<class E, unsigned int N> 26.149 +bool GenericTaskQueue<E, N>:: 26.150 pop_local_slow(uint localBot, Age oldAge) { 26.151 // This queue was observed to contain exactly one element; either this 26.152 // thread will claim it, or a competing "pop_global". In either case, 26.153 @@ -249,8 +264,8 @@ 26.154 return false; 26.155 } 26.156 26.157 -template<class E> 26.158 -bool GenericTaskQueue<E>::pop_global(E& t) { 26.159 +template<class E, unsigned int N> 26.160 +bool GenericTaskQueue<E, N>::pop_global(E& t) { 26.161 Age oldAge = _age.get(); 26.162 uint localBot = _bottom; 26.163 uint n_elems = size(localBot, oldAge.top()); 26.164 @@ -258,7 +273,7 @@ 26.165 return false; 26.166 } 26.167 26.168 - t = _elems[oldAge.top()]; 26.169 + const_cast<E&>(t = _elems[oldAge.top()]); 26.170 Age newAge(oldAge); 26.171 newAge.increment(); 26.172 Age resAge = _age.cmpxchg(newAge, oldAge); 26.173 @@ -269,8 +284,8 @@ 26.174 return resAge == oldAge; 26.175 } 26.176 26.177 -template<class E> 26.178 -GenericTaskQueue<E>::~GenericTaskQueue() { 26.179 +template<class E, unsigned int N> 26.180 +GenericTaskQueue<E, N>::~GenericTaskQueue() { 26.181 FREE_C_HEAP_ARRAY(E, _elems); 26.182 } 26.183 26.184 @@ -283,16 +298,18 @@ 26.185 virtual bool peek() = 0; 26.186 }; 26.187 26.188 -template<class E> class GenericTaskQueueSet: public TaskQueueSetSuper { 26.189 +template<class T> 26.190 +class GenericTaskQueueSet: public TaskQueueSetSuper { 26.191 private: 26.192 uint _n; 26.193 - GenericTaskQueue<E>** _queues; 26.194 + T** _queues; 26.195 26.196 public: 26.197 + typedef typename T::element_type E; 26.198 + 26.199 GenericTaskQueueSet(int n) : _n(n) { 26.200 - typedef GenericTaskQueue<E>* GenericTaskQueuePtr; 26.201 + typedef T* 
GenericTaskQueuePtr; 26.202 _queues = NEW_C_HEAP_ARRAY(GenericTaskQueuePtr, n); 26.203 - guarantee(_queues != NULL, "Allocation failure."); 26.204 for (int i = 0; i < n; i++) { 26.205 _queues[i] = NULL; 26.206 } 26.207 @@ -302,9 +319,9 @@ 26.208 bool steal_best_of_2(uint queue_num, int* seed, E& t); 26.209 bool steal_best_of_all(uint queue_num, int* seed, E& t); 26.210 26.211 - void register_queue(uint i, GenericTaskQueue<E>* q); 26.212 + void register_queue(uint i, T* q); 26.213 26.214 - GenericTaskQueue<E>* queue(uint n); 26.215 + T* queue(uint n); 26.216 26.217 // The thread with queue number "queue_num" (and whose random number seed 26.218 // is at "seed") is trying to steal a task from some other queue. (It 26.219 @@ -316,27 +333,27 @@ 26.220 bool peek(); 26.221 }; 26.222 26.223 -template<class E> 26.224 -void GenericTaskQueueSet<E>::register_queue(uint i, GenericTaskQueue<E>* q) { 26.225 +template<class T> void 26.226 +GenericTaskQueueSet<T>::register_queue(uint i, T* q) { 26.227 assert(i < _n, "index out of range."); 26.228 _queues[i] = q; 26.229 } 26.230 26.231 -template<class E> 26.232 -GenericTaskQueue<E>* GenericTaskQueueSet<E>::queue(uint i) { 26.233 +template<class T> T* 26.234 +GenericTaskQueueSet<T>::queue(uint i) { 26.235 return _queues[i]; 26.236 } 26.237 26.238 -template<class E> 26.239 -bool GenericTaskQueueSet<E>::steal(uint queue_num, int* seed, E& t) { 26.240 +template<class T> bool 26.241 +GenericTaskQueueSet<T>::steal(uint queue_num, int* seed, E& t) { 26.242 for (uint i = 0; i < 2 * _n; i++) 26.243 if (steal_best_of_2(queue_num, seed, t)) 26.244 return true; 26.245 return false; 26.246 } 26.247 26.248 -template<class E> 26.249 -bool GenericTaskQueueSet<E>::steal_best_of_all(uint queue_num, int* seed, E& t) { 26.250 +template<class T> bool 26.251 +GenericTaskQueueSet<T>::steal_best_of_all(uint queue_num, int* seed, E& t) { 26.252 if (_n > 2) { 26.253 int best_k; 26.254 uint best_sz = 0; 26.255 @@ -359,8 +376,8 @@ 26.256 } 26.257 } 26.258 
26.259 -template<class E> 26.260 -bool GenericTaskQueueSet<E>::steal_1_random(uint queue_num, int* seed, E& t) { 26.261 +template<class T> bool 26.262 +GenericTaskQueueSet<T>::steal_1_random(uint queue_num, int* seed, E& t) { 26.263 if (_n > 2) { 26.264 uint k = queue_num; 26.265 while (k == queue_num) k = randomParkAndMiller(seed) % _n; 26.266 @@ -375,8 +392,8 @@ 26.267 } 26.268 } 26.269 26.270 -template<class E> 26.271 -bool GenericTaskQueueSet<E>::steal_best_of_2(uint queue_num, int* seed, E& t) { 26.272 +template<class T> bool 26.273 +GenericTaskQueueSet<T>::steal_best_of_2(uint queue_num, int* seed, E& t) { 26.274 if (_n > 2) { 26.275 uint k1 = queue_num; 26.276 while (k1 == queue_num) k1 = randomParkAndMiller(seed) % _n; 26.277 @@ -397,8 +414,8 @@ 26.278 } 26.279 } 26.280 26.281 -template<class E> 26.282 -bool GenericTaskQueueSet<E>::peek() { 26.283 +template<class T> 26.284 +bool GenericTaskQueueSet<T>::peek() { 26.285 // Try all the queues. 26.286 for (uint j = 0; j < _n; j++) { 26.287 if (_queues[j]->peek()) 26.288 @@ -468,14 +485,16 @@ 26.289 #endif 26.290 }; 26.291 26.292 -template<class E> inline bool GenericTaskQueue<E>::push(E t) { 26.293 +template<class E, unsigned int N> inline bool 26.294 +GenericTaskQueue<E, N>::push(E t) { 26.295 uint localBot = _bottom; 26.296 assert((localBot >= 0) && (localBot < N), "_bottom out of range."); 26.297 idx_t top = _age.top(); 26.298 uint dirty_n_elems = dirty_size(localBot, top); 26.299 - assert((dirty_n_elems >= 0) && (dirty_n_elems < N), "n_elems out of range."); 26.300 + assert(dirty_n_elems < N, "n_elems out of range."); 26.301 if (dirty_n_elems < max_elems()) { 26.302 - _elems[localBot] = t; 26.303 + // g++ complains if the volatile result of the assignment is unused. 
26.304 + const_cast<E&>(_elems[localBot] = t); 26.305 OrderAccess::release_store(&_bottom, increment_index(localBot)); 26.306 return true; 26.307 } else { 26.308 @@ -483,7 +502,8 @@ 26.309 } 26.310 } 26.311 26.312 -template<class E> inline bool GenericTaskQueue<E>::pop_local(E& t) { 26.313 +template<class E, unsigned int N> inline bool 26.314 +GenericTaskQueue<E, N>::pop_local(E& t) { 26.315 uint localBot = _bottom; 26.316 // This value cannot be N-1. That can only occur as a result of 26.317 // the assignment to bottom in this method. If it does, this method 26.318 @@ -497,7 +517,7 @@ 26.319 // This is necessary to prevent any read below from being reordered 26.320 // before the store just above. 26.321 OrderAccess::fence(); 26.322 - t = _elems[localBot]; 26.323 + const_cast<E&>(t = _elems[localBot]); 26.324 // This is a second read of "age"; the "size()" above is the first. 26.325 // If there's still at least one element in the queue, based on the 26.326 // "_bottom" and "age" we've read, then there can be no interference with 26.327 @@ -514,17 +534,23 @@ 26.328 } 26.329 26.330 typedef oop Task; 26.331 -typedef GenericTaskQueue<Task> OopTaskQueue; 26.332 -typedef GenericTaskQueueSet<Task> OopTaskQueueSet; 26.333 +typedef GenericTaskQueue<Task> OopTaskQueue; 26.334 +typedef GenericTaskQueueSet<OopTaskQueue> OopTaskQueueSet; 26.335 26.336 - 26.337 -#define COMPRESSED_OOP_MASK 1 26.338 +#ifdef _MSC_VER 26.339 +#pragma warning(push) 26.340 +// warning C4522: multiple assignment operators specified 26.341 +#pragma warning(disable:4522) 26.342 +#endif 26.343 26.344 // This is a container class for either an oop* or a narrowOop*. 26.345 // Both are pushed onto a task queue and the consumer will test is_narrow() 26.346 // to determine which should be processed. 
26.347 class StarTask { 26.348 void* _holder; // either union oop* or narrowOop* 26.349 + 26.350 + enum { COMPRESSED_OOP_MASK = 1 }; 26.351 + 26.352 public: 26.353 StarTask(narrowOop* p) { 26.354 assert(((uintptr_t)p & COMPRESSED_OOP_MASK) == 0, "Information loss!"); 26.355 @@ -540,20 +566,61 @@ 26.356 return (narrowOop*)((uintptr_t)_holder & ~COMPRESSED_OOP_MASK); 26.357 } 26.358 26.359 - // Operators to preserve const/volatile in assignments required by gcc 26.360 - void operator=(const volatile StarTask& t) volatile { _holder = t._holder; } 26.361 + StarTask& operator=(const StarTask& t) { 26.362 + _holder = t._holder; 26.363 + return *this; 26.364 + } 26.365 + volatile StarTask& operator=(const volatile StarTask& t) volatile { 26.366 + _holder = t._holder; 26.367 + return *this; 26.368 + } 26.369 26.370 bool is_narrow() const { 26.371 return (((uintptr_t)_holder & COMPRESSED_OOP_MASK) != 0); 26.372 } 26.373 }; 26.374 26.375 -typedef GenericTaskQueue<StarTask> OopStarTaskQueue; 26.376 -typedef GenericTaskQueueSet<StarTask> OopStarTaskQueueSet; 26.377 +class ObjArrayTask 26.378 +{ 26.379 +public: 26.380 + ObjArrayTask(oop o = NULL, int idx = 0): _obj(o), _index(idx) { } 26.381 + ObjArrayTask(oop o, size_t idx): _obj(o), _index(int(idx)) { 26.382 + assert(idx <= size_t(max_jint), "too big"); 26.383 + } 26.384 + ObjArrayTask(const ObjArrayTask& t): _obj(t._obj), _index(t._index) { } 26.385 + 26.386 + ObjArrayTask& operator =(const ObjArrayTask& t) { 26.387 + _obj = t._obj; 26.388 + _index = t._index; 26.389 + return *this; 26.390 + } 26.391 + volatile ObjArrayTask& 26.392 + operator =(const volatile ObjArrayTask& t) volatile { 26.393 + _obj = t._obj; 26.394 + _index = t._index; 26.395 + return *this; 26.396 + } 26.397 + 26.398 + inline oop obj() const { return _obj; } 26.399 + inline int index() const { return _index; } 26.400 + 26.401 + DEBUG_ONLY(bool is_valid() const); // Tasks to be pushed/popped must be valid. 
26.402 + 26.403 +private: 26.404 + oop _obj; 26.405 + int _index; 26.406 +}; 26.407 + 26.408 +#ifdef _MSC_VER 26.409 +#pragma warning(pop) 26.410 +#endif 26.411 + 26.412 +typedef GenericTaskQueue<StarTask> OopStarTaskQueue; 26.413 +typedef GenericTaskQueueSet<OopStarTaskQueue> OopStarTaskQueueSet; 26.414 26.415 typedef size_t RegionTask; // index for region 26.416 -typedef GenericTaskQueue<RegionTask> RegionTaskQueue; 26.417 -typedef GenericTaskQueueSet<RegionTask> RegionTaskQueueSet; 26.418 +typedef GenericTaskQueue<RegionTask> RegionTaskQueue; 26.419 +typedef GenericTaskQueueSet<RegionTaskQueue> RegionTaskQueueSet; 26.420 26.421 class RegionTaskQueueWithOverflow: public CHeapObj { 26.422 protected: