Thu, 02 Apr 2009 15:57:41 -0700
6824570: ParNew: Fix memory leak introduced in 6819891
Summary: Allocate the worker-local overflow stacks (introduced in 6819891) once, when the ParNewGeneration is constructed, rather than in each per-scavenge ParScanThreadState, where they were re-allocated on every scavenge and leaked.
Reviewed-by: jmasa
1.1 --- a/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp Sat Mar 28 15:47:29 2009 -0700 1.2 +++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp Thu Apr 02 15:57:41 2009 -0700 1.3 @@ -34,10 +34,12 @@ 1.4 Generation* old_gen_, 1.5 int thread_num_, 1.6 ObjToScanQueueSet* work_queue_set_, 1.7 + GrowableArray<oop>** overflow_stack_set_, 1.8 size_t desired_plab_sz_, 1.9 ParallelTaskTerminator& term_) : 1.10 _to_space(to_space_), _old_gen(old_gen_), _young_gen(gen_), _thread_num(thread_num_), 1.11 _work_queue(work_queue_set_->queue(thread_num_)), _to_space_full(false), 1.12 + _overflow_stack(overflow_stack_set_[thread_num_]), 1.13 _ageTable(false), // false ==> not the global age table, no perf data. 1.14 _to_space_alloc_buffer(desired_plab_sz_), 1.15 _to_space_closure(gen_, this), _old_gen_closure(gen_, this), 1.16 @@ -57,11 +59,6 @@ 1.17 _start = os::elapsedTime(); 1.18 _old_gen_closure.set_generation(old_gen_); 1.19 _old_gen_root_closure.set_generation(old_gen_); 1.20 - if (UseCompressedOops) { 1.21 - _overflow_stack = new (ResourceObj::C_HEAP) GrowableArray<oop>(512, true); 1.22 - } else { 1.23 - _overflow_stack = NULL; 1.24 - } 1.25 } 1.26 #ifdef _MSC_VER 1.27 #pragma warning( pop ) 1.28 @@ -155,7 +152,7 @@ 1.29 } 1.30 1.31 bool ParScanThreadState::take_from_overflow_stack() { 1.32 - assert(UseCompressedOops, "Else should not call"); 1.33 + assert(ParGCUseLocalOverflow, "Else should not call"); 1.34 assert(young_gen()->overflow_list() == NULL, "Error"); 1.35 ObjToScanQueue* queue = work_queue(); 1.36 GrowableArray<oop>* of_stack = overflow_stack(); 1.37 @@ -183,7 +180,7 @@ 1.38 } 1.39 1.40 void ParScanThreadState::push_on_overflow_stack(oop p) { 1.41 - assert(UseCompressedOops, "Else should not call"); 1.42 + assert(ParGCUseLocalOverflow, "Else should not call"); 1.43 overflow_stack()->push(p); 1.44 assert(young_gen()->overflow_list() == NULL, "Error"); 1.45 } 1.46 @@ -260,6 +257,7 @@ 1.47 ParNewGeneration& gen, 1.48 Generation& old_gen, 1.49 
ObjToScanQueueSet& queue_set, 1.50 + GrowableArray<oop>** overflow_stacks_, 1.51 size_t desired_plab_sz, 1.52 ParallelTaskTerminator& term); 1.53 inline ParScanThreadState& thread_sate(int i); 1.54 @@ -282,6 +280,7 @@ 1.55 ParScanThreadStateSet::ParScanThreadStateSet( 1.56 int num_threads, Space& to_space, ParNewGeneration& gen, 1.57 Generation& old_gen, ObjToScanQueueSet& queue_set, 1.58 + GrowableArray<oop>** overflow_stack_set_, 1.59 size_t desired_plab_sz, ParallelTaskTerminator& term) 1.60 : ResourceArray(sizeof(ParScanThreadState), num_threads), 1.61 _gen(gen), _next_gen(old_gen), _term(term), 1.62 @@ -292,7 +291,7 @@ 1.63 for (int i = 0; i < num_threads; ++i) { 1.64 new ((ParScanThreadState*)_data + i) 1.65 ParScanThreadState(&to_space, &gen, &old_gen, i, &queue_set, 1.66 - desired_plab_sz, term); 1.67 + overflow_stack_set_, desired_plab_sz, term); 1.68 } 1.69 } 1.70 1.71 @@ -519,6 +518,17 @@ 1.72 for (uint i2 = 0; i2 < ParallelGCThreads; i2++) 1.73 _task_queues->queue(i2)->initialize(); 1.74 1.75 + _overflow_stacks = NEW_C_HEAP_ARRAY(GrowableArray<oop>*, ParallelGCThreads); 1.76 + guarantee(_overflow_stacks != NULL, "Overflow stack set allocation failure"); 1.77 + for (uint i = 0; i < ParallelGCThreads; i++) { 1.78 + if (ParGCUseLocalOverflow) { 1.79 + _overflow_stacks[i] = new (ResourceObj::C_HEAP) GrowableArray<oop>(512, true); 1.80 + guarantee(_overflow_stacks[i] != NULL, "Overflow Stack allocation failure."); 1.81 + } else { 1.82 + _overflow_stacks[i] = NULL; 1.83 + } 1.84 + } 1.85 + 1.86 if (UsePerfData) { 1.87 EXCEPTION_MARK; 1.88 ResourceMark rm; 1.89 @@ -784,7 +794,7 @@ 1.90 ParallelTaskTerminator _term(workers->total_workers(), task_queues()); 1.91 ParScanThreadStateSet thread_state_set(workers->total_workers(), 1.92 *to(), *this, *_next_gen, *task_queues(), 1.93 - desired_plab_sz(), _term); 1.94 + _overflow_stacks, desired_plab_sz(), _term); 1.95 1.96 ParNewGenTask tsk(this, _next_gen, reserved().end(), &thread_state_set); 1.97 int n_workers = 
workers->total_workers(); 1.98 @@ -1238,11 +1248,12 @@ 1.99 #define BUSY (oop(0x1aff1aff)) 1.100 void ParNewGeneration::push_on_overflow_list(oop from_space_obj, ParScanThreadState* par_scan_state) { 1.101 assert(is_in_reserved(from_space_obj), "Should be from this generation"); 1.102 - if (UseCompressedOops) { 1.103 + if (ParGCUseLocalOverflow) { 1.104 // In the case of compressed oops, we use a private, not-shared 1.105 // overflow stack. 1.106 par_scan_state->push_on_overflow_stack(from_space_obj); 1.107 } else { 1.108 + assert(!UseCompressedOops, "Error"); 1.109 // if the object has been forwarded to itself, then we cannot 1.110 // use the klass pointer for the linked list. Instead we have 1.111 // to allocate an oopDesc in the C-Heap and use that for the linked list. 1.112 @@ -1275,9 +1286,10 @@ 1.113 bool ParNewGeneration::take_from_overflow_list(ParScanThreadState* par_scan_state) { 1.114 bool res; 1.115 1.116 - if (UseCompressedOops) { 1.117 + if (ParGCUseLocalOverflow) { 1.118 res = par_scan_state->take_from_overflow_stack(); 1.119 } else { 1.120 + assert(!UseCompressedOops, "Error"); 1.121 res = take_from_overflow_list_work(par_scan_state); 1.122 } 1.123 return res; 1.124 @@ -1305,6 +1317,7 @@ 1.125 (size_t)ParGCDesiredObjsFromOverflowList); 1.126 1.127 assert(par_scan_state->overflow_stack() == NULL, "Error"); 1.128 + assert(!UseCompressedOops, "Error"); 1.129 if (_overflow_list == NULL) return false; 1.130 1.131 // Otherwise, there was something there; try claiming the list.
2.1 --- a/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp Sat Mar 28 15:47:29 2009 -0700 2.2 +++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp Thu Apr 02 15:57:41 2009 -0700 2.3 @@ -33,8 +33,8 @@ 2.4 // but they must be here to allow ParScanClosure::do_oop_work to be defined 2.5 // in genOopClosures.inline.hpp. 2.6 2.7 -typedef OopTaskQueue ObjToScanQueue; 2.8 -typedef OopTaskQueueSet ObjToScanQueueSet; 2.9 +typedef OopTaskQueue ObjToScanQueue; 2.10 +typedef OopTaskQueueSet ObjToScanQueueSet; 2.11 2.12 // Enable this to get push/pop/steal stats. 2.13 const int PAR_STATS_ENABLED = 0; 2.14 @@ -116,7 +116,9 @@ 2.15 2.16 ParScanThreadState(Space* to_space_, ParNewGeneration* gen_, 2.17 Generation* old_gen_, int thread_num_, 2.18 - ObjToScanQueueSet* work_queue_set_, size_t desired_plab_sz_, 2.19 + ObjToScanQueueSet* work_queue_set_, 2.20 + GrowableArray<oop>** overflow_stack_set_, 2.21 + size_t desired_plab_sz_, 2.22 ParallelTaskTerminator& term_); 2.23 2.24 public: 2.25 @@ -296,9 +298,12 @@ 2.26 char pad[64 - sizeof(ObjToScanQueue)]; // prevent false sharing 2.27 }; 2.28 2.29 - // The per-thread work queues, available here for stealing. 2.30 + // The per-worker-thread work queues 2.31 ObjToScanQueueSet* _task_queues; 2.32 2.33 + // Per-worker-thread local overflow stacks 2.34 + GrowableArray<oop>** _overflow_stacks; 2.35 + 2.36 // Desired size of survivor space plab's 2.37 PLABStats _plab_stats; 2.38
3.1 --- a/src/share/vm/runtime/arguments.cpp Sat Mar 28 15:47:29 2009 -0700 3.2 +++ b/src/share/vm/runtime/arguments.cpp Thu Apr 02 15:57:41 2009 -0700 3.3 @@ -971,7 +971,7 @@ 3.4 } else { 3.5 no_shared_spaces(); 3.6 3.7 - // By default YoungPLABSize and OldPLABSize are set to 4096 and 1024 correspondinly, 3.8 + // By default YoungPLABSize and OldPLABSize are set to 4096 and 1024 respectively, 3.9 // these settings are default for Parallel Scavenger. For ParNew+Tenured configuration 3.10 // we set them to 1024 and 1024. 3.11 // See CR 6362902. 3.12 @@ -987,6 +987,16 @@ 3.13 if (AlwaysTenure) { 3.14 FLAG_SET_CMDLINE(intx, MaxTenuringThreshold, 0); 3.15 } 3.16 + // When using compressed oops, we use local overflow stacks, 3.17 + // rather than using a global overflow list chained through 3.18 + // the klass word of the object's pre-image. 3.19 + if (UseCompressedOops && !ParGCUseLocalOverflow) { 3.20 + if (!FLAG_IS_DEFAULT(ParGCUseLocalOverflow)) { 3.21 + warning("Forcing +ParGCUseLocalOverflow: needed if using compressed references"); 3.22 + } 3.23 + FLAG_SET_DEFAULT(ParGCUseLocalOverflow, true); 3.24 + } 3.25 + assert(ParGCUseLocalOverflow || !UseCompressedOops, "Error"); 3.26 } 3.27 } 3.28
4.1 --- a/src/share/vm/runtime/globals.hpp Sat Mar 28 15:47:29 2009 -0700 4.2 +++ b/src/share/vm/runtime/globals.hpp Thu Apr 02 15:57:41 2009 -0700 4.3 @@ -1316,8 +1316,11 @@ 4.4 product(intx, ParGCArrayScanChunk, 50, \ 4.5 "Scan a subset and push remainder, if array is bigger than this") \ 4.6 \ 4.7 + product(bool, ParGCUseLocalOverflow, false, \ 4.8 + "Instead of a global overflow list, use local overflow stacks") \ 4.9 + \ 4.10 product(bool, ParGCTrimOverflow, true, \ 4.11 - "Eagerly trim the overflow lists (useful for UseCompressedOops") \ 4.12 + "Eagerly trim the local overflow lists (when ParGCUseLocalOverflow") \ 4.13 \ 4.14 notproduct(bool, ParGCWorkQueueOverflowALot, false, \ 4.15 "Whether we should simulate work queue overflow in ParNew") \