Thu, 02 Apr 2009 15:57:41 -0700
6824570: ParNew: Fix memory leak introduced in 6819891
Summary: Allocate worker-local overflow stacks, introduced in 6819891, along with ParNewGeneration, rather than with the per-scavenge ParScanThreadState.
Reviewed-by: jmasa
/*
 * Copyright 2001-2009 Sun Microsystems, Inc.  All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 */

class ChunkArray;
class ParScanWithoutBarrierClosure;
class ParScanWithBarrierClosure;
class ParRootScanWithoutBarrierClosure;
class ParRootScanWithBarrierTwoGensClosure;
class ParEvacuateFollowersClosure;

// It would be better if these types could be kept local to the .cpp file,
// but they must be here to allow ParScanClosure::do_oop_work to be defined
// in genOopClosures.inline.hpp.
typedef OopTaskQueue    ObjToScanQueue;
typedef OopTaskQueueSet ObjToScanQueueSet;

// Enable this to get push/pop/steal stats.
const int PAR_STATS_ENABLED = 0;

class ParKeepAliveClosure: public DefNewGeneration::KeepAliveClosure {
 private:
  ParScanWeakRefClosure* _par_cl;
 protected:
  template <class T> void do_oop_work(T* p);
 public:
  ParKeepAliveClosure(ParScanWeakRefClosure* cl);
  virtual void do_oop(oop* p);
  virtual void do_oop(narrowOop* p);
};

// The state needed by a thread performing parallel young-gen collection.
class ParScanThreadState {
  friend class ParScanThreadStateSet;
 private:
  ObjToScanQueue*     _work_queue;
  GrowableArray<oop>* _overflow_stack;

  ParGCAllocBuffer _to_space_alloc_buffer;

  ParScanWithoutBarrierClosure         _to_space_closure;      // scan_without_gc_barrier
  ParScanWithBarrierClosure            _old_gen_closure;       // scan_with_gc_barrier
  ParRootScanWithoutBarrierClosure     _to_space_root_closure; // scan_root_without_gc_barrier
  // One of these two will be passed to process_strong_roots, which will
  // set its generation.  The first is for two-gen configs where the
  // old gen collects the perm gen; the second is for arbitrary configs.
  // The second isn't used right now (it used to be used for the train, an
  // incremental collector), but the declaration has been left as a reminder.
  ParRootScanWithBarrierTwoGensClosure _older_gen_closure;
  // This closure will always be bound to the old gen; it will be used
  // in evacuate_followers.
  ParRootScanWithBarrierTwoGensClosure _old_gen_root_closure;  // scan_old_root_with_gc_barrier
  ParEvacuateFollowersClosure          _evacuate_followers;
  DefNewGeneration::IsAliveClosure     _is_alive_closure;
  ParScanWeakRefClosure                _scan_weak_ref_closure;
  ParKeepAliveClosure                  _keep_alive_closure;

  Space* _to_space;
  Space* to_space() { return _to_space; }

  ParNewGeneration* _young_gen;
  ParNewGeneration* young_gen() const { return _young_gen; }

  Generation* _old_gen;
  Generation* old_gen() { return _old_gen; }

  HeapWord* _young_old_boundary;

  int _hash_seed;
  int _thread_num;
  ageTable _ageTable;

  bool _to_space_full;

  int _pushes, _pops, _steals, _steal_attempts, _term_attempts;
  int _overflow_pushes, _overflow_refills, _overflow_refill_objs;

  // Timing numbers.
  double _start;
  double _start_strong_roots;
  double _strong_roots_time;
  double _start_term;
  double _term_time;

  // Helper for trim_queues.  Scans a subset of an array and makes the
  // remainder available for work stealing.
  void scan_partial_array_and_push_remainder(oop obj);
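
  // A conceptual sketch of the idea (the helper names below are
  // hypothetical; the real logic is in the .cpp file): scan a bounded
  // prefix of the array and push the rest back on the queue so other
  // workers can steal it.
  //
  //   int end = MIN2(start + ParGCArrayScanChunk, arr->length());
  //   scan_elements(arr, start, end);     // hypothetical helper
  //   if (end < arr->length()) {
  //     push_remainder_task(arr, end);    // hypothetical helper
  //   }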

  // In support of CMS' parallel rescan of survivor space.
  ChunkArray* _survivor_chunk_array;
  ChunkArray* survivor_chunk_array() { return _survivor_chunk_array; }

  void record_survivor_plab(HeapWord* plab_start, size_t plab_word_size);

  ParScanThreadState(Space* to_space_, ParNewGeneration* gen_,
                     Generation* old_gen_, int thread_num_,
                     ObjToScanQueueSet* work_queue_set_,
                     GrowableArray<oop>** overflow_stack_set_,
                     size_t desired_plab_sz_,
                     ParallelTaskTerminator& term_);

 public:
  ageTable* age_table() { return &_ageTable; }

  ObjToScanQueue* work_queue() { return _work_queue; }

  ParGCAllocBuffer* to_space_alloc_buffer() {
    return &_to_space_alloc_buffer;
  }

  ParEvacuateFollowersClosure&      evacuate_followers_closure() { return _evacuate_followers; }
  DefNewGeneration::IsAliveClosure& is_alive_closure()      { return _is_alive_closure; }
  ParScanWeakRefClosure&            scan_weak_ref_closure() { return _scan_weak_ref_closure; }
  ParKeepAliveClosure&              keep_alive_closure()    { return _keep_alive_closure; }
  ParScanClosure&                   older_gen_closure()     { return _older_gen_closure; }
  ParRootScanWithoutBarrierClosure& to_space_root_closure() { return _to_space_root_closure; }

  // Decrease queue size below "max_size".
  void trim_queues(int max_size);

  // Private, worker-local overflow stack usage.
  GrowableArray<oop>* overflow_stack() { return _overflow_stack; }
  bool take_from_overflow_stack();
  void push_on_overflow_stack(oop p);

  // Is new_obj a candidate for the scan_partial_array_and_push_remainder method?
  inline bool should_be_partially_scanned(oop new_obj, oop old_obj) const;

  int* hash_seed() { return &_hash_seed; }
  int  thread_num() { return _thread_num; }

  // Allocate a to-space block of size "word_sz", or else return NULL.
  HeapWord* alloc_in_to_space_slow(size_t word_sz);

  HeapWord* alloc_in_to_space(size_t word_sz) {
    HeapWord* obj = to_space_alloc_buffer()->allocate(word_sz);
    if (obj != NULL) return obj;
    else return alloc_in_to_space_slow(word_sz);
  }

  HeapWord* young_old_boundary() { return _young_old_boundary; }

  void set_young_old_boundary(HeapWord* boundary) {
    _young_old_boundary = boundary;
  }

  // Undo the most recent allocation ("obj", of "word_sz").
  void undo_alloc_in_to_space(HeapWord* obj, size_t word_sz);

  int pushes()               { return _pushes; }
  int pops()                 { return _pops; }
  int steals()               { return _steals; }
  int steal_attempts()       { return _steal_attempts; }
  int term_attempts()        { return _term_attempts; }
  int overflow_pushes()      { return _overflow_pushes; }
  int overflow_refills()     { return _overflow_refills; }
  int overflow_refill_objs() { return _overflow_refill_objs; }

  void note_push()          { if (PAR_STATS_ENABLED) _pushes++; }
  void note_pop()           { if (PAR_STATS_ENABLED) _pops++; }
  void note_steal()         { if (PAR_STATS_ENABLED) _steals++; }
  void note_steal_attempt() { if (PAR_STATS_ENABLED) _steal_attempts++; }
  void note_term_attempt()  { if (PAR_STATS_ENABLED) _term_attempts++; }
  void note_overflow_push() { if (PAR_STATS_ENABLED) _overflow_pushes++; }
  void note_overflow_refill(int objs) {
    if (PAR_STATS_ENABLED) {
      _overflow_refills++;
      _overflow_refill_objs += objs;
    }
  }

  void start_strong_roots() {
    _start_strong_roots = os::elapsedTime();
  }
  void end_strong_roots() {
    _strong_roots_time += (os::elapsedTime() - _start_strong_roots);
  }
  double strong_roots_time() { return _strong_roots_time; }

  void start_term_time() {
    note_term_attempt();
    _start_term = os::elapsedTime();
  }
  void end_term_time() {
    _term_time += (os::elapsedTime() - _start_term);
  }
  double term_time() { return _term_time; }

  double elapsed() {
    return os::elapsedTime() - _start;
  }
};
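
// A minimal usage sketch, not part of this change set: a worker pushes to
// its bounded work queue first and spills to its private, worker-local
// overflow stack only when the queue is full.  The function name
// "example_push_task" is hypothetical.
inline void example_push_task(ParScanThreadState* state, oop task) {
  if (!state->work_queue()->push(task)) {
    // The fixed-capacity queue rejected the push; fall back to the
    // unbounded overflow stack owned by this worker.
    state->push_on_overflow_stack(task);
  }
}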

class ParNewGenTask: public AbstractGangTask {
 private:
  ParNewGeneration*            _gen;
  Generation*                  _next_gen;
  HeapWord*                    _young_old_boundary;
  class ParScanThreadStateSet* _state_set;

 public:
  ParNewGenTask(ParNewGeneration*      gen,
                Generation*            next_gen,
                HeapWord*              young_old_boundary,
                ParScanThreadStateSet* state_set);

  HeapWord* young_old_boundary() { return _young_old_boundary; }

  void work(int i);
};

class KeepAliveClosure: public DefNewGeneration::KeepAliveClosure {
 protected:
  template <class T> void do_oop_work(T* p);
 public:
  KeepAliveClosure(ScanWeakRefClosure* cl);
  virtual void do_oop(oop* p);
  virtual void do_oop(narrowOop* p);
};

class EvacuateFollowersClosureGeneral: public VoidClosure {
 private:
  GenCollectedHeap* _gch;
  int               _level;
  OopsInGenClosure* _scan_cur_or_nonheap;
  OopsInGenClosure* _scan_older;
 public:
  EvacuateFollowersClosureGeneral(GenCollectedHeap* gch, int level,
                                  OopsInGenClosure* cur,
                                  OopsInGenClosure* older);
  virtual void do_void();
};

// Closure for scanning ParNewGeneration.
// Same as ScanClosure, except that it performs the parallel GC barrier.
class ScanClosureWithParBarrier: public ScanClosure {
 protected:
  template <class T> void do_oop_work(T* p);
 public:
  ScanClosureWithParBarrier(ParNewGeneration* g, bool gc_barrier);
  virtual void do_oop(oop* p);
  virtual void do_oop(narrowOop* p);
};

// Implements AbstractRefProcTaskExecutor for ParNew.
class ParNewRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
 private:
  ParNewGeneration&      _generation;
  ParScanThreadStateSet& _state_set;
 public:
  ParNewRefProcTaskExecutor(ParNewGeneration& generation,
                            ParScanThreadStateSet& state_set)
    : _generation(generation), _state_set(state_set)
  { }

  // Executes a task using worker threads.
  virtual void execute(ProcessTask& task);
  virtual void execute(EnqueueTask& task);
  // Switch to single threaded mode.
  virtual void set_single_threaded_mode();
};

// A Generation that does parallel young-gen collection.

class ParNewGeneration: public DefNewGeneration {
  friend class ParNewGenTask;
  friend class ParNewRefProcTask;
  friend class ParNewRefProcTaskExecutor;
  friend class ParScanThreadStateSet;
  friend class ParEvacuateFollowersClosure;

 private:
  // XXX use a global constant instead of 64!
  struct ObjToScanQueuePadded {
    ObjToScanQueue work_queue;
    char pad[64 - sizeof(ObjToScanQueue)];  // prevent false sharing
  };
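
  // Illustrative answer to the XXX above (the names below are hypothetical,
  // not existing HotSpot constants): give the magic number 64 a name so the
  // intent -- one queue per cache line -- is explicit.
  //
  //   const int EXAMPLE_CACHE_LINE_SIZE = 64;  // hypothetical constant
  //   struct ObjToScanQueuePadded {
  //     ObjToScanQueue work_queue;
  //     char pad[EXAMPLE_CACHE_LINE_SIZE - sizeof(ObjToScanQueue)];
  //   };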

  // The per-worker-thread work queues
  ObjToScanQueueSet* _task_queues;

  // Per-worker-thread local overflow stacks
  GrowableArray<oop>** _overflow_stacks;

  // Desired size of survivor space plab's
  PLABStats _plab_stats;

  // A list of from-space images of to-be-scanned objects, threaded through
  // klass-pointers (klass information already copied to the forwarded
  // image).  Manipulated with CAS.
  oop _overflow_list;
  NOT_PRODUCT(ssize_t _num_par_pushes;)
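
  // Illustrative sketch of the lock-free (CAS) push onto this global list;
  // the real implementation is push_on_overflow_list() in the .cpp file,
  // and "list_head" is a hypothetical stand-in for &_overflow_list:
  //
  //   oop cur;
  //   do {
  //     cur = *list_head;
  //     // Reuse the klass word as the list link; the real klass has
  //     // already been copied into the forwarded to-space image.
  //     obj->set_klass_to_list_ptr(cur);
  //   } while ((oop)Atomic::cmpxchg_ptr(obj, list_head, cur) != cur);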

  // If true, the older generation does not support promotion undo, so
  // avoid it.
  static bool _avoid_promotion_undo;

  // This closure is used by the reference processor to filter out
  // references to a live referent.
  DefNewGeneration::IsAliveClosure _is_alive_closure;

  static oop real_forwardee_slow(oop obj);
  static void waste_some_time();

  // Preserve the mark of "obj", if necessary, in preparation for its mark
  // word being overwritten with a self-forwarding pointer.
  void preserve_mark_if_necessary(oop obj, markOop m);

 protected:

  bool _survivor_overflow;

  bool avoid_promotion_undo() { return _avoid_promotion_undo; }
  void set_avoid_promotion_undo(bool v) { _avoid_promotion_undo = v; }

  bool survivor_overflow() { return _survivor_overflow; }
  void set_survivor_overflow(bool v) { _survivor_overflow = v; }

  // Adjust the tenuring threshold.  See the implementation for
  // the details of the policy.
  virtual void adjust_desired_tenuring_threshold();

 public:
  ParNewGeneration(ReservedSpace rs, size_t initial_byte_size, int level);

  ~ParNewGeneration() {
    for (uint i = 0; i < ParallelGCThreads; i++)
      delete _task_queues->queue(i);

    delete _task_queues;
  }

  virtual void ref_processor_init();
  virtual Generation::Name kind()        { return Generation::ParNew; }
  virtual const char* name() const;
  virtual const char* short_name() const { return "ParNew"; }

  // override
  virtual bool refs_discovery_is_mt() const {
    assert(UseParNewGC, "ParNewGeneration only when UseParNewGC");
    return ParallelGCThreads > 1;
  }

  // Make the collection virtual.
  virtual void collect(bool   full,
                       bool   clear_all_soft_refs,
                       size_t size,
                       bool   is_tlab);

  // This needs to be visible to the closure function.
  // "obj" is the object to be copied, "m" is a recent value of its mark
  // that must not contain a forwarding pointer (though one might be
  // inserted in "obj"'s mark word by a parallel thread).
  inline oop copy_to_survivor_space(ParScanThreadState* par_scan_state,
                                    oop obj, size_t obj_sz, markOop m) {
    if (_avoid_promotion_undo) {
      return copy_to_survivor_space_avoiding_promotion_undo(par_scan_state,
                                                            obj, obj_sz, m);
    }

    return copy_to_survivor_space_with_undo(par_scan_state, obj, obj_sz, m);
  }

  oop copy_to_survivor_space_avoiding_promotion_undo(ParScanThreadState* par_scan_state,
                                                     oop obj, size_t obj_sz, markOop m);

  oop copy_to_survivor_space_with_undo(ParScanThreadState* par_scan_state,
                                       oop obj, size_t obj_sz, markOop m);

  // In support of testing overflow code
  NOT_PRODUCT(int _overflow_counter;)
  NOT_PRODUCT(bool should_simulate_overflow();)

  // Accessor for overflow list
  oop overflow_list() { return _overflow_list; }

  // Push the given (from-space) object on the global overflow list.
  void push_on_overflow_list(oop from_space_obj, ParScanThreadState* par_scan_state);

  // If the global overflow list is non-empty, move some tasks from it
  // onto "work_q" (which need not be empty).  No more than 1/4 of the
  // available space on "work_q" is used.
  bool take_from_overflow_list(ParScanThreadState* par_scan_state);
  bool take_from_overflow_list_work(ParScanThreadState* par_scan_state);
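
  // Sketch of the "1/4 of the available space" cap described above
  // (hypothetical names; TASK_QUEUE_CAPACITY stands in for however the
  // queue's capacity is obtained):
  //
  //   size_t available = TASK_QUEUE_CAPACITY - work_q->size();
  //   size_t to_take   = MIN2(available / 4, overflow_list_length);
  //
  // For example, with a 16K-entry queue already holding 4K tasks, at most
  // (16K - 4K) / 4 = 3K tasks are taken from the overflow list.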

  // The task queues to be used by parallel GC threads.
  ObjToScanQueueSet* task_queues() {
    return _task_queues;
  }

  PLABStats* plab_stats() {
    return &_plab_stats;
  }

  size_t desired_plab_sz() {
    return _plab_stats.desired_plab_sz();
  }

  static oop real_forwardee(oop obj);

  DEBUG_ONLY(static bool is_legal_forward_ptr(oop p);)
};
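
// Illustrative drain-and-steal loop, not part of this change set: after
// draining its own queue, a worker tries to steal from peers through the
// shared queue set.  "example_drain_and_steal" is a hypothetical name; the
// real draining logic lives in ParEvacuateFollowersClosure in the .cpp file.
inline void example_drain_and_steal(ParNewGeneration* gen,
                                    ParScanThreadState* state) {
  oop task;
  while (state->work_queue()->pop_local(task) ||
         gen->task_queues()->steal(state->thread_num(),
                                   state->hash_seed(), task)) {
    // Process "task" here (e.g., scan and copy its referents).
  }
}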