32 ParScanThreadState::ParScanThreadState(Space* to_space_, |
32 ParScanThreadState::ParScanThreadState(Space* to_space_, |
33 ParNewGeneration* gen_, |
33 ParNewGeneration* gen_, |
34 Generation* old_gen_, |
34 Generation* old_gen_, |
35 int thread_num_, |
35 int thread_num_, |
36 ObjToScanQueueSet* work_queue_set_, |
36 ObjToScanQueueSet* work_queue_set_, |
|
37 GrowableArray<oop>** overflow_stack_set_, |
37 size_t desired_plab_sz_, |
38 size_t desired_plab_sz_, |
38 ParallelTaskTerminator& term_) : |
39 ParallelTaskTerminator& term_) : |
39 _to_space(to_space_), _old_gen(old_gen_), _young_gen(gen_), _thread_num(thread_num_), |
40 _to_space(to_space_), _old_gen(old_gen_), _young_gen(gen_), _thread_num(thread_num_), |
40 _work_queue(work_queue_set_->queue(thread_num_)), _to_space_full(false), |
41 _work_queue(work_queue_set_->queue(thread_num_)), _to_space_full(false), |
|
42 _overflow_stack(overflow_stack_set_[thread_num_]), |
41 _ageTable(false), // false ==> not the global age table, no perf data. |
43 _ageTable(false), // false ==> not the global age table, no perf data. |
42 _to_space_alloc_buffer(desired_plab_sz_), |
44 _to_space_alloc_buffer(desired_plab_sz_), |
43 _to_space_closure(gen_, this), _old_gen_closure(gen_, this), |
45 _to_space_closure(gen_, this), _old_gen_closure(gen_, this), |
44 _to_space_root_closure(gen_, this), _old_gen_root_closure(gen_, this), |
46 _to_space_root_closure(gen_, this), _old_gen_root_closure(gen_, this), |
45 _older_gen_closure(gen_, this), |
47 _older_gen_closure(gen_, this), |
55 (ChunkArray*) old_gen()->get_data_recorder(thread_num()); |
57 (ChunkArray*) old_gen()->get_data_recorder(thread_num()); |
56 _hash_seed = 17; // Might want to take time-based random value. |
58 _hash_seed = 17; // Might want to take time-based random value. |
57 _start = os::elapsedTime(); |
59 _start = os::elapsedTime(); |
58 _old_gen_closure.set_generation(old_gen_); |
60 _old_gen_closure.set_generation(old_gen_); |
59 _old_gen_root_closure.set_generation(old_gen_); |
61 _old_gen_root_closure.set_generation(old_gen_); |
60 if (UseCompressedOops) { |
|
61 _overflow_stack = new (ResourceObj::C_HEAP) GrowableArray<oop>(512, true); |
|
62 } else { |
|
63 _overflow_stack = NULL; |
|
64 } |
|
65 } |
62 } |
66 #ifdef _MSC_VER |
63 #ifdef _MSC_VER |
67 #pragma warning( pop ) |
64 #pragma warning( pop ) |
68 #endif |
65 #endif |
69 |
66 |
153 // private overflow stack. |
150 // private overflow stack. |
154 } while (ParGCTrimOverflow && young_gen()->take_from_overflow_list(this)); |
151 } while (ParGCTrimOverflow && young_gen()->take_from_overflow_list(this)); |
155 } |
152 } |
156 |
153 |
157 bool ParScanThreadState::take_from_overflow_stack() { |
154 bool ParScanThreadState::take_from_overflow_stack() { |
158 assert(UseCompressedOops, "Else should not call"); |
155 assert(ParGCUseLocalOverflow, "Else should not call"); |
159 assert(young_gen()->overflow_list() == NULL, "Error"); |
156 assert(young_gen()->overflow_list() == NULL, "Error"); |
160 ObjToScanQueue* queue = work_queue(); |
157 ObjToScanQueue* queue = work_queue(); |
161 GrowableArray<oop>* of_stack = overflow_stack(); |
158 GrowableArray<oop>* of_stack = overflow_stack(); |
162 uint num_overflow_elems = of_stack->length(); |
159 uint num_overflow_elems = of_stack->length(); |
163 uint num_take_elems = MIN2(MIN2((queue->max_elems() - queue->size())/4, |
160 uint num_take_elems = MIN2(MIN2((queue->max_elems() - queue->size())/4, |
181 assert(young_gen()->overflow_list() == NULL, "Error"); |
178 assert(young_gen()->overflow_list() == NULL, "Error"); |
182 return num_take_elems > 0; // was something transferred? |
179 return num_take_elems > 0; // was something transferred? |
183 } |
180 } |
184 |
181 |
// ParScanThreadState::push_on_overflow_stack(oop p)
//
// NOTE: this span is a side-by-side diff listing. Every statement appears
// twice: first the old-revision row (line numbers 185-190), then the
// new-revision row (line numbers 182-187).
//
// Pushes p onto the GrowableArray<oop> returned by overflow_stack().
// The two revisions differ only in the precondition assert: the old row
// requires UseCompressedOops, the new row requires ParGCUseLocalOverflow.
// Both revisions then assert that young_gen()->overflow_list() is NULL
// after the push.
185 void ParScanThreadState::push_on_overflow_stack(oop p) { |
182 void ParScanThreadState::push_on_overflow_stack(oop p) { |
186 assert(UseCompressedOops, "Else should not call"); |
183 assert(ParGCUseLocalOverflow, "Else should not call"); |
187 overflow_stack()->push(p); |
184 overflow_stack()->push(p); |
188 assert(young_gen()->overflow_list() == NULL, "Error"); |
185 assert(young_gen()->overflow_list() == NULL, "Error"); |
189 } |
186 } |
190 |
187 |
191 HeapWord* ParScanThreadState::alloc_in_to_space_slow(size_t word_sz) { |
188 HeapWord* ParScanThreadState::alloc_in_to_space_slow(size_t word_sz) { |
258 ParScanThreadStateSet(int num_threads, |
255 ParScanThreadStateSet(int num_threads, |
259 Space& to_space, |
256 Space& to_space, |
260 ParNewGeneration& gen, |
257 ParNewGeneration& gen, |
261 Generation& old_gen, |
258 Generation& old_gen, |
262 ObjToScanQueueSet& queue_set, |
259 ObjToScanQueueSet& queue_set, |
|
260 GrowableArray<oop>** overflow_stacks_, |
263 size_t desired_plab_sz, |
261 size_t desired_plab_sz, |
264 ParallelTaskTerminator& term); |
262 ParallelTaskTerminator& term); |
265 inline ParScanThreadState& thread_sate(int i); |
263 inline ParScanThreadState& thread_sate(int i); |
266 int pushes() { return _pushes; } |
264 int pushes() { return _pushes; } |
267 int pops() { return _pops; } |
265 int pops() { return _pops; } |
280 |
278 |
281 |
279 |
// ParScanThreadStateSet::ParScanThreadStateSet(...)
//
// NOTE: this span is a side-by-side diff listing. Each statement appears
// as an old-revision row (line numbers 282-297) followed by the matching
// new-revision row (line numbers 280-296). A row with no counterpart in
// the other revision is paired with an empty row.
//
// Initializes the ResourceArray base with room for num_threads elements of
// sizeof(ParScanThreadState), stores the gen, old_gen and term references,
// and zeroes the _pushes/_pops/_steals counters. It then placement-news one
// ParScanThreadState per thread index i at ((ParScanThreadState*)_data + i).
// (_data is presumably the storage owned by the ResourceArray base; confirm
// against its declaration.)
//
// Difference between the revisions: the new revision takes an extra
// GrowableArray<oop>** overflow_stack_set_ parameter and forwards it
// unchanged to every ParScanThreadState constructor call.
282 ParScanThreadStateSet::ParScanThreadStateSet( |
280 ParScanThreadStateSet::ParScanThreadStateSet( |
283 int num_threads, Space& to_space, ParNewGeneration& gen, |
281 int num_threads, Space& to_space, ParNewGeneration& gen, |
284 Generation& old_gen, ObjToScanQueueSet& queue_set, |
282 Generation& old_gen, ObjToScanQueueSet& queue_set, |

283 GrowableArray<oop>** overflow_stack_set_, |
285 size_t desired_plab_sz, ParallelTaskTerminator& term) |
284 size_t desired_plab_sz, ParallelTaskTerminator& term) |
286 : ResourceArray(sizeof(ParScanThreadState), num_threads), |
285 : ResourceArray(sizeof(ParScanThreadState), num_threads), |
287 _gen(gen), _next_gen(old_gen), _term(term), |
286 _gen(gen), _next_gen(old_gen), _term(term), |
288 _pushes(0), _pops(0), _steals(0) |
287 _pushes(0), _pops(0), _steals(0) |
289 { |
288 { |
290 assert(num_threads > 0, "sanity check!"); |
289 assert(num_threads > 0, "sanity check!"); |
291 // Initialize states. |
290 // Initialize states. |
292 for (int i = 0; i < num_threads; ++i) { |
291 for (int i = 0; i < num_threads; ++i) { |
293 new ((ParScanThreadState*)_data + i) |
292 new ((ParScanThreadState*)_data + i) |
294 ParScanThreadState(&to_space, &gen, &old_gen, i, &queue_set, |
293 ParScanThreadState(&to_space, &gen, &old_gen, i, &queue_set, |
295 desired_plab_sz, term); |
294 overflow_stack_set_, desired_plab_sz, term); |
296 } |
295 } |
297 } |
296 } |
298 |
297 |
299 inline ParScanThreadState& ParScanThreadStateSet::thread_sate(int i) |
298 inline ParScanThreadState& ParScanThreadStateSet::thread_sate(int i) |
300 { |
299 { |
517 } |
516 } |
518 |
517 |
519 for (uint i2 = 0; i2 < ParallelGCThreads; i2++) |
518 for (uint i2 = 0; i2 < ParallelGCThreads; i2++) |
520 _task_queues->queue(i2)->initialize(); |
519 _task_queues->queue(i2)->initialize(); |
521 |
520 |
|
521 _overflow_stacks = NEW_C_HEAP_ARRAY(GrowableArray<oop>*, ParallelGCThreads); |
|
522 guarantee(_overflow_stacks != NULL, "Overflow stack set allocation failure"); |
|
523 for (uint i = 0; i < ParallelGCThreads; i++) { |
|
524 if (ParGCUseLocalOverflow) { |
|
525 _overflow_stacks[i] = new (ResourceObj::C_HEAP) GrowableArray<oop>(512, true); |
|
526 guarantee(_overflow_stacks[i] != NULL, "Overflow Stack allocation failure."); |
|
527 } else { |
|
528 _overflow_stacks[i] = NULL; |
|
529 } |
|
530 } |
|
531 |
522 if (UsePerfData) { |
532 if (UsePerfData) { |
523 EXCEPTION_MARK; |
533 EXCEPTION_MARK; |
524 ResourceMark rm; |
534 ResourceMark rm; |
525 |
535 |
526 const char* cname = |
536 const char* cname = |
782 gch->save_marks(); |
792 gch->save_marks(); |
783 assert(workers != NULL, "Need parallel worker threads."); |
793 assert(workers != NULL, "Need parallel worker threads."); |
784 ParallelTaskTerminator _term(workers->total_workers(), task_queues()); |
794 ParallelTaskTerminator _term(workers->total_workers(), task_queues()); |
785 ParScanThreadStateSet thread_state_set(workers->total_workers(), |
795 ParScanThreadStateSet thread_state_set(workers->total_workers(), |
786 *to(), *this, *_next_gen, *task_queues(), |
796 *to(), *this, *_next_gen, *task_queues(), |
787 desired_plab_sz(), _term); |
797 _overflow_stacks, desired_plab_sz(), _term); |
788 |
798 |
789 ParNewGenTask tsk(this, _next_gen, reserved().end(), &thread_state_set); |
799 ParNewGenTask tsk(this, _next_gen, reserved().end(), &thread_state_set); |
790 int n_workers = workers->total_workers(); |
800 int n_workers = workers->total_workers(); |
791 gch->set_par_threads(n_workers); |
801 gch->set_par_threads(n_workers); |
792 gch->change_strong_roots_parity(); |
802 gch->change_strong_roots_parity(); |
1236 // single global lists have their own performance disadvantages |
1246 // single global lists have their own performance disadvantages |
1237 // as we were made painfully aware not long ago, see 6786503). |
1247 // as we were made painfully aware not long ago, see 6786503). |
1238 #define BUSY (oop(0x1aff1aff)) |
1248 #define BUSY (oop(0x1aff1aff)) |
1239 void ParNewGeneration::push_on_overflow_list(oop from_space_obj, ParScanThreadState* par_scan_state) { |
1249 void ParNewGeneration::push_on_overflow_list(oop from_space_obj, ParScanThreadState* par_scan_state) { |
1240 assert(is_in_reserved(from_space_obj), "Should be from this generation"); |
1250 assert(is_in_reserved(from_space_obj), "Should be from this generation"); |
1241 if (UseCompressedOops) { |
1251 if (ParGCUseLocalOverflow) { |
1242 // In the case of compressed oops, we use a private, not-shared |
1252 // With ParGCUseLocalOverflow (mandatory when using compressed oops), we use a private, not-shared |
1243 // overflow stack. |
1253 // overflow stack. |
1244 par_scan_state->push_on_overflow_stack(from_space_obj); |
1254 par_scan_state->push_on_overflow_stack(from_space_obj); |
1245 } else { |
1255 } else { |
|
1256 assert(!UseCompressedOops, "Error"); |
1246 // if the object has been forwarded to itself, then we cannot |
1257 // if the object has been forwarded to itself, then we cannot |
1247 // use the klass pointer for the linked list. Instead we have |
1258 // use the klass pointer for the linked list. Instead we have |
1248 // to allocate an oopDesc in the C-Heap and use that for the linked list. |
1259 // to allocate an oopDesc in the C-Heap and use that for the linked list. |
1249 // XXX This is horribly inefficient when a promotion failure occurs |
1260 // XXX This is horribly inefficient when a promotion failure occurs |
1250 // and should be fixed. XXX FIX ME !!! |
1261 // and should be fixed. XXX FIX ME !!! |
1303 // How many to take? |
1315 // How many to take? |
1304 size_t objsFromOverflow = MIN2((size_t)(work_q->max_elems() - work_q->size())/4, |
1316 size_t objsFromOverflow = MIN2((size_t)(work_q->max_elems() - work_q->size())/4, |
1305 (size_t)ParGCDesiredObjsFromOverflowList); |
1317 (size_t)ParGCDesiredObjsFromOverflowList); |
1306 |
1318 |
1307 assert(par_scan_state->overflow_stack() == NULL, "Error"); |
1319 assert(par_scan_state->overflow_stack() == NULL, "Error"); |
|
1320 assert(!UseCompressedOops, "Error"); |
1308 if (_overflow_list == NULL) return false; |
1321 if (_overflow_list == NULL) return false; |
1309 |
1322 |
1310 // Otherwise, there was something there; try claiming the list. |
1323 // Otherwise, there was something there; try claiming the list. |
1311 oop prefix = (oop)Atomic::xchg_ptr(BUSY, &_overflow_list); |
1324 oop prefix = (oop)Atomic::xchg_ptr(BUSY, &_overflow_list); |
1312 // Trim off a prefix of at most objsFromOverflow items |
1325 // Trim off a prefix of at most objsFromOverflow items |