Wed, 01 Oct 2008 20:15:03 -0400
Merge
--- a/src/share/vm/asm/assembler.cpp Tue Sep 30 12:24:27 2008 -0400
+++ b/src/share/vm/asm/assembler.cpp Wed Oct 01 20:15:03 2008 -0400
@@ -249,8 +249,6 @@
 bool MacroAssembler::needs_explicit_null_check(intptr_t offset) {
   // Exception handler checks the nmethod's implicit null checks table
   // only when this method returns false.
-#ifndef SPARC
-  // Sparc does not have based addressing
   if (UseCompressedOops) {
     // The first page after heap_base is unmapped and
     // the 'offset' is equal to [heap_base + offset] for
@@ -261,7 +259,6 @@
       offset = (intptr_t)(pointer_delta((void*)offset, (void*)heap_base, 1));
     }
   }
-#endif // SPARC
   return offset < 0 || os::vm_page_size() <= offset;
 }
 
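For illustration only (not part of the patch): dropping the #ifndef SPARC guard means the compressed-oops rebasing now runs on every platform before the usual "first unmapped page" test. A minimal standalone sketch of the resulting logic, with the heap base, page size and compressed-oops flag passed in explicitly as stand-ins for Universe::heap_base(), os::vm_page_size() and UseCompressedOops:

    // Sketch of the now platform-independent check; all parameters are
    // stand-ins for the VM globals named above, not real HotSpot APIs.
    static bool needs_explicit_null_check(intptr_t offset,
                                          uintptr_t heap_base,
                                          intptr_t page_size,
                                          bool use_compressed_oops) {
      if (use_compressed_oops) {
        // A narrow-oop implicit null check faults at [heap_base + offset],
        // so rebase the offset before comparing against the unmapped first page.
        if ((uintptr_t)offset >= heap_base) {
          offset = (intptr_t)((uintptr_t)offset - heap_base);
        }
      }
      return offset < 0 || page_size <= offset;
    }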
--- a/src/share/vm/ci/ciMethodBlocks.cpp Tue Sep 30 12:24:27 2008 -0400
+++ b/src/share/vm/ci/ciMethodBlocks.cpp Wed Oct 01 20:15:03 2008 -0400
@@ -49,7 +49,7 @@
 // first half.  Returns the range beginning at bci.
 ciBlock *ciMethodBlocks::split_block_at(int bci) {
   ciBlock *former_block = block_containing(bci);
-  ciBlock *new_block = new(_arena) ciBlock(_method, _num_blocks++, this, former_block->start_bci());
+  ciBlock *new_block = new(_arena) ciBlock(_method, _num_blocks++, former_block->start_bci());
   _blocks->append(new_block);
   assert(former_block != NULL, "must not be NULL");
   new_block->set_limit_bci(bci);
@@ -83,7 +83,7 @@
   if (cb == NULL ) {
     // This is our first time visiting this bytecode.  Create
     // a fresh block and assign it this starting point.
-    ciBlock *nb = new(_arena) ciBlock(_method, _num_blocks++, this, bci);
+    ciBlock *nb = new(_arena) ciBlock(_method, _num_blocks++, bci);
     _blocks->append(nb);
     _bci_to_block[bci] = nb;
     return nb;
@@ -98,6 +98,11 @@
   }
 }
 
+ciBlock *ciMethodBlocks::make_dummy_block() {
+  ciBlock *dum = new(_arena) ciBlock(_method, -1, 0);
+  return dum;
+}
+
 void ciMethodBlocks::do_analysis() {
   ciBytecodeStream s(_method);
   ciBlock *cur_block = block_containing(0);
@@ -253,7 +258,7 @@
   Copy::zero_to_words((HeapWord*) _bci_to_block, b2bsize / sizeof(HeapWord));
 
   // create initial block covering the entire method
-  ciBlock *b = new(arena) ciBlock(_method, _num_blocks++, this, 0);
+  ciBlock *b = new(arena) ciBlock(_method, _num_blocks++, 0);
   _blocks->append(b);
   _bci_to_block[0] = b;
 
@@ -334,7 +339,7 @@
 #endif
 
 
-ciBlock::ciBlock(ciMethod *method, int index, ciMethodBlocks *mb, int start_bci) :
+ciBlock::ciBlock(ciMethod *method, int index, int start_bci) :
 #ifndef PRODUCT
   _method(method),
 #endif
--- a/src/share/vm/ci/ciMethodBlocks.hpp Tue Sep 30 12:24:27 2008 -0400
+++ b/src/share/vm/ci/ciMethodBlocks.hpp Wed Oct 01 20:15:03 2008 -0400
@@ -48,6 +48,8 @@
   int num_blocks() { return _num_blocks;}
   void clear_processed();
 
+  ciBlock *make_dummy_block(); // a block not associated with a bci
+
 #ifndef PRODUCT
   void dump();
 #endif
@@ -81,7 +83,7 @@
     fall_through_bci = -1
   };
 
-  ciBlock(ciMethod *method, int index, ciMethodBlocks *mb, int start_bci);
+  ciBlock(ciMethod *method, int index, int start_bci);
   int start_bci() const { return _start_bci; }
   int limit_bci() const { return _limit_bci; }
   int control_bci() const { return _control_bci; }
@@ -94,7 +96,6 @@
   int ex_limit_bci() const { return _ex_limit_bci; }
   bool contains(int bci) const { return start_bci() <= bci && bci < limit_bci(); }
 
-
   // flag handling
   bool processed() const { return (_flags & Processed) != 0; }
   bool is_handler() const { return (_flags & Handler) != 0; }
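The ciMethodBlocks* argument dropped from the ciBlock constructor was unused, and the new make_dummy_block() gives the typeflow pass a way to manufacture a ciBlock that is not tied to any bci. The ciTypeFlow.cpp change further down uses it to build the root of the new loop tree, roughly as in this excerpt (reindented from the df_flow_types hunk below):

    // One dummy ciBlock backs a pseudo head/tail pair whose pre/post orders
    // bracket all real blocks, so the loop tree root contains every loop.
    ciBlock* dummy = _methodBlocks->make_dummy_block();
    JsrSet* root_set = new JsrSet(NULL, 0);
    Block* root_head = new (arena()) Block(this, dummy, root_set);
    Block* root_tail = new (arena()) Block(this, dummy, root_set);
    root_head->set_pre_order(0);
    root_head->set_post_order(0);
    root_tail->set_pre_order(max_jint);
    root_tail->set_post_order(max_jint);
    set_loop_tree_root(new (arena()) Loop(root_head, root_tail));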
4.1 --- a/src/share/vm/ci/ciTypeFlow.cpp Tue Sep 30 12:24:27 2008 -0400 4.2 +++ b/src/share/vm/ci/ciTypeFlow.cpp Wed Oct 01 20:15:03 2008 -0400 4.3 @@ -338,8 +338,10 @@ 4.4 } 4.5 _trap_bci = -1; 4.6 _trap_index = 0; 4.7 + _def_locals.clear(); 4.8 } 4.9 4.10 + 4.11 // ------------------------------------------------------------------ 4.12 // ciTypeFlow::get_start_state 4.13 // 4.14 @@ -735,7 +737,7 @@ 4.15 void ciTypeFlow::StateVector::do_new(ciBytecodeStream* str) { 4.16 bool will_link; 4.17 ciKlass* klass = str->get_klass(will_link); 4.18 - if (!will_link) { 4.19 + if (!will_link || str->is_unresolved_klass()) { 4.20 trap(str, klass, str->get_klass_index()); 4.21 } else { 4.22 push_object(klass); 4.23 @@ -1268,7 +1270,9 @@ 4.24 } 4.25 case Bytecodes::_iinc: 4.26 { 4.27 - check_int(local(str->get_index())); 4.28 + int lnum = str->get_index(); 4.29 + check_int(local(lnum)); 4.30 + store_to_local(lnum); 4.31 break; 4.32 } 4.33 case Bytecodes::_iload: load_local_int(str->get_index()); break; 4.34 @@ -1506,6 +1510,46 @@ 4.35 } 4.36 #endif 4.37 4.38 + 4.39 +// ------------------------------------------------------------------ 4.40 +// ciTypeFlow::SuccIter::next 4.41 +// 4.42 +void ciTypeFlow::SuccIter::next() { 4.43 + int succ_ct = _pred->successors()->length(); 4.44 + int next = _index + 1; 4.45 + if (next < succ_ct) { 4.46 + _index = next; 4.47 + _succ = _pred->successors()->at(next); 4.48 + return; 4.49 + } 4.50 + for (int i = next - succ_ct; i < _pred->exceptions()->length(); i++) { 4.51 + // Do not compile any code for unloaded exception types. 4.52 + // Following compiler passes are responsible for doing this also. 4.53 + ciInstanceKlass* exception_klass = _pred->exc_klasses()->at(i); 4.54 + if (exception_klass->is_loaded()) { 4.55 + _index = next; 4.56 + _succ = _pred->exceptions()->at(i); 4.57 + return; 4.58 + } 4.59 + next++; 4.60 + } 4.61 + _index = -1; 4.62 + _succ = NULL; 4.63 +} 4.64 + 4.65 +// ------------------------------------------------------------------ 4.66 +// ciTypeFlow::SuccIter::set_succ 4.67 +// 4.68 +void ciTypeFlow::SuccIter::set_succ(Block* succ) { 4.69 + int succ_ct = _pred->successors()->length(); 4.70 + if (_index < succ_ct) { 4.71 + _pred->successors()->at_put(_index, succ); 4.72 + } else { 4.73 + int idx = _index - succ_ct; 4.74 + _pred->exceptions()->at_put(idx, succ); 4.75 + } 4.76 +} 4.77 + 4.78 // ciTypeFlow::Block 4.79 // 4.80 // A basic block. 4.81 @@ -1526,10 +1570,11 @@ 4.82 _jsrs = new_jsrs; 4.83 _next = NULL; 4.84 _on_work_list = false; 4.85 - _pre_order = -1; assert(!has_pre_order(), ""); 4.86 - _private_copy = false; 4.87 + _backedge_copy = false; 4.88 + _exception_entry = false; 4.89 _trap_bci = -1; 4.90 _trap_index = 0; 4.91 + df_init(); 4.92 4.93 if (CITraceTypeFlow) { 4.94 tty->print_cr(">> Created new block"); 4.95 @@ -1541,55 +1586,13 @@ 4.96 } 4.97 4.98 // ------------------------------------------------------------------ 4.99 -// ciTypeFlow::Block::clone_loop_head 4.100 -// 4.101 -ciTypeFlow::Block* 4.102 -ciTypeFlow::Block::clone_loop_head(ciTypeFlow* analyzer, 4.103 - int branch_bci, 4.104 - ciTypeFlow::Block* target, 4.105 - ciTypeFlow::JsrSet* jsrs) { 4.106 - // Loop optimizations are not performed on Tier1 compiles. Do nothing. 4.107 - if (analyzer->env()->comp_level() < CompLevel_full_optimization) { 4.108 - return target; 4.109 - } 4.110 - 4.111 - // The current block ends with a branch. 
4.112 - // 4.113 - // If the target block appears to be the test-clause of a for loop, and 4.114 - // it is not too large, and it has not yet been cloned, clone it. 4.115 - // The pre-existing copy becomes the private clone used only by 4.116 - // the initial iteration of the loop. (We know we are simulating 4.117 - // the initial iteration right now, since we have never calculated 4.118 - // successors before for this block.) 4.119 - 4.120 - if (branch_bci <= start() 4.121 - && (target->limit() - target->start()) <= CICloneLoopTestLimit 4.122 - && target->private_copy_count() == 0) { 4.123 - // Setting the private_copy bit ensures that the target block cannot be 4.124 - // reached by any other paths, such as fall-in from the loop body. 4.125 - // The private copy will be accessible only on successor lists 4.126 - // created up to this point. 4.127 - target->set_private_copy(true); 4.128 - if (CITraceTypeFlow) { 4.129 - tty->print(">> Cloning a test-clause block "); 4.130 - print_value_on(tty); 4.131 - tty->cr(); 4.132 - } 4.133 - // If the target is the current block, then later on a new copy of the 4.134 - // target block will be created when its bytecodes are reached by 4.135 - // an alternate path. (This is the case for loops with the loop 4.136 - // head at the bci-wise bottom of the loop, as with pre-1.4.2 javac.) 4.137 - // 4.138 - // Otherwise, duplicate the target block now and use it immediately. 4.139 - // (The case for loops with the loop head at the bci-wise top of the 4.140 - // loop, as with 1.4.2 javac.) 4.141 - // 4.142 - // In either case, the new copy of the block will remain public. 4.143 - if (target != this) { 4.144 - target = analyzer->block_at(branch_bci, jsrs); 4.145 - } 4.146 - } 4.147 - return target; 4.148 +// ciTypeFlow::Block::df_init 4.149 +void ciTypeFlow::Block::df_init() { 4.150 + _pre_order = -1; assert(!has_pre_order(), ""); 4.151 + _post_order = -1; assert(!has_post_order(), ""); 4.152 + _loop = NULL; 4.153 + _irreducible_entry = false; 4.154 + _rpo_next = NULL; 4.155 } 4.156 4.157 // ------------------------------------------------------------------ 4.158 @@ -1644,7 +1647,6 @@ 4.159 case Bytecodes::_ifnull: case Bytecodes::_ifnonnull: 4.160 // Our successors are the branch target and the next bci. 4.161 branch_bci = str->get_dest(); 4.162 - clone_loop_head(analyzer, branch_bci, this, jsrs); 4.163 _successors = 4.164 new (arena) GrowableArray<Block*>(arena, 2, 0, NULL); 4.165 assert(_successors->length() == IF_NOT_TAKEN, ""); 4.166 @@ -1658,14 +1660,7 @@ 4.167 _successors = 4.168 new (arena) GrowableArray<Block*>(arena, 1, 0, NULL); 4.169 assert(_successors->length() == GOTO_TARGET, ""); 4.170 - target = analyzer->block_at(branch_bci, jsrs); 4.171 - // If the target block has not been visited yet, and looks like 4.172 - // a two-way branch, attempt to clone it if it is a loop head. 4.173 - if (target->_successors != NULL 4.174 - && target->_successors->length() == (IF_TAKEN + 1)) { 4.175 - target = clone_loop_head(analyzer, branch_bci, target, jsrs); 4.176 - } 4.177 - _successors->append(target); 4.178 + _successors->append(analyzer->block_at(branch_bci, jsrs)); 4.179 break; 4.180 4.181 case Bytecodes::_jsr: 4.182 @@ -1801,65 +1796,60 @@ 4.183 } 4.184 4.185 // ------------------------------------------------------------------ 4.186 -// ciTypeFlow::Block::is_simpler_than 4.187 -// 4.188 -// A relation used to order our work list. We work on a block earlier 4.189 -// if it has a smaller jsr stack or it occurs earlier in the program 4.190 -// text. 
4.191 -// 4.192 -// Note: maybe we should redo this functionality to make blocks 4.193 -// which correspond to exceptions lower priority. 4.194 -bool ciTypeFlow::Block::is_simpler_than(ciTypeFlow::Block* other) { 4.195 - if (other == NULL) { 4.196 - return true; 4.197 - } else { 4.198 - int size1 = _jsrs->size(); 4.199 - int size2 = other->_jsrs->size(); 4.200 - if (size1 < size2) { 4.201 - return true; 4.202 - } else if (size2 < size1) { 4.203 - return false; 4.204 - } else { 4.205 -#if 0 4.206 - if (size1 > 0) { 4.207 - int r1 = _jsrs->record_at(0)->return_address(); 4.208 - int r2 = _jsrs->record_at(0)->return_address(); 4.209 - if (r1 < r2) { 4.210 - return true; 4.211 - } else if (r2 < r1) { 4.212 - return false; 4.213 - } else { 4.214 - int e1 = _jsrs->record_at(0)->return_address(); 4.215 - int e2 = _jsrs->record_at(0)->return_address(); 4.216 - if (e1 < e2) { 4.217 - return true; 4.218 - } else if (e2 < e1) { 4.219 - return false; 4.220 - } 4.221 - } 4.222 - } 4.223 -#endif 4.224 - return (start() <= other->start()); 4.225 - } 4.226 - } 4.227 +// ciTypeFlow::Block::set_backedge_copy 4.228 +// Use this only to make a pre-existing public block into a backedge copy. 4.229 +void ciTypeFlow::Block::set_backedge_copy(bool z) { 4.230 + assert(z || (z == is_backedge_copy()), "cannot make a backedge copy public"); 4.231 + _backedge_copy = z; 4.232 } 4.233 4.234 // ------------------------------------------------------------------ 4.235 -// ciTypeFlow::Block::set_private_copy 4.236 -// Use this only to make a pre-existing public block into a private copy. 4.237 -void ciTypeFlow::Block::set_private_copy(bool z) { 4.238 - assert(z || (z == is_private_copy()), "cannot make a private copy public"); 4.239 - _private_copy = z; 4.240 +// ciTypeFlow::Block::is_clonable_exit 4.241 +// 4.242 +// At most 2 normal successors, one of which continues looping, 4.243 +// and all exceptional successors must exit. 
4.244 +bool ciTypeFlow::Block::is_clonable_exit(ciTypeFlow::Loop* lp) { 4.245 + int normal_cnt = 0; 4.246 + int in_loop_cnt = 0; 4.247 + for (SuccIter iter(this); !iter.done(); iter.next()) { 4.248 + Block* succ = iter.succ(); 4.249 + if (iter.is_normal_ctrl()) { 4.250 + if (++normal_cnt > 2) return false; 4.251 + if (lp->contains(succ->loop())) { 4.252 + if (++in_loop_cnt > 1) return false; 4.253 + } 4.254 + } else { 4.255 + if (lp->contains(succ->loop())) return false; 4.256 + } 4.257 + } 4.258 + return in_loop_cnt == 1; 4.259 +} 4.260 + 4.261 +// ------------------------------------------------------------------ 4.262 +// ciTypeFlow::Block::looping_succ 4.263 +// 4.264 +ciTypeFlow::Block* ciTypeFlow::Block::looping_succ(ciTypeFlow::Loop* lp) { 4.265 + assert(successors()->length() <= 2, "at most 2 normal successors"); 4.266 + for (SuccIter iter(this); !iter.done(); iter.next()) { 4.267 + Block* succ = iter.succ(); 4.268 + if (lp->contains(succ->loop())) { 4.269 + return succ; 4.270 + } 4.271 + } 4.272 + return NULL; 4.273 } 4.274 4.275 #ifndef PRODUCT 4.276 // ------------------------------------------------------------------ 4.277 // ciTypeFlow::Block::print_value_on 4.278 void ciTypeFlow::Block::print_value_on(outputStream* st) const { 4.279 - if (has_pre_order()) st->print("#%-2d ", pre_order()); 4.280 + if (has_pre_order()) st->print("#%-2d ", pre_order()); 4.281 + if (has_rpo()) st->print("rpo#%-2d ", rpo()); 4.282 st->print("[%d - %d)", start(), limit()); 4.283 + if (is_loop_head()) st->print(" lphd"); 4.284 + if (is_irreducible_entry()) st->print(" irred"); 4.285 if (_jsrs->size() > 0) { st->print("/"); _jsrs->print_on(st); } 4.286 - if (is_private_copy()) st->print("/private_copy"); 4.287 + if (is_backedge_copy()) st->print("/backedge_copy"); 4.288 } 4.289 4.290 // ------------------------------------------------------------------ 4.291 @@ -1871,6 +1861,16 @@ 4.292 st->print_cr(" ==================================================== "); 4.293 st->print (" "); 4.294 print_value_on(st); 4.295 + st->print(" Stored locals: "); def_locals()->print_on(st, outer()->method()->max_locals()); tty->cr(); 4.296 + if (loop() && loop()->parent() != NULL) { 4.297 + st->print(" loops:"); 4.298 + Loop* lp = loop(); 4.299 + do { 4.300 + st->print(" %d<-%d", lp->head()->pre_order(),lp->tail()->pre_order()); 4.301 + if (lp->is_irreducible()) st->print("(ir)"); 4.302 + lp = lp->parent(); 4.303 + } while (lp->parent() != NULL); 4.304 + } 4.305 st->cr(); 4.306 _state->print_on(st); 4.307 if (_successors == NULL) { 4.308 @@ -1907,6 +1907,21 @@ 4.309 } 4.310 #endif 4.311 4.312 +#ifndef PRODUCT 4.313 +// ------------------------------------------------------------------ 4.314 +// ciTypeFlow::LocalSet::print_on 4.315 +void ciTypeFlow::LocalSet::print_on(outputStream* st, int limit) const { 4.316 + st->print("{"); 4.317 + for (int i = 0; i < max; i++) { 4.318 + if (test(i)) st->print(" %d", i); 4.319 + } 4.320 + if (limit > max) { 4.321 + st->print(" %d..%d ", max, limit); 4.322 + } 4.323 + st->print(" }"); 4.324 +} 4.325 +#endif 4.326 + 4.327 // ciTypeFlow 4.328 // 4.329 // This is a pass over the bytecodes which computes the following: 4.330 @@ -1922,12 +1937,11 @@ 4.331 _max_locals = method->max_locals(); 4.332 _max_stack = method->max_stack(); 4.333 _code_size = method->code_size(); 4.334 + _has_irreducible_entry = false; 4.335 _osr_bci = osr_bci; 4.336 _failure_reason = NULL; 4.337 assert(start_bci() >= 0 && start_bci() < code_size() , "correct osr_bci argument"); 4.338 - 4.339 _work_list = NULL; 4.340 
- _next_pre_order = 0; 4.341 4.342 _ciblock_count = _methodBlocks->num_blocks(); 4.343 _idx_to_blocklist = NEW_ARENA_ARRAY(arena(), GrowableArray<Block*>*, _ciblock_count); 4.344 @@ -1949,12 +1963,6 @@ 4.345 _work_list = next_block->next(); 4.346 next_block->set_next(NULL); 4.347 next_block->set_on_work_list(false); 4.348 - if (!next_block->has_pre_order()) { 4.349 - // Assign "pre_order" as each new block is taken from the work list. 4.350 - // This number may be used by following phases to order block visits. 4.351 - assert(!have_block_count(), "must not have mapped blocks yet") 4.352 - next_block->set_pre_order(_next_pre_order++); 4.353 - } 4.354 return next_block; 4.355 } 4.356 4.357 @@ -1962,30 +1970,37 @@ 4.358 // ciTypeFlow::add_to_work_list 4.359 // 4.360 // Add a basic block to our work list. 4.361 +// List is sorted by decreasing postorder sort (same as increasing RPO) 4.362 void ciTypeFlow::add_to_work_list(ciTypeFlow::Block* block) { 4.363 assert(!block->is_on_work_list(), "must not already be on work list"); 4.364 4.365 if (CITraceTypeFlow) { 4.366 - tty->print(">> Adding block%s ", block->has_pre_order() ? " (again)" : ""); 4.367 + tty->print(">> Adding block "); 4.368 block->print_value_on(tty); 4.369 tty->print_cr(" to the work list : "); 4.370 } 4.371 4.372 block->set_on_work_list(true); 4.373 - if (block->is_simpler_than(_work_list)) { 4.374 + 4.375 + // decreasing post order sort 4.376 + 4.377 + Block* prev = NULL; 4.378 + Block* current = _work_list; 4.379 + int po = block->post_order(); 4.380 + while (current != NULL) { 4.381 + if (!current->has_post_order() || po > current->post_order()) 4.382 + break; 4.383 + prev = current; 4.384 + current = current->next(); 4.385 + } 4.386 + if (prev == NULL) { 4.387 block->set_next(_work_list); 4.388 _work_list = block; 4.389 } else { 4.390 - Block *temp = _work_list; 4.391 - while (!block->is_simpler_than(temp->next())) { 4.392 - if (CITraceTypeFlow) { 4.393 - tty->print("."); 4.394 - } 4.395 - temp = temp->next(); 4.396 - } 4.397 - block->set_next(temp->next()); 4.398 - temp->set_next(block); 4.399 + block->set_next(current); 4.400 + prev->set_next(block); 4.401 } 4.402 + 4.403 if (CITraceTypeFlow) { 4.404 tty->cr(); 4.405 } 4.406 @@ -2008,7 +2023,7 @@ 4.407 assert(ciblk->start_bci() == bci, "bad ciBlock boundaries"); 4.408 Block* block = get_block_for(ciblk->index(), jsrs, option); 4.409 4.410 - assert(block == NULL? (option == no_create): block->is_private_copy() == (option == create_private_copy), "create option consistent with result"); 4.411 + assert(block == NULL? (option == no_create): block->is_backedge_copy() == (option == create_backedge_copy), "create option consistent with result"); 4.412 4.413 if (CITraceTypeFlow) { 4.414 if (block != NULL) { 4.415 @@ -2072,8 +2087,9 @@ 4.416 } 4.417 4.418 if (block->meet_exception(exception_klass, state)) { 4.419 - // Block was modified. Add it to the work list. 4.420 - if (!block->is_on_work_list()) { 4.421 + // Block was modified and has PO. Add it to the work list. 4.422 + if (block->has_post_order() && 4.423 + !block->is_on_work_list()) { 4.424 add_to_work_list(block); 4.425 } 4.426 } 4.427 @@ -2091,8 +2107,9 @@ 4.428 for (int i = 0; i < len; i++) { 4.429 Block* block = successors->at(i); 4.430 if (block->meet(state)) { 4.431 - // Block was modified. Add it to the work list. 4.432 - if (!block->is_on_work_list()) { 4.433 + // Block was modified and has PO. Add it to the work list. 
4.434 + if (block->has_post_order() && 4.435 + !block->is_on_work_list()) { 4.436 add_to_work_list(block); 4.437 } 4.438 } 4.439 @@ -2133,6 +2150,111 @@ 4.440 return true; 4.441 } 4.442 4.443 +// ------------------------------------------------------------------ 4.444 +// ciTypeFlow::clone_loop_heads 4.445 +// 4.446 +// Clone the loop heads 4.447 +bool ciTypeFlow::clone_loop_heads(Loop* lp, StateVector* temp_vector, JsrSet* temp_set) { 4.448 + bool rslt = false; 4.449 + for (PreorderLoops iter(loop_tree_root()); !iter.done(); iter.next()) { 4.450 + lp = iter.current(); 4.451 + Block* head = lp->head(); 4.452 + if (lp == loop_tree_root() || 4.453 + lp->is_irreducible() || 4.454 + !head->is_clonable_exit(lp)) 4.455 + continue; 4.456 + 4.457 + // check not already cloned 4.458 + if (head->backedge_copy_count() != 0) 4.459 + continue; 4.460 + 4.461 + // check _no_ shared head below us 4.462 + Loop* ch; 4.463 + for (ch = lp->child(); ch != NULL && ch->head() != head; ch = ch->sibling()); 4.464 + if (ch != NULL) 4.465 + continue; 4.466 + 4.467 + // Clone head 4.468 + Block* new_head = head->looping_succ(lp); 4.469 + Block* clone = clone_loop_head(lp, temp_vector, temp_set); 4.470 + // Update lp's info 4.471 + clone->set_loop(lp); 4.472 + lp->set_head(new_head); 4.473 + lp->set_tail(clone); 4.474 + // And move original head into outer loop 4.475 + head->set_loop(lp->parent()); 4.476 + 4.477 + rslt = true; 4.478 + } 4.479 + return rslt; 4.480 +} 4.481 + 4.482 +// ------------------------------------------------------------------ 4.483 +// ciTypeFlow::clone_loop_head 4.484 +// 4.485 +// Clone lp's head and replace tail's successors with clone. 4.486 +// 4.487 +// | 4.488 +// v 4.489 +// head <-> body 4.490 +// | 4.491 +// v 4.492 +// exit 4.493 +// 4.494 +// new_head 4.495 +// 4.496 +// | 4.497 +// v 4.498 +// head ----------\ 4.499 +// | | 4.500 +// | v 4.501 +// | clone <-> body 4.502 +// | | 4.503 +// | /--/ 4.504 +// | | 4.505 +// v v 4.506 +// exit 4.507 +// 4.508 +ciTypeFlow::Block* ciTypeFlow::clone_loop_head(Loop* lp, StateVector* temp_vector, JsrSet* temp_set) { 4.509 + Block* head = lp->head(); 4.510 + Block* tail = lp->tail(); 4.511 + if (CITraceTypeFlow) { 4.512 + tty->print(">> Requesting clone of loop head "); head->print_value_on(tty); 4.513 + tty->print(" for predecessor "); tail->print_value_on(tty); 4.514 + tty->cr(); 4.515 + } 4.516 + Block* clone = block_at(head->start(), head->jsrs(), create_backedge_copy); 4.517 + assert(clone->backedge_copy_count() == 1, "one backedge copy for all back edges"); 4.518 + 4.519 + assert(!clone->has_pre_order(), "just created"); 4.520 + clone->set_next_pre_order(); 4.521 + 4.522 + // Insert clone after (orig) tail in reverse post order 4.523 + clone->set_rpo_next(tail->rpo_next()); 4.524 + tail->set_rpo_next(clone); 4.525 + 4.526 + // tail->head becomes tail->clone 4.527 + for (SuccIter iter(tail); !iter.done(); iter.next()) { 4.528 + if (iter.succ() == head) { 4.529 + iter.set_succ(clone); 4.530 + break; 4.531 + } 4.532 + } 4.533 + flow_block(tail, temp_vector, temp_set); 4.534 + if (head == tail) { 4.535 + // For self-loops, clone->head becomes clone->clone 4.536 + flow_block(clone, temp_vector, temp_set); 4.537 + for (SuccIter iter(clone); !iter.done(); iter.next()) { 4.538 + if (iter.succ() == head) { 4.539 + iter.set_succ(clone); 4.540 + break; 4.541 + } 4.542 + } 4.543 + } 4.544 + flow_block(clone, temp_vector, temp_set); 4.545 + 4.546 + return clone; 4.547 +} 4.548 4.549 // ------------------------------------------------------------------ 
4.550 // ciTypeFlow::flow_block 4.551 @@ -2159,11 +2281,14 @@ 4.552 4.553 // Grab the state from the current block. 4.554 block->copy_state_into(state); 4.555 + state->def_locals()->clear(); 4.556 4.557 GrowableArray<Block*>* exceptions = block->exceptions(); 4.558 GrowableArray<ciInstanceKlass*>* exc_klasses = block->exc_klasses(); 4.559 bool has_exceptions = exceptions->length() > 0; 4.560 4.561 + bool exceptions_used = false; 4.562 + 4.563 ciBytecodeStream str(method()); 4.564 str.reset_to_bci(start); 4.565 Bytecodes::Code code; 4.566 @@ -2172,6 +2297,7 @@ 4.567 // Check for exceptional control flow from this point. 4.568 if (has_exceptions && can_trap(str)) { 4.569 flow_exceptions(exceptions, exc_klasses, state); 4.570 + exceptions_used = true; 4.571 } 4.572 // Apply the effects of the current bytecode to our state. 4.573 bool res = state->apply_one_bytecode(&str); 4.574 @@ -2189,9 +2315,14 @@ 4.575 block->print_on(tty); 4.576 } 4.577 4.578 + // Save set of locals defined in this block 4.579 + block->def_locals()->add(state->def_locals()); 4.580 + 4.581 // Record (no) successors. 4.582 block->successors(&str, state, jsrs); 4.583 4.584 + assert(!has_exceptions || exceptions_used, "Not removing exceptions"); 4.585 + 4.586 // Discontinue interpretation of this Block. 4.587 return; 4.588 } 4.589 @@ -2202,6 +2333,7 @@ 4.590 // Check for exceptional control flow from this point. 4.591 if (has_exceptions && can_trap(str)) { 4.592 flow_exceptions(exceptions, exc_klasses, state); 4.593 + exceptions_used = true; 4.594 } 4.595 4.596 // Fix the JsrSet to reflect effect of the bytecode. 4.597 @@ -2218,11 +2350,306 @@ 4.598 successors = block->successors(&str, NULL, NULL); 4.599 } 4.600 4.601 + // Save set of locals defined in this block 4.602 + block->def_locals()->add(state->def_locals()); 4.603 + 4.604 + // Remove untaken exception paths 4.605 + if (!exceptions_used) 4.606 + exceptions->clear(); 4.607 + 4.608 // Pass our state to successors. 4.609 flow_successors(successors, state); 4.610 } 4.611 4.612 // ------------------------------------------------------------------ 4.613 +// ciTypeFlow::PostOrderLoops::next 4.614 +// 4.615 +// Advance to next loop tree using a postorder, left-to-right traversal. 4.616 +void ciTypeFlow::PostorderLoops::next() { 4.617 + assert(!done(), "must not be done."); 4.618 + if (_current->sibling() != NULL) { 4.619 + _current = _current->sibling(); 4.620 + while (_current->child() != NULL) { 4.621 + _current = _current->child(); 4.622 + } 4.623 + } else { 4.624 + _current = _current->parent(); 4.625 + } 4.626 +} 4.627 + 4.628 +// ------------------------------------------------------------------ 4.629 +// ciTypeFlow::PreOrderLoops::next 4.630 +// 4.631 +// Advance to next loop tree using a preorder, left-to-right traversal. 
4.632 +void ciTypeFlow::PreorderLoops::next() { 4.633 + assert(!done(), "must not be done."); 4.634 + if (_current->child() != NULL) { 4.635 + _current = _current->child(); 4.636 + } else if (_current->sibling() != NULL) { 4.637 + _current = _current->sibling(); 4.638 + } else { 4.639 + while (_current != _root && _current->sibling() == NULL) { 4.640 + _current = _current->parent(); 4.641 + } 4.642 + if (_current == _root) { 4.643 + _current = NULL; 4.644 + assert(done(), "must be done."); 4.645 + } else { 4.646 + assert(_current->sibling() != NULL, "must be more to do"); 4.647 + _current = _current->sibling(); 4.648 + } 4.649 + } 4.650 +} 4.651 + 4.652 +// ------------------------------------------------------------------ 4.653 +// ciTypeFlow::Loop::sorted_merge 4.654 +// 4.655 +// Merge the branch lp into this branch, sorting on the loop head 4.656 +// pre_orders. Returns the leaf of the merged branch. 4.657 +// Child and sibling pointers will be setup later. 4.658 +// Sort is (looking from leaf towards the root) 4.659 +// descending on primary key: loop head's pre_order, and 4.660 +// ascending on secondary key: loop tail's pre_order. 4.661 +ciTypeFlow::Loop* ciTypeFlow::Loop::sorted_merge(Loop* lp) { 4.662 + Loop* leaf = this; 4.663 + Loop* prev = NULL; 4.664 + Loop* current = leaf; 4.665 + while (lp != NULL) { 4.666 + int lp_pre_order = lp->head()->pre_order(); 4.667 + // Find insertion point for "lp" 4.668 + while (current != NULL) { 4.669 + if (current == lp) 4.670 + return leaf; // Already in list 4.671 + if (current->head()->pre_order() < lp_pre_order) 4.672 + break; 4.673 + if (current->head()->pre_order() == lp_pre_order && 4.674 + current->tail()->pre_order() > lp->tail()->pre_order()) { 4.675 + break; 4.676 + } 4.677 + prev = current; 4.678 + current = current->parent(); 4.679 + } 4.680 + Loop* next_lp = lp->parent(); // Save future list of items to insert 4.681 + // Insert lp before current 4.682 + lp->set_parent(current); 4.683 + if (prev != NULL) { 4.684 + prev->set_parent(lp); 4.685 + } else { 4.686 + leaf = lp; 4.687 + } 4.688 + prev = lp; // Inserted item is new prev[ious] 4.689 + lp = next_lp; // Next item to insert 4.690 + } 4.691 + return leaf; 4.692 +} 4.693 + 4.694 +// ------------------------------------------------------------------ 4.695 +// ciTypeFlow::build_loop_tree 4.696 +// 4.697 +// Incrementally build loop tree. 4.698 +void ciTypeFlow::build_loop_tree(Block* blk) { 4.699 + assert(!blk->is_post_visited(), "precondition"); 4.700 + Loop* innermost = NULL; // merge of loop tree branches over all successors 4.701 + 4.702 + for (SuccIter iter(blk); !iter.done(); iter.next()) { 4.703 + Loop* lp = NULL; 4.704 + Block* succ = iter.succ(); 4.705 + if (!succ->is_post_visited()) { 4.706 + // Found backedge since predecessor post visited, but successor is not 4.707 + assert(succ->pre_order() <= blk->pre_order(), "should be backedge"); 4.708 + 4.709 + // Create a LoopNode to mark this loop. 4.710 + lp = new (arena()) Loop(succ, blk); 4.711 + if (succ->loop() == NULL) 4.712 + succ->set_loop(lp); 4.713 + // succ->loop will be updated to innermost loop on a later call, when blk==succ 4.714 + 4.715 + } else { // Nested loop 4.716 + lp = succ->loop(); 4.717 + 4.718 + // If succ is loop head, find outer loop. 4.719 + while (lp != NULL && lp->head() == succ) { 4.720 + lp = lp->parent(); 4.721 + } 4.722 + if (lp == NULL) { 4.723 + // Infinite loop, it's parent is the root 4.724 + lp = loop_tree_root(); 4.725 + } 4.726 + } 4.727 + 4.728 + // Check for irreducible loop. 
4.729 + // Successor has already been visited. If the successor's loop head 4.730 + // has already been post-visited, then this is another entry into the loop. 4.731 + while (lp->head()->is_post_visited() && lp != loop_tree_root()) { 4.732 + _has_irreducible_entry = true; 4.733 + lp->set_irreducible(succ); 4.734 + if (!succ->is_on_work_list()) { 4.735 + // Assume irreducible entries need more data flow 4.736 + add_to_work_list(succ); 4.737 + } 4.738 + lp = lp->parent(); 4.739 + assert(lp != NULL, "nested loop must have parent by now"); 4.740 + } 4.741 + 4.742 + // Merge loop tree branch for all successors. 4.743 + innermost = innermost == NULL ? lp : innermost->sorted_merge(lp); 4.744 + 4.745 + } // end loop 4.746 + 4.747 + if (innermost == NULL) { 4.748 + assert(blk->successors()->length() == 0, "CFG exit"); 4.749 + blk->set_loop(loop_tree_root()); 4.750 + } else if (innermost->head() == blk) { 4.751 + // If loop header, complete the tree pointers 4.752 + if (blk->loop() != innermost) { 4.753 +#if ASSERT 4.754 + assert(blk->loop()->head() == innermost->head(), "same head"); 4.755 + Loop* dl; 4.756 + for (dl = innermost; dl != NULL && dl != blk->loop(); dl = dl->parent()); 4.757 + assert(dl == blk->loop(), "blk->loop() already in innermost list"); 4.758 +#endif 4.759 + blk->set_loop(innermost); 4.760 + } 4.761 + innermost->def_locals()->add(blk->def_locals()); 4.762 + Loop* l = innermost; 4.763 + Loop* p = l->parent(); 4.764 + while (p && l->head() == blk) { 4.765 + l->set_sibling(p->child()); // Put self on parents 'next child' 4.766 + p->set_child(l); // Make self the first child of parent 4.767 + p->def_locals()->add(l->def_locals()); 4.768 + l = p; // Walk up the parent chain 4.769 + p = l->parent(); 4.770 + } 4.771 + } else { 4.772 + blk->set_loop(innermost); 4.773 + innermost->def_locals()->add(blk->def_locals()); 4.774 + } 4.775 +} 4.776 + 4.777 +// ------------------------------------------------------------------ 4.778 +// ciTypeFlow::Loop::contains 4.779 +// 4.780 +// Returns true if lp is nested loop. 4.781 +bool ciTypeFlow::Loop::contains(ciTypeFlow::Loop* lp) const { 4.782 + assert(lp != NULL, ""); 4.783 + if (this == lp || head() == lp->head()) return true; 4.784 + int depth1 = depth(); 4.785 + int depth2 = lp->depth(); 4.786 + if (depth1 > depth2) 4.787 + return false; 4.788 + while (depth1 < depth2) { 4.789 + depth2--; 4.790 + lp = lp->parent(); 4.791 + } 4.792 + return this == lp; 4.793 +} 4.794 + 4.795 +// ------------------------------------------------------------------ 4.796 +// ciTypeFlow::Loop::depth 4.797 +// 4.798 +// Loop depth 4.799 +int ciTypeFlow::Loop::depth() const { 4.800 + int dp = 0; 4.801 + for (Loop* lp = this->parent(); lp != NULL; lp = lp->parent()) 4.802 + dp++; 4.803 + return dp; 4.804 +} 4.805 + 4.806 +#ifndef PRODUCT 4.807 +// ------------------------------------------------------------------ 4.808 +// ciTypeFlow::Loop::print 4.809 +void ciTypeFlow::Loop::print(outputStream* st, int indent) const { 4.810 + for (int i = 0; i < indent; i++) st->print(" "); 4.811 + st->print("%d<-%d %s", 4.812 + is_root() ? 0 : this->head()->pre_order(), 4.813 + is_root() ? 0 : this->tail()->pre_order(), 4.814 + is_irreducible()?" 
irr":""); 4.815 + st->print(" defs: "); 4.816 + def_locals()->print_on(st, _head->outer()->method()->max_locals()); 4.817 + st->cr(); 4.818 + for (Loop* ch = child(); ch != NULL; ch = ch->sibling()) 4.819 + ch->print(st, indent+2); 4.820 +} 4.821 +#endif 4.822 + 4.823 +// ------------------------------------------------------------------ 4.824 +// ciTypeFlow::df_flow_types 4.825 +// 4.826 +// Perform the depth first type flow analysis. Helper for flow_types. 4.827 +void ciTypeFlow::df_flow_types(Block* start, 4.828 + bool do_flow, 4.829 + StateVector* temp_vector, 4.830 + JsrSet* temp_set) { 4.831 + int dft_len = 100; 4.832 + GrowableArray<Block*> stk(arena(), dft_len, 0, NULL); 4.833 + 4.834 + ciBlock* dummy = _methodBlocks->make_dummy_block(); 4.835 + JsrSet* root_set = new JsrSet(NULL, 0); 4.836 + Block* root_head = new (arena()) Block(this, dummy, root_set); 4.837 + Block* root_tail = new (arena()) Block(this, dummy, root_set); 4.838 + root_head->set_pre_order(0); 4.839 + root_head->set_post_order(0); 4.840 + root_tail->set_pre_order(max_jint); 4.841 + root_tail->set_post_order(max_jint); 4.842 + set_loop_tree_root(new (arena()) Loop(root_head, root_tail)); 4.843 + 4.844 + stk.push(start); 4.845 + 4.846 + _next_pre_order = 0; // initialize pre_order counter 4.847 + _rpo_list = NULL; 4.848 + int next_po = 0; // initialize post_order counter 4.849 + 4.850 + // Compute RPO and the control flow graph 4.851 + int size; 4.852 + while ((size = stk.length()) > 0) { 4.853 + Block* blk = stk.top(); // Leave node on stack 4.854 + if (!blk->is_visited()) { 4.855 + // forward arc in graph 4.856 + assert (!blk->has_pre_order(), ""); 4.857 + blk->set_next_pre_order(); 4.858 + 4.859 + if (_next_pre_order >= MaxNodeLimit / 2) { 4.860 + // Too many basic blocks. Bail out. 4.861 + // This can happen when try/finally constructs are nested to depth N, 4.862 + // and there is O(2**N) cloning of jsr bodies. See bug 4697245! 4.863 + // "MaxNodeLimit / 2" is used because probably the parser will 4.864 + // generate at least twice that many nodes and bail out. 4.865 + record_failure("too many basic blocks"); 4.866 + return; 4.867 + } 4.868 + if (do_flow) { 4.869 + flow_block(blk, temp_vector, temp_set); 4.870 + if (failing()) return; // Watch for bailouts. 4.871 + } 4.872 + } else if (!blk->is_post_visited()) { 4.873 + // cross or back arc 4.874 + for (SuccIter iter(blk); !iter.done(); iter.next()) { 4.875 + Block* succ = iter.succ(); 4.876 + if (!succ->is_visited()) { 4.877 + stk.push(succ); 4.878 + } 4.879 + } 4.880 + if (stk.length() == size) { 4.881 + // There were no additional children, post visit node now 4.882 + stk.pop(); // Remove node from stack 4.883 + 4.884 + build_loop_tree(blk); 4.885 + blk->set_post_order(next_po++); // Assign post order 4.886 + prepend_to_rpo_list(blk); 4.887 + assert(blk->is_post_visited(), ""); 4.888 + 4.889 + if (blk->is_loop_head() && !blk->is_on_work_list()) { 4.890 + // Assume loop heads need more data flow 4.891 + add_to_work_list(blk); 4.892 + } 4.893 + } 4.894 + } else { 4.895 + stk.pop(); // Remove post-visited node from stack 4.896 + } 4.897 + } 4.898 +} 4.899 + 4.900 +// ------------------------------------------------------------------ 4.901 // ciTypeFlow::flow_types 4.902 // 4.903 // Perform the type flow analysis, creating and cloning Blocks as 4.904 @@ -2233,91 +2660,93 @@ 4.905 JsrSet* temp_set = new JsrSet(NULL, 16); 4.906 4.907 // Create the method entry block. 
4.908 - Block* block = block_at(start_bci(), temp_set); 4.909 - block->set_pre_order(_next_pre_order++); 4.910 - assert(block->is_start(), "start block must have order #0"); 4.911 + Block* start = block_at(start_bci(), temp_set); 4.912 4.913 // Load the initial state into it. 4.914 const StateVector* start_state = get_start_state(); 4.915 if (failing()) return; 4.916 - block->meet(start_state); 4.917 - add_to_work_list(block); 4.918 + start->meet(start_state); 4.919 4.920 - // Trickle away. 4.921 + // Depth first visit 4.922 + df_flow_types(start, true /*do flow*/, temp_vector, temp_set); 4.923 + 4.924 + if (failing()) return; 4.925 + assert(_rpo_list == start, "must be start"); 4.926 + 4.927 + // Any loops found? 4.928 + if (loop_tree_root()->child() != NULL && 4.929 + env()->comp_level() >= CompLevel_full_optimization) { 4.930 + // Loop optimizations are not performed on Tier1 compiles. 4.931 + 4.932 + bool changed = clone_loop_heads(loop_tree_root(), temp_vector, temp_set); 4.933 + 4.934 + // If some loop heads were cloned, recompute postorder and loop tree 4.935 + if (changed) { 4.936 + loop_tree_root()->set_child(NULL); 4.937 + for (Block* blk = _rpo_list; blk != NULL;) { 4.938 + Block* next = blk->rpo_next(); 4.939 + blk->df_init(); 4.940 + blk = next; 4.941 + } 4.942 + df_flow_types(start, false /*no flow*/, temp_vector, temp_set); 4.943 + } 4.944 + } 4.945 + 4.946 + if (CITraceTypeFlow) { 4.947 + tty->print_cr("\nLoop tree"); 4.948 + loop_tree_root()->print(); 4.949 + } 4.950 + 4.951 + // Continue flow analysis until fixed point reached 4.952 + 4.953 + debug_only(int max_block = _next_pre_order;) 4.954 + 4.955 while (!work_list_empty()) { 4.956 - Block* block = work_list_next(); 4.957 - flow_block(block, temp_vector, temp_set); 4.958 + Block* blk = work_list_next(); 4.959 + assert (blk->has_post_order(), "post order assigned above"); 4.960 4.961 + flow_block(blk, temp_vector, temp_set); 4.962 4.963 - // NodeCountCutoff is the number of nodes at which the parser 4.964 - // will bail out. Probably if we already have lots of BBs, 4.965 - // the parser will generate at least twice that many nodes and bail out. 4.966 - // Therefore, this is a conservatively large limit at which to 4.967 - // bail out in the pre-parse typeflow pass. 4.968 - int block_limit = MaxNodeLimit / 2; 4.969 - 4.970 - if (_next_pre_order >= block_limit) { 4.971 - // Too many basic blocks. Bail out. 4.972 - // 4.973 - // This can happen when try/finally constructs are nested to depth N, 4.974 - // and there is O(2**N) cloning of jsr bodies. See bug 4697245! 4.975 - record_failure("too many basic blocks"); 4.976 - return; 4.977 - } 4.978 - 4.979 - // Watch for bailouts. 4.980 - if (failing()) return; 4.981 + assert (max_block == _next_pre_order, "no new blocks"); 4.982 + assert (!failing(), "no more bailouts"); 4.983 } 4.984 } 4.985 4.986 // ------------------------------------------------------------------ 4.987 // ciTypeFlow::map_blocks 4.988 // 4.989 -// Create the block map, which indexes blocks in pre_order. 4.990 +// Create the block map, which indexes blocks in reverse post-order. 
4.991 void ciTypeFlow::map_blocks() { 4.992 assert(_block_map == NULL, "single initialization"); 4.993 - int pre_order_limit = _next_pre_order; 4.994 - _block_map = NEW_ARENA_ARRAY(arena(), Block*, pre_order_limit); 4.995 - assert(pre_order_limit == block_count(), ""); 4.996 - int po; 4.997 - for (po = 0; po < pre_order_limit; po++) { 4.998 - debug_only(_block_map[po] = NULL); 4.999 + int block_ct = _next_pre_order; 4.1000 + _block_map = NEW_ARENA_ARRAY(arena(), Block*, block_ct); 4.1001 + assert(block_ct == block_count(), ""); 4.1002 + 4.1003 + Block* blk = _rpo_list; 4.1004 + for (int m = 0; m < block_ct; m++) { 4.1005 + int rpo = blk->rpo(); 4.1006 + assert(rpo == m, "should be sequential"); 4.1007 + _block_map[rpo] = blk; 4.1008 + blk = blk->rpo_next(); 4.1009 } 4.1010 - ciMethodBlocks *mblks = _methodBlocks; 4.1011 - ciBlock* current = NULL; 4.1012 - int limit_bci = code_size(); 4.1013 - for (int bci = 0; bci < limit_bci; bci++) { 4.1014 - ciBlock* ciblk = mblks->block_containing(bci); 4.1015 - if (ciblk != NULL && ciblk != current) { 4.1016 - current = ciblk; 4.1017 - int curidx = ciblk->index(); 4.1018 - int block_count = (_idx_to_blocklist[curidx] == NULL) ? 0 : _idx_to_blocklist[curidx]->length(); 4.1019 - for (int i = 0; i < block_count; i++) { 4.1020 - Block* block = _idx_to_blocklist[curidx]->at(i); 4.1021 - if (!block->has_pre_order()) continue; 4.1022 - int po = block->pre_order(); 4.1023 - assert(_block_map[po] == NULL, "unique ref to block"); 4.1024 - assert(0 <= po && po < pre_order_limit, ""); 4.1025 - _block_map[po] = block; 4.1026 - } 4.1027 - } 4.1028 - } 4.1029 - for (po = 0; po < pre_order_limit; po++) { 4.1030 - assert(_block_map[po] != NULL, "must not drop any blocks"); 4.1031 - Block* block = _block_map[po]; 4.1032 + assert(blk == NULL, "should be done"); 4.1033 + 4.1034 + for (int j = 0; j < block_ct; j++) { 4.1035 + assert(_block_map[j] != NULL, "must not drop any blocks"); 4.1036 + Block* block = _block_map[j]; 4.1037 // Remove dead blocks from successor lists: 4.1038 for (int e = 0; e <= 1; e++) { 4.1039 GrowableArray<Block*>* l = e? block->exceptions(): block->successors(); 4.1040 - for (int i = 0; i < l->length(); i++) { 4.1041 - Block* s = l->at(i); 4.1042 - if (!s->has_pre_order()) { 4.1043 + for (int k = 0; k < l->length(); k++) { 4.1044 + Block* s = l->at(k); 4.1045 + if (!s->has_post_order()) { 4.1046 if (CITraceTypeFlow) { 4.1047 tty->print("Removing dead %s successor of #%d: ", (e? "exceptional": "normal"), block->pre_order()); 4.1048 s->print_value_on(tty); 4.1049 tty->cr(); 4.1050 } 4.1051 l->remove(s); 4.1052 - --i; 4.1053 + --k; 4.1054 } 4.1055 } 4.1056 } 4.1057 @@ -2329,7 +2758,7 @@ 4.1058 // 4.1059 // Find a block with this ciBlock which has a compatible JsrSet. 4.1060 // If no such block exists, create it, unless the option is no_create. 4.1061 -// If the option is create_private_copy, always create a fresh private copy. 4.1062 +// If the option is create_backedge_copy, always create a fresh backedge copy. 
4.1063 ciTypeFlow::Block* ciTypeFlow::get_block_for(int ciBlockIndex, ciTypeFlow::JsrSet* jsrs, CreateOption option) { 4.1064 Arena* a = arena(); 4.1065 GrowableArray<Block*>* blocks = _idx_to_blocklist[ciBlockIndex]; 4.1066 @@ -2342,11 +2771,11 @@ 4.1067 _idx_to_blocklist[ciBlockIndex] = blocks; 4.1068 } 4.1069 4.1070 - if (option != create_private_copy) { 4.1071 + if (option != create_backedge_copy) { 4.1072 int len = blocks->length(); 4.1073 for (int i = 0; i < len; i++) { 4.1074 Block* block = blocks->at(i); 4.1075 - if (!block->is_private_copy() && block->is_compatible_with(jsrs)) { 4.1076 + if (!block->is_backedge_copy() && block->is_compatible_with(jsrs)) { 4.1077 return block; 4.1078 } 4.1079 } 4.1080 @@ -2357,15 +2786,15 @@ 4.1081 4.1082 // We did not find a compatible block. Create one. 4.1083 Block* new_block = new (a) Block(this, _methodBlocks->block(ciBlockIndex), jsrs); 4.1084 - if (option == create_private_copy) new_block->set_private_copy(true); 4.1085 + if (option == create_backedge_copy) new_block->set_backedge_copy(true); 4.1086 blocks->append(new_block); 4.1087 return new_block; 4.1088 } 4.1089 4.1090 // ------------------------------------------------------------------ 4.1091 -// ciTypeFlow::private_copy_count 4.1092 +// ciTypeFlow::backedge_copy_count 4.1093 // 4.1094 -int ciTypeFlow::private_copy_count(int ciBlockIndex, ciTypeFlow::JsrSet* jsrs) const { 4.1095 +int ciTypeFlow::backedge_copy_count(int ciBlockIndex, ciTypeFlow::JsrSet* jsrs) const { 4.1096 GrowableArray<Block*>* blocks = _idx_to_blocklist[ciBlockIndex]; 4.1097 4.1098 if (blocks == NULL) { 4.1099 @@ -2376,7 +2805,7 @@ 4.1100 int len = blocks->length(); 4.1101 for (int i = 0; i < len; i++) { 4.1102 Block* block = blocks->at(i); 4.1103 - if (block->is_private_copy() && block->is_compatible_with(jsrs)) { 4.1104 + if (block->is_backedge_copy() && block->is_compatible_with(jsrs)) { 4.1105 count++; 4.1106 } 4.1107 } 4.1108 @@ -2405,10 +2834,12 @@ 4.1109 if (failing()) { 4.1110 return; 4.1111 } 4.1112 + 4.1113 + map_blocks(); 4.1114 + 4.1115 if (CIPrintTypeFlow || CITraceTypeFlow) { 4.1116 - print_on(tty); 4.1117 + rpo_print_on(tty); 4.1118 } 4.1119 - map_blocks(); 4.1120 } 4.1121 4.1122 // ------------------------------------------------------------------ 4.1123 @@ -2466,4 +2897,19 @@ 4.1124 st->print_cr("********************************************************"); 4.1125 st->cr(); 4.1126 } 4.1127 + 4.1128 +void ciTypeFlow::rpo_print_on(outputStream* st) const { 4.1129 + st->print_cr("********************************************************"); 4.1130 + st->print ("TypeFlow for "); 4.1131 + method()->name()->print_symbol_on(st); 4.1132 + int limit_bci = code_size(); 4.1133 + st->print_cr(" %d bytes", limit_bci); 4.1134 + for (Block* blk = _rpo_list; blk != NULL; blk = blk->rpo_next()) { 4.1135 + blk->print_on(st); 4.1136 + st->print_cr("--------------------------------------------------------"); 4.1137 + st->cr(); 4.1138 + } 4.1139 + st->print_cr("********************************************************"); 4.1140 + st->cr(); 4.1141 +} 4.1142 #endif
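The old is_simpler_than() work-list ordering is gone: add_to_work_list() now keeps blocks sorted by decreasing post order, i.e. increasing reverse post order, so each re-flow visits blocks roughly from the method entry toward the exits and the analysis converges faster. A self-contained sketch of just that insertion discipline (the Blk struct and add_sorted name are invented for the example; the real code additionally handles blocks that have not been assigned a post order yet):

    #include <cstdio>

    // Toy stand-in for ciTypeFlow::Block; only the fields the ordering needs.
    struct Blk {
      int  post_order;
      Blk* next;
    };

    // Mirror of the new work-list ordering: keep the singly linked list
    // sorted by decreasing post_order (equivalently, increasing RPO).
    static Blk* add_sorted(Blk* head, Blk* b) {
      Blk* prev = NULL;
      Blk* cur  = head;
      while (cur != NULL && cur->post_order >= b->post_order) {
        prev = cur;
        cur  = cur->next;
      }
      b->next = cur;
      if (prev == NULL) return b;      // becomes the new head of the work list
      prev->next = b;
      return head;
    }

    int main() {
      Blk a = {5, NULL}, b = {9, NULL}, c = {2, NULL};
      Blk* work_list = NULL;
      work_list = add_sorted(work_list, &a);
      work_list = add_sorted(work_list, &b);
      work_list = add_sorted(work_list, &c);
      for (Blk* p = work_list; p != NULL; p = p->next) {
        printf("%d ", p->post_order);  // prints: 9 5 2
      }
      printf("\n");
      return 0;
    }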
5.1 --- a/src/share/vm/ci/ciTypeFlow.hpp Tue Sep 30 12:24:27 2008 -0400 5.2 +++ b/src/share/vm/ci/ciTypeFlow.hpp Wed Oct 01 20:15:03 2008 -0400 5.3 @@ -34,11 +34,13 @@ 5.4 int _max_locals; 5.5 int _max_stack; 5.6 int _code_size; 5.7 + bool _has_irreducible_entry; 5.8 5.9 const char* _failure_reason; 5.10 5.11 public: 5.12 class StateVector; 5.13 + class Loop; 5.14 class Block; 5.15 5.16 // Build a type flow analyzer 5.17 @@ -55,6 +57,7 @@ 5.18 int max_stack() const { return _max_stack; } 5.19 int max_cells() const { return _max_locals + _max_stack; } 5.20 int code_size() const { return _code_size; } 5.21 + bool has_irreducible_entry() const { return _has_irreducible_entry; } 5.22 5.23 // Represents information about an "active" jsr call. This 5.24 // class represents a call to the routine at some entry address 5.25 @@ -125,6 +128,19 @@ 5.26 void print_on(outputStream* st) const PRODUCT_RETURN; 5.27 }; 5.28 5.29 + class LocalSet VALUE_OBJ_CLASS_SPEC { 5.30 + private: 5.31 + enum Constants { max = 63 }; 5.32 + uint64_t _bits; 5.33 + public: 5.34 + LocalSet() : _bits(0) {} 5.35 + void add(uint32_t i) { if (i < (uint32_t)max) _bits |= (1LL << i); } 5.36 + void add(LocalSet* ls) { _bits |= ls->_bits; } 5.37 + bool test(uint32_t i) const { return i < (uint32_t)max ? (_bits>>i)&1U : true; } 5.38 + void clear() { _bits = 0; } 5.39 + void print_on(outputStream* st, int limit) const PRODUCT_RETURN; 5.40 + }; 5.41 + 5.42 // Used as a combined index for locals and temps 5.43 enum Cell { 5.44 Cell_0, Cell_max = INT_MAX 5.45 @@ -142,6 +158,8 @@ 5.46 int _trap_bci; 5.47 int _trap_index; 5.48 5.49 + LocalSet _def_locals; // For entire block 5.50 + 5.51 static ciType* type_meet_internal(ciType* t1, ciType* t2, ciTypeFlow* analyzer); 5.52 5.53 public: 5.54 @@ -181,6 +199,9 @@ 5.55 int monitor_count() const { return _monitor_count; } 5.56 void set_monitor_count(int mc) { _monitor_count = mc; } 5.57 5.58 + LocalSet* def_locals() { return &_def_locals; } 5.59 + const LocalSet* def_locals() const { return &_def_locals; } 5.60 + 5.61 static Cell start_cell() { return (Cell)0; } 5.62 static Cell next_cell(Cell c) { return (Cell)(((int)c) + 1); } 5.63 Cell limit_cell() const { 5.64 @@ -250,6 +271,10 @@ 5.65 return type->basic_type() == T_DOUBLE; 5.66 } 5.67 5.68 + void store_to_local(int lnum) { 5.69 + _def_locals.add((uint) lnum); 5.70 + } 5.71 + 5.72 void push_translate(ciType* type); 5.73 5.74 void push_int() { 5.75 @@ -358,6 +383,7 @@ 5.76 "must be reference type or return address"); 5.77 overwrite_local_double_long(index); 5.78 set_type_at(local(index), type); 5.79 + store_to_local(index); 5.80 } 5.81 5.82 void load_local_double(int index) { 5.83 @@ -376,6 +402,8 @@ 5.84 overwrite_local_double_long(index); 5.85 set_type_at(local(index), type); 5.86 set_type_at(local(index+1), type2); 5.87 + store_to_local(index); 5.88 + store_to_local(index+1); 5.89 } 5.90 5.91 void load_local_float(int index) { 5.92 @@ -388,6 +416,7 @@ 5.93 assert(is_float(type), "must be float type"); 5.94 overwrite_local_double_long(index); 5.95 set_type_at(local(index), type); 5.96 + store_to_local(index); 5.97 } 5.98 5.99 void load_local_int(int index) { 5.100 @@ -400,6 +429,7 @@ 5.101 assert(is_int(type), "must be int type"); 5.102 overwrite_local_double_long(index); 5.103 set_type_at(local(index), type); 5.104 + store_to_local(index); 5.105 } 5.106 5.107 void load_local_long(int index) { 5.108 @@ -418,6 +448,8 @@ 5.109 overwrite_local_double_long(index); 5.110 set_type_at(local(index), type); 5.111 set_type_at(local(index+1), type2); 
5.112 + store_to_local(index); 5.113 + store_to_local(index+1); 5.114 } 5.115 5.116 // Stop interpretation of this path with a trap. 5.117 @@ -450,13 +482,31 @@ 5.118 }; 5.119 5.120 // Parameter for "find_block" calls: 5.121 - // Describes the difference between a public and private copy. 5.122 + // Describes the difference between a public and backedge copy. 5.123 enum CreateOption { 5.124 create_public_copy, 5.125 - create_private_copy, 5.126 + create_backedge_copy, 5.127 no_create 5.128 }; 5.129 5.130 + // Successor iterator 5.131 + class SuccIter : public StackObj { 5.132 + private: 5.133 + Block* _pred; 5.134 + int _index; 5.135 + Block* _succ; 5.136 + public: 5.137 + SuccIter() : _pred(NULL), _index(-1), _succ(NULL) {} 5.138 + SuccIter(Block* pred) : _pred(pred), _index(-1), _succ(NULL) { next(); } 5.139 + int index() { return _index; } 5.140 + Block* pred() { return _pred; } // Return predecessor 5.141 + bool done() { return _index < 0; } // Finished? 5.142 + Block* succ() { return _succ; } // Return current successor 5.143 + void next(); // Advance 5.144 + void set_succ(Block* succ); // Update current successor 5.145 + bool is_normal_ctrl() { return index() < _pred->successors()->length(); } 5.146 + }; 5.147 + 5.148 // A basic block 5.149 class Block : public ResourceObj { 5.150 private: 5.151 @@ -470,15 +520,24 @@ 5.152 int _trap_bci; 5.153 int _trap_index; 5.154 5.155 - // A reasonable approximation to pre-order, provided.to the client. 5.156 + // pre_order, assigned at first visit. Used as block ID and "visited" tag 5.157 int _pre_order; 5.158 5.159 - // Has this block been cloned for some special purpose? 5.160 - bool _private_copy; 5.161 + // A post-order, used to compute the reverse post order (RPO) provided to the client 5.162 + int _post_order; // used to compute rpo 5.163 + 5.164 + // Has this block been cloned for a loop backedge? 
5.165 + bool _backedge_copy; 5.166 5.167 // A pointer used for our internal work list 5.168 - Block* _next; 5.169 - bool _on_work_list; 5.170 + Block* _next; 5.171 + bool _on_work_list; // on the work list 5.172 + Block* _rpo_next; // Reverse post order list 5.173 + 5.174 + // Loop info 5.175 + Loop* _loop; // nearest loop 5.176 + bool _irreducible_entry; // entry to irreducible loop 5.177 + bool _exception_entry; // entry to exception handler 5.178 5.179 ciBlock* ciblock() const { return _ciblock; } 5.180 StateVector* state() const { return _state; } 5.181 @@ -504,10 +563,11 @@ 5.182 int start() const { return _ciblock->start_bci(); } 5.183 int limit() const { return _ciblock->limit_bci(); } 5.184 int control() const { return _ciblock->control_bci(); } 5.185 + JsrSet* jsrs() const { return _jsrs; } 5.186 5.187 - bool is_private_copy() const { return _private_copy; } 5.188 - void set_private_copy(bool z); 5.189 - int private_copy_count() const { return outer()->private_copy_count(ciblock()->index(), _jsrs); } 5.190 + bool is_backedge_copy() const { return _backedge_copy; } 5.191 + void set_backedge_copy(bool z); 5.192 + int backedge_copy_count() const { return outer()->backedge_copy_count(ciblock()->index(), _jsrs); } 5.193 5.194 // access to entry state 5.195 int stack_size() const { return _state->stack_size(); } 5.196 @@ -515,6 +575,20 @@ 5.197 ciType* local_type_at(int i) const { return _state->local_type_at(i); } 5.198 ciType* stack_type_at(int i) const { return _state->stack_type_at(i); } 5.199 5.200 + // Data flow on locals 5.201 + bool is_invariant_local(uint v) const { 5.202 + assert(is_loop_head(), "only loop heads"); 5.203 + // Find outermost loop with same loop head 5.204 + Loop* lp = loop(); 5.205 + while (lp->parent() != NULL) { 5.206 + if (lp->parent()->head() != lp->head()) break; 5.207 + lp = lp->parent(); 5.208 + } 5.209 + return !lp->def_locals()->test(v); 5.210 + } 5.211 + LocalSet* def_locals() { return _state->def_locals(); } 5.212 + const LocalSet* def_locals() const { return _state->def_locals(); } 5.213 + 5.214 // Get the successors for this Block. 5.215 GrowableArray<Block*>* successors(ciBytecodeStream* str, 5.216 StateVector* state, 5.217 @@ -524,13 +598,6 @@ 5.218 return _successors; 5.219 } 5.220 5.221 - // Helper function for "successors" when making private copies of 5.222 - // loop heads for C2. 5.223 - Block * clone_loop_head(ciTypeFlow* analyzer, 5.224 - int branch_bci, 5.225 - Block* target, 5.226 - JsrSet* jsrs); 5.227 - 5.228 // Get the exceptional successors for this Block. 5.229 GrowableArray<Block*>* exceptions() { 5.230 if (_exceptions == NULL) { 5.231 @@ -584,17 +651,126 @@ 5.232 bool is_on_work_list() const { return _on_work_list; } 5.233 5.234 bool has_pre_order() const { return _pre_order >= 0; } 5.235 - void set_pre_order(int po) { assert(!has_pre_order() && po >= 0, ""); _pre_order = po; } 5.236 + void set_pre_order(int po) { assert(!has_pre_order(), ""); _pre_order = po; } 5.237 int pre_order() const { assert(has_pre_order(), ""); return _pre_order; } 5.238 + void set_next_pre_order() { set_pre_order(outer()->inc_next_pre_order()); } 5.239 bool is_start() const { return _pre_order == outer()->start_block_num(); } 5.240 5.241 - // A ranking used in determining order within the work list. 
5.242 - bool is_simpler_than(Block* other); 5.243 + // Reverse post order 5.244 + void df_init(); 5.245 + bool has_post_order() const { return _post_order >= 0; } 5.246 + void set_post_order(int po) { assert(!has_post_order() && po >= 0, ""); _post_order = po; } 5.247 + void reset_post_order(int o){ _post_order = o; } 5.248 + int post_order() const { assert(has_post_order(), ""); return _post_order; } 5.249 + 5.250 + bool has_rpo() const { return has_post_order() && outer()->have_block_count(); } 5.251 + int rpo() const { assert(has_rpo(), ""); return outer()->block_count() - post_order() - 1; } 5.252 + void set_rpo_next(Block* b) { _rpo_next = b; } 5.253 + Block* rpo_next() { return _rpo_next; } 5.254 + 5.255 + // Loops 5.256 + Loop* loop() const { return _loop; } 5.257 + void set_loop(Loop* lp) { _loop = lp; } 5.258 + bool is_loop_head() const { return _loop && _loop->head() == this; } 5.259 + void set_irreducible_entry(bool c) { _irreducible_entry = c; } 5.260 + bool is_irreducible_entry() const { return _irreducible_entry; } 5.261 + bool is_visited() const { return has_pre_order(); } 5.262 + bool is_post_visited() const { return has_post_order(); } 5.263 + bool is_clonable_exit(Loop* lp); 5.264 + Block* looping_succ(Loop* lp); // Successor inside of loop 5.265 + bool is_single_entry_loop_head() const { 5.266 + if (!is_loop_head()) return false; 5.267 + for (Loop* lp = loop(); lp != NULL && lp->head() == this; lp = lp->parent()) 5.268 + if (lp->is_irreducible()) return false; 5.269 + return true; 5.270 + } 5.271 5.272 void print_value_on(outputStream* st) const PRODUCT_RETURN; 5.273 void print_on(outputStream* st) const PRODUCT_RETURN; 5.274 }; 5.275 5.276 + // Loop 5.277 + class Loop : public ResourceObj { 5.278 + private: 5.279 + Loop* _parent; 5.280 + Loop* _sibling; // List of siblings, null terminated 5.281 + Loop* _child; // Head of child list threaded thru sibling pointer 5.282 + Block* _head; // Head of loop 5.283 + Block* _tail; // Tail of loop 5.284 + bool _irreducible; 5.285 + LocalSet _def_locals; 5.286 + 5.287 + public: 5.288 + Loop(Block* head, Block* tail) : 5.289 + _head(head), _tail(tail), 5.290 + _parent(NULL), _sibling(NULL), _child(NULL), 5.291 + _irreducible(false), _def_locals() {} 5.292 + 5.293 + Loop* parent() const { return _parent; } 5.294 + Loop* sibling() const { return _sibling; } 5.295 + Loop* child() const { return _child; } 5.296 + Block* head() const { return _head; } 5.297 + Block* tail() const { return _tail; } 5.298 + void set_parent(Loop* p) { _parent = p; } 5.299 + void set_sibling(Loop* s) { _sibling = s; } 5.300 + void set_child(Loop* c) { _child = c; } 5.301 + void set_head(Block* hd) { _head = hd; } 5.302 + void set_tail(Block* tl) { _tail = tl; } 5.303 + 5.304 + int depth() const; // nesting depth 5.305 + 5.306 + // Returns true if lp is a nested loop or us. 5.307 + bool contains(Loop* lp) const; 5.308 + bool contains(Block* blk) const { return contains(blk->loop()); } 5.309 + 5.310 + // Data flow on locals 5.311 + LocalSet* def_locals() { return &_def_locals; } 5.312 + const LocalSet* def_locals() const { return &_def_locals; } 5.313 + 5.314 + // Merge the branch lp into this branch, sorting on the loop head 5.315 + // pre_orders. Returns the new branch. 
5.316 + Loop* sorted_merge(Loop* lp); 5.317 + 5.318 + // Mark non-single entry to loop 5.319 + void set_irreducible(Block* entry) { 5.320 + _irreducible = true; 5.321 + entry->set_irreducible_entry(true); 5.322 + } 5.323 + bool is_irreducible() const { return _irreducible; } 5.324 + 5.325 + bool is_root() const { return _tail->pre_order() == max_jint; } 5.326 + 5.327 + void print(outputStream* st = tty, int indent = 0) const PRODUCT_RETURN; 5.328 + }; 5.329 + 5.330 + // Postorder iteration over the loop tree. 5.331 + class PostorderLoops : public StackObj { 5.332 + private: 5.333 + Loop* _root; 5.334 + Loop* _current; 5.335 + public: 5.336 + PostorderLoops(Loop* root) : _root(root), _current(root) { 5.337 + while (_current->child() != NULL) { 5.338 + _current = _current->child(); 5.339 + } 5.340 + } 5.341 + bool done() { return _current == NULL; } // Finished iterating? 5.342 + void next(); // Advance to next loop 5.343 + Loop* current() { return _current; } // Return current loop. 5.344 + }; 5.345 + 5.346 + // Preorder iteration over the loop tree. 5.347 + class PreorderLoops : public StackObj { 5.348 + private: 5.349 + Loop* _root; 5.350 + Loop* _current; 5.351 + public: 5.352 + PreorderLoops(Loop* root) : _root(root), _current(root) {} 5.353 + bool done() { return _current == NULL; } // Finished iterating? 5.354 + void next(); // Advance to next loop 5.355 + Loop* current() { return _current; } // Return current loop. 5.356 + }; 5.357 + 5.358 // Standard indexes of successors, for various bytecodes. 5.359 enum { 5.360 FALL_THROUGH = 0, // normal control 5.361 @@ -619,6 +795,12 @@ 5.362 // Tells if a given instruction is able to generate an exception edge. 5.363 bool can_trap(ciBytecodeStream& str); 5.364 5.365 + // Clone the loop heads. Returns true if any cloning occurred. 5.366 + bool clone_loop_heads(Loop* lp, StateVector* temp_vector, JsrSet* temp_set); 5.367 + 5.368 + // Clone lp's head and replace tail's successors with clone. 5.369 + Block* clone_loop_head(Loop* lp, StateVector* temp_vector, JsrSet* temp_set); 5.370 + 5.371 public: 5.372 // Return the block beginning at bci which has a JsrSet compatible 5.373 // with jsrs. 5.374 @@ -627,8 +809,8 @@ 5.375 // block factory 5.376 Block* get_block_for(int ciBlockIndex, JsrSet* jsrs, CreateOption option = create_public_copy); 5.377 5.378 - // How many of the blocks have the private_copy bit set? 5.379 - int private_copy_count(int ciBlockIndex, JsrSet* jsrs) const; 5.380 + // How many of the blocks have the backedge_copy bit set? 5.381 + int backedge_copy_count(int ciBlockIndex, JsrSet* jsrs) const; 5.382 5.383 // Return an existing block containing bci which has a JsrSet compatible 5.384 // with jsrs, or NULL if there is none. 5.385 @@ -651,11 +833,18 @@ 5.386 return _block_map[po]; } 5.387 Block* start_block() const { return pre_order_at(start_block_num()); } 5.388 int start_block_num() const { return 0; } 5.389 + Block* rpo_at(int rpo) const { assert(0 <= rpo && rpo < block_count(), "out of bounds"); 5.390 + return _block_map[rpo]; } 5.391 + int next_pre_order() { return _next_pre_order; } 5.392 + int inc_next_pre_order() { return _next_pre_order++; } 5.393 5.394 private: 5.395 // A work list used during flow analysis. 5.396 Block* _work_list; 5.397 5.398 + // List of blocks in reverse post order 5.399 + Block* _rpo_list; 5.400 + 5.401 // Next Block::_pre_order. After mapping, doubles as block_count. 5.402 int _next_pre_order; 5.403 5.404 @@ -668,6 +857,15 @@ 5.405 // Add a basic block to our work list. 
5.406 void add_to_work_list(Block* block); 5.407 5.408 + // Prepend a basic block to rpo list. 5.409 + void prepend_to_rpo_list(Block* blk) { 5.410 + blk->set_rpo_next(_rpo_list); 5.411 + _rpo_list = blk; 5.412 + } 5.413 + 5.414 + // Root of the loop tree 5.415 + Loop* _loop_tree_root; 5.416 + 5.417 // State used for make_jsr_record 5.418 int _jsr_count; 5.419 GrowableArray<JsrRecord*>* _jsr_records; 5.420 @@ -677,6 +875,9 @@ 5.421 // does not already exist. 5.422 JsrRecord* make_jsr_record(int entry_address, int return_address); 5.423 5.424 + void set_loop_tree_root(Loop* ltr) { _loop_tree_root = ltr; } 5.425 + Loop* loop_tree_root() { return _loop_tree_root; } 5.426 + 5.427 private: 5.428 // Get the initial state for start_bci: 5.429 const StateVector* get_start_state(); 5.430 @@ -703,6 +904,15 @@ 5.431 // necessary. 5.432 void flow_types(); 5.433 5.434 + // Perform the depth first type flow analysis. Helper for flow_types. 5.435 + void df_flow_types(Block* start, 5.436 + bool do_flow, 5.437 + StateVector* temp_vector, 5.438 + JsrSet* temp_set); 5.439 + 5.440 + // Incrementally build loop tree. 5.441 + void build_loop_tree(Block* blk); 5.442 + 5.443 // Create the block map, which indexes blocks in pre_order. 5.444 void map_blocks(); 5.445 5.446 @@ -711,4 +921,6 @@ 5.447 void do_flow(); 5.448 5.449 void print_on(outputStream* st) const PRODUCT_RETURN; 5.450 + 5.451 + void rpo_print_on(outputStream* st) const PRODUCT_RETURN; 5.452 };
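[Editor's note] The ciTypeFlow.hpp hunks above add a parent/sibling/child loop tree plus PostorderLoops and PreorderLoops iterators over it. For context, here is a minimal, self-contained sketch (not HotSpot code) of how a postorder walk over such a tree can be driven: start at the deepest (leftmost) child, and on each step move to the sibling's deepest descendant if a sibling exists, otherwise to the parent. The SimpleLoop type and the main() driver are illustrative assumptions only.

#include <cstdio>

// Illustrative stand-in for the Loop node shape declared above: a parent
// pointer, a null-terminated sibling list, and a child list threaded
// through the sibling pointers.
struct SimpleLoop {
  SimpleLoop* parent;
  SimpleLoop* sibling;
  SimpleLoop* child;
  int         id;
};

// Descend to the deepest (leftmost) child, i.e. the first loop in postorder.
static SimpleLoop* leftmost_leaf(SimpleLoop* lp) {
  while (lp->child != nullptr) lp = lp->child;
  return lp;
}

// One possible postorder advance: visit a sibling's subtree next if there is
// one, otherwise move to the parent; stop after the root has been visited.
static SimpleLoop* postorder_next(SimpleLoop* current, SimpleLoop* root) {
  if (current == root) return nullptr;
  if (current->sibling != nullptr) return leftmost_leaf(current->sibling);
  return current->parent;
}

int main() {
  // Root with two children; the first child has one child of its own.
  SimpleLoop root = {nullptr, nullptr, nullptr, 0};
  SimpleLoop a    = {&root,   nullptr, nullptr, 1};
  SimpleLoop b    = {&root,   nullptr, nullptr, 2};
  SimpleLoop a1   = {&a,      nullptr, nullptr, 3};
  root.child = &a; a.sibling = &b; a.child = &a1;

  // Prints the postorder 3, 1, 2, 0.
  for (SimpleLoop* lp = leftmost_leaf(&root); lp != nullptr;
       lp = postorder_next(lp, &root)) {
    std::printf("loop %d\n", lp->id);
  }
  return 0;
}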
6.1 --- a/src/share/vm/code/nmethod.cpp Tue Sep 30 12:24:27 2008 -0400 6.2 +++ b/src/share/vm/code/nmethod.cpp Wed Oct 01 20:15:03 2008 -0400 6.3 @@ -1350,11 +1350,7 @@ 6.4 return false; 6.5 } 6.6 } 6.7 - if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) { 6.8 - // Cannot do this test if verification of the UseParallelOldGC 6.9 - // code using the PSMarkSweep code is being done. 6.10 - assert(unloading_occurred, "Inconsistency in unloading"); 6.11 - } 6.12 + assert(unloading_occurred, "Inconsistency in unloading"); 6.13 make_unloaded(is_alive, obj); 6.14 return true; 6.15 }
7.1 --- a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp Tue Sep 30 12:24:27 2008 -0400 7.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp Wed Oct 01 20:15:03 2008 -0400 7.3 @@ -210,10 +210,6 @@ 7.4 PSScavenge::initialize(); 7.5 if (UseParallelOldGC) { 7.6 PSParallelCompact::post_initialize(); 7.7 - if (VerifyParallelOldWithMarkSweep) { 7.8 - // Will be used for verification of par old. 7.9 - PSMarkSweep::initialize(); 7.10 - } 7.11 } else { 7.12 PSMarkSweep::initialize(); 7.13 } 7.14 @@ -402,7 +398,7 @@ 7.15 return result; 7.16 } 7.17 if (!is_tlab && 7.18 - size >= (young_gen()->eden_space()->capacity_in_words() / 2)) { 7.19 + size >= (young_gen()->eden_space()->capacity_in_words(Thread::current()) / 2)) { 7.20 result = old_gen()->allocate(size, is_tlab); 7.21 if (result != NULL) { 7.22 return result;
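[Editor's note] The second parallelScavengeHeap.cpp hunk keeps the existing policy of routing very large non-TLAB requests straight to the old generation; the only change is that eden capacity is now queried per-thread via capacity_in_words(Thread::current()). A standalone sketch of that decision rule, with made-up capacity numbers and a hypothetical helper name, assuming nothing about the real PSYoungGen/PSOldGen classes:

#include <cstddef>
#include <cstdio>

// Illustrative policy only: a non-TLAB allocation of at least half of eden's
// capacity is considered too big for the young generation and is sent to the
// old generation instead.
static bool should_allocate_in_old_gen(size_t request_words,
                                       size_t eden_capacity_words,
                                       bool is_tlab) {
  return !is_tlab && request_words >= eden_capacity_words / 2;
}

int main() {
  const size_t eden_capacity_words = 1u << 20;  // hypothetical: 1M words
  std::printf("%d\n", should_allocate_in_old_gen(1u << 19, eden_capacity_words, false)); // 1
  std::printf("%d\n", should_allocate_in_old_gen(1u << 10, eden_capacity_words, false)); // 0
  std::printf("%d\n", should_allocate_in_old_gen(1u << 19, eden_capacity_words, true));  // 0
  return 0;
}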
8.1 --- a/src/share/vm/gc_implementation/parallelScavenge/pcTasks.cpp Tue Sep 30 12:24:27 2008 -0400 8.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/pcTasks.cpp Wed Oct 01 20:15:03 2008 -0400 8.3 @@ -146,7 +146,7 @@ 8.4 { 8.5 ParallelScavengeHeap* heap = PSParallelCompact::gc_heap(); 8.6 uint parallel_gc_threads = heap->gc_task_manager()->workers(); 8.7 - ChunkTaskQueueSet* qset = ParCompactionManager::chunk_array(); 8.8 + RegionTaskQueueSet* qset = ParCompactionManager::region_array(); 8.9 ParallelTaskTerminator terminator(parallel_gc_threads, qset); 8.10 GCTaskQueue* q = GCTaskQueue::create(); 8.11 for(uint i=0; i<parallel_gc_threads; i++) { 8.12 @@ -205,38 +205,38 @@ 8.13 } 8.14 8.15 // 8.16 -// StealChunkCompactionTask 8.17 +// StealRegionCompactionTask 8.18 // 8.19 8.20 8.21 -StealChunkCompactionTask::StealChunkCompactionTask(ParallelTaskTerminator* t) : 8.22 - _terminator(t) {}; 8.23 +StealRegionCompactionTask::StealRegionCompactionTask(ParallelTaskTerminator* t): 8.24 + _terminator(t) {} 8.25 8.26 -void StealChunkCompactionTask::do_it(GCTaskManager* manager, uint which) { 8.27 +void StealRegionCompactionTask::do_it(GCTaskManager* manager, uint which) { 8.28 assert(Universe::heap()->is_gc_active(), "called outside gc"); 8.29 8.30 - NOT_PRODUCT(TraceTime tm("StealChunkCompactionTask", 8.31 + NOT_PRODUCT(TraceTime tm("StealRegionCompactionTask", 8.32 PrintGCDetails && TraceParallelOldGCTasks, true, gclog_or_tty)); 8.33 8.34 ParCompactionManager* cm = 8.35 ParCompactionManager::gc_thread_compaction_manager(which); 8.36 8.37 - // Has to drain stacks first because there may be chunks on 8.38 + // Has to drain stacks first because there may be regions on 8.39 // preloaded onto the stack and this thread may never have 8.40 // done a draining task. Are the draining tasks needed? 
8.41 8.42 - cm->drain_chunk_stacks(); 8.43 + cm->drain_region_stacks(); 8.44 8.45 - size_t chunk_index = 0; 8.46 + size_t region_index = 0; 8.47 int random_seed = 17; 8.48 8.49 // If we're the termination task, try 10 rounds of stealing before 8.50 // setting the termination flag 8.51 8.52 while(true) { 8.53 - if (ParCompactionManager::steal(which, &random_seed, chunk_index)) { 8.54 - PSParallelCompact::fill_and_update_chunk(cm, chunk_index); 8.55 - cm->drain_chunk_stacks(); 8.56 + if (ParCompactionManager::steal(which, &random_seed, region_index)) { 8.57 + PSParallelCompact::fill_and_update_region(cm, region_index); 8.58 + cm->drain_region_stacks(); 8.59 } else { 8.60 if (terminator()->offer_termination()) { 8.61 break; 8.62 @@ -249,11 +249,10 @@ 8.63 8.64 UpdateDensePrefixTask::UpdateDensePrefixTask( 8.65 PSParallelCompact::SpaceId space_id, 8.66 - size_t chunk_index_start, 8.67 - size_t chunk_index_end) : 8.68 - _space_id(space_id), _chunk_index_start(chunk_index_start), 8.69 - _chunk_index_end(chunk_index_end) 8.70 -{} 8.71 + size_t region_index_start, 8.72 + size_t region_index_end) : 8.73 + _space_id(space_id), _region_index_start(region_index_start), 8.74 + _region_index_end(region_index_end) {} 8.75 8.76 void UpdateDensePrefixTask::do_it(GCTaskManager* manager, uint which) { 8.77 8.78 @@ -265,8 +264,8 @@ 8.79 8.80 PSParallelCompact::update_and_deadwood_in_dense_prefix(cm, 8.81 _space_id, 8.82 - _chunk_index_start, 8.83 - _chunk_index_end); 8.84 + _region_index_start, 8.85 + _region_index_end); 8.86 } 8.87 8.88 void DrainStacksCompactionTask::do_it(GCTaskManager* manager, uint which) { 8.89 @@ -278,6 +277,6 @@ 8.90 ParCompactionManager* cm = 8.91 ParCompactionManager::gc_thread_compaction_manager(which); 8.92 8.93 - // Process any chunks already in the compaction managers stacks. 8.94 - cm->drain_chunk_stacks(); 8.95 + // Process any regions already in the compaction managers stacks. 8.96 + cm->drain_region_stacks(); 8.97 }
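[Editor's note] The StealRegionCompactionTask::do_it loop above follows a common steal-until-terminated shape: drain your own region stack first, then repeatedly try to steal a region from another worker and drain again, offering termination only when stealing fails. The sketch below mimics that shape single-threaded over plain std::deque work queues; the queue layout and the process_region stand-in are assumptions, not the real ParCompactionManager API.

#include <cstdio>
#include <deque>
#include <vector>

// Stand-in for filling and updating one region (fill_and_update_region above).
static void process_region(unsigned worker, size_t region_index) {
  std::printf("worker %u fills region %zu\n", worker, region_index);
}

// Drain a worker's own queue completely (analogous to drain_region_stacks()).
static void drain_own(unsigned worker, std::deque<size_t>& queue) {
  while (!queue.empty()) {
    size_t region = queue.back();
    queue.pop_back();
    process_region(worker, region);
  }
}

// Try to take one region from some other worker's queue ("stealing").
static bool try_steal(unsigned thief, std::vector<std::deque<size_t>>& queues,
                      size_t& region) {
  for (unsigned v = 0; v < queues.size(); ++v) {
    if (v != thief && !queues[v].empty()) {
      region = queues[v].front();
      queues[v].pop_front();
      return true;
    }
  }
  return false;  // nothing to steal: the real task would offer termination here
}

int main() {
  // Two workers with preloaded regions; worker 0 ends up stealing from worker 1.
  std::vector<std::deque<size_t>> queues(2);
  queues[0] = {0, 1, 2, 3};
  queues[1] = {4};

  for (unsigned worker = 0; worker < 2; ++worker) {
    drain_own(worker, queues[worker]);
    size_t region;
    while (try_steal(worker, queues, region)) {  // steal loop
      process_region(worker, region);
      drain_own(worker, queues[worker]);
    }
  }
  return 0;
}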
9.1 --- a/src/share/vm/gc_implementation/parallelScavenge/pcTasks.hpp Tue Sep 30 12:24:27 2008 -0400 9.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/pcTasks.hpp Wed Oct 01 20:15:03 2008 -0400 9.3 @@ -188,18 +188,18 @@ 9.4 }; 9.5 9.6 // 9.7 -// StealChunkCompactionTask 9.8 +// StealRegionCompactionTask 9.9 // 9.10 // This task is used to distribute work to idle threads. 9.11 // 9.12 9.13 -class StealChunkCompactionTask : public GCTask { 9.14 +class StealRegionCompactionTask : public GCTask { 9.15 private: 9.16 ParallelTaskTerminator* const _terminator; 9.17 public: 9.18 - StealChunkCompactionTask(ParallelTaskTerminator* t); 9.19 + StealRegionCompactionTask(ParallelTaskTerminator* t); 9.20 9.21 - char* name() { return (char *)"steal-chunk-task"; } 9.22 + char* name() { return (char *)"steal-region-task"; } 9.23 ParallelTaskTerminator* terminator() { return _terminator; } 9.24 9.25 virtual void do_it(GCTaskManager* manager, uint which); 9.26 @@ -215,15 +215,15 @@ 9.27 class UpdateDensePrefixTask : public GCTask { 9.28 private: 9.29 PSParallelCompact::SpaceId _space_id; 9.30 - size_t _chunk_index_start; 9.31 - size_t _chunk_index_end; 9.32 + size_t _region_index_start; 9.33 + size_t _region_index_end; 9.34 9.35 public: 9.36 char* name() { return (char *)"update-dense_prefix-task"; } 9.37 9.38 UpdateDensePrefixTask(PSParallelCompact::SpaceId space_id, 9.39 - size_t chunk_index_start, 9.40 - size_t chunk_index_end); 9.41 + size_t region_index_start, 9.42 + size_t region_index_end); 9.43 9.44 virtual void do_it(GCTaskManager* manager, uint which); 9.45 }; 9.46 @@ -231,17 +231,17 @@ 9.47 // 9.48 // DrainStacksCompactionTask 9.49 // 9.50 -// This task processes chunks that have been added to the stacks of each 9.51 +// This task processes regions that have been added to the stacks of each 9.52 // compaction manager. 9.53 // 9.54 // Trying to use one draining thread does not work because there are no 9.55 // guarantees about which task will be picked up by which thread. For example, 9.56 -// if thread A gets all the preloaded chunks, thread A may not get a draining 9.57 +// if thread A gets all the preloaded regions, thread A may not get a draining 9.58 // task (they may all be done by other threads). 9.59 // 9.60 9.61 class DrainStacksCompactionTask : public GCTask { 9.62 public: 9.63 - char* name() { return (char *)"drain-chunk-task"; } 9.64 + char* name() { return (char *)"drain-region-task"; } 9.65 virtual void do_it(GCTaskManager* manager, uint which); 9.66 };
10.1 --- a/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.cpp Tue Sep 30 12:24:27 2008 -0400 10.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.cpp Wed Oct 01 20:15:03 2008 -0400 10.3 @@ -30,7 +30,7 @@ 10.4 OopTaskQueueSet* ParCompactionManager::_stack_array = NULL; 10.5 ObjectStartArray* ParCompactionManager::_start_array = NULL; 10.6 ParMarkBitMap* ParCompactionManager::_mark_bitmap = NULL; 10.7 -ChunkTaskQueueSet* ParCompactionManager::_chunk_array = NULL; 10.8 +RegionTaskQueueSet* ParCompactionManager::_region_array = NULL; 10.9 10.10 ParCompactionManager::ParCompactionManager() : 10.11 _action(CopyAndUpdate) { 10.12 @@ -46,13 +46,13 @@ 10.13 10.14 // We want the overflow stack to be permanent 10.15 _overflow_stack = new (ResourceObj::C_HEAP) GrowableArray<oop>(10, true); 10.16 -#ifdef USE_ChunkTaskQueueWithOverflow 10.17 - chunk_stack()->initialize(); 10.18 +#ifdef USE_RegionTaskQueueWithOverflow 10.19 + region_stack()->initialize(); 10.20 #else 10.21 - chunk_stack()->initialize(); 10.22 + region_stack()->initialize(); 10.23 10.24 // We want the overflow stack to be permanent 10.25 - _chunk_overflow_stack = 10.26 + _region_overflow_stack = 10.27 new (ResourceObj::C_HEAP) GrowableArray<size_t>(10, true); 10.28 #endif 10.29 10.30 @@ -86,18 +86,18 @@ 10.31 10.32 _stack_array = new OopTaskQueueSet(parallel_gc_threads); 10.33 guarantee(_stack_array != NULL, "Count not initialize promotion manager"); 10.34 - _chunk_array = new ChunkTaskQueueSet(parallel_gc_threads); 10.35 - guarantee(_chunk_array != NULL, "Count not initialize promotion manager"); 10.36 + _region_array = new RegionTaskQueueSet(parallel_gc_threads); 10.37 + guarantee(_region_array != NULL, "Count not initialize promotion manager"); 10.38 10.39 // Create and register the ParCompactionManager(s) for the worker threads. 
10.40 for(uint i=0; i<parallel_gc_threads; i++) { 10.41 _manager_array[i] = new ParCompactionManager(); 10.42 guarantee(_manager_array[i] != NULL, "Could not create ParCompactionManager"); 10.43 stack_array()->register_queue(i, _manager_array[i]->marking_stack()); 10.44 -#ifdef USE_ChunkTaskQueueWithOverflow 10.45 - chunk_array()->register_queue(i, _manager_array[i]->chunk_stack()->task_queue()); 10.46 +#ifdef USE_RegionTaskQueueWithOverflow 10.47 + region_array()->register_queue(i, _manager_array[i]->region_stack()->task_queue()); 10.48 #else 10.49 - chunk_array()->register_queue(i, _manager_array[i]->chunk_stack()); 10.50 + region_array()->register_queue(i, _manager_array[i]->region_stack()); 10.51 #endif 10.52 } 10.53 10.54 @@ -153,31 +153,31 @@ 10.55 return NULL; 10.56 } 10.57 10.58 -// Save chunk on a stack 10.59 -void ParCompactionManager::save_for_processing(size_t chunk_index) { 10.60 +// Save region on a stack 10.61 +void ParCompactionManager::save_for_processing(size_t region_index) { 10.62 #ifdef ASSERT 10.63 const ParallelCompactData& sd = PSParallelCompact::summary_data(); 10.64 - ParallelCompactData::ChunkData* const chunk_ptr = sd.chunk(chunk_index); 10.65 - assert(chunk_ptr->claimed(), "must be claimed"); 10.66 - assert(chunk_ptr->_pushed++ == 0, "should only be pushed once"); 10.67 + ParallelCompactData::RegionData* const region_ptr = sd.region(region_index); 10.68 + assert(region_ptr->claimed(), "must be claimed"); 10.69 + assert(region_ptr->_pushed++ == 0, "should only be pushed once"); 10.70 #endif 10.71 - chunk_stack_push(chunk_index); 10.72 + region_stack_push(region_index); 10.73 } 10.74 10.75 -void ParCompactionManager::chunk_stack_push(size_t chunk_index) { 10.76 +void ParCompactionManager::region_stack_push(size_t region_index) { 10.77 10.78 -#ifdef USE_ChunkTaskQueueWithOverflow 10.79 - chunk_stack()->save(chunk_index); 10.80 +#ifdef USE_RegionTaskQueueWithOverflow 10.81 + region_stack()->save(region_index); 10.82 #else 10.83 - if(!chunk_stack()->push(chunk_index)) { 10.84 - chunk_overflow_stack()->push(chunk_index); 10.85 + if(!region_stack()->push(region_index)) { 10.86 + region_overflow_stack()->push(region_index); 10.87 } 10.88 #endif 10.89 } 10.90 10.91 -bool ParCompactionManager::retrieve_for_processing(size_t& chunk_index) { 10.92 -#ifdef USE_ChunkTaskQueueWithOverflow 10.93 - return chunk_stack()->retrieve(chunk_index); 10.94 +bool ParCompactionManager::retrieve_for_processing(size_t& region_index) { 10.95 +#ifdef USE_RegionTaskQueueWithOverflow 10.96 + return region_stack()->retrieve(region_index); 10.97 #else 10.98 // Should not be used in the parallel case 10.99 ShouldNotReachHere(); 10.100 @@ -230,14 +230,14 @@ 10.101 assert(overflow_stack()->length() == 0, "Sanity"); 10.102 } 10.103 10.104 -void ParCompactionManager::drain_chunk_overflow_stack() { 10.105 - size_t chunk_index = (size_t) -1; 10.106 - while(chunk_stack()->retrieve_from_overflow(chunk_index)) { 10.107 - PSParallelCompact::fill_and_update_chunk(this, chunk_index); 10.108 +void ParCompactionManager::drain_region_overflow_stack() { 10.109 + size_t region_index = (size_t) -1; 10.110 + while(region_stack()->retrieve_from_overflow(region_index)) { 10.111 + PSParallelCompact::fill_and_update_region(this, region_index); 10.112 } 10.113 } 10.114 10.115 -void ParCompactionManager::drain_chunk_stacks() { 10.116 +void ParCompactionManager::drain_region_stacks() { 10.117 #ifdef ASSERT 10.118 ParallelScavengeHeap* heap = (ParallelScavengeHeap*)Universe::heap(); 10.119 assert(heap->kind() == 
CollectedHeap::ParallelScavengeHeap, "Sanity"); 10.120 @@ -249,42 +249,42 @@ 10.121 #if 1 // def DO_PARALLEL - the serial code hasn't been updated 10.122 do { 10.123 10.124 -#ifdef USE_ChunkTaskQueueWithOverflow 10.125 +#ifdef USE_RegionTaskQueueWithOverflow 10.126 // Drain overflow stack first, so other threads can steal from 10.127 // claimed stack while we work. 10.128 - size_t chunk_index = (size_t) -1; 10.129 - while(chunk_stack()->retrieve_from_overflow(chunk_index)) { 10.130 - PSParallelCompact::fill_and_update_chunk(this, chunk_index); 10.131 + size_t region_index = (size_t) -1; 10.132 + while(region_stack()->retrieve_from_overflow(region_index)) { 10.133 + PSParallelCompact::fill_and_update_region(this, region_index); 10.134 } 10.135 10.136 - while (chunk_stack()->retrieve_from_stealable_queue(chunk_index)) { 10.137 - PSParallelCompact::fill_and_update_chunk(this, chunk_index); 10.138 + while (region_stack()->retrieve_from_stealable_queue(region_index)) { 10.139 + PSParallelCompact::fill_and_update_region(this, region_index); 10.140 } 10.141 - } while (!chunk_stack()->is_empty()); 10.142 + } while (!region_stack()->is_empty()); 10.143 #else 10.144 // Drain overflow stack first, so other threads can steal from 10.145 // claimed stack while we work. 10.146 - while(!chunk_overflow_stack()->is_empty()) { 10.147 - size_t chunk_index = chunk_overflow_stack()->pop(); 10.148 - PSParallelCompact::fill_and_update_chunk(this, chunk_index); 10.149 + while(!region_overflow_stack()->is_empty()) { 10.150 + size_t region_index = region_overflow_stack()->pop(); 10.151 + PSParallelCompact::fill_and_update_region(this, region_index); 10.152 } 10.153 10.154 - size_t chunk_index = -1; 10.155 + size_t region_index = -1; 10.156 // obj is a reference!!! 10.157 - while (chunk_stack()->pop_local(chunk_index)) { 10.158 + while (region_stack()->pop_local(region_index)) { 10.159 // It would be nice to assert about the type of objects we might 10.160 // pop, but they can come from anywhere, unfortunately. 10.161 - PSParallelCompact::fill_and_update_chunk(this, chunk_index); 10.162 + PSParallelCompact::fill_and_update_region(this, region_index); 10.163 } 10.164 - } while((chunk_stack()->size() != 0) || 10.165 - (chunk_overflow_stack()->length() != 0)); 10.166 + } while((region_stack()->size() != 0) || 10.167 + (region_overflow_stack()->length() != 0)); 10.168 #endif 10.169 10.170 -#ifdef USE_ChunkTaskQueueWithOverflow 10.171 - assert(chunk_stack()->is_empty(), "Sanity"); 10.172 +#ifdef USE_RegionTaskQueueWithOverflow 10.173 + assert(region_stack()->is_empty(), "Sanity"); 10.174 #else 10.175 - assert(chunk_stack()->size() == 0, "Sanity"); 10.176 - assert(chunk_overflow_stack()->length() == 0, "Sanity"); 10.177 + assert(region_stack()->size() == 0, "Sanity"); 10.178 + assert(region_overflow_stack()->length() == 0, "Sanity"); 10.179 #endif 10.180 #else 10.181 oop obj;
11.1 --- a/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.hpp Tue Sep 30 12:24:27 2008 -0400 11.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.hpp Wed Oct 01 20:15:03 2008 -0400 11.3 @@ -52,7 +52,7 @@ 11.4 friend class ParallelTaskTerminator; 11.5 friend class ParMarkBitMap; 11.6 friend class PSParallelCompact; 11.7 - friend class StealChunkCompactionTask; 11.8 + friend class StealRegionCompactionTask; 11.9 friend class UpdateAndFillClosure; 11.10 friend class RefProcTaskExecutor; 11.11 11.12 @@ -72,27 +72,27 @@ 11.13 // ------------------------ End don't putback if not needed 11.14 11.15 private: 11.16 - static ParCompactionManager** _manager_array; 11.17 - static OopTaskQueueSet* _stack_array; 11.18 - static ObjectStartArray* _start_array; 11.19 - static ChunkTaskQueueSet* _chunk_array; 11.20 - static PSOldGen* _old_gen; 11.21 + static ParCompactionManager** _manager_array; 11.22 + static OopTaskQueueSet* _stack_array; 11.23 + static ObjectStartArray* _start_array; 11.24 + static RegionTaskQueueSet* _region_array; 11.25 + static PSOldGen* _old_gen; 11.26 11.27 - OopTaskQueue _marking_stack; 11.28 - GrowableArray<oop>* _overflow_stack; 11.29 + OopTaskQueue _marking_stack; 11.30 + GrowableArray<oop>* _overflow_stack; 11.31 // Is there a way to reuse the _marking_stack for the 11.32 - // saving empty chunks? For now just create a different 11.33 + // saving empty regions? For now just create a different 11.34 // type of TaskQueue. 11.35 11.36 -#ifdef USE_ChunkTaskQueueWithOverflow 11.37 - ChunkTaskQueueWithOverflow _chunk_stack; 11.38 +#ifdef USE_RegionTaskQueueWithOverflow 11.39 + RegionTaskQueueWithOverflow _region_stack; 11.40 #else 11.41 - ChunkTaskQueue _chunk_stack; 11.42 - GrowableArray<size_t>* _chunk_overflow_stack; 11.43 + RegionTaskQueue _region_stack; 11.44 + GrowableArray<size_t>* _region_overflow_stack; 11.45 #endif 11.46 11.47 #if 1 // does this happen enough to need a per thread stack? 11.48 - GrowableArray<Klass*>* _revisit_klass_stack; 11.49 + GrowableArray<Klass*>* _revisit_klass_stack; 11.50 #endif 11.51 static ParMarkBitMap* _mark_bitmap; 11.52 11.53 @@ -100,21 +100,22 @@ 11.54 11.55 static PSOldGen* old_gen() { return _old_gen; } 11.56 static ObjectStartArray* start_array() { return _start_array; } 11.57 - static OopTaskQueueSet* stack_array() { return _stack_array; } 11.58 + static OopTaskQueueSet* stack_array() { return _stack_array; } 11.59 11.60 static void initialize(ParMarkBitMap* mbm); 11.61 11.62 protected: 11.63 // Array of tasks. Needed by the ParallelTaskTerminator. 
11.64 - static ChunkTaskQueueSet* chunk_array() { return _chunk_array; } 11.65 - 11.66 - OopTaskQueue* marking_stack() { return &_marking_stack; } 11.67 - GrowableArray<oop>* overflow_stack() { return _overflow_stack; } 11.68 -#ifdef USE_ChunkTaskQueueWithOverflow 11.69 - ChunkTaskQueueWithOverflow* chunk_stack() { return &_chunk_stack; } 11.70 + static RegionTaskQueueSet* region_array() { return _region_array; } 11.71 + OopTaskQueue* marking_stack() { return &_marking_stack; } 11.72 + GrowableArray<oop>* overflow_stack() { return _overflow_stack; } 11.73 +#ifdef USE_RegionTaskQueueWithOverflow 11.74 + RegionTaskQueueWithOverflow* region_stack() { return &_region_stack; } 11.75 #else 11.76 - ChunkTaskQueue* chunk_stack() { return &_chunk_stack; } 11.77 - GrowableArray<size_t>* chunk_overflow_stack() { return _chunk_overflow_stack; } 11.78 + RegionTaskQueue* region_stack() { return &_region_stack; } 11.79 + GrowableArray<size_t>* region_overflow_stack() { 11.80 + return _region_overflow_stack; 11.81 + } 11.82 #endif 11.83 11.84 // Pushes onto the marking stack. If the marking stack is full, 11.85 @@ -123,9 +124,9 @@ 11.86 // Do not implement an equivalent stack_pop. Deal with the 11.87 // marking stack and overflow stack directly. 11.88 11.89 - // Pushes onto the chunk stack. If the chunk stack is full, 11.90 - // pushes onto the chunk overflow stack. 11.91 - void chunk_stack_push(size_t chunk_index); 11.92 + // Pushes onto the region stack. If the region stack is full, 11.93 + // pushes onto the region overflow stack. 11.94 + void region_stack_push(size_t region_index); 11.95 public: 11.96 11.97 Action action() { return _action; } 11.98 @@ -160,10 +161,10 @@ 11.99 // Get a oop for scanning. If returns null, no oop were found. 11.100 oop retrieve_for_scanning(); 11.101 11.102 - // Save chunk for later processing. Must not fail. 11.103 - void save_for_processing(size_t chunk_index); 11.104 - // Get a chunk for processing. If returns null, no chunk were found. 11.105 - bool retrieve_for_processing(size_t& chunk_index); 11.106 + // Save region for later processing. Must not fail. 11.107 + void save_for_processing(size_t region_index); 11.108 + // Get a region for processing. If returns null, no region were found. 11.109 + bool retrieve_for_processing(size_t& region_index); 11.110 11.111 // Access function for compaction managers 11.112 static ParCompactionManager* gc_thread_compaction_manager(int index); 11.113 @@ -172,18 +173,18 @@ 11.114 return stack_array()->steal(queue_num, seed, t); 11.115 } 11.116 11.117 - static bool steal(int queue_num, int* seed, ChunkTask& t) { 11.118 - return chunk_array()->steal(queue_num, seed, t); 11.119 + static bool steal(int queue_num, int* seed, RegionTask& t) { 11.120 + return region_array()->steal(queue_num, seed, t); 11.121 } 11.122 11.123 // Process tasks remaining on any stack 11.124 void drain_marking_stacks(OopClosure *blk); 11.125 11.126 // Process tasks remaining on any stack 11.127 - void drain_chunk_stacks(); 11.128 + void drain_region_stacks(); 11.129 11.130 // Process tasks remaining on any stack 11.131 - void drain_chunk_overflow_stack(); 11.132 + void drain_region_overflow_stack(); 11.133 11.134 // Debugging support 11.135 #ifdef ASSERT
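[Editor's note] region_stack_push() in the two psCompactionManager hunks above pushes onto a fixed-capacity task queue and falls back to a growable overflow stack when that push fails, and the drain code empties the overflow entries first so other threads can keep stealing from the bounded queue. A minimal sketch of that push/drain shape using standard containers (the capacity, class name, and driver are assumptions):

#include <cstdio>
#include <vector>

// Bounded "stealable" queue plus an unbounded overflow stack, mirroring the
// push-or-overflow pattern used for region indices above.
class OverflowTaskStack {
 public:
  explicit OverflowTaskStack(size_t capacity) : _capacity(capacity) {}

  void push(size_t region_index) {
    if (_bounded.size() < _capacity) {
      _bounded.push_back(region_index);   // fast path: bounded queue
    } else {
      _overflow.push_back(region_index);  // fallback: overflow stack
    }
  }

  // Pop preferring the overflow stack, so the bounded queue stays available
  // to (hypothetical) stealing threads for as long as possible.
  bool pop(size_t& region_index) {
    if (!_overflow.empty()) {
      region_index = _overflow.back();
      _overflow.pop_back();
      return true;
    }
    if (!_bounded.empty()) {
      region_index = _bounded.back();
      _bounded.pop_back();
      return true;
    }
    return false;
  }

 private:
  size_t _capacity;
  std::vector<size_t> _bounded;
  std::vector<size_t> _overflow;
};

int main() {
  OverflowTaskStack regions(2);           // tiny capacity to force overflow
  for (size_t i = 0; i < 5; ++i) regions.push(i);

  size_t region_index;
  while (regions.pop(region_index)) {     // drains overflow entries 4, 3, 2 first
    std::printf("process region %zu\n", region_index);
  }
  return 0;
}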
12.1 --- a/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp Tue Sep 30 12:24:27 2008 -0400 12.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp Wed Oct 01 20:15:03 2008 -0400 12.3 @@ -35,9 +35,7 @@ 12.4 _ref_processor = new ReferenceProcessor(mr, 12.5 true, // atomic_discovery 12.6 false); // mt_discovery 12.7 - if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) { 12.8 - _counters = new CollectorCounters("PSMarkSweep", 1); 12.9 - } 12.10 + _counters = new CollectorCounters("PSMarkSweep", 1); 12.11 } 12.12 12.13 // This method contains all heap specific policy for invoking mark sweep. 12.14 @@ -518,9 +516,6 @@ 12.15 follow_stack(); 12.16 12.17 // Process reference objects found during marking 12.18 - 12.19 - // Skipping the reference processing for VerifyParallelOldWithMarkSweep 12.20 - // affects the marking (makes it different). 12.21 { 12.22 ReferencePolicy *soft_ref_policy; 12.23 if (clear_all_softrefs) {
13.1 --- a/src/share/vm/gc_implementation/parallelScavenge/psMarkSweepDecorator.cpp Tue Sep 30 12:24:27 2008 -0400 13.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/psMarkSweepDecorator.cpp Wed Oct 01 20:15:03 2008 -0400 13.3 @@ -152,20 +152,15 @@ 13.4 oop(q)->forward_to(oop(compact_top)); 13.5 assert(oop(q)->is_gc_marked(), "encoding the pointer should preserve the mark"); 13.6 } else { 13.7 - // Don't clear the mark since it's confuses parallel old 13.8 - // verification. 13.9 - if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) { 13.10 - // if the object isn't moving we can just set the mark to the default 13.11 - // mark and handle it specially later on. 13.12 - oop(q)->init_mark(); 13.13 - } 13.14 + // if the object isn't moving we can just set the mark to the default 13.15 + // mark and handle it specially later on. 13.16 + oop(q)->init_mark(); 13.17 assert(oop(q)->forwardee() == NULL, "should be forwarded to NULL"); 13.18 } 13.19 13.20 // Update object start array 13.21 - if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) { 13.22 - if (start_array) 13.23 - start_array->allocate_block(compact_top); 13.24 + if (start_array) { 13.25 + start_array->allocate_block(compact_top); 13.26 } 13.27 13.28 VALIDATE_MARK_SWEEP_ONLY(MarkSweep::register_live_oop(oop(q), size)); 13.29 @@ -219,19 +214,14 @@ 13.30 assert(oop(q)->is_gc_marked(), "encoding the pointer should preserve the mark"); 13.31 } else { 13.32 // if the object isn't moving we can just set the mark to the default 13.33 - // Don't clear the mark since it's confuses parallel old 13.34 - // verification. 13.35 - if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) { 13.36 - // mark and handle it specially later on. 13.37 - oop(q)->init_mark(); 13.38 - } 13.39 + // mark and handle it specially later on. 13.40 + oop(q)->init_mark(); 13.41 assert(oop(q)->forwardee() == NULL, "should be forwarded to NULL"); 13.42 } 13.43 13.44 - if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) { 13.45 - // Update object start array 13.46 - if (start_array) 13.47 - start_array->allocate_block(compact_top); 13.48 + // Update object start array 13.49 + if (start_array) { 13.50 + start_array->allocate_block(compact_top); 13.51 } 13.52 13.53 VALIDATE_MARK_SWEEP_ONLY(MarkSweep::register_live_oop(oop(q), sz));
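[Editor's note] The psMarkSweepDecorator hunks above restore the unconditional behavior of the forwarding pass of mark-compact: a live object that will move records its destination (compact_top) as a forwarding pointer and the start array is updated, while an object that stays in place just has its header reset to the default mark. A toy sketch of that address-assignment step over an array of (live, size) records; all names here are hypothetical and the real pass operates on actual heap words, not records.

#include <cstdio>
#include <vector>

// Toy heap object: word size, liveness, and the destination offset computed
// by the sliding-compaction pass.
struct ToyObject {
  size_t size;
  bool   live;
  size_t new_addr;  // destination offset in words; equals the old offset if it stays put
};

// Assign destinations by sliding live objects toward offset 0 in address
// order, the same general shape as the forwarding pass above.
static void assign_forwarding(std::vector<ToyObject>& heap) {
  size_t compact_top = 0;  // next free destination offset
  for (ToyObject& obj : heap) {
    if (obj.live) {
      obj.new_addr = compact_top;  // object moves iff compact_top differs from its offset
      compact_top += obj.size;
    }
  }
}

int main() {
  // live(4), dead(2), live(3), live(1): the last two live objects slide left by 2 words.
  std::vector<ToyObject> heap = {
    {4, true, 0}, {2, false, 0}, {3, true, 0}, {1, true, 0}
  };
  assign_forwarding(heap);

  size_t addr = 0;
  for (const ToyObject& obj : heap) {
    if (obj.live) {
      std::printf("object at %zu -> %zu%s\n", addr, obj.new_addr,
                  obj.new_addr == addr ? " (does not move)" : "");
    }
    addr += obj.size;
  }
  return 0;
}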
14.1 --- a/src/share/vm/gc_implementation/parallelScavenge/psOldGen.cpp Tue Sep 30 12:24:27 2008 -0400 14.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/psOldGen.cpp Wed Oct 01 20:15:03 2008 -0400 14.3 @@ -152,9 +152,7 @@ 14.4 assert(heap->kind() == CollectedHeap::ParallelScavengeHeap, "Sanity"); 14.5 14.6 // Reset start array first. 14.7 - debug_only(if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) {) 14.8 start_array()->reset(); 14.9 - debug_only(}) 14.10 14.11 object_mark_sweep()->precompact(); 14.12
15.1 --- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp Tue Sep 30 12:24:27 2008 -0400 15.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp Wed Oct 01 20:15:03 2008 -0400 15.3 @@ -28,43 +28,31 @@ 15.4 #include <math.h> 15.5 15.6 // All sizes are in HeapWords. 15.7 -const size_t ParallelCompactData::Log2ChunkSize = 9; // 512 words 15.8 -const size_t ParallelCompactData::ChunkSize = (size_t)1 << Log2ChunkSize; 15.9 -const size_t ParallelCompactData::ChunkSizeBytes = ChunkSize << LogHeapWordSize; 15.10 -const size_t ParallelCompactData::ChunkSizeOffsetMask = ChunkSize - 1; 15.11 -const size_t ParallelCompactData::ChunkAddrOffsetMask = ChunkSizeBytes - 1; 15.12 -const size_t ParallelCompactData::ChunkAddrMask = ~ChunkAddrOffsetMask; 15.13 - 15.14 -// 32-bit: 128 words covers 4 bitmap words 15.15 -// 64-bit: 128 words covers 2 bitmap words 15.16 -const size_t ParallelCompactData::Log2BlockSize = 7; // 128 words 15.17 -const size_t ParallelCompactData::BlockSize = (size_t)1 << Log2BlockSize; 15.18 -const size_t ParallelCompactData::BlockOffsetMask = BlockSize - 1; 15.19 -const size_t ParallelCompactData::BlockMask = ~BlockOffsetMask; 15.20 - 15.21 -const size_t ParallelCompactData::BlocksPerChunk = ChunkSize / BlockSize; 15.22 - 15.23 -const ParallelCompactData::ChunkData::chunk_sz_t 15.24 -ParallelCompactData::ChunkData::dc_shift = 27; 15.25 - 15.26 -const ParallelCompactData::ChunkData::chunk_sz_t 15.27 -ParallelCompactData::ChunkData::dc_mask = ~0U << dc_shift; 15.28 - 15.29 -const ParallelCompactData::ChunkData::chunk_sz_t 15.30 -ParallelCompactData::ChunkData::dc_one = 0x1U << dc_shift; 15.31 - 15.32 -const ParallelCompactData::ChunkData::chunk_sz_t 15.33 -ParallelCompactData::ChunkData::los_mask = ~dc_mask; 15.34 - 15.35 -const ParallelCompactData::ChunkData::chunk_sz_t 15.36 -ParallelCompactData::ChunkData::dc_claimed = 0x8U << dc_shift; 15.37 - 15.38 -const ParallelCompactData::ChunkData::chunk_sz_t 15.39 -ParallelCompactData::ChunkData::dc_completed = 0xcU << dc_shift; 15.40 - 15.41 -#ifdef ASSERT 15.42 -short ParallelCompactData::BlockData::_cur_phase = 0; 15.43 -#endif 15.44 +const size_t ParallelCompactData::Log2RegionSize = 9; // 512 words 15.45 +const size_t ParallelCompactData::RegionSize = (size_t)1 << Log2RegionSize; 15.46 +const size_t ParallelCompactData::RegionSizeBytes = 15.47 + RegionSize << LogHeapWordSize; 15.48 +const size_t ParallelCompactData::RegionSizeOffsetMask = RegionSize - 1; 15.49 +const size_t ParallelCompactData::RegionAddrOffsetMask = RegionSizeBytes - 1; 15.50 +const size_t ParallelCompactData::RegionAddrMask = ~RegionAddrOffsetMask; 15.51 + 15.52 +const ParallelCompactData::RegionData::region_sz_t 15.53 +ParallelCompactData::RegionData::dc_shift = 27; 15.54 + 15.55 +const ParallelCompactData::RegionData::region_sz_t 15.56 +ParallelCompactData::RegionData::dc_mask = ~0U << dc_shift; 15.57 + 15.58 +const ParallelCompactData::RegionData::region_sz_t 15.59 +ParallelCompactData::RegionData::dc_one = 0x1U << dc_shift; 15.60 + 15.61 +const ParallelCompactData::RegionData::region_sz_t 15.62 +ParallelCompactData::RegionData::los_mask = ~dc_mask; 15.63 + 15.64 +const ParallelCompactData::RegionData::region_sz_t 15.65 +ParallelCompactData::RegionData::dc_claimed = 0x8U << dc_shift; 15.66 + 15.67 +const ParallelCompactData::RegionData::region_sz_t 15.68 +ParallelCompactData::RegionData::dc_completed = 0xcU << dc_shift; 15.69 15.70 SpaceInfo PSParallelCompact::_space_info[PSParallelCompact::last_space_id]; 15.71 bool 
PSParallelCompact::_print_phases = false; 15.72 @@ -100,99 +88,12 @@ 15.73 GrowableArray<size_t> * PSParallelCompact::_last_gc_live_oops_size = NULL; 15.74 #endif 15.75 15.76 -// XXX beg - verification code; only works while we also mark in object headers 15.77 -static void 15.78 -verify_mark_bitmap(ParMarkBitMap& _mark_bitmap) 15.79 -{ 15.80 - ParallelScavengeHeap* heap = PSParallelCompact::gc_heap(); 15.81 - 15.82 - PSPermGen* perm_gen = heap->perm_gen(); 15.83 - PSOldGen* old_gen = heap->old_gen(); 15.84 - PSYoungGen* young_gen = heap->young_gen(); 15.85 - 15.86 - MutableSpace* perm_space = perm_gen->object_space(); 15.87 - MutableSpace* old_space = old_gen->object_space(); 15.88 - MutableSpace* eden_space = young_gen->eden_space(); 15.89 - MutableSpace* from_space = young_gen->from_space(); 15.90 - MutableSpace* to_space = young_gen->to_space(); 15.91 - 15.92 - // 'from_space' here is the survivor space at the lower address. 15.93 - if (to_space->bottom() < from_space->bottom()) { 15.94 - from_space = to_space; 15.95 - to_space = young_gen->from_space(); 15.96 - } 15.97 - 15.98 - HeapWord* boundaries[12]; 15.99 - unsigned int bidx = 0; 15.100 - const unsigned int bidx_max = sizeof(boundaries) / sizeof(boundaries[0]); 15.101 - 15.102 - boundaries[0] = perm_space->bottom(); 15.103 - boundaries[1] = perm_space->top(); 15.104 - boundaries[2] = old_space->bottom(); 15.105 - boundaries[3] = old_space->top(); 15.106 - boundaries[4] = eden_space->bottom(); 15.107 - boundaries[5] = eden_space->top(); 15.108 - boundaries[6] = from_space->bottom(); 15.109 - boundaries[7] = from_space->top(); 15.110 - boundaries[8] = to_space->bottom(); 15.111 - boundaries[9] = to_space->top(); 15.112 - boundaries[10] = to_space->end(); 15.113 - boundaries[11] = to_space->end(); 15.114 - 15.115 - BitMap::idx_t beg_bit = 0; 15.116 - BitMap::idx_t end_bit; 15.117 - BitMap::idx_t tmp_bit; 15.118 - const BitMap::idx_t last_bit = _mark_bitmap.size(); 15.119 - do { 15.120 - HeapWord* addr = _mark_bitmap.bit_to_addr(beg_bit); 15.121 - if (_mark_bitmap.is_marked(beg_bit)) { 15.122 - oop obj = (oop)addr; 15.123 - assert(obj->is_gc_marked(), "obj header is not marked"); 15.124 - end_bit = _mark_bitmap.find_obj_end(beg_bit, last_bit); 15.125 - const size_t size = _mark_bitmap.obj_size(beg_bit, end_bit); 15.126 - assert(size == (size_t)obj->size(), "end bit wrong?"); 15.127 - beg_bit = _mark_bitmap.find_obj_beg(beg_bit + 1, last_bit); 15.128 - assert(beg_bit > end_bit, "bit set in middle of an obj"); 15.129 - } else { 15.130 - if (addr >= boundaries[bidx] && addr < boundaries[bidx + 1]) { 15.131 - // a dead object in the current space. 15.132 - oop obj = (oop)addr; 15.133 - end_bit = _mark_bitmap.addr_to_bit(addr + obj->size()); 15.134 - assert(!obj->is_gc_marked(), "obj marked in header, not in bitmap"); 15.135 - tmp_bit = beg_bit + 1; 15.136 - beg_bit = _mark_bitmap.find_obj_beg(tmp_bit, end_bit); 15.137 - assert(beg_bit == end_bit, "beg bit set in unmarked obj"); 15.138 - beg_bit = _mark_bitmap.find_obj_end(tmp_bit, end_bit); 15.139 - assert(beg_bit == end_bit, "end bit set in unmarked obj"); 15.140 - } else if (addr < boundaries[bidx + 2]) { 15.141 - // addr is between top in the current space and bottom in the next. 
15.142 - end_bit = beg_bit + pointer_delta(boundaries[bidx + 2], addr); 15.143 - tmp_bit = beg_bit; 15.144 - beg_bit = _mark_bitmap.find_obj_beg(tmp_bit, end_bit); 15.145 - assert(beg_bit == end_bit, "beg bit set above top"); 15.146 - beg_bit = _mark_bitmap.find_obj_end(tmp_bit, end_bit); 15.147 - assert(beg_bit == end_bit, "end bit set above top"); 15.148 - bidx += 2; 15.149 - } else if (bidx < bidx_max - 2) { 15.150 - bidx += 2; // ??? 15.151 - } else { 15.152 - tmp_bit = beg_bit; 15.153 - beg_bit = _mark_bitmap.find_obj_beg(tmp_bit, last_bit); 15.154 - assert(beg_bit == last_bit, "beg bit set outside heap"); 15.155 - beg_bit = _mark_bitmap.find_obj_end(tmp_bit, last_bit); 15.156 - assert(beg_bit == last_bit, "end bit set outside heap"); 15.157 - } 15.158 - } 15.159 - } while (beg_bit < last_bit); 15.160 -} 15.161 -// XXX end - verification code; only works while we also mark in object headers 15.162 - 15.163 #ifndef PRODUCT 15.164 const char* PSParallelCompact::space_names[] = { 15.165 "perm", "old ", "eden", "from", "to " 15.166 }; 15.167 15.168 -void PSParallelCompact::print_chunk_ranges() 15.169 +void PSParallelCompact::print_region_ranges() 15.170 { 15.171 tty->print_cr("space bottom top end new_top"); 15.172 tty->print_cr("------ ---------- ---------- ---------- ----------"); 15.173 @@ -203,31 +104,31 @@ 15.174 SIZE_FORMAT_W(10) " " SIZE_FORMAT_W(10) " " 15.175 SIZE_FORMAT_W(10) " " SIZE_FORMAT_W(10) " ", 15.176 id, space_names[id], 15.177 - summary_data().addr_to_chunk_idx(space->bottom()), 15.178 - summary_data().addr_to_chunk_idx(space->top()), 15.179 - summary_data().addr_to_chunk_idx(space->end()), 15.180 - summary_data().addr_to_chunk_idx(_space_info[id].new_top())); 15.181 + summary_data().addr_to_region_idx(space->bottom()), 15.182 + summary_data().addr_to_region_idx(space->top()), 15.183 + summary_data().addr_to_region_idx(space->end()), 15.184 + summary_data().addr_to_region_idx(_space_info[id].new_top())); 15.185 } 15.186 } 15.187 15.188 void 15.189 -print_generic_summary_chunk(size_t i, const ParallelCompactData::ChunkData* c) 15.190 +print_generic_summary_region(size_t i, const ParallelCompactData::RegionData* c) 15.191 { 15.192 -#define CHUNK_IDX_FORMAT SIZE_FORMAT_W(7) 15.193 -#define CHUNK_DATA_FORMAT SIZE_FORMAT_W(5) 15.194 +#define REGION_IDX_FORMAT SIZE_FORMAT_W(7) 15.195 +#define REGION_DATA_FORMAT SIZE_FORMAT_W(5) 15.196 15.197 ParallelCompactData& sd = PSParallelCompact::summary_data(); 15.198 - size_t dci = c->destination() ? sd.addr_to_chunk_idx(c->destination()) : 0; 15.199 - tty->print_cr(CHUNK_IDX_FORMAT " " PTR_FORMAT " " 15.200 - CHUNK_IDX_FORMAT " " PTR_FORMAT " " 15.201 - CHUNK_DATA_FORMAT " " CHUNK_DATA_FORMAT " " 15.202 - CHUNK_DATA_FORMAT " " CHUNK_IDX_FORMAT " %d", 15.203 + size_t dci = c->destination() ? 
sd.addr_to_region_idx(c->destination()) : 0; 15.204 + tty->print_cr(REGION_IDX_FORMAT " " PTR_FORMAT " " 15.205 + REGION_IDX_FORMAT " " PTR_FORMAT " " 15.206 + REGION_DATA_FORMAT " " REGION_DATA_FORMAT " " 15.207 + REGION_DATA_FORMAT " " REGION_IDX_FORMAT " %d", 15.208 i, c->data_location(), dci, c->destination(), 15.209 c->partial_obj_size(), c->live_obj_size(), 15.210 - c->data_size(), c->source_chunk(), c->destination_count()); 15.211 - 15.212 -#undef CHUNK_IDX_FORMAT 15.213 -#undef CHUNK_DATA_FORMAT 15.214 + c->data_size(), c->source_region(), c->destination_count()); 15.215 + 15.216 +#undef REGION_IDX_FORMAT 15.217 +#undef REGION_DATA_FORMAT 15.218 } 15.219 15.220 void 15.221 @@ -236,14 +137,14 @@ 15.222 HeapWord* const end_addr) 15.223 { 15.224 size_t total_words = 0; 15.225 - size_t i = summary_data.addr_to_chunk_idx(beg_addr); 15.226 - const size_t last = summary_data.addr_to_chunk_idx(end_addr); 15.227 + size_t i = summary_data.addr_to_region_idx(beg_addr); 15.228 + const size_t last = summary_data.addr_to_region_idx(end_addr); 15.229 HeapWord* pdest = 0; 15.230 15.231 while (i <= last) { 15.232 - ParallelCompactData::ChunkData* c = summary_data.chunk(i); 15.233 + ParallelCompactData::RegionData* c = summary_data.region(i); 15.234 if (c->data_size() != 0 || c->destination() != pdest) { 15.235 - print_generic_summary_chunk(i, c); 15.236 + print_generic_summary_region(i, c); 15.237 total_words += c->data_size(); 15.238 pdest = c->destination(); 15.239 } 15.240 @@ -265,16 +166,16 @@ 15.241 } 15.242 15.243 void 15.244 -print_initial_summary_chunk(size_t i, 15.245 - const ParallelCompactData::ChunkData* c, 15.246 - bool newline = true) 15.247 +print_initial_summary_region(size_t i, 15.248 + const ParallelCompactData::RegionData* c, 15.249 + bool newline = true) 15.250 { 15.251 tty->print(SIZE_FORMAT_W(5) " " PTR_FORMAT " " 15.252 SIZE_FORMAT_W(5) " " SIZE_FORMAT_W(5) " " 15.253 SIZE_FORMAT_W(5) " " SIZE_FORMAT_W(5) " %d", 15.254 i, c->destination(), 15.255 c->partial_obj_size(), c->live_obj_size(), 15.256 - c->data_size(), c->source_chunk(), c->destination_count()); 15.257 + c->data_size(), c->source_region(), c->destination_count()); 15.258 if (newline) tty->cr(); 15.259 } 15.260 15.261 @@ -285,47 +186,48 @@ 15.262 return; 15.263 } 15.264 15.265 - const size_t chunk_size = ParallelCompactData::ChunkSize; 15.266 - HeapWord* const top_aligned_up = summary_data.chunk_align_up(space->top()); 15.267 - const size_t end_chunk = summary_data.addr_to_chunk_idx(top_aligned_up); 15.268 - const ParallelCompactData::ChunkData* c = summary_data.chunk(end_chunk - 1); 15.269 + const size_t region_size = ParallelCompactData::RegionSize; 15.270 + typedef ParallelCompactData::RegionData RegionData; 15.271 + HeapWord* const top_aligned_up = summary_data.region_align_up(space->top()); 15.272 + const size_t end_region = summary_data.addr_to_region_idx(top_aligned_up); 15.273 + const RegionData* c = summary_data.region(end_region - 1); 15.274 HeapWord* end_addr = c->destination() + c->data_size(); 15.275 const size_t live_in_space = pointer_delta(end_addr, space->bottom()); 15.276 15.277 - // Print (and count) the full chunks at the beginning of the space. 
15.278 - size_t full_chunk_count = 0; 15.279 - size_t i = summary_data.addr_to_chunk_idx(space->bottom()); 15.280 - while (i < end_chunk && summary_data.chunk(i)->data_size() == chunk_size) { 15.281 - print_initial_summary_chunk(i, summary_data.chunk(i)); 15.282 - ++full_chunk_count; 15.283 + // Print (and count) the full regions at the beginning of the space. 15.284 + size_t full_region_count = 0; 15.285 + size_t i = summary_data.addr_to_region_idx(space->bottom()); 15.286 + while (i < end_region && summary_data.region(i)->data_size() == region_size) { 15.287 + print_initial_summary_region(i, summary_data.region(i)); 15.288 + ++full_region_count; 15.289 ++i; 15.290 } 15.291 15.292 - size_t live_to_right = live_in_space - full_chunk_count * chunk_size; 15.293 + size_t live_to_right = live_in_space - full_region_count * region_size; 15.294 15.295 double max_reclaimed_ratio = 0.0; 15.296 - size_t max_reclaimed_ratio_chunk = 0; 15.297 + size_t max_reclaimed_ratio_region = 0; 15.298 size_t max_dead_to_right = 0; 15.299 size_t max_live_to_right = 0; 15.300 15.301 - // Print the 'reclaimed ratio' for chunks while there is something live in the 15.302 - // chunk or to the right of it. The remaining chunks are empty (and 15.303 + // Print the 'reclaimed ratio' for regions while there is something live in 15.304 + // the region or to the right of it. The remaining regions are empty (and 15.305 // uninteresting), and computing the ratio will result in division by 0. 15.306 - while (i < end_chunk && live_to_right > 0) { 15.307 - c = summary_data.chunk(i); 15.308 - HeapWord* const chunk_addr = summary_data.chunk_to_addr(i); 15.309 - const size_t used_to_right = pointer_delta(space->top(), chunk_addr); 15.310 + while (i < end_region && live_to_right > 0) { 15.311 + c = summary_data.region(i); 15.312 + HeapWord* const region_addr = summary_data.region_to_addr(i); 15.313 + const size_t used_to_right = pointer_delta(space->top(), region_addr); 15.314 const size_t dead_to_right = used_to_right - live_to_right; 15.315 const double reclaimed_ratio = double(dead_to_right) / live_to_right; 15.316 15.317 if (reclaimed_ratio > max_reclaimed_ratio) { 15.318 max_reclaimed_ratio = reclaimed_ratio; 15.319 - max_reclaimed_ratio_chunk = i; 15.320 + max_reclaimed_ratio_region = i; 15.321 max_dead_to_right = dead_to_right; 15.322 max_live_to_right = live_to_right; 15.323 } 15.324 15.325 - print_initial_summary_chunk(i, c, false); 15.326 + print_initial_summary_region(i, c, false); 15.327 tty->print_cr(" %12.10f " SIZE_FORMAT_W(10) " " SIZE_FORMAT_W(10), 15.328 reclaimed_ratio, dead_to_right, live_to_right); 15.329 15.330 @@ -333,14 +235,14 @@ 15.331 ++i; 15.332 } 15.333 15.334 - // Any remaining chunks are empty. Print one more if there is one. 15.335 - if (i < end_chunk) { 15.336 - print_initial_summary_chunk(i, summary_data.chunk(i)); 15.337 + // Any remaining regions are empty. Print one more if there is one. 
15.338 + if (i < end_region) { 15.339 + print_initial_summary_region(i, summary_data.region(i)); 15.340 } 15.341 15.342 tty->print_cr("max: " SIZE_FORMAT_W(4) " d2r=" SIZE_FORMAT_W(10) " " 15.343 "l2r=" SIZE_FORMAT_W(10) " max_ratio=%14.12f", 15.344 - max_reclaimed_ratio_chunk, max_dead_to_right, 15.345 + max_reclaimed_ratio_region, max_dead_to_right, 15.346 max_live_to_right, max_reclaimed_ratio); 15.347 } 15.348 15.349 @@ -372,13 +274,9 @@ 15.350 { 15.351 _region_start = 0; 15.352 15.353 - _chunk_vspace = 0; 15.354 - _chunk_data = 0; 15.355 - _chunk_count = 0; 15.356 - 15.357 - _block_vspace = 0; 15.358 - _block_data = 0; 15.359 - _block_count = 0; 15.360 + _region_vspace = 0; 15.361 + _region_data = 0; 15.362 + _region_count = 0; 15.363 } 15.364 15.365 bool ParallelCompactData::initialize(MemRegion covered_region) 15.366 @@ -387,18 +285,12 @@ 15.367 const size_t region_size = covered_region.word_size(); 15.368 DEBUG_ONLY(_region_end = _region_start + region_size;) 15.369 15.370 - assert(chunk_align_down(_region_start) == _region_start, 15.371 + assert(region_align_down(_region_start) == _region_start, 15.372 "region start not aligned"); 15.373 - assert((region_size & ChunkSizeOffsetMask) == 0, 15.374 - "region size not a multiple of ChunkSize"); 15.375 - 15.376 - bool result = initialize_chunk_data(region_size); 15.377 - 15.378 - // Initialize the block data if it will be used for updating pointers, or if 15.379 - // this is a debug build. 15.380 - if (!UseParallelOldGCChunkPointerCalc || trueInDebug) { 15.381 - result = result && initialize_block_data(region_size); 15.382 - } 15.383 + assert((region_size & RegionSizeOffsetMask) == 0, 15.384 + "region size not a multiple of RegionSize"); 15.385 + 15.386 + bool result = initialize_region_data(region_size); 15.387 15.388 return result; 15.389 } 15.390 @@ -429,64 +321,41 @@ 15.391 return 0; 15.392 } 15.393 15.394 -bool ParallelCompactData::initialize_chunk_data(size_t region_size) 15.395 +bool ParallelCompactData::initialize_region_data(size_t region_size) 15.396 { 15.397 - const size_t count = (region_size + ChunkSizeOffsetMask) >> Log2ChunkSize; 15.398 - _chunk_vspace = create_vspace(count, sizeof(ChunkData)); 15.399 - if (_chunk_vspace != 0) { 15.400 - _chunk_data = (ChunkData*)_chunk_vspace->reserved_low_addr(); 15.401 - _chunk_count = count; 15.402 + const size_t count = (region_size + RegionSizeOffsetMask) >> Log2RegionSize; 15.403 + _region_vspace = create_vspace(count, sizeof(RegionData)); 15.404 + if (_region_vspace != 0) { 15.405 + _region_data = (RegionData*)_region_vspace->reserved_low_addr(); 15.406 + _region_count = count; 15.407 return true; 15.408 } 15.409 return false; 15.410 } 15.411 15.412 -bool ParallelCompactData::initialize_block_data(size_t region_size) 15.413 -{ 15.414 - const size_t count = (region_size + BlockOffsetMask) >> Log2BlockSize; 15.415 - _block_vspace = create_vspace(count, sizeof(BlockData)); 15.416 - if (_block_vspace != 0) { 15.417 - _block_data = (BlockData*)_block_vspace->reserved_low_addr(); 15.418 - _block_count = count; 15.419 - return true; 15.420 - } 15.421 - return false; 15.422 -} 15.423 - 15.424 void ParallelCompactData::clear() 15.425 { 15.426 - if (_block_data) { 15.427 - memset(_block_data, 0, _block_vspace->committed_size()); 15.428 - } 15.429 - memset(_chunk_data, 0, _chunk_vspace->committed_size()); 15.430 + memset(_region_data, 0, _region_vspace->committed_size()); 15.431 } 15.432 15.433 -void ParallelCompactData::clear_range(size_t beg_chunk, size_t end_chunk) { 15.434 - 
assert(beg_chunk <= _chunk_count, "beg_chunk out of range"); 15.435 - assert(end_chunk <= _chunk_count, "end_chunk out of range"); 15.436 - assert(ChunkSize % BlockSize == 0, "ChunkSize not a multiple of BlockSize"); 15.437 - 15.438 - const size_t chunk_cnt = end_chunk - beg_chunk; 15.439 - 15.440 - if (_block_data) { 15.441 - const size_t blocks_per_chunk = ChunkSize / BlockSize; 15.442 - const size_t beg_block = beg_chunk * blocks_per_chunk; 15.443 - const size_t block_cnt = chunk_cnt * blocks_per_chunk; 15.444 - memset(_block_data + beg_block, 0, block_cnt * sizeof(BlockData)); 15.445 - } 15.446 - memset(_chunk_data + beg_chunk, 0, chunk_cnt * sizeof(ChunkData)); 15.447 +void ParallelCompactData::clear_range(size_t beg_region, size_t end_region) { 15.448 + assert(beg_region <= _region_count, "beg_region out of range"); 15.449 + assert(end_region <= _region_count, "end_region out of range"); 15.450 + 15.451 + const size_t region_cnt = end_region - beg_region; 15.452 + memset(_region_data + beg_region, 0, region_cnt * sizeof(RegionData)); 15.453 } 15.454 15.455 -HeapWord* ParallelCompactData::partial_obj_end(size_t chunk_idx) const 15.456 +HeapWord* ParallelCompactData::partial_obj_end(size_t region_idx) const 15.457 { 15.458 - const ChunkData* cur_cp = chunk(chunk_idx); 15.459 - const ChunkData* const end_cp = chunk(chunk_count() - 1); 15.460 - 15.461 - HeapWord* result = chunk_to_addr(chunk_idx); 15.462 + const RegionData* cur_cp = region(region_idx); 15.463 + const RegionData* const end_cp = region(region_count() - 1); 15.464 + 15.465 + HeapWord* result = region_to_addr(region_idx); 15.466 if (cur_cp < end_cp) { 15.467 do { 15.468 result += cur_cp->partial_obj_size(); 15.469 - } while (cur_cp->partial_obj_size() == ChunkSize && ++cur_cp < end_cp); 15.470 + } while (cur_cp->partial_obj_size() == RegionSize && ++cur_cp < end_cp); 15.471 } 15.472 return result; 15.473 } 15.474 @@ -494,56 +363,56 @@ 15.475 void ParallelCompactData::add_obj(HeapWord* addr, size_t len) 15.476 { 15.477 const size_t obj_ofs = pointer_delta(addr, _region_start); 15.478 - const size_t beg_chunk = obj_ofs >> Log2ChunkSize; 15.479 - const size_t end_chunk = (obj_ofs + len - 1) >> Log2ChunkSize; 15.480 + const size_t beg_region = obj_ofs >> Log2RegionSize; 15.481 + const size_t end_region = (obj_ofs + len - 1) >> Log2RegionSize; 15.482 15.483 DEBUG_ONLY(Atomic::inc_ptr(&add_obj_count);) 15.484 DEBUG_ONLY(Atomic::add_ptr(len, &add_obj_size);) 15.485 15.486 - if (beg_chunk == end_chunk) { 15.487 - // All in one chunk. 15.488 - _chunk_data[beg_chunk].add_live_obj(len); 15.489 + if (beg_region == end_region) { 15.490 + // All in one region. 15.491 + _region_data[beg_region].add_live_obj(len); 15.492 return; 15.493 } 15.494 15.495 - // First chunk. 15.496 - const size_t beg_ofs = chunk_offset(addr); 15.497 - _chunk_data[beg_chunk].add_live_obj(ChunkSize - beg_ofs); 15.498 + // First region. 15.499 + const size_t beg_ofs = region_offset(addr); 15.500 + _region_data[beg_region].add_live_obj(RegionSize - beg_ofs); 15.501 15.502 klassOop klass = ((oop)addr)->klass(); 15.503 - // Middle chunks--completely spanned by this object. 15.504 - for (size_t chunk = beg_chunk + 1; chunk < end_chunk; ++chunk) { 15.505 - _chunk_data[chunk].set_partial_obj_size(ChunkSize); 15.506 - _chunk_data[chunk].set_partial_obj_addr(addr); 15.507 + // Middle regions--completely spanned by this object. 
15.508 + for (size_t region = beg_region + 1; region < end_region; ++region) { 15.509 + _region_data[region].set_partial_obj_size(RegionSize); 15.510 + _region_data[region].set_partial_obj_addr(addr); 15.511 } 15.512 15.513 - // Last chunk. 15.514 - const size_t end_ofs = chunk_offset(addr + len - 1); 15.515 - _chunk_data[end_chunk].set_partial_obj_size(end_ofs + 1); 15.516 - _chunk_data[end_chunk].set_partial_obj_addr(addr); 15.517 + // Last region. 15.518 + const size_t end_ofs = region_offset(addr + len - 1); 15.519 + _region_data[end_region].set_partial_obj_size(end_ofs + 1); 15.520 + _region_data[end_region].set_partial_obj_addr(addr); 15.521 } 15.522 15.523 void 15.524 ParallelCompactData::summarize_dense_prefix(HeapWord* beg, HeapWord* end) 15.525 { 15.526 - assert(chunk_offset(beg) == 0, "not ChunkSize aligned"); 15.527 - assert(chunk_offset(end) == 0, "not ChunkSize aligned"); 15.528 - 15.529 - size_t cur_chunk = addr_to_chunk_idx(beg); 15.530 - const size_t end_chunk = addr_to_chunk_idx(end); 15.531 + assert(region_offset(beg) == 0, "not RegionSize aligned"); 15.532 + assert(region_offset(end) == 0, "not RegionSize aligned"); 15.533 + 15.534 + size_t cur_region = addr_to_region_idx(beg); 15.535 + const size_t end_region = addr_to_region_idx(end); 15.536 HeapWord* addr = beg; 15.537 - while (cur_chunk < end_chunk) { 15.538 - _chunk_data[cur_chunk].set_destination(addr); 15.539 - _chunk_data[cur_chunk].set_destination_count(0); 15.540 - _chunk_data[cur_chunk].set_source_chunk(cur_chunk); 15.541 - _chunk_data[cur_chunk].set_data_location(addr); 15.542 - 15.543 - // Update live_obj_size so the chunk appears completely full. 15.544 - size_t live_size = ChunkSize - _chunk_data[cur_chunk].partial_obj_size(); 15.545 - _chunk_data[cur_chunk].set_live_obj_size(live_size); 15.546 - 15.547 - ++cur_chunk; 15.548 - addr += ChunkSize; 15.549 + while (cur_region < end_region) { 15.550 + _region_data[cur_region].set_destination(addr); 15.551 + _region_data[cur_region].set_destination_count(0); 15.552 + _region_data[cur_region].set_source_region(cur_region); 15.553 + _region_data[cur_region].set_data_location(addr); 15.554 + 15.555 + // Update live_obj_size so the region appears completely full. 15.556 + size_t live_size = RegionSize - _region_data[cur_region].partial_obj_size(); 15.557 + _region_data[cur_region].set_live_obj_size(live_size); 15.558 + 15.559 + ++cur_region; 15.560 + addr += RegionSize; 15.561 } 15.562 } 15.563 15.564 @@ -552,7 +421,7 @@ 15.565 HeapWord** target_next, 15.566 HeapWord** source_next) { 15.567 // This is too strict. 15.568 - // assert(chunk_offset(source_beg) == 0, "not ChunkSize aligned"); 15.569 + // assert(region_offset(source_beg) == 0, "not RegionSize aligned"); 15.570 15.571 if (TraceParallelOldGCSummaryPhase) { 15.572 tty->print_cr("tb=" PTR_FORMAT " te=" PTR_FORMAT " " 15.573 @@ -564,125 +433,93 @@ 15.574 source_next != 0 ? 
*source_next : (HeapWord*) 0); 15.575 } 15.576 15.577 - size_t cur_chunk = addr_to_chunk_idx(source_beg); 15.578 - const size_t end_chunk = addr_to_chunk_idx(chunk_align_up(source_end)); 15.579 + size_t cur_region = addr_to_region_idx(source_beg); 15.580 + const size_t end_region = addr_to_region_idx(region_align_up(source_end)); 15.581 15.582 HeapWord *dest_addr = target_beg; 15.583 - while (cur_chunk < end_chunk) { 15.584 - size_t words = _chunk_data[cur_chunk].data_size(); 15.585 + while (cur_region < end_region) { 15.586 + size_t words = _region_data[cur_region].data_size(); 15.587 15.588 #if 1 15.589 assert(pointer_delta(target_end, dest_addr) >= words, 15.590 "source region does not fit into target region"); 15.591 #else 15.592 - // XXX - need some work on the corner cases here. If the chunk does not 15.593 - // fit, then must either make sure any partial_obj from the chunk fits, or 15.594 - // 'undo' the initial part of the partial_obj that is in the previous chunk. 15.595 + // XXX - need some work on the corner cases here. If the region does not 15.596 + // fit, then must either make sure any partial_obj from the region fits, or 15.597 + // "undo" the initial part of the partial_obj that is in the previous 15.598 + // region. 15.599 if (dest_addr + words >= target_end) { 15.600 // Let the caller know where to continue. 15.601 *target_next = dest_addr; 15.602 - *source_next = chunk_to_addr(cur_chunk); 15.603 + *source_next = region_to_addr(cur_region); 15.604 return false; 15.605 } 15.606 #endif // #if 1 15.607 15.608 - _chunk_data[cur_chunk].set_destination(dest_addr); 15.609 - 15.610 - // Set the destination_count for cur_chunk, and if necessary, update 15.611 - // source_chunk for a destination chunk. The source_chunk field is updated 15.612 - // if cur_chunk is the first (left-most) chunk to be copied to a destination 15.613 - // chunk. 15.614 + _region_data[cur_region].set_destination(dest_addr); 15.615 + 15.616 + // Set the destination_count for cur_region, and if necessary, update 15.617 + // source_region for a destination region. The source_region field is 15.618 + // updated if cur_region is the first (left-most) region to be copied to a 15.619 + // destination region. 15.620 // 15.621 - // The destination_count calculation is a bit subtle. A chunk that has data 15.622 - // that compacts into itself does not count itself as a destination. This 15.623 - // maintains the invariant that a zero count means the chunk is available 15.624 - // and can be claimed and then filled. 15.625 + // The destination_count calculation is a bit subtle. A region that has 15.626 + // data that compacts into itself does not count itself as a destination. 15.627 + // This maintains the invariant that a zero count means the region is 15.628 + // available and can be claimed and then filled. 15.629 if (words > 0) { 15.630 HeapWord* const last_addr = dest_addr + words - 1; 15.631 - const size_t dest_chunk_1 = addr_to_chunk_idx(dest_addr); 15.632 - const size_t dest_chunk_2 = addr_to_chunk_idx(last_addr); 15.633 + const size_t dest_region_1 = addr_to_region_idx(dest_addr); 15.634 + const size_t dest_region_2 = addr_to_region_idx(last_addr); 15.635 #if 0 15.636 - // Initially assume that the destination chunks will be the same and 15.637 + // Initially assume that the destination regions will be the same and 15.638 // adjust the value below if necessary. Under this assumption, if 15.639 - // cur_chunk == dest_chunk_2, then cur_chunk will be compacted completely 15.640 - // into itself. 
15.641 - uint destination_count = cur_chunk == dest_chunk_2 ? 0 : 1; 15.642 - if (dest_chunk_1 != dest_chunk_2) { 15.643 - // Destination chunks differ; adjust destination_count. 15.644 + // cur_region == dest_region_2, then cur_region will be compacted 15.645 + // completely into itself. 15.646 + uint destination_count = cur_region == dest_region_2 ? 0 : 1; 15.647 + if (dest_region_1 != dest_region_2) { 15.648 + // Destination regions differ; adjust destination_count. 15.649 destination_count += 1; 15.650 - // Data from cur_chunk will be copied to the start of dest_chunk_2. 15.651 - _chunk_data[dest_chunk_2].set_source_chunk(cur_chunk); 15.652 - } else if (chunk_offset(dest_addr) == 0) { 15.653 - // Data from cur_chunk will be copied to the start of the destination 15.654 - // chunk. 15.655 - _chunk_data[dest_chunk_1].set_source_chunk(cur_chunk); 15.656 + // Data from cur_region will be copied to the start of dest_region_2. 15.657 + _region_data[dest_region_2].set_source_region(cur_region); 15.658 + } else if (region_offset(dest_addr) == 0) { 15.659 + // Data from cur_region will be copied to the start of the destination 15.660 + // region. 15.661 + _region_data[dest_region_1].set_source_region(cur_region); 15.662 } 15.663 #else 15.664 - // Initially assume that the destination chunks will be different and 15.665 + // Initially assume that the destination regions will be different and 15.666 // adjust the value below if necessary. Under this assumption, if 15.667 - // cur_chunk == dest_chunk2, then cur_chunk will be compacted partially 15.668 - // into dest_chunk_1 and partially into itself. 15.669 - uint destination_count = cur_chunk == dest_chunk_2 ? 1 : 2; 15.670 - if (dest_chunk_1 != dest_chunk_2) { 15.671 - // Data from cur_chunk will be copied to the start of dest_chunk_2. 15.672 - _chunk_data[dest_chunk_2].set_source_chunk(cur_chunk); 15.673 + // cur_region == dest_region2, then cur_region will be compacted partially 15.674 + // into dest_region_1 and partially into itself. 15.675 + uint destination_count = cur_region == dest_region_2 ? 1 : 2; 15.676 + if (dest_region_1 != dest_region_2) { 15.677 + // Data from cur_region will be copied to the start of dest_region_2. 15.678 + _region_data[dest_region_2].set_source_region(cur_region); 15.679 } else { 15.680 - // Destination chunks are the same; adjust destination_count. 15.681 + // Destination regions are the same; adjust destination_count. 15.682 destination_count -= 1; 15.683 - if (chunk_offset(dest_addr) == 0) { 15.684 - // Data from cur_chunk will be copied to the start of the destination 15.685 - // chunk. 15.686 - _chunk_data[dest_chunk_1].set_source_chunk(cur_chunk); 15.687 + if (region_offset(dest_addr) == 0) { 15.688 + // Data from cur_region will be copied to the start of the destination 15.689 + // region. 
15.690 + _region_data[dest_region_1].set_source_region(cur_region); 15.691 } 15.692 } 15.693 #endif // #if 0 15.694 15.695 - _chunk_data[cur_chunk].set_destination_count(destination_count); 15.696 - _chunk_data[cur_chunk].set_data_location(chunk_to_addr(cur_chunk)); 15.697 + _region_data[cur_region].set_destination_count(destination_count); 15.698 + _region_data[cur_region].set_data_location(region_to_addr(cur_region)); 15.699 dest_addr += words; 15.700 } 15.701 15.702 - ++cur_chunk; 15.703 + ++cur_region; 15.704 } 15.705 15.706 *target_next = dest_addr; 15.707 return true; 15.708 } 15.709 15.710 -bool ParallelCompactData::partial_obj_ends_in_block(size_t block_index) { 15.711 - HeapWord* block_addr = block_to_addr(block_index); 15.712 - HeapWord* block_end_addr = block_addr + BlockSize; 15.713 - size_t chunk_index = addr_to_chunk_idx(block_addr); 15.714 - HeapWord* partial_obj_end_addr = partial_obj_end(chunk_index); 15.715 - 15.716 - // An object that ends at the end of the block, ends 15.717 - // in the block (the last word of the object is to 15.718 - // the left of the end). 15.719 - if ((block_addr < partial_obj_end_addr) && 15.720 - (partial_obj_end_addr <= block_end_addr)) { 15.721 - return true; 15.722 - } 15.723 - 15.724 - return false; 15.725 -} 15.726 - 15.727 HeapWord* ParallelCompactData::calc_new_pointer(HeapWord* addr) { 15.728 - HeapWord* result = NULL; 15.729 - if (UseParallelOldGCChunkPointerCalc) { 15.730 - result = chunk_calc_new_pointer(addr); 15.731 - } else { 15.732 - result = block_calc_new_pointer(addr); 15.733 - } 15.734 - return result; 15.735 -} 15.736 - 15.737 -// This method is overly complicated (expensive) to be called 15.738 -// for every reference. 15.739 -// Try to restructure this so that a NULL is returned if 15.740 -// the object is dead. But don't wast the cycles to explicitly check 15.741 -// that it is dead since only live objects should be passed in. 15.742 - 15.743 -HeapWord* ParallelCompactData::chunk_calc_new_pointer(HeapWord* addr) { 15.744 assert(addr != NULL, "Should detect NULL oop earlier"); 15.745 assert(PSParallelCompact::gc_heap()->is_in(addr), "addr not in heap"); 15.746 #ifdef ASSERT 15.747 @@ -692,30 +529,30 @@ 15.748 #endif 15.749 assert(PSParallelCompact::mark_bitmap()->is_marked(addr), "obj not marked"); 15.750 15.751 - // Chunk covering the object. 15.752 - size_t chunk_index = addr_to_chunk_idx(addr); 15.753 - const ChunkData* const chunk_ptr = chunk(chunk_index); 15.754 - HeapWord* const chunk_addr = chunk_align_down(addr); 15.755 - 15.756 - assert(addr < chunk_addr + ChunkSize, "Chunk does not cover object"); 15.757 - assert(addr_to_chunk_ptr(chunk_addr) == chunk_ptr, "sanity check"); 15.758 - 15.759 - HeapWord* result = chunk_ptr->destination(); 15.760 - 15.761 - // If all the data in the chunk is live, then the new location of the object 15.762 - // can be calculated from the destination of the chunk plus the offset of the 15.763 - // object in the chunk. 15.764 - if (chunk_ptr->data_size() == ChunkSize) { 15.765 - result += pointer_delta(addr, chunk_addr); 15.766 + // Region covering the object. 
15.767 + size_t region_index = addr_to_region_idx(addr); 15.768 + const RegionData* const region_ptr = region(region_index); 15.769 + HeapWord* const region_addr = region_align_down(addr); 15.770 + 15.771 + assert(addr < region_addr + RegionSize, "Region does not cover object"); 15.772 + assert(addr_to_region_ptr(region_addr) == region_ptr, "sanity check"); 15.773 + 15.774 + HeapWord* result = region_ptr->destination(); 15.775 + 15.776 + // If all the data in the region is live, then the new location of the object 15.777 + // can be calculated from the destination of the region plus the offset of the 15.778 + // object in the region. 15.779 + if (region_ptr->data_size() == RegionSize) { 15.780 + result += pointer_delta(addr, region_addr); 15.781 return result; 15.782 } 15.783 15.784 // The new location of the object is 15.785 - // chunk destination + 15.786 - // size of the partial object extending onto the chunk + 15.787 - // sizes of the live objects in the Chunk that are to the left of addr 15.788 - const size_t partial_obj_size = chunk_ptr->partial_obj_size(); 15.789 - HeapWord* const search_start = chunk_addr + partial_obj_size; 15.790 + // region destination + 15.791 + // size of the partial object extending onto the region + 15.792 + // sizes of the live objects in the Region that are to the left of addr 15.793 + const size_t partial_obj_size = region_ptr->partial_obj_size(); 15.794 + HeapWord* const search_start = region_addr + partial_obj_size; 15.795 15.796 const ParMarkBitMap* bitmap = PSParallelCompact::mark_bitmap(); 15.797 size_t live_to_left = bitmap->live_words_in_range(search_start, oop(addr)); 15.798 @@ -725,50 +562,6 @@ 15.799 return result; 15.800 } 15.801 15.802 -HeapWord* ParallelCompactData::block_calc_new_pointer(HeapWord* addr) { 15.803 - assert(addr != NULL, "Should detect NULL oop earlier"); 15.804 - assert(PSParallelCompact::gc_heap()->is_in(addr), "addr not in heap"); 15.805 -#ifdef ASSERT 15.806 - if (PSParallelCompact::mark_bitmap()->is_unmarked(addr)) { 15.807 - gclog_or_tty->print_cr("calc_new_pointer:: addr " PTR_FORMAT, addr); 15.808 - } 15.809 -#endif 15.810 - assert(PSParallelCompact::mark_bitmap()->is_marked(addr), "obj not marked"); 15.811 - 15.812 - // Chunk covering the object. 15.813 - size_t chunk_index = addr_to_chunk_idx(addr); 15.814 - const ChunkData* const chunk_ptr = chunk(chunk_index); 15.815 - HeapWord* const chunk_addr = chunk_align_down(addr); 15.816 - 15.817 - assert(addr < chunk_addr + ChunkSize, "Chunk does not cover object"); 15.818 - assert(addr_to_chunk_ptr(chunk_addr) == chunk_ptr, "sanity check"); 15.819 - 15.820 - HeapWord* result = chunk_ptr->destination(); 15.821 - 15.822 - // If all the data in the chunk is live, then the new location of the object 15.823 - // can be calculated from the destination of the chunk plus the offset of the 15.824 - // object in the chunk. 
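The region-based calc_new_pointer() above reduces to a short formula: the new address is the region's destination, plus the tail of any object that spills onto the region from the left, plus the live words between that tail and the object itself, with a fast path when the region is completely live. A compilable toy restatement, where the caller supplies the inputs directly (in the real code partial_obj_size comes from the RegionData and live_to_left from a ParMarkBitMap walk):

    #include <cstddef>

    typedef unsigned long HeapWordIdx;   // word index into the heap; stand-in for HeapWord*

    // Forwarding rule sketched above:
    //  - fully live region: the object keeps its offset within the region;
    //  - otherwise: destination + spilled partial object + live words to the left.
    HeapWordIdx new_location(HeapWordIdx region_destination,
                             HeapWordIdx region_start,
                             HeapWordIdx addr,
                             size_t      region_data_size,   // live words in the region
                             size_t      region_size,        // RegionSize
                             size_t      partial_obj_size,   // words spilling onto the region
                             size_t      live_to_left) {     // live words in [partial end, addr)
      if (region_data_size == region_size) {
        return region_destination + (addr - region_start);
      }
      return region_destination + partial_obj_size + live_to_left;
    }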
15.825 - if (chunk_ptr->data_size() == ChunkSize) { 15.826 - result += pointer_delta(addr, chunk_addr); 15.827 - return result; 15.828 - } 15.829 - 15.830 - // The new location of the object is 15.831 - // chunk destination + 15.832 - // block offset + 15.833 - // sizes of the live objects in the Block that are to the left of addr 15.834 - const size_t block_offset = addr_to_block_ptr(addr)->offset(); 15.835 - HeapWord* const search_start = chunk_addr + block_offset; 15.836 - 15.837 - const ParMarkBitMap* bitmap = PSParallelCompact::mark_bitmap(); 15.838 - size_t live_to_left = bitmap->live_words_in_range(search_start, oop(addr)); 15.839 - 15.840 - result += block_offset + live_to_left; 15.841 - assert(result <= addr, "object cannot move to the right"); 15.842 - assert(result == chunk_calc_new_pointer(addr), "Should match"); 15.843 - return result; 15.844 -} 15.845 - 15.846 klassOop ParallelCompactData::calc_new_klass(klassOop old_klass) { 15.847 klassOop updated_klass; 15.848 if (PSParallelCompact::should_update_klass(old_klass)) { 15.849 @@ -792,15 +585,14 @@ 15.850 15.851 void ParallelCompactData::verify_clear() 15.852 { 15.853 - verify_clear(_chunk_vspace); 15.854 - verify_clear(_block_vspace); 15.855 + verify_clear(_region_vspace); 15.856 } 15.857 #endif // #ifdef ASSERT 15.858 15.859 #ifdef NOT_PRODUCT 15.860 -ParallelCompactData::ChunkData* debug_chunk(size_t chunk_index) { 15.861 +ParallelCompactData::RegionData* debug_region(size_t region_index) { 15.862 ParallelCompactData& sd = PSParallelCompact::summary_data(); 15.863 - return sd.chunk(chunk_index); 15.864 + return sd.region(region_index); 15.865 } 15.866 #endif 15.867 15.868 @@ -953,10 +745,10 @@ 15.869 const idx_t end_bit = BitMap::word_align_up(_mark_bitmap.addr_to_bit(top)); 15.870 _mark_bitmap.clear_range(beg_bit, end_bit); 15.871 15.872 - const size_t beg_chunk = _summary_data.addr_to_chunk_idx(bot); 15.873 - const size_t end_chunk = 15.874 - _summary_data.addr_to_chunk_idx(_summary_data.chunk_align_up(max_top)); 15.875 - _summary_data.clear_range(beg_chunk, end_chunk); 15.876 + const size_t beg_region = _summary_data.addr_to_region_idx(bot); 15.877 + const size_t end_region = 15.878 + _summary_data.addr_to_region_idx(_summary_data.region_align_up(max_top)); 15.879 + _summary_data.clear_range(beg_region, end_region); 15.880 } 15.881 15.882 void PSParallelCompact::pre_compact(PreGCValues* pre_gc_values) 15.883 @@ -1072,19 +864,19 @@ 15.884 PSParallelCompact::compute_dense_prefix_via_density(const SpaceId id, 15.885 bool maximum_compaction) 15.886 { 15.887 - const size_t chunk_size = ParallelCompactData::ChunkSize; 15.888 + const size_t region_size = ParallelCompactData::RegionSize; 15.889 const ParallelCompactData& sd = summary_data(); 15.890 15.891 const MutableSpace* const space = _space_info[id].space(); 15.892 - HeapWord* const top_aligned_up = sd.chunk_align_up(space->top()); 15.893 - const ChunkData* const beg_cp = sd.addr_to_chunk_ptr(space->bottom()); 15.894 - const ChunkData* const end_cp = sd.addr_to_chunk_ptr(top_aligned_up); 15.895 - 15.896 - // Skip full chunks at the beginning of the space--they are necessarily part 15.897 + HeapWord* const top_aligned_up = sd.region_align_up(space->top()); 15.898 + const RegionData* const beg_cp = sd.addr_to_region_ptr(space->bottom()); 15.899 + const RegionData* const end_cp = sd.addr_to_region_ptr(top_aligned_up); 15.900 + 15.901 + // Skip full regions at the beginning of the space--they are necessarily part 15.902 // of the dense prefix. 
15.903 size_t full_count = 0; 15.904 - const ChunkData* cp; 15.905 - for (cp = beg_cp; cp < end_cp && cp->data_size() == chunk_size; ++cp) { 15.906 + const RegionData* cp; 15.907 + for (cp = beg_cp; cp < end_cp && cp->data_size() == region_size; ++cp) { 15.908 ++full_count; 15.909 } 15.910 15.911 @@ -1093,7 +885,7 @@ 15.912 const bool interval_ended = gcs_since_max > HeapMaximumCompactionInterval; 15.913 if (maximum_compaction || cp == end_cp || interval_ended) { 15.914 _maximum_compaction_gc_num = total_invocations(); 15.915 - return sd.chunk_to_addr(cp); 15.916 + return sd.region_to_addr(cp); 15.917 } 15.918 15.919 HeapWord* const new_top = _space_info[id].new_top(); 15.920 @@ -1116,52 +908,53 @@ 15.921 } 15.922 15.923 // XXX - Use binary search? 15.924 - HeapWord* dense_prefix = sd.chunk_to_addr(cp); 15.925 - const ChunkData* full_cp = cp; 15.926 - const ChunkData* const top_cp = sd.addr_to_chunk_ptr(space->top() - 1); 15.927 + HeapWord* dense_prefix = sd.region_to_addr(cp); 15.928 + const RegionData* full_cp = cp; 15.929 + const RegionData* const top_cp = sd.addr_to_region_ptr(space->top() - 1); 15.930 while (cp < end_cp) { 15.931 - HeapWord* chunk_destination = cp->destination(); 15.932 - const size_t cur_deadwood = pointer_delta(dense_prefix, chunk_destination); 15.933 + HeapWord* region_destination = cp->destination(); 15.934 + const size_t cur_deadwood = pointer_delta(dense_prefix, region_destination); 15.935 if (TraceParallelOldGCDensePrefix && Verbose) { 15.936 tty->print_cr("c#=" SIZE_FORMAT_W(4) " dst=" PTR_FORMAT " " 15.937 "dp=" SIZE_FORMAT_W(8) " " "cdw=" SIZE_FORMAT_W(8), 15.938 - sd.chunk(cp), chunk_destination, 15.939 + sd.region(cp), region_destination, 15.940 dense_prefix, cur_deadwood); 15.941 } 15.942 15.943 if (cur_deadwood >= deadwood_goal) { 15.944 - // Found the chunk that has the correct amount of deadwood to the left. 15.945 - // This typically occurs after crossing a fairly sparse set of chunks, so 15.946 - // iterate backwards over those sparse chunks, looking for the chunk that 15.947 - // has the lowest density of live objects 'to the right.' 15.948 - size_t space_to_left = sd.chunk(cp) * chunk_size; 15.949 + // Found the region that has the correct amount of deadwood to the left. 15.950 + // This typically occurs after crossing a fairly sparse set of regions, so 15.951 + // iterate backwards over those sparse regions, looking for the region 15.952 + // that has the lowest density of live objects 'to the right.' 
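The backward scan described above keeps running totals of live words and capacity to the right of the candidate boundary and backs up one region at a time for as long as doing so lowers the density of live data to the right. A small worked instance of that update step; the region size and the per-region data_size() values are invented purely for illustration:

    #include <cstdio>
    #include <cstddef>

    int main() {
      const size_t region_size   = 512;             // words per region (assumed)
      size_t live_to_right  = 3000;                 // live words right of the candidate
      size_t space_to_right = 8 * region_size;      // capacity right of the candidate
      const size_t prev_data_size[3] = { 40, 0, 12 };  // made-up data_size() of prior regions

      double density = double(live_to_right) / space_to_right;
      for (int i = 0; i < 3; ++i) {
        const size_t prev_live  = live_to_right - prev_data_size[i];
        const size_t prev_space = space_to_right + region_size;
        const double prev_density = double(prev_live) / prev_space;
        if (density <= prev_density) break;         // backing up no longer helps
        live_to_right  = prev_live;
        space_to_right = prev_space;
        density        = prev_density;
        std::printf("backed up one region: density_to_right=%10.8f\n", density);
      }
      return 0;
    }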
15.953 + size_t space_to_left = sd.region(cp) * region_size; 15.954 size_t live_to_left = space_to_left - cur_deadwood; 15.955 size_t space_to_right = space_capacity - space_to_left; 15.956 size_t live_to_right = space_live - live_to_left; 15.957 double density_to_right = double(live_to_right) / space_to_right; 15.958 while (cp > full_cp) { 15.959 --cp; 15.960 - const size_t prev_chunk_live_to_right = live_to_right - cp->data_size(); 15.961 - const size_t prev_chunk_space_to_right = space_to_right + chunk_size; 15.962 - double prev_chunk_density_to_right = 15.963 - double(prev_chunk_live_to_right) / prev_chunk_space_to_right; 15.964 - if (density_to_right <= prev_chunk_density_to_right) { 15.965 + const size_t prev_region_live_to_right = live_to_right - 15.966 + cp->data_size(); 15.967 + const size_t prev_region_space_to_right = space_to_right + region_size; 15.968 + double prev_region_density_to_right = 15.969 + double(prev_region_live_to_right) / prev_region_space_to_right; 15.970 + if (density_to_right <= prev_region_density_to_right) { 15.971 return dense_prefix; 15.972 } 15.973 if (TraceParallelOldGCDensePrefix && Verbose) { 15.974 tty->print_cr("backing up from c=" SIZE_FORMAT_W(4) " d2r=%10.8f " 15.975 - "pc_d2r=%10.8f", sd.chunk(cp), density_to_right, 15.976 - prev_chunk_density_to_right); 15.977 + "pc_d2r=%10.8f", sd.region(cp), density_to_right, 15.978 + prev_region_density_to_right); 15.979 } 15.980 - dense_prefix -= chunk_size; 15.981 - live_to_right = prev_chunk_live_to_right; 15.982 - space_to_right = prev_chunk_space_to_right; 15.983 - density_to_right = prev_chunk_density_to_right; 15.984 + dense_prefix -= region_size; 15.985 + live_to_right = prev_region_live_to_right; 15.986 + space_to_right = prev_region_space_to_right; 15.987 + density_to_right = prev_region_density_to_right; 15.988 } 15.989 return dense_prefix; 15.990 } 15.991 15.992 - dense_prefix += chunk_size; 15.993 + dense_prefix += region_size; 15.994 ++cp; 15.995 } 15.996 15.997 @@ -1174,8 +967,8 @@ 15.998 const bool maximum_compaction, 15.999 HeapWord* const addr) 15.1000 { 15.1001 - const size_t chunk_idx = summary_data().addr_to_chunk_idx(addr); 15.1002 - ChunkData* const cp = summary_data().chunk(chunk_idx); 15.1003 + const size_t region_idx = summary_data().addr_to_region_idx(addr); 15.1004 + RegionData* const cp = summary_data().region(region_idx); 15.1005 const MutableSpace* const space = _space_info[id].space(); 15.1006 HeapWord* const new_top = _space_info[id].new_top(); 15.1007 15.1008 @@ -1191,7 +984,7 @@ 15.1009 "d2l=" SIZE_FORMAT " d2l%%=%6.4f " 15.1010 "d2r=" SIZE_FORMAT " l2r=" SIZE_FORMAT 15.1011 " ratio=%10.8f", 15.1012 - algorithm, addr, chunk_idx, 15.1013 + algorithm, addr, region_idx, 15.1014 space_live, 15.1015 dead_to_left, dead_to_left_pct, 15.1016 dead_to_right, live_to_right, 15.1017 @@ -1253,52 +1046,52 @@ 15.1018 return MAX2(limit, 0.0); 15.1019 } 15.1020 15.1021 -ParallelCompactData::ChunkData* 15.1022 -PSParallelCompact::first_dead_space_chunk(const ChunkData* beg, 15.1023 - const ChunkData* end) 15.1024 +ParallelCompactData::RegionData* 15.1025 +PSParallelCompact::first_dead_space_region(const RegionData* beg, 15.1026 + const RegionData* end) 15.1027 { 15.1028 - const size_t chunk_size = ParallelCompactData::ChunkSize; 15.1029 + const size_t region_size = ParallelCompactData::RegionSize; 15.1030 ParallelCompactData& sd = summary_data(); 15.1031 - size_t left = sd.chunk(beg); 15.1032 - size_t right = end > beg ? 
sd.chunk(end) - 1 : left; 15.1033 + size_t left = sd.region(beg); 15.1034 + size_t right = end > beg ? sd.region(end) - 1 : left; 15.1035 15.1036 // Binary search. 15.1037 while (left < right) { 15.1038 // Equivalent to (left + right) / 2, but does not overflow. 15.1039 const size_t middle = left + (right - left) / 2; 15.1040 - ChunkData* const middle_ptr = sd.chunk(middle); 15.1041 + RegionData* const middle_ptr = sd.region(middle); 15.1042 HeapWord* const dest = middle_ptr->destination(); 15.1043 - HeapWord* const addr = sd.chunk_to_addr(middle); 15.1044 + HeapWord* const addr = sd.region_to_addr(middle); 15.1045 assert(dest != NULL, "sanity"); 15.1046 assert(dest <= addr, "must move left"); 15.1047 15.1048 if (middle > left && dest < addr) { 15.1049 right = middle - 1; 15.1050 - } else if (middle < right && middle_ptr->data_size() == chunk_size) { 15.1051 + } else if (middle < right && middle_ptr->data_size() == region_size) { 15.1052 left = middle + 1; 15.1053 } else { 15.1054 return middle_ptr; 15.1055 } 15.1056 } 15.1057 - return sd.chunk(left); 15.1058 + return sd.region(left); 15.1059 } 15.1060 15.1061 -ParallelCompactData::ChunkData* 15.1062 -PSParallelCompact::dead_wood_limit_chunk(const ChunkData* beg, 15.1063 - const ChunkData* end, 15.1064 - size_t dead_words) 15.1065 +ParallelCompactData::RegionData* 15.1066 +PSParallelCompact::dead_wood_limit_region(const RegionData* beg, 15.1067 + const RegionData* end, 15.1068 + size_t dead_words) 15.1069 { 15.1070 ParallelCompactData& sd = summary_data(); 15.1071 - size_t left = sd.chunk(beg); 15.1072 - size_t right = end > beg ? sd.chunk(end) - 1 : left; 15.1073 + size_t left = sd.region(beg); 15.1074 + size_t right = end > beg ? sd.region(end) - 1 : left; 15.1075 15.1076 // Binary search. 15.1077 while (left < right) { 15.1078 // Equivalent to (left + right) / 2, but does not overflow. 15.1079 const size_t middle = left + (right - left) / 2; 15.1080 - ChunkData* const middle_ptr = sd.chunk(middle); 15.1081 + RegionData* const middle_ptr = sd.region(middle); 15.1082 HeapWord* const dest = middle_ptr->destination(); 15.1083 - HeapWord* const addr = sd.chunk_to_addr(middle); 15.1084 + HeapWord* const addr = sd.region_to_addr(middle); 15.1085 assert(dest != NULL, "sanity"); 15.1086 assert(dest <= addr, "must move left"); 15.1087 15.1088 @@ -1311,13 +1104,13 @@ 15.1089 return middle_ptr; 15.1090 } 15.1091 } 15.1092 - return sd.chunk(left); 15.1093 + return sd.region(left); 15.1094 } 15.1095 15.1096 // The result is valid during the summary phase, after the initial summarization 15.1097 // of each space into itself, and before final summarization. 
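Both binary searches above compute the midpoint as left + (right - left) / 2 rather than (left + right) / 2: with unsigned indices near the top of their range the naive sum wraps around, while the subtraction form always stays within [left, right]. A minimal illustration of the difference (the SIZE_MAX-based values exist only to provoke the wrap; they are not realistic region indices):

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    int main() {
      const std::size_t left  = SIZE_MAX - 10;
      const std::size_t right = SIZE_MAX - 2;

      const std::size_t naive = (left + right) / 2;        // sum wraps around; bogus midpoint
      const std::size_t safe  = left + (right - left) / 2; // always within [left, right]

      std::printf("naive=%zu\nsafe =%zu\n", naive, safe);
      return 0;
    }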
15.1098 inline double 15.1099 -PSParallelCompact::reclaimed_ratio(const ChunkData* const cp, 15.1100 +PSParallelCompact::reclaimed_ratio(const RegionData* const cp, 15.1101 HeapWord* const bottom, 15.1102 HeapWord* const top, 15.1103 HeapWord* const new_top) 15.1104 @@ -1331,12 +1124,13 @@ 15.1105 assert(top >= new_top, "summary data problem?"); 15.1106 assert(new_top > bottom, "space is empty; should not be here"); 15.1107 assert(new_top >= cp->destination(), "sanity"); 15.1108 - assert(top >= sd.chunk_to_addr(cp), "sanity"); 15.1109 + assert(top >= sd.region_to_addr(cp), "sanity"); 15.1110 15.1111 HeapWord* const destination = cp->destination(); 15.1112 const size_t dense_prefix_live = pointer_delta(destination, bottom); 15.1113 const size_t compacted_region_live = pointer_delta(new_top, destination); 15.1114 - const size_t compacted_region_used = pointer_delta(top, sd.chunk_to_addr(cp)); 15.1115 + const size_t compacted_region_used = pointer_delta(top, 15.1116 + sd.region_to_addr(cp)); 15.1117 const size_t reclaimable = compacted_region_used - compacted_region_live; 15.1118 15.1119 const double divisor = dense_prefix_live + 1.25 * compacted_region_live; 15.1120 @@ -1344,39 +1138,40 @@ 15.1121 } 15.1122 15.1123 // Return the address of the end of the dense prefix, a.k.a. the start of the 15.1124 -// compacted region. The address is always on a chunk boundary. 15.1125 +// compacted region. The address is always on a region boundary. 15.1126 // 15.1127 -// Completely full chunks at the left are skipped, since no compaction can occur 15.1128 -// in those chunks. Then the maximum amount of dead wood to allow is computed, 15.1129 -// based on the density (amount live / capacity) of the generation; the chunk 15.1130 -// with approximately that amount of dead space to the left is identified as the 15.1131 -// limit chunk. Chunks between the last completely full chunk and the limit 15.1132 -// chunk are scanned and the one that has the best (maximum) reclaimed_ratio() 15.1133 -// is selected. 15.1134 +// Completely full regions at the left are skipped, since no compaction can 15.1135 +// occur in those regions. Then the maximum amount of dead wood to allow is 15.1136 +// computed, based on the density (amount live / capacity) of the generation; 15.1137 +// the region with approximately that amount of dead space to the left is 15.1138 +// identified as the limit region. Regions between the last completely full 15.1139 +// region and the limit region are scanned and the one that has the best 15.1140 +// (maximum) reclaimed_ratio() is selected. 
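Using the quantities computed in reclaimed_ratio() above, the value being maximized is roughly the reclaimable dead space divided by the copying cost, with words that must actually be copied weighted 1.25 times heavier than words already settled in the dense prefix. A worked instance with invented counts; note the function's final return statement falls outside the hunk shown, so the expression reclaimable / divisor below is an assumption inferred from the surrounding code:

    #include <cstdio>

    int main() {
      // Invented word counts for one candidate dense-prefix boundary.
      const double dense_prefix_live     = 20000.0;  // live words left of the destination
      const double compacted_region_live =  6000.0;  // live words that must be copied
      const double compacted_region_used = 15000.0;  // words between the region and top
      const double reclaimable = compacted_region_used - compacted_region_live;

      const double divisor = dense_prefix_live + 1.25 * compacted_region_live;
      std::printf("reclaimed_ratio ~= %10.8f\n", reclaimable / divisor);   // ~0.327
      return 0;
    }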
15.1141 HeapWord* 15.1142 PSParallelCompact::compute_dense_prefix(const SpaceId id, 15.1143 bool maximum_compaction) 15.1144 { 15.1145 - const size_t chunk_size = ParallelCompactData::ChunkSize; 15.1146 + const size_t region_size = ParallelCompactData::RegionSize; 15.1147 const ParallelCompactData& sd = summary_data(); 15.1148 15.1149 const MutableSpace* const space = _space_info[id].space(); 15.1150 HeapWord* const top = space->top(); 15.1151 - HeapWord* const top_aligned_up = sd.chunk_align_up(top); 15.1152 + HeapWord* const top_aligned_up = sd.region_align_up(top); 15.1153 HeapWord* const new_top = _space_info[id].new_top(); 15.1154 - HeapWord* const new_top_aligned_up = sd.chunk_align_up(new_top); 15.1155 + HeapWord* const new_top_aligned_up = sd.region_align_up(new_top); 15.1156 HeapWord* const bottom = space->bottom(); 15.1157 - const ChunkData* const beg_cp = sd.addr_to_chunk_ptr(bottom); 15.1158 - const ChunkData* const top_cp = sd.addr_to_chunk_ptr(top_aligned_up); 15.1159 - const ChunkData* const new_top_cp = sd.addr_to_chunk_ptr(new_top_aligned_up); 15.1160 - 15.1161 - // Skip full chunks at the beginning of the space--they are necessarily part 15.1162 + const RegionData* const beg_cp = sd.addr_to_region_ptr(bottom); 15.1163 + const RegionData* const top_cp = sd.addr_to_region_ptr(top_aligned_up); 15.1164 + const RegionData* const new_top_cp = 15.1165 + sd.addr_to_region_ptr(new_top_aligned_up); 15.1166 + 15.1167 + // Skip full regions at the beginning of the space--they are necessarily part 15.1168 // of the dense prefix. 15.1169 - const ChunkData* const full_cp = first_dead_space_chunk(beg_cp, new_top_cp); 15.1170 - assert(full_cp->destination() == sd.chunk_to_addr(full_cp) || 15.1171 + const RegionData* const full_cp = first_dead_space_region(beg_cp, new_top_cp); 15.1172 + assert(full_cp->destination() == sd.region_to_addr(full_cp) || 15.1173 space->is_empty(), "no dead space allowed to the left"); 15.1174 - assert(full_cp->data_size() < chunk_size || full_cp == new_top_cp - 1, 15.1175 - "chunk must have dead space"); 15.1176 + assert(full_cp->data_size() < region_size || full_cp == new_top_cp - 1, 15.1177 + "region must have dead space"); 15.1178 15.1179 // The gc number is saved whenever a maximum compaction is done, and used to 15.1180 // determine when the maximum compaction interval has expired. This avoids 15.1181 @@ -1387,7 +1182,7 @@ 15.1182 total_invocations() == HeapFirstMaximumCompactionCount; 15.1183 if (maximum_compaction || full_cp == top_cp || interval_ended) { 15.1184 _maximum_compaction_gc_num = total_invocations(); 15.1185 - return sd.chunk_to_addr(full_cp); 15.1186 + return sd.region_to_addr(full_cp); 15.1187 } 15.1188 15.1189 const size_t space_live = pointer_delta(new_top, bottom); 15.1190 @@ -1413,15 +1208,15 @@ 15.1191 dead_wood_max, dead_wood_limit); 15.1192 } 15.1193 15.1194 - // Locate the chunk with the desired amount of dead space to the left. 15.1195 - const ChunkData* const limit_cp = 15.1196 - dead_wood_limit_chunk(full_cp, top_cp, dead_wood_limit); 15.1197 - 15.1198 - // Scan from the first chunk with dead space to the limit chunk and find the 15.1199 + // Locate the region with the desired amount of dead space to the left. 15.1200 + const RegionData* const limit_cp = 15.1201 + dead_wood_limit_region(full_cp, top_cp, dead_wood_limit); 15.1202 + 15.1203 + // Scan from the first region with dead space to the limit region and find the 15.1204 // one with the best (largest) reclaimed ratio. 
15.1205 double best_ratio = 0.0; 15.1206 - const ChunkData* best_cp = full_cp; 15.1207 - for (const ChunkData* cp = full_cp; cp < limit_cp; ++cp) { 15.1208 + const RegionData* best_cp = full_cp; 15.1209 + for (const RegionData* cp = full_cp; cp < limit_cp; ++cp) { 15.1210 double tmp_ratio = reclaimed_ratio(cp, bottom, top, new_top); 15.1211 if (tmp_ratio > best_ratio) { 15.1212 best_cp = cp; 15.1213 @@ -1430,18 +1225,18 @@ 15.1214 } 15.1215 15.1216 #if 0 15.1217 - // Something to consider: if the chunk with the best ratio is 'close to' the 15.1218 - // first chunk w/free space, choose the first chunk with free space 15.1219 - // ("first-free"). The first-free chunk is usually near the start of the 15.1220 + // Something to consider: if the region with the best ratio is 'close to' the 15.1221 + // first region w/free space, choose the first region with free space 15.1222 + // ("first-free"). The first-free region is usually near the start of the 15.1223 // heap, which means we are copying most of the heap already, so copy a bit 15.1224 // more to get complete compaction. 15.1225 - if (pointer_delta(best_cp, full_cp, sizeof(ChunkData)) < 4) { 15.1226 + if (pointer_delta(best_cp, full_cp, sizeof(RegionData)) < 4) { 15.1227 _maximum_compaction_gc_num = total_invocations(); 15.1228 best_cp = full_cp; 15.1229 } 15.1230 #endif // #if 0 15.1231 15.1232 - return sd.chunk_to_addr(best_cp); 15.1233 + return sd.region_to_addr(best_cp); 15.1234 } 15.1235 15.1236 void PSParallelCompact::summarize_spaces_quick() 15.1237 @@ -1459,9 +1254,9 @@ 15.1238 void PSParallelCompact::fill_dense_prefix_end(SpaceId id) 15.1239 { 15.1240 HeapWord* const dense_prefix_end = dense_prefix(id); 15.1241 - const ChunkData* chunk = _summary_data.addr_to_chunk_ptr(dense_prefix_end); 15.1242 + const RegionData* region = _summary_data.addr_to_region_ptr(dense_prefix_end); 15.1243 const idx_t dense_prefix_bit = _mark_bitmap.addr_to_bit(dense_prefix_end); 15.1244 - if (dead_space_crosses_boundary(chunk, dense_prefix_bit)) { 15.1245 + if (dead_space_crosses_boundary(region, dense_prefix_bit)) { 15.1246 // Only enough dead space is filled so that any remaining dead space to the 15.1247 // left is larger than the minimum filler object. (The remainder is filled 15.1248 // during the copy/update phase.) 15.1249 @@ -1552,7 +1347,7 @@ 15.1250 fill_dense_prefix_end(id); 15.1251 } 15.1252 15.1253 - // Compute the destination of each Chunk, and thus each object. 15.1254 + // Compute the destination of each Region, and thus each object. 
15.1255 _summary_data.summarize_dense_prefix(space->bottom(), dense_prefix_end); 15.1256 _summary_data.summarize(dense_prefix_end, space->end(), 15.1257 dense_prefix_end, space->top(), 15.1258 @@ -1560,19 +1355,19 @@ 15.1259 } 15.1260 15.1261 if (TraceParallelOldGCSummaryPhase) { 15.1262 - const size_t chunk_size = ParallelCompactData::ChunkSize; 15.1263 + const size_t region_size = ParallelCompactData::RegionSize; 15.1264 HeapWord* const dense_prefix_end = _space_info[id].dense_prefix(); 15.1265 - const size_t dp_chunk = _summary_data.addr_to_chunk_idx(dense_prefix_end); 15.1266 + const size_t dp_region = _summary_data.addr_to_region_idx(dense_prefix_end); 15.1267 const size_t dp_words = pointer_delta(dense_prefix_end, space->bottom()); 15.1268 HeapWord* const new_top = _space_info[id].new_top(); 15.1269 - const HeapWord* nt_aligned_up = _summary_data.chunk_align_up(new_top); 15.1270 + const HeapWord* nt_aligned_up = _summary_data.region_align_up(new_top); 15.1271 const size_t cr_words = pointer_delta(nt_aligned_up, dense_prefix_end); 15.1272 tty->print_cr("id=%d cap=" SIZE_FORMAT " dp=" PTR_FORMAT " " 15.1273 - "dp_chunk=" SIZE_FORMAT " " "dp_count=" SIZE_FORMAT " " 15.1274 + "dp_region=" SIZE_FORMAT " " "dp_count=" SIZE_FORMAT " " 15.1275 "cr_count=" SIZE_FORMAT " " "nt=" PTR_FORMAT, 15.1276 id, space->capacity_in_words(), dense_prefix_end, 15.1277 - dp_chunk, dp_words / chunk_size, 15.1278 - cr_words / chunk_size, new_top); 15.1279 + dp_region, dp_words / region_size, 15.1280 + cr_words / region_size, new_top); 15.1281 } 15.1282 } 15.1283 15.1284 @@ -1584,11 +1379,6 @@ 15.1285 // trace("2"); 15.1286 15.1287 #ifdef ASSERT 15.1288 - if (VerifyParallelOldWithMarkSweep && 15.1289 - (PSParallelCompact::total_invocations() % 15.1290 - VerifyParallelOldWithMarkSweepInterval) == 0) { 15.1291 - verify_mark_bitmap(_mark_bitmap); 15.1292 - } 15.1293 if (TraceParallelOldGCMarkingPhase) { 15.1294 tty->print_cr("add_obj_count=" SIZE_FORMAT " " 15.1295 "add_obj_bytes=" SIZE_FORMAT, 15.1296 @@ -1605,7 +1395,7 @@ 15.1297 if (TraceParallelOldGCSummaryPhase) { 15.1298 tty->print_cr("summary_phase: after summarizing each space to self"); 15.1299 Universe::print(); 15.1300 - NOT_PRODUCT(print_chunk_ranges()); 15.1301 + NOT_PRODUCT(print_region_ranges()); 15.1302 if (Verbose) { 15.1303 NOT_PRODUCT(print_initial_summary_data(_summary_data, _space_info)); 15.1304 } 15.1305 @@ -1651,14 +1441,15 @@ 15.1306 space->bottom(), space->top(), 15.1307 new_top_addr); 15.1308 15.1309 - // Clear the source_chunk field for each chunk in the space. 15.1310 + // Clear the source_region field for each region in the space. 15.1311 HeapWord* const new_top = _space_info[id].new_top(); 15.1312 - HeapWord* const clear_end = _summary_data.chunk_align_up(new_top); 15.1313 - ChunkData* beg_chunk = _summary_data.addr_to_chunk_ptr(space->bottom()); 15.1314 - ChunkData* end_chunk = _summary_data.addr_to_chunk_ptr(clear_end); 15.1315 - while (beg_chunk < end_chunk) { 15.1316 - beg_chunk->set_source_chunk(0); 15.1317 - ++beg_chunk; 15.1318 + HeapWord* const clear_end = _summary_data.region_align_up(new_top); 15.1319 + RegionData* beg_region = 15.1320 + _summary_data.addr_to_region_ptr(space->bottom()); 15.1321 + RegionData* end_region = _summary_data.addr_to_region_ptr(clear_end); 15.1322 + while (beg_region < end_region) { 15.1323 + beg_region->set_source_region(0); 15.1324 + ++beg_region; 15.1325 } 15.1326 15.1327 // Reset the new_top value for the space. 
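The clearing loop above, like most of this patch, leans on a few conversions between heap addresses and region indices (addr_to_region_idx, region_to_addr, region_offset, region_align_up). A self-contained sketch of that arithmetic for a power-of-two region size; the names mirror the patch, but the shift-and-mask implementation is only an assumption about how the helpers behave, not the real ParallelCompactData code:

    #include <cstddef>

    // Toy region table over word indices.  RegionSize must be a power of two.
    struct RegionTable {
      size_t region_base;                               // word index of the first region
      static const size_t Log2RegionSize       = 9;
      static const size_t RegionSize           = size_t(1) << Log2RegionSize;
      static const size_t RegionSizeOffsetMask = RegionSize - 1;

      size_t addr_to_region_idx(size_t addr) const {
        return (addr - region_base) >> Log2RegionSize;
      }
      size_t region_to_addr(size_t region) const {
        return region_base + (region << Log2RegionSize);
      }
      size_t region_offset(size_t addr) const {          // 0 iff addr is region-aligned
        return (addr - region_base) & RegionSizeOffsetMask;
      }
      size_t region_align_up(size_t addr) const {        // used to cover a partial last region
        return region_base +
               (((addr - region_base) + RegionSizeOffsetMask) & ~RegionSizeOffsetMask);
      }
    };
    // e.g. a clear loop walks addr_to_region_idx(bottom) .. addr_to_region_idx(region_align_up(new_top)).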
15.1328 @@ -1666,243 +1457,16 @@ 15.1329 } 15.1330 } 15.1331 15.1332 - // Fill in the block data after any changes to the chunks have 15.1333 - // been made. 15.1334 -#ifdef ASSERT 15.1335 - summarize_blocks(cm, perm_space_id); 15.1336 - summarize_blocks(cm, old_space_id); 15.1337 -#else 15.1338 - if (!UseParallelOldGCChunkPointerCalc) { 15.1339 - summarize_blocks(cm, perm_space_id); 15.1340 - summarize_blocks(cm, old_space_id); 15.1341 - } 15.1342 -#endif 15.1343 - 15.1344 if (TraceParallelOldGCSummaryPhase) { 15.1345 tty->print_cr("summary_phase: after final summarization"); 15.1346 Universe::print(); 15.1347 - NOT_PRODUCT(print_chunk_ranges()); 15.1348 + NOT_PRODUCT(print_region_ranges()); 15.1349 if (Verbose) { 15.1350 NOT_PRODUCT(print_generic_summary_data(_summary_data, _space_info)); 15.1351 } 15.1352 } 15.1353 } 15.1354 15.1355 -// Fill in the BlockData. 15.1356 -// Iterate over the spaces and within each space iterate over 15.1357 -// the chunks and fill in the BlockData for each chunk. 15.1358 - 15.1359 -void PSParallelCompact::summarize_blocks(ParCompactionManager* cm, 15.1360 - SpaceId first_compaction_space_id) { 15.1361 -#if 0 15.1362 - DEBUG_ONLY(ParallelCompactData::BlockData::set_cur_phase(1);) 15.1363 - for (SpaceId cur_space_id = first_compaction_space_id; 15.1364 - cur_space_id != last_space_id; 15.1365 - cur_space_id = next_compaction_space_id(cur_space_id)) { 15.1366 - // Iterate over the chunks in the space 15.1367 - size_t start_chunk_index = 15.1368 - _summary_data.addr_to_chunk_idx(space(cur_space_id)->bottom()); 15.1369 - BitBlockUpdateClosure bbu(mark_bitmap(), 15.1370 - cm, 15.1371 - start_chunk_index); 15.1372 - // Iterate over blocks. 15.1373 - for (size_t chunk_index = start_chunk_index; 15.1374 - chunk_index < _summary_data.chunk_count() && 15.1375 - _summary_data.chunk_to_addr(chunk_index) < space(cur_space_id)->top(); 15.1376 - chunk_index++) { 15.1377 - 15.1378 - // Reset the closure for the new chunk. Note that the closure 15.1379 - // maintains some data that does not get reset for each chunk 15.1380 - // so a new instance of the closure is no appropriate. 15.1381 - bbu.reset_chunk(chunk_index); 15.1382 - 15.1383 - // Start the iteration with the first live object. This 15.1384 - // may return the end of the chunk. That is acceptable since 15.1385 - // it will properly limit the iterations. 15.1386 - ParMarkBitMap::idx_t left_offset = mark_bitmap()->addr_to_bit( 15.1387 - _summary_data.first_live_or_end_in_chunk(chunk_index)); 15.1388 - 15.1389 - // End the iteration at the end of the chunk. 15.1390 - HeapWord* chunk_addr = _summary_data.chunk_to_addr(chunk_index); 15.1391 - HeapWord* chunk_end = chunk_addr + ParallelCompactData::ChunkSize; 15.1392 - ParMarkBitMap::idx_t right_offset = 15.1393 - mark_bitmap()->addr_to_bit(chunk_end); 15.1394 - 15.1395 - // Blocks that have not objects starting in them can be 15.1396 - // skipped because their data will never be used. 15.1397 - if (left_offset < right_offset) { 15.1398 - 15.1399 - // Iterate through the objects in the chunk. 15.1400 - ParMarkBitMap::idx_t last_offset = 15.1401 - mark_bitmap()->pair_iterate(&bbu, left_offset, right_offset); 15.1402 - 15.1403 - // If last_offset is less than right_offset, then the iterations 15.1404 - // terminated while it was looking for an end bit. "last_offset" 15.1405 - // is then the offset for the last start bit. 
In this situation 15.1406 - // the "offset" field for the next block to the right (_cur_block + 1) 15.1407 - // will not have been update although there may be live data 15.1408 - // to the left of the chunk. 15.1409 - 15.1410 - size_t cur_block_plus_1 = bbu.cur_block() + 1; 15.1411 - HeapWord* cur_block_plus_1_addr = 15.1412 - _summary_data.block_to_addr(bbu.cur_block()) + 15.1413 - ParallelCompactData::BlockSize; 15.1414 - HeapWord* last_offset_addr = mark_bitmap()->bit_to_addr(last_offset); 15.1415 - #if 1 // This code works. The else doesn't but should. Why does it? 15.1416 - // The current block (cur_block()) has already been updated. 15.1417 - // The last block that may need to be updated is either the 15.1418 - // next block (current block + 1) or the block where the 15.1419 - // last object starts (which can be greater than the 15.1420 - // next block if there were no objects found in intervening 15.1421 - // blocks). 15.1422 - size_t last_block = 15.1423 - MAX2(bbu.cur_block() + 1, 15.1424 - _summary_data.addr_to_block_idx(last_offset_addr)); 15.1425 - #else 15.1426 - // The current block has already been updated. The only block 15.1427 - // that remains to be updated is the block where the last 15.1428 - // object in the chunk starts. 15.1429 - size_t last_block = _summary_data.addr_to_block_idx(last_offset_addr); 15.1430 - #endif 15.1431 - assert_bit_is_start(last_offset); 15.1432 - assert((last_block == _summary_data.block_count()) || 15.1433 - (_summary_data.block(last_block)->raw_offset() == 0), 15.1434 - "Should not have been set"); 15.1435 - // Is the last block still in the current chunk? If still 15.1436 - // in this chunk, update the last block (the counting that 15.1437 - // included the current block is meant for the offset of the last 15.1438 - // block). If not in this chunk, do nothing. Should not 15.1439 - // update a block in the next chunk. 15.1440 - if (ParallelCompactData::chunk_contains_block(bbu.chunk_index(), 15.1441 - last_block)) { 15.1442 - if (last_offset < right_offset) { 15.1443 - // The last object started in this chunk but ends beyond 15.1444 - // this chunk. Update the block for this last object. 15.1445 - assert(mark_bitmap()->is_marked(last_offset), "Should be marked"); 15.1446 - // No end bit was found. The closure takes care of 15.1447 - // the cases where 15.1448 - // an objects crosses over into the next block 15.1449 - // an objects starts and ends in the next block 15.1450 - // It does not handle the case where an object is 15.1451 - // the first object in a later block and extends 15.1452 - // past the end of the chunk (i.e., the closure 15.1453 - // only handles complete objects that are in the range 15.1454 - // it is given). That object is handed back here 15.1455 - // for any special consideration necessary. 15.1456 - // 15.1457 - // Is the first bit in the last block a start or end bit? 15.1458 - // 15.1459 - // If the partial object ends in the last block L, 15.1460 - // then the 1st bit in L may be an end bit. 15.1461 - // 15.1462 - // Else does the last object start in a block after the current 15.1463 - // block? A block AA will already have been updated if an 15.1464 - // object ends in the next block AA+1. An object found to end in 15.1465 - // the AA+1 is the trigger that updates AA. Objects are being 15.1466 - // counted in the current block for updaing a following 15.1467 - // block. An object may start in later block 15.1468 - // block but may extend beyond the last block in the chunk. 
15.1469 - // Updates are only done when the end of an object has been 15.1470 - // found. If the last object (covered by block L) starts 15.1471 - // beyond the current block, then no object ends in L (otherwise 15.1472 - // L would be the current block). So the first bit in L is 15.1473 - // a start bit. 15.1474 - // 15.1475 - // Else the last objects start in the current block and ends 15.1476 - // beyond the chunk. The current block has already been 15.1477 - // updated and there is no later block (with an object 15.1478 - // starting in it) that needs to be updated. 15.1479 - // 15.1480 - if (_summary_data.partial_obj_ends_in_block(last_block)) { 15.1481 - _summary_data.block(last_block)->set_end_bit_offset( 15.1482 - bbu.live_data_left()); 15.1483 - } else if (last_offset_addr >= cur_block_plus_1_addr) { 15.1484 - // The start of the object is on a later block 15.1485 - // (to the right of the current block and there are no 15.1486 - // complete live objects to the left of this last object 15.1487 - // within the chunk. 15.1488 - // The first bit in the block is for the start of the 15.1489 - // last object. 15.1490 - _summary_data.block(last_block)->set_start_bit_offset( 15.1491 - bbu.live_data_left()); 15.1492 - } else { 15.1493 - // The start of the last object was found in 15.1494 - // the current chunk (which has already 15.1495 - // been updated). 15.1496 - assert(bbu.cur_block() == 15.1497 - _summary_data.addr_to_block_idx(last_offset_addr), 15.1498 - "Should be a block already processed"); 15.1499 - } 15.1500 -#ifdef ASSERT 15.1501 - // Is there enough block information to find this object? 15.1502 - // The destination of the chunk has not been set so the 15.1503 - // values returned by calc_new_pointer() and 15.1504 - // block_calc_new_pointer() will only be 15.1505 - // offsets. But they should agree. 15.1506 - HeapWord* moved_obj_with_chunks = 15.1507 - _summary_data.chunk_calc_new_pointer(last_offset_addr); 15.1508 - HeapWord* moved_obj_with_blocks = 15.1509 - _summary_data.calc_new_pointer(last_offset_addr); 15.1510 - assert(moved_obj_with_chunks == moved_obj_with_blocks, 15.1511 - "Block calculation is wrong"); 15.1512 -#endif 15.1513 - } else if (last_block < _summary_data.block_count()) { 15.1514 - // Iterations ended looking for a start bit (but 15.1515 - // did not run off the end of the block table). 15.1516 - _summary_data.block(last_block)->set_start_bit_offset( 15.1517 - bbu.live_data_left()); 15.1518 - } 15.1519 - } 15.1520 -#ifdef ASSERT 15.1521 - // Is there enough block information to find this object? 15.1522 - HeapWord* left_offset_addr = mark_bitmap()->bit_to_addr(left_offset); 15.1523 - HeapWord* moved_obj_with_chunks = 15.1524 - _summary_data.calc_new_pointer(left_offset_addr); 15.1525 - HeapWord* moved_obj_with_blocks = 15.1526 - _summary_data.calc_new_pointer(left_offset_addr); 15.1527 - assert(moved_obj_with_chunks == moved_obj_with_blocks, 15.1528 - "Block calculation is wrong"); 15.1529 -#endif 15.1530 - 15.1531 - // Is there another block after the end of this chunk? 15.1532 -#ifdef ASSERT 15.1533 - if (last_block < _summary_data.block_count()) { 15.1534 - // No object may have been found in a block. If that 15.1535 - // block is at the end of the chunk, the iteration will 15.1536 - // terminate without incrementing the current block so 15.1537 - // that the current block is not the last block in the 15.1538 - // chunk. That situation precludes asserting that the 15.1539 - // current block is the last block in the chunk. 
Assert 15.1540 - // the lesser condition that the current block does not 15.1541 - // exceed the chunk. 15.1542 - assert(_summary_data.block_to_addr(last_block) <= 15.1543 - (_summary_data.chunk_to_addr(chunk_index) + 15.1544 - ParallelCompactData::ChunkSize), 15.1545 - "Chunk and block inconsistency"); 15.1546 - assert(last_offset <= right_offset, "Iteration over ran end"); 15.1547 - } 15.1548 -#endif 15.1549 - } 15.1550 -#ifdef ASSERT 15.1551 - if (PrintGCDetails && Verbose) { 15.1552 - if (_summary_data.chunk(chunk_index)->partial_obj_size() == 1) { 15.1553 - size_t first_block = 15.1554 - chunk_index / ParallelCompactData::BlocksPerChunk; 15.1555 - gclog_or_tty->print_cr("first_block " PTR_FORMAT 15.1556 - " _offset " PTR_FORMAT 15.1557 - "_first_is_start_bit %d", 15.1558 - first_block, 15.1559 - _summary_data.block(first_block)->raw_offset(), 15.1560 - _summary_data.block(first_block)->first_is_start_bit()); 15.1561 - } 15.1562 - } 15.1563 -#endif 15.1564 - } 15.1565 - } 15.1566 - DEBUG_ONLY(ParallelCompactData::BlockData::set_cur_phase(16);) 15.1567 -#endif // #if 0 15.1568 -} 15.1569 - 15.1570 // This method should contain all heap-specific policy for invoking a full 15.1571 // collection. invoke_no_policy() will only attempt to compact the heap; it 15.1572 // will do nothing further. If we need to bail out for policy reasons, scavenge 15.1573 @@ -1937,18 +1501,9 @@ 15.1574 } 15.1575 } 15.1576 15.1577 -bool ParallelCompactData::chunk_contains(size_t chunk_index, HeapWord* addr) { 15.1578 - size_t addr_chunk_index = addr_to_chunk_idx(addr); 15.1579 - return chunk_index == addr_chunk_index; 15.1580 -} 15.1581 - 15.1582 -bool ParallelCompactData::chunk_contains_block(size_t chunk_index, 15.1583 - size_t block_index) { 15.1584 - size_t first_block_in_chunk = chunk_index * BlocksPerChunk; 15.1585 - size_t last_block_in_chunk = (chunk_index + 1) * BlocksPerChunk - 1; 15.1586 - 15.1587 - return (first_block_in_chunk <= block_index) && 15.1588 - (block_index <= last_block_in_chunk); 15.1589 +bool ParallelCompactData::region_contains(size_t region_index, HeapWord* addr) { 15.1590 + size_t addr_region_index = addr_to_region_idx(addr); 15.1591 + return region_index == addr_region_index; 15.1592 } 15.1593 15.1594 // This method contains no policy. You should probably 15.1595 @@ -2038,39 +1593,9 @@ 15.1596 } 15.1597 #endif // #ifndef PRODUCT 15.1598 15.1599 -#ifdef ASSERT 15.1600 - if (VerifyParallelOldWithMarkSweep && 15.1601 - (PSParallelCompact::total_invocations() % 15.1602 - VerifyParallelOldWithMarkSweepInterval) == 0) { 15.1603 - gclog_or_tty->print_cr("Verify marking with mark_sweep_phase1()"); 15.1604 - if (PrintGCDetails && Verbose) { 15.1605 - gclog_or_tty->print_cr("mark_sweep_phase1:"); 15.1606 - } 15.1607 - // Clear the discovered lists so that discovered objects 15.1608 - // don't look like they have been discovered twice. 
15.1609 - ref_processor()->clear_discovered_references(); 15.1610 - 15.1611 - PSMarkSweep::allocate_stacks(); 15.1612 - MemRegion mr = Universe::heap()->reserved_region(); 15.1613 - PSMarkSweep::ref_processor()->enable_discovery(); 15.1614 - PSMarkSweep::mark_sweep_phase1(maximum_heap_compaction); 15.1615 - } 15.1616 -#endif 15.1617 - 15.1618 bool max_on_system_gc = UseMaximumCompactionOnSystemGC && is_system_gc; 15.1619 summary_phase(vmthread_cm, maximum_heap_compaction || max_on_system_gc); 15.1620 15.1621 -#ifdef ASSERT 15.1622 - if (VerifyParallelOldWithMarkSweep && 15.1623 - (PSParallelCompact::total_invocations() % 15.1624 - VerifyParallelOldWithMarkSweepInterval) == 0) { 15.1625 - if (PrintGCDetails && Verbose) { 15.1626 - gclog_or_tty->print_cr("mark_sweep_phase2:"); 15.1627 - } 15.1628 - PSMarkSweep::mark_sweep_phase2(); 15.1629 - } 15.1630 -#endif 15.1631 - 15.1632 COMPILER2_PRESENT(assert(DerivedPointerTable::is_active(), "Sanity")); 15.1633 COMPILER2_PRESENT(DerivedPointerTable::set_active(false)); 15.1634 15.1635 @@ -2078,28 +1603,6 @@ 15.1636 // needed by the compaction for filling holes in the dense prefix. 15.1637 adjust_roots(); 15.1638 15.1639 -#ifdef ASSERT 15.1640 - if (VerifyParallelOldWithMarkSweep && 15.1641 - (PSParallelCompact::total_invocations() % 15.1642 - VerifyParallelOldWithMarkSweepInterval) == 0) { 15.1643 - // Do a separate verify phase so that the verify 15.1644 - // code can use the the forwarding pointers to 15.1645 - // check the new pointer calculation. The restore_marks() 15.1646 - // has to be done before the real compact. 15.1647 - vmthread_cm->set_action(ParCompactionManager::VerifyUpdate); 15.1648 - compact_perm(vmthread_cm); 15.1649 - compact_serial(vmthread_cm); 15.1650 - vmthread_cm->set_action(ParCompactionManager::ResetObjects); 15.1651 - compact_perm(vmthread_cm); 15.1652 - compact_serial(vmthread_cm); 15.1653 - vmthread_cm->set_action(ParCompactionManager::UpdateAndCopy); 15.1654 - 15.1655 - // For debugging only 15.1656 - PSMarkSweep::restore_marks(); 15.1657 - PSMarkSweep::deallocate_stacks(); 15.1658 - } 15.1659 -#endif 15.1660 - 15.1661 compaction_start.update(); 15.1662 // Does the perm gen always have to be done serially because 15.1663 // klasses are used in the update of an object? 15.1664 @@ -2349,7 +1852,7 @@ 15.1665 15.1666 ParallelScavengeHeap* heap = gc_heap(); 15.1667 uint parallel_gc_threads = heap->gc_task_manager()->workers(); 15.1668 - TaskQueueSetSuper* qset = ParCompactionManager::chunk_array(); 15.1669 + TaskQueueSetSuper* qset = ParCompactionManager::region_array(); 15.1670 ParallelTaskTerminator terminator(parallel_gc_threads, qset); 15.1671 15.1672 PSParallelCompact::MarkAndPushClosure mark_and_push_closure(cm); 15.1673 @@ -2487,8 +1990,9 @@ 15.1674 move_and_update(cm, perm_space_id); 15.1675 } 15.1676 15.1677 -void PSParallelCompact::enqueue_chunk_draining_tasks(GCTaskQueue* q, 15.1678 - uint parallel_gc_threads) { 15.1679 +void PSParallelCompact::enqueue_region_draining_tasks(GCTaskQueue* q, 15.1680 + uint parallel_gc_threads) 15.1681 +{ 15.1682 TraceTime tm("drain task setup", print_phases(), true, gclog_or_tty); 15.1683 15.1684 const unsigned int task_count = MAX2(parallel_gc_threads, 1U); 15.1685 @@ -2496,13 +2000,13 @@ 15.1686 q->enqueue(new DrainStacksCompactionTask()); 15.1687 } 15.1688 15.1689 - // Find all chunks that are available (can be filled immediately) and 15.1690 + // Find all regions that are available (can be filled immediately) and 15.1691 // distribute them to the thread stacks. 
The iteration is done in reverse 15.1692 - // order (high to low) so the chunks will be removed in ascending order. 15.1693 + // order (high to low) so the regions will be removed in ascending order. 15.1694 15.1695 const ParallelCompactData& sd = PSParallelCompact::summary_data(); 15.1696 15.1697 - size_t fillable_chunks = 0; // A count for diagnostic purposes. 15.1698 + size_t fillable_regions = 0; // A count for diagnostic purposes. 15.1699 unsigned int which = 0; // The worker thread number. 15.1700 15.1701 for (unsigned int id = to_space_id; id > perm_space_id; --id) { 15.1702 @@ -2510,25 +2014,26 @@ 15.1703 MutableSpace* const space = space_info->space(); 15.1704 HeapWord* const new_top = space_info->new_top(); 15.1705 15.1706 - const size_t beg_chunk = sd.addr_to_chunk_idx(space_info->dense_prefix()); 15.1707 - const size_t end_chunk = sd.addr_to_chunk_idx(sd.chunk_align_up(new_top)); 15.1708 - assert(end_chunk > 0, "perm gen cannot be empty"); 15.1709 - 15.1710 - for (size_t cur = end_chunk - 1; cur >= beg_chunk; --cur) { 15.1711 - if (sd.chunk(cur)->claim_unsafe()) { 15.1712 + const size_t beg_region = sd.addr_to_region_idx(space_info->dense_prefix()); 15.1713 + const size_t end_region = 15.1714 + sd.addr_to_region_idx(sd.region_align_up(new_top)); 15.1715 + assert(end_region > 0, "perm gen cannot be empty"); 15.1716 + 15.1717 + for (size_t cur = end_region - 1; cur >= beg_region; --cur) { 15.1718 + if (sd.region(cur)->claim_unsafe()) { 15.1719 ParCompactionManager* cm = ParCompactionManager::manager_array(which); 15.1720 cm->save_for_processing(cur); 15.1721 15.1722 if (TraceParallelOldGCCompactionPhase && Verbose) { 15.1723 - const size_t count_mod_8 = fillable_chunks & 7; 15.1724 + const size_t count_mod_8 = fillable_regions & 7; 15.1725 if (count_mod_8 == 0) gclog_or_tty->print("fillable: "); 15.1726 gclog_or_tty->print(" " SIZE_FORMAT_W(7), cur); 15.1727 if (count_mod_8 == 7) gclog_or_tty->cr(); 15.1728 } 15.1729 15.1730 - NOT_PRODUCT(++fillable_chunks;) 15.1731 - 15.1732 - // Assign chunks to threads in round-robin fashion. 15.1733 + NOT_PRODUCT(++fillable_regions;) 15.1734 + 15.1735 + // Assign regions to threads in round-robin fashion. 15.1736 if (++which == task_count) { 15.1737 which = 0; 15.1738 } 15.1739 @@ -2537,8 +2042,8 @@ 15.1740 } 15.1741 15.1742 if (TraceParallelOldGCCompactionPhase) { 15.1743 - if (Verbose && (fillable_chunks & 7) != 0) gclog_or_tty->cr(); 15.1744 - gclog_or_tty->print_cr("%u initially fillable chunks", fillable_chunks); 15.1745 + if (Verbose && (fillable_regions & 7) != 0) gclog_or_tty->cr(); 15.1746 + gclog_or_tty->print_cr("%u initially fillable regions", fillable_regions); 15.1747 } 15.1748 } 15.1749 15.1750 @@ -2551,7 +2056,7 @@ 15.1751 ParallelCompactData& sd = PSParallelCompact::summary_data(); 15.1752 15.1753 // Iterate over all the spaces adding tasks for updating 15.1754 - // chunks in the dense prefix. Assume that 1 gc thread 15.1755 + // regions in the dense prefix. Assume that 1 gc thread 15.1756 // will work on opening the gaps and the remaining gc threads 15.1757 // will work on the dense prefix. 15.1758 SpaceId space_id = old_space_id; 15.1759 @@ -2565,30 +2070,31 @@ 15.1760 continue; 15.1761 } 15.1762 15.1763 - // The dense prefix is before this chunk. 15.1764 - size_t chunk_index_end_dense_prefix = 15.1765 - sd.addr_to_chunk_idx(dense_prefix_end); 15.1766 - ChunkData* const dense_prefix_cp = sd.chunk(chunk_index_end_dense_prefix); 15.1767 + // The dense prefix is before this region. 
15.1768 + size_t region_index_end_dense_prefix = 15.1769 + sd.addr_to_region_idx(dense_prefix_end); 15.1770 + RegionData* const dense_prefix_cp = 15.1771 + sd.region(region_index_end_dense_prefix); 15.1772 assert(dense_prefix_end == space->end() || 15.1773 dense_prefix_cp->available() || 15.1774 dense_prefix_cp->claimed(), 15.1775 - "The chunk after the dense prefix should always be ready to fill"); 15.1776 - 15.1777 - size_t chunk_index_start = sd.addr_to_chunk_idx(space->bottom()); 15.1778 + "The region after the dense prefix should always be ready to fill"); 15.1779 + 15.1780 + size_t region_index_start = sd.addr_to_region_idx(space->bottom()); 15.1781 15.1782 // Is there dense prefix work? 15.1783 - size_t total_dense_prefix_chunks = 15.1784 - chunk_index_end_dense_prefix - chunk_index_start; 15.1785 - // How many chunks of the dense prefix should be given to 15.1786 + size_t total_dense_prefix_regions = 15.1787 + region_index_end_dense_prefix - region_index_start; 15.1788 + // How many regions of the dense prefix should be given to 15.1789 // each thread? 15.1790 - if (total_dense_prefix_chunks > 0) { 15.1791 + if (total_dense_prefix_regions > 0) { 15.1792 uint tasks_for_dense_prefix = 1; 15.1793 if (UseParallelDensePrefixUpdate) { 15.1794 - if (total_dense_prefix_chunks <= 15.1795 + if (total_dense_prefix_regions <= 15.1796 (parallel_gc_threads * PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING)) { 15.1797 // Don't over partition. This assumes that 15.1798 // PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING is a small integer value 15.1799 - // so there are not many chunks to process. 15.1800 + // so there are not many regions to process. 15.1801 tasks_for_dense_prefix = parallel_gc_threads; 15.1802 } else { 15.1803 // Over partition 15.1804 @@ -2596,50 +2102,50 @@ 15.1805 PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING; 15.1806 } 15.1807 } 15.1808 - size_t chunks_per_thread = total_dense_prefix_chunks / 15.1809 + size_t regions_per_thread = total_dense_prefix_regions / 15.1810 tasks_for_dense_prefix; 15.1811 - // Give each thread at least 1 chunk. 15.1812 - if (chunks_per_thread == 0) { 15.1813 - chunks_per_thread = 1; 15.1814 + // Give each thread at least 1 region. 15.1815 + if (regions_per_thread == 0) { 15.1816 + regions_per_thread = 1; 15.1817 } 15.1818 15.1819 for (uint k = 0; k < tasks_for_dense_prefix; k++) { 15.1820 - if (chunk_index_start >= chunk_index_end_dense_prefix) { 15.1821 + if (region_index_start >= region_index_end_dense_prefix) { 15.1822 break; 15.1823 } 15.1824 - // chunk_index_end is not processed 15.1825 - size_t chunk_index_end = MIN2(chunk_index_start + chunks_per_thread, 15.1826 - chunk_index_end_dense_prefix); 15.1827 + // region_index_end is not processed 15.1828 + size_t region_index_end = MIN2(region_index_start + regions_per_thread, 15.1829 + region_index_end_dense_prefix); 15.1830 q->enqueue(new UpdateDensePrefixTask( 15.1831 space_id, 15.1832 - chunk_index_start, 15.1833 - chunk_index_end)); 15.1834 - chunk_index_start = chunk_index_end; 15.1835 + region_index_start, 15.1836 + region_index_end)); 15.1837 + region_index_start = region_index_end; 15.1838 } 15.1839 } 15.1840 // This gets any part of the dense prefix that did not 15.1841 // fit evenly. 
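The loop above carves the dense-prefix regions into at most tasks_for_dense_prefix contiguous index ranges, each at least one region wide, and the lines that follow pick up whatever the integer division left over. A standalone sketch of that splitting, with a printf standing in for enqueueing an UpdateDensePrefixTask:

    #include <cstdio>
    #include <cstddef>
    #include <algorithm>

    // Split [region_start, region_end) into roughly equal per-task ranges, giving
    // every task at least one region; the caller handles the tail that does not
    // divide evenly (the "did not fit evenly" case above).
    void partition_dense_prefix(size_t region_start, size_t region_end, unsigned tasks) {
      const size_t total = region_end - region_start;
      if (total == 0) return;
      size_t per_task = total / tasks;
      if (per_task == 0) per_task = 1;                  // at least one region each

      for (unsigned k = 0; k < tasks && region_start < region_end; ++k) {
        const size_t chunk_end = std::min(region_start + per_task, region_end);
        std::printf("task %u: regions [%zu, %zu)\n", k, region_start, chunk_end);
        region_start = chunk_end;
      }
      if (region_start < region_end) {                  // uneven remainder
        std::printf("tail task: regions [%zu, %zu)\n", region_start, region_end);
      }
    }

    int main() { partition_dense_prefix(4, 23, 4); return 0; }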
15.1842 - if (chunk_index_start < chunk_index_end_dense_prefix) { 15.1843 + if (region_index_start < region_index_end_dense_prefix) { 15.1844 q->enqueue(new UpdateDensePrefixTask( 15.1845 space_id, 15.1846 - chunk_index_start, 15.1847 - chunk_index_end_dense_prefix)); 15.1848 + region_index_start, 15.1849 + region_index_end_dense_prefix)); 15.1850 } 15.1851 space_id = next_compaction_space_id(space_id); 15.1852 } // End tasks for dense prefix 15.1853 } 15.1854 15.1855 -void PSParallelCompact::enqueue_chunk_stealing_tasks( 15.1856 +void PSParallelCompact::enqueue_region_stealing_tasks( 15.1857 GCTaskQueue* q, 15.1858 ParallelTaskTerminator* terminator_ptr, 15.1859 uint parallel_gc_threads) { 15.1860 TraceTime tm("steal task setup", print_phases(), true, gclog_or_tty); 15.1861 15.1862 - // Once a thread has drained it's stack, it should try to steal chunks from 15.1863 + // Once a thread has drained it's stack, it should try to steal regions from 15.1864 // other threads. 15.1865 if (parallel_gc_threads > 1) { 15.1866 for (uint j = 0; j < parallel_gc_threads; j++) { 15.1867 - q->enqueue(new StealChunkCompactionTask(terminator_ptr)); 15.1868 + q->enqueue(new StealRegionCompactionTask(terminator_ptr)); 15.1869 } 15.1870 } 15.1871 } 15.1872 @@ -2654,13 +2160,13 @@ 15.1873 PSOldGen* old_gen = heap->old_gen(); 15.1874 old_gen->start_array()->reset(); 15.1875 uint parallel_gc_threads = heap->gc_task_manager()->workers(); 15.1876 - TaskQueueSetSuper* qset = ParCompactionManager::chunk_array(); 15.1877 + TaskQueueSetSuper* qset = ParCompactionManager::region_array(); 15.1878 ParallelTaskTerminator terminator(parallel_gc_threads, qset); 15.1879 15.1880 GCTaskQueue* q = GCTaskQueue::create(); 15.1881 - enqueue_chunk_draining_tasks(q, parallel_gc_threads); 15.1882 + enqueue_region_draining_tasks(q, parallel_gc_threads); 15.1883 enqueue_dense_prefix_tasks(q, parallel_gc_threads); 15.1884 - enqueue_chunk_stealing_tasks(q, &terminator, parallel_gc_threads); 15.1885 + enqueue_region_stealing_tasks(q, &terminator, parallel_gc_threads); 15.1886 15.1887 { 15.1888 TraceTime tm_pc("par compact", print_phases(), true, gclog_or_tty); 15.1889 @@ -2676,9 +2182,9 @@ 15.1890 WaitForBarrierGCTask::destroy(fin); 15.1891 15.1892 #ifdef ASSERT 15.1893 - // Verify that all chunks have been processed before the deferred updates. 15.1894 + // Verify that all regions have been processed before the deferred updates. 15.1895 // Note that perm_space_id is skipped; this type of verification is not 15.1896 - // valid until the perm gen is compacted by chunks. 15.1897 + // valid until the perm gen is compacted by regions. 15.1898 for (unsigned int id = old_space_id; id < last_space_id; ++id) { 15.1899 verify_complete(SpaceId(id)); 15.1900 } 15.1901 @@ -2697,42 +2203,42 @@ 15.1902 15.1903 #ifdef ASSERT 15.1904 void PSParallelCompact::verify_complete(SpaceId space_id) { 15.1905 - // All Chunks between space bottom() to new_top() should be marked as filled 15.1906 - // and all Chunks between new_top() and top() should be available (i.e., 15.1907 + // All Regions between space bottom() to new_top() should be marked as filled 15.1908 + // and all Regions between new_top() and top() should be available (i.e., 15.1909 // should have been emptied). 
15.1910 ParallelCompactData& sd = summary_data(); 15.1911 SpaceInfo si = _space_info[space_id]; 15.1912 - HeapWord* new_top_addr = sd.chunk_align_up(si.new_top()); 15.1913 - HeapWord* old_top_addr = sd.chunk_align_up(si.space()->top()); 15.1914 - const size_t beg_chunk = sd.addr_to_chunk_idx(si.space()->bottom()); 15.1915 - const size_t new_top_chunk = sd.addr_to_chunk_idx(new_top_addr); 15.1916 - const size_t old_top_chunk = sd.addr_to_chunk_idx(old_top_addr); 15.1917 + HeapWord* new_top_addr = sd.region_align_up(si.new_top()); 15.1918 + HeapWord* old_top_addr = sd.region_align_up(si.space()->top()); 15.1919 + const size_t beg_region = sd.addr_to_region_idx(si.space()->bottom()); 15.1920 + const size_t new_top_region = sd.addr_to_region_idx(new_top_addr); 15.1921 + const size_t old_top_region = sd.addr_to_region_idx(old_top_addr); 15.1922 15.1923 bool issued_a_warning = false; 15.1924 15.1925 - size_t cur_chunk; 15.1926 - for (cur_chunk = beg_chunk; cur_chunk < new_top_chunk; ++cur_chunk) { 15.1927 - const ChunkData* const c = sd.chunk(cur_chunk); 15.1928 + size_t cur_region; 15.1929 + for (cur_region = beg_region; cur_region < new_top_region; ++cur_region) { 15.1930 + const RegionData* const c = sd.region(cur_region); 15.1931 if (!c->completed()) { 15.1932 - warning("chunk " SIZE_FORMAT " not filled: " 15.1933 + warning("region " SIZE_FORMAT " not filled: " 15.1934 "destination_count=" SIZE_FORMAT, 15.1935 - cur_chunk, c->destination_count()); 15.1936 + cur_region, c->destination_count()); 15.1937 issued_a_warning = true; 15.1938 } 15.1939 } 15.1940 15.1941 - for (cur_chunk = new_top_chunk; cur_chunk < old_top_chunk; ++cur_chunk) { 15.1942 - const ChunkData* const c = sd.chunk(cur_chunk); 15.1943 + for (cur_region = new_top_region; cur_region < old_top_region; ++cur_region) { 15.1944 + const RegionData* const c = sd.region(cur_region); 15.1945 if (!c->available()) { 15.1946 - warning("chunk " SIZE_FORMAT " not empty: " 15.1947 + warning("region " SIZE_FORMAT " not empty: " 15.1948 "destination_count=" SIZE_FORMAT, 15.1949 - cur_chunk, c->destination_count()); 15.1950 + cur_region, c->destination_count()); 15.1951 issued_a_warning = true; 15.1952 } 15.1953 } 15.1954 15.1955 if (issued_a_warning) { 15.1956 - print_chunk_ranges(); 15.1957 + print_region_ranges(); 15.1958 } 15.1959 } 15.1960 #endif // #ifdef ASSERT 15.1961 @@ -2933,46 +2439,47 @@ 15.1962 } 15.1963 #endif //VALIDATE_MARK_SWEEP 15.1964 15.1965 -// Update interior oops in the ranges of chunks [beg_chunk, end_chunk). 15.1966 +// Update interior oops in the ranges of regions [beg_region, end_region). 
15.1967 void 15.1968 PSParallelCompact::update_and_deadwood_in_dense_prefix(ParCompactionManager* cm, 15.1969 SpaceId space_id, 15.1970 - size_t beg_chunk, 15.1971 - size_t end_chunk) { 15.1972 + size_t beg_region, 15.1973 + size_t end_region) { 15.1974 ParallelCompactData& sd = summary_data(); 15.1975 ParMarkBitMap* const mbm = mark_bitmap(); 15.1976 15.1977 - HeapWord* beg_addr = sd.chunk_to_addr(beg_chunk); 15.1978 - HeapWord* const end_addr = sd.chunk_to_addr(end_chunk); 15.1979 - assert(beg_chunk <= end_chunk, "bad chunk range"); 15.1980 + HeapWord* beg_addr = sd.region_to_addr(beg_region); 15.1981 + HeapWord* const end_addr = sd.region_to_addr(end_region); 15.1982 + assert(beg_region <= end_region, "bad region range"); 15.1983 assert(end_addr <= dense_prefix(space_id), "not in the dense prefix"); 15.1984 15.1985 #ifdef ASSERT 15.1986 - // Claim the chunks to avoid triggering an assert when they are marked as 15.1987 + // Claim the regions to avoid triggering an assert when they are marked as 15.1988 // filled. 15.1989 - for (size_t claim_chunk = beg_chunk; claim_chunk < end_chunk; ++claim_chunk) { 15.1990 - assert(sd.chunk(claim_chunk)->claim_unsafe(), "claim() failed"); 15.1991 + for (size_t claim_region = beg_region; claim_region < end_region; ++claim_region) { 15.1992 + assert(sd.region(claim_region)->claim_unsafe(), "claim() failed"); 15.1993 } 15.1994 #endif // #ifdef ASSERT 15.1995 15.1996 if (beg_addr != space(space_id)->bottom()) { 15.1997 // Find the first live object or block of dead space that *starts* in this 15.1998 - // range of chunks. If a partial object crosses onto the chunk, skip it; it 15.1999 - // will be marked for 'deferred update' when the object head is processed. 15.2000 - // If dead space crosses onto the chunk, it is also skipped; it will be 15.2001 - // filled when the prior chunk is processed. If neither of those apply, the 15.2002 - // first word in the chunk is the start of a live object or dead space. 15.2003 + // range of regions. If a partial object crosses onto the region, skip it; 15.2004 + // it will be marked for 'deferred update' when the object head is 15.2005 + // processed. If dead space crosses onto the region, it is also skipped; it 15.2006 + // will be filled when the prior region is processed. If neither of those 15.2007 + // apply, the first word in the region is the start of a live object or dead 15.2008 + // space. 15.2009 assert(beg_addr > space(space_id)->bottom(), "sanity"); 15.2010 - const ChunkData* const cp = sd.chunk(beg_chunk); 15.2011 + const RegionData* const cp = sd.region(beg_region); 15.2012 if (cp->partial_obj_size() != 0) { 15.2013 - beg_addr = sd.partial_obj_end(beg_chunk); 15.2014 + beg_addr = sd.partial_obj_end(beg_region); 15.2015 } else if (dead_space_crosses_boundary(cp, mbm->addr_to_bit(beg_addr))) { 15.2016 beg_addr = mbm->find_obj_beg(beg_addr, end_addr); 15.2017 } 15.2018 } 15.2019 15.2020 if (beg_addr < end_addr) { 15.2021 - // A live object or block of dead space starts in this range of Chunks. 15.2022 + // A live object or block of dead space starts in this range of Regions. 15.2023 HeapWord* const dense_prefix_end = dense_prefix(space_id); 15.2024 15.2025 // Create closures and iterate. 15.2026 @@ -2986,10 +2493,10 @@ 15.2027 } 15.2028 } 15.2029 15.2030 - // Mark the chunks as filled. 15.2031 - ChunkData* const beg_cp = sd.chunk(beg_chunk); 15.2032 - ChunkData* const end_cp = sd.chunk(end_chunk); 15.2033 - for (ChunkData* cp = beg_cp; cp < end_cp; ++cp) { 15.2034 + // Mark the regions as filled. 
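As the comment above explains, the first word handled in a range of dense-prefix regions must actually start a live object or a block of dead space; anything spilling in from the previous region is skipped because it is covered when that earlier region is processed. A simplified control-flow sketch, with stand-in types and a function-pointer parameter in place of the bitmap query (FirstRegionInfo and first_start_in_range are hypothetical):

#include <cstddef>

typedef size_t HeapWordIdx;              // word-index stand-in for HeapWord*

// Hypothetical summary of the first region in the range.
struct FirstRegionInfo {
  size_t      partial_obj_size;          // words of an object spilling in
  HeapWordIdx partial_obj_end;           // first word past that object
  bool        dead_space_crosses;        // dead space spills onto the region
};

// First word of [beg, end) that starts a live object or dead space;
// find_obj_beg is assumed to return the first marked word in [beg, end).
static HeapWordIdx first_start_in_range(HeapWordIdx beg, HeapWordIdx end,
                                        const FirstRegionInfo& info,
                                        HeapWordIdx (*find_obj_beg)(HeapWordIdx,
                                                                    HeapWordIdx)) {
  if (info.partial_obj_size != 0) {
    return info.partial_obj_end;         // skip the spilled-in object
  }
  if (info.dead_space_crosses) {
    return find_obj_beg(beg, end);       // skip the spilled-in dead space
  }
  return beg;                            // region starts a new object or dead space
}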
15.2035 + RegionData* const beg_cp = sd.region(beg_region); 15.2036 + RegionData* const end_cp = sd.region(end_region); 15.2037 + for (RegionData* cp = beg_cp; cp < end_cp; ++cp) { 15.2038 cp->set_completed(); 15.2039 } 15.2040 } 15.2041 @@ -3021,13 +2528,13 @@ 15.2042 const MutableSpace* const space = space_info->space(); 15.2043 assert(space_info->dense_prefix() >= space->bottom(), "dense_prefix not set"); 15.2044 HeapWord* const beg_addr = space_info->dense_prefix(); 15.2045 - HeapWord* const end_addr = sd.chunk_align_up(space_info->new_top()); 15.2046 - 15.2047 - const ChunkData* const beg_chunk = sd.addr_to_chunk_ptr(beg_addr); 15.2048 - const ChunkData* const end_chunk = sd.addr_to_chunk_ptr(end_addr); 15.2049 - const ChunkData* cur_chunk; 15.2050 - for (cur_chunk = beg_chunk; cur_chunk < end_chunk; ++cur_chunk) { 15.2051 - HeapWord* const addr = cur_chunk->deferred_obj_addr(); 15.2052 + HeapWord* const end_addr = sd.region_align_up(space_info->new_top()); 15.2053 + 15.2054 + const RegionData* const beg_region = sd.addr_to_region_ptr(beg_addr); 15.2055 + const RegionData* const end_region = sd.addr_to_region_ptr(end_addr); 15.2056 + const RegionData* cur_region; 15.2057 + for (cur_region = beg_region; cur_region < end_region; ++cur_region) { 15.2058 + HeapWord* const addr = cur_region->deferred_obj_addr(); 15.2059 if (addr != NULL) { 15.2060 if (start_array != NULL) { 15.2061 start_array->allocate_block(addr); 15.2062 @@ -3073,45 +2580,45 @@ 15.2063 15.2064 HeapWord* 15.2065 PSParallelCompact::first_src_addr(HeapWord* const dest_addr, 15.2066 - size_t src_chunk_idx) 15.2067 + size_t src_region_idx) 15.2068 { 15.2069 ParMarkBitMap* const bitmap = mark_bitmap(); 15.2070 const ParallelCompactData& sd = summary_data(); 15.2071 - const size_t ChunkSize = ParallelCompactData::ChunkSize; 15.2072 - 15.2073 - assert(sd.is_chunk_aligned(dest_addr), "not aligned"); 15.2074 - 15.2075 - const ChunkData* const src_chunk_ptr = sd.chunk(src_chunk_idx); 15.2076 - const size_t partial_obj_size = src_chunk_ptr->partial_obj_size(); 15.2077 - HeapWord* const src_chunk_destination = src_chunk_ptr->destination(); 15.2078 - 15.2079 - assert(dest_addr >= src_chunk_destination, "wrong src chunk"); 15.2080 - assert(src_chunk_ptr->data_size() > 0, "src chunk cannot be empty"); 15.2081 - 15.2082 - HeapWord* const src_chunk_beg = sd.chunk_to_addr(src_chunk_idx); 15.2083 - HeapWord* const src_chunk_end = src_chunk_beg + ChunkSize; 15.2084 - 15.2085 - HeapWord* addr = src_chunk_beg; 15.2086 - if (dest_addr == src_chunk_destination) { 15.2087 - // Return the first live word in the source chunk. 15.2088 + const size_t RegionSize = ParallelCompactData::RegionSize; 15.2089 + 15.2090 + assert(sd.is_region_aligned(dest_addr), "not aligned"); 15.2091 + 15.2092 + const RegionData* const src_region_ptr = sd.region(src_region_idx); 15.2093 + const size_t partial_obj_size = src_region_ptr->partial_obj_size(); 15.2094 + HeapWord* const src_region_destination = src_region_ptr->destination(); 15.2095 + 15.2096 + assert(dest_addr >= src_region_destination, "wrong src region"); 15.2097 + assert(src_region_ptr->data_size() > 0, "src region cannot be empty"); 15.2098 + 15.2099 + HeapWord* const src_region_beg = sd.region_to_addr(src_region_idx); 15.2100 + HeapWord* const src_region_end = src_region_beg + RegionSize; 15.2101 + 15.2102 + HeapWord* addr = src_region_beg; 15.2103 + if (dest_addr == src_region_destination) { 15.2104 + // Return the first live word in the source region. 
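first_src_addr() maps a region-aligned destination address back to the source word the copy will start from: if the destination equals the region's recorded destination, the answer is the region's first live word; otherwise the difference is a number of live words to skip, taken first out of the partial object spilling into the region and then out of objects that start inside it. A word-index sketch of that logic (the helper signatures are stand-ins, not the ParMarkBitMap API):

#include <cstddef>

// find_obj_beg(beg, end) is assumed to return the first live word in [beg, end);
// skip_live_words(beg, end, n) to advance past n live words starting at beg.
static size_t first_src_word(size_t region_beg, size_t region_end,
                             size_t dest_offset,          // dest_addr - destination
                             size_t partial_obj_size,     // words spilling in
                             size_t (*find_obj_beg)(size_t, size_t),
                             size_t (*skip_live_words)(size_t, size_t, size_t)) {
  size_t addr = region_beg;
  if (dest_offset == 0) {
    // Copying starts at the region's destination: use the first live word.
    return partial_obj_size == 0 ? find_obj_beg(addr, region_end) : addr;
  }
  if (partial_obj_size >= dest_offset) {
    addr += dest_offset;                 // still inside the partial object
    return partial_obj_size == dest_offset ? find_obj_beg(addr, region_end) : addr;
  }
  // Skip the whole partial object, then the remaining live words.
  return skip_live_words(addr + partial_obj_size, region_end,
                         dest_offset - partial_obj_size);
}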
15.2105 if (partial_obj_size == 0) { 15.2106 - addr = bitmap->find_obj_beg(addr, src_chunk_end); 15.2107 - assert(addr < src_chunk_end, "no objects start in src chunk"); 15.2108 + addr = bitmap->find_obj_beg(addr, src_region_end); 15.2109 + assert(addr < src_region_end, "no objects start in src region"); 15.2110 } 15.2111 return addr; 15.2112 } 15.2113 15.2114 // Must skip some live data. 15.2115 - size_t words_to_skip = dest_addr - src_chunk_destination; 15.2116 - assert(src_chunk_ptr->data_size() > words_to_skip, "wrong src chunk"); 15.2117 + size_t words_to_skip = dest_addr - src_region_destination; 15.2118 + assert(src_region_ptr->data_size() > words_to_skip, "wrong src region"); 15.2119 15.2120 if (partial_obj_size >= words_to_skip) { 15.2121 // All the live words to skip are part of the partial object. 15.2122 addr += words_to_skip; 15.2123 if (partial_obj_size == words_to_skip) { 15.2124 // Find the first live word past the partial object. 15.2125 - addr = bitmap->find_obj_beg(addr, src_chunk_end); 15.2126 - assert(addr < src_chunk_end, "wrong src chunk"); 15.2127 + addr = bitmap->find_obj_beg(addr, src_region_end); 15.2128 + assert(addr < src_region_end, "wrong src region"); 15.2129 } 15.2130 return addr; 15.2131 } 15.2132 @@ -3122,63 +2629,64 @@ 15.2133 addr += partial_obj_size; 15.2134 } 15.2135 15.2136 - // Skip over live words due to objects that start in the chunk. 15.2137 - addr = skip_live_words(addr, src_chunk_end, words_to_skip); 15.2138 - assert(addr < src_chunk_end, "wrong src chunk"); 15.2139 + // Skip over live words due to objects that start in the region. 15.2140 + addr = skip_live_words(addr, src_region_end, words_to_skip); 15.2141 + assert(addr < src_region_end, "wrong src region"); 15.2142 return addr; 15.2143 } 15.2144 15.2145 void PSParallelCompact::decrement_destination_counts(ParCompactionManager* cm, 15.2146 - size_t beg_chunk, 15.2147 + size_t beg_region, 15.2148 HeapWord* end_addr) 15.2149 { 15.2150 ParallelCompactData& sd = summary_data(); 15.2151 - ChunkData* const beg = sd.chunk(beg_chunk); 15.2152 - HeapWord* const end_addr_aligned_up = sd.chunk_align_up(end_addr); 15.2153 - ChunkData* const end = sd.addr_to_chunk_ptr(end_addr_aligned_up); 15.2154 - size_t cur_idx = beg_chunk; 15.2155 - for (ChunkData* cur = beg; cur < end; ++cur, ++cur_idx) { 15.2156 - assert(cur->data_size() > 0, "chunk must have live data"); 15.2157 + RegionData* const beg = sd.region(beg_region); 15.2158 + HeapWord* const end_addr_aligned_up = sd.region_align_up(end_addr); 15.2159 + RegionData* const end = sd.addr_to_region_ptr(end_addr_aligned_up); 15.2160 + size_t cur_idx = beg_region; 15.2161 + for (RegionData* cur = beg; cur < end; ++cur, ++cur_idx) { 15.2162 + assert(cur->data_size() > 0, "region must have live data"); 15.2163 cur->decrement_destination_count(); 15.2164 - if (cur_idx <= cur->source_chunk() && cur->available() && cur->claim()) { 15.2165 + if (cur_idx <= cur->source_region() && cur->available() && cur->claim()) { 15.2166 cm->save_for_processing(cur_idx); 15.2167 } 15.2168 } 15.2169 } 15.2170 15.2171 -size_t PSParallelCompact::next_src_chunk(MoveAndUpdateClosure& closure, 15.2172 - SpaceId& src_space_id, 15.2173 - HeapWord*& src_space_top, 15.2174 - HeapWord* end_addr) 15.2175 +size_t PSParallelCompact::next_src_region(MoveAndUpdateClosure& closure, 15.2176 + SpaceId& src_space_id, 15.2177 + HeapWord*& src_space_top, 15.2178 + HeapWord* end_addr) 15.2179 { 15.2180 - typedef ParallelCompactData::ChunkData ChunkData; 15.2181 + typedef ParallelCompactData::RegionData 
RegionData; 15.2182 15.2183 ParallelCompactData& sd = PSParallelCompact::summary_data(); 15.2184 - const size_t chunk_size = ParallelCompactData::ChunkSize; 15.2185 - 15.2186 - size_t src_chunk_idx = 0; 15.2187 - 15.2188 - // Skip empty chunks (if any) up to the top of the space. 15.2189 - HeapWord* const src_aligned_up = sd.chunk_align_up(end_addr); 15.2190 - ChunkData* src_chunk_ptr = sd.addr_to_chunk_ptr(src_aligned_up); 15.2191 - HeapWord* const top_aligned_up = sd.chunk_align_up(src_space_top); 15.2192 - const ChunkData* const top_chunk_ptr = sd.addr_to_chunk_ptr(top_aligned_up); 15.2193 - while (src_chunk_ptr < top_chunk_ptr && src_chunk_ptr->data_size() == 0) { 15.2194 - ++src_chunk_ptr; 15.2195 + const size_t region_size = ParallelCompactData::RegionSize; 15.2196 + 15.2197 + size_t src_region_idx = 0; 15.2198 + 15.2199 + // Skip empty regions (if any) up to the top of the space. 15.2200 + HeapWord* const src_aligned_up = sd.region_align_up(end_addr); 15.2201 + RegionData* src_region_ptr = sd.addr_to_region_ptr(src_aligned_up); 15.2202 + HeapWord* const top_aligned_up = sd.region_align_up(src_space_top); 15.2203 + const RegionData* const top_region_ptr = 15.2204 + sd.addr_to_region_ptr(top_aligned_up); 15.2205 + while (src_region_ptr < top_region_ptr && src_region_ptr->data_size() == 0) { 15.2206 + ++src_region_ptr; 15.2207 } 15.2208 15.2209 - if (src_chunk_ptr < top_chunk_ptr) { 15.2210 - // The next source chunk is in the current space. Update src_chunk_idx and 15.2211 - // the source address to match src_chunk_ptr. 15.2212 - src_chunk_idx = sd.chunk(src_chunk_ptr); 15.2213 - HeapWord* const src_chunk_addr = sd.chunk_to_addr(src_chunk_idx); 15.2214 - if (src_chunk_addr > closure.source()) { 15.2215 - closure.set_source(src_chunk_addr); 15.2216 + if (src_region_ptr < top_region_ptr) { 15.2217 + // The next source region is in the current space. Update src_region_idx 15.2218 + // and the source address to match src_region_ptr. 15.2219 + src_region_idx = sd.region(src_region_ptr); 15.2220 + HeapWord* const src_region_addr = sd.region_to_addr(src_region_idx); 15.2221 + if (src_region_addr > closure.source()) { 15.2222 + closure.set_source(src_region_addr); 15.2223 } 15.2224 - return src_chunk_idx; 15.2225 + return src_region_idx; 15.2226 } 15.2227 15.2228 - // Switch to a new source space and find the first non-empty chunk. 15.2229 + // Switch to a new source space and find the first non-empty region. 15.2230 unsigned int space_id = src_space_id + 1; 15.2231 assert(space_id < last_space_id, "not enough spaces"); 15.2232 15.2233 @@ -3187,14 +2695,14 @@ 15.2234 do { 15.2235 MutableSpace* space = _space_info[space_id].space(); 15.2236 HeapWord* const bottom = space->bottom(); 15.2237 - const ChunkData* const bottom_cp = sd.addr_to_chunk_ptr(bottom); 15.2238 + const RegionData* const bottom_cp = sd.addr_to_region_ptr(bottom); 15.2239 15.2240 // Iterate over the spaces that do not compact into themselves. 
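next_src_region() first scans forward inside the current space, stepping over regions whose data_size() is zero, and only then falls through to the next compaction space. A minimal sketch of that forward scan (region_data_size is a stand-in for the RegionData accessor):

#include <cstddef>

// Return the index of the first region in [beg, end) with live data,
// or 'end' when every remaining region in the space is empty.
static size_t next_nonempty_region(const size_t* region_data_size,
                                   size_t beg, size_t end) {
  size_t idx = beg;
  while (idx < end && region_data_size[idx] == 0) {
    ++idx;
  }
  return idx;
}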
15.2241 if (bottom_cp->destination() != bottom) { 15.2242 - HeapWord* const top_aligned_up = sd.chunk_align_up(space->top()); 15.2243 - const ChunkData* const top_cp = sd.addr_to_chunk_ptr(top_aligned_up); 15.2244 - 15.2245 - for (const ChunkData* src_cp = bottom_cp; src_cp < top_cp; ++src_cp) { 15.2246 + HeapWord* const top_aligned_up = sd.region_align_up(space->top()); 15.2247 + const RegionData* const top_cp = sd.addr_to_region_ptr(top_aligned_up); 15.2248 + 15.2249 + for (const RegionData* src_cp = bottom_cp; src_cp < top_cp; ++src_cp) { 15.2250 if (src_cp->live_obj_size() > 0) { 15.2251 // Found it. 15.2252 assert(src_cp->destination() == destination, 15.2253 @@ -3204,9 +2712,9 @@ 15.2254 15.2255 src_space_id = SpaceId(space_id); 15.2256 src_space_top = space->top(); 15.2257 - const size_t src_chunk_idx = sd.chunk(src_cp); 15.2258 - closure.set_source(sd.chunk_to_addr(src_chunk_idx)); 15.2259 - return src_chunk_idx; 15.2260 + const size_t src_region_idx = sd.region(src_cp); 15.2261 + closure.set_source(sd.region_to_addr(src_region_idx)); 15.2262 + return src_region_idx; 15.2263 } else { 15.2264 assert(src_cp->data_size() == 0, "sanity"); 15.2265 } 15.2266 @@ -3214,38 +2722,38 @@ 15.2267 } 15.2268 } while (++space_id < last_space_id); 15.2269 15.2270 - assert(false, "no source chunk was found"); 15.2271 + assert(false, "no source region was found"); 15.2272 return 0; 15.2273 } 15.2274 15.2275 -void PSParallelCompact::fill_chunk(ParCompactionManager* cm, size_t chunk_idx) 15.2276 +void PSParallelCompact::fill_region(ParCompactionManager* cm, size_t region_idx) 15.2277 { 15.2278 typedef ParMarkBitMap::IterationStatus IterationStatus; 15.2279 - const size_t ChunkSize = ParallelCompactData::ChunkSize; 15.2280 + const size_t RegionSize = ParallelCompactData::RegionSize; 15.2281 ParMarkBitMap* const bitmap = mark_bitmap(); 15.2282 ParallelCompactData& sd = summary_data(); 15.2283 - ChunkData* const chunk_ptr = sd.chunk(chunk_idx); 15.2284 + RegionData* const region_ptr = sd.region(region_idx); 15.2285 15.2286 // Get the items needed to construct the closure. 15.2287 - HeapWord* dest_addr = sd.chunk_to_addr(chunk_idx); 15.2288 + HeapWord* dest_addr = sd.region_to_addr(region_idx); 15.2289 SpaceId dest_space_id = space_id(dest_addr); 15.2290 ObjectStartArray* start_array = _space_info[dest_space_id].start_array(); 15.2291 HeapWord* new_top = _space_info[dest_space_id].new_top(); 15.2292 assert(dest_addr < new_top, "sanity"); 15.2293 - const size_t words = MIN2(pointer_delta(new_top, dest_addr), ChunkSize); 15.2294 - 15.2295 - // Get the source chunk and related info. 15.2296 - size_t src_chunk_idx = chunk_ptr->source_chunk(); 15.2297 - SpaceId src_space_id = space_id(sd.chunk_to_addr(src_chunk_idx)); 15.2298 + const size_t words = MIN2(pointer_delta(new_top, dest_addr), RegionSize); 15.2299 + 15.2300 + // Get the source region and related info. 15.2301 + size_t src_region_idx = region_ptr->source_region(); 15.2302 + SpaceId src_space_id = space_id(sd.region_to_addr(src_region_idx)); 15.2303 HeapWord* src_space_top = _space_info[src_space_id].space()->top(); 15.2304 15.2305 MoveAndUpdateClosure closure(bitmap, cm, start_array, dest_addr, words); 15.2306 - closure.set_source(first_src_addr(dest_addr, src_chunk_idx)); 15.2307 - 15.2308 - // Adjust src_chunk_idx to prepare for decrementing destination counts (the 15.2309 - // destination count is not decremented when a chunk is copied to itself). 
15.2310 - if (src_chunk_idx == chunk_idx) { 15.2311 - src_chunk_idx += 1; 15.2312 + closure.set_source(first_src_addr(dest_addr, src_region_idx)); 15.2313 + 15.2314 + // Adjust src_region_idx to prepare for decrementing destination counts (the 15.2315 + // destination count is not decremented when a region is copied to itself). 15.2316 + if (src_region_idx == region_idx) { 15.2317 + src_region_idx += 1; 15.2318 } 15.2319 15.2320 if (bitmap->is_unmarked(closure.source())) { 15.2321 @@ -3255,32 +2763,33 @@ 15.2322 HeapWord* const old_src_addr = closure.source(); 15.2323 closure.copy_partial_obj(); 15.2324 if (closure.is_full()) { 15.2325 - decrement_destination_counts(cm, src_chunk_idx, closure.source()); 15.2326 - chunk_ptr->set_deferred_obj_addr(NULL); 15.2327 - chunk_ptr->set_completed(); 15.2328 + decrement_destination_counts(cm, src_region_idx, closure.source()); 15.2329 + region_ptr->set_deferred_obj_addr(NULL); 15.2330 + region_ptr->set_completed(); 15.2331 return; 15.2332 } 15.2333 15.2334 - HeapWord* const end_addr = sd.chunk_align_down(closure.source()); 15.2335 - if (sd.chunk_align_down(old_src_addr) != end_addr) { 15.2336 - // The partial object was copied from more than one source chunk. 15.2337 - decrement_destination_counts(cm, src_chunk_idx, end_addr); 15.2338 - 15.2339 - // Move to the next source chunk, possibly switching spaces as well. All 15.2340 + HeapWord* const end_addr = sd.region_align_down(closure.source()); 15.2341 + if (sd.region_align_down(old_src_addr) != end_addr) { 15.2342 + // The partial object was copied from more than one source region. 15.2343 + decrement_destination_counts(cm, src_region_idx, end_addr); 15.2344 + 15.2345 + // Move to the next source region, possibly switching spaces as well. All 15.2346 // args except end_addr may be modified. 15.2347 - src_chunk_idx = next_src_chunk(closure, src_space_id, src_space_top, 15.2348 - end_addr); 15.2349 + src_region_idx = next_src_region(closure, src_space_id, src_space_top, 15.2350 + end_addr); 15.2351 } 15.2352 } 15.2353 15.2354 do { 15.2355 HeapWord* const cur_addr = closure.source(); 15.2356 - HeapWord* const end_addr = MIN2(sd.chunk_align_up(cur_addr + 1), 15.2357 + HeapWord* const end_addr = MIN2(sd.region_align_up(cur_addr + 1), 15.2358 src_space_top); 15.2359 IterationStatus status = bitmap->iterate(&closure, cur_addr, end_addr); 15.2360 15.2361 if (status == ParMarkBitMap::incomplete) { 15.2362 - // The last obj that starts in the source chunk does not end in the chunk. 15.2363 + // The last obj that starts in the source region does not end in the 15.2364 + // region. 15.2365 assert(closure.source() < end_addr, "sanity") 15.2366 HeapWord* const obj_beg = closure.source(); 15.2367 HeapWord* const range_end = MIN2(obj_beg + closure.words_remaining(), 15.2368 @@ -3299,28 +2808,28 @@ 15.2369 15.2370 if (status == ParMarkBitMap::would_overflow) { 15.2371 // The last object did not fit. Note that interior oop updates were 15.2372 - // deferred, then copy enough of the object to fill the chunk. 15.2373 - chunk_ptr->set_deferred_obj_addr(closure.destination()); 15.2374 + // deferred, then copy enough of the object to fill the region. 
15.2375 + region_ptr->set_deferred_obj_addr(closure.destination()); 15.2376 status = closure.copy_until_full(); // copies from closure.source() 15.2377 15.2378 - decrement_destination_counts(cm, src_chunk_idx, closure.source()); 15.2379 - chunk_ptr->set_completed(); 15.2380 + decrement_destination_counts(cm, src_region_idx, closure.source()); 15.2381 + region_ptr->set_completed(); 15.2382 return; 15.2383 } 15.2384 15.2385 if (status == ParMarkBitMap::full) { 15.2386 - decrement_destination_counts(cm, src_chunk_idx, closure.source()); 15.2387 - chunk_ptr->set_deferred_obj_addr(NULL); 15.2388 - chunk_ptr->set_completed(); 15.2389 + decrement_destination_counts(cm, src_region_idx, closure.source()); 15.2390 + region_ptr->set_deferred_obj_addr(NULL); 15.2391 + region_ptr->set_completed(); 15.2392 return; 15.2393 } 15.2394 15.2395 - decrement_destination_counts(cm, src_chunk_idx, end_addr); 15.2396 - 15.2397 - // Move to the next source chunk, possibly switching spaces as well. All 15.2398 + decrement_destination_counts(cm, src_region_idx, end_addr); 15.2399 + 15.2400 + // Move to the next source region, possibly switching spaces as well. All 15.2401 // args except end_addr may be modified. 15.2402 - src_chunk_idx = next_src_chunk(closure, src_space_id, src_space_top, 15.2403 - end_addr); 15.2404 + src_region_idx = next_src_region(closure, src_space_id, src_space_top, 15.2405 + end_addr); 15.2406 } while (true); 15.2407 } 15.2408 15.2409 @@ -3352,15 +2861,15 @@ 15.2410 } 15.2411 #endif 15.2412 15.2413 - const size_t beg_chunk = sd.addr_to_chunk_idx(beg_addr); 15.2414 - const size_t dp_chunk = sd.addr_to_chunk_idx(dp_addr); 15.2415 - if (beg_chunk < dp_chunk) { 15.2416 - update_and_deadwood_in_dense_prefix(cm, space_id, beg_chunk, dp_chunk); 15.2417 + const size_t beg_region = sd.addr_to_region_idx(beg_addr); 15.2418 + const size_t dp_region = sd.addr_to_region_idx(dp_addr); 15.2419 + if (beg_region < dp_region) { 15.2420 + update_and_deadwood_in_dense_prefix(cm, space_id, beg_region, dp_region); 15.2421 } 15.2422 15.2423 - // The destination of the first live object that starts in the chunk is one 15.2424 - // past the end of the partial object entering the chunk (if any). 15.2425 - HeapWord* const dest_addr = sd.partial_obj_end(dp_chunk); 15.2426 + // The destination of the first live object that starts in the region is one 15.2427 + // past the end of the partial object entering the region (if any). 
15.2428 + HeapWord* const dest_addr = sd.partial_obj_end(dp_region); 15.2429 HeapWord* const new_top = _space_info[space_id].new_top(); 15.2430 assert(new_top >= dest_addr, "bad new_top value"); 15.2431 const size_t words = pointer_delta(new_top, dest_addr); 15.2432 @@ -3469,172 +2978,6 @@ 15.2433 return ParMarkBitMap::incomplete; 15.2434 } 15.2435 15.2436 -BitBlockUpdateClosure::BitBlockUpdateClosure(ParMarkBitMap* mbm, 15.2437 - ParCompactionManager* cm, 15.2438 - size_t chunk_index) : 15.2439 - ParMarkBitMapClosure(mbm, cm), 15.2440 - _live_data_left(0), 15.2441 - _cur_block(0) { 15.2442 - _chunk_start = 15.2443 - PSParallelCompact::summary_data().chunk_to_addr(chunk_index); 15.2444 - _chunk_end = 15.2445 - PSParallelCompact::summary_data().chunk_to_addr(chunk_index) + 15.2446 - ParallelCompactData::ChunkSize; 15.2447 - _chunk_index = chunk_index; 15.2448 - _cur_block = 15.2449 - PSParallelCompact::summary_data().addr_to_block_idx(_chunk_start); 15.2450 -} 15.2451 - 15.2452 -bool BitBlockUpdateClosure::chunk_contains_cur_block() { 15.2453 - return ParallelCompactData::chunk_contains_block(_chunk_index, _cur_block); 15.2454 -} 15.2455 - 15.2456 -void BitBlockUpdateClosure::reset_chunk(size_t chunk_index) { 15.2457 - DEBUG_ONLY(ParallelCompactData::BlockData::set_cur_phase(7);) 15.2458 - ParallelCompactData& sd = PSParallelCompact::summary_data(); 15.2459 - _chunk_index = chunk_index; 15.2460 - _live_data_left = 0; 15.2461 - _chunk_start = sd.chunk_to_addr(chunk_index); 15.2462 - _chunk_end = sd.chunk_to_addr(chunk_index) + ParallelCompactData::ChunkSize; 15.2463 - 15.2464 - // The first block in this chunk 15.2465 - size_t first_block = sd.addr_to_block_idx(_chunk_start); 15.2466 - size_t partial_live_size = sd.chunk(chunk_index)->partial_obj_size(); 15.2467 - 15.2468 - // Set the offset to 0. By definition it should have that value 15.2469 - // but it may have been written while processing an earlier chunk. 15.2470 - if (partial_live_size == 0) { 15.2471 - // No live object extends onto the chunk. The first bit 15.2472 - // in the bit map for the first chunk must be a start bit. 15.2473 - // Although there may not be any marked bits, it is safe 15.2474 - // to set it as a start bit. 15.2475 - sd.block(first_block)->set_start_bit_offset(0); 15.2476 - sd.block(first_block)->set_first_is_start_bit(true); 15.2477 - } else if (sd.partial_obj_ends_in_block(first_block)) { 15.2478 - sd.block(first_block)->set_end_bit_offset(0); 15.2479 - sd.block(first_block)->set_first_is_start_bit(false); 15.2480 - } else { 15.2481 - // The partial object extends beyond the first block. 15.2482 - // There is no object starting in the first block 15.2483 - // so the offset and bit parity are not needed. 15.2484 - // Set the the bit parity to start bit so assertions 15.2485 - // work when not bit is found. 
15.2486 - sd.block(first_block)->set_end_bit_offset(0); 15.2487 - sd.block(first_block)->set_first_is_start_bit(false); 15.2488 - } 15.2489 - _cur_block = first_block; 15.2490 -#ifdef ASSERT 15.2491 - if (sd.block(first_block)->first_is_start_bit()) { 15.2492 - assert(!sd.partial_obj_ends_in_block(first_block), 15.2493 - "Partial object cannot end in first block"); 15.2494 - } 15.2495 - 15.2496 - if (PrintGCDetails && Verbose) { 15.2497 - if (partial_live_size == 1) { 15.2498 - gclog_or_tty->print_cr("first_block " PTR_FORMAT 15.2499 - " _offset " PTR_FORMAT 15.2500 - " _first_is_start_bit %d", 15.2501 - first_block, 15.2502 - sd.block(first_block)->raw_offset(), 15.2503 - sd.block(first_block)->first_is_start_bit()); 15.2504 - } 15.2505 - } 15.2506 -#endif 15.2507 - DEBUG_ONLY(ParallelCompactData::BlockData::set_cur_phase(17);) 15.2508 -} 15.2509 - 15.2510 -// This method is called when a object has been found (both beginning 15.2511 -// and end of the object) in the range of iteration. This method is 15.2512 -// calculating the words of live data to the left of a block. That live 15.2513 -// data includes any object starting to the left of the block (i.e., 15.2514 -// the live-data-to-the-left of block AAA will include the full size 15.2515 -// of any object entering AAA). 15.2516 - 15.2517 -ParMarkBitMapClosure::IterationStatus 15.2518 -BitBlockUpdateClosure::do_addr(HeapWord* addr, size_t words) { 15.2519 - // add the size to the block data. 15.2520 - HeapWord* obj = addr; 15.2521 - ParallelCompactData& sd = PSParallelCompact::summary_data(); 15.2522 - 15.2523 - assert(bitmap()->obj_size(obj) == words, "bad size"); 15.2524 - assert(_chunk_start <= obj, "object is not in chunk"); 15.2525 - assert(obj + words <= _chunk_end, "object is not in chunk"); 15.2526 - 15.2527 - // Update the live data to the left 15.2528 - size_t prev_live_data_left = _live_data_left; 15.2529 - _live_data_left = _live_data_left + words; 15.2530 - 15.2531 - // Is this object in the current block. 15.2532 - size_t block_of_obj = sd.addr_to_block_idx(obj); 15.2533 - size_t block_of_obj_last = sd.addr_to_block_idx(obj + words - 1); 15.2534 - HeapWord* block_of_obj_last_addr = sd.block_to_addr(block_of_obj_last); 15.2535 - if (_cur_block < block_of_obj) { 15.2536 - 15.2537 - // 15.2538 - // No object crossed the block boundary and this object was found 15.2539 - // on the other side of the block boundary. Update the offset for 15.2540 - // the new block with the data size that does not include this object. 15.2541 - // 15.2542 - // The first bit in block_of_obj is a start bit except in the 15.2543 - // case where the partial object for the chunk extends into 15.2544 - // this block. 15.2545 - if (sd.partial_obj_ends_in_block(block_of_obj)) { 15.2546 - sd.block(block_of_obj)->set_end_bit_offset(prev_live_data_left); 15.2547 - } else { 15.2548 - sd.block(block_of_obj)->set_start_bit_offset(prev_live_data_left); 15.2549 - } 15.2550 - 15.2551 - // Does this object pass beyond the its block? 15.2552 - if (block_of_obj < block_of_obj_last) { 15.2553 - // Object crosses block boundary. Two blocks need to be udpated: 15.2554 - // the current block where the object started 15.2555 - // the block where the object ends 15.2556 - // 15.2557 - // The offset for blocks with no objects starting in them 15.2558 - // (e.g., blocks between _cur_block and block_of_obj_last) 15.2559 - // should not be needed. 15.2560 - // Note that block_of_obj_last may be in another chunk. If so, 15.2561 - // it should be overwritten later. 
This is a problem (writting 15.2562 - // into a block in a later chunk) for parallel execution. 15.2563 - assert(obj < block_of_obj_last_addr, 15.2564 - "Object should start in previous block"); 15.2565 - 15.2566 - // obj is crossing into block_of_obj_last so the first bit 15.2567 - // is and end bit. 15.2568 - sd.block(block_of_obj_last)->set_end_bit_offset(_live_data_left); 15.2569 - 15.2570 - _cur_block = block_of_obj_last; 15.2571 - } else { 15.2572 - // _first_is_start_bit has already been set correctly 15.2573 - // in the if-then-else above so don't reset it here. 15.2574 - _cur_block = block_of_obj; 15.2575 - } 15.2576 - } else { 15.2577 - // The current block only changes if the object extends beyound 15.2578 - // the block it starts in. 15.2579 - // 15.2580 - // The object starts in the current block. 15.2581 - // Does this object pass beyond the end of it? 15.2582 - if (block_of_obj < block_of_obj_last) { 15.2583 - // Object crosses block boundary. 15.2584 - // See note above on possible blocks between block_of_obj and 15.2585 - // block_of_obj_last 15.2586 - assert(obj < block_of_obj_last_addr, 15.2587 - "Object should start in previous block"); 15.2588 - 15.2589 - sd.block(block_of_obj_last)->set_end_bit_offset(_live_data_left); 15.2590 - 15.2591 - _cur_block = block_of_obj_last; 15.2592 - } 15.2593 - } 15.2594 - 15.2595 - // Return incomplete if there are more blocks to be done. 15.2596 - if (chunk_contains_cur_block()) { 15.2597 - return ParMarkBitMap::incomplete; 15.2598 - } 15.2599 - return ParMarkBitMap::complete; 15.2600 -} 15.2601 - 15.2602 // Verify the new location using the forwarding pointer 15.2603 // from MarkSweep::mark_sweep_phase2(). Set the mark_word 15.2604 // to the initial value. 15.2605 @@ -3707,12 +3050,3 @@ 15.2606 return last_space_id; 15.2607 } 15.2608 } 15.2609 - 15.2610 -// Here temporarily for debugging 15.2611 -#ifdef ASSERT 15.2612 - size_t ParallelCompactData::block_idx(BlockData* block) { 15.2613 - size_t index = pointer_delta(block, 15.2614 - PSParallelCompact::summary_data()._block_data, sizeof(BlockData)); 15.2615 - return index; 15.2616 - } 15.2617 -#endif
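The header changes that follow keep each region's destination count and live-object size packed into the single word _dc_and_los; a region becomes fillable when the count reaches zero, workers decrement it atomically as they drain source regions, and they race to claim a fillable region with a compare-and-swap. A self-contained sketch of that protocol using std::atomic (the bit layout and constant values here are illustrative, not the HotSpot definitions):

#include <atomic>
#include <cstdint>

// Illustrative stand-ins for the _dc_and_los constants: destination count in
// the high byte, live-object size (in words) in the low 24 bits.
constexpr uint32_t dc_shift   = 24;
constexpr uint32_t dc_mask    = 0xFFu << dc_shift;
constexpr uint32_t dc_one     = 1u    << dc_shift;
constexpr uint32_t dc_claimed = 0x80u << dc_shift;
constexpr uint32_t los_mask   = ~dc_mask;

struct RegionCounter {
  std::atomic<uint32_t> dc_and_los{0};

  uint32_t live_obj_size() const { return dc_and_los.load() & los_mask; }
  bool     available()     const { return dc_and_los.load() < dc_one; }

  // Another worker finished copying the data destined elsewhere out of this
  // region: drop the count by one (never called when it is already zero).
  void decrement_destination_count() { dc_and_los.fetch_sub(dc_one); }

  // Race to claim a region whose destination count has reached zero;
  // exactly one worker's compare-and-swap succeeds.
  bool claim() {
    uint32_t expected = live_obj_size();       // value must be exactly the los
    return dc_and_los.compare_exchange_strong(expected, dc_claimed | expected);
  }
};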
16.1 --- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp Tue Sep 30 12:24:27 2008 -0400 16.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp Wed Oct 01 20:15:03 2008 -0400 16.3 @@ -76,87 +76,80 @@ 16.4 { 16.5 public: 16.6 // Sizes are in HeapWords, unless indicated otherwise. 16.7 - static const size_t Log2ChunkSize; 16.8 - static const size_t ChunkSize; 16.9 - static const size_t ChunkSizeBytes; 16.10 + static const size_t Log2RegionSize; 16.11 + static const size_t RegionSize; 16.12 + static const size_t RegionSizeBytes; 16.13 16.14 - // Mask for the bits in a size_t to get an offset within a chunk. 16.15 - static const size_t ChunkSizeOffsetMask; 16.16 - // Mask for the bits in a pointer to get an offset within a chunk. 16.17 - static const size_t ChunkAddrOffsetMask; 16.18 - // Mask for the bits in a pointer to get the address of the start of a chunk. 16.19 - static const size_t ChunkAddrMask; 16.20 + // Mask for the bits in a size_t to get an offset within a region. 16.21 + static const size_t RegionSizeOffsetMask; 16.22 + // Mask for the bits in a pointer to get an offset within a region. 16.23 + static const size_t RegionAddrOffsetMask; 16.24 + // Mask for the bits in a pointer to get the address of the start of a region. 16.25 + static const size_t RegionAddrMask; 16.26 16.27 - static const size_t Log2BlockSize; 16.28 - static const size_t BlockSize; 16.29 - static const size_t BlockOffsetMask; 16.30 - static const size_t BlockMask; 16.31 - 16.32 - static const size_t BlocksPerChunk; 16.33 - 16.34 - class ChunkData 16.35 + class RegionData 16.36 { 16.37 public: 16.38 - // Destination address of the chunk. 16.39 + // Destination address of the region. 16.40 HeapWord* destination() const { return _destination; } 16.41 16.42 - // The first chunk containing data destined for this chunk. 16.43 - size_t source_chunk() const { return _source_chunk; } 16.44 + // The first region containing data destined for this region. 16.45 + size_t source_region() const { return _source_region; } 16.46 16.47 - // The object (if any) starting in this chunk and ending in a different 16.48 - // chunk that could not be updated during the main (parallel) compaction 16.49 + // The object (if any) starting in this region and ending in a different 16.50 + // region that could not be updated during the main (parallel) compaction 16.51 // phase. This is different from _partial_obj_addr, which is an object that 16.52 - // extends onto a source chunk. However, the two uses do not overlap in 16.53 + // extends onto a source region. However, the two uses do not overlap in 16.54 // time, so the same field is used to save space. 16.55 HeapWord* deferred_obj_addr() const { return _partial_obj_addr; } 16.56 16.57 - // The starting address of the partial object extending onto the chunk. 16.58 + // The starting address of the partial object extending onto the region. 16.59 HeapWord* partial_obj_addr() const { return _partial_obj_addr; } 16.60 16.61 - // Size of the partial object extending onto the chunk (words). 16.62 + // Size of the partial object extending onto the region (words). 16.63 size_t partial_obj_size() const { return _partial_obj_size; } 16.64 16.65 - // Size of live data that lies within this chunk due to objects that start 16.66 - // in this chunk (words). This does not include the partial object 16.67 - // extending onto the chunk (if any), or the part of an object that extends 16.68 - // onto the next chunk (if any). 
16.69 + // Size of live data that lies within this region due to objects that start 16.70 + // in this region (words). This does not include the partial object 16.71 + // extending onto the region (if any), or the part of an object that extends 16.72 + // onto the next region (if any). 16.73 size_t live_obj_size() const { return _dc_and_los & los_mask; } 16.74 16.75 - // Total live data that lies within the chunk (words). 16.76 + // Total live data that lies within the region (words). 16.77 size_t data_size() const { return partial_obj_size() + live_obj_size(); } 16.78 16.79 - // The destination_count is the number of other chunks to which data from 16.80 - // this chunk will be copied. At the end of the summary phase, the valid 16.81 + // The destination_count is the number of other regions to which data from 16.82 + // this region will be copied. At the end of the summary phase, the valid 16.83 // values of destination_count are 16.84 // 16.85 - // 0 - data from the chunk will be compacted completely into itself, or the 16.86 - // chunk is empty. The chunk can be claimed and then filled. 16.87 - // 1 - data from the chunk will be compacted into 1 other chunk; some 16.88 - // data from the chunk may also be compacted into the chunk itself. 16.89 - // 2 - data from the chunk will be copied to 2 other chunks. 16.90 + // 0 - data from the region will be compacted completely into itself, or the 16.91 + // region is empty. The region can be claimed and then filled. 16.92 + // 1 - data from the region will be compacted into 1 other region; some 16.93 + // data from the region may also be compacted into the region itself. 16.94 + // 2 - data from the region will be copied to 2 other regions. 16.95 // 16.96 - // During compaction as chunks are emptied, the destination_count is 16.97 + // During compaction as regions are emptied, the destination_count is 16.98 // decremented (atomically) and when it reaches 0, it can be claimed and 16.99 // then filled. 16.100 // 16.101 - // A chunk is claimed for processing by atomically changing the 16.102 - // destination_count to the claimed value (dc_claimed). After a chunk has 16.103 + // A region is claimed for processing by atomically changing the 16.104 + // destination_count to the claimed value (dc_claimed). After a region has 16.105 // been filled, the destination_count should be set to the completed value 16.106 // (dc_completed). 16.107 inline uint destination_count() const; 16.108 inline uint destination_count_raw() const; 16.109 16.110 - // The location of the java heap data that corresponds to this chunk. 16.111 + // The location of the java heap data that corresponds to this region. 16.112 inline HeapWord* data_location() const; 16.113 16.114 - // The highest address referenced by objects in this chunk. 16.115 + // The highest address referenced by objects in this region. 16.116 inline HeapWord* highest_ref() const; 16.117 16.118 - // Whether this chunk is available to be claimed, has been claimed, or has 16.119 + // Whether this region is available to be claimed, has been claimed, or has 16.120 // been completed. 16.121 // 16.122 - // Minor subtlety: claimed() returns true if the chunk is marked 16.123 - // completed(), which is desirable since a chunk must be claimed before it 16.124 + // Minor subtlety: claimed() returns true if the region is marked 16.125 + // completed(), which is desirable since a region must be claimed before it 16.126 // can be completed. 
16.127 bool available() const { return _dc_and_los < dc_one; } 16.128 bool claimed() const { return _dc_and_los >= dc_claimed; } 16.129 @@ -164,11 +157,11 @@ 16.130 16.131 // These are not atomic. 16.132 void set_destination(HeapWord* addr) { _destination = addr; } 16.133 - void set_source_chunk(size_t chunk) { _source_chunk = chunk; } 16.134 + void set_source_region(size_t region) { _source_region = region; } 16.135 void set_deferred_obj_addr(HeapWord* addr) { _partial_obj_addr = addr; } 16.136 void set_partial_obj_addr(HeapWord* addr) { _partial_obj_addr = addr; } 16.137 void set_partial_obj_size(size_t words) { 16.138 - _partial_obj_size = (chunk_sz_t) words; 16.139 + _partial_obj_size = (region_sz_t) words; 16.140 } 16.141 16.142 inline void set_destination_count(uint count); 16.143 @@ -184,129 +177,57 @@ 16.144 inline bool claim(); 16.145 16.146 private: 16.147 - // The type used to represent object sizes within a chunk. 16.148 - typedef uint chunk_sz_t; 16.149 + // The type used to represent object sizes within a region. 16.150 + typedef uint region_sz_t; 16.151 16.152 // Constants for manipulating the _dc_and_los field, which holds both the 16.153 // destination count and live obj size. The live obj size lives at the 16.154 // least significant end so no masking is necessary when adding. 16.155 - static const chunk_sz_t dc_shift; // Shift amount. 16.156 - static const chunk_sz_t dc_mask; // Mask for destination count. 16.157 - static const chunk_sz_t dc_one; // 1, shifted appropriately. 16.158 - static const chunk_sz_t dc_claimed; // Chunk has been claimed. 16.159 - static const chunk_sz_t dc_completed; // Chunk has been completed. 16.160 - static const chunk_sz_t los_mask; // Mask for live obj size. 16.161 + static const region_sz_t dc_shift; // Shift amount. 16.162 + static const region_sz_t dc_mask; // Mask for destination count. 16.163 + static const region_sz_t dc_one; // 1, shifted appropriately. 16.164 + static const region_sz_t dc_claimed; // Region has been claimed. 16.165 + static const region_sz_t dc_completed; // Region has been completed. 16.166 + static const region_sz_t los_mask; // Mask for live obj size. 16.167 16.168 - HeapWord* _destination; 16.169 - size_t _source_chunk; 16.170 - HeapWord* _partial_obj_addr; 16.171 - chunk_sz_t _partial_obj_size; 16.172 - chunk_sz_t volatile _dc_and_los; 16.173 + HeapWord* _destination; 16.174 + size_t _source_region; 16.175 + HeapWord* _partial_obj_addr; 16.176 + region_sz_t _partial_obj_size; 16.177 + region_sz_t volatile _dc_and_los; 16.178 #ifdef ASSERT 16.179 // These enable optimizations that are only partially implemented. Use 16.180 // debug builds to prevent the code fragments from breaking. 16.181 - HeapWord* _data_location; 16.182 - HeapWord* _highest_ref; 16.183 + HeapWord* _data_location; 16.184 + HeapWord* _highest_ref; 16.185 #endif // #ifdef ASSERT 16.186 16.187 #ifdef ASSERT 16.188 public: 16.189 - uint _pushed; // 0 until chunk is pushed onto a worker's stack 16.190 + uint _pushed; // 0 until region is pushed onto a worker's stack 16.191 private: 16.192 #endif 16.193 }; 16.194 16.195 - // 'Blocks' allow shorter sections of the bitmap to be searched. Each Block 16.196 - // holds an offset, which is the amount of live data in the Chunk to the left 16.197 - // of the first live object in the Block. This amount of live data will 16.198 - // include any object extending into the block. The first block in 16.199 - // a chunk does not include any partial object extending into the 16.200 - // the chunk. 
16.201 - // 16.202 - // The offset also encodes the 16.203 - // 'parity' of the first 1 bit in the Block: a positive offset means the 16.204 - // first 1 bit marks the start of an object, a negative offset means the first 16.205 - // 1 bit marks the end of an object. 16.206 - class BlockData 16.207 - { 16.208 - public: 16.209 - typedef short int blk_ofs_t; 16.210 - 16.211 - blk_ofs_t offset() const { return _offset >= 0 ? _offset : -_offset; } 16.212 - blk_ofs_t raw_offset() const { return _offset; } 16.213 - void set_first_is_start_bit(bool v) { _first_is_start_bit = v; } 16.214 - 16.215 -#if 0 16.216 - // The need for this method was anticipated but it is 16.217 - // never actually used. Do not include it for now. If 16.218 - // it is needed, consider the problem of what is passed 16.219 - // as "v". To avoid warning errors the method set_start_bit_offset() 16.220 - // was changed to take a size_t as the parameter and to do the 16.221 - // check for the possible overflow. Doing the cast in these 16.222 - // methods better limits the potential problems because of 16.223 - // the size of the field to this class. 16.224 - void set_raw_offset(blk_ofs_t v) { _offset = v; } 16.225 -#endif 16.226 - void set_start_bit_offset(size_t val) { 16.227 - assert(val >= 0, "sanity"); 16.228 - _offset = (blk_ofs_t) val; 16.229 - assert(val == (size_t) _offset, "Value is too large"); 16.230 - _first_is_start_bit = true; 16.231 - } 16.232 - void set_end_bit_offset(size_t val) { 16.233 - assert(val >= 0, "sanity"); 16.234 - _offset = (blk_ofs_t) val; 16.235 - assert(val == (size_t) _offset, "Value is too large"); 16.236 - _offset = - _offset; 16.237 - _first_is_start_bit = false; 16.238 - } 16.239 - bool first_is_start_bit() { 16.240 - assert(_set_phase > 0, "Not initialized"); 16.241 - return _first_is_start_bit; 16.242 - } 16.243 - bool first_is_end_bit() { 16.244 - assert(_set_phase > 0, "Not initialized"); 16.245 - return !_first_is_start_bit; 16.246 - } 16.247 - 16.248 - private: 16.249 - blk_ofs_t _offset; 16.250 - // This is temporary until the mark_bitmap is separated into 16.251 - // a start bit array and an end bit array. 16.252 - bool _first_is_start_bit; 16.253 -#ifdef ASSERT 16.254 - short _set_phase; 16.255 - static short _cur_phase; 16.256 - public: 16.257 - static void set_cur_phase(short v) { _cur_phase = v; } 16.258 -#endif 16.259 - }; 16.260 - 16.261 public: 16.262 ParallelCompactData(); 16.263 bool initialize(MemRegion covered_region); 16.264 16.265 - size_t chunk_count() const { return _chunk_count; } 16.266 + size_t region_count() const { return _region_count; } 16.267 16.268 - // Convert chunk indices to/from ChunkData pointers. 16.269 - inline ChunkData* chunk(size_t chunk_idx) const; 16.270 - inline size_t chunk(const ChunkData* const chunk_ptr) const; 16.271 + // Convert region indices to/from RegionData pointers. 16.272 + inline RegionData* region(size_t region_idx) const; 16.273 + inline size_t region(const RegionData* const region_ptr) const; 16.274 16.275 - // Returns true if the given address is contained within the chunk 16.276 - bool chunk_contains(size_t chunk_index, HeapWord* addr); 16.277 - 16.278 - size_t block_count() const { return _block_count; } 16.279 - inline BlockData* block(size_t n) const; 16.280 - 16.281 - // Returns true if the given block is in the given chunk. 
16.282 - static bool chunk_contains_block(size_t chunk_index, size_t block_index); 16.283 + // Returns true if the given address is contained within the region 16.284 + bool region_contains(size_t region_index, HeapWord* addr); 16.285 16.286 void add_obj(HeapWord* addr, size_t len); 16.287 void add_obj(oop p, size_t len) { add_obj((HeapWord*)p, len); } 16.288 16.289 - // Fill in the chunks covering [beg, end) so that no data moves; i.e., the 16.290 - // destination of chunk n is simply the start of chunk n. The argument beg 16.291 - // must be chunk-aligned; end need not be. 16.292 + // Fill in the regions covering [beg, end) so that no data moves; i.e., the 16.293 + // destination of region n is simply the start of region n. The argument beg 16.294 + // must be region-aligned; end need not be. 16.295 void summarize_dense_prefix(HeapWord* beg, HeapWord* end); 16.296 16.297 bool summarize(HeapWord* target_beg, HeapWord* target_end, 16.298 @@ -314,48 +235,33 @@ 16.299 HeapWord** target_next, HeapWord** source_next = 0); 16.300 16.301 void clear(); 16.302 - void clear_range(size_t beg_chunk, size_t end_chunk); 16.303 + void clear_range(size_t beg_region, size_t end_region); 16.304 void clear_range(HeapWord* beg, HeapWord* end) { 16.305 - clear_range(addr_to_chunk_idx(beg), addr_to_chunk_idx(end)); 16.306 + clear_range(addr_to_region_idx(beg), addr_to_region_idx(end)); 16.307 } 16.308 16.309 - // Return the number of words between addr and the start of the chunk 16.310 + // Return the number of words between addr and the start of the region 16.311 // containing addr. 16.312 - inline size_t chunk_offset(const HeapWord* addr) const; 16.313 + inline size_t region_offset(const HeapWord* addr) const; 16.314 16.315 - // Convert addresses to/from a chunk index or chunk pointer. 16.316 - inline size_t addr_to_chunk_idx(const HeapWord* addr) const; 16.317 - inline ChunkData* addr_to_chunk_ptr(const HeapWord* addr) const; 16.318 - inline HeapWord* chunk_to_addr(size_t chunk) const; 16.319 - inline HeapWord* chunk_to_addr(size_t chunk, size_t offset) const; 16.320 - inline HeapWord* chunk_to_addr(const ChunkData* chunk) const; 16.321 + // Convert addresses to/from a region index or region pointer. 16.322 + inline size_t addr_to_region_idx(const HeapWord* addr) const; 16.323 + inline RegionData* addr_to_region_ptr(const HeapWord* addr) const; 16.324 + inline HeapWord* region_to_addr(size_t region) const; 16.325 + inline HeapWord* region_to_addr(size_t region, size_t offset) const; 16.326 + inline HeapWord* region_to_addr(const RegionData* region) const; 16.327 16.328 - inline HeapWord* chunk_align_down(HeapWord* addr) const; 16.329 - inline HeapWord* chunk_align_up(HeapWord* addr) const; 16.330 - inline bool is_chunk_aligned(HeapWord* addr) const; 16.331 - 16.332 - // Analogous to chunk_offset() for blocks. 16.333 - size_t block_offset(const HeapWord* addr) const; 16.334 - size_t addr_to_block_idx(const HeapWord* addr) const; 16.335 - size_t addr_to_block_idx(const oop obj) const { 16.336 - return addr_to_block_idx((HeapWord*) obj); 16.337 - } 16.338 - inline BlockData* addr_to_block_ptr(const HeapWord* addr) const; 16.339 - inline HeapWord* block_to_addr(size_t block) const; 16.340 + inline HeapWord* region_align_down(HeapWord* addr) const; 16.341 + inline HeapWord* region_align_up(HeapWord* addr) const; 16.342 + inline bool is_region_aligned(HeapWord* addr) const; 16.343 16.344 // Return the address one past the end of the partial object. 
16.345 - HeapWord* partial_obj_end(size_t chunk_idx) const; 16.346 + HeapWord* partial_obj_end(size_t region_idx) const; 16.347 16.348 // Return the new location of the object p after the 16.349 // the compaction. 16.350 HeapWord* calc_new_pointer(HeapWord* addr); 16.351 16.352 - // Same as calc_new_pointer() using blocks. 16.353 - HeapWord* block_calc_new_pointer(HeapWord* addr); 16.354 - 16.355 - // Same as calc_new_pointer() using chunks. 16.356 - HeapWord* chunk_calc_new_pointer(HeapWord* addr); 16.357 - 16.358 HeapWord* calc_new_pointer(oop p) { 16.359 return calc_new_pointer((HeapWord*) p); 16.360 } 16.361 @@ -363,22 +269,13 @@ 16.362 // Return the updated address for the given klass 16.363 klassOop calc_new_klass(klassOop); 16.364 16.365 - // Given a block returns true if the partial object for the 16.366 - // corresponding chunk ends in the block. Returns false, otherwise 16.367 - // If there is no partial object, returns false. 16.368 - bool partial_obj_ends_in_block(size_t block_index); 16.369 - 16.370 - // Returns the block index for the block 16.371 - static size_t block_idx(BlockData* block); 16.372 - 16.373 #ifdef ASSERT 16.374 void verify_clear(const PSVirtualSpace* vspace); 16.375 void verify_clear(); 16.376 #endif // #ifdef ASSERT 16.377 16.378 private: 16.379 - bool initialize_block_data(size_t region_size); 16.380 - bool initialize_chunk_data(size_t region_size); 16.381 + bool initialize_region_data(size_t region_size); 16.382 PSVirtualSpace* create_vspace(size_t count, size_t element_size); 16.383 16.384 private: 16.385 @@ -387,74 +284,70 @@ 16.386 HeapWord* _region_end; 16.387 #endif // #ifdef ASSERT 16.388 16.389 - PSVirtualSpace* _chunk_vspace; 16.390 - ChunkData* _chunk_data; 16.391 - size_t _chunk_count; 16.392 - 16.393 - PSVirtualSpace* _block_vspace; 16.394 - BlockData* _block_data; 16.395 - size_t _block_count; 16.396 + PSVirtualSpace* _region_vspace; 16.397 + RegionData* _region_data; 16.398 + size_t _region_count; 16.399 }; 16.400 16.401 inline uint 16.402 -ParallelCompactData::ChunkData::destination_count_raw() const 16.403 +ParallelCompactData::RegionData::destination_count_raw() const 16.404 { 16.405 return _dc_and_los & dc_mask; 16.406 } 16.407 16.408 inline uint 16.409 -ParallelCompactData::ChunkData::destination_count() const 16.410 +ParallelCompactData::RegionData::destination_count() const 16.411 { 16.412 return destination_count_raw() >> dc_shift; 16.413 } 16.414 16.415 inline void 16.416 -ParallelCompactData::ChunkData::set_destination_count(uint count) 16.417 +ParallelCompactData::RegionData::set_destination_count(uint count) 16.418 { 16.419 assert(count <= (dc_completed >> dc_shift), "count too large"); 16.420 - const chunk_sz_t live_sz = (chunk_sz_t) live_obj_size(); 16.421 + const region_sz_t live_sz = (region_sz_t) live_obj_size(); 16.422 _dc_and_los = (count << dc_shift) | live_sz; 16.423 } 16.424 16.425 -inline void ParallelCompactData::ChunkData::set_live_obj_size(size_t words) 16.426 +inline void ParallelCompactData::RegionData::set_live_obj_size(size_t words) 16.427 { 16.428 assert(words <= los_mask, "would overflow"); 16.429 - _dc_and_los = destination_count_raw() | (chunk_sz_t)words; 16.430 + _dc_and_los = destination_count_raw() | (region_sz_t)words; 16.431 } 16.432 16.433 -inline void ParallelCompactData::ChunkData::decrement_destination_count() 16.434 +inline void ParallelCompactData::RegionData::decrement_destination_count() 16.435 { 16.436 assert(_dc_and_los < dc_claimed, "already claimed"); 16.437 assert(_dc_and_los >= dc_one, 
"count would go negative"); 16.438 Atomic::add((int)dc_mask, (volatile int*)&_dc_and_los); 16.439 } 16.440 16.441 -inline HeapWord* ParallelCompactData::ChunkData::data_location() const 16.442 +inline HeapWord* ParallelCompactData::RegionData::data_location() const 16.443 { 16.444 DEBUG_ONLY(return _data_location;) 16.445 NOT_DEBUG(return NULL;) 16.446 } 16.447 16.448 -inline HeapWord* ParallelCompactData::ChunkData::highest_ref() const 16.449 +inline HeapWord* ParallelCompactData::RegionData::highest_ref() const 16.450 { 16.451 DEBUG_ONLY(return _highest_ref;) 16.452 NOT_DEBUG(return NULL;) 16.453 } 16.454 16.455 -inline void ParallelCompactData::ChunkData::set_data_location(HeapWord* addr) 16.456 +inline void ParallelCompactData::RegionData::set_data_location(HeapWord* addr) 16.457 { 16.458 DEBUG_ONLY(_data_location = addr;) 16.459 } 16.460 16.461 -inline void ParallelCompactData::ChunkData::set_completed() 16.462 +inline void ParallelCompactData::RegionData::set_completed() 16.463 { 16.464 assert(claimed(), "must be claimed first"); 16.465 - _dc_and_los = dc_completed | (chunk_sz_t) live_obj_size(); 16.466 + _dc_and_los = dc_completed | (region_sz_t) live_obj_size(); 16.467 } 16.468 16.469 -// MT-unsafe claiming of a chunk. Should only be used during single threaded 16.470 +// MT-unsafe claiming of a region. Should only be used during single threaded 16.471 // execution. 16.472 -inline bool ParallelCompactData::ChunkData::claim_unsafe() 16.473 +inline bool ParallelCompactData::RegionData::claim_unsafe() 16.474 { 16.475 if (available()) { 16.476 _dc_and_los |= dc_claimed; 16.477 @@ -463,13 +356,13 @@ 16.478 return false; 16.479 } 16.480 16.481 -inline void ParallelCompactData::ChunkData::add_live_obj(size_t words) 16.482 +inline void ParallelCompactData::RegionData::add_live_obj(size_t words) 16.483 { 16.484 assert(words <= (size_t)los_mask - live_obj_size(), "overflow"); 16.485 Atomic::add((int) words, (volatile int*) &_dc_and_los); 16.486 } 16.487 16.488 -inline void ParallelCompactData::ChunkData::set_highest_ref(HeapWord* addr) 16.489 +inline void ParallelCompactData::RegionData::set_highest_ref(HeapWord* addr) 16.490 { 16.491 #ifdef ASSERT 16.492 HeapWord* tmp = _highest_ref; 16.493 @@ -479,7 +372,7 @@ 16.494 #endif // #ifdef ASSERT 16.495 } 16.496 16.497 -inline bool ParallelCompactData::ChunkData::claim() 16.498 +inline bool ParallelCompactData::RegionData::claim() 16.499 { 16.500 const int los = (int) live_obj_size(); 16.501 const int old = Atomic::cmpxchg(dc_claimed | los, 16.502 @@ -487,119 +380,85 @@ 16.503 return old == los; 16.504 } 16.505 16.506 -inline ParallelCompactData::ChunkData* 16.507 -ParallelCompactData::chunk(size_t chunk_idx) const 16.508 +inline ParallelCompactData::RegionData* 16.509 +ParallelCompactData::region(size_t region_idx) const 16.510 { 16.511 - assert(chunk_idx <= chunk_count(), "bad arg"); 16.512 - return _chunk_data + chunk_idx; 16.513 + assert(region_idx <= region_count(), "bad arg"); 16.514 + return _region_data + region_idx; 16.515 } 16.516 16.517 inline size_t 16.518 -ParallelCompactData::chunk(const ChunkData* const chunk_ptr) const 16.519 +ParallelCompactData::region(const RegionData* const region_ptr) const 16.520 { 16.521 - assert(chunk_ptr >= _chunk_data, "bad arg"); 16.522 - assert(chunk_ptr <= _chunk_data + chunk_count(), "bad arg"); 16.523 - return pointer_delta(chunk_ptr, _chunk_data, sizeof(ChunkData)); 16.524 -} 16.525 - 16.526 -inline ParallelCompactData::BlockData* 16.527 -ParallelCompactData::block(size_t n) const { 16.528 - 
assert(n < block_count(), "bad arg"); 16.529 - return _block_data + n; 16.530 + assert(region_ptr >= _region_data, "bad arg"); 16.531 + assert(region_ptr <= _region_data + region_count(), "bad arg"); 16.532 + return pointer_delta(region_ptr, _region_data, sizeof(RegionData)); 16.533 } 16.534 16.535 inline size_t 16.536 -ParallelCompactData::chunk_offset(const HeapWord* addr) const 16.537 +ParallelCompactData::region_offset(const HeapWord* addr) const 16.538 { 16.539 assert(addr >= _region_start, "bad addr"); 16.540 assert(addr <= _region_end, "bad addr"); 16.541 - return (size_t(addr) & ChunkAddrOffsetMask) >> LogHeapWordSize; 16.542 + return (size_t(addr) & RegionAddrOffsetMask) >> LogHeapWordSize; 16.543 } 16.544 16.545 inline size_t 16.546 -ParallelCompactData::addr_to_chunk_idx(const HeapWord* addr) const 16.547 +ParallelCompactData::addr_to_region_idx(const HeapWord* addr) const 16.548 { 16.549 assert(addr >= _region_start, "bad addr"); 16.550 assert(addr <= _region_end, "bad addr"); 16.551 - return pointer_delta(addr, _region_start) >> Log2ChunkSize; 16.552 + return pointer_delta(addr, _region_start) >> Log2RegionSize; 16.553 } 16.554 16.555 -inline ParallelCompactData::ChunkData* 16.556 -ParallelCompactData::addr_to_chunk_ptr(const HeapWord* addr) const 16.557 +inline ParallelCompactData::RegionData* 16.558 +ParallelCompactData::addr_to_region_ptr(const HeapWord* addr) const 16.559 { 16.560 - return chunk(addr_to_chunk_idx(addr)); 16.561 + return region(addr_to_region_idx(addr)); 16.562 } 16.563 16.564 inline HeapWord* 16.565 -ParallelCompactData::chunk_to_addr(size_t chunk) const 16.566 +ParallelCompactData::region_to_addr(size_t region) const 16.567 { 16.568 - assert(chunk <= _chunk_count, "chunk out of range"); 16.569 - return _region_start + (chunk << Log2ChunkSize); 16.570 + assert(region <= _region_count, "region out of range"); 16.571 + return _region_start + (region << Log2RegionSize); 16.572 } 16.573 16.574 inline HeapWord* 16.575 -ParallelCompactData::chunk_to_addr(const ChunkData* chunk) const 16.576 +ParallelCompactData::region_to_addr(const RegionData* region) const 16.577 { 16.578 - return chunk_to_addr(pointer_delta(chunk, _chunk_data, sizeof(ChunkData))); 16.579 + return region_to_addr(pointer_delta(region, _region_data, 16.580 + sizeof(RegionData))); 16.581 } 16.582 16.583 inline HeapWord* 16.584 -ParallelCompactData::chunk_to_addr(size_t chunk, size_t offset) const 16.585 +ParallelCompactData::region_to_addr(size_t region, size_t offset) const 16.586 { 16.587 - assert(chunk <= _chunk_count, "chunk out of range"); 16.588 - assert(offset < ChunkSize, "offset too big"); // This may be too strict. 16.589 - return chunk_to_addr(chunk) + offset; 16.590 + assert(region <= _region_count, "region out of range"); 16.591 + assert(offset < RegionSize, "offset too big"); // This may be too strict. 
16.592 + return region_to_addr(region) + offset; 16.593 } 16.594 16.595 inline HeapWord* 16.596 -ParallelCompactData::chunk_align_down(HeapWord* addr) const 16.597 +ParallelCompactData::region_align_down(HeapWord* addr) const 16.598 { 16.599 assert(addr >= _region_start, "bad addr"); 16.600 - assert(addr < _region_end + ChunkSize, "bad addr"); 16.601 - return (HeapWord*)(size_t(addr) & ChunkAddrMask); 16.602 + assert(addr < _region_end + RegionSize, "bad addr"); 16.603 + return (HeapWord*)(size_t(addr) & RegionAddrMask); 16.604 } 16.605 16.606 inline HeapWord* 16.607 -ParallelCompactData::chunk_align_up(HeapWord* addr) const 16.608 +ParallelCompactData::region_align_up(HeapWord* addr) const 16.609 { 16.610 assert(addr >= _region_start, "bad addr"); 16.611 assert(addr <= _region_end, "bad addr"); 16.612 - return chunk_align_down(addr + ChunkSizeOffsetMask); 16.613 + return region_align_down(addr + RegionSizeOffsetMask); 16.614 } 16.615 16.616 inline bool 16.617 -ParallelCompactData::is_chunk_aligned(HeapWord* addr) const 16.618 +ParallelCompactData::is_region_aligned(HeapWord* addr) const 16.619 { 16.620 - return chunk_offset(addr) == 0; 16.621 -} 16.622 - 16.623 -inline size_t 16.624 -ParallelCompactData::block_offset(const HeapWord* addr) const 16.625 -{ 16.626 - assert(addr >= _region_start, "bad addr"); 16.627 - assert(addr <= _region_end, "bad addr"); 16.628 - return pointer_delta(addr, _region_start) & BlockOffsetMask; 16.629 -} 16.630 - 16.631 -inline size_t 16.632 -ParallelCompactData::addr_to_block_idx(const HeapWord* addr) const 16.633 -{ 16.634 - assert(addr >= _region_start, "bad addr"); 16.635 - assert(addr <= _region_end, "bad addr"); 16.636 - return pointer_delta(addr, _region_start) >> Log2BlockSize; 16.637 -} 16.638 - 16.639 -inline ParallelCompactData::BlockData* 16.640 -ParallelCompactData::addr_to_block_ptr(const HeapWord* addr) const 16.641 -{ 16.642 - return block(addr_to_block_idx(addr)); 16.643 -} 16.644 - 16.645 -inline HeapWord* 16.646 -ParallelCompactData::block_to_addr(size_t block) const 16.647 -{ 16.648 - assert(block < _block_count, "block out of range"); 16.649 - return _region_start + (block << Log2BlockSize); 16.650 + return region_offset(addr) == 0; 16.651 } 16.652 16.653 // Abstract closure for use with ParMarkBitMap::iterate(), which will invoke the 16.654 @@ -687,45 +546,15 @@ 16.655 _words_remaining -= words; 16.656 } 16.657 16.658 -// Closure for updating the block data during the summary phase. 16.659 -class BitBlockUpdateClosure: public ParMarkBitMapClosure { 16.660 - // ParallelCompactData::BlockData::blk_ofs_t _live_data_left; 16.661 - size_t _live_data_left; 16.662 - size_t _cur_block; 16.663 - HeapWord* _chunk_start; 16.664 - HeapWord* _chunk_end; 16.665 - size_t _chunk_index; 16.666 - 16.667 - public: 16.668 - BitBlockUpdateClosure(ParMarkBitMap* mbm, 16.669 - ParCompactionManager* cm, 16.670 - size_t chunk_index); 16.671 - 16.672 - size_t cur_block() { return _cur_block; } 16.673 - size_t chunk_index() { return _chunk_index; } 16.674 - size_t live_data_left() { return _live_data_left; } 16.675 - // Returns true the first bit in the current block (cur_block) is 16.676 - // a start bit. 16.677 - // Returns true if the current block is within the chunk for the closure; 16.678 - bool chunk_contains_cur_block(); 16.679 - 16.680 - // Set the chunk index and related chunk values for 16.681 - // a new chunk. 
16.682 - void reset_chunk(size_t chunk_index); 16.683 - 16.684 - virtual IterationStatus do_addr(HeapWord* addr, size_t words); 16.685 -}; 16.686 - 16.687 -// The UseParallelOldGC collector is a stop-the-world garbage 16.688 -// collector that does parts of the collection using parallel threads. 16.689 -// The collection includes the tenured generation and the young 16.690 -// generation. The permanent generation is collected at the same 16.691 -// time as the other two generations but the permanent generation 16.692 -// is collect by a single GC thread. The permanent generation is 16.693 -// collected serially because of the requirement that during the 16.694 -// processing of a klass AAA, any objects reference by AAA must 16.695 -// already have been processed. This requirement is enforced by 16.696 -// a left (lower address) to right (higher address) sliding compaction. 16.697 +// The UseParallelOldGC collector is a stop-the-world garbage collector that 16.698 +// does parts of the collection using parallel threads. The collection includes 16.699 +// the tenured generation and the young generation. The permanent generation is 16.700 +// collected at the same time as the other two generations but the permanent 16.701 +// generation is collect by a single GC thread. The permanent generation is 16.702 +// collected serially because of the requirement that during the processing of a 16.703 +// klass AAA, any objects reference by AAA must already have been processed. 16.704 +// This requirement is enforced by a left (lower address) to right (higher 16.705 +// address) sliding compaction. 16.706 // 16.707 // There are four phases of the collection. 16.708 // 16.709 @@ -740,81 +569,75 @@ 16.710 // - move the objects to their destination 16.711 // - update some references and reinitialize some variables 16.712 // 16.713 -// These three phases are invoked in PSParallelCompact::invoke_no_policy(). 16.714 -// The marking phase is implemented in PSParallelCompact::marking_phase() 16.715 -// and does a complete marking of the heap. 16.716 -// The summary phase is implemented in PSParallelCompact::summary_phase(). 16.717 -// The move and update phase is implemented in PSParallelCompact::compact(). 16.718 +// These three phases are invoked in PSParallelCompact::invoke_no_policy(). The 16.719 +// marking phase is implemented in PSParallelCompact::marking_phase() and does a 16.720 +// complete marking of the heap. The summary phase is implemented in 16.721 +// PSParallelCompact::summary_phase(). The move and update phase is implemented 16.722 +// in PSParallelCompact::compact(). 16.723 // 16.724 -// A space that is being collected is divided into chunks and with 16.725 -// each chunk is associated an object of type ParallelCompactData. 16.726 -// Each chunk is of a fixed size and typically will contain more than 16.727 -// 1 object and may have parts of objects at the front and back of the 16.728 -// chunk. 16.729 +// A space that is being collected is divided into regions and with each region 16.730 +// is associated an object of type ParallelCompactData. Each region is of a 16.731 +// fixed size and typically will contain more than 1 object and may have parts 16.732 +// of objects at the front and back of the region. 16.733 // 16.734 -// chunk -----+---------------------+---------- 16.735 +// region -----+---------------------+---------- 16.736 // objects covered [ AAA )[ BBB )[ CCC )[ DDD ) 16.737 // 16.738 -// The marking phase does a complete marking of all live objects in the 16.739 -// heap. 
The marking also compiles the size of the data for 16.740 -// all live objects covered by the chunk. This size includes the 16.741 -// part of any live object spanning onto the chunk (part of AAA 16.742 -// if it is live) from the front, all live objects contained in the chunk 16.743 -// (BBB and/or CCC if they are live), and the part of any live objects 16.744 -// covered by the chunk that extends off the chunk (part of DDD if it is 16.745 -// live). The marking phase uses multiple GC threads and marking is 16.746 -// done in a bit array of type ParMarkBitMap. The marking of the 16.747 -// bit map is done atomically as is the accumulation of the size of the 16.748 -// live objects covered by a chunk. 16.749 +// The marking phase does a complete marking of all live objects in the heap. 16.750 +// The marking also compiles the size of the data for all live objects covered 16.751 +// by the region. This size includes the part of any live object spanning onto 16.752 +// the region (part of AAA if it is live) from the front, all live objects 16.753 +// contained in the region (BBB and/or CCC if they are live), and the part of 16.754 +// any live objects covered by the region that extends off the region (part of 16.755 +// DDD if it is live). The marking phase uses multiple GC threads and marking 16.756 +// is done in a bit array of type ParMarkBitMap. The marking of the bit map is 16.757 +// done atomically as is the accumulation of the size of the live objects 16.758 +// covered by a region. 16.759 // 16.760 -// The summary phase calculates the total live data to the left of 16.761 -// each chunk XXX. Based on that total and the bottom of the space, 16.762 -// it can calculate the starting location of the live data in XXX. 16.763 -// The summary phase calculates for each chunk XXX quantites such as 16.764 +// The summary phase calculates the total live data to the left of each region 16.765 +// XXX. Based on that total and the bottom of the space, it can calculate the 16.766 +// starting location of the live data in XXX. The summary phase calculates for 16.767 +// each region XXX quantites such as 16.768 // 16.769 -// - the amount of live data at the beginning of a chunk from an object 16.770 -// entering the chunk. 16.771 -// - the location of the first live data on the chunk 16.772 -// - a count of the number of chunks receiving live data from XXX. 16.773 +// - the amount of live data at the beginning of a region from an object 16.774 +// entering the region. 16.775 +// - the location of the first live data on the region 16.776 +// - a count of the number of regions receiving live data from XXX. 16.777 // 16.778 // See ParallelCompactData for precise details. The summary phase also 16.779 -// calculates the dense prefix for the compaction. The dense prefix 16.780 -// is a portion at the beginning of the space that is not moved. The 16.781 -// objects in the dense prefix do need to have their object references 16.782 -// updated. See method summarize_dense_prefix(). 16.783 +// calculates the dense prefix for the compaction. The dense prefix is a 16.784 +// portion at the beginning of the space that is not moved. The objects in the 16.785 +// dense prefix do need to have their object references updated. See method 16.786 +// summarize_dense_prefix(). 16.787 // 16.788 // The summary phase is done using 1 GC thread. 16.789 // 16.790 -// The compaction phase moves objects to their new location and updates 16.791 -// all references in the object. 
16.792 +// The compaction phase moves objects to their new location and updates all 16.793 +// references in the object. 16.794 // 16.795 -// A current exception is that objects that cross a chunk boundary 16.796 -// are moved but do not have their references updated. References are 16.797 -// not updated because it cannot easily be determined if the klass 16.798 -// pointer KKK for the object AAA has been updated. KKK likely resides 16.799 -// in a chunk to the left of the chunk containing AAA. These AAA's 16.800 -// have there references updated at the end in a clean up phase. 16.801 -// See the method PSParallelCompact::update_deferred_objects(). An 16.802 -// alternate strategy is being investigated for this deferral of updating. 16.803 +// A current exception is that objects that cross a region boundary are moved 16.804 +// but do not have their references updated. References are not updated because 16.805 +// it cannot easily be determined if the klass pointer KKK for the object AAA 16.806 +// has been updated. KKK likely resides in a region to the left of the region 16.807 +// containing AAA. These AAA's have there references updated at the end in a 16.808 +// clean up phase. See the method PSParallelCompact::update_deferred_objects(). 16.809 +// An alternate strategy is being investigated for this deferral of updating. 16.810 // 16.811 -// Compaction is done on a chunk basis. A chunk that is ready to be 16.812 -// filled is put on a ready list and GC threads take chunk off the list 16.813 -// and fill them. A chunk is ready to be filled if it 16.814 -// empty of live objects. Such a chunk may have been initially 16.815 -// empty (only contained 16.816 -// dead objects) or may have had all its live objects copied out already. 16.817 -// A chunk that compacts into itself is also ready for filling. The 16.818 -// ready list is initially filled with empty chunks and chunks compacting 16.819 -// into themselves. There is always at least 1 chunk that can be put on 16.820 -// the ready list. The chunks are atomically added and removed from 16.821 -// the ready list. 16.822 -// 16.823 +// Compaction is done on a region basis. A region that is ready to be filled is 16.824 +// put on a ready list and GC threads take region off the list and fill them. A 16.825 +// region is ready to be filled if it empty of live objects. Such a region may 16.826 +// have been initially empty (only contained dead objects) or may have had all 16.827 +// its live objects copied out already. A region that compacts into itself is 16.828 +// also ready for filling. The ready list is initially filled with empty 16.829 +// regions and regions compacting into themselves. There is always at least 1 16.830 +// region that can be put on the ready list. The regions are atomically added 16.831 +// and removed from the ready list. 16.832 + 16.833 class PSParallelCompact : AllStatic { 16.834 public: 16.835 // Convenient access to type names. 16.836 typedef ParMarkBitMap::idx_t idx_t; 16.837 - typedef ParallelCompactData::ChunkData ChunkData; 16.838 - typedef ParallelCompactData::BlockData BlockData; 16.839 + typedef ParallelCompactData::RegionData RegionData; 16.840 16.841 typedef enum { 16.842 perm_space_id, old_space_id, eden_space_id, 16.843 @@ -977,26 +800,26 @@ 16.844 // not reclaimed). 
16.845 static double dead_wood_limiter(double density, size_t min_percent); 16.846 16.847 - // Find the first (left-most) chunk in the range [beg, end) that has at least 16.848 + // Find the first (left-most) region in the range [beg, end) that has at least 16.849 // dead_words of dead space to the left. The argument beg must be the first 16.850 - // chunk in the space that is not completely live. 16.851 - static ChunkData* dead_wood_limit_chunk(const ChunkData* beg, 16.852 - const ChunkData* end, 16.853 - size_t dead_words); 16.854 + // region in the space that is not completely live. 16.855 + static RegionData* dead_wood_limit_region(const RegionData* beg, 16.856 + const RegionData* end, 16.857 + size_t dead_words); 16.858 16.859 - // Return a pointer to the first chunk in the range [beg, end) that is not 16.860 + // Return a pointer to the first region in the range [beg, end) that is not 16.861 // completely full. 16.862 - static ChunkData* first_dead_space_chunk(const ChunkData* beg, 16.863 - const ChunkData* end); 16.864 + static RegionData* first_dead_space_region(const RegionData* beg, 16.865 + const RegionData* end); 16.866 16.867 // Return a value indicating the benefit or 'yield' if the compacted region 16.868 // were to start (or equivalently if the dense prefix were to end) at the 16.869 - // candidate chunk. Higher values are better. 16.870 + // candidate region. Higher values are better. 16.871 // 16.872 // The value is based on the amount of space reclaimed vs. the costs of (a) 16.873 // updating references in the dense prefix plus (b) copying objects and 16.874 // updating references in the compacted region. 16.875 - static inline double reclaimed_ratio(const ChunkData* const candidate, 16.876 + static inline double reclaimed_ratio(const RegionData* const candidate, 16.877 HeapWord* const bottom, 16.878 HeapWord* const top, 16.879 HeapWord* const new_top); 16.880 @@ -1005,9 +828,9 @@ 16.881 static HeapWord* compute_dense_prefix(const SpaceId id, 16.882 bool maximum_compaction); 16.883 16.884 - // Return true if dead space crosses onto the specified Chunk; bit must be the 16.885 - // bit index corresponding to the first word of the Chunk. 16.886 - static inline bool dead_space_crosses_boundary(const ChunkData* chunk, 16.887 + // Return true if dead space crosses onto the specified Region; bit must be 16.888 + // the bit index corresponding to the first word of the Region. 16.889 + static inline bool dead_space_crosses_boundary(const RegionData* region, 16.890 idx_t bit); 16.891 16.892 // Summary phase utility routine to fill dead space (if any) at the dense 16.893 @@ -1019,12 +842,6 @@ 16.894 static void summarize_space(SpaceId id, bool maximum_compaction); 16.895 static void summary_phase(ParCompactionManager* cm, bool maximum_compaction); 16.896 16.897 - static bool block_first_offset(size_t block_index, idx_t* block_offset_ptr); 16.898 - 16.899 - // Fill in the BlockData 16.900 - static void summarize_blocks(ParCompactionManager* cm, 16.901 - SpaceId first_compaction_space_id); 16.902 - 16.903 // The space that is compacted after space_id. 16.904 static SpaceId next_compaction_space_id(SpaceId space_id); 16.905 16.906 @@ -1038,16 +855,16 @@ 16.907 static void compact_perm(ParCompactionManager* cm); 16.908 static void compact(); 16.909 16.910 - // Add available chunks to the stack and draining tasks to the task queue. 
16.911 - static void enqueue_chunk_draining_tasks(GCTaskQueue* q, 16.912 - uint parallel_gc_threads); 16.913 + // Add available regions to the stack and draining tasks to the task queue. 16.914 + static void enqueue_region_draining_tasks(GCTaskQueue* q, 16.915 + uint parallel_gc_threads); 16.916 16.917 // Add dense prefix update tasks to the task queue. 16.918 static void enqueue_dense_prefix_tasks(GCTaskQueue* q, 16.919 uint parallel_gc_threads); 16.920 16.921 - // Add chunk stealing tasks to the task queue. 16.922 - static void enqueue_chunk_stealing_tasks( 16.923 + // Add region stealing tasks to the task queue. 16.924 + static void enqueue_region_stealing_tasks( 16.925 GCTaskQueue* q, 16.926 ParallelTaskTerminator* terminator_ptr, 16.927 uint parallel_gc_threads); 16.928 @@ -1154,56 +971,56 @@ 16.929 // Move and update the live objects in the specified space. 16.930 static void move_and_update(ParCompactionManager* cm, SpaceId space_id); 16.931 16.932 - // Process the end of the given chunk range in the dense prefix. 16.933 + // Process the end of the given region range in the dense prefix. 16.934 // This includes saving any object not updated. 16.935 - static void dense_prefix_chunks_epilogue(ParCompactionManager* cm, 16.936 - size_t chunk_start_index, 16.937 - size_t chunk_end_index, 16.938 - idx_t exiting_object_offset, 16.939 - idx_t chunk_offset_start, 16.940 - idx_t chunk_offset_end); 16.941 + static void dense_prefix_regions_epilogue(ParCompactionManager* cm, 16.942 + size_t region_start_index, 16.943 + size_t region_end_index, 16.944 + idx_t exiting_object_offset, 16.945 + idx_t region_offset_start, 16.946 + idx_t region_offset_end); 16.947 16.948 - // Update a chunk in the dense prefix. For each live object 16.949 - // in the chunk, update it's interior references. For each 16.950 + // Update a region in the dense prefix. For each live object 16.951 + // in the region, update it's interior references. For each 16.952 // dead object, fill it with deadwood. Dead space at the end 16.953 - // of a chunk range will be filled to the start of the next 16.954 - // live object regardless of the chunk_index_end. None of the 16.955 + // of a region range will be filled to the start of the next 16.956 + // live object regardless of the region_index_end. None of the 16.957 // objects in the dense prefix move and dead space is dead 16.958 // (holds only dead objects that don't need any processing), so 16.959 // dead space can be filled in any order. 16.960 static void update_and_deadwood_in_dense_prefix(ParCompactionManager* cm, 16.961 SpaceId space_id, 16.962 - size_t chunk_index_start, 16.963 - size_t chunk_index_end); 16.964 + size_t region_index_start, 16.965 + size_t region_index_end); 16.966 16.967 // Return the address of the count + 1st live word in the range [beg, end). 16.968 static HeapWord* skip_live_words(HeapWord* beg, HeapWord* end, size_t count); 16.969 16.970 // Return the address of the word to be copied to dest_addr, which must be 16.971 - // aligned to a chunk boundary. 16.972 + // aligned to a region boundary. 16.973 static HeapWord* first_src_addr(HeapWord* const dest_addr, 16.974 - size_t src_chunk_idx); 16.975 + size_t src_region_idx); 16.976 16.977 - // Determine the next source chunk, set closure.source() to the start of the 16.978 - // new chunk return the chunk index. Parameter end_addr is the address one 16.979 + // Determine the next source region, set closure.source() to the start of the 16.980 + // new region return the region index. 
Parameter end_addr is the address one 16.981 // beyond the end of source range just processed. If necessary, switch to a 16.982 // new source space and set src_space_id (in-out parameter) and src_space_top 16.983 // (out parameter) accordingly. 16.984 - static size_t next_src_chunk(MoveAndUpdateClosure& closure, 16.985 - SpaceId& src_space_id, 16.986 - HeapWord*& src_space_top, 16.987 - HeapWord* end_addr); 16.988 + static size_t next_src_region(MoveAndUpdateClosure& closure, 16.989 + SpaceId& src_space_id, 16.990 + HeapWord*& src_space_top, 16.991 + HeapWord* end_addr); 16.992 16.993 - // Decrement the destination count for each non-empty source chunk in the 16.994 - // range [beg_chunk, chunk(chunk_align_up(end_addr))). 16.995 + // Decrement the destination count for each non-empty source region in the 16.996 + // range [beg_region, region(region_align_up(end_addr))). 16.997 static void decrement_destination_counts(ParCompactionManager* cm, 16.998 - size_t beg_chunk, 16.999 + size_t beg_region, 16.1000 HeapWord* end_addr); 16.1001 16.1002 - // Fill a chunk, copying objects from one or more source chunks. 16.1003 - static void fill_chunk(ParCompactionManager* cm, size_t chunk_idx); 16.1004 - static void fill_and_update_chunk(ParCompactionManager* cm, size_t chunk) { 16.1005 - fill_chunk(cm, chunk); 16.1006 + // Fill a region, copying objects from one or more source regions. 16.1007 + static void fill_region(ParCompactionManager* cm, size_t region_idx); 16.1008 + static void fill_and_update_region(ParCompactionManager* cm, size_t region) { 16.1009 + fill_region(cm, region); 16.1010 } 16.1011 16.1012 // Update the deferred objects in the space. 16.1013 @@ -1259,7 +1076,7 @@ 16.1014 #ifndef PRODUCT 16.1015 // Debugging support. 16.1016 static const char* space_names[last_space_id]; 16.1017 - static void print_chunk_ranges(); 16.1018 + static void print_region_ranges(); 16.1019 static void print_dense_prefix_stats(const char* const algorithm, 16.1020 const SpaceId id, 16.1021 const bool maximum_compaction, 16.1022 @@ -1267,7 +1084,7 @@ 16.1023 #endif // #ifndef PRODUCT 16.1024 16.1025 #ifdef ASSERT 16.1026 - // Verify that all the chunks have been emptied. 16.1027 + // Verify that all the regions have been emptied. 16.1028 static void verify_complete(SpaceId space_id); 16.1029 #endif // #ifdef ASSERT 16.1030 }; 16.1031 @@ -1376,17 +1193,17 @@ 16.1032 } 16.1033 16.1034 inline bool 16.1035 -PSParallelCompact::dead_space_crosses_boundary(const ChunkData* chunk, 16.1036 +PSParallelCompact::dead_space_crosses_boundary(const RegionData* region, 16.1037 idx_t bit) 16.1038 { 16.1039 - assert(bit > 0, "cannot call this for the first bit/chunk"); 16.1040 - assert(_summary_data.chunk_to_addr(chunk) == _mark_bitmap.bit_to_addr(bit), 16.1041 + assert(bit > 0, "cannot call this for the first bit/region"); 16.1042 + assert(_summary_data.region_to_addr(region) == _mark_bitmap.bit_to_addr(bit), 16.1043 "sanity check"); 16.1044 16.1045 // Dead space crosses the boundary if (1) a partial object does not extend 16.1046 - // onto the chunk, (2) an object does not start at the beginning of the chunk, 16.1047 - // and (3) an object does not end at the end of the prior chunk. 16.1048 - return chunk->partial_obj_size() == 0 && 16.1049 + // onto the region, (2) an object does not start at the beginning of the 16.1050 + // region, and (3) an object does not end at the end of the prior region. 
16.1051 + return region->partial_obj_size() == 0 && 16.1052 !_mark_bitmap.is_obj_beg(bit) && 16.1053 !_mark_bitmap.is_obj_end(bit - 1); 16.1054 }
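Note: the renamed ParallelCompactData inlines above are all shift/mask arithmetic once the region geometry is fixed. The following standalone sketch mirrors the address/region conversions with plain integers; the constants (8-byte heap words, 512-word regions) and the byte-address modelling are assumptions for illustration only, not the collector's actual private values.

    #include <cassert>
    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    // Standalone sketch of the region arithmetic above.  Constants are assumed
    // (8-byte heap words, 512-word regions); addresses are raw byte addresses
    // and region_start is region-aligned, as the offset computation requires.
    static const size_t LogHeapWordSize = 3;
    static const size_t Log2RegionSize  = 9;                  // in words (assumed)
    static const size_t RegionBytes     = size_t(1) << (Log2RegionSize + LogHeapWordSize);
    static const size_t RegionAddrOffsetMask = RegionBytes - 1;
    static const size_t RegionAddrMask       = ~RegionAddrOffsetMask;

    struct RegionTable {
      uintptr_t region_start;                                  // bottom of the covered space

      size_t addr_to_region_idx(uintptr_t addr) const {
        return (addr - region_start) >> (Log2RegionSize + LogHeapWordSize);
      }
      uintptr_t region_to_addr(size_t idx) const {
        return region_start + (idx << (Log2RegionSize + LogHeapWordSize));
      }
      size_t region_offset(uintptr_t addr) const {             // word offset within its region
        return (addr & RegionAddrOffsetMask) >> LogHeapWordSize;
      }
      uintptr_t region_align_down(uintptr_t addr) const { return addr & RegionAddrMask; }
      uintptr_t region_align_up(uintptr_t addr)   const { return region_align_down(addr + RegionBytes - 1); }
      bool is_region_aligned(uintptr_t addr)      const { return region_offset(addr) == 0; }
    };

    int main() {
      RegionTable t;
      t.region_start = 0x100000;                               // region-aligned bottom
      uintptr_t a = t.region_start + 3 * RegionBytes + 40;     // 5 words into region 3
      assert(t.addr_to_region_idx(a) == 3);
      assert(t.region_align_down(a) == t.region_to_addr(3));
      assert(t.region_align_up(a)   == t.region_to_addr(4));
      assert(!t.is_region_aligned(a) && t.is_region_aligned(t.region_to_addr(4)));
      printf("region %u, offset %u words\n",
             (unsigned)t.addr_to_region_idx(a), (unsigned)t.region_offset(a));
      return 0;
    }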
17.1 --- a/src/share/vm/gc_implementation/parallelScavenge/psPermGen.cpp Tue Sep 30 12:24:27 2008 -0400 17.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/psPermGen.cpp Wed Oct 01 20:15:03 2008 -0400 17.3 @@ -123,8 +123,6 @@ 17.4 17.5 void PSPermGen::precompact() { 17.6 // Reset start array first. 17.7 - debug_only(if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) {) 17.8 _start_array.reset(); 17.9 - debug_only(}) 17.10 object_mark_sweep()->precompact(); 17.11 }
18.1 --- a/src/share/vm/gc_implementation/shared/immutableSpace.hpp Tue Sep 30 12:24:27 2008 -0400 18.2 +++ b/src/share/vm/gc_implementation/shared/immutableSpace.hpp Wed Oct 01 20:15:03 2008 -0400 18.3 @@ -50,7 +50,8 @@ 18.4 size_t capacity_in_bytes() const { return capacity_in_words() * HeapWordSize; } 18.5 18.6 // Size computations. Sizes are in heapwords. 18.7 - size_t capacity_in_words() const { return pointer_delta(end(), bottom()); } 18.8 + size_t capacity_in_words() const { return pointer_delta(end(), bottom()); } 18.9 + virtual size_t capacity_in_words(Thread*) const { return capacity_in_words(); } 18.10 18.11 // Iteration. 18.12 virtual void oop_iterate(OopClosure* cl);
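Note: the new Thread* overload is virtual so a NUMA space can report a per-locality-group capacity while ordinary spaces fall back to the global figure; the matching `using MutableSpace::capacity_in_words;` added in mutableNUMASpace.hpp further below keeps the zero-argument overload visible despite C++ name hiding. A minimal sketch of that pattern, with hypothetical class and member names:

    #include <cstddef>
    #include <cstdio>

    // Minimal sketch of the overload pattern; class and member names here are
    // hypothetical stand-ins, not the HotSpot ones.
    struct Thread { int lgrp_id; };

    class Space {
      size_t _capacity;
     public:
      Space() : _capacity(1024) {}
      virtual ~Space() {}
      size_t capacity_in_words() const { return _capacity; }
      // Per-thread view; by default the thread is simply ignored.
      virtual size_t capacity_in_words(Thread*) const { return capacity_in_words(); }
    };

    class NUMASpace : public Space {
     public:
      // Without this using-declaration, the Thread* overload below would hide
      // the inherited zero-argument capacity_in_words().
      using Space::capacity_in_words;
      virtual size_t capacity_in_words(Thread*) const {
        return capacity_in_words() / 4;            // pretend 4 groups share evenly
      }
    };

    int main() {
      NUMASpace s;
      Thread t = { 0 };
      printf("%u %u\n", (unsigned)s.capacity_in_words(), (unsigned)s.capacity_in_words(&t));  // 1024 256
      return 0;
    }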
19.1 --- a/src/share/vm/gc_implementation/shared/markSweep.inline.hpp Tue Sep 30 12:24:27 2008 -0400 19.2 +++ b/src/share/vm/gc_implementation/shared/markSweep.inline.hpp Wed Oct 01 20:15:03 2008 -0400 19.3 @@ -23,13 +23,6 @@ 19.4 */ 19.5 19.6 inline void MarkSweep::mark_object(oop obj) { 19.7 -#ifndef SERIALGC 19.8 - if (UseParallelOldGC && VerifyParallelOldWithMarkSweep) { 19.9 - assert(PSParallelCompact::mark_bitmap()->is_marked(obj), 19.10 - "Should be marked in the marking bitmap"); 19.11 - } 19.12 -#endif // SERIALGC 19.13 - 19.14 // some marks may contain information we need to preserve so we store them away 19.15 // and overwrite the mark. We'll restore it at the end of markSweep. 19.16 markOop mark = obj->mark();
20.1 --- a/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp Tue Sep 30 12:24:27 2008 -0400 20.2 +++ b/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp Wed Oct 01 20:15:03 2008 -0400 20.3 @@ -181,6 +181,25 @@ 20.4 return lgrp_spaces()->at(i)->space()->free_in_bytes(); 20.5 } 20.6 20.7 + 20.8 +size_t MutableNUMASpace::capacity_in_words(Thread* thr) const { 20.9 + guarantee(thr != NULL, "No thread"); 20.10 + int lgrp_id = thr->lgrp_id(); 20.11 + if (lgrp_id == -1) { 20.12 + if (lgrp_spaces()->length() > 0) { 20.13 + return capacity_in_words() / lgrp_spaces()->length(); 20.14 + } else { 20.15 + assert(false, "There should be at least one locality group"); 20.16 + return 0; 20.17 + } 20.18 + } 20.19 + int i = lgrp_spaces()->find(&lgrp_id, LGRPSpace::equals); 20.20 + if (i == -1) { 20.21 + return 0; 20.22 + } 20.23 + return lgrp_spaces()->at(i)->space()->capacity_in_words(); 20.24 +} 20.25 + 20.26 // Check if the NUMA topology has changed. Add and remove spaces if needed. 20.27 // The update can be forced by setting the force parameter equal to true. 20.28 bool MutableNUMASpace::update_layout(bool force) { 20.29 @@ -722,7 +741,8 @@ 20.30 i = os::random() % lgrp_spaces()->length(); 20.31 } 20.32 20.33 - MutableSpace *s = lgrp_spaces()->at(i)->space(); 20.34 + LGRPSpace* ls = lgrp_spaces()->at(i); 20.35 + MutableSpace *s = ls->space(); 20.36 HeapWord *p = s->allocate(size); 20.37 20.38 if (p != NULL) { 20.39 @@ -743,6 +763,9 @@ 20.40 *(int*)i = 0; 20.41 } 20.42 } 20.43 + if (p == NULL) { 20.44 + ls->set_allocation_failed(); 20.45 + } 20.46 return p; 20.47 } 20.48 20.49 @@ -761,7 +784,8 @@ 20.50 if (i == -1) { 20.51 i = os::random() % lgrp_spaces()->length(); 20.52 } 20.53 - MutableSpace *s = lgrp_spaces()->at(i)->space(); 20.54 + LGRPSpace *ls = lgrp_spaces()->at(i); 20.55 + MutableSpace *s = ls->space(); 20.56 HeapWord *p = s->cas_allocate(size); 20.57 if (p != NULL) { 20.58 size_t remainder = pointer_delta(s->end(), p + size); 20.59 @@ -790,6 +814,9 @@ 20.60 *(int*)i = 0; 20.61 } 20.62 } 20.63 + if (p == NULL) { 20.64 + ls->set_allocation_failed(); 20.65 + } 20.66 return p; 20.67 } 20.68
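Note: MutableNUMASpace::capacity_in_words(Thread*) above gives a thread the capacity of its own locality-group space, an even share of the total if the thread has not been assigned a group yet (lgrp_id == -1), and zero for an unknown group. A small sketch of the same policy, with the locality-group list reduced to a plain vector (hypothetical layout):

    #include <cassert>
    #include <cstddef>
    #include <vector>

    // Sketch of the per-thread capacity policy above, with the locality-group
    // list reduced to a vector of per-group capacities.
    struct LGrp { int id; size_t capacity_words; };

    static size_t capacity_in_words_for(const std::vector<LGrp>& groups,
                                        size_t total_words, int thread_lgrp_id) {
      if (thread_lgrp_id == -1) {
        // Thread not bound to a locality group yet: an even share of the space.
        return groups.empty() ? 0 : total_words / groups.size();
      }
      for (size_t i = 0; i < groups.size(); i++) {
        if (groups[i].id == thread_lgrp_id)
          return groups[i].capacity_words;         // only this group's chunk counts
      }
      return 0;                                    // unknown group id
    }

    int main() {
      std::vector<LGrp> groups;
      LGrp g0 = { 0, 4096 };  groups.push_back(g0);
      LGrp g1 = { 1, 8192 };  groups.push_back(g1);
      assert(capacity_in_words_for(groups, 12288,  1) == 8192);   // bound thread
      assert(capacity_in_words_for(groups, 12288, -1) == 6144);   // unbound: even share
      return 0;
    }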
21.1 --- a/src/share/vm/gc_implementation/shared/mutableNUMASpace.hpp Tue Sep 30 12:24:27 2008 -0400 21.2 +++ b/src/share/vm/gc_implementation/shared/mutableNUMASpace.hpp Wed Oct 01 20:15:03 2008 -0400 21.3 @@ -60,6 +60,7 @@ 21.4 MutableSpace* _space; 21.5 MemRegion _invalid_region; 21.6 AdaptiveWeightedAverage *_alloc_rate; 21.7 + bool _allocation_failed; 21.8 21.9 struct SpaceStats { 21.10 size_t _local_space, _remote_space, _unbiased_space, _uncommited_space; 21.11 @@ -81,7 +82,7 @@ 21.12 char* last_page_scanned() { return _last_page_scanned; } 21.13 void set_last_page_scanned(char* p) { _last_page_scanned = p; } 21.14 public: 21.15 - LGRPSpace(int l) : _lgrp_id(l), _last_page_scanned(NULL) { 21.16 + LGRPSpace(int l) : _lgrp_id(l), _last_page_scanned(NULL), _allocation_failed(false) { 21.17 _space = new MutableSpace(); 21.18 _alloc_rate = new AdaptiveWeightedAverage(NUMAChunkResizeWeight); 21.19 } 21.20 @@ -103,8 +104,21 @@ 21.21 return *(int*)lgrp_id_value == p->lgrp_id(); 21.22 } 21.23 21.24 + // Report a failed allocation. 21.25 + void set_allocation_failed() { _allocation_failed = true; } 21.26 + 21.27 void sample() { 21.28 - alloc_rate()->sample(space()->used_in_bytes()); 21.29 + // If there was a failed allocation make allocation rate equal 21.30 + // to the size of the whole chunk. This ensures the progress of 21.31 + // the adaptation process. 21.32 + size_t alloc_rate_sample; 21.33 + if (_allocation_failed) { 21.34 + alloc_rate_sample = space()->capacity_in_bytes(); 21.35 + _allocation_failed = false; 21.36 + } else { 21.37 + alloc_rate_sample = space()->used_in_bytes(); 21.38 + } 21.39 + alloc_rate()->sample(alloc_rate_sample); 21.40 } 21.41 21.42 MemRegion invalid_region() const { return _invalid_region; } 21.43 @@ -190,6 +204,9 @@ 21.44 virtual void ensure_parsability(); 21.45 virtual size_t used_in_words() const; 21.46 virtual size_t free_in_words() const; 21.47 + 21.48 + using MutableSpace::capacity_in_words; 21.49 + virtual size_t capacity_in_words(Thread* thr) const; 21.50 virtual size_t tlab_capacity(Thread* thr) const; 21.51 virtual size_t unsafe_max_tlab_alloc(Thread* thr) const; 21.52
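Note: the new _allocation_failed flag changes what sample() feeds into the allocation-rate average: after a failed allocation the whole chunk capacity is sampled instead of the bytes in use, so the adaptive resizing keeps making progress for a group that ran out of space. A self-contained sketch; the plain exponential average and its 0.25 weight are stand-ins for AdaptiveWeightedAverage, not its actual behaviour:

    #include <cstddef>

    // Sketch of the sampling policy above.  Sizes are in bytes.
    class AllocRate {
      double _avg;
     public:
      AllocRate() : _avg(0.0) {}
      void   sample(double v) { _avg = 0.75 * _avg + 0.25 * v; }
      double average() const  { return _avg; }
    };

    class LGRPSpaceModel {
      bool      _allocation_failed;
      size_t    _capacity_bytes;
      size_t    _used_bytes;
      AllocRate _alloc_rate;
     public:
      LGRPSpaceModel(size_t cap)
        : _allocation_failed(false), _capacity_bytes(cap), _used_bytes(0) {}

      void set_allocation_failed() { _allocation_failed = true; }

      void allocate(size_t bytes) {
        if (_used_bytes + bytes <= _capacity_bytes) _used_bytes += bytes;
        else set_allocation_failed();              // as in allocate()/cas_allocate() above
      }

      void sample() {
        // A failed allocation is sampled as the whole chunk so the adaptation
        // keeps making progress even when used_in_bytes() stays small.
        size_t s = _allocation_failed ? _capacity_bytes : _used_bytes;
        _allocation_failed = false;
        _alloc_rate.sample((double)s);
      }

      double rate() const { return _alloc_rate.average(); }
    };

    int main() {
      LGRPSpaceModel g(64 * 1024);
      g.allocate(1024);        g.sample();         // normal sample: bytes in use
      g.allocate(1024 * 1024); g.sample();         // failed allocation: full capacity sampled
      return g.rate() > 0.0 ? 0 : 1;
    }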
22.1 --- a/src/share/vm/includeDB_compiler2 Tue Sep 30 12:24:27 2008 -0400 22.2 +++ b/src/share/vm/includeDB_compiler2 Wed Oct 01 20:15:03 2008 -0400 22.3 @@ -586,6 +586,7 @@ 22.4 loopTransform.cpp addnode.hpp 22.5 loopTransform.cpp allocation.inline.hpp 22.6 loopTransform.cpp connode.hpp 22.7 +loopTransform.cpp compileLog.hpp 22.8 loopTransform.cpp divnode.hpp 22.9 loopTransform.cpp loopnode.hpp 22.10 loopTransform.cpp mulnode.hpp 22.11 @@ -601,6 +602,7 @@ 22.12 loopnode.cpp allocation.inline.hpp 22.13 loopnode.cpp callnode.hpp 22.14 loopnode.cpp ciMethodData.hpp 22.15 +loopnode.cpp compileLog.hpp 22.16 loopnode.cpp connode.hpp 22.17 loopnode.cpp divnode.hpp 22.18 loopnode.cpp loopnode.hpp
23.1 --- a/src/share/vm/opto/bytecodeInfo.cpp Tue Sep 30 12:24:27 2008 -0400 23.2 +++ b/src/share/vm/opto/bytecodeInfo.cpp Wed Oct 01 20:15:03 2008 -0400 23.3 @@ -25,19 +25,6 @@ 23.4 #include "incls/_precompiled.incl" 23.5 #include "incls/_bytecodeInfo.cpp.incl" 23.6 23.7 -// These variables are declared in parse1.cpp 23.8 -extern int explicit_null_checks_inserted; 23.9 -extern int explicit_null_checks_elided; 23.10 -extern int explicit_null_checks_inserted_old; 23.11 -extern int explicit_null_checks_elided_old; 23.12 -extern int nodes_created_old; 23.13 -extern int nodes_created; 23.14 -extern int methods_parsed_old; 23.15 -extern int methods_parsed; 23.16 -extern int methods_seen; 23.17 -extern int methods_seen_old; 23.18 - 23.19 - 23.20 //============================================================================= 23.21 //------------------------------InlineTree------------------------------------- 23.22 InlineTree::InlineTree( Compile* c, const InlineTree *caller_tree, ciMethod* callee, JVMState* caller_jvms, int caller_bci, float site_invoke_ratio ) 23.23 @@ -517,27 +504,3 @@ 23.24 } 23.25 return iltp; 23.26 } 23.27 - 23.28 -// ---------------------------------------------------------------------------- 23.29 -#ifndef PRODUCT 23.30 - 23.31 -static void per_method_stats() { 23.32 - // Compute difference between this method's cumulative totals and old totals 23.33 - int explicit_null_checks_cur = explicit_null_checks_inserted - explicit_null_checks_inserted_old; 23.34 - int elided_null_checks_cur = explicit_null_checks_elided - explicit_null_checks_elided_old; 23.35 - 23.36 - // Print differences 23.37 - if( explicit_null_checks_cur ) 23.38 - tty->print_cr("XXX Explicit NULL checks inserted: %d", explicit_null_checks_cur); 23.39 - if( elided_null_checks_cur ) 23.40 - tty->print_cr("XXX Explicit NULL checks removed at parse time: %d", elided_null_checks_cur); 23.41 - 23.42 - // Store the current cumulative totals 23.43 - nodes_created_old = nodes_created; 23.44 - methods_parsed_old = methods_parsed; 23.45 - methods_seen_old = methods_seen; 23.46 - explicit_null_checks_inserted_old = explicit_null_checks_inserted; 23.47 - explicit_null_checks_elided_old = explicit_null_checks_elided; 23.48 -} 23.49 - 23.50 -#endif
24.1 --- a/src/share/vm/opto/callnode.cpp Tue Sep 30 12:24:27 2008 -0400 24.2 +++ b/src/share/vm/opto/callnode.cpp Wed Oct 01 20:15:03 2008 -0400 24.3 @@ -1034,6 +1034,39 @@ 24.4 //============================================================================= 24.5 uint AllocateArrayNode::size_of() const { return sizeof(*this); } 24.6 24.7 +// Retrieve the length from the AllocateArrayNode. Narrow the type with a 24.8 +// CastII, if appropriate. If we are not allowed to create new nodes, and 24.9 +// a CastII is appropriate, return NULL. 24.10 +Node *AllocateArrayNode::make_ideal_length(const TypeOopPtr* oop_type, PhaseTransform *phase, bool allow_new_nodes) { 24.11 + Node *length = in(AllocateNode::ALength); 24.12 + assert(length != NULL, "length is not null"); 24.13 + 24.14 + const TypeInt* length_type = phase->find_int_type(length); 24.15 + const TypeAryPtr* ary_type = oop_type->isa_aryptr(); 24.16 + 24.17 + if (ary_type != NULL && length_type != NULL) { 24.18 + const TypeInt* narrow_length_type = ary_type->narrow_size_type(length_type); 24.19 + if (narrow_length_type != length_type) { 24.20 + // Assert one of: 24.21 + // - the narrow_length is 0 24.22 + // - the narrow_length is not wider than length 24.23 + assert(narrow_length_type == TypeInt::ZERO || 24.24 + (narrow_length_type->_hi <= length_type->_hi && 24.25 + narrow_length_type->_lo >= length_type->_lo), 24.26 + "narrow type must be narrower than length type"); 24.27 + 24.28 + // Return NULL if new nodes are not allowed 24.29 + if (!allow_new_nodes) return NULL; 24.30 + // Create a cast which is control dependent on the initialization to 24.31 + // propagate the fact that the array length must be positive. 24.32 + length = new (phase->C, 2) CastIINode(length, narrow_length_type); 24.33 + length->set_req(0, initialization()->proj_out(0)); 24.34 + } 24.35 + } 24.36 + 24.37 + return length; 24.38 +} 24.39 + 24.40 //============================================================================= 24.41 uint LockNode::size_of() const { return sizeof(*this); } 24.42
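Note: make_ideal_length() only creates a CastII when the array type genuinely narrows the known length range, and the assert requires the narrowed range to be either zero or contained in the original. A sketch of that interval reasoning, with types reduced to [lo, hi] ranges and a hypothetical per-array-type bound standing in for narrow_size_type():

    #include <algorithm>
    #include <cassert>

    // Sketch of the range narrowing make_ideal_length() relies on.  An int type
    // is reduced to a [lo, hi] interval; max_len is a hypothetical stand-in for
    // the bound the array type contributes via narrow_size_type().
    struct IntRange { int lo, hi; };

    static IntRange narrow_size_type(IntRange len, int max_len) {
      IntRange r;
      r.lo = std::max(len.lo, 0);                 // array lengths are non-negative
      r.hi = std::min(len.hi, max_len);           // and bounded by the array type
      if (r.lo > r.hi) r.lo = r.hi = 0;           // empty range: path cannot be taken
      return r;
    }

    static bool needs_cast(IntRange len, IntRange narrowed) {
      // Mirrors the assert in make_ideal_length(): the narrowed range is either
      // zero or contained in the original range.
      assert(((narrowed.lo == 0 && narrowed.hi == 0) ||
              (narrowed.lo >= len.lo && narrowed.hi <= len.hi)) && "must be narrower");
      return narrowed.lo != len.lo || narrowed.hi != len.hi;  // only then emit a CastII
    }

    int main() {
      IntRange len = { -5, 1000 };                // e.g. an unconstrained int argument
      IntRange n   = narrow_size_type(len, 255);  // the array type caps the length at 255
      assert(n.lo == 0 && n.hi == 255 && needs_cast(len, n));
      return 0;
    }

When a cast is needed, the real code makes it control-dependent on the allocation's initialization projection so the tighter length type only holds after the allocation, and with can_create == false it returns NULL instead of building the new node.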
25.1 --- a/src/share/vm/opto/callnode.hpp Tue Sep 30 12:24:27 2008 -0400 25.2 +++ b/src/share/vm/opto/callnode.hpp Wed Oct 01 20:15:03 2008 -0400 25.3 @@ -755,6 +755,15 @@ 25.4 virtual int Opcode() const; 25.5 virtual uint size_of() const; // Size is bigger 25.6 25.7 + // Dig the length operand out of an array allocation site. 25.8 + Node* Ideal_length() { 25.9 + return in(AllocateNode::ALength); 25.10 + } 25.11 + 25.12 + // Dig the length operand out of an array allocation site and narrow the 25.13 + // type with a CastII, if necessary. 25.14 + Node* make_ideal_length(const TypeOopPtr* ary_type, PhaseTransform *phase, bool can_create = true); 25.15 + 25.16 // Pattern-match a possible usage of AllocateArrayNode. 25.17 // Return null if no allocation is recognized. 25.18 static AllocateArrayNode* Ideal_array_allocation(Node* ptr, PhaseTransform* phase) { 25.19 @@ -762,12 +771,6 @@ 25.20 return (allo == NULL || !allo->is_AllocateArray()) 25.21 ? NULL : allo->as_AllocateArray(); 25.22 } 25.23 - 25.24 - // Dig the length operand out of a (possible) array allocation site. 25.25 - static Node* Ideal_length(Node* ptr, PhaseTransform* phase) { 25.26 - AllocateArrayNode* allo = Ideal_array_allocation(ptr, phase); 25.27 - return (allo == NULL) ? NULL : allo->in(AllocateNode::ALength); 25.28 - } 25.29 }; 25.30 25.31 //------------------------------AbstractLockNode-----------------------------------
26.1 --- a/src/share/vm/opto/cfgnode.cpp Tue Sep 30 12:24:27 2008 -0400 26.2 +++ b/src/share/vm/opto/cfgnode.cpp Wed Oct 01 20:15:03 2008 -0400 26.3 @@ -1665,7 +1665,11 @@ 26.4 // compress paths and change unreachable cycles to TOP 26.5 // If not, we can update the input infinitely along a MergeMem cycle 26.6 // Equivalent code is in MemNode::Ideal_common 26.7 - Node *m = phase->transform(n); 26.8 + Node *m = phase->transform(n); 26.9 + if (outcnt() == 0) { // Above transform() may kill us! 26.10 + progress = phase->C->top(); 26.11 + break; 26.12 + } 26.13 // If tranformed to a MergeMem, get the desired slice 26.14 // Otherwise the returned node represents memory for every slice 26.15 Node *new_mem = (m->is_MergeMem()) ? 26.16 @@ -1765,6 +1769,51 @@ 26.17 } 26.18 } 26.19 26.20 +#ifdef _LP64 26.21 + // Push DecodeN down through phi. 26.22 + // The rest of phi graph will transform by split EncodeP node though phis up. 26.23 + if (UseCompressedOops && can_reshape && progress == NULL) { 26.24 + bool may_push = true; 26.25 + bool has_decodeN = false; 26.26 + Node* in_decodeN = NULL; 26.27 + for (uint i=1; i<req(); ++i) {// For all paths in 26.28 + Node *ii = in(i); 26.29 + if (ii->is_DecodeN() && ii->bottom_type() == bottom_type()) { 26.30 + has_decodeN = true; 26.31 + in_decodeN = ii->in(1); 26.32 + } else if (!ii->is_Phi()) { 26.33 + may_push = false; 26.34 + } 26.35 + } 26.36 + 26.37 + if (has_decodeN && may_push) { 26.38 + PhaseIterGVN *igvn = phase->is_IterGVN(); 26.39 + // Note: in_decodeN is used only to define the type of new phi here. 26.40 + PhiNode *new_phi = PhiNode::make_blank(in(0), in_decodeN); 26.41 + uint orig_cnt = req(); 26.42 + for (uint i=1; i<req(); ++i) {// For all paths in 26.43 + Node *ii = in(i); 26.44 + Node* new_ii = NULL; 26.45 + if (ii->is_DecodeN()) { 26.46 + assert(ii->bottom_type() == bottom_type(), "sanity"); 26.47 + new_ii = ii->in(1); 26.48 + } else { 26.49 + assert(ii->is_Phi(), "sanity"); 26.50 + if (ii->as_Phi() == this) { 26.51 + new_ii = new_phi; 26.52 + } else { 26.53 + new_ii = new (phase->C, 2) EncodePNode(ii, in_decodeN->bottom_type()); 26.54 + igvn->register_new_node_with_optimizer(new_ii); 26.55 + } 26.56 + } 26.57 + new_phi->set_req(i, new_ii); 26.58 + } 26.59 + igvn->register_new_node_with_optimizer(new_phi, this); 26.60 + progress = new (phase->C, 2) DecodeNNode(new_phi, bottom_type()); 26.61 + } 26.62 + } 26.63 +#endif 26.64 + 26.65 return progress; // Return any progress 26.66 } 26.67
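Note: pushing DecodeN below the Phi works because encode and decode are inverse address arithmetic, so merging the narrow values and decoding once after the Phi is equivalent to decoding on every incoming path. A standalone sketch of that arithmetic; the heap base, the 3-bit shift (8-byte object alignment) and the 64-bit build are assumptions used only for illustration:

    #include <cassert>
    #include <cstdint>

    // Sketch of the arithmetic EncodeP/DecodeN stand for (64-bit build assumed).
    static const uintptr_t heap_base = 0x100000000ULL;
    static const int       oop_shift = 3;

    static uint32_t  encode(uintptr_t oop)   { return (uint32_t)((oop - heap_base) >> oop_shift); }
    static uintptr_t decode(uint32_t narrow) { return heap_base + ((uintptr_t)narrow << oop_shift); }

    int main() {
      // Two already-decoded values reaching a merge point (the Phi)...
      uintptr_t a = heap_base + 0x1000, b = heap_base + 0x2000;
      // ...can instead be merged in narrow form and decoded once after the Phi:
      uint32_t na = encode(a), nb = encode(b);
      bool take_first = true;                        // stand-in for the control merge
      uintptr_t phi_of_decodes = take_first ? decode(na) : decode(nb);
      uintptr_t decode_of_phi  = decode(take_first ? na : nb);
      assert(phi_of_decodes == decode_of_phi);       // DecodeN(Phi(na,nb)) == Phi(DecodeN(na),DecodeN(nb))
      return 0;
    }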
27.1 --- a/src/share/vm/opto/compile.cpp Tue Sep 30 12:24:27 2008 -0400 27.2 +++ b/src/share/vm/opto/compile.cpp Wed Oct 01 20:15:03 2008 -0400 27.3 @@ -467,6 +467,7 @@ 27.4 } 27.5 } 27.6 set_print_assembly(print_opto_assembly); 27.7 + set_parsed_irreducible_loop(false); 27.8 #endif 27.9 27.10 if (ProfileTraps) { 27.11 @@ -550,6 +551,8 @@ 27.12 rethrow_exceptions(kit.transfer_exceptions_into_jvms()); 27.13 } 27.14 27.15 + print_method("Before RemoveUseless"); 27.16 + 27.17 // Remove clutter produced by parsing. 27.18 if (!failing()) { 27.19 ResourceMark rm; 27.20 @@ -615,8 +618,6 @@ 27.21 if (failing()) return; 27.22 NOT_PRODUCT( verify_graph_edges(); ) 27.23 27.24 - print_method("Before Matching"); 27.25 - 27.26 #ifndef PRODUCT 27.27 if (PrintIdeal) { 27.28 ttyLocker ttyl; // keep the following output all in one block 27.29 @@ -720,6 +721,7 @@ 27.30 TraceTime t1(NULL, &_t_totalCompilation, TimeCompiler, false); 27.31 TraceTime t2(NULL, &_t_stubCompilation, TimeCompiler, false); 27.32 set_print_assembly(PrintFrameConverterAssembly); 27.33 + set_parsed_irreducible_loop(false); 27.34 #endif 27.35 CompileWrapper cw(this); 27.36 Init(/*AliasLevel=*/ 0); 27.37 @@ -2073,6 +2075,44 @@ 27.38 } 27.39 27.40 #ifdef _LP64 27.41 + case Op_CastPP: 27.42 + if (n->in(1)->is_DecodeN() && UseImplicitNullCheckForNarrowOop) { 27.43 + Compile* C = Compile::current(); 27.44 + Node* in1 = n->in(1); 27.45 + const Type* t = n->bottom_type(); 27.46 + Node* new_in1 = in1->clone(); 27.47 + new_in1->as_DecodeN()->set_type(t); 27.48 + 27.49 + if (!Matcher::clone_shift_expressions) { 27.50 + // 27.51 + // x86, ARM and friends can handle 2 adds in addressing mode 27.52 + // and Matcher can fold a DecodeN node into address by using 27.53 + // a narrow oop directly and do implicit NULL check in address: 27.54 + // 27.55 + // [R12 + narrow_oop_reg<<3 + offset] 27.56 + // NullCheck narrow_oop_reg 27.57 + // 27.58 + // On other platforms (Sparc) we have to keep new DecodeN node and 27.59 + // use it to do implicit NULL check in address: 27.60 + // 27.61 + // decode_not_null narrow_oop_reg, base_reg 27.62 + // [base_reg + offset] 27.63 + // NullCheck base_reg 27.64 + // 27.65 + // Pin the new DecodeN node to non-null path on these patforms (Sparc) 27.66 + // to keep the information to which NULL check the new DecodeN node 27.67 + // corresponds to use it as value in implicit_null_check(). 27.68 + // 27.69 + new_in1->set_req(0, n->in(0)); 27.70 + } 27.71 + 27.72 + n->subsume_by(new_in1); 27.73 + if (in1->outcnt() == 0) { 27.74 + in1->disconnect_inputs(NULL); 27.75 + } 27.76 + } 27.77 + break; 27.78 + 27.79 case Op_CmpP: 27.80 // Do this transformation here to preserve CmpPNode::sub() and 27.81 // other TypePtr related Ideal optimizations (for example, ptr nullness). 27.82 @@ -2092,24 +2132,44 @@ 27.83 } else if (in2->Opcode() == Op_ConP) { 27.84 const Type* t = in2->bottom_type(); 27.85 if (t == TypePtr::NULL_PTR && UseImplicitNullCheckForNarrowOop) { 27.86 - if (Matcher::clone_shift_expressions) { 27.87 - // x86, ARM and friends can handle 2 adds in addressing mode. 27.88 - // Decode a narrow oop and do implicit NULL check in address 27.89 - // [R12 + narrow_oop_reg<<3 + offset] 27.90 - new_in2 = ConNode::make(C, TypeNarrowOop::NULL_PTR); 27.91 - } else { 27.92 - // Don't replace CmpP(o ,null) if 'o' is used in AddP 27.93 - // to generate implicit NULL check on Sparc where 27.94 - // narrow oops can't be used in address. 
27.95 - uint i = 0; 27.96 - for (; i < in1->outcnt(); i++) { 27.97 - if (in1->raw_out(i)->is_AddP()) 27.98 - break; 27.99 - } 27.100 - if (i >= in1->outcnt()) { 27.101 - new_in2 = ConNode::make(C, TypeNarrowOop::NULL_PTR); 27.102 - } 27.103 - } 27.104 + new_in2 = ConNode::make(C, TypeNarrowOop::NULL_PTR); 27.105 + // 27.106 + // This transformation together with CastPP transformation above 27.107 + // will generated code for implicit NULL checks for compressed oops. 27.108 + // 27.109 + // The original code after Optimize() 27.110 + // 27.111 + // LoadN memory, narrow_oop_reg 27.112 + // decode narrow_oop_reg, base_reg 27.113 + // CmpP base_reg, NULL 27.114 + // CastPP base_reg // NotNull 27.115 + // Load [base_reg + offset], val_reg 27.116 + // 27.117 + // after these transformations will be 27.118 + // 27.119 + // LoadN memory, narrow_oop_reg 27.120 + // CmpN narrow_oop_reg, NULL 27.121 + // decode_not_null narrow_oop_reg, base_reg 27.122 + // Load [base_reg + offset], val_reg 27.123 + // 27.124 + // and the uncommon path (== NULL) will use narrow_oop_reg directly 27.125 + // since narrow oops can be used in debug info now (see the code in 27.126 + // final_graph_reshaping_walk()). 27.127 + // 27.128 + // At the end the code will be matched to 27.129 + // on x86: 27.130 + // 27.131 + // Load_narrow_oop memory, narrow_oop_reg 27.132 + // Load [R12 + narrow_oop_reg<<3 + offset], val_reg 27.133 + // NullCheck narrow_oop_reg 27.134 + // 27.135 + // and on sparc: 27.136 + // 27.137 + // Load_narrow_oop memory, narrow_oop_reg 27.138 + // decode_not_null narrow_oop_reg, base_reg 27.139 + // Load [base_reg + offset], val_reg 27.140 + // NullCheck base_reg 27.141 + // 27.142 } else if (t->isa_oopptr()) { 27.143 new_in2 = ConNode::make(C, t->make_narrowoop()); 27.144 } 27.145 @@ -2126,6 +2186,49 @@ 27.146 } 27.147 } 27.148 break; 27.149 + 27.150 + case Op_DecodeN: 27.151 + assert(!n->in(1)->is_EncodeP(), "should be optimized out"); 27.152 + break; 27.153 + 27.154 + case Op_EncodeP: { 27.155 + Node* in1 = n->in(1); 27.156 + if (in1->is_DecodeN()) { 27.157 + n->subsume_by(in1->in(1)); 27.158 + } else if (in1->Opcode() == Op_ConP) { 27.159 + Compile* C = Compile::current(); 27.160 + const Type* t = in1->bottom_type(); 27.161 + if (t == TypePtr::NULL_PTR) { 27.162 + n->subsume_by(ConNode::make(C, TypeNarrowOop::NULL_PTR)); 27.163 + } else if (t->isa_oopptr()) { 27.164 + n->subsume_by(ConNode::make(C, t->make_narrowoop())); 27.165 + } 27.166 + } 27.167 + if (in1->outcnt() == 0) { 27.168 + in1->disconnect_inputs(NULL); 27.169 + } 27.170 + break; 27.171 + } 27.172 + 27.173 + case Op_Phi: 27.174 + if (n->as_Phi()->bottom_type()->isa_narrowoop()) { 27.175 + // The EncodeP optimization may create Phi with the same edges 27.176 + // for all paths. It is not handled well by Register Allocator. 27.177 + Node* unique_in = n->in(1); 27.178 + assert(unique_in != NULL, ""); 27.179 + uint cnt = n->req(); 27.180 + for (uint i = 2; i < cnt; i++) { 27.181 + Node* m = n->in(i); 27.182 + assert(m != NULL, ""); 27.183 + if (unique_in != m) 27.184 + unique_in = NULL; 27.185 + } 27.186 + if (unique_in != NULL) { 27.187 + n->subsume_by(unique_in); 27.188 + } 27.189 + } 27.190 + break; 27.191 + 27.192 #endif 27.193 27.194 case Op_ModI:
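Note: the new Op_Phi case in final_graph_reshaping exists because the EncodeP rewrite can leave a narrow-oop Phi whose data inputs are all the same node, which the register allocator handles poorly; such a Phi is simply replaced by that input. A toy version of the uniqueness check (Node and the input layout are stand-ins for the real edge list):

    #include <cstddef>
    #include <vector>

    // Toy version of the check above: slot 0 models the Phi's control input,
    // slots 1..n-1 the per-path data inputs.
    struct Node {};

    static Node* unique_input_or_null(const std::vector<Node*>& req) {
      if (req.size() < 2 || req[1] == NULL) return NULL;
      Node* unique = req[1];
      for (size_t i = 2; i < req.size(); i++) {
        if (req[i] != unique) return NULL;        // inputs differ: the Phi must stay
      }
      return unique;                              // every path merges the same value
    }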
28.1 --- a/src/share/vm/opto/compile.hpp Tue Sep 30 12:24:27 2008 -0400 28.2 +++ b/src/share/vm/opto/compile.hpp Wed Oct 01 20:15:03 2008 -0400 28.3 @@ -160,6 +160,7 @@ 28.4 bool _print_assembly; // True if we should dump assembly code for this compilation 28.5 #ifndef PRODUCT 28.6 bool _trace_opto_output; 28.7 + bool _parsed_irreducible_loop; // True if ciTypeFlow detected irreducible loops during parsing 28.8 #endif 28.9 28.10 // Compilation environment. 28.11 @@ -319,6 +320,8 @@ 28.12 } 28.13 #ifndef PRODUCT 28.14 bool trace_opto_output() const { return _trace_opto_output; } 28.15 + bool parsed_irreducible_loop() const { return _parsed_irreducible_loop; } 28.16 + void set_parsed_irreducible_loop(bool z) { _parsed_irreducible_loop = z; } 28.17 #endif 28.18 28.19 void begin_method() {
29.1 --- a/src/share/vm/opto/connode.cpp Tue Sep 30 12:24:27 2008 -0400 29.2 +++ b/src/share/vm/opto/connode.cpp Wed Oct 01 20:15:03 2008 -0400 29.3 @@ -433,8 +433,8 @@ 29.4 // If not converting int->oop, throw away cast after constant propagation 29.5 Node *CastPPNode::Ideal_DU_postCCP( PhaseCCP *ccp ) { 29.6 const Type *t = ccp->type(in(1)); 29.7 - if (!t->isa_oop_ptr()) { 29.8 - return NULL; // do not transform raw pointers 29.9 + if (!t->isa_oop_ptr() || in(1)->is_DecodeN()) { 29.10 + return NULL; // do not transform raw pointers or narrow oops 29.11 } 29.12 return ConstraintCastNode::Ideal_DU_postCCP(ccp); 29.13 }
30.1 --- a/src/share/vm/opto/doCall.cpp Tue Sep 30 12:24:27 2008 -0400 30.2 +++ b/src/share/vm/opto/doCall.cpp Wed Oct 01 20:15:03 2008 -0400 30.3 @@ -795,7 +795,7 @@ 30.4 30.5 ciInstanceKlass *ikl = receiver_type->klass()->as_instance_klass(); 30.6 if (ikl->is_loaded() && ikl->is_initialized() && !ikl->is_interface() && 30.7 - (ikl == actual_receiver || ikl->is_subclass_of(actual_receiver))) { 30.8 + (ikl == actual_receiver || ikl->is_subtype_of(actual_receiver))) { 30.9 // ikl is a same or better type than the original actual_receiver, 30.10 // e.g. static receiver from bytecodes. 30.11 actual_receiver = ikl;
31.1 --- a/src/share/vm/opto/graphKit.cpp Tue Sep 30 12:24:27 2008 -0400 31.2 +++ b/src/share/vm/opto/graphKit.cpp Wed Oct 01 20:15:03 2008 -0400 31.3 @@ -587,7 +587,7 @@ 31.4 #ifdef ASSERT 31.5 _bci = kit->bci(); 31.6 Parse* parser = kit->is_Parse(); 31.7 - int block = (parser == NULL || parser->block() == NULL) ? -1 : parser->block()->pre_order(); 31.8 + int block = (parser == NULL || parser->block() == NULL) ? -1 : parser->block()->rpo(); 31.9 _block = block; 31.10 #endif 31.11 } 31.12 @@ -596,7 +596,7 @@ 31.13 #ifdef ASSERT 31.14 assert(kit->bci() == _bci, "bci must not shift"); 31.15 Parse* parser = kit->is_Parse(); 31.16 - int block = (parser == NULL || parser->block() == NULL) ? -1 : parser->block()->pre_order(); 31.17 + int block = (parser == NULL || parser->block() == NULL) ? -1 : parser->block()->rpo(); 31.18 assert(block == _block, "block must not shift"); 31.19 #endif 31.20 kit->set_map(_map); 31.21 @@ -1049,10 +1049,19 @@ 31.22 //-------------------------load_array_length----------------------------------- 31.23 Node* GraphKit::load_array_length(Node* array) { 31.24 // Special-case a fresh allocation to avoid building nodes: 31.25 - Node* alen = AllocateArrayNode::Ideal_length(array, &_gvn); 31.26 - if (alen != NULL) return alen; 31.27 - Node *r_adr = basic_plus_adr(array, arrayOopDesc::length_offset_in_bytes()); 31.28 - return _gvn.transform( new (C, 3) LoadRangeNode(0, immutable_memory(), r_adr, TypeInt::POS)); 31.29 + AllocateArrayNode* alloc = AllocateArrayNode::Ideal_array_allocation(array, &_gvn); 31.30 + Node *alen; 31.31 + if (alloc == NULL) { 31.32 + Node *r_adr = basic_plus_adr(array, arrayOopDesc::length_offset_in_bytes()); 31.33 + alen = _gvn.transform( new (C, 3) LoadRangeNode(0, immutable_memory(), r_adr, TypeInt::POS)); 31.34 + } else { 31.35 + alen = alloc->Ideal_length(); 31.36 + Node* ccast = alloc->make_ideal_length(_gvn.type(array)->is_aryptr(), &_gvn); 31.37 + if (ccast != alen) { 31.38 + alen = _gvn.transform(ccast); 31.39 + } 31.40 + } 31.41 + return alen; 31.42 } 31.43 31.44 //------------------------------do_null_check---------------------------------- 31.45 @@ -2847,20 +2856,18 @@ 31.46 assert(just_allocated_object(control()) == javaoop, "just allocated"); 31.47 31.48 #ifdef ASSERT 31.49 - { // Verify that the AllocateNode::Ideal_foo recognizers work: 31.50 - Node* kn = alloc->in(AllocateNode::KlassNode); 31.51 - Node* ln = alloc->in(AllocateNode::ALength); 31.52 - assert(AllocateNode::Ideal_klass(rawoop, &_gvn) == kn, 31.53 - "Ideal_klass works"); 31.54 - assert(AllocateNode::Ideal_klass(javaoop, &_gvn) == kn, 31.55 - "Ideal_klass works"); 31.56 + { // Verify that the AllocateNode::Ideal_allocation recognizers work: 31.57 + assert(AllocateNode::Ideal_allocation(rawoop, &_gvn) == alloc, 31.58 + "Ideal_allocation works"); 31.59 + assert(AllocateNode::Ideal_allocation(javaoop, &_gvn) == alloc, 31.60 + "Ideal_allocation works"); 31.61 if (alloc->is_AllocateArray()) { 31.62 - assert(AllocateArrayNode::Ideal_length(rawoop, &_gvn) == ln, 31.63 - "Ideal_length works"); 31.64 - assert(AllocateArrayNode::Ideal_length(javaoop, &_gvn) == ln, 31.65 - "Ideal_length works"); 31.66 + assert(AllocateArrayNode::Ideal_array_allocation(rawoop, &_gvn) == alloc->as_AllocateArray(), 31.67 + "Ideal_allocation works"); 31.68 + assert(AllocateArrayNode::Ideal_array_allocation(javaoop, &_gvn) == alloc->as_AllocateArray(), 31.69 + "Ideal_allocation works"); 31.70 } else { 31.71 - assert(ln->is_top(), "no length, please"); 31.72 + 
assert(alloc->in(AllocateNode::ALength)->is_top(), "no length, please"); 31.73 } 31.74 } 31.75 #endif //ASSERT 31.76 @@ -3109,25 +3116,20 @@ 31.77 // (This happens via a non-constant argument to inline_native_newArray.) 31.78 // In any case, the value of klass_node provides the desired array type. 31.79 const TypeInt* length_type = _gvn.find_int_type(length); 31.80 - const TypeInt* narrow_length_type = NULL; 31.81 const TypeOopPtr* ary_type = _gvn.type(klass_node)->is_klassptr()->as_instance_type(); 31.82 if (ary_type->isa_aryptr() && length_type != NULL) { 31.83 // Try to get a better type than POS for the size 31.84 ary_type = ary_type->is_aryptr()->cast_to_size(length_type); 31.85 - narrow_length_type = ary_type->is_aryptr()->size(); 31.86 - if (narrow_length_type == length_type) 31.87 - narrow_length_type = NULL; 31.88 } 31.89 31.90 Node* javaoop = set_output_for_allocation(alloc, ary_type, raw_mem_only); 31.91 31.92 - // Cast length on remaining path to be positive: 31.93 - if (narrow_length_type != NULL) { 31.94 - Node* ccast = new (C, 2) CastIINode(length, narrow_length_type); 31.95 - ccast->set_req(0, control()); 31.96 - _gvn.set_type_bottom(ccast); 31.97 - record_for_igvn(ccast); 31.98 - if (map()->find_edge(length) >= 0) { 31.99 + // Cast length on remaining path to be as narrow as possible 31.100 + if (map()->find_edge(length) >= 0) { 31.101 + Node* ccast = alloc->make_ideal_length(ary_type, &_gvn); 31.102 + if (ccast != length) { 31.103 + _gvn.set_type_bottom(ccast); 31.104 + record_for_igvn(ccast); 31.105 replace_in_map(length, ccast); 31.106 } 31.107 }
32.1 --- a/src/share/vm/opto/ifg.cpp Tue Sep 30 12:24:27 2008 -0400 32.2 +++ b/src/share/vm/opto/ifg.cpp Wed Oct 01 20:15:03 2008 -0400 32.3 @@ -485,8 +485,9 @@ 32.4 // Liveout things are presumed live for the whole block. We accumulate 32.5 // 'area' accordingly. If they get killed in the block, we'll subtract 32.6 // the unused part of the block from the area. 32.7 - double cost = b->_freq * double(last_inst-last_phi); 32.8 - assert( cost >= 0, "negative spill cost" ); 32.9 + int inst_count = last_inst - last_phi; 32.10 + double cost = (inst_count <= 0) ? 0.0 : b->_freq * double(inst_count); 32.11 + assert(!(cost < 0.0), "negative spill cost" ); 32.12 IndexSetIterator elements(&liveout); 32.13 uint lidx; 32.14 while ((lidx = elements.next()) != 0) { 32.15 @@ -590,7 +591,7 @@ 32.16 } else { // Else it is live 32.17 // A DEF also ends 'area' partway through the block. 32.18 lrgs(r)._area -= cost; 32.19 - assert( lrgs(r)._area >= 0, "negative spill area" ); 32.20 + assert(!(lrgs(r)._area < 0.0), "negative spill area" ); 32.21 32.22 // Insure high score for immediate-use spill copies so they get a color 32.23 if( n->is_SpillCopy() 32.24 @@ -703,8 +704,9 @@ 32.25 32.26 } // End of if normal register-allocated value 32.27 32.28 - cost -= b->_freq; // Area remaining in the block 32.29 - if( cost < 0.0 ) cost = 0.0; // Cost goes negative in the Phi area 32.30 + // Area remaining in the block 32.31 + inst_count--; 32.32 + cost = (inst_count <= 0) ? 0.0 : b->_freq * double(inst_count); 32.33 32.34 // Make all inputs live 32.35 if( !n->is_Phi() ) { // Phi function uses come from prior block 32.36 @@ -751,7 +753,7 @@ 32.37 assert( pressure[0] == count_int_pressure (&liveout), "" ); 32.38 assert( pressure[1] == count_float_pressure(&liveout), "" ); 32.39 } 32.40 - assert( lrg._area >= 0, "negative spill area" ); 32.41 + assert(!(lrg._area < 0.0), "negative spill area" ); 32.42 } 32.43 } 32.44 } // End of reverse pass over all instructions in block
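Note: the ifg.cpp change recomputes the remaining block area from the instruction count and clamps it at zero instead of repeatedly subtracting b->_freq, and the asserts are rewritten as !(x < 0.0). Whether NaN block frequencies motivated the new form is not stated in the diff, but unlike x >= 0.0 the new predicate does hold for NaN. A small sketch:

    #include <cassert>
    #include <limits>

    // Sketch of the clamped block-area cost above.  freq models b->_freq and
    // inst_count models last_inst - last_phi, which can legitimately reach zero.
    static double block_area_cost(double freq, int inst_count) {
      return (inst_count <= 0) ? 0.0 : freq * (double)inst_count;
    }

    int main() {
      assert(block_area_cost(2.5, 4) == 10.0);
      assert(block_area_cost(2.5, 0) == 0.0);                    // clamped, never negative
      double c = block_area_cost(std::numeric_limits<double>::quiet_NaN(), 4);
      assert(!(c < 0.0));    // the rewritten assert form; note that (c >= 0.0) is false for NaN
      return 0;
    }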
33.1 --- a/src/share/vm/opto/loopTransform.cpp Tue Sep 30 12:24:27 2008 -0400 33.2 +++ b/src/share/vm/opto/loopTransform.cpp Wed Oct 01 20:15:03 2008 -0400 33.3 @@ -1012,6 +1012,8 @@ 33.4 if (!has_ctrl(old)) 33.5 set_loop(nnn, loop); 33.6 } 33.7 + 33.8 + loop->record_for_igvn(); 33.9 } 33.10 33.11 //------------------------------do_maximally_unroll----------------------------
34.1 --- a/src/share/vm/opto/loopnode.cpp Tue Sep 30 12:24:27 2008 -0400 34.2 +++ b/src/share/vm/opto/loopnode.cpp Wed Oct 01 20:15:03 2008 -0400 34.3 @@ -1279,7 +1279,7 @@ 34.4 // Visit all children, looking for Phis 34.5 for (DUIterator i = cl->outs(); cl->has_out(i); i++) { 34.6 Node *out = cl->out(i); 34.7 - if (!out->is_Phi()) continue; // Looking for phis 34.8 + if (!out->is_Phi() || out == phi) continue; // Looking for other phis 34.9 PhiNode* phi2 = out->as_Phi(); 34.10 Node *incr2 = phi2->in( LoopNode::LoopBackControl ); 34.11 // Look for induction variables of the form: X += constant 34.12 @@ -1388,6 +1388,37 @@ 34.13 34.14 #endif 34.15 34.16 +static void log_loop_tree(IdealLoopTree* root, IdealLoopTree* loop, CompileLog* log) { 34.17 + if (loop == root) { 34.18 + if (loop->_child != NULL) { 34.19 + log->begin_head("loop_tree"); 34.20 + log->end_head(); 34.21 + if( loop->_child ) log_loop_tree(root, loop->_child, log); 34.22 + log->tail("loop_tree"); 34.23 + assert(loop->_next == NULL, "what?"); 34.24 + } 34.25 + } else { 34.26 + Node* head = loop->_head; 34.27 + log->begin_head("loop"); 34.28 + log->print(" idx='%d' ", head->_idx); 34.29 + if (loop->_irreducible) log->print("irreducible='1' "); 34.30 + if (head->is_Loop()) { 34.31 + if (head->as_Loop()->is_inner_loop()) log->print("inner_loop='1' "); 34.32 + if (head->as_Loop()->is_partial_peel_loop()) log->print("partial_peel_loop='1' "); 34.33 + } 34.34 + if (head->is_CountedLoop()) { 34.35 + CountedLoopNode* cl = head->as_CountedLoop(); 34.36 + if (cl->is_pre_loop()) log->print("pre_loop='%d' ", cl->main_idx()); 34.37 + if (cl->is_main_loop()) log->print("main_loop='%d' ", cl->_idx); 34.38 + if (cl->is_post_loop()) log->print("post_loop='%d' ", cl->main_idx()); 34.39 + } 34.40 + log->end_head(); 34.41 + if( loop->_child ) log_loop_tree(root, loop->_child, log); 34.42 + log->tail("loop"); 34.43 + if( loop->_next ) log_loop_tree(root, loop->_next, log); 34.44 + } 34.45 +} 34.46 + 34.47 //============================================================================= 34.48 //------------------------------PhaseIdealLoop--------------------------------- 34.49 // Create a PhaseLoop. Build the ideal Loop tree. Map each Ideal Node to 34.50 @@ -1624,10 +1655,13 @@ 34.51 // Cleanup any modified bits 34.52 _igvn.optimize(); 34.53 34.54 - // Do not repeat loop optimizations if irreducible loops are present 34.55 - // by claiming no-progress. 34.56 - if( _has_irreducible_loops ) 34.57 - C->clear_major_progress(); 34.58 + // disable assert until issue with split_flow_path is resolved (6742111) 34.59 + // assert(!_has_irreducible_loops || C->parsed_irreducible_loop() || C->is_osr_compilation(), 34.60 + // "shouldn't introduce irreducible loops"); 34.61 + 34.62 + if (C->log() != NULL) { 34.63 + log_loop_tree(_ltree_root, _ltree_root, C->log()); 34.64 + } 34.65 } 34.66 34.67 #ifndef PRODUCT 34.68 @@ -2732,11 +2766,7 @@ 34.69 } 34.70 34.71 void PhaseIdealLoop::dump( IdealLoopTree *loop, uint idx, Node_List &rpo_list ) const { 34.72 - 34.73 - // Indent by loop nesting depth 34.74 - for( uint x = 0; x < loop->_nest; x++ ) 34.75 - tty->print(" "); 34.76 - tty->print_cr("---- Loop N%d-N%d ----", loop->_head->_idx,loop->_tail->_idx); 34.77 + loop->dump_head(); 34.78 34.79 // Now scan for CFG nodes in the same loop 34.80 for( uint j=idx; j > 0; j-- ) {
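
The new log_loop_tree() routine added above prints the loop tree into the compile log as nested elements, one per loop, with attributes emitted between begin_head() and end_head(). Assuming CompileLog's usual behaviour (begin_head opens "<tag", end_head closes it with ">", tail writes "</tag>"), the output should look roughly like the hypothetical fragment below; indentation is added here for readability and the exact attribute set depends on the loop kind.

    <loop_tree>
    <loop idx='42' inner_loop='1' >
    <loop idx='57' pre_loop='63' >
    </loop>
    </loop>
    </loop_tree>
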
35.1 --- a/src/share/vm/opto/loopnode.hpp Tue Sep 30 12:24:27 2008 -0400 35.2 +++ b/src/share/vm/opto/loopnode.hpp Wed Oct 01 20:15:03 2008 -0400 35.3 @@ -192,6 +192,8 @@ 35.4 int is_main_no_pre_loop() const { return _loop_flags & Main_Has_No_Pre_Loop; } 35.5 void set_main_no_pre_loop() { _loop_flags |= Main_Has_No_Pre_Loop; } 35.6 35.7 + int main_idx() const { return _main_idx; } 35.8 + 35.9 35.10 void set_pre_loop (CountedLoopNode *main) { assert(is_normal_loop(),""); _loop_flags |= Pre ; _main_idx = main->_idx; } 35.11 void set_main_loop ( ) { assert(is_normal_loop(),""); _loop_flags |= Main; }
36.1 --- a/src/share/vm/opto/loopopts.cpp Tue Sep 30 12:24:27 2008 -0400 36.2 +++ b/src/share/vm/opto/loopopts.cpp Wed Oct 01 20:15:03 2008 -0400 36.3 @@ -2667,6 +2667,10 @@ 36.4 // Fix this by adjusting to use the post-increment trip counter. 36.5 Node *phi = cl->phi(); 36.6 if( !phi ) return; // Dead infinite loop 36.7 + 36.8 + // Shape messed up, probably by iteration_split_impl 36.9 + if (phi->in(LoopNode::LoopBackControl) != cl->incr()) return; 36.10 + 36.11 bool progress = true; 36.12 while (progress) { 36.13 progress = false;
37.1 --- a/src/share/vm/opto/matcher.cpp Tue Sep 30 12:24:27 2008 -0400 37.2 +++ b/src/share/vm/opto/matcher.cpp Wed Oct 01 20:15:03 2008 -0400 37.3 @@ -273,7 +273,7 @@ 37.4 find_shared( C->root() ); 37.5 find_shared( C->top() ); 37.6 37.7 - C->print_method("Before Matching", 2); 37.8 + C->print_method("Before Matching"); 37.9 37.10 // Swap out to old-space; emptying new-space 37.11 Arena *old = C->node_arena()->move_contents(C->old_arena()); 37.12 @@ -840,7 +840,7 @@ 37.13 _new2old_map.map(m->_idx, n); 37.14 #endif 37.15 if (m->in(0) != NULL) // m might be top 37.16 - collect_null_checks(m); 37.17 + collect_null_checks(m, n); 37.18 } else { // Else just a regular 'ol guy 37.19 m = n->clone(); // So just clone into new-space 37.20 #ifdef ASSERT 37.21 @@ -1478,12 +1478,19 @@ 37.22 m = _mem_node; 37.23 assert(m != NULL && m->is_Mem(), "expecting memory node"); 37.24 } 37.25 - if (m->adr_type() != mach->adr_type()) { 37.26 + const Type* mach_at = mach->adr_type(); 37.27 + // DecodeN node consumed by an address may have different type 37.28 + // then its input. Don't compare types for such case. 37.29 + if (m->adr_type() != mach_at && m->in(MemNode::Address)->is_AddP() && 37.30 + m->in(MemNode::Address)->in(AddPNode::Address)->is_DecodeN()) { 37.31 + mach_at = m->adr_type(); 37.32 + } 37.33 + if (m->adr_type() != mach_at) { 37.34 m->dump(); 37.35 tty->print_cr("mach:"); 37.36 mach->dump(1); 37.37 } 37.38 - assert(m->adr_type() == mach->adr_type(), "matcher should not change adr type"); 37.39 + assert(m->adr_type() == mach_at, "matcher should not change adr type"); 37.40 } 37.41 #endif 37.42 } 37.43 @@ -1995,7 +2002,7 @@ 37.44 // it. Used by later implicit-null-check handling. Actually collects 37.45 // either an IfTrue or IfFalse for the common NOT-null path, AND the ideal 37.46 // value being tested. 37.47 -void Matcher::collect_null_checks( Node *proj ) { 37.48 +void Matcher::collect_null_checks( Node *proj, Node *orig_proj ) { 37.49 Node *iff = proj->in(0); 37.50 if( iff->Opcode() == Op_If ) { 37.51 // During matching If's have Bool & Cmp side-by-side 37.52 @@ -2008,20 +2015,47 @@ 37.53 if (ct == TypePtr::NULL_PTR || 37.54 (opc == Op_CmpN && ct == TypeNarrowOop::NULL_PTR)) { 37.55 37.56 + bool push_it = false; 37.57 if( proj->Opcode() == Op_IfTrue ) { 37.58 extern int all_null_checks_found; 37.59 all_null_checks_found++; 37.60 if( b->_test._test == BoolTest::ne ) { 37.61 - _null_check_tests.push(proj); 37.62 - _null_check_tests.push(cmp->in(1)); 37.63 + push_it = true; 37.64 } 37.65 } else { 37.66 assert( proj->Opcode() == Op_IfFalse, "" ); 37.67 if( b->_test._test == BoolTest::eq ) { 37.68 - _null_check_tests.push(proj); 37.69 - _null_check_tests.push(cmp->in(1)); 37.70 + push_it = true; 37.71 } 37.72 } 37.73 + if( push_it ) { 37.74 + _null_check_tests.push(proj); 37.75 + Node* val = cmp->in(1); 37.76 +#ifdef _LP64 37.77 + if (UseCompressedOops && !Matcher::clone_shift_expressions && 37.78 + val->bottom_type()->isa_narrowoop()) { 37.79 + // 37.80 + // Look for DecodeN node which should be pinned to orig_proj. 37.81 + // On platforms (Sparc) which can not handle 2 adds 37.82 + // in addressing mode we have to keep a DecodeN node and 37.83 + // use it to do implicit NULL check in address. 37.84 + // 37.85 + // DecodeN node was pinned to non-null path (orig_proj) during 37.86 + // CastPP transformation in final_graph_reshaping_impl(). 
37.87 + // 37.88 + uint cnt = orig_proj->outcnt(); 37.89 + for (uint i = 0; i < orig_proj->outcnt(); i++) { 37.90 + Node* d = orig_proj->raw_out(i); 37.91 + if (d->is_DecodeN() && d->in(1) == val) { 37.92 + val = d; 37.93 + val->set_req(0, NULL); // Unpin now. 37.94 + break; 37.95 + } 37.96 + } 37.97 + } 37.98 +#endif 37.99 + _null_check_tests.push(val); 37.100 + } 37.101 } 37.102 } 37.103 }
38.1 --- a/src/share/vm/opto/matcher.hpp Tue Sep 30 12:24:27 2008 -0400 38.2 +++ b/src/share/vm/opto/matcher.hpp Wed Oct 01 20:15:03 2008 -0400 38.3 @@ -166,7 +166,7 @@ 38.4 // List of IfFalse or IfTrue Nodes that indicate a taken null test. 38.5 // List is valid in the post-matching space. 38.6 Node_List _null_check_tests; 38.7 - void collect_null_checks( Node *proj ); 38.8 + void collect_null_checks( Node *proj, Node *orig_proj ); 38.9 void validate_null_checks( ); 38.10 38.11 Matcher( Node_List &proj_list );
39.1 --- a/src/share/vm/opto/memnode.cpp Tue Sep 30 12:24:27 2008 -0400 39.2 +++ b/src/share/vm/opto/memnode.cpp Wed Oct 01 20:15:03 2008 -0400 39.3 @@ -1887,6 +1887,38 @@ 39.4 return tap->size(); 39.5 39.6 39.7 +//-------------------------------Ideal--------------------------------------- 39.8 +// Feed through the length in AllocateArray(...length...)._length. 39.9 +Node *LoadRangeNode::Ideal(PhaseGVN *phase, bool can_reshape) { 39.10 + Node* p = MemNode::Ideal_common(phase, can_reshape); 39.11 + if (p) return (p == NodeSentinel) ? NULL : p; 39.12 + 39.13 + // Take apart the address into an oop and an offset. 39.14 + // Return 'this' if we cannot. 39.15 + Node* adr = in(MemNode::Address); 39.16 + intptr_t offset = 0; 39.17 + Node* base = AddPNode::Ideal_base_and_offset(adr, phase, offset); 39.18 + if (base == NULL) return NULL; 39.19 + const TypeAryPtr* tary = phase->type(adr)->isa_aryptr(); 39.20 + if (tary == NULL) return NULL; 39.21 + 39.22 + // We can fetch the length directly through an AllocateArrayNode. 39.23 + // This works even if the length is not constant (clone or newArray). 39.24 + if (offset == arrayOopDesc::length_offset_in_bytes()) { 39.25 + AllocateArrayNode* alloc = AllocateArrayNode::Ideal_array_allocation(base, phase); 39.26 + if (alloc != NULL) { 39.27 + Node* allocated_length = alloc->Ideal_length(); 39.28 + Node* len = alloc->make_ideal_length(tary, phase); 39.29 + if (allocated_length != len) { 39.30 + // New CastII improves on this. 39.31 + return len; 39.32 + } 39.33 + } 39.34 + } 39.35 + 39.36 + return NULL; 39.37 +} 39.38 + 39.39 //------------------------------Identity--------------------------------------- 39.40 // Feed through the length in AllocateArray(...length...)._length. 39.41 Node* LoadRangeNode::Identity( PhaseTransform *phase ) { 39.42 @@ -1905,15 +1937,22 @@ 39.43 // We can fetch the length directly through an AllocateArrayNode. 39.44 // This works even if the length is not constant (clone or newArray). 39.45 if (offset == arrayOopDesc::length_offset_in_bytes()) { 39.46 - Node* allocated_length = AllocateArrayNode::Ideal_length(base, phase); 39.47 - if (allocated_length != NULL) { 39.48 - return allocated_length; 39.49 + AllocateArrayNode* alloc = AllocateArrayNode::Ideal_array_allocation(base, phase); 39.50 + if (alloc != NULL) { 39.51 + Node* allocated_length = alloc->Ideal_length(); 39.52 + // Do not allow make_ideal_length to allocate a CastII node. 39.53 + Node* len = alloc->make_ideal_length(tary, phase, false); 39.54 + if (allocated_length == len) { 39.55 + // Return allocated_length only if it would not be improved by a CastII. 39.56 + return allocated_length; 39.57 + } 39.58 + } 39.59 } 39.60 39.61 return this; 39.62 39.63 } 39.64 + 39.65 //============================================================================= 39.66 //---------------------------StoreNode::make----------------------------------- 39.67 // Polymorphic factory method:
40.1 --- a/src/share/vm/opto/memnode.hpp Tue Sep 30 12:24:27 2008 -0400 40.2 +++ b/src/share/vm/opto/memnode.hpp Wed Oct 01 20:15:03 2008 -0400 40.3 @@ -241,6 +241,7 @@ 40.4 virtual int Opcode() const; 40.5 virtual const Type *Value( PhaseTransform *phase ) const; 40.6 virtual Node *Identity( PhaseTransform *phase ); 40.7 + virtual Node *Ideal(PhaseGVN *phase, bool can_reshape); 40.8 }; 40.9 40.10 //------------------------------LoadLNode--------------------------------------
41.1 --- a/src/share/vm/opto/parse.hpp Tue Sep 30 12:24:27 2008 -0400 41.2 +++ b/src/share/vm/opto/parse.hpp Wed Oct 01 20:15:03 2008 -0400 41.3 @@ -167,9 +167,19 @@ 41.4 41.5 int start() const { return flow()->start(); } 41.6 int limit() const { return flow()->limit(); } 41.7 - int pre_order() const { return flow()->pre_order(); } 41.8 + int rpo() const { return flow()->rpo(); } 41.9 int start_sp() const { return flow()->stack_size(); } 41.10 41.11 + bool is_loop_head() const { return flow()->is_loop_head(); } 41.12 + bool is_SEL_head() const { return flow()->is_single_entry_loop_head(); } 41.13 + bool is_SEL_backedge(Block* pred) const{ return is_SEL_head() && pred->rpo() >= rpo(); } 41.14 + bool is_invariant_local(uint i) const { 41.15 + const JVMState* jvms = start_map()->jvms(); 41.16 + if (!jvms->is_loc(i)) return false; 41.17 + return flow()->is_invariant_local(i - jvms->locoff()); 41.18 + } 41.19 + bool can_elide_SEL_phi(uint i) const { assert(is_SEL_head(),""); return is_invariant_local(i); } 41.20 + 41.21 const Type* peek(int off=0) const { return stack_type_at(start_sp() - (off+1)); } 41.22 41.23 const Type* stack_type_at(int i) const; 41.24 @@ -305,7 +315,7 @@ 41.25 // entry_bci() -- see osr_bci, etc. 41.26 41.27 ciTypeFlow* flow() const { return _flow; } 41.28 - // blocks() -- see pre_order_at, start_block, etc. 41.29 + // blocks() -- see rpo_at, start_block, etc. 41.30 int block_count() const { return _block_count; } 41.31 41.32 GraphKit& exits() { return _exits; } 41.33 @@ -330,12 +340,12 @@ 41.34 // Must this parse be aborted? 41.35 bool failing() { return C->failing(); } 41.36 41.37 - Block* pre_order_at(int po) { 41.38 - assert(0 <= po && po < _block_count, "oob"); 41.39 - return &_blocks[po]; 41.40 + Block* rpo_at(int rpo) { 41.41 + assert(0 <= rpo && rpo < _block_count, "oob"); 41.42 + return &_blocks[rpo]; 41.43 } 41.44 Block* start_block() { 41.45 - return pre_order_at(flow()->start_block()->pre_order()); 41.46 + return rpo_at(flow()->start_block()->rpo()); 41.47 } 41.48 // Can return NULL if the flow pass did not complete a block. 41.49 Block* successor_for_bci(int bci) { 41.50 @@ -359,9 +369,6 @@ 41.51 // Parse all the basic blocks. 41.52 void do_all_blocks(); 41.53 41.54 - // Helper for do_all_blocks; makes one pass in pre-order. 41.55 - void visit_blocks(); 41.56 - 41.57 // Parse the current basic block 41.58 void do_one_block(); 41.59
42.1 --- a/src/share/vm/opto/parse1.cpp Tue Sep 30 12:24:27 2008 -0400 42.2 +++ b/src/share/vm/opto/parse1.cpp Wed Oct 01 20:15:03 2008 -0400 42.3 @@ -29,17 +29,17 @@ 42.4 // the most. Some of the non-static variables are needed in bytecodeInfo.cpp 42.5 // and eventually should be encapsulated in a proper class (gri 8/18/98). 42.6 42.7 -int nodes_created = 0; int nodes_created_old = 0; 42.8 -int methods_parsed = 0; int methods_parsed_old = 0; 42.9 -int methods_seen = 0; int methods_seen_old = 0; 42.10 +int nodes_created = 0; 42.11 +int methods_parsed = 0; 42.12 +int methods_seen = 0; 42.13 +int blocks_parsed = 0; 42.14 +int blocks_seen = 0; 42.15 42.16 -int explicit_null_checks_inserted = 0, explicit_null_checks_inserted_old = 0; 42.17 -int explicit_null_checks_elided = 0, explicit_null_checks_elided_old = 0; 42.18 +int explicit_null_checks_inserted = 0; 42.19 +int explicit_null_checks_elided = 0; 42.20 int all_null_checks_found = 0, implicit_null_checks = 0; 42.21 int implicit_null_throws = 0; 42.22 42.23 -int parse_idx = 0; 42.24 -size_t parse_arena = 0; 42.25 int reclaim_idx = 0; 42.26 int reclaim_in = 0; 42.27 int reclaim_node = 0; 42.28 @@ -61,6 +61,7 @@ 42.29 tty->cr(); 42.30 if (methods_seen != methods_parsed) 42.31 tty->print_cr("Reasons for parse failures (NOT cumulative):"); 42.32 + tty->print_cr("Blocks parsed: %d Blocks seen: %d", blocks_parsed, blocks_seen); 42.33 42.34 if( explicit_null_checks_inserted ) 42.35 tty->print_cr("%d original NULL checks - %d elided (%2d%%); optimizer leaves %d,", explicit_null_checks_inserted, explicit_null_checks_elided, (100*explicit_null_checks_elided)/explicit_null_checks_inserted, all_null_checks_found); 42.36 @@ -373,6 +374,12 @@ 42.37 C->record_method_not_compilable_all_tiers(_flow->failure_reason()); 42.38 } 42.39 42.40 +#ifndef PRODUCT 42.41 + if (_flow->has_irreducible_entry()) { 42.42 + C->set_parsed_irreducible_loop(true); 42.43 + } 42.44 +#endif 42.45 + 42.46 if (_expected_uses <= 0) { 42.47 _prof_factor = 1; 42.48 } else { 42.49 @@ -556,118 +563,93 @@ 42.50 set_map(entry_map); 42.51 do_exits(); 42.52 42.53 - // Collect a few more statistics. 42.54 - parse_idx += C->unique(); 42.55 - parse_arena += C->node_arena()->used(); 42.56 - 42.57 if (log) log->done("parse nodes='%d' memory='%d'", 42.58 C->unique(), C->node_arena()->used()); 42.59 } 42.60 42.61 //---------------------------do_all_blocks------------------------------------- 42.62 void Parse::do_all_blocks() { 42.63 - _blocks_merged = 0; 42.64 - _blocks_parsed = 0; 42.65 + bool has_irreducible = flow()->has_irreducible_entry(); 42.66 42.67 - int old_blocks_merged = -1; 42.68 - int old_blocks_parsed = -1; 42.69 + // Walk over all blocks in Reverse Post-Order. 42.70 + while (true) { 42.71 + bool progress = false; 42.72 + for (int rpo = 0; rpo < block_count(); rpo++) { 42.73 + Block* block = rpo_at(rpo); 42.74 42.75 - for (int tries = 0; ; tries++) { 42.76 - visit_blocks(); 42.77 - if (failing()) return; // Check for bailout 42.78 + if (block->is_parsed()) continue; 42.79 42.80 - // No need for a work list. The outer loop is hardly ever repeated. 42.81 - // The following loop traverses the blocks in a reasonable pre-order, 42.82 - // as produced by the ciTypeFlow pass. 
42.83 + if (!block->is_merged()) { 42.84 + // Dead block, no state reaches this block 42.85 + continue; 42.86 + } 42.87 42.88 - // This loop can be taken more than once if there are two entries to 42.89 - // a loop (irreduceable CFG), and the edge which ciTypeFlow chose 42.90 - // as the first predecessor to the loop goes dead in the parser, 42.91 - // due to parse-time optimization. (Could happen with obfuscated code.) 42.92 + // Prepare to parse this block. 42.93 + load_state_from(block); 42.94 42.95 - // Look for progress, or the lack of it: 42.96 - if (_blocks_parsed == block_count()) { 42.97 - // That's all, folks. 42.98 - if (TraceOptoParse) { 42.99 - tty->print_cr("All blocks parsed."); 42.100 + if (stopped()) { 42.101 + // Block is dead. 42.102 + continue; 42.103 } 42.104 + 42.105 + blocks_parsed++; 42.106 + 42.107 + progress = true; 42.108 + if (block->is_loop_head() || block->is_handler() || has_irreducible && !block->is_ready()) { 42.109 + // Not all preds have been parsed. We must build phis everywhere. 42.110 + // (Note that dead locals do not get phis built, ever.) 42.111 + ensure_phis_everywhere(); 42.112 + 42.113 + // Leave behind an undisturbed copy of the map, for future merges. 42.114 + set_map(clone_map()); 42.115 + } 42.116 + 42.117 + if (control()->is_Region() && !block->is_loop_head() && !has_irreducible && !block->is_handler()) { 42.118 + // In the absence of irreducible loops, the Region and Phis 42.119 + // associated with a merge that doesn't involve a backedge can 42.120 + // be simplfied now since the RPO parsing order guarantees 42.121 + // that any path which was supposed to reach here has already 42.122 + // been parsed or must be dead. 42.123 + Node* c = control(); 42.124 + Node* result = _gvn.transform_no_reclaim(control()); 42.125 + if (c != result && TraceOptoParse) { 42.126 + tty->print_cr("Block #%d replace %d with %d", block->rpo(), c->_idx, result->_idx); 42.127 + } 42.128 + if (result != top()) { 42.129 + record_for_igvn(result); 42.130 + } 42.131 + } 42.132 + 42.133 + // Parse the block. 42.134 + do_one_block(); 42.135 + 42.136 + // Check for bailouts. 42.137 + if (failing()) return; 42.138 + } 42.139 + 42.140 + // with irreducible loops multiple passes might be necessary to parse everything 42.141 + if (!has_irreducible || !progress) { 42.142 break; 42.143 } 42.144 + } 42.145 42.146 - // How much work was done this time around? 42.147 - int new_blocks_merged = _blocks_merged - old_blocks_merged; 42.148 - int new_blocks_parsed = _blocks_parsed - old_blocks_parsed; 42.149 - if (new_blocks_merged == 0) { 42.150 - if (TraceOptoParse) { 42.151 - tty->print_cr("All live blocks parsed; %d dead blocks.", block_count() - _blocks_parsed); 42.152 - } 42.153 - // No new blocks have become parseable. Some blocks are just dead. 42.154 - break; 42.155 - } 42.156 - assert(new_blocks_parsed > 0, "must make progress"); 42.157 - assert(tries < block_count(), "the pre-order cannot be this bad!"); 42.158 - 42.159 - old_blocks_merged = _blocks_merged; 42.160 - old_blocks_parsed = _blocks_parsed; 42.161 - } 42.162 + blocks_seen += block_count(); 42.163 42.164 #ifndef PRODUCT 42.165 // Make sure there are no half-processed blocks remaining. 42.166 // Every remaining unprocessed block is dead and may be ignored now. 
42.167 - for (int po = 0; po < block_count(); po++) { 42.168 - Block* block = pre_order_at(po); 42.169 + for (int rpo = 0; rpo < block_count(); rpo++) { 42.170 + Block* block = rpo_at(rpo); 42.171 if (!block->is_parsed()) { 42.172 if (TraceOptoParse) { 42.173 - tty->print("Skipped dead block %d at bci:%d", po, block->start()); 42.174 - assert(!block->is_merged(), "no half-processed blocks"); 42.175 + tty->print_cr("Skipped dead block %d at bci:%d", rpo, block->start()); 42.176 } 42.177 + assert(!block->is_merged(), "no half-processed blocks"); 42.178 } 42.179 } 42.180 #endif 42.181 } 42.182 42.183 -//---------------------------visit_blocks-------------------------------------- 42.184 -void Parse::visit_blocks() { 42.185 - // Walk over all blocks, parsing every one that has been reached (merged). 42.186 - for (int po = 0; po < block_count(); po++) { 42.187 - Block* block = pre_order_at(po); 42.188 - 42.189 - if (block->is_parsed()) { 42.190 - // Do not parse twice. 42.191 - continue; 42.192 - } 42.193 - 42.194 - if (!block->is_merged()) { 42.195 - // No state on this block. It had not yet been reached. 42.196 - // Delay reaching it until later. 42.197 - continue; 42.198 - } 42.199 - 42.200 - // Prepare to parse this block. 42.201 - load_state_from(block); 42.202 - 42.203 - if (stopped()) { 42.204 - // Block is dead. 42.205 - continue; 42.206 - } 42.207 - 42.208 - if (!block->is_ready() || block->is_handler()) { 42.209 - // Not all preds have been parsed. We must build phis everywhere. 42.210 - // (Note that dead locals do not get phis built, ever.) 42.211 - ensure_phis_everywhere(); 42.212 - 42.213 - // Leave behind an undisturbed copy of the map, for future merges. 42.214 - set_map(clone_map()); 42.215 - } 42.216 - 42.217 - // Ready or not, parse the block. 42.218 - do_one_block(); 42.219 - 42.220 - // Check for bailouts. 42.221 - if (failing()) return; 42.222 - } 42.223 -} 42.224 - 42.225 //-------------------------------build_exits---------------------------------- 42.226 // Build normal and exceptional exit merge points. 42.227 void Parse::build_exits() { 42.228 @@ -1134,24 +1116,24 @@ 42.229 _blocks = NEW_RESOURCE_ARRAY(Block, _block_count); 42.230 Copy::zero_to_bytes(_blocks, sizeof(Block)*_block_count); 42.231 42.232 - int po; 42.233 + int rpo; 42.234 42.235 // Initialize the structs. 42.236 - for (po = 0; po < block_count(); po++) { 42.237 - Block* block = pre_order_at(po); 42.238 - block->init_node(this, po); 42.239 + for (rpo = 0; rpo < block_count(); rpo++) { 42.240 + Block* block = rpo_at(rpo); 42.241 + block->init_node(this, rpo); 42.242 } 42.243 42.244 // Collect predecessor and successor information. 42.245 - for (po = 0; po < block_count(); po++) { 42.246 - Block* block = pre_order_at(po); 42.247 + for (rpo = 0; rpo < block_count(); rpo++) { 42.248 + Block* block = rpo_at(rpo); 42.249 block->init_graph(this); 42.250 } 42.251 } 42.252 42.253 //-------------------------------init_node------------------------------------- 42.254 -void Parse::Block::init_node(Parse* outer, int po) { 42.255 - _flow = outer->flow()->pre_order_at(po); 42.256 +void Parse::Block::init_node(Parse* outer, int rpo) { 42.257 + _flow = outer->flow()->rpo_at(rpo); 42.258 _pred_count = 0; 42.259 _preds_parsed = 0; 42.260 _count = 0; 42.261 @@ -1177,7 +1159,7 @@ 42.262 int p = 0; 42.263 for (int i = 0; i < ns+ne; i++) { 42.264 ciTypeFlow::Block* tf2 = (i < ns) ? 
tfs->at(i) : tfe->at(i-ns); 42.265 - Block* block2 = outer->pre_order_at(tf2->pre_order()); 42.266 + Block* block2 = outer->rpo_at(tf2->rpo()); 42.267 _successors[i] = block2; 42.268 42.269 // Accumulate pred info for the other block, too. 42.270 @@ -1368,10 +1350,11 @@ 42.271 int nt = b->all_successors(); 42.272 42.273 tty->print("Parsing block #%d at bci [%d,%d), successors: ", 42.274 - block()->pre_order(), block()->start(), block()->limit()); 42.275 + block()->rpo(), block()->start(), block()->limit()); 42.276 for (int i = 0; i < nt; i++) { 42.277 - tty->print((( i < ns) ? " %d" : " %d(e)"), b->successor_at(i)->pre_order()); 42.278 + tty->print((( i < ns) ? " %d" : " %d(e)"), b->successor_at(i)->rpo()); 42.279 } 42.280 + if (b->is_loop_head()) tty->print(" lphd"); 42.281 tty->print_cr(""); 42.282 } 42.283 42.284 @@ -1501,7 +1484,7 @@ 42.285 #ifndef PRODUCT 42.286 Block* b = block(); 42.287 int trap_bci = b->flow()->has_trap()? b->flow()->trap_bci(): -1; 42.288 - tty->print_cr("### Missing successor at bci:%d for block #%d (trap_bci:%d)", target_bci, b->pre_order(), trap_bci); 42.289 + tty->print_cr("### Missing successor at bci:%d for block #%d (trap_bci:%d)", target_bci, b->rpo(), trap_bci); 42.290 #endif 42.291 ShouldNotReachHere(); 42.292 } 42.293 @@ -1509,7 +1492,7 @@ 42.294 //--------------------------merge_common--------------------------------------- 42.295 void Parse::merge_common(Parse::Block* target, int pnum) { 42.296 if (TraceOptoParse) { 42.297 - tty->print("Merging state at block #%d bci:%d", target->pre_order(), target->start()); 42.298 + tty->print("Merging state at block #%d bci:%d", target->rpo(), target->start()); 42.299 } 42.300 42.301 // Zap extra stack slots to top 42.302 @@ -1534,6 +1517,7 @@ 42.303 // which must not be allowed into this block's map.) 42.304 if (pnum > PhiNode::Input // Known multiple inputs. 42.305 || target->is_handler() // These have unpredictable inputs. 42.306 + || target->is_loop_head() // Known multiple inputs 42.307 || control()->is_Region()) { // We must hide this guy. 42.308 // Add a Region to start the new basic block. Phis will be added 42.309 // later lazily. 42.310 @@ -1575,15 +1559,21 @@ 42.311 42.312 // Compute where to merge into 42.313 // Merge incoming control path 42.314 - r->set_req(pnum, newin->control()); 42.315 + r->init_req(pnum, newin->control()); 42.316 42.317 if (pnum == 1) { // Last merge for this Region? 42.318 - _gvn.transform_no_reclaim(r); 42.319 + if (!block()->flow()->is_irreducible_entry()) { 42.320 + Node* result = _gvn.transform_no_reclaim(r); 42.321 + if (r != result && TraceOptoParse) { 42.322 + tty->print_cr("Block #%d replace %d with %d", block()->rpo(), r->_idx, result->_idx); 42.323 + } 42.324 + } 42.325 record_for_igvn(r); 42.326 } 42.327 42.328 // Update all the non-control inputs to map: 42.329 assert(TypeFunc::Parms == newin->jvms()->locoff(), "parser map should contain only youngest jvms"); 42.330 + bool check_elide_phi = target->is_SEL_backedge(save_block); 42.331 for (uint j = 1; j < newin->req(); j++) { 42.332 Node* m = map()->in(j); // Current state of target. 42.333 Node* n = newin->in(j); // Incoming change to target state. 
42.334 @@ -1603,7 +1593,11 @@ 42.335 merge_memory_edges(n->as_MergeMem(), pnum, nophi); 42.336 continue; 42.337 default: // All normal stuff 42.338 - if (phi == NULL) phi = ensure_phi(j, nophi); 42.339 + if (phi == NULL) { 42.340 + if (!check_elide_phi || !target->can_elide_SEL_phi(j)) { 42.341 + phi = ensure_phi(j, nophi); 42.342 + } 42.343 + } 42.344 break; 42.345 } 42.346 } 42.347 @@ -1736,9 +1730,13 @@ 42.348 uint nof_monitors = map()->jvms()->nof_monitors(); 42.349 42.350 assert(TypeFunc::Parms == map()->jvms()->locoff(), "parser map should contain only youngest jvms"); 42.351 + bool check_elide_phi = block()->is_SEL_head(); 42.352 for (uint i = TypeFunc::Parms; i < monoff; i++) { 42.353 - ensure_phi(i); 42.354 + if (!check_elide_phi || !block()->can_elide_SEL_phi(i)) { 42.355 + ensure_phi(i); 42.356 + } 42.357 } 42.358 + 42.359 // Even monitors need Phis, though they are well-structured. 42.360 // This is true for OSR methods, and also for the rare cases where 42.361 // a monitor object is the subject of a replace_in_map operation.
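
The rewritten do_all_blocks() above drops the separate visit_blocks() helper and walks blocks in reverse post-order, repeating the pass only while irreducible entries keep making progress. The sketch below is a minimal standalone model of that driver shape; Block, parse_all_blocks and the tiny CFG in main are hypothetical stand-ins, not HotSpot code.

    #include <cstdio>
    #include <vector>

    struct Block {
      std::vector<int> succs;   // successor block indices
      bool merged = false;      // some predecessor has delivered a state
      bool parsed = false;      // body has been emitted
    };

    // Walk blocks in reverse post-order; parsing a block merges state into its
    // successors. A reducible CFG finishes in one pass because every non-backedge
    // predecessor has a smaller RPO number; only irreducible entries can force
    // another pass, hence the progress-driven outer loop.
    static int parse_all_blocks(std::vector<Block>& blocks, bool has_irreducible) {
      int passes = 0;
      while (true) {
        ++passes;
        bool progress = false;
        for (Block& b : blocks) {
          if (b.parsed || !b.merged) continue;           // done, or dead: nothing reaches it
          b.parsed = true;                               // stand-in for do_one_block()
          progress = true;
          for (int s : b.succs) blocks[s].merged = true;
        }
        if (!has_irreducible || !progress) break;
      }
      return passes;
    }

    int main() {
      std::vector<Block> blocks(3);    // 0 -> 1 -> 2, already numbered in RPO
      blocks[0].succs = {1};
      blocks[1].succs = {2};
      blocks[0].merged = true;         // the method entry has an initial state
      std::printf("passes: %d\n", parse_all_blocks(blocks, /*has_irreducible=*/false));
      return 0;
    }
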
43.1 --- a/src/share/vm/opto/parse2.cpp Tue Sep 30 12:24:27 2008 -0400 43.2 +++ b/src/share/vm/opto/parse2.cpp Wed Oct 01 20:15:03 2008 -0400 43.3 @@ -100,16 +100,17 @@ 43.4 43.5 // Do the range check 43.6 if (GenerateRangeChecks && need_range_check) { 43.7 - // Range is constant in array-oop, so we can use the original state of mem 43.8 - Node* len = load_array_length(ary); 43.9 Node* tst; 43.10 if (sizetype->_hi <= 0) { 43.11 - // If the greatest array bound is negative, we can conclude that we're 43.12 + // The greatest array bound is negative, so we can conclude that we're 43.13 // compiling unreachable code, but the unsigned compare trick used below 43.14 // only works with non-negative lengths. Instead, hack "tst" to be zero so 43.15 // the uncommon_trap path will always be taken. 43.16 tst = _gvn.intcon(0); 43.17 } else { 43.18 + // Range is constant in array-oop, so we can use the original state of mem 43.19 + Node* len = load_array_length(ary); 43.20 + 43.21 // Test length vs index (standard trick using unsigned compare) 43.22 Node* chk = _gvn.transform( new (C, 3) CmpUNode(idx, len) ); 43.23 BoolTest::mask btest = BoolTest::lt; 43.24 @@ -137,9 +138,12 @@ 43.25 // Check for always knowing you are throwing a range-check exception 43.26 if (stopped()) return top(); 43.27 43.28 - Node* ptr = array_element_address( ary, idx, type, sizetype); 43.29 + Node* ptr = array_element_address(ary, idx, type, sizetype); 43.30 43.31 if (result2 != NULL) *result2 = elemtype; 43.32 + 43.33 + assert(ptr != top(), "top should go hand-in-hand with stopped"); 43.34 + 43.35 return ptr; 43.36 } 43.37
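
The range-check code above still relies on the standard trick mentioned in its comment: a single unsigned compare of index against length rejects both negative indices and indices past the end. A standalone illustration in plain C++ (not the node-building code in the diff):

    #include <cassert>
    #include <cstdint>

    // One unsigned compare covers both "idx < 0" and "idx >= len",
    // because a negative int converts to a huge unsigned value.
    static bool in_bounds(int32_t idx, int32_t len) {
      return static_cast<uint32_t>(idx) < static_cast<uint32_t>(len);
    }

    int main() {
      assert( in_bounds(0, 4));
      assert( in_bounds(3, 4));
      assert(!in_bounds(4, 4));
      assert(!in_bounds(-1, 4));   // wraps to 0xFFFFFFFF and fails the compare
      return 0;
    }
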
44.1 --- a/src/share/vm/opto/type.cpp Tue Sep 30 12:24:27 2008 -0400 44.2 +++ b/src/share/vm/opto/type.cpp Wed Oct 01 20:15:03 2008 -0400 44.3 @@ -3157,17 +3157,18 @@ 44.4 44.5 // Narrow the given size type to the index range for the given array base type. 44.6 // Return NULL if the resulting int type becomes empty. 44.7 -const TypeInt* TypeAryPtr::narrow_size_type(const TypeInt* size, BasicType elem) { 44.8 +const TypeInt* TypeAryPtr::narrow_size_type(const TypeInt* size) const { 44.9 jint hi = size->_hi; 44.10 jint lo = size->_lo; 44.11 jint min_lo = 0; 44.12 - jint max_hi = max_array_length(elem); 44.13 + jint max_hi = max_array_length(elem()->basic_type()); 44.14 //if (index_not_size) --max_hi; // type of a valid array index, FTR 44.15 bool chg = false; 44.16 if (lo < min_lo) { lo = min_lo; chg = true; } 44.17 if (hi > max_hi) { hi = max_hi; chg = true; } 44.18 + // Negative length arrays will produce weird intermediate dead fast-path code 44.19 if (lo > hi) 44.20 - return NULL; 44.21 + return TypeInt::ZERO; 44.22 if (!chg) 44.23 return size; 44.24 return TypeInt::make(lo, hi, Type::WidenMin); 44.25 @@ -3176,9 +3177,7 @@ 44.26 //-------------------------------cast_to_size---------------------------------- 44.27 const TypeAryPtr* TypeAryPtr::cast_to_size(const TypeInt* new_size) const { 44.28 assert(new_size != NULL, ""); 44.29 - new_size = narrow_size_type(new_size, elem()->basic_type()); 44.30 - if (new_size == NULL) // Negative length arrays will produce weird 44.31 - new_size = TypeInt::ZERO; // intermediate dead fast-path goo 44.32 + new_size = narrow_size_type(new_size); 44.33 if (new_size == size()) return this; 44.34 const TypeAry* new_ary = TypeAry::make(elem(), new_size); 44.35 return make(ptr(), const_oop(), new_ary, klass(), klass_is_exact(), _offset, _instance_id);
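
narrow_size_type() now reads the element type from the receiver and returns TypeInt::ZERO instead of NULL when the clipped range is empty. The sketch below models only the clipping arithmetic on plain integers; max_len stands in for the element-type-dependent max_array_length() and none of this is the real TypeInt machinery.

    #include <algorithm>
    #include <cassert>
    #include <utility>

    // Clip an array-size range [lo, hi] to [0, max_len]. An empty result
    // collapses to [0, 0] rather than "no type", mirroring TypeInt::ZERO.
    static std::pair<int, int> narrow_size_range(int lo, int hi, int max_len) {
      lo = std::max(lo, 0);
      hi = std::min(hi, max_len);
      if (lo > hi) return std::make_pair(0, 0);   // dead fast-path code, but still a valid type
      return std::make_pair(lo, hi);
    }

    int main() {
      assert(narrow_size_range(-5, 10, 1000) == std::make_pair(0, 10));
      assert(narrow_size_range(7, 3, 1000)   == std::make_pair(0, 0));
      return 0;
    }
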
45.1 --- a/src/share/vm/opto/type.hpp Tue Sep 30 12:24:27 2008 -0400 45.2 +++ b/src/share/vm/opto/type.hpp Wed Oct 01 20:15:03 2008 -0400 45.3 @@ -840,6 +840,7 @@ 45.4 virtual const TypeOopPtr *cast_to_instance_id(int instance_id) const; 45.5 45.6 virtual const TypeAryPtr* cast_to_size(const TypeInt* size) const; 45.7 + virtual const TypeInt* narrow_size_type(const TypeInt* size) const; 45.8 45.9 virtual bool empty(void) const; // TRUE if type is vacuous 45.10 virtual const TypePtr *add_offset( intptr_t offset ) const; 45.11 @@ -865,7 +866,6 @@ 45.12 } 45.13 static const TypeAryPtr *_array_body_type[T_CONFLICT+1]; 45.14 // sharpen the type of an int which is used as an array size 45.15 - static const TypeInt* narrow_size_type(const TypeInt* size, BasicType elem); 45.16 #ifndef PRODUCT 45.17 virtual void dump2( Dict &d, uint depth, outputStream *st ) const; // Specialized per-Type dumping 45.18 #endif
46.1 --- a/src/share/vm/prims/jvmtiEnvBase.cpp Tue Sep 30 12:24:27 2008 -0400 46.2 +++ b/src/share/vm/prims/jvmtiEnvBase.cpp Wed Oct 01 20:15:03 2008 -0400 46.3 @@ -121,7 +121,7 @@ 46.4 JvmtiEventController::env_initialize((JvmtiEnv*)this); 46.5 46.6 #ifdef JVMTI_TRACE 46.7 - _jvmti_external.functions = strlen(TraceJVMTI)? &jvmtiTrace_Interface : &jvmti_Interface; 46.8 + _jvmti_external.functions = TraceJVMTI != NULL ? &jvmtiTrace_Interface : &jvmti_Interface; 46.9 #else 46.10 _jvmti_external.functions = &jvmti_Interface; 46.11 #endif
47.1 --- a/src/share/vm/prims/jvmtiTrace.cpp Tue Sep 30 12:24:27 2008 -0400 47.2 +++ b/src/share/vm/prims/jvmtiTrace.cpp Wed Oct 01 20:15:03 2008 -0400 47.3 @@ -73,7 +73,7 @@ 47.4 47.5 const char *very_end; 47.6 const char *curr; 47.7 - if (strlen(TraceJVMTI)) { 47.8 + if (TraceJVMTI != NULL) { 47.9 curr = TraceJVMTI; 47.10 } else { 47.11 curr = ""; // hack in fixed tracing here
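
Both JVMTI hunks exist because TraceJVMTI's default changes from "" to NULL later in this changeset (see the globals.hpp hunk), and calling strlen on a NULL ccstr would crash during VM startup. A minimal sketch of the safer test, with a local variable standing in for the real flag:

    #include <cstdio>

    int main() {
      // Stand-in for the TraceJVMTI ccstr flag; its default is now NULL.
      const char* trace_jvmti = nullptr;

      // Old test: strlen(trace_jvmti) != 0 -- undefined behaviour when NULL.
      // New test: just check the pointer.
      const char* curr = (trace_jvmti != nullptr) ? trace_jvmti : "";
      std::printf("trace spec: '%s'\n", curr);
      return 0;
    }
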
48.1 --- a/src/share/vm/runtime/globals.cpp Tue Sep 30 12:24:27 2008 -0400 48.2 +++ b/src/share/vm/runtime/globals.cpp Wed Oct 01 20:15:03 2008 -0400 48.3 @@ -365,8 +365,11 @@ 48.4 if (result == NULL) return false; 48.5 if (!result->is_ccstr()) return false; 48.6 ccstr old_value = result->get_ccstr(); 48.7 - char* new_value = NEW_C_HEAP_ARRAY(char, strlen(*value)+1); 48.8 - strcpy(new_value, *value); 48.9 + char* new_value = NULL; 48.10 + if (*value != NULL) { 48.11 + new_value = NEW_C_HEAP_ARRAY(char, strlen(*value)+1); 48.12 + strcpy(new_value, *value); 48.13 + } 48.14 result->set_ccstr(new_value); 48.15 if (result->origin == DEFAULT && old_value != NULL) { 48.16 // Prior value is NOT heap allocated, but was a literal constant.
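
The globals.cpp hunk makes writing a ccstr flag tolerate a NULL incoming value: the heap copy is made only when there is a string to copy. A standalone sketch of that dup-or-NULL pattern; the function name is hypothetical, with plain new[]/strcpy as in the diff.

    #include <cstring>

    // Copy a C string to the heap, or pass NULL through unchanged.
    static char* dup_or_null(const char* value) {
      if (value == nullptr) return nullptr;
      char* copy = new char[std::strlen(value) + 1];
      std::strcpy(copy, value);
      return copy;
    }

    int main() {
      char* a = dup_or_null("some option string");
      char* b = dup_or_null(nullptr);   // previously this path would have crashed in strlen
      delete[] a;
      delete[] b;                       // deleting NULL is a no-op
      return 0;
    }
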
49.1 --- a/src/share/vm/runtime/globals.hpp Tue Sep 30 12:24:27 2008 -0400 49.2 +++ b/src/share/vm/runtime/globals.hpp Wed Oct 01 20:15:03 2008 -0400 49.3 @@ -707,7 +707,7 @@ 49.4 diagnostic(bool, PrintAssembly, false, \ 49.5 "Print assembly code (using external disassembler.so)") \ 49.6 \ 49.7 - diagnostic(ccstr, PrintAssemblyOptions, false, \ 49.8 + diagnostic(ccstr, PrintAssemblyOptions, NULL, \ 49.9 "Options string passed to disassembler.so") \ 49.10 \ 49.11 diagnostic(bool, PrintNMethods, false, \ 49.12 @@ -848,7 +848,7 @@ 49.13 "Use LWP-based instead of libthread-based synchronization " \ 49.14 "(SPARC only)") \ 49.15 \ 49.16 - product(ccstr, SyncKnobs, "", \ 49.17 + product(ccstr, SyncKnobs, NULL, \ 49.18 "(Unstable) Various monitor synchronization tunables") \ 49.19 \ 49.20 product(intx, EmitSync, 0, \ 49.21 @@ -1032,7 +1032,7 @@ 49.22 notproduct(bool, TraceJVMCalls, false, \ 49.23 "Trace JVM calls") \ 49.24 \ 49.25 - product(ccstr, TraceJVMTI, "", \ 49.26 + product(ccstr, TraceJVMTI, NULL, \ 49.27 "Trace flags for JVMTI functions and events") \ 49.28 \ 49.29 /* This option can change an EMCP method into an obsolete method. */ \ 49.30 @@ -1157,10 +1157,6 @@ 49.31 "In the Parallel Old garbage collector use parallel dense" \ 49.32 " prefix update") \ 49.33 \ 49.34 - develop(bool, UseParallelOldGCChunkPointerCalc, true, \ 49.35 - "In the Parallel Old garbage collector use chucks to calculate" \ 49.36 - " new object locations") \ 49.37 - \ 49.38 product(uintx, HeapMaximumCompactionInterval, 20, \ 49.39 "How often should we maximally compact the heap (not allowing " \ 49.40 "any dead space)") \ 49.41 @@ -1189,21 +1185,14 @@ 49.42 product(uintx, ParallelCMSThreads, 0, \ 49.43 "Max number of threads CMS will use for concurrent work") \ 49.44 \ 49.45 - develop(bool, VerifyParallelOldWithMarkSweep, false, \ 49.46 - "Use the MarkSweep code to verify phases of Parallel Old") \ 49.47 - \ 49.48 - develop(uintx, VerifyParallelOldWithMarkSweepInterval, 1, \ 49.49 - "Interval at which the MarkSweep code is used to verify " \ 49.50 - "phases of Parallel Old") \ 49.51 - \ 49.52 develop(bool, ParallelOldMTUnsafeMarkBitMap, false, \ 49.53 "Use the Parallel Old MT unsafe in marking the bitmap") \ 49.54 \ 49.55 develop(bool, ParallelOldMTUnsafeUpdateLiveData, false, \ 49.56 "Use the Parallel Old MT unsafe in update of live size") \ 49.57 \ 49.58 - develop(bool, TraceChunkTasksQueuing, false, \ 49.59 - "Trace the queuing of the chunk tasks") \ 49.60 + develop(bool, TraceRegionTasksQueuing, false, \ 49.61 + "Trace the queuing of the region tasks") \ 49.62 \ 49.63 product(uintx, ParallelMarkingThreads, 0, \ 49.64 "Number of marking threads concurrent gc will use") \
50.1 --- a/src/share/vm/utilities/taskqueue.cpp Tue Sep 30 12:24:27 2008 -0400 50.2 +++ b/src/share/vm/utilities/taskqueue.cpp Wed Oct 01 20:15:03 2008 -0400 50.3 @@ -109,72 +109,72 @@ 50.4 } 50.5 } 50.6 50.7 -bool ChunkTaskQueueWithOverflow::is_empty() { 50.8 - return (_chunk_queue.size() == 0) && 50.9 +bool RegionTaskQueueWithOverflow::is_empty() { 50.10 + return (_region_queue.size() == 0) && 50.11 (_overflow_stack->length() == 0); 50.12 } 50.13 50.14 -bool ChunkTaskQueueWithOverflow::stealable_is_empty() { 50.15 - return _chunk_queue.size() == 0; 50.16 +bool RegionTaskQueueWithOverflow::stealable_is_empty() { 50.17 + return _region_queue.size() == 0; 50.18 } 50.19 50.20 -bool ChunkTaskQueueWithOverflow::overflow_is_empty() { 50.21 +bool RegionTaskQueueWithOverflow::overflow_is_empty() { 50.22 return _overflow_stack->length() == 0; 50.23 } 50.24 50.25 -void ChunkTaskQueueWithOverflow::initialize() { 50.26 - _chunk_queue.initialize(); 50.27 +void RegionTaskQueueWithOverflow::initialize() { 50.28 + _region_queue.initialize(); 50.29 assert(_overflow_stack == 0, "Creating memory leak"); 50.30 _overflow_stack = 50.31 - new (ResourceObj::C_HEAP) GrowableArray<ChunkTask>(10, true); 50.32 + new (ResourceObj::C_HEAP) GrowableArray<RegionTask>(10, true); 50.33 } 50.34 50.35 -void ChunkTaskQueueWithOverflow::save(ChunkTask t) { 50.36 - if (TraceChunkTasksQueuing && Verbose) { 50.37 +void RegionTaskQueueWithOverflow::save(RegionTask t) { 50.38 + if (TraceRegionTasksQueuing && Verbose) { 50.39 gclog_or_tty->print_cr("CTQ: save " PTR_FORMAT, t); 50.40 } 50.41 - if(!_chunk_queue.push(t)) { 50.42 + if(!_region_queue.push(t)) { 50.43 _overflow_stack->push(t); 50.44 } 50.45 } 50.46 50.47 -// Note that using this method will retrieve all chunks 50.48 +// Note that using this method will retrieve all regions 50.49 // that have been saved but that it will always check 50.50 // the overflow stack. It may be more efficient to 50.51 // check the stealable queue and the overflow stack 50.52 // separately. 
50.53 -bool ChunkTaskQueueWithOverflow::retrieve(ChunkTask& chunk_task) { 50.54 - bool result = retrieve_from_overflow(chunk_task); 50.55 +bool RegionTaskQueueWithOverflow::retrieve(RegionTask& region_task) { 50.56 + bool result = retrieve_from_overflow(region_task); 50.57 if (!result) { 50.58 - result = retrieve_from_stealable_queue(chunk_task); 50.59 + result = retrieve_from_stealable_queue(region_task); 50.60 } 50.61 - if (TraceChunkTasksQueuing && Verbose && result) { 50.62 + if (TraceRegionTasksQueuing && Verbose && result) { 50.63 gclog_or_tty->print_cr(" CTQ: retrieve " PTR_FORMAT, result); 50.64 } 50.65 return result; 50.66 } 50.67 50.68 -bool ChunkTaskQueueWithOverflow::retrieve_from_stealable_queue( 50.69 - ChunkTask& chunk_task) { 50.70 - bool result = _chunk_queue.pop_local(chunk_task); 50.71 - if (TraceChunkTasksQueuing && Verbose) { 50.72 - gclog_or_tty->print_cr("CTQ: retrieve_stealable " PTR_FORMAT, chunk_task); 50.73 +bool RegionTaskQueueWithOverflow::retrieve_from_stealable_queue( 50.74 + RegionTask& region_task) { 50.75 + bool result = _region_queue.pop_local(region_task); 50.76 + if (TraceRegionTasksQueuing && Verbose) { 50.77 + gclog_or_tty->print_cr("CTQ: retrieve_stealable " PTR_FORMAT, region_task); 50.78 } 50.79 return result; 50.80 } 50.81 50.82 -bool ChunkTaskQueueWithOverflow::retrieve_from_overflow( 50.83 - ChunkTask& chunk_task) { 50.84 +bool 50.85 +RegionTaskQueueWithOverflow::retrieve_from_overflow(RegionTask& region_task) { 50.86 bool result; 50.87 if (!_overflow_stack->is_empty()) { 50.88 - chunk_task = _overflow_stack->pop(); 50.89 + region_task = _overflow_stack->pop(); 50.90 result = true; 50.91 } else { 50.92 - chunk_task = (ChunkTask) NULL; 50.93 + region_task = (RegionTask) NULL; 50.94 result = false; 50.95 } 50.96 - if (TraceChunkTasksQueuing && Verbose) { 50.97 - gclog_or_tty->print_cr("CTQ: retrieve_stealable " PTR_FORMAT, chunk_task); 50.98 + if (TraceRegionTasksQueuing && Verbose) { 50.99 + gclog_or_tty->print_cr("CTQ: retrieve_stealable " PTR_FORMAT, region_task); 50.100 } 50.101 return result; 50.102 }
51.1 --- a/src/share/vm/utilities/taskqueue.hpp Tue Sep 30 12:24:27 2008 -0400 51.2 +++ b/src/share/vm/utilities/taskqueue.hpp Wed Oct 01 20:15:03 2008 -0400 51.3 @@ -557,32 +557,32 @@ 51.4 typedef GenericTaskQueue<StarTask> OopStarTaskQueue; 51.5 typedef GenericTaskQueueSet<StarTask> OopStarTaskQueueSet; 51.6 51.7 -typedef size_t ChunkTask; // index for chunk 51.8 -typedef GenericTaskQueue<ChunkTask> ChunkTaskQueue; 51.9 -typedef GenericTaskQueueSet<ChunkTask> ChunkTaskQueueSet; 51.10 +typedef size_t RegionTask; // index for region 51.11 +typedef GenericTaskQueue<RegionTask> RegionTaskQueue; 51.12 +typedef GenericTaskQueueSet<RegionTask> RegionTaskQueueSet; 51.13 51.14 -class ChunkTaskQueueWithOverflow: public CHeapObj { 51.15 +class RegionTaskQueueWithOverflow: public CHeapObj { 51.16 protected: 51.17 - ChunkTaskQueue _chunk_queue; 51.18 - GrowableArray<ChunkTask>* _overflow_stack; 51.19 + RegionTaskQueue _region_queue; 51.20 + GrowableArray<RegionTask>* _overflow_stack; 51.21 51.22 public: 51.23 - ChunkTaskQueueWithOverflow() : _overflow_stack(NULL) {} 51.24 + RegionTaskQueueWithOverflow() : _overflow_stack(NULL) {} 51.25 // Initialize both stealable queue and overflow 51.26 void initialize(); 51.27 // Save first to stealable queue and then to overflow 51.28 - void save(ChunkTask t); 51.29 + void save(RegionTask t); 51.30 // Retrieve first from overflow and then from stealable queue 51.31 - bool retrieve(ChunkTask& chunk_index); 51.32 + bool retrieve(RegionTask& region_index); 51.33 // Retrieve from stealable queue 51.34 - bool retrieve_from_stealable_queue(ChunkTask& chunk_index); 51.35 + bool retrieve_from_stealable_queue(RegionTask& region_index); 51.36 // Retrieve from overflow 51.37 - bool retrieve_from_overflow(ChunkTask& chunk_index); 51.38 + bool retrieve_from_overflow(RegionTask& region_index); 51.39 bool is_empty(); 51.40 bool stealable_is_empty(); 51.41 bool overflow_is_empty(); 51.42 - juint stealable_size() { return _chunk_queue.size(); } 51.43 - ChunkTaskQueue* task_queue() { return &_chunk_queue; } 51.44 + juint stealable_size() { return _region_queue.size(); } 51.45 + RegionTaskQueue* task_queue() { return &_region_queue; } 51.46 }; 51.47 51.48 -#define USE_ChunkTaskQueueWithOverflow 51.49 +#define USE_RegionTaskQueueWithOverflow
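
The taskqueue changes are a mechanical Chunk-to-Region rename, but the structure being renamed is worth sketching: a bounded stealable queue backed by a private overflow stack, where save() spills to the stack once the queue is full and retrieve() drains the overflow before the queue. A simplified standalone version using std::deque and std::vector in place of GenericTaskQueue; it is illustrative only and ignores the thread-safety the real queue provides.

    #include <cstddef>
    #include <deque>
    #include <vector>

    typedef size_t RegionTask;   // index of a region, as in the diff

    class SimpleRegionQueueWithOverflow {
      std::deque<RegionTask>  _region_queue;    // the bounded, stealable part
      std::vector<RegionTask> _overflow_stack;  // private spill area
      size_t _capacity;

     public:
      explicit SimpleRegionQueueWithOverflow(size_t capacity) : _capacity(capacity) {}

      void save(RegionTask t) {                 // queue first, overflow second
        if (_region_queue.size() < _capacity) _region_queue.push_back(t);
        else                                  _overflow_stack.push_back(t);
      }

      bool retrieve(RegionTask& t) {            // overflow first, then the queue
        if (!_overflow_stack.empty()) { t = _overflow_stack.back(); _overflow_stack.pop_back(); return true; }
        if (!_region_queue.empty())   { t = _region_queue.back();   _region_queue.pop_back();   return true; }
        return false;
      }

      bool is_empty() const { return _region_queue.empty() && _overflow_stack.empty(); }
    };

    int main() {
      SimpleRegionQueueWithOverflow q(4);
      for (RegionTask r = 0; r < 6; ++r) q.save(r);   // the last two spill to overflow
      RegionTask t;
      while (q.retrieve(t)) { /* process region t */ }
      return q.is_empty() ? 0 : 1;
    }
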
52.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 52.2 +++ b/test/compiler/6711100/Test.java Wed Oct 01 20:15:03 2008 -0400 52.3 @@ -0,0 +1,53 @@ 52.4 +/* 52.5 + * Copyright 2008 Sun Microsystems, Inc. All Rights Reserved. 52.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 52.7 + * 52.8 + * This code is free software; you can redistribute it and/or modify it 52.9 + * under the terms of the GNU General Public License version 2 only, as 52.10 + * published by the Free Software Foundation. 52.11 + * 52.12 + * This code is distributed in the hope that it will be useful, but WITHOUT 52.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 52.14 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 52.15 + * version 2 for more details (a copy is included in the LICENSE file that 52.16 + * accompanied this code). 52.17 + * 52.18 + * You should have received a copy of the GNU General Public License version 52.19 + * 2 along with this work; if not, write to the Free Software Foundation, 52.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 52.21 + * 52.22 + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, 52.23 + * CA 95054 USA or visit www.sun.com if you need additional information or 52.24 + * have any questions. 52.25 + */ 52.26 + 52.27 +/* 52.28 + * @test 52.29 + * @bug 6711100 52.30 + * @summary 64bit fastdebug server vm crashes with assert(_base == Int,"Not an Int") 52.31 + * @run main/othervm -Xcomp -XX:CompileOnly=Test.<init> Test 52.32 + */ 52.33 + 52.34 +public class Test { 52.35 + 52.36 + static byte b; 52.37 + 52.38 + // The server compiler chokes on compiling 52.39 + // this method when f() is not inlined 52.40 + public Test() { 52.41 + b = (new byte[1])[(new byte[f()])[-1]]; 52.42 + } 52.43 + 52.44 + protected static int f() { 52.45 + return 1; 52.46 + } 52.47 + 52.48 + public static void main(String[] args) { 52.49 + try { 52.50 + Test t = new Test(); 52.51 + } catch (ArrayIndexOutOfBoundsException e) { 52.52 + } 52.53 + } 52.54 +} 52.55 + 52.56 +