1.1 --- a/src/share/vm/opto/block.cpp Thu Oct 30 17:08:48 2008 -0700 1.2 +++ b/src/share/vm/opto/block.cpp Thu Nov 06 14:59:10 2008 -0800 1.3 @@ -57,6 +57,14 @@ 1.4 _blocks[i] = b; 1.5 } 1.6 1.7 +#ifndef PRODUCT 1.8 +void Block_List::print() { 1.9 + for (uint i=0; i < size(); i++) { 1.10 + tty->print("B%d ", _blocks[i]->_pre_order); 1.11 + } 1.12 + tty->print("size = %d\n", size()); 1.13 +} 1.14 +#endif 1.15 1.16 //============================================================================= 1.17 1.18 @@ -66,6 +74,12 @@ 1.19 // Check for Start block 1.20 if( _pre_order == 1 ) return InteriorEntryAlignment; 1.21 // Check for loop alignment 1.22 + if (has_loop_alignment()) return loop_alignment(); 1.23 + 1.24 + return 1; // no particular alignment 1.25 +} 1.26 + 1.27 +uint Block::compute_loop_alignment() { 1.28 Node *h = head(); 1.29 if( h->is_Loop() && h->as_Loop()->is_inner_loop() ) { 1.30 // Pre- and post-loops have low trip count so do not bother with 1.31 @@ -83,13 +97,15 @@ 1.32 } 1.33 return OptoLoopAlignment; // Otherwise align loop head 1.34 } 1.35 + 1.36 return 1; // no particular alignment 1.37 } 1.38 1.39 //----------------------------------------------------------------------------- 1.40 // Compute the size of first 'inst_cnt' instructions in this block. 1.41 // Return the number of instructions left to compute if the block has 1.42 -// less then 'inst_cnt' instructions. 1.43 +// less then 'inst_cnt' instructions. Stop, and return 0 if sum_size 1.44 +// exceeds OptoLoopAlignment. 1.45 uint Block::compute_first_inst_size(uint& sum_size, uint inst_cnt, 1.46 PhaseRegAlloc* ra) { 1.47 uint last_inst = _nodes.size(); 1.48 @@ -307,6 +323,8 @@ 1.49 tty->print("\tLoop: B%d-B%d ", bhead->_pre_order, bx->_pre_order); 1.50 // Dump any loop-specific bits, especially for CountedLoops. 
1.51 loop->dump_spec(tty); 1.52 + } else if (has_loop_alignment()) { 1.53 + tty->print(" top-of-loop"); 1.54 } 1.55 tty->print(" Freq: %g",_freq); 1.56 if( Verbose || WizardMode ) { 1.57 @@ -509,9 +527,11 @@ 1.58 int branch_idx = b->_nodes.size() - b->_num_succs-1; 1.59 if( branch_idx < 1 ) return false; 1.60 Node *bra = b->_nodes[branch_idx]; 1.61 - if( bra->is_Catch() ) return true; 1.62 + if( bra->is_Catch() ) 1.63 + return true; 1.64 if( bra->is_Mach() ) { 1.65 - if( bra->is_MachNullCheck() ) return true; 1.66 + if( bra->is_MachNullCheck() ) 1.67 + return true; 1.68 int iop = bra->as_Mach()->ideal_Opcode(); 1.69 if( iop == Op_FastLock || iop == Op_FastUnlock ) 1.70 return true; 1.71 @@ -557,10 +577,10 @@ 1.72 dead->_nodes[k]->del_req(j); 1.73 } 1.74 1.75 -//------------------------------MoveToNext------------------------------------- 1.76 +//------------------------------move_to_next----------------------------------- 1.77 // Helper function to move block bx to the slot following b_index. Return 1.78 // true if the move is successful, otherwise false 1.79 -bool PhaseCFG::MoveToNext(Block* bx, uint b_index) { 1.80 +bool PhaseCFG::move_to_next(Block* bx, uint b_index) { 1.81 if (bx == NULL) return false; 1.82 1.83 // Return false if bx is already scheduled. 1.84 @@ -591,9 +611,9 @@ 1.85 return true; 1.86 } 1.87 1.88 -//------------------------------MoveToEnd-------------------------------------- 1.89 +//------------------------------move_to_end------------------------------------ 1.90 // Move empty and uncommon blocks to the end. 1.91 -void PhaseCFG::MoveToEnd(Block *b, uint i) { 1.92 +void PhaseCFG::move_to_end(Block *b, uint i) { 1.93 int e = b->is_Empty(); 1.94 if (e != Block::not_empty) { 1.95 if (e == Block::empty_with_goto) { 1.96 @@ -609,15 +629,31 @@ 1.97 _blocks.push(b); 1.98 } 1.99 1.100 -//------------------------------RemoveEmpty------------------------------------ 1.101 -// Remove empty basic blocks and useless branches. 
1.102 -void PhaseCFG::RemoveEmpty() { 1.103 +//---------------------------set_loop_alignment-------------------------------- 1.104 +// Set loop alignment for every block 1.105 +void PhaseCFG::set_loop_alignment() { 1.106 + uint last = _num_blocks; 1.107 + assert( _blocks[0] == _broot, "" ); 1.108 + 1.109 + for (uint i = 1; i < last; i++ ) { 1.110 + Block *b = _blocks[i]; 1.111 + if (b->head()->is_Loop()) { 1.112 + b->set_loop_alignment(b); 1.113 + } 1.114 + } 1.115 +} 1.116 + 1.117 +//-----------------------------remove_empty------------------------------------ 1.118 +// Make empty basic blocks to be "connector" blocks, Move uncommon blocks 1.119 +// to the end. 1.120 +void PhaseCFG::remove_empty() { 1.121 // Move uncommon blocks to the end 1.122 uint last = _num_blocks; 1.123 - uint i; 1.124 assert( _blocks[0] == _broot, "" ); 1.125 - for( i = 1; i < last; i++ ) { 1.126 + 1.127 + for (uint i = 1; i < last; i++) { 1.128 Block *b = _blocks[i]; 1.129 + if (b->is_connector()) break; 1.130 1.131 // Check for NeverBranch at block end. This needs to become a GOTO to the 1.132 // true target. NeverBranch are treated as a conditional branch that 1.133 @@ -629,37 +665,40 @@ 1.134 convert_NeverBranch_to_Goto(b); 1.135 1.136 // Look for uncommon blocks and move to end. 1.137 - if( b->is_uncommon(_bbs) ) { 1.138 - MoveToEnd(b, i); 1.139 - last--; // No longer check for being uncommon! 1.140 - if( no_flip_branch(b) ) { // Fall-thru case must follow? 1.141 - b = _blocks[i]; // Find the fall-thru block 1.142 - MoveToEnd(b, i); 1.143 - last--; 1.144 + if (!C->do_freq_based_layout()) { 1.145 + if( b->is_uncommon(_bbs) ) { 1.146 + move_to_end(b, i); 1.147 + last--; // No longer check for being uncommon! 1.148 + if( no_flip_branch(b) ) { // Fall-thru case must follow? 
1.149 + b = _blocks[i]; // Find the fall-thru block 1.150 + move_to_end(b, i); 1.151 + last--; 1.152 + } 1.153 + i--; // backup block counter post-increment 1.154 } 1.155 - i--; // backup block counter post-increment 1.156 } 1.157 } 1.158 1.159 - // Remove empty blocks 1.160 - uint j1; 1.161 + // Move empty blocks to the end 1.162 last = _num_blocks; 1.163 - for( i=0; i < last; i++ ) { 1.164 + for (uint i = 1; i < last; i++) { 1.165 Block *b = _blocks[i]; 1.166 - if (i > 0) { 1.167 - if (b->is_Empty() != Block::not_empty) { 1.168 - MoveToEnd(b, i); 1.169 - last--; 1.170 - i--; 1.171 - } 1.172 + if (b->is_Empty() != Block::not_empty) { 1.173 + move_to_end(b, i); 1.174 + last--; 1.175 + i--; 1.176 } 1.177 } // End of for all blocks 1.178 +} 1.179 1.180 +//-----------------------------fixup_flow-------------------------------------- 1.181 +// Fix up the final control flow for basic blocks. 1.182 +void PhaseCFG::fixup_flow() { 1.183 // Fixup final control flow for the blocks. Remove jump-to-next 1.184 // block. If neither arm of a IF follows the conditional branch, we 1.185 // have to add a second jump after the conditional. We place the 1.186 // TRUE branch target in succs[0] for both GOTOs and IFs. 1.187 - for( i=0; i < _num_blocks; i++ ) { 1.188 + for (uint i=0; i < _num_blocks; i++) { 1.189 Block *b = _blocks[i]; 1.190 b->_pre_order = i; // turn pre-order into block-index 1.191 1.192 @@ -700,7 +739,7 @@ 1.193 } 1.194 } 1.195 // Remove all CatchProjs 1.196 - for (j1 = 0; j1 < b->_num_succs; j1++) b->_nodes.pop(); 1.197 + for (uint j1 = 0; j1 < b->_num_succs; j1++) b->_nodes.pop(); 1.198 1.199 } else if (b->_num_succs == 1) { 1.200 // Block ends in a Goto? 1.201 @@ -730,8 +769,7 @@ 1.202 // successors after the current one, provided that the 1.203 // successor was previously unscheduled, but moveable 1.204 // (i.e., all paths to it involve a branch). 
1.205 - if( bnext != bs0 && bnext != bs1 ) { 1.206 - 1.207 + if( !C->do_freq_based_layout() && bnext != bs0 && bnext != bs1 ) { 1.208 // Choose the more common successor based on the probability 1.209 // of the conditional branch. 1.210 Block *bx = bs0; 1.211 @@ -751,9 +789,9 @@ 1.212 } 1.213 1.214 // Attempt the more common successor first 1.215 - if (MoveToNext(bx, i)) { 1.216 + if (move_to_next(bx, i)) { 1.217 bnext = bx; 1.218 - } else if (MoveToNext(by, i)) { 1.219 + } else if (move_to_next(by, i)) { 1.220 bnext = by; 1.221 } 1.222 } 1.223 @@ -774,10 +812,8 @@ 1.224 // Flip projection for each target 1.225 { ProjNode *tmp = proj0; proj0 = proj1; proj1 = tmp; } 1.226 1.227 - } else if( bnext == bs1 ) { // Fall-thru is already in succs[1] 1.228 - 1.229 - } else { // Else need a double-branch 1.230 - 1.231 + } else if( bnext != bs1 ) { 1.232 + // Need a double-branch 1.233 // The existing conditional branch need not change. 1.234 // Add a unconditional branch to the false target. 1.235 // Alas, it must appear in its own block and adding a 1.236 @@ -786,8 +822,9 @@ 1.237 } 1.238 1.239 // Make sure we TRUE branch to the target 1.240 - if( proj0->Opcode() == Op_IfFalse ) 1.241 + if( proj0->Opcode() == Op_IfFalse ) { 1.242 iff->negate(); 1.243 + } 1.244 1.245 b->_nodes.pop(); // Remove IfFalse & IfTrue projections 1.246 b->_nodes.pop(); 1.247 @@ -796,9 +833,7 @@ 1.248 // Multi-exit block, e.g. a switch statement 1.249 // But we don't need to do anything here 1.250 } 1.251 - 1.252 } // End of for all blocks 1.253 - 1.254 } 1.255 1.256 1.257 @@ -905,7 +940,7 @@ 1.258 // Force the Union-Find mapping to be at least this large 1.259 extend(max,0); 1.260 // Initialize to be the ID mapping. 
1.261 - for( uint i=0; i<_max; i++ ) map(i,i); 1.262 + for( uint i=0; i<max; i++ ) map(i,i); 1.263 } 1.264 1.265 //------------------------------Find_compress---------------------------------- 1.266 @@ -937,7 +972,6 @@ 1.267 if( idx >= _max ) return idx; 1.268 uint next = lookup(idx); 1.269 while( next != idx ) { // Scan chain of equivalences 1.270 - assert( next < idx, "always union smaller" ); 1.271 idx = next; // until find a fixed-point 1.272 next = lookup(idx); 1.273 } 1.274 @@ -956,3 +990,491 @@ 1.275 assert( src < dst, "always union smaller" ); 1.276 map(dst,src); 1.277 } 1.278 + 1.279 +#ifndef PRODUCT 1.280 +static void edge_dump(GrowableArray<CFGEdge *> *edges) { 1.281 + tty->print_cr("---- Edges ----"); 1.282 + for (int i = 0; i < edges->length(); i++) { 1.283 + CFGEdge *e = edges->at(i); 1.284 + if (e != NULL) { 1.285 + edges->at(i)->dump(); 1.286 + } 1.287 + } 1.288 +} 1.289 + 1.290 +static void trace_dump(Trace *traces[], int count) { 1.291 + tty->print_cr("---- Traces ----"); 1.292 + for (int i = 0; i < count; i++) { 1.293 + Trace *tr = traces[i]; 1.294 + if (tr != NULL) { 1.295 + tr->dump(); 1.296 + } 1.297 + } 1.298 +} 1.299 + 1.300 +void Trace::dump( ) const { 1.301 + tty->print_cr("Trace (freq %f)", first_block()->_freq); 1.302 + for (Block *b = first_block(); b != NULL; b = next(b)) { 1.303 + tty->print(" B%d", b->_pre_order); 1.304 + if (b->head()->is_Loop()) { 1.305 + tty->print(" (L%d)", b->compute_loop_alignment()); 1.306 + } 1.307 + if (b->has_loop_alignment()) { 1.308 + tty->print(" (T%d)", b->code_alignment()); 1.309 + } 1.310 + } 1.311 + tty->cr(); 1.312 +} 1.313 + 1.314 +void CFGEdge::dump( ) const { 1.315 + tty->print(" B%d --> B%d Freq: %f out:%3d%% in:%3d%% State: ", 1.316 + from()->_pre_order, to()->_pre_order, freq(), _from_pct, _to_pct); 1.317 + switch(state()) { 1.318 + case connected: 1.319 + tty->print("connected"); 1.320 + break; 1.321 + case open: 1.322 + tty->print("open"); 1.323 + break; 1.324 + case interior: 1.325 + 
tty->print("interior"); 1.326 + break; 1.327 + } 1.328 + if (infrequent()) { 1.329 + tty->print(" infrequent"); 1.330 + } 1.331 + tty->cr(); 1.332 +} 1.333 +#endif 1.334 + 1.335 +//============================================================================= 1.336 + 1.337 +//------------------------------edge_order------------------------------------- 1.338 +// Comparison function for edges 1.339 +static int edge_order(CFGEdge **e0, CFGEdge **e1) { 1.340 + float freq0 = (*e0)->freq(); 1.341 + float freq1 = (*e1)->freq(); 1.342 + if (freq0 != freq1) { 1.343 + return freq0 > freq1 ? -1 : 1; 1.344 + } 1.345 + 1.346 + int dist0 = (*e0)->to()->_rpo - (*e0)->from()->_rpo; 1.347 + int dist1 = (*e1)->to()->_rpo - (*e1)->from()->_rpo; 1.348 + 1.349 + return dist1 - dist0; 1.350 +} 1.351 + 1.352 +//------------------------------trace_frequency_order-------------------------- 1.353 +// Comparison function for traces 1.354 +static int trace_frequency_order(const void *p0, const void *p1) { 1.355 + Trace *tr0 = *(Trace **) p0; 1.356 + Trace *tr1 = *(Trace **) p1; 1.357 + Block *b0 = tr0->first_block(); 1.358 + Block *b1 = tr1->first_block(); 1.359 + 1.360 + // The trace of connector blocks goes at the end; 1.361 + // we only expect one such trace 1.362 + if (b0->is_connector() != b1->is_connector()) { 1.363 + return b1->is_connector() ? -1 : 1; 1.364 + } 1.365 + 1.366 + // Pull more frequently executed blocks to the beginning 1.367 + float freq0 = b0->_freq; 1.368 + float freq1 = b1->_freq; 1.369 + if (freq0 != freq1) { 1.370 + return freq0 > freq1 ? -1 : 1; 1.371 + } 1.372 + 1.373 + int diff = tr0->first_block()->_rpo - tr1->first_block()->_rpo; 1.374 + 1.375 + return diff; 1.376 +} 1.377 + 1.378 +//------------------------------find_edges------------------------------------- 1.379 +// Find edges of interest, i.e., those which can fall through. Presumes that 1.380 +// edges which don't fall through are of low frequency and can be generally 1.381 +// ignored.
Initialize the list of traces. 1.382 +void PhaseBlockLayout::find_edges() 1.383 +{ 1.384 + // Walk the blocks, creating edges and Traces 1.385 + uint i; 1.386 + Trace *tr = NULL; 1.387 + for (i = 0; i < _cfg._num_blocks; i++) { 1.388 + Block *b = _cfg._blocks[i]; 1.389 + tr = new Trace(b, next, prev); 1.390 + traces[tr->id()] = tr; 1.391 + 1.392 + // All connector blocks should be at the end of the list 1.393 + if (b->is_connector()) break; 1.394 + 1.395 + // If this block and the next one have a one-to-one successor 1.396 + // predecessor relationship, simply append the next block 1.397 + int nfallthru = b->num_fall_throughs(); 1.398 + while (nfallthru == 1 && 1.399 + b->succ_fall_through(0)) { 1.400 + Block *n = b->_succs[0]; 1.401 + 1.402 + // Skip over single-entry connector blocks, we don't want to 1.403 + // add them to the trace. 1.404 + while (n->is_connector() && n->num_preds() == 1) { 1.405 + n = n->_succs[0]; 1.406 + } 1.407 + 1.408 + // We see a merge point, so stop search for the next block 1.409 + if (n->num_preds() != 1) break; 1.410 + 1.411 + i++; 1.412 + assert(n == _cfg._blocks[i], "expecting next block"); 1.413 + tr->append(n); 1.414 + uf->map(n->_pre_order, tr->id()); 1.415 + traces[n->_pre_order] = NULL; 1.416 + nfallthru = b->num_fall_throughs(); 1.417 + b = n; 1.418 + } 1.419 + 1.420 + if (nfallthru > 0) { 1.421 + // Create a CFGEdge for each outgoing 1.422 + // edge that could be a fall-through.
1.423 + for (uint j = 0; j < b->_num_succs; j++ ) { 1.424 + if (b->succ_fall_through(j)) { 1.425 + Block *target = b->non_connector_successor(j); 1.426 + float freq = b->_freq * b->succ_prob(j); 1.427 + int from_pct = (int) ((100 * freq) / b->_freq); 1.428 + int to_pct = (int) ((100 * freq) / target->_freq); 1.429 + edges->append(new CFGEdge(b, target, freq, from_pct, to_pct)); 1.430 + } 1.431 + } 1.432 + } 1.433 + } 1.434 + 1.435 + // Group connector blocks into one trace 1.436 + for (i++; i < _cfg._num_blocks; i++) { 1.437 + Block *b = _cfg._blocks[i]; 1.438 + assert(b->is_connector(), "connector blocks at the end"); 1.439 + tr->append(b); 1.440 + uf->map(b->_pre_order, tr->id()); 1.441 + traces[b->_pre_order] = NULL; 1.442 + } 1.443 +} 1.444 + 1.445 +//------------------------------union_traces---------------------------------- 1.446 +// Union two traces together in uf, and null out the trace in the list 1.447 +void PhaseBlockLayout::union_traces(Trace* updated_trace, Trace* old_trace) 1.448 +{ 1.449 + uint old_id = old_trace->id(); 1.450 + uint updated_id = updated_trace->id(); 1.451 + 1.452 + uint lo_id = updated_id; 1.453 + uint hi_id = old_id; 1.454 + 1.455 + // If from is greater than to, swap values to meet 1.456 + // UnionFind guarantee. 1.457 + if (updated_id > old_id) { 1.458 + lo_id = old_id; 1.459 + hi_id = updated_id; 1.460 + 1.461 + // Fix up the trace ids 1.462 + traces[lo_id] = traces[updated_id]; 1.463 + updated_trace->set_id(lo_id); 1.464 + } 1.465 + 1.466 + // Union the lower with the higher and remove the pointer 1.467 + // to the higher. 1.468 + uf->Union(lo_id, hi_id); 1.469 + traces[hi_id] = NULL; 1.470 +} 1.471 + 1.472 +//------------------------------grow_traces------------------------------------- 1.473 +// Append traces together via the most frequently executed edges 1.474 +void PhaseBlockLayout::grow_traces() 1.475 +{ 1.476 + // Order the edges, and drive the growth of Traces via the most 1.477 + // frequently executed edges. 
1.478 + edges->sort(edge_order); 1.479 + for (int i = 0; i < edges->length(); i++) { 1.480 + CFGEdge *e = edges->at(i); 1.481 + 1.482 + if (e->state() != CFGEdge::open) continue; 1.483 + 1.484 + Block *src_block = e->from(); 1.485 + Block *targ_block = e->to(); 1.486 + 1.487 + // Don't grow traces along backedges? 1.488 + if (!BlockLayoutRotateLoops) { 1.489 + if (targ_block->_rpo <= src_block->_rpo) { 1.490 + targ_block->set_loop_alignment(targ_block); 1.491 + continue; 1.492 + } 1.493 + } 1.494 + 1.495 + Trace *src_trace = trace(src_block); 1.496 + Trace *targ_trace = trace(targ_block); 1.497 + 1.498 + // If the edge in question can join two traces at their ends, 1.499 + // append one trace to the other. 1.500 + if (src_trace->last_block() == src_block) { 1.501 + if (src_trace == targ_trace) { 1.502 + e->set_state(CFGEdge::interior); 1.503 + if (targ_trace->backedge(e)) { 1.504 + // Reset i to catch any newly eligible edge 1.505 + // (Or we could remember the first "open" edge, and reset there) 1.506 + i = 0; 1.507 + } 1.508 + } else if (targ_trace->first_block() == targ_block) { 1.509 + e->set_state(CFGEdge::connected); 1.510 + src_trace->append(targ_trace); 1.511 + union_traces(src_trace, targ_trace); 1.512 + } 1.513 + } 1.514 + } 1.515 +} 1.516 + 1.517 +//------------------------------merge_traces----------------------------------- 1.518 +// Embed one trace into another, if the fork or join points are sufficiently 1.519 +// balanced. 1.520 +void PhaseBlockLayout::merge_traces(bool fall_thru_only) 1.521 +{ 1.522 + // Walk the edge list a another time, looking at unprocessed edges. 
1.523 + // Fold in diamonds 1.524 + for (int i = 0; i < edges->length(); i++) { 1.525 + CFGEdge *e = edges->at(i); 1.526 + 1.527 + if (e->state() != CFGEdge::open) continue; 1.528 + if (fall_thru_only) { 1.529 + if (e->infrequent()) continue; 1.530 + } 1.531 + 1.532 + Block *src_block = e->from(); 1.533 + Trace *src_trace = trace(src_block); 1.534 + bool src_at_tail = src_trace->last_block() == src_block; 1.535 + 1.536 + Block *targ_block = e->to(); 1.537 + Trace *targ_trace = trace(targ_block); 1.538 + bool targ_at_start = targ_trace->first_block() == targ_block; 1.539 + 1.540 + if (src_trace == targ_trace) { 1.541 + // This may be a loop, but we can't do much about it. 1.542 + e->set_state(CFGEdge::interior); 1.543 + continue; 1.544 + } 1.545 + 1.546 + if (fall_thru_only) { 1.547 + // If the edge links the middle of two traces, we can't do anything. 1.548 + // Mark the edge and continue. 1.549 + if (!src_at_tail && !targ_at_start) { 1.550 + continue; 1.551 + } 1.552 + 1.553 + // Don't grow traces along backedges? 1.554 + if (!BlockLayoutRotateLoops && (targ_block->_rpo <= src_block->_rpo)) { 1.555 + continue; 1.556 + } 1.557 + 1.558 + // If both ends of the edge are available, why didn't we handle it earlier? 1.559 + assert(src_at_tail ^ targ_at_start, "Should have caught this edge earlier."); 1.560 + 1.561 + if (targ_at_start) { 1.562 + // Insert the "targ" trace in the "src" trace if the insertion point 1.563 + // is a two way branch. 1.564 + // Better profitability check possible, but may not be worth it. 1.565 + // Someday, see if this "fork" has an associated "join"; 1.566 + // then make a policy on merging this trace at the fork or join. 1.567 + // For example, other things being equal, it may be better to place this 1.568 + // trace at the join point if the "src" trace ends in a two-way, but 1.569 + // the insertion point is one-way.
1.570 + assert(src_block->num_fall_throughs() == 2, "unexpected diamond"); 1.571 + e->set_state(CFGEdge::connected); 1.572 + src_trace->insert_after(src_block, targ_trace); 1.573 + union_traces(src_trace, targ_trace); 1.574 + } else if (src_at_tail) { 1.575 + if (src_trace != trace(_cfg._broot)) { 1.576 + e->set_state(CFGEdge::connected); 1.577 + targ_trace->insert_before(targ_block, src_trace); 1.578 + union_traces(targ_trace, src_trace); 1.579 + } 1.580 + } 1.581 + } else if (e->state() == CFGEdge::open) { 1.582 + // Append traces, even without a fall-thru connection. 1.583 + // But leave root entry at the beginning of the block list. 1.584 + if (targ_trace != trace(_cfg._broot)) { 1.585 + e->set_state(CFGEdge::connected); 1.586 + src_trace->append(targ_trace); 1.587 + union_traces(src_trace, targ_trace); 1.588 + } 1.589 + } 1.590 + } 1.591 +} 1.592 + 1.593 +//----------------------------reorder_traces----------------------------------- 1.594 +// Order the sequence of the traces in some desirable way, and fixup the 1.595 +// jumps at the end of each block. 1.596 +void PhaseBlockLayout::reorder_traces(int count) 1.597 +{ 1.598 + ResourceArea *area = Thread::current()->resource_area(); 1.599 + Trace ** new_traces = NEW_ARENA_ARRAY(area, Trace *, count); 1.600 + Block_List worklist; 1.601 + int new_count = 0; 1.602 + 1.603 + // Compact the traces. 1.604 + for (int i = 0; i < count; i++) { 1.605 + Trace *tr = traces[i]; 1.606 + if (tr != NULL) { 1.607 + new_traces[new_count++] = tr; 1.608 + } 1.609 + } 1.610 + 1.611 + // The entry block should be first on the new trace list.
1.612 + Trace *tr = trace(_cfg._broot); 1.613 + assert(tr == new_traces[0], "entry trace misplaced"); 1.614 + 1.615 + // Sort the new trace list by frequency 1.616 + qsort(new_traces + 1, new_count - 1, sizeof(new_traces[0]), trace_frequency_order); 1.617 + 1.618 + // Patch up the successor blocks 1.619 + _cfg._blocks.reset(); 1.620 + _cfg._num_blocks = 0; 1.621 + for (int i = 0; i < new_count; i++) { 1.622 + Trace *tr = new_traces[i]; 1.623 + if (tr != NULL) { 1.624 + tr->fixup_blocks(_cfg); 1.625 + } 1.626 + } 1.627 +} 1.628 + 1.629 +//------------------------------PhaseBlockLayout------------------------------- 1.630 +// Order basic blocks based on frequency 1.631 +PhaseBlockLayout::PhaseBlockLayout(PhaseCFG &cfg) : 1.632 + Phase(BlockLayout), 1.633 + _cfg(cfg) 1.634 +{ 1.635 + ResourceMark rm; 1.636 + ResourceArea *area = Thread::current()->resource_area(); 1.637 + 1.638 + // List of traces 1.639 + int size = _cfg._num_blocks + 1; 1.640 + traces = NEW_ARENA_ARRAY(area, Trace *, size); 1.641 + memset(traces, 0, size*sizeof(Trace*)); 1.642 + next = NEW_ARENA_ARRAY(area, Block *, size); 1.643 + memset(next, 0, size*sizeof(Block *)); 1.644 + prev = NEW_ARENA_ARRAY(area, Block *, size); 1.645 + memset(prev , 0, size*sizeof(Block *)); 1.646 + 1.647 + // List of edges 1.648 + edges = new GrowableArray<CFGEdge*>; 1.649 + 1.650 + // Mapping block index --> block_trace 1.651 + uf = new UnionFind(size); 1.652 + uf->reset(size); 1.653 + 1.654 + // Find edges and create traces. 1.655 + find_edges(); 1.656 + 1.657 + // Grow traces at their ends via most frequent edges. 1.658 + grow_traces(); 1.659 + 1.660 + // Merge one trace into another, but only at fall-through points. 1.661 + // This may make diamonds and other related shapes in a trace. 1.662 + merge_traces(true); 1.663 + 1.664 + // Run merge again, allowing two traces to be catenated, even if 1.665 + // one does not fall through into the other. This appends loosely 1.666 + // related traces to be near each other. 
1.667 + merge_traces(false); 1.668 + 1.669 + // Re-order all the remaining traces by frequency 1.670 + reorder_traces(size); 1.671 + 1.672 + assert(_cfg._num_blocks >= (uint) (size - 1), "number of blocks can not shrink"); 1.673 +} 1.674 + 1.675 + 1.676 +//------------------------------backedge--------------------------------------- 1.677 +// Edge e completes a loop in a trace. If the target block is head of the 1.678 +// loop, rotate the loop block so that the loop ends in a conditional branch. 1.679 +bool Trace::backedge(CFGEdge *e) { 1.680 + bool loop_rotated = false; 1.681 + Block *src_block = e->from(); 1.682 + Block *targ_block = e->to(); 1.683 + 1.684 + assert(last_block() == src_block, "loop discovery at back branch"); 1.685 + if (first_block() == targ_block) { 1.686 + if (BlockLayoutRotateLoops && last_block()->num_fall_throughs() < 2) { 1.687 + // Find the last block in the trace that has a conditional 1.688 + // branch. 1.689 + Block *b; 1.690 + for (b = last_block(); b != NULL; b = prev(b)) { 1.691 + if (b->num_fall_throughs() == 2) { 1.692 + break; 1.693 + } 1.694 + } 1.695 + 1.696 + if (b != last_block() && b != NULL) { 1.697 + loop_rotated = true; 1.698 + 1.699 + // Rotate the loop by doing two-part linked-list surgery. 1.700 + append(first_block()); 1.701 + break_loop_after(b); 1.702 + } 1.703 + } 1.704 + 1.705 + // Backbranch to the top of a trace 1.706 + // Scroll forward through the trace from the targ_block. If we find 1.707 + // a loop head before another loop top, use the loop head alignment.
1.708 + for (Block *b = targ_block; b != NULL; b = next(b)) { 1.709 + if (b->has_loop_alignment()) { 1.710 + break; 1.711 + } 1.712 + if (b->head()->is_Loop()) { 1.713 + targ_block = b; 1.714 + break; 1.715 + } 1.716 + } 1.717 + 1.718 + first_block()->set_loop_alignment(targ_block); 1.719 + 1.720 + } else { 1.721 + // Backbranch into the middle of a trace 1.722 + targ_block->set_loop_alignment(targ_block); 1.723 + } 1.724 + 1.725 + return loop_rotated; 1.726 +} 1.727 + 1.728 +//------------------------------fixup_blocks----------------------------------- 1.729 +// push blocks onto the CFG list 1.730 +// ensure that blocks have the correct two-way branch sense 1.731 +void Trace::fixup_blocks(PhaseCFG &cfg) { 1.732 + Block *last = last_block(); 1.733 + for (Block *b = first_block(); b != NULL; b = next(b)) { 1.734 + cfg._blocks.push(b); 1.735 + cfg._num_blocks++; 1.736 + if (!b->is_connector()) { 1.737 + int nfallthru = b->num_fall_throughs(); 1.738 + if (b != last) { 1.739 + if (nfallthru == 2) { 1.740 + // Ensure that the sense of the branch is correct 1.741 + Block *bnext = next(b); 1.742 + Block *bs0 = b->non_connector_successor(0); 1.743 + 1.744 + MachNode *iff = b->_nodes[b->_nodes.size()-3]->as_Mach(); 1.745 + ProjNode *proj0 = b->_nodes[b->_nodes.size()-2]->as_Proj(); 1.746 + ProjNode *proj1 = b->_nodes[b->_nodes.size()-1]->as_Proj(); 1.747 + 1.748 + if (bnext == bs0) { 1.749 + // Fall-thru case in succs[0], should be in succs[1] 1.750 + 1.751 + // Flip targets in _succs map 1.752 + Block *tbs0 = b->_succs[0]; 1.753 + Block *tbs1 = b->_succs[1]; 1.754 + b->_succs.map( 0, tbs1 ); 1.755 + b->_succs.map( 1, tbs0 ); 1.756 + 1.757 + // Flip projections to match targets 1.758 + b->_nodes.map(b->_nodes.size()-2, proj1); 1.759 + b->_nodes.map(b->_nodes.size()-1, proj0); 1.760 + } 1.761 + } 1.762 + } 1.763 + } 1.764 + } 1.765 +}