1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/src/share/vm/opto/loopopts.cpp Sat Dec 01 00:00:00 2007 +0000 1.3 @@ -0,0 +1,2677 @@ 1.4 +/* 1.5 + * Copyright 1999-2006 Sun Microsystems, Inc. All Rights Reserved. 1.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 1.7 + * 1.8 + * This code is free software; you can redistribute it and/or modify it 1.9 + * under the terms of the GNU General Public License version 2 only, as 1.10 + * published by the Free Software Foundation. 1.11 + * 1.12 + * This code is distributed in the hope that it will be useful, but WITHOUT 1.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 1.14 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 1.15 + * version 2 for more details (a copy is included in the LICENSE file that 1.16 + * accompanied this code). 1.17 + * 1.18 + * You should have received a copy of the GNU General Public License version 1.19 + * 2 along with this work; if not, write to the Free Software Foundation, 1.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 1.21 + * 1.22 + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, 1.23 + * CA 95054 USA or visit www.sun.com if you need additional information or 1.24 + * have any questions. 1.25 + * 1.26 + */ 1.27 + 1.28 +#include "incls/_precompiled.incl" 1.29 +#include "incls/_loopopts.cpp.incl" 1.30 + 1.31 +//============================================================================= 1.32 +//------------------------------split_thru_phi--------------------------------- 1.33 +// Split Node 'n' through merge point if there is enough win. 
1.34 +Node *PhaseIdealLoop::split_thru_phi( Node *n, Node *region, int policy ) { 1.35 + int wins = 0; 1.36 + assert( !n->is_CFG(), "" ); 1.37 + assert( region->is_Region(), "" ); 1.38 + Node *phi = new (C, region->req()) PhiNode( region, n->bottom_type() ); 1.39 + uint old_unique = C->unique(); 1.40 + for( uint i = 1; i < region->req(); i++ ) { 1.41 + Node *x; 1.42 + Node* the_clone = NULL; 1.43 + if( region->in(i) == C->top() ) { 1.44 + x = C->top(); // Dead path? Use a dead data op 1.45 + } else { 1.46 + x = n->clone(); // Else clone up the data op 1.47 + the_clone = x; // Remember for possible deletion. 1.48 + // Alter data node to use pre-phi inputs 1.49 + if( n->in(0) == region ) 1.50 + x->set_req( 0, region->in(i) ); 1.51 + for( uint j = 1; j < n->req(); j++ ) { 1.52 + Node *in = n->in(j); 1.53 + if( in->is_Phi() && in->in(0) == region ) 1.54 + x->set_req( j, in->in(i) ); // Use pre-Phi input for the clone 1.55 + } 1.56 + } 1.57 + // Check for a 'win' on some paths 1.58 + const Type *t = x->Value(&_igvn); 1.59 + 1.60 + bool singleton = t->singleton(); 1.61 + 1.62 + // A TOP singleton indicates that there are no possible values incoming 1.63 + // along a particular edge. In most cases, this is OK, and the Phi will 1.64 + // be eliminated later in an Ideal call. However, we can't allow this to 1.65 + // happen if the singleton occurs on loop entry, as the elimination of 1.66 + // the PhiNode may cause the resulting node to migrate back to a previous 1.67 + // loop iteration. 1.68 + if( singleton && t == Type::TOP ) { 1.69 + // Is_Loop() == false does not confirm the absence of a loop (e.g., an 1.70 + // irreducible loop may not be indicated by an affirmative is_Loop()); 1.71 + // therefore, the only top we can split thru a phi is on a backedge of 1.72 + // a loop. 
1.73 + singleton &= region->is_Loop() && (i != LoopNode::EntryControl); 1.74 + } 1.75 + 1.76 + if( singleton ) { 1.77 + wins++; 1.78 + x = ((PhaseGVN&)_igvn).makecon(t); 1.79 + } else { 1.80 + // We now call Identity to try to simplify the cloned node. 1.81 + // Note that some Identity methods call phase->type(this). 1.82 + // Make sure that the type array is big enough for 1.83 + // our new node, even though we may throw the node away. 1.84 + // (Note: This tweaking with igvn only works because x is a new node.) 1.85 + _igvn.set_type(x, t); 1.86 + Node *y = x->Identity(&_igvn); 1.87 + if( y != x ) { 1.88 + wins++; 1.89 + x = y; 1.90 + } else { 1.91 + y = _igvn.hash_find(x); 1.92 + if( y ) { 1.93 + wins++; 1.94 + x = y; 1.95 + } else { 1.96 + // Else x is a new node we are keeping 1.97 + // We do not need register_new_node_with_optimizer 1.98 + // because set_type has already been called. 1.99 + _igvn._worklist.push(x); 1.100 + } 1.101 + } 1.102 + } 1.103 + if (x != the_clone && the_clone != NULL) 1.104 + _igvn.remove_dead_node(the_clone); 1.105 + phi->set_req( i, x ); 1.106 + } 1.107 + // Too few wins? 1.108 + if( wins <= policy ) { 1.109 + _igvn.remove_dead_node(phi); 1.110 + return NULL; 1.111 + } 1.112 + 1.113 + // Record Phi 1.114 + register_new_node( phi, region ); 1.115 + 1.116 + for( uint i2 = 1; i2 < phi->req(); i2++ ) { 1.117 + Node *x = phi->in(i2); 1.118 + // If we commoned up the cloned 'x' with another existing Node, 1.119 + // the existing Node picks up a new use. We need to make the 1.120 + // existing Node occur higher up so it dominates its uses. 1.121 + Node *old_ctrl; 1.122 + IdealLoopTree *old_loop; 1.123 + 1.124 + // The occasional new node 1.125 + if( x->_idx >= old_unique ) { // Found a new, unplaced node? 1.126 + old_ctrl = x->is_Con() ? C->root() : NULL; 1.127 + old_loop = NULL; // Not in any prior loop 1.128 + } else { 1.129 + old_ctrl = x->is_Con() ? 
C->root() : get_ctrl(x); 1.130 + old_loop = get_loop(old_ctrl); // Get prior loop 1.131 + } 1.132 + // New late point must dominate new use 1.133 + Node *new_ctrl = dom_lca( old_ctrl, region->in(i2) ); 1.134 + // Set new location 1.135 + set_ctrl(x, new_ctrl); 1.136 + IdealLoopTree *new_loop = get_loop( new_ctrl ); 1.137 + // If changing loop bodies, see if we need to collect into new body 1.138 + if( old_loop != new_loop ) { 1.139 + if( old_loop && !old_loop->_child ) 1.140 + old_loop->_body.yank(x); 1.141 + if( !new_loop->_child ) 1.142 + new_loop->_body.push(x); // Collect body info 1.143 + } 1.144 + } 1.145 + 1.146 + return phi; 1.147 +} 1.148 + 1.149 +//------------------------------dominated_by------------------------------------ 1.150 +// Replace the dominated test with an obvious true or false. Place it on the 1.151 +// IGVN worklist for later cleanup. Move control-dependent data Nodes on the 1.152 +// live path up to the dominating control. 1.153 +void PhaseIdealLoop::dominated_by( Node *prevdom, Node *iff ) { 1.154 +#ifndef PRODUCT 1.155 + if( VerifyLoopOptimizations && PrintOpto ) tty->print_cr("dominating test"); 1.156 +#endif 1.157 + 1.158 + 1.159 + // prevdom is the dominating projection of the dominating test. 1.160 + assert( iff->is_If(), "" ); 1.161 + assert( iff->Opcode() == Op_If || iff->Opcode() == Op_CountedLoopEnd, "Check this code when new subtype is added"); 1.162 + int pop = prevdom->Opcode(); 1.163 + assert( pop == Op_IfFalse || pop == Op_IfTrue, "" ); 1.164 + // 'con' is set to true or false to kill the dominated test. 1.165 + Node *con = _igvn.makecon(pop == Op_IfTrue ? 
TypeInt::ONE : TypeInt::ZERO); 1.166 + set_ctrl(con, C->root()); // Constant gets a new use 1.167 + // Hack the dominated test 1.168 + _igvn.hash_delete(iff); 1.169 + iff->set_req(1, con); 1.170 + _igvn._worklist.push(iff); 1.171 + 1.172 + // If I dont have a reachable TRUE and FALSE path following the IfNode then 1.173 + // I can assume this path reaches an infinite loop. In this case it's not 1.174 + // important to optimize the data Nodes - either the whole compilation will 1.175 + // be tossed or this path (and all data Nodes) will go dead. 1.176 + if( iff->outcnt() != 2 ) return; 1.177 + 1.178 + // Make control-dependent data Nodes on the live path (path that will remain 1.179 + // once the dominated IF is removed) become control-dependent on the 1.180 + // dominating projection. 1.181 + Node* dp = ((IfNode*)iff)->proj_out(pop == Op_IfTrue); 1.182 + IdealLoopTree *old_loop = get_loop(dp); 1.183 + 1.184 + for (DUIterator_Fast imax, i = dp->fast_outs(imax); i < imax; i++) { 1.185 + Node* cd = dp->fast_out(i); // Control-dependent node 1.186 + if( cd->depends_only_on_test() ) { 1.187 + assert( cd->in(0) == dp, "" ); 1.188 + _igvn.hash_delete( cd ); 1.189 + cd->set_req(0, prevdom); 1.190 + set_early_ctrl( cd ); 1.191 + _igvn._worklist.push(cd); 1.192 + IdealLoopTree *new_loop = get_loop(get_ctrl(cd)); 1.193 + if( old_loop != new_loop ) { 1.194 + if( !old_loop->_child ) old_loop->_body.yank(cd); 1.195 + if( !new_loop->_child ) new_loop->_body.push(cd); 1.196 + } 1.197 + --i; 1.198 + --imax; 1.199 + } 1.200 + } 1.201 +} 1.202 + 1.203 +//------------------------------has_local_phi_input---------------------------- 1.204 +// Return TRUE if 'n' has Phi inputs from its local block and no other 1.205 +// block-local inputs (all non-local-phi inputs come from earlier blocks) 1.206 +Node *PhaseIdealLoop::has_local_phi_input( Node *n ) { 1.207 + Node *n_ctrl = get_ctrl(n); 1.208 + // See if some inputs come from a Phi in this block, or from before 1.209 + // this block. 
1.210 + uint i; 1.211 + for( i = 1; i < n->req(); i++ ) { 1.212 + Node *phi = n->in(i); 1.213 + if( phi->is_Phi() && phi->in(0) == n_ctrl ) 1.214 + break; 1.215 + } 1.216 + if( i >= n->req() ) 1.217 + return NULL; // No Phi inputs; nowhere to clone thru 1.218 + 1.219 + // Check for inputs created between 'n' and the Phi input. These 1.220 + // must split as well; they have already been given the chance 1.221 + // (courtesy of a post-order visit) and since they did not we must 1.222 + // recover the 'cost' of splitting them by being very profitable 1.223 + // when splitting 'n'. Since this is unlikely we simply give up. 1.224 + for( i = 1; i < n->req(); i++ ) { 1.225 + Node *m = n->in(i); 1.226 + if( get_ctrl(m) == n_ctrl && !m->is_Phi() ) { 1.227 + // We allow the special case of AddP's with no local inputs. 1.228 + // This allows us to split-up address expressions. 1.229 + if (m->is_AddP() && 1.230 + get_ctrl(m->in(2)) != n_ctrl && 1.231 + get_ctrl(m->in(3)) != n_ctrl) { 1.232 + // Move the AddP up to dominating point 1.233 + set_ctrl_and_loop(m, find_non_split_ctrl(idom(n_ctrl))); 1.234 + continue; 1.235 + } 1.236 + return NULL; 1.237 + } 1.238 + } 1.239 + 1.240 + return n_ctrl; 1.241 +} 1.242 + 1.243 +//------------------------------remix_address_expressions---------------------- 1.244 +// Rework addressing expressions to get the most loop-invariant stuff 1.245 +// moved out. We'd like to do all associative operators, but it's especially 1.246 +// important (common) to do address expressions. 1.247 +Node *PhaseIdealLoop::remix_address_expressions( Node *n ) { 1.248 + if (!has_ctrl(n)) return NULL; 1.249 + Node *n_ctrl = get_ctrl(n); 1.250 + IdealLoopTree *n_loop = get_loop(n_ctrl); 1.251 + 1.252 + // See if 'n' mixes loop-varying and loop-invariant inputs and 1.253 + // itself is loop-varying. 
1.254 + 1.255 + // Only interested in binary ops (and AddP) 1.256 + if( n->req() < 3 || n->req() > 4 ) return NULL; 1.257 + 1.258 + Node *n1_ctrl = get_ctrl(n->in( 1)); 1.259 + Node *n2_ctrl = get_ctrl(n->in( 2)); 1.260 + Node *n3_ctrl = get_ctrl(n->in(n->req() == 3 ? 2 : 3)); 1.261 + IdealLoopTree *n1_loop = get_loop( n1_ctrl ); 1.262 + IdealLoopTree *n2_loop = get_loop( n2_ctrl ); 1.263 + IdealLoopTree *n3_loop = get_loop( n3_ctrl ); 1.264 + 1.265 + // Does one of my inputs spin in a tighter loop than self? 1.266 + if( (n_loop->is_member( n1_loop ) && n_loop != n1_loop) || 1.267 + (n_loop->is_member( n2_loop ) && n_loop != n2_loop) || 1.268 + (n_loop->is_member( n3_loop ) && n_loop != n3_loop) ) 1.269 + return NULL; // Leave well enough alone 1.270 + 1.271 + // Is at least one of my inputs loop-invariant? 1.272 + if( n1_loop == n_loop && 1.273 + n2_loop == n_loop && 1.274 + n3_loop == n_loop ) 1.275 + return NULL; // No loop-invariant inputs 1.276 + 1.277 + 1.278 + int n_op = n->Opcode(); 1.279 + 1.280 + // Replace expressions like ((V+I) << 2) with (V<<2 + I<<2). 
1.281 + if( n_op == Op_LShiftI ) { 1.282 + // Scale is loop invariant 1.283 + Node *scale = n->in(2); 1.284 + Node *scale_ctrl = get_ctrl(scale); 1.285 + IdealLoopTree *scale_loop = get_loop(scale_ctrl ); 1.286 + if( n_loop == scale_loop || !scale_loop->is_member( n_loop ) ) 1.287 + return NULL; 1.288 + const TypeInt *scale_t = scale->bottom_type()->isa_int(); 1.289 + if( scale_t && scale_t->is_con() && scale_t->get_con() >= 16 ) 1.290 + return NULL; // Dont bother with byte/short masking 1.291 + // Add must vary with loop (else shift would be loop-invariant) 1.292 + Node *add = n->in(1); 1.293 + Node *add_ctrl = get_ctrl(add); 1.294 + IdealLoopTree *add_loop = get_loop(add_ctrl); 1.295 + //assert( n_loop == add_loop, "" ); 1.296 + if( n_loop != add_loop ) return NULL; // happens w/ evil ZKM loops 1.297 + 1.298 + // Convert I-V into I+ (0-V); same for V-I 1.299 + if( add->Opcode() == Op_SubI && 1.300 + _igvn.type( add->in(1) ) != TypeInt::ZERO ) { 1.301 + Node *zero = _igvn.intcon(0); 1.302 + set_ctrl(zero, C->root()); 1.303 + Node *neg = new (C, 3) SubINode( _igvn.intcon(0), add->in(2) ); 1.304 + register_new_node( neg, get_ctrl(add->in(2) ) ); 1.305 + add = new (C, 3) AddINode( add->in(1), neg ); 1.306 + register_new_node( add, add_ctrl ); 1.307 + } 1.308 + if( add->Opcode() != Op_AddI ) return NULL; 1.309 + // See if one add input is loop invariant 1.310 + Node *add_var = add->in(1); 1.311 + Node *add_var_ctrl = get_ctrl(add_var); 1.312 + IdealLoopTree *add_var_loop = get_loop(add_var_ctrl ); 1.313 + Node *add_invar = add->in(2); 1.314 + Node *add_invar_ctrl = get_ctrl(add_invar); 1.315 + IdealLoopTree *add_invar_loop = get_loop(add_invar_ctrl ); 1.316 + if( add_var_loop == n_loop ) { 1.317 + } else if( add_invar_loop == n_loop ) { 1.318 + // Swap to find the invariant part 1.319 + add_invar = add_var; 1.320 + add_invar_ctrl = add_var_ctrl; 1.321 + add_invar_loop = add_var_loop; 1.322 + add_var = add->in(2); 1.323 + Node *add_var_ctrl = get_ctrl(add_var); 1.324 
+ IdealLoopTree *add_var_loop = get_loop(add_var_ctrl ); 1.325 + } else // Else neither input is loop invariant 1.326 + return NULL; 1.327 + if( n_loop == add_invar_loop || !add_invar_loop->is_member( n_loop ) ) 1.328 + return NULL; // No invariant part of the add? 1.329 + 1.330 + // Yes! Reshape address expression! 1.331 + Node *inv_scale = new (C, 3) LShiftINode( add_invar, scale ); 1.332 + register_new_node( inv_scale, add_invar_ctrl ); 1.333 + Node *var_scale = new (C, 3) LShiftINode( add_var, scale ); 1.334 + register_new_node( var_scale, n_ctrl ); 1.335 + Node *var_add = new (C, 3) AddINode( var_scale, inv_scale ); 1.336 + register_new_node( var_add, n_ctrl ); 1.337 + _igvn.hash_delete( n ); 1.338 + _igvn.subsume_node( n, var_add ); 1.339 + return var_add; 1.340 + } 1.341 + 1.342 + // Replace (I+V) with (V+I) 1.343 + if( n_op == Op_AddI || 1.344 + n_op == Op_AddL || 1.345 + n_op == Op_AddF || 1.346 + n_op == Op_AddD || 1.347 + n_op == Op_MulI || 1.348 + n_op == Op_MulL || 1.349 + n_op == Op_MulF || 1.350 + n_op == Op_MulD ) { 1.351 + if( n2_loop == n_loop ) { 1.352 + assert( n1_loop != n_loop, "" ); 1.353 + n->swap_edges(1, 2); 1.354 + } 1.355 + } 1.356 + 1.357 + // Replace ((I1 +p V) +p I2) with ((I1 +p I2) +p V), 1.358 + // but not if I2 is a constant. 
1.359 + if( n_op == Op_AddP ) { 1.360 + if( n2_loop == n_loop && n3_loop != n_loop ) { 1.361 + if( n->in(2)->Opcode() == Op_AddP && !n->in(3)->is_Con() ) { 1.362 + Node *n22_ctrl = get_ctrl(n->in(2)->in(2)); 1.363 + Node *n23_ctrl = get_ctrl(n->in(2)->in(3)); 1.364 + IdealLoopTree *n22loop = get_loop( n22_ctrl ); 1.365 + IdealLoopTree *n23_loop = get_loop( n23_ctrl ); 1.366 + if( n22loop != n_loop && n22loop->is_member(n_loop) && 1.367 + n23_loop == n_loop ) { 1.368 + Node *add1 = new (C, 4) AddPNode( n->in(1), n->in(2)->in(2), n->in(3) ); 1.369 + // Stuff new AddP in the loop preheader 1.370 + register_new_node( add1, n_loop->_head->in(LoopNode::EntryControl) ); 1.371 + Node *add2 = new (C, 4) AddPNode( n->in(1), add1, n->in(2)->in(3) ); 1.372 + register_new_node( add2, n_ctrl ); 1.373 + _igvn.hash_delete( n ); 1.374 + _igvn.subsume_node( n, add2 ); 1.375 + return add2; 1.376 + } 1.377 + } 1.378 + } 1.379 + 1.380 + // Replace (I1 +p (I2 + V)) with ((I1 +p I2) +p V) 1.381 + if( n2_loop != n_loop && n3_loop == n_loop ) { 1.382 + if( n->in(3)->Opcode() == Op_AddI ) { 1.383 + Node *V = n->in(3)->in(1); 1.384 + Node *I = n->in(3)->in(2); 1.385 + if( is_member(n_loop,get_ctrl(V)) ) { 1.386 + } else { 1.387 + Node *tmp = V; V = I; I = tmp; 1.388 + } 1.389 + if( !is_member(n_loop,get_ctrl(I)) ) { 1.390 + Node *add1 = new (C, 4) AddPNode( n->in(1), n->in(2), I ); 1.391 + // Stuff new AddP in the loop preheader 1.392 + register_new_node( add1, n_loop->_head->in(LoopNode::EntryControl) ); 1.393 + Node *add2 = new (C, 4) AddPNode( n->in(1), add1, V ); 1.394 + register_new_node( add2, n_ctrl ); 1.395 + _igvn.hash_delete( n ); 1.396 + _igvn.subsume_node( n, add2 ); 1.397 + return add2; 1.398 + } 1.399 + } 1.400 + } 1.401 + } 1.402 + 1.403 + return NULL; 1.404 +} 1.405 + 1.406 +//------------------------------conditional_move------------------------------- 1.407 +// Attempt to replace a Phi with a conditional move. We have some pretty 1.408 +// strict profitability requirements. 
All Phis at the merge point must 1.409 +// be converted, so we can remove the control flow. We need to limit the 1.410 +// number of c-moves to a small handful. All code that was in the side-arms 1.411 +// of the CFG diamond is now speculatively executed. This code has to be 1.412 +// "cheap enough". We are pretty much limited to CFG diamonds that merge 1.413 +// 1 or 2 items with a total of 1 or 2 ops executed speculatively. 1.414 +Node *PhaseIdealLoop::conditional_move( Node *region ) { 1.415 + 1.416 + assert( region->is_Region(), "sanity check" ); 1.417 + if( region->req() != 3 ) return NULL; 1.418 + 1.419 + // Check for CFG diamond 1.420 + Node *lp = region->in(1); 1.421 + Node *rp = region->in(2); 1.422 + if( !lp || !rp ) return NULL; 1.423 + Node *lp_c = lp->in(0); 1.424 + if( lp_c == NULL || lp_c != rp->in(0) || !lp_c->is_If() ) return NULL; 1.425 + IfNode *iff = lp_c->as_If(); 1.426 + 1.427 + // Check for highly predictable branch. No point in CMOV'ing if 1.428 + // we are going to predict accurately all the time. 1.429 + // %%% This hides patterns produced by utility methods like Math.min. 1.430 + if( iff->_prob < PROB_UNLIKELY_MAG(3) || 1.431 + iff->_prob > PROB_LIKELY_MAG(3) ) 1.432 + return NULL; 1.433 + 1.434 + // Check for ops pinned in an arm of the diamond. 
1.435 + // Can't remove the control flow in this case 1.436 + if( lp->outcnt() > 1 ) return NULL; 1.437 + if( rp->outcnt() > 1 ) return NULL; 1.438 + 1.439 + // Check profitability 1.440 + int cost = 0; 1.441 + for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) { 1.442 + Node *out = region->fast_out(i); 1.443 + if( !out->is_Phi() ) continue; // Ignore other control edges, etc 1.444 + PhiNode* phi = out->as_Phi(); 1.445 + switch (phi->type()->basic_type()) { 1.446 + case T_LONG: 1.447 + cost++; // Probably encodes as 2 CMOV's 1.448 + case T_INT: // These all CMOV fine 1.449 + case T_FLOAT: 1.450 + case T_DOUBLE: 1.451 + case T_ADDRESS: // (RawPtr) 1.452 + cost++; 1.453 + break; 1.454 + case T_OBJECT: { // Base oops are OK, but not derived oops 1.455 + const TypeOopPtr *tp = phi->type()->isa_oopptr(); 1.456 + // Derived pointers are Bad (tm): what's the Base (for GC purposes) of a 1.457 + // CMOVE'd derived pointer? It's a CMOVE'd derived base. Thus 1.458 + // CMOVE'ing a derived pointer requires we also CMOVE the base. If we 1.459 + // have a Phi for the base here that we convert to a CMOVE all is well 1.460 + // and good. But if the base is dead, we'll not make a CMOVE. Later 1.461 + // the allocator will have to produce a base by creating a CMOVE of the 1.462 + // relevant bases. This puts the allocator in the business of 1.463 + // manufacturing expensive instructions, generally a bad plan. 1.464 + // Just Say No to Conditionally-Moved Derived Pointers. 
1.465 + if( tp && tp->offset() != 0 ) 1.466 + return NULL; 1.467 + cost++; 1.468 + break; 1.469 + } 1.470 + default: 1.471 + return NULL; // In particular, can't do memory or I/O 1.472 + } 1.473 + // Add in cost any speculative ops 1.474 + for( uint j = 1; j < region->req(); j++ ) { 1.475 + Node *proj = region->in(j); 1.476 + Node *inp = phi->in(j); 1.477 + if (get_ctrl(inp) == proj) { // Found local op 1.478 + cost++; 1.479 + // Check for a chain of dependent ops; these will all become 1.480 + // speculative in a CMOV. 1.481 + for( uint k = 1; k < inp->req(); k++ ) 1.482 + if (get_ctrl(inp->in(k)) == proj) 1.483 + return NULL; // Too much speculative goo 1.484 + } 1.485 + } 1.486 + // See if the Phi is used by a Cmp. This will likely Split-If, a 1.487 + // higher-payoff operation. 1.488 + for (DUIterator_Fast kmax, k = phi->fast_outs(kmax); k < kmax; k++) { 1.489 + Node* use = phi->fast_out(k); 1.490 + if( use->is_Cmp() ) 1.491 + return NULL; 1.492 + } 1.493 + } 1.494 + if( cost >= ConditionalMoveLimit ) return NULL; // Too much goo 1.495 + 1.496 + // -------------- 1.497 + // Now replace all Phis with CMOV's 1.498 + Node *cmov_ctrl = iff->in(0); 1.499 + uint flip = (lp->Opcode() == Op_IfTrue); 1.500 + while( 1 ) { 1.501 + PhiNode* phi = NULL; 1.502 + for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) { 1.503 + Node *out = region->fast_out(i); 1.504 + if (out->is_Phi()) { 1.505 + phi = out->as_Phi(); 1.506 + break; 1.507 + } 1.508 + } 1.509 + if (phi == NULL) break; 1.510 +#ifndef PRODUCT 1.511 + if( PrintOpto && VerifyLoopOptimizations ) tty->print_cr("CMOV"); 1.512 +#endif 1.513 + // Move speculative ops 1.514 + for( uint j = 1; j < region->req(); j++ ) { 1.515 + Node *proj = region->in(j); 1.516 + Node *inp = phi->in(j); 1.517 + if (get_ctrl(inp) == proj) { // Found local op 1.518 +#ifndef PRODUCT 1.519 + if( PrintOpto && VerifyLoopOptimizations ) { 1.520 + tty->print(" speculate: "); 1.521 + inp->dump(); 1.522 + } 1.523 +#endif 1.524 + 
set_ctrl(inp, cmov_ctrl); 1.525 + } 1.526 + } 1.527 + Node *cmov = CMoveNode::make( C, cmov_ctrl, iff->in(1), phi->in(1+flip), phi->in(2-flip), _igvn.type(phi) ); 1.528 + register_new_node( cmov, cmov_ctrl ); 1.529 + _igvn.hash_delete(phi); 1.530 + _igvn.subsume_node( phi, cmov ); 1.531 +#ifndef PRODUCT 1.532 + if( VerifyLoopOptimizations ) verify(); 1.533 +#endif 1.534 + } 1.535 + 1.536 + // The useless CFG diamond will fold up later; see the optimization in 1.537 + // RegionNode::Ideal. 1.538 + _igvn._worklist.push(region); 1.539 + 1.540 + return iff->in(1); 1.541 +} 1.542 + 1.543 +//------------------------------split_if_with_blocks_pre----------------------- 1.544 +// Do the real work in a non-recursive function. Data nodes want to be 1.545 +// cloned in the pre-order so they can feed each other nicely. 1.546 +Node *PhaseIdealLoop::split_if_with_blocks_pre( Node *n ) { 1.547 + // Cloning these guys is unlikely to win 1.548 + int n_op = n->Opcode(); 1.549 + if( n_op == Op_MergeMem ) return n; 1.550 + if( n->is_Proj() ) return n; 1.551 + // Do not clone-up CmpFXXX variations, as these are always 1.552 + // followed by a CmpI 1.553 + if( n->is_Cmp() ) return n; 1.554 + // Attempt to use a conditional move instead of a phi/branch 1.555 + if( ConditionalMoveLimit > 0 && n_op == Op_Region ) { 1.556 + Node *cmov = conditional_move( n ); 1.557 + if( cmov ) return cmov; 1.558 + } 1.559 + if( n->is_CFG() || n_op == Op_StorePConditional || n_op == Op_StoreLConditional || n_op == Op_CompareAndSwapI || n_op == Op_CompareAndSwapL ||n_op == Op_CompareAndSwapP) return n; 1.560 + if( n_op == Op_Opaque1 || // Opaque nodes cannot be mod'd 1.561 + n_op == Op_Opaque2 ) { 1.562 + if( !C->major_progress() ) // If chance of no more loop opts... 
1.563 + _igvn._worklist.push(n); // maybe we'll remove them 1.564 + return n; 1.565 + } 1.566 + 1.567 + if( n->is_Con() ) return n; // No cloning for Con nodes 1.568 + 1.569 + Node *n_ctrl = get_ctrl(n); 1.570 + if( !n_ctrl ) return n; // Dead node 1.571 + 1.572 + // Attempt to remix address expressions for loop invariants 1.573 + Node *m = remix_address_expressions( n ); 1.574 + if( m ) return m; 1.575 + 1.576 + // Determine if the Node has inputs from some local Phi. 1.577 + // Returns the block to clone thru. 1.578 + Node *n_blk = has_local_phi_input( n ); 1.579 + if( !n_blk ) return n; 1.580 + // Do not clone the trip counter through on a CountedLoop 1.581 + // (messes up the canonical shape). 1.582 + if( n_blk->is_CountedLoop() && n->Opcode() == Op_AddI ) return n; 1.583 + 1.584 + // Check for having no control input; not pinned. Allow 1.585 + // dominating control. 1.586 + if( n->in(0) ) { 1.587 + Node *dom = idom(n_blk); 1.588 + if( dom_lca( n->in(0), dom ) != n->in(0) ) 1.589 + return n; 1.590 + } 1.591 + // Policy: when is it profitable. You must get more wins than 1.592 + // policy before it is considered profitable. Policy is usually 0, 1.593 + // so 1 win is considered profitable. Big merges will require big 1.594 + // cloning, so get a larger policy. 1.595 + int policy = n_blk->req() >> 2; 1.596 + 1.597 + // If the loop is a candidate for range check elimination, 1.598 + // delay splitting through it's phi until a later loop optimization 1.599 + if (n_blk->is_CountedLoop()) { 1.600 + IdealLoopTree *lp = get_loop(n_blk); 1.601 + if (lp && lp->_rce_candidate) { 1.602 + return n; 1.603 + } 1.604 + } 1.605 + 1.606 + // Use same limit as split_if_with_blocks_post 1.607 + if( C->unique() > 35000 ) return n; // Method too big 1.608 + 1.609 + // Split 'n' through the merge point if it is profitable 1.610 + Node *phi = split_thru_phi( n, n_blk, policy ); 1.611 + if( !phi ) return n; 1.612 + 1.613 + // Found a Phi to split thru! 
1.614 + // Replace 'n' with the new phi 1.615 + _igvn.hash_delete(n); 1.616 + _igvn.subsume_node( n, phi ); 1.617 + // Moved a load around the loop, 'en-registering' something. 1.618 + if( n_blk->Opcode() == Op_Loop && n->is_Load() && 1.619 + !phi->in(LoopNode::LoopBackControl)->is_Load() ) 1.620 + C->set_major_progress(); 1.621 + 1.622 + return phi; 1.623 +} 1.624 + 1.625 +static bool merge_point_too_heavy(Compile* C, Node* region) { 1.626 + // Bail out if the region and its phis have too many users. 1.627 + int weight = 0; 1.628 + for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) { 1.629 + weight += region->fast_out(i)->outcnt(); 1.630 + } 1.631 + int nodes_left = MaxNodeLimit - C->unique(); 1.632 + if (weight * 8 > nodes_left) { 1.633 +#ifndef PRODUCT 1.634 + if (PrintOpto) 1.635 + tty->print_cr("*** Split-if bails out: %d nodes, region weight %d", C->unique(), weight); 1.636 +#endif 1.637 + return true; 1.638 + } else { 1.639 + return false; 1.640 + } 1.641 +} 1.642 + 1.643 +#ifdef _LP64 1.644 +static bool merge_point_safe(Node* region) { 1.645 + // 4799512: Stop split_if_with_blocks from splitting a block with a ConvI2LNode 1.646 + // having a PhiNode input. This sidesteps the dangerous case where the split 1.647 + // ConvI2LNode may become TOP if the input Value() does not 1.648 + // overlap the ConvI2L range, leaving a node which may not dominate its 1.649 + // uses. 1.650 + // A better fix for this problem can be found in the BugTraq entry, but 1.651 + // expediency for Mantis demands this hack. 
1.652 + for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) { 1.653 + Node* n = region->fast_out(i); 1.654 + if (n->is_Phi()) { 1.655 + for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) { 1.656 + Node* m = n->fast_out(j); 1.657 + if (m->Opcode() == Op_ConvI2L) { 1.658 + return false; 1.659 + } 1.660 + } 1.661 + } 1.662 + } 1.663 + return true; 1.664 +} 1.665 +#endif 1.666 + 1.667 + 1.668 +//------------------------------place_near_use--------------------------------- 1.669 +// Place some computation next to use but not inside inner loops. 1.670 +// For inner loop uses move it to the preheader area. 1.671 +Node *PhaseIdealLoop::place_near_use( Node *useblock ) const { 1.672 + IdealLoopTree *u_loop = get_loop( useblock ); 1.673 + return (u_loop->_irreducible || u_loop->_child) 1.674 + ? useblock 1.675 + : u_loop->_head->in(LoopNode::EntryControl); 1.676 +} 1.677 + 1.678 + 1.679 +//------------------------------split_if_with_blocks_post---------------------- 1.680 +// Do the real work in a non-recursive function. CFG hackery wants to be 1.681 +// in the post-order, so it can dirty the I-DOM info and not use the dirtied 1.682 +// info. 1.683 +void PhaseIdealLoop::split_if_with_blocks_post( Node *n ) { 1.684 + 1.685 + // Cloning Cmp through Phi's involves the split-if transform. 1.686 + // FastLock is not used by an If 1.687 + if( n->is_Cmp() && !n->is_FastLock() ) { 1.688 + if( C->unique() > 35000 ) return; // Method too big 1.689 + 1.690 + // Do not do 'split-if' if irreducible loops are present. 1.691 + if( _has_irreducible_loops ) 1.692 + return; 1.693 + 1.694 + Node *n_ctrl = get_ctrl(n); 1.695 + // Determine if the Node has inputs from some local Phi. 1.696 + // Returns the block to clone thru. 1.697 + Node *n_blk = has_local_phi_input( n ); 1.698 + if( n_blk != n_ctrl ) return; 1.699 + 1.700 + if( merge_point_too_heavy(C, n_ctrl) ) 1.701 + return; 1.702 + 1.703 + if( n->outcnt() != 1 ) return; // Multiple bool's from 1 compare? 
1.704 + Node *bol = n->unique_out(); 1.705 + assert( bol->is_Bool(), "expect a bool here" ); 1.706 + if( bol->outcnt() != 1 ) return;// Multiple branches from 1 compare? 1.707 + Node *iff = bol->unique_out(); 1.708 + 1.709 + // Check some safety conditions 1.710 + if( iff->is_If() ) { // Classic split-if? 1.711 + if( iff->in(0) != n_ctrl ) return; // Compare must be in same blk as if 1.712 + } else if (iff->is_CMove()) { // Trying to split-up a CMOVE 1.713 + if( get_ctrl(iff->in(2)) == n_ctrl || 1.714 + get_ctrl(iff->in(3)) == n_ctrl ) 1.715 + return; // Inputs not yet split-up 1.716 + if ( get_loop(n_ctrl) != get_loop(get_ctrl(iff)) ) { 1.717 + return; // Loop-invar test gates loop-varying CMOVE 1.718 + } 1.719 + } else { 1.720 + return; // some other kind of node, such as an Allocate 1.721 + } 1.722 + 1.723 + // Do not do 'split-if' if some paths are dead. First do dead code 1.724 + // elimination and then see if its still profitable. 1.725 + for( uint i = 1; i < n_ctrl->req(); i++ ) 1.726 + if( n_ctrl->in(i) == C->top() ) 1.727 + return; 1.728 + 1.729 + // When is split-if profitable? Every 'win' on means some control flow 1.730 + // goes dead, so it's almost always a win. 1.731 + int policy = 0; 1.732 + // If trying to do a 'Split-If' at the loop head, it is only 1.733 + // profitable if the cmp folds up on BOTH paths. Otherwise we 1.734 + // risk peeling a loop forever. 1.735 + 1.736 + // CNC - Disabled for now. Requires careful handling of loop 1.737 + // body selection for the cloned code. Also, make sure we check 1.738 + // for any input path not being in the same loop as n_ctrl. For 1.739 + // irreducible loops we cannot check for 'n_ctrl->is_Loop()' 1.740 + // because the alternative loop entry points won't be converted 1.741 + // into LoopNodes. 
1.742 + IdealLoopTree *n_loop = get_loop(n_ctrl); 1.743 + for( uint j = 1; j < n_ctrl->req(); j++ ) 1.744 + if( get_loop(n_ctrl->in(j)) != n_loop ) 1.745 + return; 1.746 + 1.747 +#ifdef _LP64 1.748 + // Check for safety of the merge point. 1.749 + if( !merge_point_safe(n_ctrl) ) { 1.750 + return; 1.751 + } 1.752 +#endif 1.753 + 1.754 + // Split compare 'n' through the merge point if it is profitable 1.755 + Node *phi = split_thru_phi( n, n_ctrl, policy ); 1.756 + if( !phi ) return; 1.757 + 1.758 + // Found a Phi to split thru! 1.759 + // Replace 'n' with the new phi 1.760 + _igvn.hash_delete(n); 1.761 + _igvn.subsume_node( n, phi ); 1.762 + 1.763 + // Now split the bool up thru the phi 1.764 + Node *bolphi = split_thru_phi( bol, n_ctrl, -1 ); 1.765 + _igvn.hash_delete(bol); 1.766 + _igvn.subsume_node( bol, bolphi ); 1.767 + assert( iff->in(1) == bolphi, "" ); 1.768 + if( bolphi->Value(&_igvn)->singleton() ) 1.769 + return; 1.770 + 1.771 + // Conditional-move? Must split up now 1.772 + if( !iff->is_If() ) { 1.773 + Node *cmovphi = split_thru_phi( iff, n_ctrl, -1 ); 1.774 + _igvn.hash_delete(iff); 1.775 + _igvn.subsume_node( iff, cmovphi ); 1.776 + return; 1.777 + } 1.778 + 1.779 + // Now split the IF 1.780 + do_split_if( iff ); 1.781 + return; 1.782 + } 1.783 + 1.784 + // Check for an IF ready to split; one that has its 1.785 + // condition codes input coming from a Phi at the block start. 1.786 + int n_op = n->Opcode(); 1.787 + 1.788 + // Check for an IF being dominated by another IF same test 1.789 + if( n_op == Op_If ) { 1.790 + Node *bol = n->in(1); 1.791 + uint max = bol->outcnt(); 1.792 + // Check for same test used more than once? 1.793 + if( n_op == Op_If && max > 1 && bol->is_Bool() ) { 1.794 + // Search up IDOMs to see if this IF is dominated. 
1.795 + Node *cutoff = get_ctrl(bol); 1.796 + 1.797 + // Now search up IDOMs till cutoff, looking for a dominating test 1.798 + Node *prevdom = n; 1.799 + Node *dom = idom(prevdom); 1.800 + while( dom != cutoff ) { 1.801 + if( dom->req() > 1 && dom->in(1) == bol && prevdom->in(0) == dom ) { 1.802 + // Replace the dominated test with an obvious true or false. 1.803 + // Place it on the IGVN worklist for later cleanup. 1.804 + C->set_major_progress(); 1.805 + dominated_by( prevdom, n ); 1.806 +#ifndef PRODUCT 1.807 + if( VerifyLoopOptimizations ) verify(); 1.808 +#endif 1.809 + return; 1.810 + } 1.811 + prevdom = dom; 1.812 + dom = idom(prevdom); 1.813 + } 1.814 + } 1.815 + } 1.816 + 1.817 + // See if a shared loop-varying computation has no loop-varying uses. 1.818 + // Happens if something is only used for JVM state in uncommon trap exits, 1.819 + // like various versions of induction variable+offset. Clone the 1.820 + // computation per usage to allow it to sink out of the loop. 1.821 + if (has_ctrl(n) && !n->in(0)) {// n not dead and has no control edge (can float about) 1.822 + Node *n_ctrl = get_ctrl(n); 1.823 + IdealLoopTree *n_loop = get_loop(n_ctrl); 1.824 + if( n_loop != _ltree_root ) { 1.825 + DUIterator_Fast imax, i = n->fast_outs(imax); 1.826 + for (; i < imax; i++) { 1.827 + Node* u = n->fast_out(i); 1.828 + if( !has_ctrl(u) ) break; // Found control user 1.829 + IdealLoopTree *u_loop = get_loop(get_ctrl(u)); 1.830 + if( u_loop == n_loop ) break; // Found loop-varying use 1.831 + if( n_loop->is_member( u_loop ) ) break; // Found use in inner loop 1.832 + if( u->Opcode() == Op_Opaque1 ) break; // Found loop limit, bugfix for 4677003 1.833 + } 1.834 + bool did_break = (i < imax); // Did we break out of the previous loop? 1.835 + if (!did_break && n->outcnt() > 1) { // All uses in outer loops! 
1.836 + Node *late_load_ctrl; 1.837 + if (n->is_Load()) { 1.838 + // If n is a load, get and save the result from get_late_ctrl(), 1.839 + // to be later used in calculating the control for n's clones. 1.840 + clear_dom_lca_tags(); 1.841 + late_load_ctrl = get_late_ctrl(n, n_ctrl); 1.842 + } 1.843 + // If n is a load, and the late control is the same as the current 1.844 + // control, then the cloning of n is a pointless exercise, because 1.845 + // GVN will ensure that we end up where we started. 1.846 + if (!n->is_Load() || late_load_ctrl != n_ctrl) { 1.847 + for (DUIterator_Last jmin, j = n->last_outs(jmin); j >= jmin; ) { 1.848 + Node *u = n->last_out(j); // Clone private computation per use 1.849 + _igvn.hash_delete(u); 1.850 + _igvn._worklist.push(u); 1.851 + Node *x = n->clone(); // Clone computation 1.852 + Node *x_ctrl = NULL; 1.853 + if( u->is_Phi() ) { 1.854 + // Replace all uses of normal nodes. Replace Phi uses 1.855 + // individually, so the seperate Nodes can sink down 1.856 + // different paths. 1.857 + uint k = 1; 1.858 + while( u->in(k) != n ) k++; 1.859 + u->set_req( k, x ); 1.860 + // x goes next to Phi input path 1.861 + x_ctrl = u->in(0)->in(k); 1.862 + --j; 1.863 + } else { // Normal use 1.864 + // Replace all uses 1.865 + for( uint k = 0; k < u->req(); k++ ) { 1.866 + if( u->in(k) == n ) { 1.867 + u->set_req( k, x ); 1.868 + --j; 1.869 + } 1.870 + } 1.871 + x_ctrl = get_ctrl(u); 1.872 + } 1.873 + 1.874 + // Find control for 'x' next to use but not inside inner loops. 1.875 + // For inner loop uses get the preheader area. 1.876 + x_ctrl = place_near_use(x_ctrl); 1.877 + 1.878 + if (n->is_Load()) { 1.879 + // For loads, add a control edge to a CFG node outside of the loop 1.880 + // to force them to not combine and return back inside the loop 1.881 + // during GVN optimization (4641526). 
1.882 + // 1.883 + // Because we are setting the actual control input, factor in 1.884 + // the result from get_late_ctrl() so we respect any 1.885 + // anti-dependences. (6233005). 1.886 + x_ctrl = dom_lca(late_load_ctrl, x_ctrl); 1.887 + 1.888 + // Don't allow the control input to be a CFG splitting node. 1.889 + // Such nodes should only have ProjNodes as outs, e.g. IfNode 1.890 + // should only have IfTrueNode and IfFalseNode (4985384). 1.891 + x_ctrl = find_non_split_ctrl(x_ctrl); 1.892 + assert(dom_depth(n_ctrl) <= dom_depth(x_ctrl), "n is later than its clone"); 1.893 + 1.894 + x->set_req(0, x_ctrl); 1.895 + } 1.896 + register_new_node(x, x_ctrl); 1.897 + 1.898 + // Some institutional knowledge is needed here: 'x' is 1.899 + // yanked because if the optimizer runs GVN on it all the 1.900 + // cloned x's will common up and undo this optimization and 1.901 + // be forced back in the loop. This is annoying because it 1.902 + // makes +VerifyOpto report false-positives on progress. I 1.903 + // tried setting control edges on the x's to force them to 1.904 + // not combine, but the matching gets worried when it tries 1.905 + // to fold a StoreP and an AddP together (as part of an 1.906 + // address expression) and the AddP and StoreP have 1.907 + // different controls. 1.908 + if( !x->is_Load() ) _igvn._worklist.yank(x); 1.909 + } 1.910 + _igvn.remove_dead_node(n); 1.911 + } 1.912 + } 1.913 + } 1.914 + } 1.915 + 1.916 + // Check for Opaque2's who's loop has disappeared - who's input is in the 1.917 + // same loop nest as their output. Remove 'em, they are no longer useful. 
  if( n_op == Op_Opaque2 &&
      n->in(1) != NULL &&
      get_loop(get_ctrl(n)) == get_loop(get_ctrl(n->in(1))) ) {
    _igvn.add_users_to_worklist(n);
    _igvn.hash_delete(n);
    _igvn.subsume_node( n, n->in(1) );  // Replace the Opaque2 by its input
  }
}

//------------------------------split_if_with_blocks---------------------------
// Check for aggressive application of 'split-if' optimization,
// using basic block level info.
// Non-recursive depth-first walk over every node reachable from the root:
// split_if_with_blocks_pre() runs on the way down, and
// split_if_with_blocks_post() on the way back up.  'visited' and 'nstack'
// are caller-supplied scratch structures; nstack holds (parent node,
// next-child-index) pairs in place of a recursion stack.
void PhaseIdealLoop::split_if_with_blocks( VectorSet &visited, Node_Stack &nstack ) {
  Node *n = C->root();
  visited.set(n->_idx); // first, mark node as visited
  // Do pre-visit work for root
  n = split_if_with_blocks_pre( n );
  uint cnt = n->outcnt();
  uint i   = 0;
  while (true) {
    // Visit all children
    if (i < cnt) {
      Node* use = n->raw_out(i);
      ++i;
      // Skip dead uses (outcnt == 0) and nodes already visited.
      if (use->outcnt() != 0 && !visited.test_set(use->_idx)) {
        // Now do pre-visit work for this use
        use = split_if_with_blocks_pre( use );
        nstack.push(n, i); // Save parent and next use's index.
        n   = use;         // Process all children of current use.
        cnt = use->outcnt();
        i   = 0;
      }
    }
    else {
      // All of n's children have been processed, complete post-processing.
      if (cnt != 0 && !n->is_Con()) {
        assert(has_node(n), "no dead nodes");
        split_if_with_blocks_post( n );
      }
      if (nstack.is_empty()) {
        // Finished all nodes on stack.
        break;
      }
      // Get saved parent node and next use's index. Visit the rest of uses.
      n   = nstack.node();
      cnt = n->outcnt();
      i   = nstack.index();
      nstack.pop();
    }
  }
}


//=============================================================================
//
//                   C L O N E   A   L O O P   B O D Y
//

//------------------------------clone_iff--------------------------------------
// Passed in a Phi merging (recursively) some nearly equivalent Bool/Cmps.
// "Nearly" because all Nodes have been cloned from the original in the loop,
// but the fall-in edges to the Cmp are different.  Clone bool/Cmp pairs
// through the Phi recursively, and return a Bool.
BoolNode *PhaseIdealLoop::clone_iff( PhiNode *phi, IdealLoopTree *loop ) {

  // Convert this Phi into a Phi merging Bools; nested Phi inputs are first
  // collapsed recursively into Bools of their own.
  uint i;
  for( i = 1; i < phi->req(); i++ ) {
    Node *b = phi->in(i);
    if( b->is_Phi() ) {
      _igvn.hash_delete(phi);   // Changing an input invalidates the hash
      _igvn._worklist.push(phi);
      phi->set_req(i, clone_iff( b->as_Phi(), loop ));
    } else {
      assert( b->is_Bool(), "" );
    }
  }

  Node *sample_bool = phi->in(1); // Input 1 serves as the template Bool/Cmp
  Node *sample_cmp  = sample_bool->in(1);

  // Make Phis to merge the Cmp's inputs.
  int size = phi->in(0)->req();
  PhiNode *phi1 = new (C, size) PhiNode( phi->in(0), Type::TOP );
  PhiNode *phi2 = new (C, size) PhiNode( phi->in(0), Type::TOP );
  for( i = 1; i < phi->req(); i++ ) {
    Node *n1 = phi->in(i)->in(1)->in(1);  // left  operand of i-th merged Cmp
    Node *n2 = phi->in(i)->in(1)->in(2);  // right operand of i-th merged Cmp
    phi1->set_req( i, n1 );
    phi2->set_req( i, n2 );
    // Widen the new Phis' types as each operand is merged in.
    phi1->set_type( phi1->type()->meet(n1->bottom_type()) );
    phi2->set_type( phi2->type()->meet(n2->bottom_type()) );
  }
  // See if these Phis have been made before.
  // Register with optimizer
  Node *hit1 = _igvn.hash_find_insert(phi1);
  if( hit1 ) {                          // Hit, toss just made Phi
    _igvn.remove_dead_node(phi1);       // Remove new phi
    assert( hit1->is_Phi(), "" );
    phi1 = (PhiNode*)hit1;              // Use existing phi
  } else {                              // Miss
    _igvn.register_new_node_with_optimizer(phi1);
  }
  Node *hit2 = _igvn.hash_find_insert(phi2);
  if( hit2 ) {                          // Hit, toss just made Phi
    _igvn.remove_dead_node(phi2);       // Remove new phi
    assert( hit2->is_Phi(), "" );
    phi2 = (PhiNode*)hit2;              // Use existing phi
  } else {                              // Miss
    _igvn.register_new_node_with_optimizer(phi2);
  }
  // Register Phis with loop/block info
  set_ctrl(phi1, phi->in(0));
  set_ctrl(phi2, phi->in(0));
  // Make a new Cmp over the merged operand Phis
  Node *cmp = sample_cmp->clone();
  cmp->set_req( 1, phi1 );
  cmp->set_req( 2, phi2 );
  _igvn.register_new_node_with_optimizer(cmp);
  set_ctrl(cmp, phi->in(0));

  // Make a new Bool over the new Cmp
  Node *b = sample_bool->clone();
  b->set_req(1,cmp);
  _igvn.register_new_node_with_optimizer(b);
  set_ctrl(b, phi->in(0));

  assert( b->is_Bool(), "" );
  return (BoolNode*)b;
}

//------------------------------clone_bool-------------------------------------
// Passed in a Phi merging (recursively) some nearly equivalent Cmps (or TOP).
// "Nearly" because all Nodes have been cloned from the original in the loop,
// but the fall-in edges to the Cmp are different.  Clone the Cmp pairs
// through the Phi recursively, and return a Cmp.
// Returns a CmpNode whose operands are Phis merging the operands of the
// original Cmps; TOP inputs are passed through into both operand Phis.
CmpNode *PhaseIdealLoop::clone_bool( PhiNode *phi, IdealLoopTree *loop ) {
  uint i;
  // Convert this Phi into a Phi merging Cmps; nested Phi inputs are first
  // collapsed recursively into Cmps of their own.
  for( i = 1; i < phi->req(); i++ ) {
    Node *b = phi->in(i);
    if( b->is_Phi() ) {
      _igvn.hash_delete(phi);   // Changing an input invalidates the hash
      _igvn._worklist.push(phi);
      phi->set_req(i, clone_bool( b->as_Phi(), loop ));
    } else {
      assert( b->is_Cmp() || b->is_top(), "inputs are all Cmp or TOP" );
    }
  }

  Node *sample_cmp = phi->in(1);  // Input 1 serves as the template Cmp

  // Make Phis to merge the Cmp's inputs.
  int size = phi->in(0)->req();
  PhiNode *phi1 = new (C, size) PhiNode( phi->in(0), Type::TOP );
  PhiNode *phi2 = new (C, size) PhiNode( phi->in(0), Type::TOP );
  for( uint j = 1; j < phi->req(); j++ ) {
    Node *cmp_top = phi->in(j); // Inputs are all Cmp or TOP
    Node *n1, *n2;
    if( cmp_top->is_Cmp() ) {
      n1 = cmp_top->in(1);
      n2 = cmp_top->in(2);
    } else {
      // TOP input: feed TOP into both operand Phis for this path.
      n1 = n2 = cmp_top;
    }
    phi1->set_req( j, n1 );
    phi2->set_req( j, n2 );
    // Widen the new Phis' types as each operand is merged in.
    phi1->set_type( phi1->type()->meet(n1->bottom_type()) );
    phi2->set_type( phi2->type()->meet(n2->bottom_type()) );
  }

  // See if these Phis have been made before.
  // Register with optimizer
  Node *hit1 = _igvn.hash_find_insert(phi1);
  if( hit1 ) {                          // Hit, toss just made Phi
    _igvn.remove_dead_node(phi1);       // Remove new phi
    assert( hit1->is_Phi(), "" );
    phi1 = (PhiNode*)hit1;              // Use existing phi
  } else {                              // Miss
    _igvn.register_new_node_with_optimizer(phi1);
  }
  Node *hit2 = _igvn.hash_find_insert(phi2);
  if( hit2 ) {                          // Hit, toss just made Phi
    _igvn.remove_dead_node(phi2);       // Remove new phi
    assert( hit2->is_Phi(), "" );
    phi2 = (PhiNode*)hit2;              // Use existing phi
  } else {                              // Miss
    _igvn.register_new_node_with_optimizer(phi2);
  }
  // Register Phis with loop/block info
  set_ctrl(phi1, phi->in(0));
  set_ctrl(phi2, phi->in(0));
  // Make a new Cmp over the merged operand Phis
  Node *cmp = sample_cmp->clone();
  cmp->set_req( 1, phi1 );
  cmp->set_req( 2, phi2 );
  _igvn.register_new_node_with_optimizer(cmp);
  set_ctrl(cmp, phi->in(0));

  assert( cmp->is_Cmp(), "" );
  return (CmpNode*)cmp;
}

//------------------------------sink_use---------------------------------------
// If 'use' was in the loop-exit block, it now needs to be sunk
// below the post-loop merge point.
// Recursively moves 'use' and its whole non-CFG fan-out from the old exit
// block (post_loop->in(2)) down to the merge Region 'post_loop'.
void PhaseIdealLoop::sink_use( Node *use, Node *post_loop ) {
  if (!use->is_CFG() && get_ctrl(use) == post_loop->in(2)) {
    set_ctrl(use, post_loop);
    for (DUIterator j = use->outs(); use->has_out(j); j++)
      sink_use(use->out(j), post_loop);
  }
}

//------------------------------clone_loop-------------------------------------
//
//                   C L O N E   A   L O O P   B O D Y
//
// This is the basic building block of the loop optimizations.  It clones an
// entire loop body.
// It makes an old_new loop body mapping; with this mapping
// you can find the new-loop equivalent to an old-loop node.  All new-loop
// nodes are exactly equal to their old-loop counterparts, all edges are the
// same.  All exits from the old-loop now have a RegionNode that merges the
// equivalent new-loop path.  This is true even for the normal "loop-exit"
// condition.  All uses of loop-invariant old-loop values now come from (one
// or more) Phis that merge their new-loop equivalents.
//
// This operation leaves the graph in an illegal state: there are two valid
// control edges coming from the loop pre-header to both loop bodies.  I'll
// definitely have to hack the graph after running this transform.
//
// From this building block I will further edit edges to perform loop peeling
// or loop unrolling or iteration splitting (Range-Check-Elimination), etc.
//
// Parameter side_by_size_idom:
//   When side_by_size_idom is NULL, the dominator tree is constructed for
//      the clone loop to dominate the original.  Used in construction of
//      pre-main-post loop sequence.
//   When nonnull, the clone and original are side-by-side, both are
//      dominated by the side_by_side_idom node.  Used in construction of
//      unswitched loops.
void PhaseIdealLoop::clone_loop( IdealLoopTree *loop, Node_List &old_new, int dd,
                                 Node* side_by_side_idom) {

  // Step 1: Clone the loop body.  Make the old->new mapping.
  uint i;
  for( i = 0; i < loop->_body.size(); i++ ) {
    Node *old = loop->_body.at(i);
    Node *nnn = old->clone();
    old_new.map( old->_idx, nnn );
    _igvn.register_new_node_with_optimizer(nnn);
  }


  // Step 2: Fix the edges in the new body.  If the old input is outside the
  // loop use it.  If the old input is INside the loop, use the corresponding
  // new node instead.
  for( i = 0; i < loop->_body.size(); i++ ) {
    Node *old = loop->_body.at(i);
    Node *nnn = old_new[old->_idx];
    // Fix CFG/Loop controlling the new node
    if (has_ctrl(old)) {
      set_ctrl(nnn, old_new[get_ctrl(old)->_idx]);
    } else {
      // Clones of CFG nodes live in the parent loop for now.
      set_loop(nnn, loop->_parent);
      if (old->outcnt() > 0) {
        set_idom( nnn, old_new[idom(old)->_idx], dd );
      }
    }
    // Correct edges to the new node
    for( uint j = 0; j < nnn->req(); j++ ) {
      Node *n = nnn->in(j);
      if( n ) {
        IdealLoopTree *old_in_loop = get_loop( has_ctrl(n) ? get_ctrl(n) : n );
        if( loop->is_member( old_in_loop ) )
          nnn->set_req(j, old_new[n->_idx]);
      }
    }
    _igvn.hash_find_insert(nnn);
  }
  Node *newhead = old_new[loop->_head->_idx];
  set_idom(newhead, newhead->in(LoopNode::EntryControl), dd);


  // Step 3: Now fix control uses.  Loop varying control uses have already
  // been fixed up (as part of all input edges in Step 2).  Loop invariant
  // control uses must be either an IfFalse or an IfTrue.  Make a merge
  // point to merge the old and new IfFalse/IfTrue nodes; make the use
  // refer to this.
  ResourceArea *area = Thread::current()->resource_area();
  Node_List worklist(area);
  // Nodes with _idx >= new_counter were created by this pass (clones and the
  // merge Regions made below).
  uint new_counter = C->unique();
  for( i = 0; i < loop->_body.size(); i++ ) {
    Node* old = loop->_body.at(i);
    if( !old->is_CFG() ) continue;
    Node* nnn = old_new[old->_idx];

    // Copy uses to a worklist, so I can munge the def-use info
    // with impunity.
    for (DUIterator_Fast jmax, j = old->fast_outs(jmax); j < jmax; j++)
      worklist.push(old->fast_out(j));

    while( worklist.size() ) {  // Visit all uses
      Node *use = worklist.pop();
      if (!has_node(use))  continue; // Ignore dead nodes
      IdealLoopTree *use_loop = get_loop( has_ctrl(use) ? get_ctrl(use) : use );
      if( !loop->is_member( use_loop ) && use->is_CFG() ) {
        // Both OLD and USE are CFG nodes here.
        assert( use->is_Proj(), "" );

        // Clone the loop exit control projection
        Node *newuse = use->clone();
        newuse->set_req(0,nnn);
        _igvn.register_new_node_with_optimizer(newuse);
        set_loop(newuse, use_loop);
        set_idom(newuse, nnn, dom_depth(nnn) + 1 );

        // We need a Region to merge the exit from the peeled body and the
        // exit from the old loop body.
        RegionNode *r = new (C, 3) RegionNode(3);
        // Map the old use to the new merge point
        old_new.map( use->_idx, r );
        uint dd_r = MIN2(dom_depth(newuse),dom_depth(use));
        assert( dd_r >= dom_depth(dom_lca(newuse,use)), "" );

        // The original user of 'use' uses 'r' instead.
        for (DUIterator_Last lmin, l = use->last_outs(lmin); l >= lmin;) {
          Node* useuse = use->last_out(l);
          _igvn.hash_delete(useuse);
          _igvn._worklist.push(useuse);
          uint uses_found = 0;
          if( useuse->in(0) == use ) {
            useuse->set_req(0, r);
            uses_found++;
            if( useuse->is_CFG() ) {
              assert( dom_depth(useuse) > dd_r, "" );
              set_idom(useuse, r, dom_depth(useuse));
            }
          }
          for( uint k = 1; k < useuse->req(); k++ ) {
            if( useuse->in(k) == use ) {
              useuse->set_req(k, r);
              uses_found++;
            }
          }
          l -= uses_found;    // we deleted 1 or more copies of this edge
        }

        // Now finish up 'r': in(1) is the cloned exit, in(2) the old exit.
        r->set_req( 1, newuse );
        r->set_req( 2,    use );
        _igvn.register_new_node_with_optimizer(r);
        set_loop(r, use_loop);
        set_idom(r, !side_by_side_idom ? newuse->in(0) : side_by_side_idom, dd_r);
      } // End of if a loop-exit test
    }
  }

  // Step 4: If loop-invariant use is not control, it must be dominated by a
  // loop exit IfFalse/IfTrue.  Find "proper" loop exit.  Make a Region
  // there if needed.  Make a Phi there merging old and new used values.
  Node_List *split_if_set = NULL;
  Node_List *split_bool_set = NULL;
  Node_List *split_cex_set = NULL;
  for( i = 0; i < loop->_body.size(); i++ ) {
    Node* old = loop->_body.at(i);
    Node* nnn = old_new[old->_idx];
    // Copy uses to a worklist, so I can munge the def-use info
    // with impunity.
    for (DUIterator_Fast jmax, j = old->fast_outs(jmax); j < jmax; j++)
      worklist.push(old->fast_out(j));

    while( worklist.size() ) {
      Node *use = worklist.pop();
      if (!has_node(use))  continue; // Ignore dead nodes
      if (use->in(0) == C->top())  continue;
      IdealLoopTree *use_loop = get_loop( has_ctrl(use) ? get_ctrl(use) : use );
      // Check for data-use outside of loop - at least one of OLD or USE
      // must not be a CFG node.
      if( !loop->is_member( use_loop ) && (!old->is_CFG() || !use->is_CFG())) {

        // If the Data use is an IF, that means we have an IF outside of the
        // loop that is switching on a condition that is set inside of the
        // loop.  Happens if people set a loop-exit flag; then test the flag
        // in the loop to break the loop, then test is again outside of the
        // loop to determine which way the loop exited.
        if( use->is_If() || use->is_CMove() ) {
          // Since this code is highly unlikely, we lazily build the worklist
          // of such Nodes to go split.
          if( !split_if_set )
            split_if_set = new Node_List(area);
          split_if_set->push(use);
        }
        if( use->is_Bool() ) {
          if( !split_bool_set )
            split_bool_set = new Node_List(area);
          split_bool_set->push(use);
        }
        if( use->Opcode() == Op_CreateEx ) {
          if( !split_cex_set )
            split_cex_set = new Node_List(area);
          split_cex_set->push(use);
        }


        // Get "block" use is in
        uint idx = 0;
        while( use->in(idx) != old ) idx++;
        Node *prev = use->is_CFG() ? use : get_ctrl(use);
        assert( !loop->is_member( get_loop( prev ) ), "" );
        // New-in-this-pass CFG nodes are the merge Regions made in Step 3,
        // whose in(2) is the old-loop exit; otherwise climb the idom tree.
        Node *cfg = prev->_idx >= new_counter
          ? prev->in(2)
          : idom(prev);
        if( use->is_Phi() )     // Phi use is in prior block
          cfg = prev->in(idx);  // NOT in block of Phi itself
        if (cfg->is_top()) {    // Use is dead?
          _igvn.hash_delete(use);
          _igvn._worklist.push(use);
          use->set_req(idx, C->top());
          continue;
        }

        while( !loop->is_member( get_loop( cfg ) ) ) {
          prev = cfg;
          cfg = cfg->_idx >= new_counter ? cfg->in(2) : idom(cfg);
        }
        // If the use occurs after merging several exits from the loop, then
        // old value must have dominated all those exits.  Since the same old
        // value was used on all those exits we did not need a Phi at this
        // merge point.  NOW we do need a Phi here.  Each loop exit value
        // is now merged with the peeled body exit; each exit gets its own
        // private Phi and those Phis need to be merged here.
        Node *phi;
        if( prev->is_Region() ) {
          if( idx == 0 ) {      // Updating control edge?
            phi = prev;         // Just use existing control
          } else {              // Else need a new Phi
            phi = PhiNode::make( prev, old );
            // Now recursively fix up the new uses of old!
            for( uint i = 1; i < prev->req(); i++ ) {
              worklist.push(phi); // Onto worklist once for each 'old' input
            }
          }
        } else {
          // Get new RegionNode merging old and new loop exits
          prev = old_new[prev->_idx];
          // NOTE(review): the merge Region is made in Step 3 above; the
          // "step 7" in this message looks stale.
          assert( prev, "just made this in step 7" );
          if( idx == 0 ) {      // Updating control edge?
            phi = prev;         // Just use existing control
          } else {              // Else need a new Phi
            // Make a new Phi merging data values properly
            phi = PhiNode::make( prev, old );
            phi->set_req( 1, nnn );
          }
        }
        // If inserting a new Phi, check for prior hits
        if( idx != 0 ) {
          Node *hit = _igvn.hash_find_insert(phi);
          if( hit == NULL ) {
            _igvn.register_new_node_with_optimizer(phi); // Register new phi
          } else {                                      // or
            // Remove the new phi from the graph and use the hit
            _igvn.remove_dead_node(phi);
            phi = hit;                                  // Use existing phi
          }
          set_ctrl(phi, prev);
        }
        // Make 'use' use the Phi instead of the old loop body exit value
        _igvn.hash_delete(use);
        _igvn._worklist.push(use);
        use->set_req(idx, phi);
        if( use->_idx >= new_counter ) { // If updating new phis
          // Not needed for correctness, but prevents a weak assert
          // in AddPNode from tripping (when we end up with different
          // base & derived Phis that will become the same after
          // IGVN does CSE).
          Node *hit = _igvn.hash_find_insert(use);
          if( hit )     // Go ahead and re-hash for hits.
            _igvn.subsume_node( use, hit );
        }

        // If 'use' was in the loop-exit block, it now needs to be sunk
        // below the post-loop merge point.
        sink_use( use, prev );
      }
    }
  }

  // Check for IFs that need splitting/cloning.  Happens if an IF outside of
  // the loop uses a condition set in the loop.  The original IF probably
  // takes control from one or more OLD Regions (which in turn get from NEW
  // Regions).  In any case, there will be a set of Phis for each merge point
  // from the IF up to where the original BOOL def exists the loop.
  if( split_if_set ) {
    while( split_if_set->size() ) {
      Node *iff = split_if_set->pop();
      if( iff->in(1)->is_Phi() ) {
        BoolNode *b = clone_iff( iff->in(1)->as_Phi(), loop );
        _igvn.hash_delete(iff);
        _igvn._worklist.push(iff);
        iff->set_req(1, b);
      }
    }
  }
  if( split_bool_set ) {
    while( split_bool_set->size() ) {
      Node *b = split_bool_set->pop();
      Node *phi = b->in(1);
      assert( phi->is_Phi(), "" );
      CmpNode *cmp = clone_bool( (PhiNode*)phi, loop );
      _igvn.hash_delete(b);
      _igvn._worklist.push(b);
      b->set_req(1, cmp);
    }
  }
  if( split_cex_set ) {
    while( split_cex_set->size() ) {
      Node *b = split_cex_set->pop();
      assert( b->in(0)->is_Region(), "" );
      assert( b->in(1)->is_Phi(), "" );
      assert( b->in(0)->in(0) == b->in(1)->in(0), "" );
      split_up( b, b->in(0), NULL );
    }
  }

}


//---------------------- stride_of_possible_iv -------------------------------------
// Looks for an iff/bool/comp with one operand of the compare
// being a cycle involving an add and a phi,
// with an optional truncation (left-shift followed by a right-shift)
// of the add. Returns zero if not an iv.
1.1451 +int PhaseIdealLoop::stride_of_possible_iv(Node* iff) { 1.1452 + Node* trunc1 = NULL; 1.1453 + Node* trunc2 = NULL; 1.1454 + const TypeInt* ttype = NULL; 1.1455 + if (!iff->is_If() || iff->in(1) == NULL || !iff->in(1)->is_Bool()) { 1.1456 + return 0; 1.1457 + } 1.1458 + BoolNode* bl = iff->in(1)->as_Bool(); 1.1459 + Node* cmp = bl->in(1); 1.1460 + if (!cmp || cmp->Opcode() != Op_CmpI && cmp->Opcode() != Op_CmpU) { 1.1461 + return 0; 1.1462 + } 1.1463 + // Must have an invariant operand 1.1464 + if (is_member(get_loop(iff), get_ctrl(cmp->in(2)))) { 1.1465 + return 0; 1.1466 + } 1.1467 + Node* add2 = NULL; 1.1468 + Node* cmp1 = cmp->in(1); 1.1469 + if (cmp1->is_Phi()) { 1.1470 + // (If (Bool (CmpX phi:(Phi ...(Optional-trunc(AddI phi add2))) ))) 1.1471 + Node* phi = cmp1; 1.1472 + for (uint i = 1; i < phi->req(); i++) { 1.1473 + Node* in = phi->in(i); 1.1474 + Node* add = CountedLoopNode::match_incr_with_optional_truncation(in, 1.1475 + &trunc1, &trunc2, &ttype); 1.1476 + if (add && add->in(1) == phi) { 1.1477 + add2 = add->in(2); 1.1478 + break; 1.1479 + } 1.1480 + } 1.1481 + } else { 1.1482 + // (If (Bool (CmpX addtrunc:(Optional-trunc((AddI (Phi ...addtrunc...) add2)) ))) 1.1483 + Node* addtrunc = cmp1; 1.1484 + Node* add = CountedLoopNode::match_incr_with_optional_truncation(addtrunc, 1.1485 + &trunc1, &trunc2, &ttype); 1.1486 + if (add && add->in(1)->is_Phi()) { 1.1487 + Node* phi = add->in(1); 1.1488 + for (uint i = 1; i < phi->req(); i++) { 1.1489 + if (phi->in(i) == addtrunc) { 1.1490 + add2 = add->in(2); 1.1491 + break; 1.1492 + } 1.1493 + } 1.1494 + } 1.1495 + } 1.1496 + if (add2 != NULL) { 1.1497 + const TypeInt* add2t = _igvn.type(add2)->is_int(); 1.1498 + if (add2t->is_con()) { 1.1499 + return add2t->get_con(); 1.1500 + } 1.1501 + } 1.1502 + return 0; 1.1503 +} 1.1504 + 1.1505 + 1.1506 +//---------------------- stay_in_loop ------------------------------------- 1.1507 +// Return the (unique) control output node that's in the loop (if it exists.) 
1.1508 +Node* PhaseIdealLoop::stay_in_loop( Node* n, IdealLoopTree *loop) { 1.1509 + Node* unique = NULL; 1.1510 + if (!n) return NULL; 1.1511 + for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { 1.1512 + Node* use = n->fast_out(i); 1.1513 + if (!has_ctrl(use) && loop->is_member(get_loop(use))) { 1.1514 + if (unique != NULL) { 1.1515 + return NULL; 1.1516 + } 1.1517 + unique = use; 1.1518 + } 1.1519 + } 1.1520 + return unique; 1.1521 +} 1.1522 + 1.1523 +//------------------------------ register_node ------------------------------------- 1.1524 +// Utility to register node "n" with PhaseIdealLoop 1.1525 +void PhaseIdealLoop::register_node(Node* n, IdealLoopTree *loop, Node* pred, int ddepth) { 1.1526 + _igvn.register_new_node_with_optimizer(n); 1.1527 + loop->_body.push(n); 1.1528 + if (n->is_CFG()) { 1.1529 + set_loop(n, loop); 1.1530 + set_idom(n, pred, ddepth); 1.1531 + } else { 1.1532 + set_ctrl(n, pred); 1.1533 + } 1.1534 +} 1.1535 + 1.1536 +//------------------------------ proj_clone ------------------------------------- 1.1537 +// Utility to create an if-projection 1.1538 +ProjNode* PhaseIdealLoop::proj_clone(ProjNode* p, IfNode* iff) { 1.1539 + ProjNode* c = p->clone()->as_Proj(); 1.1540 + c->set_req(0, iff); 1.1541 + return c; 1.1542 +} 1.1543 + 1.1544 +//------------------------------ short_circuit_if ------------------------------------- 1.1545 +// Force the iff control output to be the live_proj 1.1546 +Node* PhaseIdealLoop::short_circuit_if(IfNode* iff, ProjNode* live_proj) { 1.1547 + int proj_con = live_proj->_con; 1.1548 + assert(proj_con == 0 || proj_con == 1, "false or true projection"); 1.1549 + Node *con = _igvn.intcon(proj_con); 1.1550 + set_ctrl(con, C->root()); 1.1551 + if (iff) { 1.1552 + iff->set_req(1, con); 1.1553 + } 1.1554 + return con; 1.1555 +} 1.1556 + 1.1557 +//------------------------------ insert_if_before_proj ------------------------------------- 1.1558 +// Insert a new if before an if projection (* - new node) 
//
// before
//           if(test)
//           /     \
//          v       v
//    other-proj   proj (arg)
//
// after
//           if(test)
//           /     \
//          /       v
//         |      * proj-clone
//         v       |
//    other-proj   v
//       * new_if(relop(cmp[IU](left,right)))
//          /  \
//         v    v
// * new-proj  proj
//   (returned)
//
// Params: "Signed" selects CmpI vs. CmpU for the new test; "relop" is the
// BoolTest condition; "proj" is the projection the new if is inserted above.
// Returns the new "exit" projection (the one NOT leading to the old proj).
ProjNode* PhaseIdealLoop::insert_if_before_proj(Node* left, bool Signed, BoolTest::mask relop, Node* right, ProjNode* proj) {
  IfNode* iff = proj->in(0)->as_If();
  IdealLoopTree *loop = get_loop(proj);
  ProjNode *other_proj = iff->proj_out(!proj->is_IfTrue())->as_Proj();
  int ddepth = dom_depth(proj);

  // Nodes whose edges change must be removed from the IGVN hash table
  // before mutation and revisited afterwards.
  _igvn.hash_delete(iff);
  _igvn._worklist.push(iff);
  _igvn.hash_delete(proj);
  _igvn._worklist.push(proj);

  proj->set_req(0, NULL);  // temporary disconnect
  ProjNode* proj2 = proj_clone(proj, iff);
  register_node(proj2, loop, iff, ddepth);

  // New test: signed or unsigned compare as requested by the caller.
  Node* cmp = Signed ? (Node*) new (C,3)CmpINode(left, right) : (Node*) new (C,3)CmpUNode(left, right);
  register_node(cmp, loop, proj2, ddepth);

  BoolNode* bol = new (C,2)BoolNode(cmp, relop);
  register_node(bol, loop, proj2, ddepth);

  // New if takes control from the cloned projection of the original if.
  IfNode* new_if = new (C,2)IfNode(proj2, bol, iff->_prob, iff->_fcnt);
  register_node(new_if, loop, proj2, ddepth);

  proj->set_req(0, new_if); // reattach
  set_idom(proj, new_if, ddepth);

  // The new exit projection mirrors the sense of the original other-proj,
  // and belongs to other-proj's loop (it exits this one).
  ProjNode* new_exit = proj_clone(other_proj, new_if)->as_Proj();
  register_node(new_exit, get_loop(other_proj), new_if, ddepth);

  return new_exit;
}

//------------------------------ insert_region_before_proj -------------------------------------
// Insert a region before an if projection (* - new node)
//
// before
//           if(test)
//          /      |
//         v       |
//       proj      v
//               other-proj
//
// after
//           if(test)
//          /      |
//         v       |
// * proj-clone    v
//         |     other-proj
//         v
// * new-region
//         |
//         v
// *      dum_if
//       /     \
//      v       \
// * dum-proj    v
//              proj
//
// The dummy if (with its test short-circuited to always take "proj")
// preserves the invariant that "proj" still has an If as its input,
// while the new region becomes a merge point for extra exit paths.
RegionNode* PhaseIdealLoop::insert_region_before_proj(ProjNode* proj) {
  IfNode* iff = proj->in(0)->as_If();
  IdealLoopTree *loop = get_loop(proj);
  ProjNode *other_proj = iff->proj_out(!proj->is_IfTrue())->as_Proj();
  int ddepth = dom_depth(proj);

  // Unhash nodes whose edges are about to change; revisit them later.
  _igvn.hash_delete(iff);
  _igvn._worklist.push(iff);
  _igvn.hash_delete(proj);
  _igvn._worklist.push(proj);

  proj->set_req(0, NULL);  // temporary disconnect
  ProjNode* proj2 = proj_clone(proj, iff);
  register_node(proj2, loop, iff, ddepth);

  RegionNode* reg = new (C,2)RegionNode(2);
  reg->set_req(1, proj2);
  register_node(reg, loop, iff, ddepth);

  // short_circuit_if(NULL, proj) just makes the matching constant; the
  // dummy if always falls through to "proj".
  IfNode* dum_if = new (C,2)IfNode(reg, short_circuit_if(NULL, proj), iff->_prob, iff->_fcnt);
  register_node(dum_if, loop, reg, ddepth);

  proj->set_req(0, dum_if); // reattach
  set_idom(proj, dum_if, ddepth);

  ProjNode* dum_proj = proj_clone(other_proj, dum_if);
  register_node(dum_proj, loop, dum_if, ddepth);

  return reg;
}

//------------------------------ insert_cmpi_loop_exit -------------------------------------
// Clone a signed compare loop exit from an unsigned compare and
// insert it before the unsigned cmp on the stay-in-loop path.
// All new nodes inserted in the dominator tree between the original
// if and its projections.  The original if test is replaced with
// a constant to force the stay-in-loop path.
//
// This is done to make sure that the original if and its projections
// still dominate the same set of control nodes, that the ctrl() relation
// from data nodes to them is preserved, and that their loop nesting is
// preserved.
//
// before
//          if(i <u limit)    unsigned compare loop exit
//         /       |
//        v        v
//   exit-proj   stay-in-loop-proj
//
// after
//          if(stay-in-loop-const)  original if
//         /       |
//        /        v
//       /  if(i <  limit)    new signed test
//      /  /       |
//     /  /        v
//    /  /  if(i <u limit)    new cloned unsigned test
//   /  /   /      |
//   v  v  v       |
//    region       |
//        |        |
//      dum-if     |
//     /  |        |
// ether  |        |
//        v        v
//   exit-proj   stay-in-loop-proj
//
// Returns the new signed-compare IfNode, or NULL if the exit does not
// match the expected shape (BoolTest::lt over a CmpU on a possible iv).
IfNode* PhaseIdealLoop::insert_cmpi_loop_exit(IfNode* if_cmpu, IdealLoopTree *loop) {
  const bool Signed   = true;
  const bool Unsigned = false;

  // Only handle the canonical "i <u limit" exit shape.
  BoolNode* bol = if_cmpu->in(1)->as_Bool();
  if (bol->_test._test != BoolTest::lt) return NULL;
  CmpNode* cmpu = bol->in(1)->as_Cmp();
  if (cmpu->Opcode() != Op_CmpU) return NULL;
  int stride = stride_of_possible_iv(if_cmpu);
  if (stride == 0) return NULL;

  ProjNode* lp_continue = stay_in_loop(if_cmpu, loop)->as_Proj();
  ProjNode* lp_exit     = if_cmpu->proj_out(!lp_continue->is_IfTrue())->as_Proj();

  // For a positive stride, reuse the unsigned limit for the signed test;
  // for a negative stride, the signed exit condition becomes "i >= 0".
  Node* limit = NULL;
  if (stride > 0) {
    limit = cmpu->in(2);
  } else {
    limit = _igvn.makecon(TypeInt::ZERO);
    set_ctrl(limit, C->root());
  }
  // Create a new region on the exit path
  RegionNode* reg = insert_region_before_proj(lp_exit);

  // Clone the if-cmpu-true-false using a signed compare
  BoolTest::mask rel_i = stride > 0 ? bol->_test._test : BoolTest::ge;
  ProjNode* cmpi_exit = insert_if_before_proj(cmpu->in(1), Signed, rel_i, limit, lp_continue);
  reg->add_req(cmpi_exit);

  // Clone the if-cmpu-true-false
  BoolTest::mask rel_u = bol->_test._test;
  ProjNode* cmpu_exit = insert_if_before_proj(cmpu->in(1), Unsigned, rel_u, cmpu->in(2), lp_continue);
  reg->add_req(cmpu_exit);

  // Force original if to stay in loop.
  short_circuit_if(if_cmpu, lp_continue);

  return cmpi_exit->in(0)->as_If();
}

//------------------------------ remove_cmpi_loop_exit -------------------------------------
// Remove a previously inserted signed compare loop exit by
// short-circuiting its test so the stay-in-loop path is always taken.
void PhaseIdealLoop::remove_cmpi_loop_exit(IfNode* if_cmp, IdealLoopTree *loop) {
  Node* lp_proj = stay_in_loop(if_cmp, loop);
  // Sanity: "if_cmp" is the CmpI test inserted immediately above the CmpU
  // test by insert_cmpi_loop_exit.
  assert(if_cmp->in(1)->in(1)->Opcode() == Op_CmpI &&
         stay_in_loop(lp_proj, loop)->is_If() &&
         stay_in_loop(lp_proj, loop)->in(1)->in(1)->Opcode() == Op_CmpU, "inserted cmpi before cmpu");
  Node *con = _igvn.makecon(lp_proj->is_IfTrue() ? TypeInt::ONE : TypeInt::ZERO);
  set_ctrl(con, C->root());
  if_cmp->set_req(1, con);
}

//------------------------------ scheduled_nodelist -------------------------------------
// Create a post order schedule of nodes that are in the
// "member" set.  The list is returned in "sched".
// The first node in "sched" is the loop head, followed by
// nodes which have no inputs in the "member" set, and then
// followed by the nodes that have an immediate input dependence
// on a node in "sched".
void PhaseIdealLoop::scheduled_nodelist( IdealLoopTree *loop, VectorSet& member, Node_List &sched ) {

  assert(member.test(loop->_head->_idx), "loop head must be in member set");
  Arena *a = Thread::current()->resource_area();
  VectorSet visited(a);
  Node_Stack nstack(a, loop->_body.size());

  Node* n  = loop->_head;  // top of stack is cached in "n"
  uint idx = 0;
  visited.set(n->_idx);

  // Initially push all with no inputs from within member set.
  // Each such root is stacked (with the previous cached top) so the
  // DFS below visits every root's output subtree.
  for(uint i = 0; i < loop->_body.size(); i++ ) {
    Node *elt = loop->_body.at(i);
    if (member.test(elt->_idx)) {
      bool found = false;
      for (uint j = 0; j < elt->req(); j++) {
        Node* def = elt->in(j);
        if (def && member.test(def->_idx) && def != elt) {
          found = true;
          break;
        }
      }
      if (!found && elt != loop->_head) {
        nstack.push(n, idx);
        n = elt;
        assert(!visited.test(n->_idx), "not seen yet");
        visited.set(n->_idx);
      }
    }
  }

  // traverse out's that are in the member set
  // (iterative DFS: "n"/"idx" mirror the implicit recursion frame).
  while (true) {
    if (idx < n->outcnt()) {
      Node* use = n->raw_out(idx);
      idx++;
      if (!visited.test_set(use->_idx)) {
        if (member.test(use->_idx)) {
          nstack.push(n, idx);
          n = use;
          idx = 0;
        }
      }
    } else {
      // All outputs processed; emit "n" (post order) and pop.
      sched.push(n);
      if (nstack.is_empty()) break;
      n   = nstack.node();
      idx = nstack.index();
      nstack.pop();
    }
  }
}


//------------------------------ has_use_in_set -------------------------------------
// Return true if "n" has any use whose index is in the vector set.
bool PhaseIdealLoop::has_use_in_set( Node* n, VectorSet& vset ) {
  for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
    Node* use = n->fast_out(j);
    if (vset.test(use->_idx)) {
      return true;
    }
  }
  return false;
}


//------------------------------ has_use_internal_to_set -------------------------------------
// Has use internal to the vector set (ie. not in a phi at the loop head)
bool PhaseIdealLoop::has_use_internal_to_set( Node* n, VectorSet& vset, IdealLoopTree *loop ) {
  Node* head = loop->_head;
  for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
    Node* use = n->fast_out(j);
    // A use in a loop-head phi is the loop-carried boundary, not "internal".
    if (vset.test(use->_idx) && !(use->is_Phi() && use->in(0) == head)) {
      return true;
    }
  }
  return false;
}


//------------------------------ clone_for_use_outside_loop -------------------------------------
// clone "n" for uses that are outside of loop
void PhaseIdealLoop::clone_for_use_outside_loop( IdealLoopTree *loop, Node* n, Node_List& worklist ) {

  assert(worklist.size() == 0, "should be empty");
  // Collect the out-of-loop uses first; cloning while iterating fast_outs
  // would invalidate the DU iterator.
  for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
    Node* use = n->fast_out(j);
    if( !loop->is_member(get_loop(has_ctrl(use) ? get_ctrl(use) : use)) ) {
      worklist.push(use);
    }
  }
  while( worklist.size() ) {
    Node *use = worklist.pop();
    if (!has_node(use) || use->in(0) == C->top()) continue; // skip dead uses
    // Find the (first) edge from "use" back to "n".
    uint j;
    for (j = 0; j < use->req(); j++) {
      if (use->in(j) == n) break;
    }
    assert(j < use->req(), "must be there");

    // clone "n" and insert it between the inputs of "n" and the use outside the loop
    Node* n_clone = n->clone();
    _igvn.hash_delete(use);
    use->set_req(j, n_clone);
    _igvn._worklist.push(use);
    if (!use->is_Phi()) {
      Node* use_c = has_ctrl(use) ? get_ctrl(use) : use->in(0);
      set_ctrl(n_clone, use_c);
      assert(!loop->is_member(get_loop(use_c)), "should be outside loop");
      get_loop(use_c)->_body.push(n_clone);
    } else {
      // Use in a phi is considered a use in the associated predecessor block
      Node *prevbb = use->in(0)->in(j);
      set_ctrl(n_clone, prevbb);
      assert(!loop->is_member(get_loop(prevbb)), "should be outside loop");
      get_loop(prevbb)->_body.push(n_clone);
    }
    _igvn.register_new_node_with_optimizer(n_clone);
#if !defined(PRODUCT)
    if (TracePartialPeeling) {
      tty->print_cr("loop exit cloning old: %d new: %d newbb: %d", n->_idx, n_clone->_idx, get_ctrl(n_clone)->_idx);
    }
#endif
  }
}


//------------------------------ clone_for_special_use_inside_loop -------------------------------------
// clone "n" for special uses that are in the not_peeled region.
// If these def-uses occur in separate blocks, the code generator
// marks the method as not compilable.  For example, if a "BoolNode"
// is in a different basic block than the "IfNode" that uses it, then
// the compilation is aborted in the code generator.
void PhaseIdealLoop::clone_for_special_use_inside_loop( IdealLoopTree *loop, Node* n,
                                                        VectorSet& not_peel, Node_List& sink_list, Node_List& worklist ) {
  // Phis and loads are never the block-local def of an If/CMove/Bool test,
  // so they need no special clone.
  if (n->is_Phi() || n->is_Load()) {
    return;
  }
  assert(worklist.size() == 0, "should be empty");
  // Collect uses in the not_peeled region that must stay in the same
  // block as their input (test input of If/CMove/Bool).
  for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
    Node* use = n->fast_out(j);
    if ( not_peel.test(use->_idx) &&
         (use->is_If() || use->is_CMove() || use->is_Bool()) &&
         use->in(1) == n)  {
      worklist.push(use);
    }
  }
  if (worklist.size() > 0) {
    // clone "n" and insert it between inputs of "n" and the use
    Node* n_clone = n->clone();
    loop->_body.push(n_clone);
    _igvn.register_new_node_with_optimizer(n_clone);
    set_ctrl(n_clone, get_ctrl(n));
    sink_list.push(n_clone);
    not_peel <<= n_clone->_idx;  // add n_clone to not_peel set.
#if !defined(PRODUCT)
    if (TracePartialPeeling) {
      tty->print_cr("special not_peeled cloning old: %d new: %d", n->_idx, n_clone->_idx);
    }
#endif
    // Retarget every collected use's matching input edges to the clone.
    while( worklist.size() ) {
      Node *use = worklist.pop();
      _igvn.hash_delete(use);
      _igvn._worklist.push(use);
      for (uint j = 1; j < use->req(); j++) {
        if (use->in(j) == n) {
          use->set_req(j, n_clone);
        }
      }
    }
  }
}


//------------------------------ insert_phi_for_loop -------------------------------------
// Insert phi(lp_entry_val, back_edge_val) at use->in(idx) for loop lp if phi does not already exist
void PhaseIdealLoop::insert_phi_for_loop( Node* use, uint idx, Node* lp_entry_val, Node* back_edge_val, LoopNode* lp ) {
  Node *phi = PhiNode::make(lp, back_edge_val);
  phi->set_req(LoopNode::EntryControl, lp_entry_val);
  // Use existing phi if it already exists
  Node *hit = _igvn.hash_find_insert(phi);
  if( hit == NULL ) {
    _igvn.register_new_node_with_optimizer(phi);
    set_ctrl(phi, lp);
  } else {
    // Remove the new phi from the graph and use the hit
    _igvn.remove_dead_node(phi);
    phi = hit;                                      // Use existing phi
  }
  _igvn.hash_delete(use);
  _igvn._worklist.push(use);
  use->set_req(idx, phi);
}

#ifdef ASSERT
//------------------------------ is_valid_loop_partition -------------------------------------
// Validate the loop partition sets: peel and not_peel
bool PhaseIdealLoop::is_valid_loop_partition( IdealLoopTree *loop, VectorSet& peel, Node_List& peel_list,
                                              VectorSet& not_peel ) {
  uint i;
  // Check that peel_list entries are in the peel set
  for (i = 0; i < peel_list.size(); i++) {
    if (!peel.test(peel_list.at(i)->_idx)) {
      return false;
    }
  }
  // Check that all loop members are in exactly one of peel set or not_peel set
  for (i = 0; i < loop->_body.size(); i++ ) {
    Node *def = loop->_body.at(i);
    uint di = def->_idx;
    // Check that peel set elements are in peel_list
    if (peel.test(di)) {
      if (not_peel.test(di)) {
        return false;
      }
      // Must be in peel_list also
      bool found = false;
      for (uint j = 0; j < peel_list.size(); j++) {
        if (peel_list.at(j)->_idx == di) {
          found = true;
          break;
        }
      }
      if (!found) {
        return false;
      }
    } else if (not_peel.test(di)) {
      if (peel.test(di)) {
        return false;
      }
    } else {
      return false;
    }
  }
  return true;
}

//------------------------------ is_valid_clone_loop_exit_use -------------------------------------
// Ensure a use outside of loop is of the right form: a phi merging the
// original and cloned loop exits at a 2-input region.
bool PhaseIdealLoop::is_valid_clone_loop_exit_use( IdealLoopTree *loop, Node* use, uint exit_idx) {
  Node *use_c = has_ctrl(use) ? get_ctrl(use) : use;
  return (use->is_Phi() &&
          use_c->is_Region() && use_c->req() == 3 &&
          (use_c->in(exit_idx)->Opcode() == Op_IfTrue ||
           use_c->in(exit_idx)->Opcode() == Op_IfFalse ||
           use_c->in(exit_idx)->Opcode() == Op_JumpProj) &&
          loop->is_member( get_loop( use_c->in(exit_idx)->in(0) ) ) );
}

//------------------------------ is_valid_clone_loop_form -------------------------------------
// Ensure that all uses outside of loop are of the right form
bool PhaseIdealLoop::is_valid_clone_loop_form( IdealLoopTree *loop, Node_List& peel_list,
                                               uint orig_exit_idx, uint clone_exit_idx) {
  uint len = peel_list.size();
  for (uint i = 0; i < len; i++) {
    Node *def = peel_list.at(i);

    for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
      Node *use = def->fast_out(j);
      Node *use_c = has_ctrl(use) ? get_ctrl(use) : use;
      if (!loop->is_member(get_loop(use_c))) {
        // use is not in the loop, check for correct structure
        if (use->in(0) == def) {
          // Okay: "use" is directly control-dependent on "def"
        } else if (!is_valid_clone_loop_exit_use(loop, use, orig_exit_idx)) {
          return false;
        }
      }
    }
  }
  return true;
}
#endif

//------------------------------ partial_peel -------------------------------------
// Partially peel (aka loop rotation) the top portion of a loop (called
// the peel section below) by cloning it and placing one copy just before
// the new loop head and the other copy at the bottom of the new loop.
1.2045 +// 1.2046 +// before after where it came from 1.2047 +// 1.2048 +// stmt1 stmt1 1.2049 +// loop: stmt2 clone 1.2050 +// stmt2 if condA goto exitA clone 1.2051 +// if condA goto exitA new_loop: new 1.2052 +// stmt3 stmt3 clone 1.2053 +// if !condB goto loop if condB goto exitB clone 1.2054 +// exitB: stmt2 orig 1.2055 +// stmt4 if !condA goto new_loop orig 1.2056 +// exitA: goto exitA 1.2057 +// exitB: 1.2058 +// stmt4 1.2059 +// exitA: 1.2060 +// 1.2061 +// Step 1: find the cut point: an exit test on probable 1.2062 +// induction variable. 1.2063 +// Step 2: schedule (with cloning) operations in the peel 1.2064 +// section that can be executed after the cut into 1.2065 +// the section that is not peeled. This may need 1.2066 +// to clone operations into exit blocks. For 1.2067 +// instance, a reference to A[i] in the not-peel 1.2068 +// section and a reference to B[i] in an exit block 1.2069 +// may cause a left-shift of i by 2 to be placed 1.2070 +// in the peel block. This step will clone the left 1.2071 +// shift into the exit block and sink the left shift 1.2072 +// from the peel to the not-peel section. 1.2073 +// Step 3: clone the loop, retarget the control, and insert 1.2074 +// phis for values that are live across the new loop 1.2075 +// head. This is very dependent on the graph structure 1.2076 +// from clone_loop. It creates region nodes for 1.2077 +// exit control and associated phi nodes for values 1.2078 +// flow out of the loop through that exit. The region 1.2079 +// node is dominated by the clone's control projection. 1.2080 +// So the clone's peel section is placed before the 1.2081 +// new loop head, and the clone's not-peel section is 1.2082 +// forms the top part of the new loop. The original 1.2083 +// peel section forms the tail of the new loop. 1.2084 +// Step 4: update the dominator tree and recompute the 1.2085 +// dominator depth. 
1.2086 +// 1.2087 +// orig 1.2088 +// 1.2089 +// stmt1 1.2090 +// | 1.2091 +// v 1.2092 +// loop<----+ 1.2093 +// | | 1.2094 +// stmt2 | 1.2095 +// | | 1.2096 +// v | 1.2097 +// ifA | 1.2098 +// / | | 1.2099 +// v v | 1.2100 +// false true ^ <-- last_peel 1.2101 +// / | | 1.2102 +// / ===|==cut | 1.2103 +// / stmt3 | <-- first_not_peel 1.2104 +// / | | 1.2105 +// | v | 1.2106 +// v ifB | 1.2107 +// exitA: / \ | 1.2108 +// / \ | 1.2109 +// v v | 1.2110 +// false true | 1.2111 +// / \ | 1.2112 +// / ----+ 1.2113 +// | 1.2114 +// v 1.2115 +// exitB: 1.2116 +// stmt4 1.2117 +// 1.2118 +// 1.2119 +// after clone loop 1.2120 +// 1.2121 +// stmt1 1.2122 +// / \ 1.2123 +// clone / \ orig 1.2124 +// / \ 1.2125 +// / \ 1.2126 +// v v 1.2127 +// +---->loop loop<----+ 1.2128 +// | | | | 1.2129 +// | stmt2 stmt2 | 1.2130 +// | | | | 1.2131 +// | v v | 1.2132 +// | ifA ifA | 1.2133 +// | | \ / | | 1.2134 +// | v v v v | 1.2135 +// ^ true false false true ^ <-- last_peel 1.2136 +// | | ^ \ / | | 1.2137 +// | cut==|== \ \ / ===|==cut | 1.2138 +// | stmt3 \ \ / stmt3 | <-- first_not_peel 1.2139 +// | | dom | | | | 1.2140 +// | v \ 1v v2 v | 1.2141 +// | ifB regionA ifB | 1.2142 +// | / \ | / \ | 1.2143 +// | / \ v / \ | 1.2144 +// | v v exitA: v v | 1.2145 +// | true false false true | 1.2146 +// | / ^ \ / \ | 1.2147 +// +---- \ \ / ----+ 1.2148 +// dom \ / 1.2149 +// \ 1v v2 1.2150 +// regionB 1.2151 +// | 1.2152 +// v 1.2153 +// exitB: 1.2154 +// stmt4 1.2155 +// 1.2156 +// 1.2157 +// after partial peel 1.2158 +// 1.2159 +// stmt1 1.2160 +// / 1.2161 +// clone / orig 1.2162 +// / TOP 1.2163 +// / \ 1.2164 +// v v 1.2165 +// TOP->region region----+ 1.2166 +// | | | 1.2167 +// stmt2 stmt2 | 1.2168 +// | | | 1.2169 +// v v | 1.2170 +// ifA ifA | 1.2171 +// | \ / | | 1.2172 +// v v v v | 1.2173 +// true false false true | <-- last_peel 1.2174 +// | ^ \ / +------|---+ 1.2175 +// +->newloop \ \ / === ==cut | | 1.2176 +// | stmt3 \ \ / TOP | | 1.2177 +// | | dom | | stmt3 | | <-- 
first_not_peel 1.2178 +// | v \ 1v v2 v | | 1.2179 +// | ifB regionA ifB ^ v 1.2180 +// | / \ | / \ | | 1.2181 +// | / \ v / \ | | 1.2182 +// | v v exitA: v v | | 1.2183 +// | true false false true | | 1.2184 +// | / ^ \ / \ | | 1.2185 +// | | \ \ / v | | 1.2186 +// | | dom \ / TOP | | 1.2187 +// | | \ 1v v2 | | 1.2188 +// ^ v regionB | | 1.2189 +// | | | | | 1.2190 +// | | v ^ v 1.2191 +// | | exitB: | | 1.2192 +// | | stmt4 | | 1.2193 +// | +------------>-----------------+ | 1.2194 +// | | 1.2195 +// +-----------------<---------------------+ 1.2196 +// 1.2197 +// 1.2198 +// final graph 1.2199 +// 1.2200 +// stmt1 1.2201 +// | 1.2202 +// v 1.2203 +// ........> ifA clone 1.2204 +// : / | 1.2205 +// dom / | 1.2206 +// : v v 1.2207 +// : false true 1.2208 +// : | | 1.2209 +// : | stmt2 clone 1.2210 +// : | | 1.2211 +// : | v 1.2212 +// : | newloop<-----+ 1.2213 +// : | | | 1.2214 +// : | stmt3 clone | 1.2215 +// : | | | 1.2216 +// : | v | 1.2217 +// : | ifB | 1.2218 +// : | / \ | 1.2219 +// : | v v | 1.2220 +// : | false true | 1.2221 +// : | | | | 1.2222 +// : | v stmt2 | 1.2223 +// : | exitB: | | 1.2224 +// : | stmt4 v | 1.2225 +// : | ifA orig | 1.2226 +// : | / \ | 1.2227 +// : | / \ | 1.2228 +// : | v v | 1.2229 +// : | false true | 1.2230 +// : | / \ | 1.2231 +// : v v -----+ 1.2232 +// RegionA 1.2233 +// | 1.2234 +// v 1.2235 +// exitA 1.2236 +// 1.2237 +bool PhaseIdealLoop::partial_peel( IdealLoopTree *loop, Node_List &old_new ) { 1.2238 + 1.2239 + LoopNode *head = loop->_head->as_Loop(); 1.2240 + 1.2241 + if (head->is_partial_peel_loop() || head->partial_peel_has_failed()) { 1.2242 + return false; 1.2243 + } 1.2244 + 1.2245 + // Check for complex exit control 1.2246 + for(uint ii = 0; ii < loop->_body.size(); ii++ ) { 1.2247 + Node *n = loop->_body.at(ii); 1.2248 + int opc = n->Opcode(); 1.2249 + if (n->is_Call() || 1.2250 + opc == Op_Catch || 1.2251 + opc == Op_CatchProj || 1.2252 + opc == Op_Jump || 1.2253 + opc == Op_JumpProj) { 1.2254 +#if 
!defined(PRODUCT) 1.2255 + if (TracePartialPeeling) { 1.2256 + tty->print_cr("\nExit control too complex: lp: %d", head->_idx); 1.2257 + } 1.2258 +#endif 1.2259 + return false; 1.2260 + } 1.2261 + } 1.2262 + 1.2263 + int dd = dom_depth(head); 1.2264 + 1.2265 + // Step 1: find cut point 1.2266 + 1.2267 + // Walk up dominators to loop head looking for first loop exit 1.2268 + // which is executed on every path thru loop. 1.2269 + IfNode *peel_if = NULL; 1.2270 + IfNode *peel_if_cmpu = NULL; 1.2271 + 1.2272 + Node *iff = loop->tail(); 1.2273 + while( iff != head ) { 1.2274 + if( iff->is_If() ) { 1.2275 + Node *ctrl = get_ctrl(iff->in(1)); 1.2276 + if (ctrl->is_top()) return false; // Dead test on live IF. 1.2277 + // If loop-varying exit-test, check for induction variable 1.2278 + if( loop->is_member(get_loop(ctrl)) && 1.2279 + loop->is_loop_exit(iff) && 1.2280 + is_possible_iv_test(iff)) { 1.2281 + Node* cmp = iff->in(1)->in(1); 1.2282 + if (cmp->Opcode() == Op_CmpI) { 1.2283 + peel_if = iff->as_If(); 1.2284 + } else { 1.2285 + assert(cmp->Opcode() == Op_CmpU, "must be CmpI or CmpU"); 1.2286 + peel_if_cmpu = iff->as_If(); 1.2287 + } 1.2288 + } 1.2289 + } 1.2290 + iff = idom(iff); 1.2291 + } 1.2292 + // Prefer signed compare over unsigned compare. 
1.2293 + IfNode* new_peel_if = NULL; 1.2294 + if (peel_if == NULL) { 1.2295 + if (!PartialPeelAtUnsignedTests || peel_if_cmpu == NULL) { 1.2296 + return false; // No peel point found 1.2297 + } 1.2298 + new_peel_if = insert_cmpi_loop_exit(peel_if_cmpu, loop); 1.2299 + if (new_peel_if == NULL) { 1.2300 + return false; // No peel point found 1.2301 + } 1.2302 + peel_if = new_peel_if; 1.2303 + } 1.2304 + Node* last_peel = stay_in_loop(peel_if, loop); 1.2305 + Node* first_not_peeled = stay_in_loop(last_peel, loop); 1.2306 + if (first_not_peeled == NULL || first_not_peeled == head) { 1.2307 + return false; 1.2308 + } 1.2309 + 1.2310 +#if !defined(PRODUCT) 1.2311 + if (TracePartialPeeling) { 1.2312 + tty->print_cr("before partial peel one iteration"); 1.2313 + Node_List wl; 1.2314 + Node* t = head->in(2); 1.2315 + while (true) { 1.2316 + wl.push(t); 1.2317 + if (t == head) break; 1.2318 + t = idom(t); 1.2319 + } 1.2320 + while (wl.size() > 0) { 1.2321 + Node* tt = wl.pop(); 1.2322 + tt->dump(); 1.2323 + if (tt == last_peel) tty->print_cr("-- cut --"); 1.2324 + } 1.2325 + } 1.2326 +#endif 1.2327 + ResourceArea *area = Thread::current()->resource_area(); 1.2328 + VectorSet peel(area); 1.2329 + VectorSet not_peel(area); 1.2330 + Node_List peel_list(area); 1.2331 + Node_List worklist(area); 1.2332 + Node_List sink_list(area); 1.2333 + 1.2334 + // Set of cfg nodes to peel are those that are executable from 1.2335 + // the head through last_peel. 
1.2336 + assert(worklist.size() == 0, "should be empty"); 1.2337 + worklist.push(head); 1.2338 + peel.set(head->_idx); 1.2339 + while (worklist.size() > 0) { 1.2340 + Node *n = worklist.pop(); 1.2341 + if (n != last_peel) { 1.2342 + for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) { 1.2343 + Node* use = n->fast_out(j); 1.2344 + if (use->is_CFG() && 1.2345 + loop->is_member(get_loop(use)) && 1.2346 + !peel.test_set(use->_idx)) { 1.2347 + worklist.push(use); 1.2348 + } 1.2349 + } 1.2350 + } 1.2351 + } 1.2352 + 1.2353 + // Set of non-cfg nodes to peel are those that are control 1.2354 + // dependent on the cfg nodes. 1.2355 + uint i; 1.2356 + for(i = 0; i < loop->_body.size(); i++ ) { 1.2357 + Node *n = loop->_body.at(i); 1.2358 + Node *n_c = has_ctrl(n) ? get_ctrl(n) : n; 1.2359 + if (peel.test(n_c->_idx)) { 1.2360 + peel.set(n->_idx); 1.2361 + } else { 1.2362 + not_peel.set(n->_idx); 1.2363 + } 1.2364 + } 1.2365 + 1.2366 + // Step 2: move operations from the peeled section down into the 1.2367 + // not-peeled section 1.2368 + 1.2369 + // Get a post order schedule of nodes in the peel region 1.2370 + // Result in right-most operand. 
1.2371 + scheduled_nodelist(loop, peel, peel_list ); 1.2372 + 1.2373 + assert(is_valid_loop_partition(loop, peel, peel_list, not_peel), "bad partition"); 1.2374 + 1.2375 + // For future check for too many new phis 1.2376 + uint old_phi_cnt = 0; 1.2377 + for (DUIterator_Fast jmax, j = head->fast_outs(jmax); j < jmax; j++) { 1.2378 + Node* use = head->fast_out(j); 1.2379 + if (use->is_Phi()) old_phi_cnt++; 1.2380 + } 1.2381 + 1.2382 +#if !defined(PRODUCT) 1.2383 + if (TracePartialPeeling) { 1.2384 + tty->print_cr("\npeeled list"); 1.2385 + } 1.2386 +#endif 1.2387 + 1.2388 + // Evacuate nodes in peel region into the not_peeled region if possible 1.2389 + uint new_phi_cnt = 0; 1.2390 + for (i = 0; i < peel_list.size();) { 1.2391 + Node* n = peel_list.at(i); 1.2392 +#if !defined(PRODUCT) 1.2393 + if (TracePartialPeeling) n->dump(); 1.2394 +#endif 1.2395 + bool incr = true; 1.2396 + if ( !n->is_CFG() ) { 1.2397 + 1.2398 + if ( has_use_in_set(n, not_peel) ) { 1.2399 + 1.2400 + // If not used internal to the peeled region, 1.2401 + // move "n" from peeled to not_peeled region. 1.2402 + 1.2403 + if ( !has_use_internal_to_set(n, peel, loop) ) { 1.2404 + 1.2405 + // if not pinned and not a load (which maybe anti-dependent on a store) 1.2406 + // and not a CMove (Matcher expects only bool->cmove). 1.2407 + if ( n->in(0) == NULL && !n->is_Load() && !n->is_CMove() ) { 1.2408 + clone_for_use_outside_loop( loop, n, worklist ); 1.2409 + 1.2410 + sink_list.push(n); 1.2411 + peel >>= n->_idx; // delete n from peel set. 1.2412 + not_peel <<= n->_idx; // add n to not_peel set. 
1.2413 + peel_list.remove(i); 1.2414 + incr = false; 1.2415 +#if !defined(PRODUCT) 1.2416 + if (TracePartialPeeling) { 1.2417 + tty->print_cr("sink to not_peeled region: %d newbb: %d", 1.2418 + n->_idx, get_ctrl(n)->_idx); 1.2419 + } 1.2420 +#endif 1.2421 + } 1.2422 + } else { 1.2423 + // Otherwise check for special def-use cases that span 1.2424 + // the peel/not_peel boundary such as bool->if 1.2425 + clone_for_special_use_inside_loop( loop, n, not_peel, sink_list, worklist ); 1.2426 + new_phi_cnt++; 1.2427 + } 1.2428 + } 1.2429 + } 1.2430 + if (incr) i++; 1.2431 + } 1.2432 + 1.2433 + if (new_phi_cnt > old_phi_cnt + PartialPeelNewPhiDelta) { 1.2434 +#if !defined(PRODUCT) 1.2435 + if (TracePartialPeeling) { 1.2436 + tty->print_cr("\nToo many new phis: %d old %d new cmpi: %c", 1.2437 + new_phi_cnt, old_phi_cnt, new_peel_if != NULL?'T':'F'); 1.2438 + } 1.2439 +#endif 1.2440 + if (new_peel_if != NULL) { 1.2441 + remove_cmpi_loop_exit(new_peel_if, loop); 1.2442 + } 1.2443 + // Inhibit more partial peeling on this loop 1.2444 + assert(!head->is_partial_peel_loop(), "not partial peeled"); 1.2445 + head->mark_partial_peel_failed(); 1.2446 + return false; 1.2447 + } 1.2448 + 1.2449 + // Step 3: clone loop, retarget control, and insert new phis 1.2450 + 1.2451 + // Create new loop head for new phis and to hang 1.2452 + // the nodes being moved (sinked) from the peel region. 
1.2453 + LoopNode* new_head = new (C, 3) LoopNode(last_peel, last_peel); 1.2454 + _igvn.register_new_node_with_optimizer(new_head); 1.2455 + assert(first_not_peeled->in(0) == last_peel, "last_peel <- first_not_peeled"); 1.2456 + first_not_peeled->set_req(0, new_head); 1.2457 + set_loop(new_head, loop); 1.2458 + loop->_body.push(new_head); 1.2459 + not_peel.set(new_head->_idx); 1.2460 + set_idom(new_head, last_peel, dom_depth(first_not_peeled)); 1.2461 + set_idom(first_not_peeled, new_head, dom_depth(first_not_peeled)); 1.2462 + 1.2463 + while (sink_list.size() > 0) { 1.2464 + Node* n = sink_list.pop(); 1.2465 + set_ctrl(n, new_head); 1.2466 + } 1.2467 + 1.2468 + assert(is_valid_loop_partition(loop, peel, peel_list, not_peel), "bad partition"); 1.2469 + 1.2470 + clone_loop( loop, old_new, dd ); 1.2471 + 1.2472 + const uint clone_exit_idx = 1; 1.2473 + const uint orig_exit_idx = 2; 1.2474 + assert(is_valid_clone_loop_form( loop, peel_list, orig_exit_idx, clone_exit_idx ), "bad clone loop"); 1.2475 + 1.2476 + Node* head_clone = old_new[head->_idx]; 1.2477 + LoopNode* new_head_clone = old_new[new_head->_idx]->as_Loop(); 1.2478 + Node* orig_tail_clone = head_clone->in(2); 1.2479 + 1.2480 + // Add phi if "def" node is in peel set and "use" is not 1.2481 + 1.2482 + for(i = 0; i < peel_list.size(); i++ ) { 1.2483 + Node *def = peel_list.at(i); 1.2484 + if (!def->is_CFG()) { 1.2485 + for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) { 1.2486 + Node *use = def->fast_out(j); 1.2487 + if (has_node(use) && use->in(0) != C->top() && 1.2488 + (!peel.test(use->_idx) || 1.2489 + (use->is_Phi() && use->in(0) == head)) ) { 1.2490 + worklist.push(use); 1.2491 + } 1.2492 + } 1.2493 + while( worklist.size() ) { 1.2494 + Node *use = worklist.pop(); 1.2495 + for (uint j = 1; j < use->req(); j++) { 1.2496 + Node* n = use->in(j); 1.2497 + if (n == def) { 1.2498 + 1.2499 + // "def" is in peel set, "use" is not in peel set 1.2500 + // or "use" is in the entry boundary (a 
phi) of the peel set 1.2501 + 1.2502 + Node* use_c = has_ctrl(use) ? get_ctrl(use) : use; 1.2503 + 1.2504 + if ( loop->is_member(get_loop( use_c )) ) { 1.2505 + // use is in loop 1.2506 + if (old_new[use->_idx] != NULL) { // null for dead code 1.2507 + Node* use_clone = old_new[use->_idx]; 1.2508 + _igvn.hash_delete(use); 1.2509 + use->set_req(j, C->top()); 1.2510 + _igvn._worklist.push(use); 1.2511 + insert_phi_for_loop( use_clone, j, old_new[def->_idx], def, new_head_clone ); 1.2512 + } 1.2513 + } else { 1.2514 + assert(is_valid_clone_loop_exit_use(loop, use, orig_exit_idx), "clone loop format"); 1.2515 + // use is not in the loop, check if the live range includes the cut 1.2516 + Node* lp_if = use_c->in(orig_exit_idx)->in(0); 1.2517 + if (not_peel.test(lp_if->_idx)) { 1.2518 + assert(j == orig_exit_idx, "use from original loop"); 1.2519 + insert_phi_for_loop( use, clone_exit_idx, old_new[def->_idx], def, new_head_clone ); 1.2520 + } 1.2521 + } 1.2522 + } 1.2523 + } 1.2524 + } 1.2525 + } 1.2526 + } 1.2527 + 1.2528 + // Step 3b: retarget control 1.2529 + 1.2530 + // Redirect control to the new loop head if a cloned node in 1.2531 + // the not_peeled region has control that points into the peeled region. 1.2532 + // This necessary because the cloned peeled region will be outside 1.2533 + // the loop. 
1.2534 + // from to 1.2535 + // cloned-peeled <---+ 1.2536 + // new_head_clone: | <--+ 1.2537 + // cloned-not_peeled in(0) in(0) 1.2538 + // orig-peeled 1.2539 + 1.2540 + for(i = 0; i < loop->_body.size(); i++ ) { 1.2541 + Node *n = loop->_body.at(i); 1.2542 + if (!n->is_CFG() && n->in(0) != NULL && 1.2543 + not_peel.test(n->_idx) && peel.test(n->in(0)->_idx)) { 1.2544 + Node* n_clone = old_new[n->_idx]; 1.2545 + _igvn.hash_delete(n_clone); 1.2546 + n_clone->set_req(0, new_head_clone); 1.2547 + _igvn._worklist.push(n_clone); 1.2548 + } 1.2549 + } 1.2550 + 1.2551 + // Backedge of the surviving new_head (the clone) is original last_peel 1.2552 + _igvn.hash_delete(new_head_clone); 1.2553 + new_head_clone->set_req(LoopNode::LoopBackControl, last_peel); 1.2554 + _igvn._worklist.push(new_head_clone); 1.2555 + 1.2556 + // Cut first node in original not_peel set 1.2557 + _igvn.hash_delete(new_head); 1.2558 + new_head->set_req(LoopNode::EntryControl, C->top()); 1.2559 + new_head->set_req(LoopNode::LoopBackControl, C->top()); 1.2560 + _igvn._worklist.push(new_head); 1.2561 + 1.2562 + // Copy head_clone back-branch info to original head 1.2563 + // and remove original head's loop entry and 1.2564 + // clone head's back-branch 1.2565 + _igvn.hash_delete(head); 1.2566 + _igvn.hash_delete(head_clone); 1.2567 + head->set_req(LoopNode::EntryControl, head_clone->in(LoopNode::LoopBackControl)); 1.2568 + head->set_req(LoopNode::LoopBackControl, C->top()); 1.2569 + head_clone->set_req(LoopNode::LoopBackControl, C->top()); 1.2570 + _igvn._worklist.push(head); 1.2571 + _igvn._worklist.push(head_clone); 1.2572 + 1.2573 + // Similarly modify the phis 1.2574 + for (DUIterator_Fast kmax, k = head->fast_outs(kmax); k < kmax; k++) { 1.2575 + Node* use = head->fast_out(k); 1.2576 + if (use->is_Phi() && use->outcnt() > 0) { 1.2577 + Node* use_clone = old_new[use->_idx]; 1.2578 + _igvn.hash_delete(use); 1.2579 + _igvn.hash_delete(use_clone); 1.2580 + use->set_req(LoopNode::EntryControl, 
use_clone->in(LoopNode::LoopBackControl)); 1.2581 + use->set_req(LoopNode::LoopBackControl, C->top()); 1.2582 + use_clone->set_req(LoopNode::LoopBackControl, C->top()); 1.2583 + _igvn._worklist.push(use); 1.2584 + _igvn._worklist.push(use_clone); 1.2585 + } 1.2586 + } 1.2587 + 1.2588 + // Step 4: update dominator tree and dominator depth 1.2589 + 1.2590 + set_idom(head, orig_tail_clone, dd); 1.2591 + recompute_dom_depth(); 1.2592 + 1.2593 + // Inhibit more partial peeling on this loop 1.2594 + new_head_clone->set_partial_peel_loop(); 1.2595 + C->set_major_progress(); 1.2596 + 1.2597 +#if !defined(PRODUCT) 1.2598 + if (TracePartialPeeling) { 1.2599 + tty->print_cr("\nafter partial peel one iteration"); 1.2600 + Node_List wl(area); 1.2601 + Node* t = last_peel; 1.2602 + while (true) { 1.2603 + wl.push(t); 1.2604 + if (t == head_clone) break; 1.2605 + t = idom(t); 1.2606 + } 1.2607 + while (wl.size() > 0) { 1.2608 + Node* tt = wl.pop(); 1.2609 + if (tt == head) tty->print_cr("orig head"); 1.2610 + else if (tt == new_head_clone) tty->print_cr("new head"); 1.2611 + else if (tt == head_clone) tty->print_cr("clone head"); 1.2612 + tt->dump(); 1.2613 + } 1.2614 + } 1.2615 +#endif 1.2616 + return true; 1.2617 +} 1.2618 + 1.2619 +//------------------------------reorg_offsets---------------------------------- 1.2620 +// Reorganize offset computations to lower register pressure. 
Mostly 1.2621 +// prevent loop-fallout uses of the pre-incremented trip counter (which are 1.2622 +// then alive with the post-incremented trip counter forcing an extra 1.2623 +// register move) 1.2624 +void PhaseIdealLoop::reorg_offsets( IdealLoopTree *loop ) { 1.2625 + 1.2626 + CountedLoopNode *cl = loop->_head->as_CountedLoop(); 1.2627 + CountedLoopEndNode *cle = cl->loopexit(); 1.2628 + if( !cle ) return; // The occasional dead loop 1.2629 + // Find loop exit control 1.2630 + Node *exit = cle->proj_out(false); 1.2631 + assert( exit->Opcode() == Op_IfFalse, "" ); 1.2632 + 1.2633 + // Check for the special case of folks using the pre-incremented 1.2634 + // trip-counter on the fall-out path (forces the pre-incremented 1.2635 + // and post-incremented trip counter to be live at the same time). 1.2636 + // Fix this by adjusting to use the post-increment trip counter. 1.2637 + Node *phi = cl->phi(); 1.2638 + if( !phi ) return; // Dead infinite loop 1.2639 + bool progress = true; 1.2640 + while (progress) { 1.2641 + progress = false; 1.2642 + for (DUIterator_Fast imax, i = phi->fast_outs(imax); i < imax; i++) { 1.2643 + Node* use = phi->fast_out(i); // User of trip-counter 1.2644 + if (!has_ctrl(use)) continue; 1.2645 + Node *u_ctrl = get_ctrl(use); 1.2646 + if( use->is_Phi() ) { 1.2647 + u_ctrl = NULL; 1.2648 + for( uint j = 1; j < use->req(); j++ ) 1.2649 + if( use->in(j) == phi ) 1.2650 + u_ctrl = dom_lca( u_ctrl, use->in(0)->in(j) ); 1.2651 + } 1.2652 + IdealLoopTree *u_loop = get_loop(u_ctrl); 1.2653 + // Look for loop-invariant use 1.2654 + if( u_loop == loop ) continue; 1.2655 + if( loop->is_member( u_loop ) ) continue; 1.2656 + // Check that use is live out the bottom. Assuming the trip-counter 1.2657 + // update is right at the bottom, uses of of the loop middle are ok. 1.2658 + if( dom_lca( exit, u_ctrl ) != exit ) continue; 1.2659 + // protect against stride not being a constant 1.2660 + if( !cle->stride_is_con() ) continue; 1.2661 + // Hit! 
Refactor use to use the post-incremented tripcounter. 1.2662 + // Compute a post-increment tripcounter. 1.2663 + Node *opaq = new (C, 2) Opaque2Node( cle->incr() ); 1.2664 + register_new_node( opaq, u_ctrl ); 1.2665 + Node *neg_stride = _igvn.intcon(-cle->stride_con()); 1.2666 + set_ctrl(neg_stride, C->root()); 1.2667 + Node *post = new (C, 3) AddINode( opaq, neg_stride); 1.2668 + register_new_node( post, u_ctrl ); 1.2669 + _igvn.hash_delete(use); 1.2670 + _igvn._worklist.push(use); 1.2671 + for( uint j = 1; j < use->req(); j++ ) 1.2672 + if( use->in(j) == phi ) 1.2673 + use->set_req(j, post); 1.2674 + // Since DU info changed, rerun loop 1.2675 + progress = true; 1.2676 + break; 1.2677 + } 1.2678 + } 1.2679 + 1.2680 +}