1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/src/share/vm/opto/loopopts.cpp Wed Apr 27 01:25:04 2016 +0800 1.3 @@ -0,0 +1,2788 @@ 1.4 +/* 1.5 + * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. 1.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 1.7 + * 1.8 + * This code is free software; you can redistribute it and/or modify it 1.9 + * under the terms of the GNU General Public License version 2 only, as 1.10 + * published by the Free Software Foundation. 1.11 + * 1.12 + * This code is distributed in the hope that it will be useful, but WITHOUT 1.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 1.14 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 1.15 + * version 2 for more details (a copy is included in the LICENSE file that 1.16 + * accompanied this code). 1.17 + * 1.18 + * You should have received a copy of the GNU General Public License version 1.19 + * 2 along with this work; if not, write to the Free Software Foundation, 1.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 1.21 + * 1.22 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 1.23 + * or visit www.oracle.com if you need additional information or have any 1.24 + * questions. 1.25 + * 1.26 + */ 1.27 + 1.28 +#include "precompiled.hpp" 1.29 +#include "memory/allocation.inline.hpp" 1.30 +#include "opto/addnode.hpp" 1.31 +#include "opto/connode.hpp" 1.32 +#include "opto/divnode.hpp" 1.33 +#include "opto/loopnode.hpp" 1.34 +#include "opto/matcher.hpp" 1.35 +#include "opto/mulnode.hpp" 1.36 +#include "opto/rootnode.hpp" 1.37 +#include "opto/subnode.hpp" 1.38 + 1.39 +//============================================================================= 1.40 +//------------------------------split_thru_phi--------------------------------- 1.41 +// Split Node 'n' through merge point if there is enough win. 
// Split Node 'n' through the merge point 'region' by cloning it once per
// region input and merging the clones with a new Phi. Only performed when
// the number of clones that simplify ("wins") exceeds 'policy'.
// Returns the new Phi on success, or NULL if not profitable.
Node *PhaseIdealLoop::split_thru_phi( Node *n, Node *region, int policy ) {
  if (n->Opcode() == Op_ConvI2L && n->bottom_type() != TypeLong::LONG) {
    // ConvI2L may have type information on it which is unsafe to push up
    // so disable this for now
    return NULL;
  }

  int wins = 0;   // Count of per-path clones that simplified
  assert(!n->is_CFG(), "");
  assert(region->is_Region(), "");

  const Type* type = n->bottom_type();
  const TypeOopPtr *t_oop = _igvn.type(n)->isa_oopptr();
  Node *phi;
  if (t_oop != NULL && t_oop->is_known_instance_field()) {
    // Known-instance field: build the Phi with the instance id, alias index
    // and offset so precise memory aliasing is preserved.
    int iid = t_oop->instance_id();
    int index = C->get_alias_index(t_oop);
    int offset = t_oop->offset();
    phi = new (C) PhiNode(region, type, NULL, iid, index, offset);
  } else {
    phi = PhiNode::make_blank(region, n);
  }
  uint old_unique = C->unique();  // Nodes with _idx >= this are fresh clones
  for (uint i = 1; i < region->req(); i++) {
    Node *x;
    Node* the_clone = NULL;
    if (region->in(i) == C->top()) {
      x = C->top();             // Dead path?  Use a dead data op
    } else {
      x = n->clone();           // Else clone up the data op
      the_clone = x;            // Remember for possible deletion.
      // Alter data node to use pre-phi inputs
      if (n->in(0) == region)
        x->set_req( 0, region->in(i) );
      for (uint j = 1; j < n->req(); j++) {
        Node *in = n->in(j);
        if (in->is_Phi() && in->in(0) == region)
          x->set_req( j, in->in(i) ); // Use pre-Phi input for the clone
      }
    }
    // Check for a 'win' on some paths
    const Type *t = x->Value(&_igvn);

    bool singleton = t->singleton();

    // A TOP singleton indicates that there are no possible values incoming
    // along a particular edge. In most cases, this is OK, and the Phi will
    // be eliminated later in an Ideal call. However, we can't allow this to
    // happen if the singleton occurs on loop entry, as the elimination of
    // the PhiNode may cause the resulting node to migrate back to a previous
    // loop iteration.
    if (singleton && t == Type::TOP) {
      // Is_Loop() == false does not confirm the absence of a loop (e.g., an
      // irreducible loop may not be indicated by an affirmative is_Loop());
      // therefore, the only top we can split thru a phi is on a backedge of
      // a loop.
      singleton &= region->is_Loop() && (i != LoopNode::EntryControl);
    }

    if (singleton) {
      // Clone constant-folded on this path -- replace with the constant.
      wins++;
      x = ((PhaseGVN&)_igvn).makecon(t);
    } else {
      // We now call Identity to try to simplify the cloned node.
      // Note that some Identity methods call phase->type(this).
      // Make sure that the type array is big enough for
      // our new node, even though we may throw the node away.
      // (Note: This tweaking with igvn only works because x is a new node.)
      _igvn.set_type(x, t);
      // If x is a TypeNode, capture any more-precise type permanently into Node
      // otherwise it will be not updated during igvn->transform since
      // igvn->type(x) is set to x->Value() already.
      x->raise_bottom_type(t);
      Node *y = x->Identity(&_igvn);
      if (y != x) {
        // Identity simplified the clone -- a win.
        wins++;
        x = y;
      } else {
        y = _igvn.hash_find(x);
        if (y) {
          // Commoned up with a pre-existing node -- also a win.
          wins++;
          x = y;
        } else {
          // Else x is a new node we are keeping
          // We do not need register_new_node_with_optimizer
          // because set_type has already been called.
          _igvn._worklist.push(x);
        }
      }
    }
    if (x != the_clone && the_clone != NULL)
      _igvn.remove_dead_node(the_clone);
    phi->set_req( i, x );
  }
  // Too few wins?
  if (wins <= policy) {
    _igvn.remove_dead_node(phi);
    return NULL;
  }

  // Record Phi
  register_new_node( phi, region );

  for (uint i2 = 1; i2 < phi->req(); i2++) {
    Node *x = phi->in(i2);
    // If we commoned up the cloned 'x' with another existing Node,
    // the existing Node picks up a new use. We need to make the
    // existing Node occur higher up so it dominates its uses.
    Node *old_ctrl;
    IdealLoopTree *old_loop;

    if (x->is_Con()) {
      // Constant's control is always root.
      set_ctrl(x, C->root());
      continue;
    }
    // The occasional new node
    if (x->_idx >= old_unique) { // Found a new, unplaced node?
      old_ctrl = NULL;
      old_loop = NULL;           // Not in any prior loop
    } else {
      old_ctrl = get_ctrl(x);
      old_loop = get_loop(old_ctrl); // Get prior loop
    }
    // New late point must dominate new use
    Node *new_ctrl = dom_lca(old_ctrl, region->in(i2));
    if (new_ctrl == old_ctrl) // Nothing is changed
      continue;

    IdealLoopTree *new_loop = get_loop(new_ctrl);

    // Don't move x into a loop if its uses are
    // outside of loop. Otherwise x will be cloned
    // for each use outside of this loop.
    IdealLoopTree *use_loop = get_loop(region);
    if (!new_loop->is_member(use_loop) &&
        (old_loop == NULL || !new_loop->is_member(old_loop))) {
      // Take early control, later control will be recalculated
      // during next iteration of loop optimizations.
      new_ctrl = get_early_ctrl(x);
      new_loop = get_loop(new_ctrl);
    }
    // Set new location
    set_ctrl(x, new_ctrl);
    // If changing loop bodies, see if we need to collect into new body
    if (old_loop != new_loop) {
      if (old_loop && !old_loop->_child)
        old_loop->_body.yank(x);
      if (!new_loop->_child)
        new_loop->_body.push(x); // Collect body info
    }
  }

  return phi;
}

//------------------------------dominated_by------------------------------------
// Replace the dominated test with an obvious true or false. Place it on the
// IGVN worklist for later cleanup. Move control-dependent data Nodes on the
// live path up to the dominating control.
void PhaseIdealLoop::dominated_by( Node *prevdom, Node *iff, bool flip, bool exclude_loop_predicate ) {
#ifndef PRODUCT
  if (VerifyLoopOptimizations && PrintOpto) tty->print_cr("dominating test");
#endif


  // prevdom is the dominating projection of the dominating test.
  assert( iff->is_If(), "" );
  assert( iff->Opcode() == Op_If || iff->Opcode() == Op_CountedLoopEnd, "Check this code when new subtype is added");
  int pop = prevdom->Opcode();
  assert( pop == Op_IfFalse || pop == Op_IfTrue, "" );
  if (flip) {
    // Caller asked for the opposite sense of the dominating projection.
    if (pop == Op_IfTrue)
      pop = Op_IfFalse;
    else
      pop = Op_IfTrue;
  }
  // 'con' is set to true or false to kill the dominated test.
  Node *con = _igvn.makecon(pop == Op_IfTrue ? TypeInt::ONE : TypeInt::ZERO);
  set_ctrl(con, C->root()); // Constant gets a new use
  // Hack the dominated test
  _igvn.replace_input_of(iff, 1, con);

  // If I don't have a reachable TRUE and FALSE path following the IfNode then
  // I can assume this path reaches an infinite loop. In this case it's not
  // important to optimize the data Nodes - either the whole compilation will
  // be tossed or this path (and all data Nodes) will go dead.
  if (iff->outcnt() != 2) return;

  // Make control-dependent data Nodes on the live path (path that will remain
  // once the dominated IF is removed) become control-dependent on the
  // dominating projection.
  Node* dp = iff->as_If()->proj_out(pop == Op_IfTrue);

  // Loop predicates may have depending checks which should not
  // be skipped. For example, range check predicate has two checks
  // for lower and upper bounds.
  if (dp == NULL)
    return;

  ProjNode* dp_proj  = dp->as_Proj();
  ProjNode* unc_proj = iff->as_If()->proj_out(1 - dp_proj->_con)->as_Proj();
  if (exclude_loop_predicate &&
      unc_proj->is_uncommon_trap_proj(Deoptimization::Reason_predicate))
    return; // Let IGVN transformation change control dependence.
1.247 + 1.248 + IdealLoopTree *old_loop = get_loop(dp); 1.249 + 1.250 + for (DUIterator_Fast imax, i = dp->fast_outs(imax); i < imax; i++) { 1.251 + Node* cd = dp->fast_out(i); // Control-dependent node 1.252 + if (cd->depends_only_on_test()) { 1.253 + assert(cd->in(0) == dp, ""); 1.254 + _igvn.replace_input_of(cd, 0, prevdom); 1.255 + set_early_ctrl(cd); 1.256 + IdealLoopTree *new_loop = get_loop(get_ctrl(cd)); 1.257 + if (old_loop != new_loop) { 1.258 + if (!old_loop->_child) old_loop->_body.yank(cd); 1.259 + if (!new_loop->_child) new_loop->_body.push(cd); 1.260 + } 1.261 + --i; 1.262 + --imax; 1.263 + } 1.264 + } 1.265 +} 1.266 + 1.267 +//------------------------------has_local_phi_input---------------------------- 1.268 +// Return TRUE if 'n' has Phi inputs from its local block and no other 1.269 +// block-local inputs (all non-local-phi inputs come from earlier blocks) 1.270 +Node *PhaseIdealLoop::has_local_phi_input( Node *n ) { 1.271 + Node *n_ctrl = get_ctrl(n); 1.272 + // See if some inputs come from a Phi in this block, or from before 1.273 + // this block. 1.274 + uint i; 1.275 + for( i = 1; i < n->req(); i++ ) { 1.276 + Node *phi = n->in(i); 1.277 + if( phi->is_Phi() && phi->in(0) == n_ctrl ) 1.278 + break; 1.279 + } 1.280 + if( i >= n->req() ) 1.281 + return NULL; // No Phi inputs; nowhere to clone thru 1.282 + 1.283 + // Check for inputs created between 'n' and the Phi input. These 1.284 + // must split as well; they have already been given the chance 1.285 + // (courtesy of a post-order visit) and since they did not we must 1.286 + // recover the 'cost' of splitting them by being very profitable 1.287 + // when splitting 'n'. Since this is unlikely we simply give up. 1.288 + for( i = 1; i < n->req(); i++ ) { 1.289 + Node *m = n->in(i); 1.290 + if( get_ctrl(m) == n_ctrl && !m->is_Phi() ) { 1.291 + // We allow the special case of AddP's with no local inputs. 1.292 + // This allows us to split-up address expressions. 
1.293 + if (m->is_AddP() && 1.294 + get_ctrl(m->in(2)) != n_ctrl && 1.295 + get_ctrl(m->in(3)) != n_ctrl) { 1.296 + // Move the AddP up to dominating point 1.297 + set_ctrl_and_loop(m, find_non_split_ctrl(idom(n_ctrl))); 1.298 + continue; 1.299 + } 1.300 + return NULL; 1.301 + } 1.302 + } 1.303 + 1.304 + return n_ctrl; 1.305 +} 1.306 + 1.307 +//------------------------------remix_address_expressions---------------------- 1.308 +// Rework addressing expressions to get the most loop-invariant stuff 1.309 +// moved out. We'd like to do all associative operators, but it's especially 1.310 +// important (common) to do address expressions. 1.311 +Node *PhaseIdealLoop::remix_address_expressions( Node *n ) { 1.312 + if (!has_ctrl(n)) return NULL; 1.313 + Node *n_ctrl = get_ctrl(n); 1.314 + IdealLoopTree *n_loop = get_loop(n_ctrl); 1.315 + 1.316 + // See if 'n' mixes loop-varying and loop-invariant inputs and 1.317 + // itself is loop-varying. 1.318 + 1.319 + // Only interested in binary ops (and AddP) 1.320 + if( n->req() < 3 || n->req() > 4 ) return NULL; 1.321 + 1.322 + Node *n1_ctrl = get_ctrl(n->in( 1)); 1.323 + Node *n2_ctrl = get_ctrl(n->in( 2)); 1.324 + Node *n3_ctrl = get_ctrl(n->in(n->req() == 3 ? 2 : 3)); 1.325 + IdealLoopTree *n1_loop = get_loop( n1_ctrl ); 1.326 + IdealLoopTree *n2_loop = get_loop( n2_ctrl ); 1.327 + IdealLoopTree *n3_loop = get_loop( n3_ctrl ); 1.328 + 1.329 + // Does one of my inputs spin in a tighter loop than self? 1.330 + if( (n_loop->is_member( n1_loop ) && n_loop != n1_loop) || 1.331 + (n_loop->is_member( n2_loop ) && n_loop != n2_loop) || 1.332 + (n_loop->is_member( n3_loop ) && n_loop != n3_loop) ) 1.333 + return NULL; // Leave well enough alone 1.334 + 1.335 + // Is at least one of my inputs loop-invariant? 
1.336 + if( n1_loop == n_loop && 1.337 + n2_loop == n_loop && 1.338 + n3_loop == n_loop ) 1.339 + return NULL; // No loop-invariant inputs 1.340 + 1.341 + 1.342 + int n_op = n->Opcode(); 1.343 + 1.344 + // Replace expressions like ((V+I) << 2) with (V<<2 + I<<2). 1.345 + if( n_op == Op_LShiftI ) { 1.346 + // Scale is loop invariant 1.347 + Node *scale = n->in(2); 1.348 + Node *scale_ctrl = get_ctrl(scale); 1.349 + IdealLoopTree *scale_loop = get_loop(scale_ctrl ); 1.350 + if( n_loop == scale_loop || !scale_loop->is_member( n_loop ) ) 1.351 + return NULL; 1.352 + const TypeInt *scale_t = scale->bottom_type()->isa_int(); 1.353 + if( scale_t && scale_t->is_con() && scale_t->get_con() >= 16 ) 1.354 + return NULL; // Dont bother with byte/short masking 1.355 + // Add must vary with loop (else shift would be loop-invariant) 1.356 + Node *add = n->in(1); 1.357 + Node *add_ctrl = get_ctrl(add); 1.358 + IdealLoopTree *add_loop = get_loop(add_ctrl); 1.359 + //assert( n_loop == add_loop, "" ); 1.360 + if( n_loop != add_loop ) return NULL; // happens w/ evil ZKM loops 1.361 + 1.362 + // Convert I-V into I+ (0-V); same for V-I 1.363 + if( add->Opcode() == Op_SubI && 1.364 + _igvn.type( add->in(1) ) != TypeInt::ZERO ) { 1.365 + Node *zero = _igvn.intcon(0); 1.366 + set_ctrl(zero, C->root()); 1.367 + Node *neg = new (C) SubINode( _igvn.intcon(0), add->in(2) ); 1.368 + register_new_node( neg, get_ctrl(add->in(2) ) ); 1.369 + add = new (C) AddINode( add->in(1), neg ); 1.370 + register_new_node( add, add_ctrl ); 1.371 + } 1.372 + if( add->Opcode() != Op_AddI ) return NULL; 1.373 + // See if one add input is loop invariant 1.374 + Node *add_var = add->in(1); 1.375 + Node *add_var_ctrl = get_ctrl(add_var); 1.376 + IdealLoopTree *add_var_loop = get_loop(add_var_ctrl ); 1.377 + Node *add_invar = add->in(2); 1.378 + Node *add_invar_ctrl = get_ctrl(add_invar); 1.379 + IdealLoopTree *add_invar_loop = get_loop(add_invar_ctrl ); 1.380 + if( add_var_loop == n_loop ) { 1.381 + } else if( 
add_invar_loop == n_loop ) { 1.382 + // Swap to find the invariant part 1.383 + add_invar = add_var; 1.384 + add_invar_ctrl = add_var_ctrl; 1.385 + add_invar_loop = add_var_loop; 1.386 + add_var = add->in(2); 1.387 + Node *add_var_ctrl = get_ctrl(add_var); 1.388 + IdealLoopTree *add_var_loop = get_loop(add_var_ctrl ); 1.389 + } else // Else neither input is loop invariant 1.390 + return NULL; 1.391 + if( n_loop == add_invar_loop || !add_invar_loop->is_member( n_loop ) ) 1.392 + return NULL; // No invariant part of the add? 1.393 + 1.394 + // Yes! Reshape address expression! 1.395 + Node *inv_scale = new (C) LShiftINode( add_invar, scale ); 1.396 + Node *inv_scale_ctrl = 1.397 + dom_depth(add_invar_ctrl) > dom_depth(scale_ctrl) ? 1.398 + add_invar_ctrl : scale_ctrl; 1.399 + register_new_node( inv_scale, inv_scale_ctrl ); 1.400 + Node *var_scale = new (C) LShiftINode( add_var, scale ); 1.401 + register_new_node( var_scale, n_ctrl ); 1.402 + Node *var_add = new (C) AddINode( var_scale, inv_scale ); 1.403 + register_new_node( var_add, n_ctrl ); 1.404 + _igvn.replace_node( n, var_add ); 1.405 + return var_add; 1.406 + } 1.407 + 1.408 + // Replace (I+V) with (V+I) 1.409 + if( n_op == Op_AddI || 1.410 + n_op == Op_AddL || 1.411 + n_op == Op_AddF || 1.412 + n_op == Op_AddD || 1.413 + n_op == Op_MulI || 1.414 + n_op == Op_MulL || 1.415 + n_op == Op_MulF || 1.416 + n_op == Op_MulD ) { 1.417 + if( n2_loop == n_loop ) { 1.418 + assert( n1_loop != n_loop, "" ); 1.419 + n->swap_edges(1, 2); 1.420 + } 1.421 + } 1.422 + 1.423 + // Replace ((I1 +p V) +p I2) with ((I1 +p I2) +p V), 1.424 + // but not if I2 is a constant. 
1.425 + if( n_op == Op_AddP ) { 1.426 + if( n2_loop == n_loop && n3_loop != n_loop ) { 1.427 + if( n->in(2)->Opcode() == Op_AddP && !n->in(3)->is_Con() ) { 1.428 + Node *n22_ctrl = get_ctrl(n->in(2)->in(2)); 1.429 + Node *n23_ctrl = get_ctrl(n->in(2)->in(3)); 1.430 + IdealLoopTree *n22loop = get_loop( n22_ctrl ); 1.431 + IdealLoopTree *n23_loop = get_loop( n23_ctrl ); 1.432 + if( n22loop != n_loop && n22loop->is_member(n_loop) && 1.433 + n23_loop == n_loop ) { 1.434 + Node *add1 = new (C) AddPNode( n->in(1), n->in(2)->in(2), n->in(3) ); 1.435 + // Stuff new AddP in the loop preheader 1.436 + register_new_node( add1, n_loop->_head->in(LoopNode::EntryControl) ); 1.437 + Node *add2 = new (C) AddPNode( n->in(1), add1, n->in(2)->in(3) ); 1.438 + register_new_node( add2, n_ctrl ); 1.439 + _igvn.replace_node( n, add2 ); 1.440 + return add2; 1.441 + } 1.442 + } 1.443 + } 1.444 + 1.445 + // Replace (I1 +p (I2 + V)) with ((I1 +p I2) +p V) 1.446 + if( n2_loop != n_loop && n3_loop == n_loop ) { 1.447 + if( n->in(3)->Opcode() == Op_AddI ) { 1.448 + Node *V = n->in(3)->in(1); 1.449 + Node *I = n->in(3)->in(2); 1.450 + if( is_member(n_loop,get_ctrl(V)) ) { 1.451 + } else { 1.452 + Node *tmp = V; V = I; I = tmp; 1.453 + } 1.454 + if( !is_member(n_loop,get_ctrl(I)) ) { 1.455 + Node *add1 = new (C) AddPNode( n->in(1), n->in(2), I ); 1.456 + // Stuff new AddP in the loop preheader 1.457 + register_new_node( add1, n_loop->_head->in(LoopNode::EntryControl) ); 1.458 + Node *add2 = new (C) AddPNode( n->in(1), add1, V ); 1.459 + register_new_node( add2, n_ctrl ); 1.460 + _igvn.replace_node( n, add2 ); 1.461 + return add2; 1.462 + } 1.463 + } 1.464 + } 1.465 + } 1.466 + 1.467 + return NULL; 1.468 +} 1.469 + 1.470 +//------------------------------conditional_move------------------------------- 1.471 +// Attempt to replace a Phi with a conditional move. We have some pretty 1.472 +// strict profitability requirements. 
// All Phis at the merge point must
// be converted, so we can remove the control flow. We need to limit the
// number of c-moves to a small handful. All code that was in the side-arms
// of the CFG diamond is now speculatively executed. This code has to be
// "cheap enough". We are pretty much limited to CFG diamonds that merge
// 1 or 2 items with a total of 1 or 2 ops executed speculatively.
Node *PhaseIdealLoop::conditional_move( Node *region ) {

  assert(region->is_Region(), "sanity check");
  if (region->req() != 3) return NULL;

  // Check for CFG diamond
  Node *lp = region->in(1);
  Node *rp = region->in(2);
  if (!lp || !rp) return NULL;
  Node *lp_c = lp->in(0);
  if (lp_c == NULL || lp_c != rp->in(0) || !lp_c->is_If()) return NULL;
  IfNode *iff = lp_c->as_If();

  // Check for ops pinned in an arm of the diamond.
  // Can't remove the control flow in this case
  if (lp->outcnt() > 1) return NULL;
  if (rp->outcnt() > 1) return NULL;

  IdealLoopTree* r_loop = get_loop(region);
  assert(r_loop == get_loop(iff), "sanity");
  // Always convert to CMOVE if all results are used only outside this loop.
  bool used_inside_loop = (r_loop == _ltree_root);

  // Check profitability
  int cost = 0;
  int phis = 0;
  for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
    Node *out = region->fast_out(i);
    if (!out->is_Phi()) continue; // Ignore other control edges, etc
    phis++;
    PhiNode* phi = out->as_Phi();
    BasicType bt = phi->type()->basic_type();
    switch (bt) {
    case T_FLOAT:
    case T_DOUBLE: {
      cost += Matcher::float_cmove_cost(); // Could be very expensive
      break;
    }
    case T_LONG: {
      cost += Matcher::long_cmove_cost(); // May encodes as 2 CMOV's
    }
    // Deliberate fall-through: a long CMOV also pays the base cost below.
    case T_INT:                 // These all CMOV fine
    case T_ADDRESS: {           // (RawPtr)
      cost++;
      break;
    }
    case T_NARROWOOP: // Fall through
    case T_OBJECT: {            // Base oops are OK, but not derived oops
      const TypeOopPtr *tp = phi->type()->make_ptr()->isa_oopptr();
      // Derived pointers are Bad (tm): what's the Base (for GC purposes) of a
      // CMOVE'd derived pointer? It's a CMOVE'd derived base. Thus
      // CMOVE'ing a derived pointer requires we also CMOVE the base. If we
      // have a Phi for the base here that we convert to a CMOVE all is well
      // and good. But if the base is dead, we'll not make a CMOVE. Later
      // the allocator will have to produce a base by creating a CMOVE of the
      // relevant bases. This puts the allocator in the business of
      // manufacturing expensive instructions, generally a bad plan.
      // Just Say No to Conditionally-Moved Derived Pointers.
      if (tp && tp->offset() != 0)
        return NULL;
      cost++;
      break;
    }
    default:
      return NULL; // In particular, can't do memory or I/O
    }
    // Add in cost any speculative ops
    for (uint j = 1; j < region->req(); j++) {
      Node *proj = region->in(j);
      Node *inp = phi->in(j);
      if (get_ctrl(inp) == proj) { // Found local op
        cost++;
        // Check for a chain of dependent ops; these will all become
        // speculative in a CMOV.
        for (uint k = 1; k < inp->req(); k++)
          if (get_ctrl(inp->in(k)) == proj)
            cost += ConditionalMoveLimit; // Too much speculative goo
      }
    }
    // See if the Phi is used by a Cmp or Narrow oop Decode/Encode.
    // This will likely Split-If, a higher-payoff operation.
    for (DUIterator_Fast kmax, k = phi->fast_outs(kmax); k < kmax; k++) {
      Node* use = phi->fast_out(k);
      if (use->is_Cmp() || use->is_DecodeNarrowPtr() || use->is_EncodeNarrowPtr())
        cost += ConditionalMoveLimit;
      // Is there a use inside the loop?
      // Note: check only basic types since CMoveP is pinned.
      if (!used_inside_loop && is_java_primitive(bt)) {
        IdealLoopTree* u_loop = get_loop(has_ctrl(use) ? get_ctrl(use) : use);
        if (r_loop == u_loop || r_loop->is_member(u_loop)) {
          used_inside_loop = true;
        }
      }
    }
  }
  Node* bol = iff->in(1);
  assert(bol->Opcode() == Op_Bool, "");
  int cmp_op = bol->in(1)->Opcode();
  // It is expensive to generate flags from a float compare.
  // Avoid duplicated float compare.
  if (phis > 1 && (cmp_op == Op_CmpF || cmp_op == Op_CmpD)) return NULL;

  float infrequent_prob = PROB_UNLIKELY_MAG(3);
  // Ignore cost and blocks frequency if CMOVE can be moved outside the loop.
  if (used_inside_loop) {
    if (cost >= ConditionalMoveLimit) return NULL; // Too much goo

    // BlockLayoutByFrequency optimization moves infrequent branch
    // from hot path. No point in CMOV'ing in such case (110 is used
    // instead of 100 to take into account not exactness of float value).
    if (BlockLayoutByFrequency) {
      infrequent_prob = MAX2(infrequent_prob, (float)BlockLayoutMinDiamondPercentage/110.0f);
    }
  }
  // Check for highly predictable branch. No point in CMOV'ing if
  // we are going to predict accurately all the time.
  if (iff->_prob < infrequent_prob ||
      iff->_prob > (1.0f - infrequent_prob))
    return NULL;

  // --------------
  // Now replace all Phis with CMOV's
  Node *cmov_ctrl = iff->in(0);
  // 'flip' selects which Phi input corresponds to the true projection.
  uint flip = (lp->Opcode() == Op_IfTrue);
  while (1) {
    PhiNode* phi = NULL;
    for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
      Node *out = region->fast_out(i);
      if (out->is_Phi()) {
        phi = out->as_Phi();
        break;
      }
    }
    if (phi == NULL) break;
#ifndef PRODUCT
    if (PrintOpto && VerifyLoopOptimizations) tty->print_cr("CMOV");
#endif
    // Move speculative ops
    for (uint j = 1; j < region->req(); j++) {
      Node *proj = region->in(j);
      Node *inp = phi->in(j);
      if (get_ctrl(inp) == proj) { // Found local op
#ifndef PRODUCT
        if (PrintOpto && VerifyLoopOptimizations) {
          tty->print("  speculate: ");
          inp->dump();
        }
#endif
        set_ctrl(inp, cmov_ctrl);
      }
    }
    Node *cmov = CMoveNode::make( C, cmov_ctrl, iff->in(1), phi->in(1+flip), phi->in(2-flip), _igvn.type(phi) );
    register_new_node( cmov, cmov_ctrl );
    _igvn.replace_node( phi, cmov );
#ifndef PRODUCT
    if (TraceLoopOpts) {
      tty->print("CMOV  ");
      r_loop->dump_head();
      if (Verbose) {
        bol->in(1)->dump(1);
        cmov->dump(1);
      }
    }
    if (VerifyLoopOptimizations) verify();
#endif
  }

  // The useless CFG diamond will fold up later; see the optimization in
  // RegionNode::Ideal.
  _igvn._worklist.push(region);

  return iff->in(1);
}

//------------------------------split_if_with_blocks_pre-----------------------
// Do the real work in a non-recursive function. Data nodes want to be
// cloned in the pre-order so they can feed each other nicely.
Node *PhaseIdealLoop::split_if_with_blocks_pre( Node *n ) {
  // Cloning these guys is unlikely to win
  int n_op = n->Opcode();
  if( n_op == Op_MergeMem ) return n;
  if( n->is_Proj() ) return n;
  // Do not clone-up CmpFXXX variations, as these are always
  // followed by a CmpI
  if( n->is_Cmp() ) return n;
  // Attempt to use a conditional move instead of a phi/branch
  if( ConditionalMoveLimit > 0 && n_op == Op_Region ) {
    Node *cmov = conditional_move( n );
    if( cmov ) return cmov;
  }
  if( n->is_CFG() || n->is_LoadStore() )
    return n;
  if( n_op == Op_Opaque1 ||     // Opaque nodes cannot be mod'd
      n_op == Op_Opaque2 ) {
    if( !C->major_progress() )   // If chance of no more loop opts...
      _igvn._worklist.push(n);  // maybe we'll remove them
    return n;
  }

  if( n->is_Con() ) return n;   // No cloning for Con nodes

  Node *n_ctrl = get_ctrl(n);
  if( !n_ctrl ) return n;       // Dead node

  // Attempt to remix address expressions for loop invariants
  Node *m = remix_address_expressions( n );
  if( m ) return m;

  // Determine if the Node has inputs from some local Phi.
  // Returns the block to clone thru.
1.688 + Node *n_blk = has_local_phi_input( n ); 1.689 + if( !n_blk ) return n; 1.690 + // Do not clone the trip counter through on a CountedLoop 1.691 + // (messes up the canonical shape). 1.692 + if( n_blk->is_CountedLoop() && n->Opcode() == Op_AddI ) return n; 1.693 + 1.694 + // Check for having no control input; not pinned. Allow 1.695 + // dominating control. 1.696 + if( n->in(0) ) { 1.697 + Node *dom = idom(n_blk); 1.698 + if( dom_lca( n->in(0), dom ) != n->in(0) ) 1.699 + return n; 1.700 + } 1.701 + // Policy: when is it profitable. You must get more wins than 1.702 + // policy before it is considered profitable. Policy is usually 0, 1.703 + // so 1 win is considered profitable. Big merges will require big 1.704 + // cloning, so get a larger policy. 1.705 + int policy = n_blk->req() >> 2; 1.706 + 1.707 + // If the loop is a candidate for range check elimination, 1.708 + // delay splitting through it's phi until a later loop optimization 1.709 + if (n_blk->is_CountedLoop()) { 1.710 + IdealLoopTree *lp = get_loop(n_blk); 1.711 + if (lp && lp->_rce_candidate) { 1.712 + return n; 1.713 + } 1.714 + } 1.715 + 1.716 + // Use same limit as split_if_with_blocks_post 1.717 + if( C->unique() > 35000 ) return n; // Method too big 1.718 + 1.719 + // Split 'n' through the merge point if it is profitable 1.720 + Node *phi = split_thru_phi( n, n_blk, policy ); 1.721 + if (!phi) return n; 1.722 + 1.723 + // Found a Phi to split thru! 1.724 + // Replace 'n' with the new phi 1.725 + _igvn.replace_node( n, phi ); 1.726 + // Moved a load around the loop, 'en-registering' something. 1.727 + if (n_blk->is_Loop() && n->is_Load() && 1.728 + !phi->in(LoopNode::LoopBackControl)->is_Load()) 1.729 + C->set_major_progress(); 1.730 + 1.731 + return phi; 1.732 +} 1.733 + 1.734 +static bool merge_point_too_heavy(Compile* C, Node* region) { 1.735 + // Bail out if the region and its phis have too many users. 
1.736 + int weight = 0; 1.737 + for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) { 1.738 + weight += region->fast_out(i)->outcnt(); 1.739 + } 1.740 + int nodes_left = MaxNodeLimit - C->live_nodes(); 1.741 + if (weight * 8 > nodes_left) { 1.742 +#ifndef PRODUCT 1.743 + if (PrintOpto) 1.744 + tty->print_cr("*** Split-if bails out: %d nodes, region weight %d", C->unique(), weight); 1.745 +#endif 1.746 + return true; 1.747 + } else { 1.748 + return false; 1.749 + } 1.750 +} 1.751 + 1.752 +static bool merge_point_safe(Node* region) { 1.753 + // 4799512: Stop split_if_with_blocks from splitting a block with a ConvI2LNode 1.754 + // having a PhiNode input. This sidesteps the dangerous case where the split 1.755 + // ConvI2LNode may become TOP if the input Value() does not 1.756 + // overlap the ConvI2L range, leaving a node which may not dominate its 1.757 + // uses. 1.758 + // A better fix for this problem can be found in the BugTraq entry, but 1.759 + // expediency for Mantis demands this hack. 1.760 + // 6855164: If the merge point has a FastLockNode with a PhiNode input, we stop 1.761 + // split_if_with_blocks from splitting a block because we could not move around 1.762 + // the FastLockNode. 1.763 + for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) { 1.764 + Node* n = region->fast_out(i); 1.765 + if (n->is_Phi()) { 1.766 + for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) { 1.767 + Node* m = n->fast_out(j); 1.768 + if (m->is_FastLock()) 1.769 + return false; 1.770 +#ifdef _LP64 1.771 + if (m->Opcode() == Op_ConvI2L) 1.772 + return false; 1.773 +#endif 1.774 + } 1.775 + } 1.776 + } 1.777 + return true; 1.778 +} 1.779 + 1.780 + 1.781 +//------------------------------place_near_use--------------------------------- 1.782 +// Place some computation next to use but not inside inner loops. 1.783 +// For inner loop uses move it to the preheader area. 
//------------------------------place_near_use---------------------------------
// Place some computation next to use but not inside inner loops.
// For inner loop uses move it to the preheader area.
// Returns 'useblock' itself when the use's loop is irreducible or has inner
// loops (no safe preheader to hoist to), else the loop's entry control.
Node *PhaseIdealLoop::place_near_use( Node *useblock ) const {
  IdealLoopTree *u_loop = get_loop( useblock );
  return (u_loop->_irreducible || u_loop->_child)
    ? useblock
    : u_loop->_head->in(LoopNode::EntryControl);
}


//------------------------------split_if_with_blocks_post----------------------
// Do the real work in a non-recursive function.  CFG hackery wants to be
// in the post-order, so it can dirty the I-DOM info and not use the dirtied
// info.
// Three independent transforms are attempted on node 'n':
// 1. Split a Cmp (and its Bool and If/CMove) through a Region's phis
//    (the classic split-if transform).
// 2. Remove an If dominated by an identical If (same Bool test).
// 3. Clone a loop-invariant-use-only computation out of its loop, one
//    clone per use, so it can sink past the loop exits.
void PhaseIdealLoop::split_if_with_blocks_post( Node *n ) {

  // Cloning Cmp through Phi's involves the split-if transform.
  // FastLock is not used by an If
  if( n->is_Cmp() && !n->is_FastLock() ) {
    if( C->unique() > 35000 ) return; // Method too big

    // Do not do 'split-if' if irreducible loops are present.
    if( _has_irreducible_loops )
      return;

    Node *n_ctrl = get_ctrl(n);
    // Determine if the Node has inputs from some local Phi.
    // Returns the block to clone thru.
    Node *n_blk = has_local_phi_input( n );
    if( n_blk != n_ctrl ) return;

    if( merge_point_too_heavy(C, n_ctrl) )
      return;

    if( n->outcnt() != 1 ) return; // Multiple bool's from 1 compare?
    Node *bol = n->unique_out();
    assert( bol->is_Bool(), "expect a bool here" );
    if( bol->outcnt() != 1 ) return;// Multiple branches from 1 compare?
    Node *iff = bol->unique_out();

    // Check some safety conditions
    if( iff->is_If() ) {        // Classic split-if?
      if( iff->in(0) != n_ctrl ) return; // Compare must be in same blk as if
    } else if (iff->is_CMove()) { // Trying to split-up a CMOVE
      // Can't split CMove with different control edge.
      if (iff->in(0) != NULL && iff->in(0) != n_ctrl ) return;
      if( get_ctrl(iff->in(2)) == n_ctrl ||
          get_ctrl(iff->in(3)) == n_ctrl )
        return;                 // Inputs not yet split-up
      if ( get_loop(n_ctrl) != get_loop(get_ctrl(iff)) ) {
        return;                 // Loop-invar test gates loop-varying CMOVE
      }
    } else {
      return;  // some other kind of node, such as an Allocate
    }

    // Do not do 'split-if' if some paths are dead.  First do dead code
    // elimination and then see if its still profitable.
    for( uint i = 1; i < n_ctrl->req(); i++ )
      if( n_ctrl->in(i) == C->top() )
        return;

    // When is split-if profitable?  Every 'win' on means some control flow
    // goes dead, so it's almost always a win.
    int policy = 0;
    // If trying to do a 'Split-If' at the loop head, it is only
    // profitable if the cmp folds up on BOTH paths.  Otherwise we
    // risk peeling a loop forever.

    // CNC - Disabled for now.  Requires careful handling of loop
    // body selection for the cloned code.  Also, make sure we check
    // for any input path not being in the same loop as n_ctrl.  For
    // irreducible loops we cannot check for 'n_ctrl->is_Loop()'
    // because the alternative loop entry points won't be converted
    // into LoopNodes.
    IdealLoopTree *n_loop = get_loop(n_ctrl);
    for( uint j = 1; j < n_ctrl->req(); j++ )
      if( get_loop(n_ctrl->in(j)) != n_loop )
        return;

    // Check for safety of the merge point.
    if( !merge_point_safe(n_ctrl) ) {
      return;
    }

    // Split compare 'n' through the merge point if it is profitable
    Node *phi = split_thru_phi( n, n_ctrl, policy );
    if( !phi ) return;

    // Found a Phi to split thru!
    // Replace 'n' with the new phi
    _igvn.replace_node( n, phi );

    // Now split the bool up thru the phi
    // (policy -1: always split, no profitability threshold)
    Node *bolphi = split_thru_phi( bol, n_ctrl, -1 );
    guarantee(bolphi != NULL, "null boolean phi node");

    _igvn.replace_node( bol, bolphi );
    assert( iff->in(1) == bolphi, "" );

    // If the phi folded to a constant the If will fold away in IGVN;
    // nothing more to do here.
    if( bolphi->Value(&_igvn)->singleton() )
      return;

    // Conditional-move?  Must split up now
    if( !iff->is_If() ) {
      Node *cmovphi = split_thru_phi( iff, n_ctrl, -1 );
      _igvn.replace_node( iff, cmovphi );
      return;
    }

    // Now split the IF
    do_split_if( iff );
    return;
  }

  // Check for an IF ready to split; one that has its
  // condition codes input coming from a Phi at the block start.
  int n_op = n->Opcode();

  // Check for an IF being dominated by another IF same test
  if( n_op == Op_If ) {
    Node *bol = n->in(1);
    uint max = bol->outcnt();
    // Check for same test used more than once?
    if( n_op == Op_If && max > 1 && bol->is_Bool() ) {
      // Search up IDOMs to see if this IF is dominated.
      Node *cutoff = get_ctrl(bol);

      // Now search up IDOMs till cutoff, looking for a dominating test
      Node *prevdom = n;
      Node *dom = idom(prevdom);
      while( dom != cutoff ) {
        // A dominating If with the same Bool whose taken projection leads
        // here decides this test too.
        if( dom->req() > 1 && dom->in(1) == bol && prevdom->in(0) == dom ) {
          // Replace the dominated test with an obvious true or false.
          // Place it on the IGVN worklist for later cleanup.
          C->set_major_progress();
          dominated_by( prevdom, n, false, true );
#ifndef PRODUCT
          if( VerifyLoopOptimizations ) verify();
#endif
          return;
        }
        prevdom = dom;
        dom = idom(prevdom);
      }
    }
  }

  // See if a shared loop-varying computation has no loop-varying uses.
  // Happens if something is only used for JVM state in uncommon trap exits,
  // like various versions of induction variable+offset.  Clone the
  // computation per usage to allow it to sink out of the loop.
  if (has_ctrl(n) && !n->in(0)) {// n not dead and has no control edge (can float about)
    Node *n_ctrl = get_ctrl(n);
    IdealLoopTree *n_loop = get_loop(n_ctrl);
    if( n_loop != _ltree_root ) {
      // Scan uses: bail (break) on the first use that would pin n in place.
      DUIterator_Fast imax, i = n->fast_outs(imax);
      for (; i < imax; i++) {
        Node* u = n->fast_out(i);
        if( !has_ctrl(u) )     break; // Found control user
        IdealLoopTree *u_loop = get_loop(get_ctrl(u));
        if( u_loop == n_loop ) break; // Found loop-varying use
        if( n_loop->is_member( u_loop ) ) break; // Found use in inner loop
        if( u->Opcode() == Op_Opaque1 ) break; // Found loop limit, bugfix for 4677003
      }
      bool did_break = (i < imax);  // Did we break out of the previous loop?
      if (!did_break && n->outcnt() > 1) { // All uses in outer loops!
        Node *late_load_ctrl = NULL;
        if (n->is_Load()) {
          // If n is a load, get and save the result from get_late_ctrl(),
          // to be later used in calculating the control for n's clones.
          clear_dom_lca_tags();
          late_load_ctrl = get_late_ctrl(n, n_ctrl);
        }
        // If n is a load, and the late control is the same as the current
        // control, then the cloning of n is a pointless exercise, because
        // GVN will ensure that we end up where we started.
        if (!n->is_Load() || late_load_ctrl != n_ctrl) {
          // Walk uses backwards; 'j' is decremented manually as edges to n
          // are replaced (each replacement removes one out-edge of n).
          for (DUIterator_Last jmin, j = n->last_outs(jmin); j >= jmin; ) {
            Node *u = n->last_out(j); // Clone private computation per use
            _igvn.rehash_node_delayed(u);
            Node *x = n->clone(); // Clone computation
            Node *x_ctrl = NULL;
            if( u->is_Phi() ) {
              // Replace all uses of normal nodes.  Replace Phi uses
              // individually, so the separate Nodes can sink down
              // different paths.
              uint k = 1;
              while( u->in(k) != n ) k++;
              u->set_req( k, x );
              // x goes next to Phi input path
              x_ctrl = u->in(0)->in(k);
              --j;
            } else {              // Normal use
              // Replace all uses
              for( uint k = 0; k < u->req(); k++ ) {
                if( u->in(k) == n ) {
                  u->set_req( k, x );
                  --j;
                }
              }
              x_ctrl = get_ctrl(u);
            }

            // Find control for 'x' next to use but not inside inner loops.
            // For inner loop uses get the preheader area.
            x_ctrl = place_near_use(x_ctrl);

            if (n->is_Load()) {
              // For loads, add a control edge to a CFG node outside of the loop
              // to force them to not combine and return back inside the loop
              // during GVN optimization (4641526).
              //
              // Because we are setting the actual control input, factor in
              // the result from get_late_ctrl() so we respect any
              // anti-dependences. (6233005).
              x_ctrl = dom_lca(late_load_ctrl, x_ctrl);

              // Don't allow the control input to be a CFG splitting node.
              // Such nodes should only have ProjNodes as outs, e.g. IfNode
              // should only have IfTrueNode and IfFalseNode (4985384).
              x_ctrl = find_non_split_ctrl(x_ctrl);
              assert(dom_depth(n_ctrl) <= dom_depth(x_ctrl), "n is later than its clone");

              x->set_req(0, x_ctrl);
            }
            register_new_node(x, x_ctrl);

            // Some institutional knowledge is needed here: 'x' is
            // yanked because if the optimizer runs GVN on it all the
            // cloned x's will common up and undo this optimization and
            // be forced back in the loop.  This is annoying because it
            // makes +VerifyOpto report false-positives on progress.  I
            // tried setting control edges on the x's to force them to
            // not combine, but the matching gets worried when it tries
            // to fold a StoreP and an AddP together (as part of an
            // address expression) and the AddP and StoreP have
            // different controls.
            if (!x->is_Load() && !x->is_DecodeNarrowPtr()) _igvn._worklist.yank(x);
          }
          _igvn.remove_dead_node(n);
        }
      }
    }
  }

  // Check for Opaque2's who's loop has disappeared - who's input is in the
  // same loop nest as their output.  Remove 'em, they are no longer useful.
  if( n_op == Op_Opaque2 &&
      n->in(1) != NULL &&
      get_loop(get_ctrl(n)) == get_loop(get_ctrl(n->in(1))) ) {
    _igvn.replace_node( n, n->in(1) );
  }
}
1.1040 +void PhaseIdealLoop::split_if_with_blocks( VectorSet &visited, Node_Stack &nstack ) { 1.1041 + Node *n = C->root(); 1.1042 + visited.set(n->_idx); // first, mark node as visited 1.1043 + // Do pre-visit work for root 1.1044 + n = split_if_with_blocks_pre( n ); 1.1045 + uint cnt = n->outcnt(); 1.1046 + uint i = 0; 1.1047 + while (true) { 1.1048 + // Visit all children 1.1049 + if (i < cnt) { 1.1050 + Node* use = n->raw_out(i); 1.1051 + ++i; 1.1052 + if (use->outcnt() != 0 && !visited.test_set(use->_idx)) { 1.1053 + // Now do pre-visit work for this use 1.1054 + use = split_if_with_blocks_pre( use ); 1.1055 + nstack.push(n, i); // Save parent and next use's index. 1.1056 + n = use; // Process all children of current use. 1.1057 + cnt = use->outcnt(); 1.1058 + i = 0; 1.1059 + } 1.1060 + } 1.1061 + else { 1.1062 + // All of n's children have been processed, complete post-processing. 1.1063 + if (cnt != 0 && !n->is_Con()) { 1.1064 + assert(has_node(n), "no dead nodes"); 1.1065 + split_if_with_blocks_post( n ); 1.1066 + } 1.1067 + if (nstack.is_empty()) { 1.1068 + // Finished all nodes on stack. 1.1069 + break; 1.1070 + } 1.1071 + // Get saved parent node and next use's index. Visit the rest of uses. 1.1072 + n = nstack.node(); 1.1073 + cnt = n->outcnt(); 1.1074 + i = nstack.index(); 1.1075 + nstack.pop(); 1.1076 + } 1.1077 + } 1.1078 +} 1.1079 + 1.1080 + 1.1081 +//============================================================================= 1.1082 +// 1.1083 +// C L O N E A L O O P B O D Y 1.1084 +// 1.1085 + 1.1086 +//------------------------------clone_iff-------------------------------------- 1.1087 +// Passed in a Phi merging (recursively) some nearly equivalent Bool/Cmps. 1.1088 +// "Nearly" because all Nodes have been cloned from the original in the loop, 1.1089 +// but the fall-in edges to the Cmp are different. Clone bool/Cmp pairs 1.1090 +// through the Phi recursively, and return a Bool. 
//------------------------------clone_iff--------------------------------------
// Passed in a Phi merging (recursively) some nearly equivalent Bool/Cmps.
// "Nearly" because all Nodes have been cloned from the original in the loop,
// but the fall-in edges to the Cmp are different.  Clone bool/Cmp pairs
// through the Phi recursively, and return a Bool.
// Strategy: turn Phi(Bool(Cmp(a,b))...) inside-out into
// Bool(Cmp(Phi(a...), Phi(b...))), reusing existing phis where hashing hits.
BoolNode *PhaseIdealLoop::clone_iff( PhiNode *phi, IdealLoopTree *loop ) {

  // Convert this Phi into a Phi merging Bools
  // (recursing first so every input is a Bool below).
  uint i;
  for( i = 1; i < phi->req(); i++ ) {
    Node *b = phi->in(i);
    if( b->is_Phi() ) {
      _igvn.replace_input_of(phi, i, clone_iff( b->as_Phi(), loop ));
    } else {
      assert( b->is_Bool(), "" );
    }
  }

  // All inputs are clones of the same original, so input 1 is a
  // representative sample of the Bool/Cmp shape.
  Node *sample_bool = phi->in(1);
  Node *sample_cmp = sample_bool->in(1);

  // Make Phis to merge the Cmp's inputs.
  PhiNode *phi1 = new (C) PhiNode( phi->in(0), Type::TOP );
  PhiNode *phi2 = new (C) PhiNode( phi->in(0), Type::TOP );
  for( i = 1; i < phi->req(); i++ ) {
    Node *n1 = phi->in(i)->in(1)->in(1);
    Node *n2 = phi->in(i)->in(1)->in(2);
    phi1->set_req( i, n1 );
    phi2->set_req( i, n2 );
    // Widen the new phis' types to cover each merged input.
    phi1->set_type( phi1->type()->meet_speculative(n1->bottom_type()));
    phi2->set_type( phi2->type()->meet_speculative(n2->bottom_type()));
  }
  // See if these Phis have been made before.
  // Register with optimizer
  Node *hit1 = _igvn.hash_find_insert(phi1);
  if( hit1 ) {                  // Hit, toss just made Phi
    _igvn.remove_dead_node(phi1); // Remove new phi
    assert( hit1->is_Phi(), "" );
    phi1 = (PhiNode*)hit1;      // Use existing phi
  } else {                      // Miss
    _igvn.register_new_node_with_optimizer(phi1);
  }
  Node *hit2 = _igvn.hash_find_insert(phi2);
  if( hit2 ) {                  // Hit, toss just made Phi
    _igvn.remove_dead_node(phi2); // Remove new phi
    assert( hit2->is_Phi(), "" );
    phi2 = (PhiNode*)hit2;      // Use existing phi
  } else {                      // Miss
    _igvn.register_new_node_with_optimizer(phi2);
  }
  // Register Phis with loop/block info
  set_ctrl(phi1, phi->in(0));
  set_ctrl(phi2, phi->in(0));
  // Make a new Cmp of the sample's opcode over the two merged-input phis.
  Node *cmp = sample_cmp->clone();
  cmp->set_req( 1, phi1 );
  cmp->set_req( 2, phi2 );
  _igvn.register_new_node_with_optimizer(cmp);
  set_ctrl(cmp, phi->in(0));

  // Make a new Bool of the sample's test over the new Cmp.
  Node *b = sample_bool->clone();
  b->set_req(1,cmp);
  _igvn.register_new_node_with_optimizer(b);
  set_ctrl(b, phi->in(0));

  assert( b->is_Bool(), "" );
  return (BoolNode*)b;
}

//------------------------------clone_bool-------------------------------------
// Passed in a Phi merging (recursively) some nearly equivalent Cmps.
// "Nearly" because all Nodes have been cloned from the original in the loop,
// but the fall-in edges to the Cmp are different.  Clone Cmps
// through the Phi recursively, and return a Cmp.
// Companion of clone_iff: the phi here merges Cmp nodes (or TOP on dead
// paths) rather than Bools.  Turns Phi(Cmp(a,b)...) into
// Cmp(Phi(a...), Phi(b...)), reusing existing phis where hashing hits,
// and returns the new Cmp.
CmpNode *PhaseIdealLoop::clone_bool( PhiNode *phi, IdealLoopTree *loop ) {
  uint i;
  // Convert this Phi into a Phi merging Bools
  // (recursing first so every input is a Cmp or TOP below).
  for( i = 1; i < phi->req(); i++ ) {
    Node *b = phi->in(i);
    if( b->is_Phi() ) {
      _igvn.replace_input_of(phi, i, clone_bool( b->as_Phi(), loop ));
    } else {
      assert( b->is_Cmp() || b->is_top(), "inputs are all Cmp or TOP" );
    }
  }

  // All inputs are clones of the same original Cmp; input 1 is a sample.
  Node *sample_cmp = phi->in(1);

  // Make Phis to merge the Cmp's inputs.
  PhiNode *phi1 = new (C) PhiNode( phi->in(0), Type::TOP );
  PhiNode *phi2 = new (C) PhiNode( phi->in(0), Type::TOP );
  for( uint j = 1; j < phi->req(); j++ ) {
    Node *cmp_top = phi->in(j); // Inputs are all Cmp or TOP
    Node *n1, *n2;
    if( cmp_top->is_Cmp() ) {
      n1 = cmp_top->in(1);
      n2 = cmp_top->in(2);
    } else {
      // Dead path: feed TOP into both operand phis.
      n1 = n2 = cmp_top;
    }
    phi1->set_req( j, n1 );
    phi2->set_req( j, n2 );
    // Widen the new phis' types to cover each merged input.
    phi1->set_type(phi1->type()->meet_speculative(n1->bottom_type()));
    phi2->set_type(phi2->type()->meet_speculative(n2->bottom_type()));
  }

  // See if these Phis have been made before.
  // Register with optimizer
  Node *hit1 = _igvn.hash_find_insert(phi1);
  if( hit1 ) {                  // Hit, toss just made Phi
    _igvn.remove_dead_node(phi1); // Remove new phi
    assert( hit1->is_Phi(), "" );
    phi1 = (PhiNode*)hit1;      // Use existing phi
  } else {                      // Miss
    _igvn.register_new_node_with_optimizer(phi1);
  }
  Node *hit2 = _igvn.hash_find_insert(phi2);
  if( hit2 ) {                  // Hit, toss just made Phi
    _igvn.remove_dead_node(phi2); // Remove new phi
    assert( hit2->is_Phi(), "" );
    phi2 = (PhiNode*)hit2;      // Use existing phi
  } else {                      // Miss
    _igvn.register_new_node_with_optimizer(phi2);
  }
  // Register Phis with loop/block info
  set_ctrl(phi1, phi->in(0));
  set_ctrl(phi2, phi->in(0));
  // Make a new Cmp of the sample's opcode over the two merged-input phis.
  Node *cmp = sample_cmp->clone();
  cmp->set_req( 1, phi1 );
  cmp->set_req( 2, phi2 );
  _igvn.register_new_node_with_optimizer(cmp);
  set_ctrl(cmp, phi->in(0));

  assert( cmp->is_Cmp(), "" );
  return (CmpNode*)cmp;
}

//------------------------------sink_use---------------------------------------
// If 'use' was in the loop-exit block, it now needs to be sunk
// below the post-loop merge point.  Recursively sinks 'use' and its
// transitive data users whose control is the merge's second input.
void PhaseIdealLoop::sink_use( Node *use, Node *post_loop ) {
  if (!use->is_CFG() && get_ctrl(use) == post_loop->in(2)) {
    set_ctrl(use, post_loop);
    for (DUIterator j = use->outs(); use->has_out(j); j++)
      sink_use(use->out(j), post_loop);
  }
}
//------------------------------clone_loop-------------------------------------
//
//                   C L O N E   A   L O O P   B O D Y
//
// This is the basic building block of the loop optimizations.  It clones an
// entire loop body.  It makes an old_new loop body mapping; with this mapping
// you can find the new-loop equivalent to an old-loop node.  All new-loop
// nodes are exactly equal to their old-loop counterparts, all edges are the
// same.  All exits from the old-loop now have a RegionNode that merges the
// equivalent new-loop path.  This is true even for the normal "loop-exit"
// condition.  All uses of loop-invariant old-loop values now come from (one
// or more) Phis that merge their new-loop equivalents.
//
// This operation leaves the graph in an illegal state: there are two valid
// control edges coming from the loop pre-header to both loop bodies.  I'll
// definitely have to hack the graph after running this transform.
//
// From this building block I will further edit edges to perform loop peeling
// or loop unrolling or iteration splitting (Range-Check-Elimination), etc.
//
// Parameter side_by_size_idom:
//   When side_by_size_idom is NULL, the dominator tree is constructed for
//      the clone loop to dominate the original.  Used in construction of
//      pre-main-post loop sequence.
//   When nonnull, the clone and original are side-by-side, both are
//      dominated by the side_by_side_idom node.  Used in construction of
//      unswitched loops.
void PhaseIdealLoop::clone_loop( IdealLoopTree *loop, Node_List &old_new, int dd,
                                 Node* side_by_side_idom) {

  // Step 1: Clone the loop body.  Make the old->new mapping.
  uint i;
  for( i = 0; i < loop->_body.size(); i++ ) {
    Node *old = loop->_body.at(i);
    Node *nnn = old->clone();
    old_new.map( old->_idx, nnn );
    _igvn.register_new_node_with_optimizer(nnn);
  }


  // Step 2: Fix the edges in the new body.  If the old input is outside the
  // loop use it.  If the old input is INside the loop, use the corresponding
  // new node instead.
  for( i = 0; i < loop->_body.size(); i++ ) {
    Node *old = loop->_body.at(i);
    Node *nnn = old_new[old->_idx];
    // Fix CFG/Loop controlling the new node
    if (has_ctrl(old)) {
      set_ctrl(nnn, old_new[get_ctrl(old)->_idx]);
    } else {
      set_loop(nnn, loop->_parent);
      if (old->outcnt() > 0) {
        set_idom( nnn, old_new[idom(old)->_idx], dd );
      }
    }
    // Correct edges to the new node
    for( uint j = 0; j < nnn->req(); j++ ) {
        Node *n = nnn->in(j);
        if( n ) {
          IdealLoopTree *old_in_loop = get_loop( has_ctrl(n) ? get_ctrl(n) : n );
          if( loop->is_member( old_in_loop ) )
            nnn->set_req(j, old_new[n->_idx]);
        }
    }
    _igvn.hash_find_insert(nnn);
  }
  // The clone's head dominates from its (shared, still-illegal) entry.
  Node *newhead = old_new[loop->_head->_idx];
  set_idom(newhead, newhead->in(LoopNode::EntryControl), dd);


  // Step 3: Now fix control uses.  Loop varying control uses have already
  // been fixed up (as part of all input edges in Step 2).  Loop invariant
  // control uses must be either an IfFalse or an IfTrue.  Make a merge
  // point to merge the old and new IfFalse/IfTrue nodes; make the use
  // refer to this.
  ResourceArea *area = Thread::current()->resource_area();
  Node_List worklist(area);
  uint new_counter = C->unique();
  for( i = 0; i < loop->_body.size(); i++ ) {
    Node* old = loop->_body.at(i);
    if( !old->is_CFG() ) continue;
    Node* nnn = old_new[old->_idx];

    // Copy uses to a worklist, so I can munge the def-use info
    // with impunity.
    for (DUIterator_Fast jmax, j = old->fast_outs(jmax); j < jmax; j++)
      worklist.push(old->fast_out(j));

    while( worklist.size() ) {  // Visit all uses
      Node *use = worklist.pop();
      if (!has_node(use))  continue; // Ignore dead nodes
      IdealLoopTree *use_loop = get_loop( has_ctrl(use) ? get_ctrl(use) : use );
      if( !loop->is_member( use_loop ) && use->is_CFG() ) {
        // Both OLD and USE are CFG nodes here.
        assert( use->is_Proj(), "" );

        // Clone the loop exit control projection
        Node *newuse = use->clone();
        newuse->set_req(0,nnn);
        _igvn.register_new_node_with_optimizer(newuse);
        set_loop(newuse, use_loop);
        set_idom(newuse, nnn, dom_depth(nnn) + 1 );

        // We need a Region to merge the exit from the peeled body and the
        // exit from the old loop body.
        RegionNode *r = new (C) RegionNode(3);
        // Map the old use to the new merge point
        old_new.map( use->_idx, r );
        uint dd_r = MIN2(dom_depth(newuse),dom_depth(use));
        assert( dd_r >= dom_depth(dom_lca(newuse,use)), "" );

        // The original user of 'use' uses 'r' instead.
        for (DUIterator_Last lmin, l = use->last_outs(lmin); l >= lmin;) {
          Node* useuse = use->last_out(l);
          _igvn.rehash_node_delayed(useuse);
          uint uses_found = 0;
          if( useuse->in(0) == use ) {
            useuse->set_req(0, r);
            uses_found++;
            if( useuse->is_CFG() ) {
              assert( dom_depth(useuse) > dd_r, "" );
              set_idom(useuse, r, dom_depth(useuse));
            }
          }
          for( uint k = 1; k < useuse->req(); k++ ) {
            if( useuse->in(k) == use ) {
              useuse->set_req(k, r);
              uses_found++;
            }
          }
          l -= uses_found;    // we deleted 1 or more copies of this edge
        }

        // Now finish up 'r'
        r->set_req( 1, newuse );
        r->set_req( 2,    use );
        _igvn.register_new_node_with_optimizer(r);
        set_loop(r, use_loop);
        set_idom(r, !side_by_side_idom ? newuse->in(0) : side_by_side_idom, dd_r);
      } // End of if a loop-exit test
    }
  }

  // Step 4: If loop-invariant use is not control, it must be dominated by a
  // loop exit IfFalse/IfTrue.  Find "proper" loop exit.  Make a Region
  // there if needed.  Make a Phi there merging old and new used values.
  Node_List *split_if_set = NULL;
  Node_List *split_bool_set = NULL;
  Node_List *split_cex_set = NULL;
  for( i = 0; i < loop->_body.size(); i++ ) {
    Node* old = loop->_body.at(i);
    Node* nnn = old_new[old->_idx];
    // Copy uses to a worklist, so I can munge the def-use info
    // with impunity.
    for (DUIterator_Fast jmax, j = old->fast_outs(jmax); j < jmax; j++)
      worklist.push(old->fast_out(j));

    while( worklist.size() ) {
      Node *use = worklist.pop();
      if (!has_node(use))  continue; // Ignore dead nodes
      if (use->in(0) == C->top())  continue;
      IdealLoopTree *use_loop = get_loop( has_ctrl(use) ? get_ctrl(use) : use );
      // Check for data-use outside of loop - at least one of OLD or USE
      // must not be a CFG node.
      if( !loop->is_member( use_loop ) && (!old->is_CFG() || !use->is_CFG())) {

        // If the Data use is an IF, that means we have an IF outside of the
        // loop that is switching on a condition that is set inside of the
        // loop.  Happens if people set a loop-exit flag; then test the flag
        // in the loop to break the loop, then test is again outside of the
        // loop to determine which way the loop exited.
        // Loop predicate If node connects to Bool node through Opaque1 node.
        if (use->is_If() || use->is_CMove() || C->is_predicate_opaq(use)) {
          // Since this code is highly unlikely, we lazily build the worklist
          // of such Nodes to go split.
          if( !split_if_set )
            split_if_set = new Node_List(area);
          split_if_set->push(use);
        }
        if( use->is_Bool() ) {
          if( !split_bool_set )
            split_bool_set = new Node_List(area);
          split_bool_set->push(use);
        }
        if( use->Opcode() == Op_CreateEx ) {
          if( !split_cex_set )
            split_cex_set = new Node_List(area);
          split_cex_set->push(use);
        }


        // Get "block" use is in
        uint idx = 0;
        while( use->in(idx) != old ) idx++;
        Node *prev = use->is_CFG() ? use : get_ctrl(use);
        assert( !loop->is_member( get_loop( prev ) ), "" );
        // Nodes with _idx >= new_counter are the merge Regions made in
        // Step 3; their in(2) is the old-loop exit path.
        Node *cfg = prev->_idx >= new_counter
          ? prev->in(2)
          : idom(prev);
        if( use->is_Phi() )     // Phi use is in prior block
          cfg = prev->in(idx);  // NOT in block of Phi itself
        if (cfg->is_top()) {    // Use is dead?
          _igvn.replace_input_of(use, idx, C->top());
          continue;
        }

        // Walk up the dominator/exit chain until back inside the loop;
        // 'prev' is then the first block outside, i.e. the "proper" exit.
        while( !loop->is_member( get_loop( cfg ) ) ) {
          prev = cfg;
          cfg = cfg->_idx >= new_counter ? cfg->in(2) : idom(cfg);
        }
        // If the use occurs after merging several exits from the loop, then
        // old value must have dominated all those exits.  Since the same old
        // value was used on all those exits we did not need a Phi at this
        // merge point.  NOW we do need a Phi here.  Each loop exit value
        // is now merged with the peeled body exit; each exit gets its own
        // private Phi and those Phis need to be merged here.
        Node *phi;
        if( prev->is_Region() ) {
          if( idx == 0 ) {      // Updating control edge?
            phi = prev;         // Just use existing control
          } else {              // Else need a new Phi
            phi = PhiNode::make( prev, old );
            // Now recursively fix up the new uses of old!
            for( uint i = 1; i < prev->req(); i++ ) {
              worklist.push(phi); // Onto worklist once for each 'old' input
            }
          }
        } else {
          // Get new RegionNode merging old and new loop exits
          prev = old_new[prev->_idx];
          // NOTE(review): assert message refers to old step numbering
          // ("step 7"); this Region was made in Step 3 above.
          assert( prev, "just made this in step 7" );
          if( idx == 0 ) {      // Updating control edge?
            phi = prev;         // Just use existing control
          } else {              // Else need a new Phi
            // Make a new Phi merging data values properly
            phi = PhiNode::make( prev, old );
            phi->set_req( 1, nnn );
          }
        }
        // If inserting a new Phi, check for prior hits
        if( idx != 0 ) {
          Node *hit = _igvn.hash_find_insert(phi);
          if( hit == NULL ) {
           _igvn.register_new_node_with_optimizer(phi); // Register new phi
          } else {                                      // or
            // Remove the new phi from the graph and use the hit
            _igvn.remove_dead_node(phi);
            phi = hit;                                  // Use existing phi
          }
          set_ctrl(phi, prev);
        }
        // Make 'use' use the Phi instead of the old loop body exit value
        _igvn.replace_input_of(use, idx, phi);
        if( use->_idx >= new_counter ) { // If updating new phis
          // Not needed for correctness, but prevents a weak assert
          // in AddPNode from tripping (when we end up with different
          // base & derived Phis that will become the same after
          // IGVN does CSE).
          Node *hit = _igvn.hash_find_insert(use);
          if( hit )             // Go ahead and re-hash for hits.
            _igvn.replace_node( use, hit );
        }

        // If 'use' was in the loop-exit block, it now needs to be sunk
        // below the post-loop merge point.
        sink_use( use, prev );
      }
    }
  }

  // Check for IFs that need splitting/cloning.  Happens if an IF outside of
  // the loop uses a condition set in the loop.  The original IF probably
  // takes control from one or more OLD Regions (which in turn get from NEW
  // Regions).  In any case, there will be a set of Phis for each merge point
  // from the IF up to where the original BOOL def exists the loop.
  if( split_if_set ) {
    while( split_if_set->size() ) {
      Node *iff = split_if_set->pop();
      if( iff->in(1)->is_Phi() ) {
        BoolNode *b = clone_iff( iff->in(1)->as_Phi(), loop );
        _igvn.replace_input_of(iff, 1, b);
      }
    }
  }
  if( split_bool_set ) {
    while( split_bool_set->size() ) {
      Node *b = split_bool_set->pop();
      Node *phi = b->in(1);
      assert( phi->is_Phi(), "" );
      CmpNode *cmp = clone_bool( (PhiNode*)phi, loop );
      _igvn.replace_input_of(b, 1, cmp);
    }
  }
  if( split_cex_set ) {
    while( split_cex_set->size() ) {
      Node *b = split_cex_set->pop();
      assert( b->in(0)->is_Region(), "" );
      assert( b->in(1)->is_Phi(), "" );
      assert( b->in(0)->in(0) == b->in(1)->in(0), "" );
      split_up( b, b->in(0), NULL );
    }
  }

}
1.1547 +int PhaseIdealLoop::stride_of_possible_iv(Node* iff) { 1.1548 + Node* trunc1 = NULL; 1.1549 + Node* trunc2 = NULL; 1.1550 + const TypeInt* ttype = NULL; 1.1551 + if (!iff->is_If() || iff->in(1) == NULL || !iff->in(1)->is_Bool()) { 1.1552 + return 0; 1.1553 + } 1.1554 + BoolNode* bl = iff->in(1)->as_Bool(); 1.1555 + Node* cmp = bl->in(1); 1.1556 + if (!cmp || cmp->Opcode() != Op_CmpI && cmp->Opcode() != Op_CmpU) { 1.1557 + return 0; 1.1558 + } 1.1559 + // Must have an invariant operand 1.1560 + if (is_member(get_loop(iff), get_ctrl(cmp->in(2)))) { 1.1561 + return 0; 1.1562 + } 1.1563 + Node* add2 = NULL; 1.1564 + Node* cmp1 = cmp->in(1); 1.1565 + if (cmp1->is_Phi()) { 1.1566 + // (If (Bool (CmpX phi:(Phi ...(Optional-trunc(AddI phi add2))) ))) 1.1567 + Node* phi = cmp1; 1.1568 + for (uint i = 1; i < phi->req(); i++) { 1.1569 + Node* in = phi->in(i); 1.1570 + Node* add = CountedLoopNode::match_incr_with_optional_truncation(in, 1.1571 + &trunc1, &trunc2, &ttype); 1.1572 + if (add && add->in(1) == phi) { 1.1573 + add2 = add->in(2); 1.1574 + break; 1.1575 + } 1.1576 + } 1.1577 + } else { 1.1578 + // (If (Bool (CmpX addtrunc:(Optional-trunc((AddI (Phi ...addtrunc...) add2)) ))) 1.1579 + Node* addtrunc = cmp1; 1.1580 + Node* add = CountedLoopNode::match_incr_with_optional_truncation(addtrunc, 1.1581 + &trunc1, &trunc2, &ttype); 1.1582 + if (add && add->in(1)->is_Phi()) { 1.1583 + Node* phi = add->in(1); 1.1584 + for (uint i = 1; i < phi->req(); i++) { 1.1585 + if (phi->in(i) == addtrunc) { 1.1586 + add2 = add->in(2); 1.1587 + break; 1.1588 + } 1.1589 + } 1.1590 + } 1.1591 + } 1.1592 + if (add2 != NULL) { 1.1593 + const TypeInt* add2t = _igvn.type(add2)->is_int(); 1.1594 + if (add2t->is_con()) { 1.1595 + return add2t->get_con(); 1.1596 + } 1.1597 + } 1.1598 + return 0; 1.1599 +} 1.1600 + 1.1601 + 1.1602 +//---------------------- stay_in_loop ------------------------------------- 1.1603 +// Return the (unique) control output node that's in the loop (if it exists.) 
Node* PhaseIdealLoop::stay_in_loop( Node* n, IdealLoopTree *loop) {
  Node* unique = NULL;
  if (!n) return NULL;
  for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
    Node* use = n->fast_out(i);
    // Only control uses count; data nodes (has_ctrl) are skipped.
    if (!has_ctrl(use) && loop->is_member(get_loop(use))) {
      if (unique != NULL) {
        return NULL;  // second in-loop control use: not unique
      }
      unique = use;
    }
  }
  return unique;
}

//------------------------------ register_node -------------------------------------
// Utility to register node "n" with PhaseIdealLoop
void PhaseIdealLoop::register_node(Node* n, IdealLoopTree *loop, Node* pred, int ddepth) {
  _igvn.register_new_node_with_optimizer(n);
  loop->_body.push(n);
  if (n->is_CFG()) {
    // Control nodes get a loop membership and a dominator-tree entry.
    set_loop(n, loop);
    set_idom(n, pred, ddepth);
  } else {
    // Data nodes only get a controlling-node assignment.
    set_ctrl(n, pred);
  }
}

//------------------------------ proj_clone -------------------------------------
// Utility to create an if-projection: a copy of "p" re-attached to "iff".
ProjNode* PhaseIdealLoop::proj_clone(ProjNode* p, IfNode* iff) {
  ProjNode* c = p->clone()->as_Proj();
  c->set_req(0, iff);
  return c;
}

//------------------------------ short_circuit_if -------------------------------------
// Force the iff control output to be the live_proj by replacing the if
// condition with the constant matching live_proj's _con. Returns the constant.
// "iff" may be NULL, in which case only the constant is created.
Node* PhaseIdealLoop::short_circuit_if(IfNode* iff, ProjNode* live_proj) {
  guarantee(live_proj != NULL, "null projection");
  int proj_con = live_proj->_con;
  assert(proj_con == 0 || proj_con == 1, "false or true projection");
  Node *con = _igvn.intcon(proj_con);
  set_ctrl(con, C->root());
  if (iff) {
    iff->set_req(1, con);
  }
  return con;
}

//------------------------------ insert_if_before_proj -------------------------------------
// Insert a new if before an if projection (* - new node)
//
// before
//           if(test)
//           /     \
//          v       v
//    other-proj   proj (arg)
//
// after
//           if(test)
//           /     \
//          /       v
//         |      * proj-clone
//         v       |
//    other-proj   v
//               * new_if(relop(cmp[IU](left,right)))
//                  /  \
//                 v    v
//         * new-proj  proj
//         (returned)
//
ProjNode* PhaseIdealLoop::insert_if_before_proj(Node* left, bool Signed, BoolTest::mask relop, Node* right, ProjNode* proj) {
  IfNode* iff = proj->in(0)->as_If();
  IdealLoopTree *loop = get_loop(proj);
  ProjNode *other_proj = iff->proj_out(!proj->is_IfTrue())->as_Proj();
  int ddepth = dom_depth(proj);

  // Both nodes are modified below; tell IGVN before touching them.
  _igvn.rehash_node_delayed(iff);
  _igvn.rehash_node_delayed(proj);

  proj->set_req(0, NULL);  // temporary disconnect
  ProjNode* proj2 = proj_clone(proj, iff);
  register_node(proj2, loop, iff, ddepth);

  // "Signed" selects CmpI vs CmpU for the new test.
  Node* cmp = Signed ? (Node*) new (C)CmpINode(left, right) : (Node*) new (C)CmpUNode(left, right);
  register_node(cmp, loop, proj2, ddepth);

  BoolNode* bol = new (C)BoolNode(cmp, relop);
  register_node(bol, loop, proj2, ddepth);

  IfNode* new_if = new (C)IfNode(proj2, bol, iff->_prob, iff->_fcnt);
  register_node(new_if, loop, proj2, ddepth);

  proj->set_req(0, new_if); // reattach
  set_idom(proj, new_if, ddepth);

  // The exit projection of the new if goes to wherever other_proj went.
  ProjNode* new_exit = proj_clone(other_proj, new_if)->as_Proj();
  guarantee(new_exit != NULL, "null exit node");
  register_node(new_exit, get_loop(other_proj), new_if, ddepth);

  return new_exit;
}

//------------------------------ insert_region_before_proj -------------------------------------
// Insert a region before an if projection (* - new node)
//
// before
//           if(test)
//          /      |
//         v       |
//       proj      v
//               other-proj
//
// after
//           if(test)
//          /      |
//         v       |
// * proj-clone    v
//         |     other-proj
//         v
// * new-region
//         |
//         v
// *      dum_if
//       /     \
//      v       \
// * dum-proj    v
//              proj
//
RegionNode* PhaseIdealLoop::insert_region_before_proj(ProjNode* proj) {
  IfNode* iff = proj->in(0)->as_If();
  IdealLoopTree *loop = get_loop(proj);
  ProjNode *other_proj = iff->proj_out(!proj->is_IfTrue())->as_Proj();
  int ddepth = dom_depth(proj);

  // Both nodes are modified below; tell IGVN before touching them.
  _igvn.rehash_node_delayed(iff);
  _igvn.rehash_node_delayed(proj);

  proj->set_req(0, NULL);  // temporary disconnect
  ProjNode* proj2 = proj_clone(proj, iff);
  register_node(proj2, loop, iff, ddepth);

  RegionNode* reg = new (C)RegionNode(2);
  reg->set_req(1, proj2);
  register_node(reg, loop, iff, ddepth);

  // The dummy if always takes the path leading to "proj" (short_circuit_if
  // with a NULL iff just manufactures the matching constant condition).
  IfNode* dum_if = new (C)IfNode(reg, short_circuit_if(NULL, proj), iff->_prob, iff->_fcnt);
  register_node(dum_if, loop, reg, ddepth);

  proj->set_req(0, dum_if); // reattach
  set_idom(proj, dum_if, ddepth);

  ProjNode* dum_proj = proj_clone(other_proj, dum_if);
  register_node(dum_proj, loop, dum_if, ddepth);

  return reg;
}

//------------------------------ insert_cmpi_loop_exit -------------------------------------
// Clone a signed compare loop exit from an unsigned compare and
// insert it before the unsigned cmp on the stay-in-loop path.
// All new nodes inserted in the dominator tree between the original
// if and its projections.  The original if test is replaced with
// a constant to force the stay-in-loop path.
//
// This is done to make sure that the original if and its projections
// still dominate the same set of control nodes, that the ctrl() relation
// from data nodes to them is preserved, and that their loop nesting is
// preserved.
//
// before
//          if(i <u limit)    unsigned compare loop exit
//         /       |
//        v        v
//   exit-proj   stay-in-loop-proj
//
// after
//          if(stay-in-loop-const)  original if
//         /       |
//        /        v
//       /  if(i <  limit)    new signed test
//      /  /       |
//     /  /        v
//    /  /  if(i <u limit)    new cloned unsigned test
//   /  /   /      |
//   v  v  v       |
//  region         |
//     |           |
//     dum-if      |
//     /  |        |
// ether  |        |
//        v        v
//   exit-proj   stay-in-loop-proj
//
IfNode* PhaseIdealLoop::insert_cmpi_loop_exit(IfNode* if_cmpu, IdealLoopTree *loop) {
  const bool Signed   = true;
  const bool Unsigned = false;

  // Only handle the shape (If (Bool.lt (CmpU i limit))).
  BoolNode* bol = if_cmpu->in(1)->as_Bool();
  if (bol->_test._test != BoolTest::lt) return NULL;
  CmpNode* cmpu = bol->in(1)->as_Cmp();
  if (cmpu->Opcode() != Op_CmpU) return NULL;
  int stride = stride_of_possible_iv(if_cmpu);
  if (stride == 0) return NULL;  // not an iv exit test

  Node* lp_proj = stay_in_loop(if_cmpu, loop);
  guarantee(lp_proj != NULL, "null loop node");

  ProjNode* lp_continue = lp_proj->as_Proj();
  ProjNode* lp_exit     = if_cmpu->proj_out(!lp_continue->is_IfTrue())->as_Proj();

  // For a negative stride the signed exit test becomes (i < 0),
  // i.e. limit is the zero constant.
  Node* limit = NULL;
  if (stride > 0) {
    limit = cmpu->in(2);
  } else {
    limit = _igvn.makecon(TypeInt::ZERO);
    set_ctrl(limit, C->root());
  }
  // Create a new region on the exit path
  RegionNode* reg = insert_region_before_proj(lp_exit);
  guarantee(reg != NULL, "null region node");

  // Clone the if-cmpu-true-false using a signed compare
  BoolTest::mask rel_i = stride > 0 ? bol->_test._test : BoolTest::ge;
  ProjNode* cmpi_exit = insert_if_before_proj(cmpu->in(1), Signed, rel_i, limit, lp_continue);
  reg->add_req(cmpi_exit);

  // Clone the if-cmpu-true-false
  BoolTest::mask rel_u = bol->_test._test;
  ProjNode* cmpu_exit = insert_if_before_proj(cmpu->in(1), Unsigned, rel_u, cmpu->in(2), lp_continue);
  reg->add_req(cmpu_exit);

  // Force original if to stay in loop.
  short_circuit_if(if_cmpu, lp_continue);

  // Return the newly inserted signed-compare if.
  return cmpi_exit->in(0)->as_If();
}

//------------------------------ remove_cmpi_loop_exit -------------------------------------
// Remove a previously inserted signed compare loop exit by constant-folding
// its test to the stay-in-loop direction.
void PhaseIdealLoop::remove_cmpi_loop_exit(IfNode* if_cmp, IdealLoopTree *loop) {
  Node* lp_proj = stay_in_loop(if_cmp, loop);
  assert(if_cmp->in(1)->in(1)->Opcode() == Op_CmpI &&
         stay_in_loop(lp_proj, loop)->is_If() &&
         stay_in_loop(lp_proj, loop)->in(1)->in(1)->Opcode() == Op_CmpU, "inserted cmpi before cmpu");
  Node *con = _igvn.makecon(lp_proj->is_IfTrue() ? TypeInt::ONE : TypeInt::ZERO);
  set_ctrl(con, C->root());
  if_cmp->set_req(1, con);
}

//------------------------------ scheduled_nodelist -------------------------------------
// Create a post order schedule of nodes that are in the
// "member" set.  The list is returned in "sched".
// The first node in "sched" is the loop head, followed by
// nodes which have no inputs in the "member" set, and then
// followed by the nodes that have an immediate input dependence
// on a node in "sched".
void PhaseIdealLoop::scheduled_nodelist( IdealLoopTree *loop, VectorSet& member, Node_List &sched ) {

  assert(member.test(loop->_head->_idx), "loop head must be in member set");
  Arena *a = Thread::current()->resource_area();
  VectorSet visited(a);
  Node_Stack nstack(a, loop->_body.size());

  Node* n = loop->_head;  // top of stack is cached in "n"
  uint idx = 0;
  visited.set(n->_idx);

  // Initially push all with no inputs from within member set
  for(uint i = 0; i < loop->_body.size(); i++ ) {
    Node *elt = loop->_body.at(i);
    if (member.test(elt->_idx)) {
      bool found = false;
      for (uint j = 0; j < elt->req(); j++) {
        Node* def = elt->in(j);
        if (def && member.test(def->_idx) && def != elt) {
          found = true;
          break;
        }
      }
      if (!found && elt != loop->_head) {
        nstack.push(n, idx);
        n = elt;
        assert(!visited.test(n->_idx), "not seen yet");
        visited.set(n->_idx);
      }
    }
  }

  // traverse out's that are in the member set
  // (depth-first; a node is appended to "sched" only after all its
  // in-member uses have been processed, yielding post order)
  while (true) {
    if (idx < n->outcnt()) {
      Node* use = n->raw_out(idx);
      idx++;
      if (!visited.test_set(use->_idx)) {
        if (member.test(use->_idx)) {
          nstack.push(n, idx);
          n = use;
          idx = 0;
        }
      }
    } else {
      // All outputs processed
      sched.push(n);
      if (nstack.is_empty()) break;
      n   = nstack.node();
      idx = nstack.index();
      nstack.pop();
    }
  }
}


//------------------------------ has_use_in_set -------------------------------------
// Has a use in the vector set
bool PhaseIdealLoop::has_use_in_set( Node* n, VectorSet& vset ) {
  for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
    Node* use = n->fast_out(j);
    if (vset.test(use->_idx)) {
      return true;
    }
  }
  return false;
}


//------------------------------ has_use_internal_to_set -------------------------------------
// Has use internal to the vector set (ie. not in a phi at the loop head)
bool PhaseIdealLoop::has_use_internal_to_set( Node* n, VectorSet& vset, IdealLoopTree *loop ) {
  Node* head = loop->_head;
  for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
    Node* use = n->fast_out(j);
    // Loop-head phi uses don't count as internal: they are merge points, not
    // consumers inside the set.
    if (vset.test(use->_idx) && !(use->is_Phi() && use->in(0) == head)) {
      return true;
    }
  }
  return false;
}


//------------------------------ clone_for_use_outside_loop -------------------------------------
// clone "n" for uses that are outside of loop; returns the number of clones made
int PhaseIdealLoop::clone_for_use_outside_loop( IdealLoopTree *loop, Node* n, Node_List& worklist ) {
  int cloned = 0;
  assert(worklist.size() == 0, "should be empty");
  // Collect out-of-loop uses first so the DU iterator isn't invalidated by edits.
  for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
    Node* use = n->fast_out(j);
    if( !loop->is_member(get_loop(has_ctrl(use) ? get_ctrl(use) : use)) ) {
      worklist.push(use);
    }
  }
  while( worklist.size() ) {
    Node *use = worklist.pop();
    if (!has_node(use) || use->in(0) == C->top()) continue;  // skip dead uses
    uint j;
    for (j = 0; j < use->req(); j++) {
      if (use->in(j) == n) break;
    }
    assert(j < use->req(), "must be there");

    // clone "n" and insert it between the inputs of "n" and the use outside the loop
    Node* n_clone = n->clone();
    _igvn.replace_input_of(use, j, n_clone);
    cloned++;
    Node* use_c;
    if (!use->is_Phi()) {
      use_c = has_ctrl(use) ? get_ctrl(use) : use->in(0);
    } else {
      // Use in a phi is considered a use in the associated predecessor block
      use_c = use->in(0)->in(j);
    }
    set_ctrl(n_clone, use_c);
    assert(!loop->is_member(get_loop(use_c)), "should be outside loop");
    get_loop(use_c)->_body.push(n_clone);
    _igvn.register_new_node_with_optimizer(n_clone);
#if !defined(PRODUCT)
    if (TracePartialPeeling) {
      tty->print_cr("loop exit cloning old: %d new: %d newbb: %d", n->_idx, n_clone->_idx, get_ctrl(n_clone)->_idx);
    }
#endif
  }
  return cloned;
}


//------------------------------ clone_for_special_use_inside_loop -------------------------------------
// clone "n" for special uses that are in the not_peeled region.
// If these def-uses occur in separate blocks, the code generator
// marks the method as not compilable.  For example, if a "BoolNode"
// is in a different basic block than the "IfNode" that uses it, then
// the compilation is aborted in the code generator.
void PhaseIdealLoop::clone_for_special_use_inside_loop( IdealLoopTree *loop, Node* n,
                                                        VectorSet& not_peel, Node_List& sink_list, Node_List& worklist ) {
  // Phis and loads are never cloned here.
  if (n->is_Phi() || n->is_Load()) {
    return;
  }
  assert(worklist.size() == 0, "should be empty");
  // Collect the If/CMove/Bool uses in the not_peel region that take "n" as
  // their condition input (in(1)).
  for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
    Node* use = n->fast_out(j);
    if ( not_peel.test(use->_idx) &&
         (use->is_If() || use->is_CMove() || use->is_Bool()) &&
         use->in(1) == n)  {
      worklist.push(use);
    }
  }
  if (worklist.size() > 0) {
    // clone "n" and insert it between inputs of "n" and the use
    Node* n_clone = n->clone();
    loop->_body.push(n_clone);
    _igvn.register_new_node_with_optimizer(n_clone);
    set_ctrl(n_clone, get_ctrl(n));
    sink_list.push(n_clone);
    not_peel <<= n_clone->_idx;  // add n_clone to not_peel set.
#if !defined(PRODUCT)
    if (TracePartialPeeling) {
      tty->print_cr("special not_peeled cloning old: %d new: %d", n->_idx, n_clone->_idx);
    }
#endif
    // Retarget every matching input edge of the collected uses to the clone.
    while( worklist.size() ) {
      Node *use = worklist.pop();
      _igvn.rehash_node_delayed(use);
      for (uint j = 1; j < use->req(); j++) {
        if (use->in(j) == n) {
          use->set_req(j, n_clone);
        }
      }
    }
  }
}


//------------------------------ insert_phi_for_loop -------------------------------------
// Insert phi(lp_entry_val, back_edge_val) at use->in(idx) for loop lp if phi does not already exist
void PhaseIdealLoop::insert_phi_for_loop( Node* use, uint idx, Node* lp_entry_val, Node* back_edge_val, LoopNode* lp ) {
  Node *phi = PhiNode::make(lp, back_edge_val);
  phi->set_req(LoopNode::EntryControl, lp_entry_val);
  // Use existing phi if it already exists
  Node *hit = _igvn.hash_find_insert(phi);
  if( hit == NULL ) {
    _igvn.register_new_node_with_optimizer(phi);
    set_ctrl(phi, lp);
  } else {
    // Remove the new phi from the graph and use the hit
    _igvn.remove_dead_node(phi);
    phi = hit;
  }
  _igvn.replace_input_of(use, idx, phi);
}

#ifdef ASSERT
//------------------------------ is_valid_loop_partition -------------------------------------
// Validate the loop partition sets: peel and not_peel
bool PhaseIdealLoop::is_valid_loop_partition( IdealLoopTree *loop, VectorSet& peel, Node_List& peel_list,
                                              VectorSet& not_peel ) {
  uint i;
  // Check that peel_list entries are in the peel set
  for (i = 0; i < peel_list.size(); i++) {
    if (!peel.test(peel_list.at(i)->_idx)) {
      return false;
    }
  }
  // Check that all loop members are in exactly one of peel set or not_peel set
  for (i = 0; i < loop->_body.size(); i++ ) {
    Node *def = loop->_body.at(i);
    uint di = def->_idx;
    // Check that peel set elements are in peel_list
    if (peel.test(di)) {
      if (not_peel.test(di)) {
        return false;  // in both sets
      }
      // Must be in peel_list also
      bool found = false;
      for (uint j = 0; j < peel_list.size(); j++) {
        if (peel_list.at(j)->_idx == di) {
          found = true;
          break;
        }
      }
      if (!found) {
        return false;
      }
    } else if (not_peel.test(di)) {
      if (peel.test(di)) {
        return false;
      }
    } else {
      return false;  // in neither set
    }
  }
  return true;
}

//------------------------------ is_valid_clone_loop_exit_use -------------------------------------
// Ensure a use outside of loop is of the right form: a phi merging through a
// 2-input region whose exit_idx path comes from an in-loop If/Jump projection.
bool PhaseIdealLoop::is_valid_clone_loop_exit_use( IdealLoopTree *loop, Node* use, uint exit_idx) {
  Node *use_c = has_ctrl(use) ? get_ctrl(use) : use;
  return (use->is_Phi() &&
          use_c->is_Region() && use_c->req() == 3 &&
          (use_c->in(exit_idx)->Opcode() == Op_IfTrue ||
           use_c->in(exit_idx)->Opcode() == Op_IfFalse ||
           use_c->in(exit_idx)->Opcode() == Op_JumpProj) &&
          loop->is_member( get_loop( use_c->in(exit_idx)->in(0) ) ) );
}

//------------------------------ is_valid_clone_loop_form -------------------------------------
// Ensure that all uses outside of loop are of the right form
bool PhaseIdealLoop::is_valid_clone_loop_form( IdealLoopTree *loop, Node_List& peel_list,
                                               uint orig_exit_idx, uint clone_exit_idx) {
  uint len = peel_list.size();
  for (uint i = 0; i < len; i++) {
    Node *def = peel_list.at(i);

    for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
      Node *use = def->fast_out(j);
      Node *use_c = has_ctrl(use) ? get_ctrl(use) : use;
      if (!loop->is_member(get_loop(use_c))) {
        // use is not in the loop, check for correct structure
        if (use->in(0) == def) {
          // Okay
        } else if (!is_valid_clone_loop_exit_use(loop, use, orig_exit_idx)) {
          return false;
        }
      }
    }
  }
  return true;
}
#endif

//------------------------------ partial_peel -------------------------------------
// Partially peel (aka loop rotation) the top portion of a loop (called
// the peel section below) by cloning it and placing one copy just before
// the new loop head and the other copy at the bottom of the new loop.
1.2138 +// 1.2139 +// before after where it came from 1.2140 +// 1.2141 +// stmt1 stmt1 1.2142 +// loop: stmt2 clone 1.2143 +// stmt2 if condA goto exitA clone 1.2144 +// if condA goto exitA new_loop: new 1.2145 +// stmt3 stmt3 clone 1.2146 +// if !condB goto loop if condB goto exitB clone 1.2147 +// exitB: stmt2 orig 1.2148 +// stmt4 if !condA goto new_loop orig 1.2149 +// exitA: goto exitA 1.2150 +// exitB: 1.2151 +// stmt4 1.2152 +// exitA: 1.2153 +// 1.2154 +// Step 1: find the cut point: an exit test on probable 1.2155 +// induction variable. 1.2156 +// Step 2: schedule (with cloning) operations in the peel 1.2157 +// section that can be executed after the cut into 1.2158 +// the section that is not peeled. This may need 1.2159 +// to clone operations into exit blocks. For 1.2160 +// instance, a reference to A[i] in the not-peel 1.2161 +// section and a reference to B[i] in an exit block 1.2162 +// may cause a left-shift of i by 2 to be placed 1.2163 +// in the peel block. This step will clone the left 1.2164 +// shift into the exit block and sink the left shift 1.2165 +// from the peel to the not-peel section. 1.2166 +// Step 3: clone the loop, retarget the control, and insert 1.2167 +// phis for values that are live across the new loop 1.2168 +// head. This is very dependent on the graph structure 1.2169 +// from clone_loop. It creates region nodes for 1.2170 +// exit control and associated phi nodes for values 1.2171 +// flow out of the loop through that exit. The region 1.2172 +// node is dominated by the clone's control projection. 1.2173 +// So the clone's peel section is placed before the 1.2174 +// new loop head, and the clone's not-peel section is 1.2175 +// forms the top part of the new loop. The original 1.2176 +// peel section forms the tail of the new loop. 1.2177 +// Step 4: update the dominator tree and recompute the 1.2178 +// dominator depth. 
1.2179 +// 1.2180 +// orig 1.2181 +// 1.2182 +// stmt1 1.2183 +// | 1.2184 +// v 1.2185 +// loop predicate 1.2186 +// | 1.2187 +// v 1.2188 +// loop<----+ 1.2189 +// | | 1.2190 +// stmt2 | 1.2191 +// | | 1.2192 +// v | 1.2193 +// ifA | 1.2194 +// / | | 1.2195 +// v v | 1.2196 +// false true ^ <-- last_peel 1.2197 +// / | | 1.2198 +// / ===|==cut | 1.2199 +// / stmt3 | <-- first_not_peel 1.2200 +// / | | 1.2201 +// | v | 1.2202 +// v ifB | 1.2203 +// exitA: / \ | 1.2204 +// / \ | 1.2205 +// v v | 1.2206 +// false true | 1.2207 +// / \ | 1.2208 +// / ----+ 1.2209 +// | 1.2210 +// v 1.2211 +// exitB: 1.2212 +// stmt4 1.2213 +// 1.2214 +// 1.2215 +// after clone loop 1.2216 +// 1.2217 +// stmt1 1.2218 +// | 1.2219 +// v 1.2220 +// loop predicate 1.2221 +// / \ 1.2222 +// clone / \ orig 1.2223 +// / \ 1.2224 +// / \ 1.2225 +// v v 1.2226 +// +---->loop loop<----+ 1.2227 +// | | | | 1.2228 +// | stmt2 stmt2 | 1.2229 +// | | | | 1.2230 +// | v v | 1.2231 +// | ifA ifA | 1.2232 +// | | \ / | | 1.2233 +// | v v v v | 1.2234 +// ^ true false false true ^ <-- last_peel 1.2235 +// | | ^ \ / | | 1.2236 +// | cut==|== \ \ / ===|==cut | 1.2237 +// | stmt3 \ \ / stmt3 | <-- first_not_peel 1.2238 +// | | dom | | | | 1.2239 +// | v \ 1v v2 v | 1.2240 +// | ifB regionA ifB | 1.2241 +// | / \ | / \ | 1.2242 +// | / \ v / \ | 1.2243 +// | v v exitA: v v | 1.2244 +// | true false false true | 1.2245 +// | / ^ \ / \ | 1.2246 +// +---- \ \ / ----+ 1.2247 +// dom \ / 1.2248 +// \ 1v v2 1.2249 +// regionB 1.2250 +// | 1.2251 +// v 1.2252 +// exitB: 1.2253 +// stmt4 1.2254 +// 1.2255 +// 1.2256 +// after partial peel 1.2257 +// 1.2258 +// stmt1 1.2259 +// | 1.2260 +// v 1.2261 +// loop predicate 1.2262 +// / 1.2263 +// clone / orig 1.2264 +// / TOP 1.2265 +// / \ 1.2266 +// v v 1.2267 +// TOP->loop loop----+ 1.2268 +// | | | 1.2269 +// stmt2 stmt2 | 1.2270 +// | | | 1.2271 +// v v | 1.2272 +// ifA ifA | 1.2273 +// | \ / | | 1.2274 +// v v v v | 1.2275 +// true false false true | <-- 
last_peel 1.2276 +// | ^ \ / +------|---+ 1.2277 +// +->newloop \ \ / === ==cut | | 1.2278 +// | stmt3 \ \ / TOP | | 1.2279 +// | | dom | | stmt3 | | <-- first_not_peel 1.2280 +// | v \ 1v v2 v | | 1.2281 +// | ifB regionA ifB ^ v 1.2282 +// | / \ | / \ | | 1.2283 +// | / \ v / \ | | 1.2284 +// | v v exitA: v v | | 1.2285 +// | true false false true | | 1.2286 +// | / ^ \ / \ | | 1.2287 +// | | \ \ / v | | 1.2288 +// | | dom \ / TOP | | 1.2289 +// | | \ 1v v2 | | 1.2290 +// ^ v regionB | | 1.2291 +// | | | | | 1.2292 +// | | v ^ v 1.2293 +// | | exitB: | | 1.2294 +// | | stmt4 | | 1.2295 +// | +------------>-----------------+ | 1.2296 +// | | 1.2297 +// +-----------------<---------------------+ 1.2298 +// 1.2299 +// 1.2300 +// final graph 1.2301 +// 1.2302 +// stmt1 1.2303 +// | 1.2304 +// v 1.2305 +// loop predicate 1.2306 +// | 1.2307 +// v 1.2308 +// stmt2 clone 1.2309 +// | 1.2310 +// v 1.2311 +// ........> ifA clone 1.2312 +// : / | 1.2313 +// dom / | 1.2314 +// : v v 1.2315 +// : false true 1.2316 +// : | | 1.2317 +// : | v 1.2318 +// : | newloop<-----+ 1.2319 +// : | | | 1.2320 +// : | stmt3 clone | 1.2321 +// : | | | 1.2322 +// : | v | 1.2323 +// : | ifB | 1.2324 +// : | / \ | 1.2325 +// : | v v | 1.2326 +// : | false true | 1.2327 +// : | | | | 1.2328 +// : | v stmt2 | 1.2329 +// : | exitB: | | 1.2330 +// : | stmt4 v | 1.2331 +// : | ifA orig | 1.2332 +// : | / \ | 1.2333 +// : | / \ | 1.2334 +// : | v v | 1.2335 +// : | false true | 1.2336 +// : | / \ | 1.2337 +// : v v -----+ 1.2338 +// RegionA 1.2339 +// | 1.2340 +// v 1.2341 +// exitA 1.2342 +// 1.2343 +bool PhaseIdealLoop::partial_peel( IdealLoopTree *loop, Node_List &old_new ) { 1.2344 + 1.2345 + assert(!loop->_head->is_CountedLoop(), "Non-counted loop only"); 1.2346 + if (!loop->_head->is_Loop()) { 1.2347 + return false; } 1.2348 + 1.2349 + LoopNode *head = loop->_head->as_Loop(); 1.2350 + 1.2351 + if (head->is_partial_peel_loop() || head->partial_peel_has_failed()) { 1.2352 + return false; 1.2353 + 
} 1.2354 + 1.2355 + // Check for complex exit control 1.2356 + for(uint ii = 0; ii < loop->_body.size(); ii++ ) { 1.2357 + Node *n = loop->_body.at(ii); 1.2358 + int opc = n->Opcode(); 1.2359 + if (n->is_Call() || 1.2360 + opc == Op_Catch || 1.2361 + opc == Op_CatchProj || 1.2362 + opc == Op_Jump || 1.2363 + opc == Op_JumpProj) { 1.2364 +#if !defined(PRODUCT) 1.2365 + if (TracePartialPeeling) { 1.2366 + tty->print_cr("\nExit control too complex: lp: %d", head->_idx); 1.2367 + } 1.2368 +#endif 1.2369 + return false; 1.2370 + } 1.2371 + } 1.2372 + 1.2373 + int dd = dom_depth(head); 1.2374 + 1.2375 + // Step 1: find cut point 1.2376 + 1.2377 + // Walk up dominators to loop head looking for first loop exit 1.2378 + // which is executed on every path thru loop. 1.2379 + IfNode *peel_if = NULL; 1.2380 + IfNode *peel_if_cmpu = NULL; 1.2381 + 1.2382 + Node *iff = loop->tail(); 1.2383 + while( iff != head ) { 1.2384 + if( iff->is_If() ) { 1.2385 + Node *ctrl = get_ctrl(iff->in(1)); 1.2386 + if (ctrl->is_top()) return false; // Dead test on live IF. 1.2387 + // If loop-varying exit-test, check for induction variable 1.2388 + if( loop->is_member(get_loop(ctrl)) && 1.2389 + loop->is_loop_exit(iff) && 1.2390 + is_possible_iv_test(iff)) { 1.2391 + Node* cmp = iff->in(1)->in(1); 1.2392 + if (cmp->Opcode() == Op_CmpI) { 1.2393 + peel_if = iff->as_If(); 1.2394 + } else { 1.2395 + assert(cmp->Opcode() == Op_CmpU, "must be CmpI or CmpU"); 1.2396 + peel_if_cmpu = iff->as_If(); 1.2397 + } 1.2398 + } 1.2399 + } 1.2400 + iff = idom(iff); 1.2401 + } 1.2402 + // Prefer signed compare over unsigned compare. 
1.2403 + IfNode* new_peel_if = NULL; 1.2404 + if (peel_if == NULL) { 1.2405 + if (!PartialPeelAtUnsignedTests || peel_if_cmpu == NULL) { 1.2406 + return false; // No peel point found 1.2407 + } 1.2408 + new_peel_if = insert_cmpi_loop_exit(peel_if_cmpu, loop); 1.2409 + if (new_peel_if == NULL) { 1.2410 + return false; // No peel point found 1.2411 + } 1.2412 + peel_if = new_peel_if; 1.2413 + } 1.2414 + Node* last_peel = stay_in_loop(peel_if, loop); 1.2415 + Node* first_not_peeled = stay_in_loop(last_peel, loop); 1.2416 + if (first_not_peeled == NULL || first_not_peeled == head) { 1.2417 + return false; 1.2418 + } 1.2419 + 1.2420 +#if !defined(PRODUCT) 1.2421 + if (TraceLoopOpts) { 1.2422 + tty->print("PartialPeel "); 1.2423 + loop->dump_head(); 1.2424 + } 1.2425 + 1.2426 + if (TracePartialPeeling) { 1.2427 + tty->print_cr("before partial peel one iteration"); 1.2428 + Node_List wl; 1.2429 + Node* t = head->in(2); 1.2430 + while (true) { 1.2431 + wl.push(t); 1.2432 + if (t == head) break; 1.2433 + t = idom(t); 1.2434 + } 1.2435 + while (wl.size() > 0) { 1.2436 + Node* tt = wl.pop(); 1.2437 + tt->dump(); 1.2438 + if (tt == last_peel) tty->print_cr("-- cut --"); 1.2439 + } 1.2440 + } 1.2441 +#endif 1.2442 + ResourceArea *area = Thread::current()->resource_area(); 1.2443 + VectorSet peel(area); 1.2444 + VectorSet not_peel(area); 1.2445 + Node_List peel_list(area); 1.2446 + Node_List worklist(area); 1.2447 + Node_List sink_list(area); 1.2448 + 1.2449 + // Set of cfg nodes to peel are those that are executable from 1.2450 + // the head through last_peel. 
1.2451 + assert(worklist.size() == 0, "should be empty"); 1.2452 + worklist.push(head); 1.2453 + peel.set(head->_idx); 1.2454 + while (worklist.size() > 0) { 1.2455 + Node *n = worklist.pop(); 1.2456 + if (n != last_peel) { 1.2457 + for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) { 1.2458 + Node* use = n->fast_out(j); 1.2459 + if (use->is_CFG() && 1.2460 + loop->is_member(get_loop(use)) && 1.2461 + !peel.test_set(use->_idx)) { 1.2462 + worklist.push(use); 1.2463 + } 1.2464 + } 1.2465 + } 1.2466 + } 1.2467 + 1.2468 + // Set of non-cfg nodes to peel are those that are control 1.2469 + // dependent on the cfg nodes. 1.2470 + uint i; 1.2471 + for(i = 0; i < loop->_body.size(); i++ ) { 1.2472 + Node *n = loop->_body.at(i); 1.2473 + Node *n_c = has_ctrl(n) ? get_ctrl(n) : n; 1.2474 + if (peel.test(n_c->_idx)) { 1.2475 + peel.set(n->_idx); 1.2476 + } else { 1.2477 + not_peel.set(n->_idx); 1.2478 + } 1.2479 + } 1.2480 + 1.2481 + // Step 2: move operations from the peeled section down into the 1.2482 + // not-peeled section 1.2483 + 1.2484 + // Get a post order schedule of nodes in the peel region 1.2485 + // Result in right-most operand. 
1.2486 + scheduled_nodelist(loop, peel, peel_list ); 1.2487 + 1.2488 + assert(is_valid_loop_partition(loop, peel, peel_list, not_peel), "bad partition"); 1.2489 + 1.2490 + // For future check for too many new phis 1.2491 + uint old_phi_cnt = 0; 1.2492 + for (DUIterator_Fast jmax, j = head->fast_outs(jmax); j < jmax; j++) { 1.2493 + Node* use = head->fast_out(j); 1.2494 + if (use->is_Phi()) old_phi_cnt++; 1.2495 + } 1.2496 + 1.2497 +#if !defined(PRODUCT) 1.2498 + if (TracePartialPeeling) { 1.2499 + tty->print_cr("\npeeled list"); 1.2500 + } 1.2501 +#endif 1.2502 + 1.2503 + // Evacuate nodes in peel region into the not_peeled region if possible 1.2504 + uint new_phi_cnt = 0; 1.2505 + uint cloned_for_outside_use = 0; 1.2506 + for (i = 0; i < peel_list.size();) { 1.2507 + Node* n = peel_list.at(i); 1.2508 +#if !defined(PRODUCT) 1.2509 + if (TracePartialPeeling) n->dump(); 1.2510 +#endif 1.2511 + bool incr = true; 1.2512 + if ( !n->is_CFG() ) { 1.2513 + 1.2514 + if ( has_use_in_set(n, not_peel) ) { 1.2515 + 1.2516 + // If not used internal to the peeled region, 1.2517 + // move "n" from peeled to not_peeled region. 1.2518 + 1.2519 + if ( !has_use_internal_to_set(n, peel, loop) ) { 1.2520 + 1.2521 + // if not pinned and not a load (which maybe anti-dependent on a store) 1.2522 + // and not a CMove (Matcher expects only bool->cmove). 1.2523 + if ( n->in(0) == NULL && !n->is_Load() && !n->is_CMove() ) { 1.2524 + cloned_for_outside_use += clone_for_use_outside_loop( loop, n, worklist ); 1.2525 + sink_list.push(n); 1.2526 + peel >>= n->_idx; // delete n from peel set. 1.2527 + not_peel <<= n->_idx; // add n to not_peel set. 
1.2528 + peel_list.remove(i); 1.2529 + incr = false; 1.2530 +#if !defined(PRODUCT) 1.2531 + if (TracePartialPeeling) { 1.2532 + tty->print_cr("sink to not_peeled region: %d newbb: %d", 1.2533 + n->_idx, get_ctrl(n)->_idx); 1.2534 + } 1.2535 +#endif 1.2536 + } 1.2537 + } else { 1.2538 + // Otherwise check for special def-use cases that span 1.2539 + // the peel/not_peel boundary such as bool->if 1.2540 + clone_for_special_use_inside_loop( loop, n, not_peel, sink_list, worklist ); 1.2541 + new_phi_cnt++; 1.2542 + } 1.2543 + } 1.2544 + } 1.2545 + if (incr) i++; 1.2546 + } 1.2547 + 1.2548 + if (new_phi_cnt > old_phi_cnt + PartialPeelNewPhiDelta) { 1.2549 +#if !defined(PRODUCT) 1.2550 + if (TracePartialPeeling) { 1.2551 + tty->print_cr("\nToo many new phis: %d old %d new cmpi: %c", 1.2552 + new_phi_cnt, old_phi_cnt, new_peel_if != NULL?'T':'F'); 1.2553 + } 1.2554 +#endif 1.2555 + if (new_peel_if != NULL) { 1.2556 + remove_cmpi_loop_exit(new_peel_if, loop); 1.2557 + } 1.2558 + // Inhibit more partial peeling on this loop 1.2559 + assert(!head->is_partial_peel_loop(), "not partial peeled"); 1.2560 + head->mark_partial_peel_failed(); 1.2561 + if (cloned_for_outside_use > 0) { 1.2562 + // Terminate this round of loop opts because 1.2563 + // the graph outside this loop was changed. 1.2564 + C->set_major_progress(); 1.2565 + return true; 1.2566 + } 1.2567 + return false; 1.2568 + } 1.2569 + 1.2570 + // Step 3: clone loop, retarget control, and insert new phis 1.2571 + 1.2572 + // Create new loop head for new phis and to hang 1.2573 + // the nodes being moved (sinked) from the peel region. 
1.2574 + LoopNode* new_head = new (C) LoopNode(last_peel, last_peel); 1.2575 + new_head->set_unswitch_count(head->unswitch_count()); // Preserve 1.2576 + _igvn.register_new_node_with_optimizer(new_head); 1.2577 + assert(first_not_peeled->in(0) == last_peel, "last_peel <- first_not_peeled"); 1.2578 + first_not_peeled->set_req(0, new_head); 1.2579 + set_loop(new_head, loop); 1.2580 + loop->_body.push(new_head); 1.2581 + not_peel.set(new_head->_idx); 1.2582 + set_idom(new_head, last_peel, dom_depth(first_not_peeled)); 1.2583 + set_idom(first_not_peeled, new_head, dom_depth(first_not_peeled)); 1.2584 + 1.2585 + while (sink_list.size() > 0) { 1.2586 + Node* n = sink_list.pop(); 1.2587 + set_ctrl(n, new_head); 1.2588 + } 1.2589 + 1.2590 + assert(is_valid_loop_partition(loop, peel, peel_list, not_peel), "bad partition"); 1.2591 + 1.2592 + clone_loop( loop, old_new, dd ); 1.2593 + 1.2594 + const uint clone_exit_idx = 1; 1.2595 + const uint orig_exit_idx = 2; 1.2596 + assert(is_valid_clone_loop_form( loop, peel_list, orig_exit_idx, clone_exit_idx ), "bad clone loop"); 1.2597 + 1.2598 + Node* head_clone = old_new[head->_idx]; 1.2599 + LoopNode* new_head_clone = old_new[new_head->_idx]->as_Loop(); 1.2600 + Node* orig_tail_clone = head_clone->in(2); 1.2601 + 1.2602 + // Add phi if "def" node is in peel set and "use" is not 1.2603 + 1.2604 + for(i = 0; i < peel_list.size(); i++ ) { 1.2605 + Node *def = peel_list.at(i); 1.2606 + if (!def->is_CFG()) { 1.2607 + for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) { 1.2608 + Node *use = def->fast_out(j); 1.2609 + if (has_node(use) && use->in(0) != C->top() && 1.2610 + (!peel.test(use->_idx) || 1.2611 + (use->is_Phi() && use->in(0) == head)) ) { 1.2612 + worklist.push(use); 1.2613 + } 1.2614 + } 1.2615 + while( worklist.size() ) { 1.2616 + Node *use = worklist.pop(); 1.2617 + for (uint j = 1; j < use->req(); j++) { 1.2618 + Node* n = use->in(j); 1.2619 + if (n == def) { 1.2620 + 1.2621 + // "def" is in peel set, "use" 
is not in peel set 1.2622 + // or "use" is in the entry boundary (a phi) of the peel set 1.2623 + 1.2624 + Node* use_c = has_ctrl(use) ? get_ctrl(use) : use; 1.2625 + 1.2626 + if ( loop->is_member(get_loop( use_c )) ) { 1.2627 + // use is in loop 1.2628 + if (old_new[use->_idx] != NULL) { // null for dead code 1.2629 + Node* use_clone = old_new[use->_idx]; 1.2630 + _igvn.replace_input_of(use, j, C->top()); 1.2631 + insert_phi_for_loop( use_clone, j, old_new[def->_idx], def, new_head_clone ); 1.2632 + } 1.2633 + } else { 1.2634 + assert(is_valid_clone_loop_exit_use(loop, use, orig_exit_idx), "clone loop format"); 1.2635 + // use is not in the loop, check if the live range includes the cut 1.2636 + Node* lp_if = use_c->in(orig_exit_idx)->in(0); 1.2637 + if (not_peel.test(lp_if->_idx)) { 1.2638 + assert(j == orig_exit_idx, "use from original loop"); 1.2639 + insert_phi_for_loop( use, clone_exit_idx, old_new[def->_idx], def, new_head_clone ); 1.2640 + } 1.2641 + } 1.2642 + } 1.2643 + } 1.2644 + } 1.2645 + } 1.2646 + } 1.2647 + 1.2648 + // Step 3b: retarget control 1.2649 + 1.2650 + // Redirect control to the new loop head if a cloned node in 1.2651 + // the not_peeled region has control that points into the peeled region. 1.2652 + // This necessary because the cloned peeled region will be outside 1.2653 + // the loop. 
1.2654 + // from to 1.2655 + // cloned-peeled <---+ 1.2656 + // new_head_clone: | <--+ 1.2657 + // cloned-not_peeled in(0) in(0) 1.2658 + // orig-peeled 1.2659 + 1.2660 + for(i = 0; i < loop->_body.size(); i++ ) { 1.2661 + Node *n = loop->_body.at(i); 1.2662 + if (!n->is_CFG() && n->in(0) != NULL && 1.2663 + not_peel.test(n->_idx) && peel.test(n->in(0)->_idx)) { 1.2664 + Node* n_clone = old_new[n->_idx]; 1.2665 + _igvn.replace_input_of(n_clone, 0, new_head_clone); 1.2666 + } 1.2667 + } 1.2668 + 1.2669 + // Backedge of the surviving new_head (the clone) is original last_peel 1.2670 + _igvn.replace_input_of(new_head_clone, LoopNode::LoopBackControl, last_peel); 1.2671 + 1.2672 + // Cut first node in original not_peel set 1.2673 + _igvn.rehash_node_delayed(new_head); // Multiple edge updates: 1.2674 + new_head->set_req(LoopNode::EntryControl, C->top()); // use rehash_node_delayed / set_req instead of 1.2675 + new_head->set_req(LoopNode::LoopBackControl, C->top()); // multiple replace_input_of calls 1.2676 + 1.2677 + // Copy head_clone back-branch info to original head 1.2678 + // and remove original head's loop entry and 1.2679 + // clone head's back-branch 1.2680 + _igvn.rehash_node_delayed(head); // Multiple edge updates 1.2681 + head->set_req(LoopNode::EntryControl, head_clone->in(LoopNode::LoopBackControl)); 1.2682 + head->set_req(LoopNode::LoopBackControl, C->top()); 1.2683 + _igvn.replace_input_of(head_clone, LoopNode::LoopBackControl, C->top()); 1.2684 + 1.2685 + // Similarly modify the phis 1.2686 + for (DUIterator_Fast kmax, k = head->fast_outs(kmax); k < kmax; k++) { 1.2687 + Node* use = head->fast_out(k); 1.2688 + if (use->is_Phi() && use->outcnt() > 0) { 1.2689 + Node* use_clone = old_new[use->_idx]; 1.2690 + _igvn.rehash_node_delayed(use); // Multiple edge updates 1.2691 + use->set_req(LoopNode::EntryControl, use_clone->in(LoopNode::LoopBackControl)); 1.2692 + use->set_req(LoopNode::LoopBackControl, C->top()); 1.2693 + _igvn.replace_input_of(use_clone, 
LoopNode::LoopBackControl, C->top()); 1.2694 + } 1.2695 + } 1.2696 + 1.2697 + // Step 4: update dominator tree and dominator depth 1.2698 + 1.2699 + set_idom(head, orig_tail_clone, dd); /* surviving original head is now entered from the cloned tail (orig_tail_clone); dd is the caller-supplied dominator depth */ 1.2700 + recompute_dom_depth(); 1.2701 + 1.2702 + // Inhibit more partial peeling on this loop 1.2703 + new_head_clone->set_partial_peel_loop(); 1.2704 + C->set_major_progress(); 1.2705 + loop->record_for_igvn(); 1.2706 + /* Debug-only trace: walk the idom chain from last_peel up to head_clone, labeling the original, new, and cloned heads */ 1.2707 +#if !defined(PRODUCT) 1.2708 + if (TracePartialPeeling) { 1.2709 + tty->print_cr("\nafter partial peel one iteration"); 1.2710 + Node_List wl(area); 1.2711 + Node* t = last_peel; 1.2712 + while (true) { 1.2713 + wl.push(t); 1.2714 + if (t == head_clone) break; 1.2715 + t = idom(t); 1.2716 + } 1.2717 + while (wl.size() > 0) { 1.2718 + Node* tt = wl.pop(); 1.2719 + if (tt == head) tty->print_cr("orig head"); 1.2720 + else if (tt == new_head_clone) tty->print_cr("new head"); 1.2721 + else if (tt == head_clone) tty->print_cr("clone head"); 1.2722 + tt->dump(); 1.2723 + } 1.2724 + } 1.2725 +#endif 1.2726 + return true; 1.2727 +} 1.2728 + 1.2729 +//------------------------------reorg_offsets---------------------------------- 1.2730 +// Reorganize offset computations to lower register pressure. Mostly 1.2731 +// prevent loop-fallout uses of the pre-incremented trip counter (which are 1.2732 +// then alive with the post-incremented trip counter forcing an extra 1.2733 +// register move) 1.2734 +void PhaseIdealLoop::reorg_offsets(IdealLoopTree *loop) { 1.2735 + // Perform it only for canonical counted loops. 1.2736 + // Loop's shape could be messed up by iteration_split_impl. 1.2737 + if (!loop->_head->is_CountedLoop()) 1.2738 + return; 1.2739 + if (!loop->_head->as_Loop()->is_valid_counted_loop()) 1.2740 + return; 1.2741 + 1.2742 + CountedLoopNode *cl = loop->_head->as_CountedLoop(); 1.2743 + CountedLoopEndNode *cle = cl->loopexit(); 1.2744 + Node *exit = cle->proj_out(false); /* false projection of the loop-end test == the fall-out path */ 1.2745 + Node *phi = cl->phi(); 1.2746 + 1.2747 + // Check for the special case of folks using the pre-incremented 1.2748 + // trip-counter on the fall-out path (forces the pre-incremented 1.2749 + // and post-incremented trip counter to be live at the same time). 1.2750 + // Fix this by adjusting to use the post-increment trip counter. 1.2751 + 1.2752 + bool progress = true; 1.2753 + while (progress) { 1.2754 + progress = false; 1.2755 + for (DUIterator_Fast imax, i = phi->fast_outs(imax); i < imax; i++) { 1.2756 + Node* use = phi->fast_out(i); // User of trip-counter 1.2757 + if (!has_ctrl(use)) continue; 1.2758 + Node *u_ctrl = get_ctrl(use); 1.2759 + if (use->is_Phi()) { /* Phi user: take the dominator-LCA of the region preds on the edges that actually receive the trip counter */ 1.2760 + u_ctrl = NULL; 1.2761 + for (uint j = 1; j < use->req(); j++) 1.2762 + if (use->in(j) == phi) 1.2763 + u_ctrl = dom_lca(u_ctrl, use->in(0)->in(j)); 1.2764 + } 1.2765 + IdealLoopTree *u_loop = get_loop(u_ctrl); 1.2766 + // Look for loop-invariant use 1.2767 + if (u_loop == loop) continue; 1.2768 + if (loop->is_member(u_loop)) continue; 1.2769 + // Check that use is live out the bottom. Assuming the trip-counter 1.2770 + // update is right at the bottom, uses of the loop middle are ok. 1.2771 + if (dom_lca(exit, u_ctrl) != exit) continue; 1.2772 + // Hit! Refactor use to use the post-incremented tripcounter. 1.2773 + // Compute a post-increment tripcounter. 1.2774 + Node *opaq = new (C) Opaque2Node( C, cle->incr() ); /* NOTE(review): Opaque2 presumably hides incr() from value-numbering so the Add below is not folded back into the pre-increment phi — confirm against connode.hpp */ 1.2775 + register_new_node(opaq, exit); 1.2776 + Node *neg_stride = _igvn.intcon(-cle->stride_con()); 1.2777 + set_ctrl(neg_stride, C->root()); 1.2778 + Node *post = new (C) AddINode( opaq, neg_stride); 1.2779 + register_new_node(post, exit); 1.2780 + _igvn.rehash_node_delayed(use); /* several inputs of 'use' may be rewired below */ 1.2781 + for (uint j = 1; j < use->req(); j++) { 1.2782 + if (use->in(j) == phi) 1.2783 + use->set_req(j, post); 1.2784 + } 1.2785 + // Since DU info changed, rerun loop 1.2786 + progress = true; 1.2787 + break; 1.2788 + } 1.2789 + } 1.2790 + 1.2791 +}