src/share/vm/opto/loopopts.cpp

changeset 435
a61af66fc99e
child 470
e2ae28d2ce91
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/share/vm/opto/loopopts.cpp	Sat Dec 01 00:00:00 2007 +0000
     1.3 @@ -0,0 +1,2677 @@
     1.4 +/*
     1.5 + * Copyright 1999-2006 Sun Microsystems, Inc.  All Rights Reserved.
     1.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     1.7 + *
     1.8 + * This code is free software; you can redistribute it and/or modify it
     1.9 + * under the terms of the GNU General Public License version 2 only, as
    1.10 + * published by the Free Software Foundation.
    1.11 + *
    1.12 + * This code is distributed in the hope that it will be useful, but WITHOUT
    1.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    1.14 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    1.15 + * version 2 for more details (a copy is included in the LICENSE file that
    1.16 + * accompanied this code).
    1.17 + *
    1.18 + * You should have received a copy of the GNU General Public License version
    1.19 + * 2 along with this work; if not, write to the Free Software Foundation,
    1.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    1.21 + *
    1.22 + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
    1.23 + * CA 95054 USA or visit www.sun.com if you need additional information or
    1.24 + * have any questions.
    1.25 + *
    1.26 + */
    1.27 +
    1.28 +#include "incls/_precompiled.incl"
    1.29 +#include "incls/_loopopts.cpp.incl"
    1.30 +
    1.31 +//=============================================================================
    1.32 +//------------------------------split_thru_phi---------------------------------
    1.33 +// Split Node 'n' through merge point if there is enough win.
    1.34 +Node *PhaseIdealLoop::split_thru_phi( Node *n, Node *region, int policy ) {
    1.35 +  int wins = 0;
    1.36 +  assert( !n->is_CFG(), "" );
    1.37 +  assert( region->is_Region(), "" );
    1.38 +  Node *phi = new (C, region->req()) PhiNode( region, n->bottom_type() );
    1.39 +  uint old_unique = C->unique();
    1.40 +  for( uint i = 1; i < region->req(); i++ ) {
    1.41 +    Node *x;
    1.42 +    Node* the_clone = NULL;
    1.43 +    if( region->in(i) == C->top() ) {
    1.44 +      x = C->top();             // Dead path?  Use a dead data op
    1.45 +    } else {
    1.46 +      x = n->clone();           // Else clone up the data op
    1.47 +      the_clone = x;            // Remember for possible deletion.
    1.48 +      // Alter data node to use pre-phi inputs
    1.49 +      if( n->in(0) == region )
    1.50 +        x->set_req( 0, region->in(i) );
    1.51 +      for( uint j = 1; j < n->req(); j++ ) {
    1.52 +        Node *in = n->in(j);
    1.53 +        if( in->is_Phi() && in->in(0) == region )
    1.54 +          x->set_req( j, in->in(i) ); // Use pre-Phi input for the clone
    1.55 +      }
    1.56 +    }
    1.57 +    // Check for a 'win' on some paths
    1.58 +    const Type *t = x->Value(&_igvn);
    1.59 +
    1.60 +    bool singleton = t->singleton();
    1.61 +
    1.62 +    // A TOP singleton indicates that there are no possible values incoming
    1.63 +    // along a particular edge. In most cases, this is OK, and the Phi will
    1.64 +    // be eliminated later in an Ideal call. However, we can't allow this to
    1.65 +    // happen if the singleton occurs on loop entry, as the elimination of
    1.66 +    // the PhiNode may cause the resulting node to migrate back to a previous
    1.67 +    // loop iteration.
    1.68 +    if( singleton && t == Type::TOP ) {
    1.69 +      // Is_Loop() == false does not confirm the absence of a loop (e.g., an
    1.70 +      // irreducible loop may not be indicated by an affirmative is_Loop());
    1.71 +      // therefore, the only top we can split thru a phi is on a backedge of
    1.72 +      // a loop.
    1.73 +      singleton &= region->is_Loop() && (i != LoopNode::EntryControl);
    1.74 +    }
    1.75 +
    1.76 +    if( singleton ) {
    1.77 +      wins++;
    1.78 +      x = ((PhaseGVN&)_igvn).makecon(t);
    1.79 +    } else {
    1.80 +      // We now call Identity to try to simplify the cloned node.
    1.81 +      // Note that some Identity methods call phase->type(this).
    1.82 +      // Make sure that the type array is big enough for
    1.83 +      // our new node, even though we may throw the node away.
    1.84 +      // (Note: This tweaking with igvn only works because x is a new node.)
    1.85 +      _igvn.set_type(x, t);
    1.86 +      Node *y = x->Identity(&_igvn);
    1.87 +      if( y != x ) {
    1.88 +        wins++;
    1.89 +        x = y;
    1.90 +      } else {
    1.91 +        y = _igvn.hash_find(x);
    1.92 +        if( y ) {
    1.93 +          wins++;
    1.94 +          x = y;
    1.95 +        } else {
    1.96 +          // Else x is a new node we are keeping
    1.97 +          // We do not need register_new_node_with_optimizer
    1.98 +          // because set_type has already been called.
    1.99 +          _igvn._worklist.push(x);
   1.100 +        }
   1.101 +      }
   1.102 +    }
   1.103 +    if (x != the_clone && the_clone != NULL)
   1.104 +      _igvn.remove_dead_node(the_clone);
   1.105 +    phi->set_req( i, x );
   1.106 +  }
   1.107 +  // Too few wins?
   1.108 +  if( wins <= policy ) {
   1.109 +    _igvn.remove_dead_node(phi);
   1.110 +    return NULL;
   1.111 +  }
   1.112 +
   1.113 +  // Record Phi
   1.114 +  register_new_node( phi, region );
   1.115 +
   1.116 +  for( uint i2 = 1; i2 < phi->req(); i2++ ) {
   1.117 +    Node *x = phi->in(i2);
   1.118 +    // If we commoned up the cloned 'x' with another existing Node,
   1.119 +    // the existing Node picks up a new use.  We need to make the
   1.120 +    // existing Node occur higher up so it dominates its uses.
   1.121 +    Node *old_ctrl;
   1.122 +    IdealLoopTree *old_loop;
   1.123 +
   1.124 +    // The occasional new node
   1.125 +    if( x->_idx >= old_unique ) {   // Found a new, unplaced node?
   1.126 +      old_ctrl = x->is_Con() ? C->root() : NULL;
   1.127 +      old_loop = NULL;              // Not in any prior loop
   1.128 +    } else {
   1.129 +      old_ctrl = x->is_Con() ? C->root() : get_ctrl(x);
   1.130 +      old_loop = get_loop(old_ctrl); // Get prior loop
   1.131 +    }
   1.132 +    // New late point must dominate new use
   1.133 +    Node *new_ctrl = dom_lca( old_ctrl, region->in(i2) );
   1.134 +    // Set new location
   1.135 +    set_ctrl(x, new_ctrl);
   1.136 +    IdealLoopTree *new_loop = get_loop( new_ctrl );
   1.137 +    // If changing loop bodies, see if we need to collect into new body
   1.138 +    if( old_loop != new_loop ) {
   1.139 +      if( old_loop && !old_loop->_child )
   1.140 +        old_loop->_body.yank(x);
   1.141 +      if( !new_loop->_child )
   1.142 +        new_loop->_body.push(x);  // Collect body info
   1.143 +    }
   1.144 +  }
   1.145 +
   1.146 +  return phi;
   1.147 +}
   1.148 +
   1.149 +//------------------------------dominated_by------------------------------------
   1.150 +// Replace the dominated test with an obvious true or false.  Place it on the
   1.151 +// IGVN worklist for later cleanup.  Move control-dependent data Nodes on the
   1.152 +// live path up to the dominating control.
   1.153 +void PhaseIdealLoop::dominated_by( Node *prevdom, Node *iff ) {
   1.154 +#ifndef PRODUCT
   1.155 +  if( VerifyLoopOptimizations && PrintOpto ) tty->print_cr("dominating test");
   1.156 +#endif
   1.157 +
   1.158 +
   1.159 +  // prevdom is the dominating projection of the dominating test.
   1.160 +  assert( iff->is_If(), "" );
   1.161 +  assert( iff->Opcode() == Op_If || iff->Opcode() == Op_CountedLoopEnd, "Check this code when new subtype is added");
   1.162 +  int pop = prevdom->Opcode();
   1.163 +  assert( pop == Op_IfFalse || pop == Op_IfTrue, "" );
   1.164 +  // 'con' is set to true or false to kill the dominated test.
   1.165 +  Node *con = _igvn.makecon(pop == Op_IfTrue ? TypeInt::ONE : TypeInt::ZERO);
   1.166 +  set_ctrl(con, C->root()); // Constant gets a new use
   1.167 +  // Hack the dominated test
   1.168 +  _igvn.hash_delete(iff);
   1.169 +  iff->set_req(1, con);
   1.170 +  _igvn._worklist.push(iff);
   1.171 +
   1.172 +  // If I dont have a reachable TRUE and FALSE path following the IfNode then
   1.173 +  // I can assume this path reaches an infinite loop.  In this case it's not
   1.174 +  // important to optimize the data Nodes - either the whole compilation will
   1.175 +  // be tossed or this path (and all data Nodes) will go dead.
   1.176 +  if( iff->outcnt() != 2 ) return;
   1.177 +
   1.178 +  // Make control-dependent data Nodes on the live path (path that will remain
   1.179 +  // once the dominated IF is removed) become control-dependent on the
   1.180 +  // dominating projection.
   1.181 +  Node* dp = ((IfNode*)iff)->proj_out(pop == Op_IfTrue);
   1.182 +  IdealLoopTree *old_loop = get_loop(dp);
   1.183 +
   1.184 +  for (DUIterator_Fast imax, i = dp->fast_outs(imax); i < imax; i++) {
   1.185 +    Node* cd = dp->fast_out(i); // Control-dependent node
   1.186 +    if( cd->depends_only_on_test() ) {
   1.187 +      assert( cd->in(0) == dp, "" );
   1.188 +      _igvn.hash_delete( cd );
   1.189 +      cd->set_req(0, prevdom);
   1.190 +      set_early_ctrl( cd );
   1.191 +      _igvn._worklist.push(cd);
   1.192 +      IdealLoopTree *new_loop = get_loop(get_ctrl(cd));
   1.193 +      if( old_loop != new_loop ) {
   1.194 +        if( !old_loop->_child ) old_loop->_body.yank(cd);
   1.195 +        if( !new_loop->_child ) new_loop->_body.push(cd);
   1.196 +      }
   1.197 +      --i;
   1.198 +      --imax;
   1.199 +    }
   1.200 +  }
   1.201 +}
   1.202 +
   1.203 +//------------------------------has_local_phi_input----------------------------
   1.204 +// Return TRUE if 'n' has Phi inputs from its local block and no other
   1.205 +// block-local inputs (all non-local-phi inputs come from earlier blocks)
   1.206 +Node *PhaseIdealLoop::has_local_phi_input( Node *n ) {
   1.207 +  Node *n_ctrl = get_ctrl(n);
   1.208 +  // See if some inputs come from a Phi in this block, or from before
   1.209 +  // this block.
   1.210 +  uint i;
   1.211 +  for( i = 1; i < n->req(); i++ ) {
   1.212 +    Node *phi = n->in(i);
   1.213 +    if( phi->is_Phi() && phi->in(0) == n_ctrl )
   1.214 +      break;
   1.215 +  }
   1.216 +  if( i >= n->req() )
   1.217 +    return NULL;                // No Phi inputs; nowhere to clone thru
   1.218 +
   1.219 +  // Check for inputs created between 'n' and the Phi input.  These
   1.220 +  // must split as well; they have already been given the chance
   1.221 +  // (courtesy of a post-order visit) and since they did not we must
   1.222 +  // recover the 'cost' of splitting them by being very profitable
   1.223 +  // when splitting 'n'.  Since this is unlikely we simply give up.
   1.224 +  for( i = 1; i < n->req(); i++ ) {
   1.225 +    Node *m = n->in(i);
   1.226 +    if( get_ctrl(m) == n_ctrl && !m->is_Phi() ) {
   1.227 +      // We allow the special case of AddP's with no local inputs.
   1.228 +      // This allows us to split-up address expressions.
   1.229 +      if (m->is_AddP() &&
   1.230 +          get_ctrl(m->in(2)) != n_ctrl &&
   1.231 +          get_ctrl(m->in(3)) != n_ctrl) {
   1.232 +        // Move the AddP up to dominating point
   1.233 +        set_ctrl_and_loop(m, find_non_split_ctrl(idom(n_ctrl)));
   1.234 +        continue;
   1.235 +      }
   1.236 +      return NULL;
   1.237 +    }
   1.238 +  }
   1.239 +
   1.240 +  return n_ctrl;
   1.241 +}
   1.242 +
   1.243 +//------------------------------remix_address_expressions----------------------
   1.244 +// Rework addressing expressions to get the most loop-invariant stuff
   1.245 +// moved out.  We'd like to do all associative operators, but it's especially
   1.246 +// important (common) to do address expressions.
   1.247 +Node *PhaseIdealLoop::remix_address_expressions( Node *n ) {
   1.248 +  if (!has_ctrl(n))  return NULL;
   1.249 +  Node *n_ctrl = get_ctrl(n);
   1.250 +  IdealLoopTree *n_loop = get_loop(n_ctrl);
   1.251 +
   1.252 +  // See if 'n' mixes loop-varying and loop-invariant inputs and
   1.253 +  // itself is loop-varying.
   1.254 +
   1.255 +  // Only interested in binary ops (and AddP)
   1.256 +  if( n->req() < 3 || n->req() > 4 ) return NULL;
   1.257 +
   1.258 +  Node *n1_ctrl = get_ctrl(n->in(                    1));
   1.259 +  Node *n2_ctrl = get_ctrl(n->in(                    2));
   1.260 +  Node *n3_ctrl = get_ctrl(n->in(n->req() == 3 ? 2 : 3));
   1.261 +  IdealLoopTree *n1_loop = get_loop( n1_ctrl );
   1.262 +  IdealLoopTree *n2_loop = get_loop( n2_ctrl );
   1.263 +  IdealLoopTree *n3_loop = get_loop( n3_ctrl );
   1.264 +
   1.265 +  // Does one of my inputs spin in a tighter loop than self?
   1.266 +  if( (n_loop->is_member( n1_loop ) && n_loop != n1_loop) ||
   1.267 +      (n_loop->is_member( n2_loop ) && n_loop != n2_loop) ||
   1.268 +      (n_loop->is_member( n3_loop ) && n_loop != n3_loop) )
   1.269 +    return NULL;                // Leave well enough alone
   1.270 +
   1.271 +  // Is at least one of my inputs loop-invariant?
   1.272 +  if( n1_loop == n_loop &&
   1.273 +      n2_loop == n_loop &&
   1.274 +      n3_loop == n_loop )
   1.275 +    return NULL;                // No loop-invariant inputs
   1.276 +
   1.277 +
   1.278 +  int n_op = n->Opcode();
   1.279 +
   1.280 +  // Replace expressions like ((V+I) << 2) with (V<<2 + I<<2).
   1.281 +  if( n_op == Op_LShiftI ) {
   1.282 +    // Scale is loop invariant
   1.283 +    Node *scale = n->in(2);
   1.284 +    Node *scale_ctrl = get_ctrl(scale);
   1.285 +    IdealLoopTree *scale_loop = get_loop(scale_ctrl );
   1.286 +    if( n_loop == scale_loop || !scale_loop->is_member( n_loop ) )
   1.287 +      return NULL;
   1.288 +    const TypeInt *scale_t = scale->bottom_type()->isa_int();
   1.289 +    if( scale_t && scale_t->is_con() && scale_t->get_con() >= 16 )
   1.290 +      return NULL;              // Dont bother with byte/short masking
   1.291 +    // Add must vary with loop (else shift would be loop-invariant)
   1.292 +    Node *add = n->in(1);
   1.293 +    Node *add_ctrl = get_ctrl(add);
   1.294 +    IdealLoopTree *add_loop = get_loop(add_ctrl);
   1.295 +    //assert( n_loop == add_loop, "" );
   1.296 +    if( n_loop != add_loop ) return NULL;  // happens w/ evil ZKM loops
   1.297 +
   1.298 +    // Convert I-V into I+ (0-V); same for V-I
   1.299 +    if( add->Opcode() == Op_SubI &&
   1.300 +        _igvn.type( add->in(1) ) != TypeInt::ZERO ) {
   1.301 +      Node *zero = _igvn.intcon(0);
   1.302 +      set_ctrl(zero, C->root());
   1.303 +      Node *neg = new (C, 3) SubINode( _igvn.intcon(0), add->in(2) );
   1.304 +      register_new_node( neg, get_ctrl(add->in(2) ) );
   1.305 +      add = new (C, 3) AddINode( add->in(1), neg );
   1.306 +      register_new_node( add, add_ctrl );
   1.307 +    }
   1.308 +    if( add->Opcode() != Op_AddI ) return NULL;
   1.309 +    // See if one add input is loop invariant
   1.310 +    Node *add_var = add->in(1);
   1.311 +    Node *add_var_ctrl = get_ctrl(add_var);
   1.312 +    IdealLoopTree *add_var_loop = get_loop(add_var_ctrl );
   1.313 +    Node *add_invar = add->in(2);
   1.314 +    Node *add_invar_ctrl = get_ctrl(add_invar);
   1.315 +    IdealLoopTree *add_invar_loop = get_loop(add_invar_ctrl );
   1.316 +    if( add_var_loop == n_loop ) {
   1.317 +    } else if( add_invar_loop == n_loop ) {
   1.318 +      // Swap to find the invariant part
   1.319 +      add_invar = add_var;
   1.320 +      add_invar_ctrl = add_var_ctrl;
   1.321 +      add_invar_loop = add_var_loop;
   1.322 +      add_var = add->in(2);
   1.323 +      Node *add_var_ctrl = get_ctrl(add_var);
   1.324 +      IdealLoopTree *add_var_loop = get_loop(add_var_ctrl );
   1.325 +    } else                      // Else neither input is loop invariant
   1.326 +      return NULL;
   1.327 +    if( n_loop == add_invar_loop || !add_invar_loop->is_member( n_loop ) )
   1.328 +      return NULL;              // No invariant part of the add?
   1.329 +
   1.330 +    // Yes!  Reshape address expression!
   1.331 +    Node *inv_scale = new (C, 3) LShiftINode( add_invar, scale );
   1.332 +    register_new_node( inv_scale, add_invar_ctrl );
   1.333 +    Node *var_scale = new (C, 3) LShiftINode( add_var, scale );
   1.334 +    register_new_node( var_scale, n_ctrl );
   1.335 +    Node *var_add = new (C, 3) AddINode( var_scale, inv_scale );
   1.336 +    register_new_node( var_add, n_ctrl );
   1.337 +    _igvn.hash_delete( n );
   1.338 +    _igvn.subsume_node( n, var_add );
   1.339 +    return var_add;
   1.340 +  }
   1.341 +
   1.342 +  // Replace (I+V) with (V+I)
   1.343 +  if( n_op == Op_AddI ||
   1.344 +      n_op == Op_AddL ||
   1.345 +      n_op == Op_AddF ||
   1.346 +      n_op == Op_AddD ||
   1.347 +      n_op == Op_MulI ||
   1.348 +      n_op == Op_MulL ||
   1.349 +      n_op == Op_MulF ||
   1.350 +      n_op == Op_MulD ) {
   1.351 +    if( n2_loop == n_loop ) {
   1.352 +      assert( n1_loop != n_loop, "" );
   1.353 +      n->swap_edges(1, 2);
   1.354 +    }
   1.355 +  }
   1.356 +
   1.357 +  // Replace ((I1 +p V) +p I2) with ((I1 +p I2) +p V),
   1.358 +  // but not if I2 is a constant.
   1.359 +  if( n_op == Op_AddP ) {
   1.360 +    if( n2_loop == n_loop && n3_loop != n_loop ) {
   1.361 +      if( n->in(2)->Opcode() == Op_AddP && !n->in(3)->is_Con() ) {
   1.362 +        Node *n22_ctrl = get_ctrl(n->in(2)->in(2));
   1.363 +        Node *n23_ctrl = get_ctrl(n->in(2)->in(3));
   1.364 +        IdealLoopTree *n22loop = get_loop( n22_ctrl );
   1.365 +        IdealLoopTree *n23_loop = get_loop( n23_ctrl );
   1.366 +        if( n22loop != n_loop && n22loop->is_member(n_loop) &&
   1.367 +            n23_loop == n_loop ) {
   1.368 +          Node *add1 = new (C, 4) AddPNode( n->in(1), n->in(2)->in(2), n->in(3) );
   1.369 +          // Stuff new AddP in the loop preheader
   1.370 +          register_new_node( add1, n_loop->_head->in(LoopNode::EntryControl) );
   1.371 +          Node *add2 = new (C, 4) AddPNode( n->in(1), add1, n->in(2)->in(3) );
   1.372 +          register_new_node( add2, n_ctrl );
   1.373 +          _igvn.hash_delete( n );
   1.374 +          _igvn.subsume_node( n, add2 );
   1.375 +          return add2;
   1.376 +        }
   1.377 +      }
   1.378 +    }
   1.379 +
   1.380 +    // Replace (I1 +p (I2 + V)) with ((I1 +p I2) +p V)
   1.381 +    if( n2_loop != n_loop && n3_loop == n_loop ) {
   1.382 +      if( n->in(3)->Opcode() == Op_AddI ) {
   1.383 +        Node *V = n->in(3)->in(1);
   1.384 +        Node *I = n->in(3)->in(2);
   1.385 +        if( is_member(n_loop,get_ctrl(V)) ) {
   1.386 +        } else {
   1.387 +          Node *tmp = V; V = I; I = tmp;
   1.388 +        }
   1.389 +        if( !is_member(n_loop,get_ctrl(I)) ) {
   1.390 +          Node *add1 = new (C, 4) AddPNode( n->in(1), n->in(2), I );
   1.391 +          // Stuff new AddP in the loop preheader
   1.392 +          register_new_node( add1, n_loop->_head->in(LoopNode::EntryControl) );
   1.393 +          Node *add2 = new (C, 4) AddPNode( n->in(1), add1, V );
   1.394 +          register_new_node( add2, n_ctrl );
   1.395 +          _igvn.hash_delete( n );
   1.396 +          _igvn.subsume_node( n, add2 );
   1.397 +          return add2;
   1.398 +        }
   1.399 +      }
   1.400 +    }
   1.401 +  }
   1.402 +
   1.403 +  return NULL;
   1.404 +}
   1.405 +
   1.406 +//------------------------------conditional_move-------------------------------
   1.407 +// Attempt to replace a Phi with a conditional move.  We have some pretty
   1.408 +// strict profitability requirements.  All Phis at the merge point must
   1.409 +// be converted, so we can remove the control flow.  We need to limit the
   1.410 +// number of c-moves to a small handful.  All code that was in the side-arms
   1.411 +// of the CFG diamond is now speculatively executed.  This code has to be
   1.412 +// "cheap enough".  We are pretty much limited to CFG diamonds that merge
   1.413 +// 1 or 2 items with a total of 1 or 2 ops executed speculatively.
   1.414 +Node *PhaseIdealLoop::conditional_move( Node *region ) {
   1.415 +
   1.416 +  assert( region->is_Region(), "sanity check" );
   1.417 +  if( region->req() != 3 ) return NULL;
   1.418 +
   1.419 +  // Check for CFG diamond
   1.420 +  Node *lp = region->in(1);
   1.421 +  Node *rp = region->in(2);
   1.422 +  if( !lp || !rp ) return NULL;
   1.423 +  Node *lp_c = lp->in(0);
   1.424 +  if( lp_c == NULL || lp_c != rp->in(0) || !lp_c->is_If() ) return NULL;
   1.425 +  IfNode *iff = lp_c->as_If();
   1.426 +
   1.427 +  // Check for highly predictable branch.  No point in CMOV'ing if
   1.428 +  // we are going to predict accurately all the time.
   1.429 +  // %%% This hides patterns produced by utility methods like Math.min.
   1.430 +  if( iff->_prob < PROB_UNLIKELY_MAG(3) ||
   1.431 +      iff->_prob > PROB_LIKELY_MAG(3) )
   1.432 +    return NULL;
   1.433 +
   1.434 +  // Check for ops pinned in an arm of the diamond.
   1.435 +  // Can't remove the control flow in this case
   1.436 +  if( lp->outcnt() > 1 ) return NULL;
   1.437 +  if( rp->outcnt() > 1 ) return NULL;
   1.438 +
   1.439 +  // Check profitability
   1.440 +  int cost = 0;
   1.441 +  for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
   1.442 +    Node *out = region->fast_out(i);
   1.443 +    if( !out->is_Phi() ) continue; // Ignore other control edges, etc
   1.444 +    PhiNode* phi = out->as_Phi();
   1.445 +    switch (phi->type()->basic_type()) {
   1.446 +    case T_LONG:
   1.447 +      cost++;                   // Probably encodes as 2 CMOV's
   1.448 +    case T_INT:                 // These all CMOV fine
   1.449 +    case T_FLOAT:
   1.450 +    case T_DOUBLE:
   1.451 +    case T_ADDRESS:             // (RawPtr)
   1.452 +      cost++;
   1.453 +      break;
   1.454 +    case T_OBJECT: {            // Base oops are OK, but not derived oops
   1.455 +      const TypeOopPtr *tp = phi->type()->isa_oopptr();
   1.456 +      // Derived pointers are Bad (tm): what's the Base (for GC purposes) of a
   1.457 +      // CMOVE'd derived pointer?  It's a CMOVE'd derived base.  Thus
   1.458 +      // CMOVE'ing a derived pointer requires we also CMOVE the base.  If we
   1.459 +      // have a Phi for the base here that we convert to a CMOVE all is well
   1.460 +      // and good.  But if the base is dead, we'll not make a CMOVE.  Later
   1.461 +      // the allocator will have to produce a base by creating a CMOVE of the
   1.462 +      // relevant bases.  This puts the allocator in the business of
   1.463 +      // manufacturing expensive instructions, generally a bad plan.
   1.464 +      // Just Say No to Conditionally-Moved Derived Pointers.
   1.465 +      if( tp && tp->offset() != 0 )
   1.466 +        return NULL;
   1.467 +      cost++;
   1.468 +      break;
   1.469 +    }
   1.470 +    default:
   1.471 +      return NULL;              // In particular, can't do memory or I/O
   1.472 +    }
   1.473 +    // Add in cost any speculative ops
   1.474 +    for( uint j = 1; j < region->req(); j++ ) {
   1.475 +      Node *proj = region->in(j);
   1.476 +      Node *inp = phi->in(j);
   1.477 +      if (get_ctrl(inp) == proj) { // Found local op
   1.478 +        cost++;
   1.479 +        // Check for a chain of dependent ops; these will all become
   1.480 +        // speculative in a CMOV.
   1.481 +        for( uint k = 1; k < inp->req(); k++ )
   1.482 +          if (get_ctrl(inp->in(k)) == proj)
   1.483 +            return NULL;        // Too much speculative goo
   1.484 +      }
   1.485 +    }
   1.486 +    // See if the Phi is used by a Cmp.  This will likely Split-If, a
   1.487 +    // higher-payoff operation.
   1.488 +    for (DUIterator_Fast kmax, k = phi->fast_outs(kmax); k < kmax; k++) {
   1.489 +      Node* use = phi->fast_out(k);
   1.490 +      if( use->is_Cmp() )
   1.491 +        return NULL;
   1.492 +    }
   1.493 +  }
   1.494 +  if( cost >= ConditionalMoveLimit ) return NULL; // Too much goo
   1.495 +
   1.496 +  // --------------
   1.497 +  // Now replace all Phis with CMOV's
   1.498 +  Node *cmov_ctrl = iff->in(0);
   1.499 +  uint flip = (lp->Opcode() == Op_IfTrue);
   1.500 +  while( 1 ) {
   1.501 +    PhiNode* phi = NULL;
   1.502 +    for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
   1.503 +      Node *out = region->fast_out(i);
   1.504 +      if (out->is_Phi()) {
   1.505 +        phi = out->as_Phi();
   1.506 +        break;
   1.507 +      }
   1.508 +    }
   1.509 +    if (phi == NULL)  break;
   1.510 +#ifndef PRODUCT
   1.511 +    if( PrintOpto && VerifyLoopOptimizations ) tty->print_cr("CMOV");
   1.512 +#endif
   1.513 +    // Move speculative ops
   1.514 +    for( uint j = 1; j < region->req(); j++ ) {
   1.515 +      Node *proj = region->in(j);
   1.516 +      Node *inp = phi->in(j);
   1.517 +      if (get_ctrl(inp) == proj) { // Found local op
   1.518 +#ifndef PRODUCT
   1.519 +        if( PrintOpto && VerifyLoopOptimizations ) {
   1.520 +          tty->print("  speculate: ");
   1.521 +          inp->dump();
   1.522 +        }
   1.523 +#endif
   1.524 +        set_ctrl(inp, cmov_ctrl);
   1.525 +      }
   1.526 +    }
   1.527 +    Node *cmov = CMoveNode::make( C, cmov_ctrl, iff->in(1), phi->in(1+flip), phi->in(2-flip), _igvn.type(phi) );
   1.528 +    register_new_node( cmov, cmov_ctrl );
   1.529 +    _igvn.hash_delete(phi);
   1.530 +    _igvn.subsume_node( phi, cmov );
   1.531 +#ifndef PRODUCT
   1.532 +    if( VerifyLoopOptimizations ) verify();
   1.533 +#endif
   1.534 +  }
   1.535 +
   1.536 +  // The useless CFG diamond will fold up later; see the optimization in
   1.537 +  // RegionNode::Ideal.
   1.538 +  _igvn._worklist.push(region);
   1.539 +
   1.540 +  return iff->in(1);
   1.541 +}
   1.542 +
   1.543 +//------------------------------split_if_with_blocks_pre-----------------------
   1.544 +// Do the real work in a non-recursive function.  Data nodes want to be
   1.545 +// cloned in the pre-order so they can feed each other nicely.
   1.546 +Node *PhaseIdealLoop::split_if_with_blocks_pre( Node *n ) {
   1.547 +  // Cloning these guys is unlikely to win
   1.548 +  int n_op = n->Opcode();
   1.549 +  if( n_op == Op_MergeMem ) return n;
   1.550 +  if( n->is_Proj() ) return n;
   1.551 +  // Do not clone-up CmpFXXX variations, as these are always
   1.552 +  // followed by a CmpI
   1.553 +  if( n->is_Cmp() ) return n;
   1.554 +  // Attempt to use a conditional move instead of a phi/branch
   1.555 +  if( ConditionalMoveLimit > 0 && n_op == Op_Region ) {
   1.556 +    Node *cmov = conditional_move( n );
   1.557 +    if( cmov ) return cmov;
   1.558 +  }
   1.559 +  if( n->is_CFG() || n_op == Op_StorePConditional || n_op == Op_StoreLConditional || n_op == Op_CompareAndSwapI || n_op == Op_CompareAndSwapL ||n_op == Op_CompareAndSwapP)  return n;
   1.560 +  if( n_op == Op_Opaque1 ||     // Opaque nodes cannot be mod'd
   1.561 +      n_op == Op_Opaque2 ) {
   1.562 +    if( !C->major_progress() )   // If chance of no more loop opts...
   1.563 +      _igvn._worklist.push(n);  // maybe we'll remove them
   1.564 +    return n;
   1.565 +  }
   1.566 +
   1.567 +  if( n->is_Con() ) return n;   // No cloning for Con nodes
   1.568 +
   1.569 +  Node *n_ctrl = get_ctrl(n);
   1.570 +  if( !n_ctrl ) return n;       // Dead node
   1.571 +
   1.572 +  // Attempt to remix address expressions for loop invariants
   1.573 +  Node *m = remix_address_expressions( n );
   1.574 +  if( m ) return m;
   1.575 +
   1.576 +  // Determine if the Node has inputs from some local Phi.
   1.577 +  // Returns the block to clone thru.
   1.578 +  Node *n_blk = has_local_phi_input( n );
   1.579 +  if( !n_blk ) return n;
   1.580 +  // Do not clone the trip counter through on a CountedLoop
   1.581 +  // (messes up the canonical shape).
   1.582 +  if( n_blk->is_CountedLoop() && n->Opcode() == Op_AddI ) return n;
   1.583 +
   1.584 +  // Check for having no control input; not pinned.  Allow
   1.585 +  // dominating control.
   1.586 +  if( n->in(0) ) {
   1.587 +    Node *dom = idom(n_blk);
   1.588 +    if( dom_lca( n->in(0), dom ) != n->in(0) )
   1.589 +      return n;
   1.590 +  }
   1.591 +  // Policy: when is it profitable.  You must get more wins than
   1.592 +  // policy before it is considered profitable.  Policy is usually 0,
   1.593 +  // so 1 win is considered profitable.  Big merges will require big
   1.594 +  // cloning, so get a larger policy.
   1.595 +  int policy = n_blk->req() >> 2;
   1.596 +
   1.597 +  // If the loop is a candidate for range check elimination,
   1.598 +  // delay splitting through it's phi until a later loop optimization
   1.599 +  if (n_blk->is_CountedLoop()) {
   1.600 +    IdealLoopTree *lp = get_loop(n_blk);
   1.601 +    if (lp && lp->_rce_candidate) {
   1.602 +      return n;
   1.603 +    }
   1.604 +  }
   1.605 +
   1.606 +  // Use same limit as split_if_with_blocks_post
   1.607 +  if( C->unique() > 35000 ) return n; // Method too big
   1.608 +
   1.609 +  // Split 'n' through the merge point if it is profitable
   1.610 +  Node *phi = split_thru_phi( n, n_blk, policy );
   1.611 +  if( !phi ) return n;
   1.612 +
   1.613 +  // Found a Phi to split thru!
   1.614 +  // Replace 'n' with the new phi
   1.615 +  _igvn.hash_delete(n);
   1.616 +  _igvn.subsume_node( n, phi );
   1.617 +  // Moved a load around the loop, 'en-registering' something.
   1.618 +  if( n_blk->Opcode() == Op_Loop && n->is_Load() &&
   1.619 +      !phi->in(LoopNode::LoopBackControl)->is_Load() )
   1.620 +    C->set_major_progress();
   1.621 +
   1.622 +  return phi;
   1.623 +}
   1.624 +
   1.625 +static bool merge_point_too_heavy(Compile* C, Node* region) {
   1.626 +  // Bail out if the region and its phis have too many users.
   1.627 +  int weight = 0;
   1.628 +  for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
   1.629 +    weight += region->fast_out(i)->outcnt();
   1.630 +  }
   1.631 +  int nodes_left = MaxNodeLimit - C->unique();
   1.632 +  if (weight * 8 > nodes_left) {
   1.633 +#ifndef PRODUCT
   1.634 +    if (PrintOpto)
   1.635 +      tty->print_cr("*** Split-if bails out:  %d nodes, region weight %d", C->unique(), weight);
   1.636 +#endif
   1.637 +    return true;
   1.638 +  } else {
   1.639 +    return false;
   1.640 +  }
   1.641 +}
   1.642 +
   1.643 +#ifdef _LP64
   1.644 +static bool merge_point_safe(Node* region) {
   1.645 +  // 4799512: Stop split_if_with_blocks from splitting a block with a ConvI2LNode
   1.646 +  // having a PhiNode input. This sidesteps the dangerous case where the split
   1.647 +  // ConvI2LNode may become TOP if the input Value() does not
   1.648 +  // overlap the ConvI2L range, leaving a node which may not dominate its
   1.649 +  // uses.
   1.650 +  // A better fix for this problem can be found in the BugTraq entry, but
   1.651 +  // expediency for Mantis demands this hack.
   1.652 +  for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
   1.653 +    Node* n = region->fast_out(i);
   1.654 +    if (n->is_Phi()) {
   1.655 +      for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
   1.656 +        Node* m = n->fast_out(j);
   1.657 +        if (m->Opcode() == Op_ConvI2L) {
   1.658 +          return false;
   1.659 +        }
   1.660 +      }
   1.661 +    }
   1.662 +  }
   1.663 +  return true;
   1.664 +}
   1.665 +#endif
   1.666 +
   1.667 +
   1.668 +//------------------------------place_near_use---------------------------------
   1.669 +// Place some computation next to use but not inside inner loops.
   1.670 +// For inner loop uses move it to the preheader area.
   1.671 +Node *PhaseIdealLoop::place_near_use( Node *useblock ) const {
   1.672 +  IdealLoopTree *u_loop = get_loop( useblock );
   1.673 +  return (u_loop->_irreducible || u_loop->_child)
   1.674 +    ? useblock
   1.675 +    : u_loop->_head->in(LoopNode::EntryControl);
   1.676 +}
   1.677 +
   1.678 +
   1.679 +//------------------------------split_if_with_blocks_post----------------------
   1.680 +// Do the real work in a non-recursive function.  CFG hackery wants to be
   1.681 +// in the post-order, so it can dirty the I-DOM info and not use the dirtied
   1.682 +// info.
   1.683 +void PhaseIdealLoop::split_if_with_blocks_post( Node *n ) {
   1.684 +
   1.685 +  // Cloning Cmp through Phi's involves the split-if transform.
   1.686 +  // FastLock is not used by an If
   1.687 +  if( n->is_Cmp() && !n->is_FastLock() ) {
   1.688 +    if( C->unique() > 35000 ) return; // Method too big
   1.689 +
   1.690 +    // Do not do 'split-if' if irreducible loops are present.
   1.691 +    if( _has_irreducible_loops )
   1.692 +      return;
   1.693 +
   1.694 +    Node *n_ctrl = get_ctrl(n);
   1.695 +    // Determine if the Node has inputs from some local Phi.
   1.696 +    // Returns the block to clone thru.
   1.697 +    Node *n_blk = has_local_phi_input( n );
   1.698 +    if( n_blk != n_ctrl ) return;
   1.699 +
   1.700 +    if( merge_point_too_heavy(C, n_ctrl) )
   1.701 +      return;
   1.702 +
   1.703 +    if( n->outcnt() != 1 ) return; // Multiple bool's from 1 compare?
   1.704 +    Node *bol = n->unique_out();
   1.705 +    assert( bol->is_Bool(), "expect a bool here" );
   1.706 +    if( bol->outcnt() != 1 ) return;// Multiple branches from 1 compare?
   1.707 +    Node *iff = bol->unique_out();
   1.708 +
   1.709 +    // Check some safety conditions
   1.710 +    if( iff->is_If() ) {        // Classic split-if?
   1.711 +      if( iff->in(0) != n_ctrl ) return; // Compare must be in same blk as if
   1.712 +    } else if (iff->is_CMove()) { // Trying to split-up a CMOVE
   1.713 +      if( get_ctrl(iff->in(2)) == n_ctrl ||
   1.714 +          get_ctrl(iff->in(3)) == n_ctrl )
   1.715 +        return;                 // Inputs not yet split-up
   1.716 +      if ( get_loop(n_ctrl) != get_loop(get_ctrl(iff)) ) {
   1.717 +        return;                 // Loop-invar test gates loop-varying CMOVE
   1.718 +      }
   1.719 +    } else {
   1.720 +      return;  // some other kind of node, such as an Allocate
   1.721 +    }
   1.722 +
   1.723 +    // Do not do 'split-if' if some paths are dead.  First do dead code
   1.724 +    // elimination and then see if its still profitable.
   1.725 +    for( uint i = 1; i < n_ctrl->req(); i++ )
   1.726 +      if( n_ctrl->in(i) == C->top() )
   1.727 +        return;
   1.728 +
   1.729 +    // When is split-if profitable?  Every 'win' on means some control flow
   1.730 +    // goes dead, so it's almost always a win.
   1.731 +    int policy = 0;
   1.732 +    // If trying to do a 'Split-If' at the loop head, it is only
   1.733 +    // profitable if the cmp folds up on BOTH paths.  Otherwise we
   1.734 +    // risk peeling a loop forever.
   1.735 +
   1.736 +    // CNC - Disabled for now.  Requires careful handling of loop
   1.737 +    // body selection for the cloned code.  Also, make sure we check
   1.738 +    // for any input path not being in the same loop as n_ctrl.  For
   1.739 +    // irreducible loops we cannot check for 'n_ctrl->is_Loop()'
   1.740 +    // because the alternative loop entry points won't be converted
   1.741 +    // into LoopNodes.
   1.742 +    IdealLoopTree *n_loop = get_loop(n_ctrl);
   1.743 +    for( uint j = 1; j < n_ctrl->req(); j++ )
   1.744 +      if( get_loop(n_ctrl->in(j)) != n_loop )
   1.745 +        return;
   1.746 +
   1.747 +#ifdef _LP64
   1.748 +    // Check for safety of the merge point.
   1.749 +    if( !merge_point_safe(n_ctrl) ) {
   1.750 +      return;
   1.751 +    }
   1.752 +#endif
   1.753 +
   1.754 +    // Split compare 'n' through the merge point if it is profitable
   1.755 +    Node *phi = split_thru_phi( n, n_ctrl, policy );
   1.756 +    if( !phi ) return;
   1.757 +
   1.758 +    // Found a Phi to split thru!
   1.759 +    // Replace 'n' with the new phi
   1.760 +    _igvn.hash_delete(n);
   1.761 +    _igvn.subsume_node( n, phi );
   1.762 +
   1.763 +    // Now split the bool up thru the phi
   1.764 +    Node *bolphi = split_thru_phi( bol, n_ctrl, -1 );
   1.765 +    _igvn.hash_delete(bol);
   1.766 +    _igvn.subsume_node( bol, bolphi );
   1.767 +    assert( iff->in(1) == bolphi, "" );
   1.768 +    if( bolphi->Value(&_igvn)->singleton() )
   1.769 +      return;
   1.770 +
   1.771 +    // Conditional-move?  Must split up now
   1.772 +    if( !iff->is_If() ) {
   1.773 +      Node *cmovphi = split_thru_phi( iff, n_ctrl, -1 );
   1.774 +      _igvn.hash_delete(iff);
   1.775 +      _igvn.subsume_node( iff, cmovphi );
   1.776 +      return;
   1.777 +    }
   1.778 +
   1.779 +    // Now split the IF
   1.780 +    do_split_if( iff );
   1.781 +    return;
   1.782 +  }
   1.783 +
   1.784 +  // Check for an IF ready to split; one that has its
   1.785 +  // condition codes input coming from a Phi at the block start.
   1.786 +  int n_op = n->Opcode();
   1.787 +
   1.788 +  // Check for an IF being dominated by another IF same test
   1.789 +  if( n_op == Op_If ) {
   1.790 +    Node *bol = n->in(1);
   1.791 +    uint max = bol->outcnt();
   1.792 +    // Check for same test used more than once?
   1.793 +    if( n_op == Op_If && max > 1 && bol->is_Bool() ) {
   1.794 +      // Search up IDOMs to see if this IF is dominated.
   1.795 +      Node *cutoff = get_ctrl(bol);
   1.796 +
   1.797 +      // Now search up IDOMs till cutoff, looking for a dominating test
   1.798 +      Node *prevdom = n;
   1.799 +      Node *dom = idom(prevdom);
   1.800 +      while( dom != cutoff ) {
   1.801 +        if( dom->req() > 1 && dom->in(1) == bol && prevdom->in(0) == dom ) {
   1.802 +          // Replace the dominated test with an obvious true or false.
   1.803 +          // Place it on the IGVN worklist for later cleanup.
   1.804 +          C->set_major_progress();
   1.805 +          dominated_by( prevdom, n );
   1.806 +#ifndef PRODUCT
   1.807 +          if( VerifyLoopOptimizations ) verify();
   1.808 +#endif
   1.809 +          return;
   1.810 +        }
   1.811 +        prevdom = dom;
   1.812 +        dom = idom(prevdom);
   1.813 +      }
   1.814 +    }
   1.815 +  }
   1.816 +
   1.817 +  // See if a shared loop-varying computation has no loop-varying uses.
   1.818 +  // Happens if something is only used for JVM state in uncommon trap exits,
   1.819 +  // like various versions of induction variable+offset.  Clone the
   1.820 +  // computation per usage to allow it to sink out of the loop.
   1.821 +  if (has_ctrl(n) && !n->in(0)) {// n not dead and has no control edge (can float about)
   1.822 +    Node *n_ctrl = get_ctrl(n);
   1.823 +    IdealLoopTree *n_loop = get_loop(n_ctrl);
   1.824 +    if( n_loop != _ltree_root ) {
   1.825 +      DUIterator_Fast imax, i = n->fast_outs(imax);
   1.826 +      for (; i < imax; i++) {
   1.827 +        Node* u = n->fast_out(i);
   1.828 +        if( !has_ctrl(u) )     break; // Found control user
   1.829 +        IdealLoopTree *u_loop = get_loop(get_ctrl(u));
   1.830 +        if( u_loop == n_loop ) break; // Found loop-varying use
   1.831 +        if( n_loop->is_member( u_loop ) ) break; // Found use in inner loop
   1.832 +        if( u->Opcode() == Op_Opaque1 ) break; // Found loop limit, bugfix for 4677003
   1.833 +      }
   1.834 +      bool did_break = (i < imax);  // Did we break out of the previous loop?
   1.835 +      if (!did_break && n->outcnt() > 1) { // All uses in outer loops!
   1.836 +        Node *late_load_ctrl;
   1.837 +        if (n->is_Load()) {
   1.838 +          // If n is a load, get and save the result from get_late_ctrl(),
   1.839 +          // to be later used in calculating the control for n's clones.
   1.840 +          clear_dom_lca_tags();
   1.841 +          late_load_ctrl = get_late_ctrl(n, n_ctrl);
   1.842 +        }
   1.843 +        // If n is a load, and the late control is the same as the current
   1.844 +        // control, then the cloning of n is a pointless exercise, because
   1.845 +        // GVN will ensure that we end up where we started.
   1.846 +        if (!n->is_Load() || late_load_ctrl != n_ctrl) {
   1.847 +          for (DUIterator_Last jmin, j = n->last_outs(jmin); j >= jmin; ) {
   1.848 +            Node *u = n->last_out(j); // Clone private computation per use
   1.849 +            _igvn.hash_delete(u);
   1.850 +            _igvn._worklist.push(u);
   1.851 +            Node *x = n->clone(); // Clone computation
   1.852 +            Node *x_ctrl = NULL;
   1.853 +            if( u->is_Phi() ) {
   1.854 +              // Replace all uses of normal nodes.  Replace Phi uses
   1.855 +              // individually, so the seperate Nodes can sink down
   1.856 +              // different paths.
   1.857 +              uint k = 1;
   1.858 +              while( u->in(k) != n ) k++;
   1.859 +              u->set_req( k, x );
   1.860 +              // x goes next to Phi input path
   1.861 +              x_ctrl = u->in(0)->in(k);
   1.862 +              --j;
   1.863 +            } else {              // Normal use
   1.864 +              // Replace all uses
   1.865 +              for( uint k = 0; k < u->req(); k++ ) {
   1.866 +                if( u->in(k) == n ) {
   1.867 +                  u->set_req( k, x );
   1.868 +                  --j;
   1.869 +                }
   1.870 +              }
   1.871 +              x_ctrl = get_ctrl(u);
   1.872 +            }
   1.873 +
   1.874 +            // Find control for 'x' next to use but not inside inner loops.
   1.875 +            // For inner loop uses get the preheader area.
   1.876 +            x_ctrl = place_near_use(x_ctrl);
   1.877 +
   1.878 +            if (n->is_Load()) {
   1.879 +              // For loads, add a control edge to a CFG node outside of the loop
   1.880 +              // to force them to not combine and return back inside the loop
   1.881 +              // during GVN optimization (4641526).
   1.882 +              //
   1.883 +              // Because we are setting the actual control input, factor in
   1.884 +              // the result from get_late_ctrl() so we respect any
   1.885 +              // anti-dependences. (6233005).
   1.886 +              x_ctrl = dom_lca(late_load_ctrl, x_ctrl);
   1.887 +
   1.888 +              // Don't allow the control input to be a CFG splitting node.
   1.889 +              // Such nodes should only have ProjNodes as outs, e.g. IfNode
   1.890 +              // should only have IfTrueNode and IfFalseNode (4985384).
   1.891 +              x_ctrl = find_non_split_ctrl(x_ctrl);
   1.892 +              assert(dom_depth(n_ctrl) <= dom_depth(x_ctrl), "n is later than its clone");
   1.893 +
   1.894 +              x->set_req(0, x_ctrl);
   1.895 +            }
   1.896 +            register_new_node(x, x_ctrl);
   1.897 +
   1.898 +            // Some institutional knowledge is needed here: 'x' is
   1.899 +            // yanked because if the optimizer runs GVN on it all the
   1.900 +            // cloned x's will common up and undo this optimization and
   1.901 +            // be forced back in the loop.  This is annoying because it
   1.902 +            // makes +VerifyOpto report false-positives on progress.  I
   1.903 +            // tried setting control edges on the x's to force them to
   1.904 +            // not combine, but the matching gets worried when it tries
   1.905 +            // to fold a StoreP and an AddP together (as part of an
   1.906 +            // address expression) and the AddP and StoreP have
   1.907 +            // different controls.
   1.908 +            if( !x->is_Load() ) _igvn._worklist.yank(x);
   1.909 +          }
   1.910 +          _igvn.remove_dead_node(n);
   1.911 +        }
   1.912 +      }
   1.913 +    }
   1.914 +  }
   1.915 +
   1.916 +  // Check for Opaque2's who's loop has disappeared - who's input is in the
   1.917 +  // same loop nest as their output.  Remove 'em, they are no longer useful.
   1.918 +  if( n_op == Op_Opaque2 &&
   1.919 +      n->in(1) != NULL &&
   1.920 +      get_loop(get_ctrl(n)) == get_loop(get_ctrl(n->in(1))) ) {
   1.921 +    _igvn.add_users_to_worklist(n);
   1.922 +    _igvn.hash_delete(n);
   1.923 +    _igvn.subsume_node( n, n->in(1) );
   1.924 +  }
   1.925 +}
   1.926 +
   1.927 +//------------------------------split_if_with_blocks---------------------------
   1.928 +// Check for aggressive application of 'split-if' optimization,
   1.929 +// using basic block level info.
   1.930 +void PhaseIdealLoop::split_if_with_blocks( VectorSet &visited, Node_Stack &nstack ) {
   1.931 +  Node *n = C->root();
   1.932 +  visited.set(n->_idx); // first, mark node as visited
   1.933 +  // Do pre-visit work for root
   1.934 +  n = split_if_with_blocks_pre( n );
   1.935 +  uint cnt = n->outcnt();
   1.936 +  uint i   = 0;
   1.937 +  while (true) {
   1.938 +    // Visit all children
   1.939 +    if (i < cnt) {
   1.940 +      Node* use = n->raw_out(i);
   1.941 +      ++i;
   1.942 +      if (use->outcnt() != 0 && !visited.test_set(use->_idx)) {
   1.943 +        // Now do pre-visit work for this use
   1.944 +        use = split_if_with_blocks_pre( use );
   1.945 +        nstack.push(n, i); // Save parent and next use's index.
   1.946 +        n   = use;         // Process all children of current use.
   1.947 +        cnt = use->outcnt();
   1.948 +        i   = 0;
   1.949 +      }
   1.950 +    }
   1.951 +    else {
   1.952 +      // All of n's children have been processed, complete post-processing.
   1.953 +      if (cnt != 0 && !n->is_Con()) {
   1.954 +        assert(has_node(n), "no dead nodes");
   1.955 +        split_if_with_blocks_post( n );
   1.956 +      }
   1.957 +      if (nstack.is_empty()) {
   1.958 +        // Finished all nodes on stack.
   1.959 +        break;
   1.960 +      }
   1.961 +      // Get saved parent node and next use's index. Visit the rest of uses.
   1.962 +      n   = nstack.node();
   1.963 +      cnt = n->outcnt();
   1.964 +      i   = nstack.index();
   1.965 +      nstack.pop();
   1.966 +    }
   1.967 +  }
   1.968 +}
   1.969 +
   1.970 +
   1.971 +//=============================================================================
   1.972 +//
   1.973 +//                   C L O N E   A   L O O P   B O D Y
   1.974 +//
   1.975 +
   1.976 +//------------------------------clone_iff--------------------------------------
   1.977 +// Passed in a Phi merging (recursively) some nearly equivalent Bool/Cmps.
   1.978 +// "Nearly" because all Nodes have been cloned from the original in the loop,
   1.979 +// but the fall-in edges to the Cmp are different.  Clone bool/Cmp pairs
   1.980 +// through the Phi recursively, and return a Bool.
   1.981 +BoolNode *PhaseIdealLoop::clone_iff( PhiNode *phi, IdealLoopTree *loop ) {
   1.982 +
   1.983 +  // Convert this Phi into a Phi merging Bools
   1.984 +  uint i;
   1.985 +  for( i = 1; i < phi->req(); i++ ) {
   1.986 +    Node *b = phi->in(i);
   1.987 +    if( b->is_Phi() ) {
   1.988 +      _igvn.hash_delete(phi);
   1.989 +      _igvn._worklist.push(phi);
   1.990 +      phi->set_req(i, clone_iff( b->as_Phi(), loop ));
   1.991 +    } else {
   1.992 +      assert( b->is_Bool(), "" );
   1.993 +    }
   1.994 +  }
   1.995 +
   1.996 +  Node *sample_bool = phi->in(1);
   1.997 +  Node *sample_cmp  = sample_bool->in(1);
   1.998 +
   1.999 +  // Make Phis to merge the Cmp's inputs.
  1.1000 +  int size = phi->in(0)->req();
  1.1001 +  PhiNode *phi1 = new (C, size) PhiNode( phi->in(0), Type::TOP );
  1.1002 +  PhiNode *phi2 = new (C, size) PhiNode( phi->in(0), Type::TOP );
  1.1003 +  for( i = 1; i < phi->req(); i++ ) {
  1.1004 +    Node *n1 = phi->in(i)->in(1)->in(1);
  1.1005 +    Node *n2 = phi->in(i)->in(1)->in(2);
  1.1006 +    phi1->set_req( i, n1 );
  1.1007 +    phi2->set_req( i, n2 );
  1.1008 +    phi1->set_type( phi1->type()->meet(n1->bottom_type()) );
  1.1009 +    phi2->set_type( phi2->type()->meet(n2->bottom_type()) );
  1.1010 +  }
  1.1011 +  // See if these Phis have been made before.
  1.1012 +  // Register with optimizer
  1.1013 +  Node *hit1 = _igvn.hash_find_insert(phi1);
  1.1014 +  if( hit1 ) {                  // Hit, toss just made Phi
  1.1015 +    _igvn.remove_dead_node(phi1); // Remove new phi
  1.1016 +    assert( hit1->is_Phi(), "" );
  1.1017 +    phi1 = (PhiNode*)hit1;      // Use existing phi
  1.1018 +  } else {                      // Miss
  1.1019 +    _igvn.register_new_node_with_optimizer(phi1);
  1.1020 +  }
  1.1021 +  Node *hit2 = _igvn.hash_find_insert(phi2);
  1.1022 +  if( hit2 ) {                  // Hit, toss just made Phi
  1.1023 +    _igvn.remove_dead_node(phi2); // Remove new phi
  1.1024 +    assert( hit2->is_Phi(), "" );
  1.1025 +    phi2 = (PhiNode*)hit2;      // Use existing phi
  1.1026 +  } else {                      // Miss
  1.1027 +    _igvn.register_new_node_with_optimizer(phi2);
  1.1028 +  }
  1.1029 +  // Register Phis with loop/block info
  1.1030 +  set_ctrl(phi1, phi->in(0));
  1.1031 +  set_ctrl(phi2, phi->in(0));
  1.1032 +  // Make a new Cmp
  1.1033 +  Node *cmp = sample_cmp->clone();
  1.1034 +  cmp->set_req( 1, phi1 );
  1.1035 +  cmp->set_req( 2, phi2 );
  1.1036 +  _igvn.register_new_node_with_optimizer(cmp);
  1.1037 +  set_ctrl(cmp, phi->in(0));
  1.1038 +
  1.1039 +  // Make a new Bool
  1.1040 +  Node *b = sample_bool->clone();
  1.1041 +  b->set_req(1,cmp);
  1.1042 +  _igvn.register_new_node_with_optimizer(b);
  1.1043 +  set_ctrl(b, phi->in(0));
  1.1044 +
  1.1045 +  assert( b->is_Bool(), "" );
  1.1046 +  return (BoolNode*)b;
  1.1047 +}
  1.1048 +
  1.1049 +//------------------------------clone_bool-------------------------------------
  1.1050 +// Passed in a Phi merging (recursively) some nearly equivalent Bool/Cmps.
  1.1051 +// "Nearly" because all Nodes have been cloned from the original in the loop,
  1.1052 +// but the fall-in edges to the Cmp are different.  Clone bool/Cmp pairs
  1.1053 +// through the Phi recursively, and return a Bool.
  1.1054 +CmpNode *PhaseIdealLoop::clone_bool( PhiNode *phi, IdealLoopTree *loop ) {
  1.1055 +  uint i;
  1.1056 +  // Convert this Phi into a Phi merging Bools
  1.1057 +  for( i = 1; i < phi->req(); i++ ) {
  1.1058 +    Node *b = phi->in(i);
  1.1059 +    if( b->is_Phi() ) {
  1.1060 +      _igvn.hash_delete(phi);
  1.1061 +      _igvn._worklist.push(phi);
  1.1062 +      phi->set_req(i, clone_bool( b->as_Phi(), loop ));
  1.1063 +    } else {
  1.1064 +      assert( b->is_Cmp() || b->is_top(), "inputs are all Cmp or TOP" );
  1.1065 +    }
  1.1066 +  }
  1.1067 +
  1.1068 +  Node *sample_cmp = phi->in(1);
  1.1069 +
  1.1070 +  // Make Phis to merge the Cmp's inputs.
  1.1071 +  int size = phi->in(0)->req();
  1.1072 +  PhiNode *phi1 = new (C, size) PhiNode( phi->in(0), Type::TOP );
  1.1073 +  PhiNode *phi2 = new (C, size) PhiNode( phi->in(0), Type::TOP );
  1.1074 +  for( uint j = 1; j < phi->req(); j++ ) {
  1.1075 +    Node *cmp_top = phi->in(j); // Inputs are all Cmp or TOP
  1.1076 +    Node *n1, *n2;
  1.1077 +    if( cmp_top->is_Cmp() ) {
  1.1078 +      n1 = cmp_top->in(1);
  1.1079 +      n2 = cmp_top->in(2);
  1.1080 +    } else {
  1.1081 +      n1 = n2 = cmp_top;
  1.1082 +    }
  1.1083 +    phi1->set_req( j, n1 );
  1.1084 +    phi2->set_req( j, n2 );
  1.1085 +    phi1->set_type( phi1->type()->meet(n1->bottom_type()) );
  1.1086 +    phi2->set_type( phi2->type()->meet(n2->bottom_type()) );
  1.1087 +  }
  1.1088 +
  1.1089 +  // See if these Phis have been made before.
  1.1090 +  // Register with optimizer
  1.1091 +  Node *hit1 = _igvn.hash_find_insert(phi1);
  1.1092 +  if( hit1 ) {                  // Hit, toss just made Phi
  1.1093 +    _igvn.remove_dead_node(phi1); // Remove new phi
  1.1094 +    assert( hit1->is_Phi(), "" );
  1.1095 +    phi1 = (PhiNode*)hit1;      // Use existing phi
  1.1096 +  } else {                      // Miss
  1.1097 +    _igvn.register_new_node_with_optimizer(phi1);
  1.1098 +  }
  1.1099 +  Node *hit2 = _igvn.hash_find_insert(phi2);
  1.1100 +  if( hit2 ) {                  // Hit, toss just made Phi
  1.1101 +    _igvn.remove_dead_node(phi2); // Remove new phi
  1.1102 +    assert( hit2->is_Phi(), "" );
  1.1103 +    phi2 = (PhiNode*)hit2;      // Use existing phi
  1.1104 +  } else {                      // Miss
  1.1105 +    _igvn.register_new_node_with_optimizer(phi2);
  1.1106 +  }
  1.1107 +  // Register Phis with loop/block info
  1.1108 +  set_ctrl(phi1, phi->in(0));
  1.1109 +  set_ctrl(phi2, phi->in(0));
  1.1110 +  // Make a new Cmp
  1.1111 +  Node *cmp = sample_cmp->clone();
  1.1112 +  cmp->set_req( 1, phi1 );
  1.1113 +  cmp->set_req( 2, phi2 );
  1.1114 +  _igvn.register_new_node_with_optimizer(cmp);
  1.1115 +  set_ctrl(cmp, phi->in(0));
  1.1116 +
  1.1117 +  assert( cmp->is_Cmp(), "" );
  1.1118 +  return (CmpNode*)cmp;
  1.1119 +}
  1.1120 +
  1.1121 +//------------------------------sink_use---------------------------------------
  1.1122 +// If 'use' was in the loop-exit block, it now needs to be sunk
  1.1123 +// below the post-loop merge point.
  1.1124 +void PhaseIdealLoop::sink_use( Node *use, Node *post_loop ) {
  1.1125 +  if (!use->is_CFG() && get_ctrl(use) == post_loop->in(2)) {
  1.1126 +    set_ctrl(use, post_loop);
  1.1127 +    for (DUIterator j = use->outs(); use->has_out(j); j++)
  1.1128 +      sink_use(use->out(j), post_loop);
  1.1129 +  }
  1.1130 +}
  1.1131 +
  1.1132 +//------------------------------clone_loop-------------------------------------
  1.1133 +//
  1.1134 +//                   C L O N E   A   L O O P   B O D Y
  1.1135 +//
  1.1136 +// This is the basic building block of the loop optimizations.  It clones an
  1.1137 +// entire loop body.  It makes an old_new loop body mapping; with this mapping
  1.1138 +// you can find the new-loop equivalent to an old-loop node.  All new-loop
  1.1139 +// nodes are exactly equal to their old-loop counterparts, all edges are the
  1.1140 +// same.  All exits from the old-loop now have a RegionNode that merges the
  1.1141 +// equivalent new-loop path.  This is true even for the normal "loop-exit"
  1.1142 +// condition.  All uses of loop-invariant old-loop values now come from (one
  1.1143 +// or more) Phis that merge their new-loop equivalents.
  1.1144 +//
  1.1145 +// This operation leaves the graph in an illegal state: there are two valid
  1.1146 +// control edges coming from the loop pre-header to both loop bodies.  I'll
  1.1147 +// definitely have to hack the graph after running this transform.
  1.1148 +//
  1.1149 +// From this building block I will further edit edges to perform loop peeling
  1.1150 +// or loop unrolling or iteration splitting (Range-Check-Elimination), etc.
  1.1151 +//
  1.1152 +// Parameter side_by_size_idom:
  1.1153 +//   When side_by_size_idom is NULL, the dominator tree is constructed for
  1.1154 +//      the clone loop to dominate the original.  Used in construction of
  1.1155 +//      pre-main-post loop sequence.
  1.1156 +//   When nonnull, the clone and original are side-by-side, both are
  1.1157 +//      dominated by the side_by_side_idom node.  Used in construction of
  1.1158 +//      unswitched loops.
  1.1159 +void PhaseIdealLoop::clone_loop( IdealLoopTree *loop, Node_List &old_new, int dd,
  1.1160 +                                 Node* side_by_side_idom) {
  1.1161 +
  1.1162 +  // Step 1: Clone the loop body.  Make the old->new mapping.
  1.1163 +  uint i;
  1.1164 +  for( i = 0; i < loop->_body.size(); i++ ) {
  1.1165 +    Node *old = loop->_body.at(i);
  1.1166 +    Node *nnn = old->clone();
  1.1167 +    old_new.map( old->_idx, nnn );
  1.1168 +    _igvn.register_new_node_with_optimizer(nnn);
  1.1169 +  }
  1.1170 +
  1.1171 +
  1.1172 +  // Step 2: Fix the edges in the new body.  If the old input is outside the
  1.1173 +  // loop use it.  If the old input is INside the loop, use the corresponding
  1.1174 +  // new node instead.
  1.1175 +  for( i = 0; i < loop->_body.size(); i++ ) {
  1.1176 +    Node *old = loop->_body.at(i);
  1.1177 +    Node *nnn = old_new[old->_idx];
  1.1178 +    // Fix CFG/Loop controlling the new node
  1.1179 +    if (has_ctrl(old)) {
  1.1180 +      set_ctrl(nnn, old_new[get_ctrl(old)->_idx]);
  1.1181 +    } else {
  1.1182 +      set_loop(nnn, loop->_parent);
  1.1183 +      if (old->outcnt() > 0) {
  1.1184 +        set_idom( nnn, old_new[idom(old)->_idx], dd );
  1.1185 +      }
  1.1186 +    }
  1.1187 +    // Correct edges to the new node
  1.1188 +    for( uint j = 0; j < nnn->req(); j++ ) {
  1.1189 +        Node *n = nnn->in(j);
  1.1190 +        if( n ) {
  1.1191 +          IdealLoopTree *old_in_loop = get_loop( has_ctrl(n) ? get_ctrl(n) : n );
  1.1192 +          if( loop->is_member( old_in_loop ) )
  1.1193 +            nnn->set_req(j, old_new[n->_idx]);
  1.1194 +        }
  1.1195 +    }
  1.1196 +    _igvn.hash_find_insert(nnn);
  1.1197 +  }
  1.1198 +  Node *newhead = old_new[loop->_head->_idx];
  1.1199 +  set_idom(newhead, newhead->in(LoopNode::EntryControl), dd);
  1.1200 +
  1.1201 +
  1.1202 +  // Step 3: Now fix control uses.  Loop varying control uses have already
  1.1203 +  // been fixed up (as part of all input edges in Step 2).  Loop invariant
  1.1204 +  // control uses must be either an IfFalse or an IfTrue.  Make a merge
  1.1205 +  // point to merge the old and new IfFalse/IfTrue nodes; make the use
  1.1206 +  // refer to this.
  1.1207 +  ResourceArea *area = Thread::current()->resource_area();
  1.1208 +  Node_List worklist(area);
  1.1209 +  uint new_counter = C->unique();
  1.1210 +  for( i = 0; i < loop->_body.size(); i++ ) {
  1.1211 +    Node* old = loop->_body.at(i);
  1.1212 +    if( !old->is_CFG() ) continue;
  1.1213 +    Node* nnn = old_new[old->_idx];
  1.1214 +
  1.1215 +    // Copy uses to a worklist, so I can munge the def-use info
  1.1216 +    // with impunity.
  1.1217 +    for (DUIterator_Fast jmax, j = old->fast_outs(jmax); j < jmax; j++)
  1.1218 +      worklist.push(old->fast_out(j));
  1.1219 +
  1.1220 +    while( worklist.size() ) {  // Visit all uses
  1.1221 +      Node *use = worklist.pop();
  1.1222 +      if (!has_node(use))  continue; // Ignore dead nodes
  1.1223 +      IdealLoopTree *use_loop = get_loop( has_ctrl(use) ? get_ctrl(use) : use );
  1.1224 +      if( !loop->is_member( use_loop ) && use->is_CFG() ) {
  1.1225 +        // Both OLD and USE are CFG nodes here.
  1.1226 +        assert( use->is_Proj(), "" );
  1.1227 +
  1.1228 +        // Clone the loop exit control projection
  1.1229 +        Node *newuse = use->clone();
  1.1230 +        newuse->set_req(0,nnn);
  1.1231 +        _igvn.register_new_node_with_optimizer(newuse);
  1.1232 +        set_loop(newuse, use_loop);
  1.1233 +        set_idom(newuse, nnn, dom_depth(nnn) + 1 );
  1.1234 +
  1.1235 +        // We need a Region to merge the exit from the peeled body and the
  1.1236 +        // exit from the old loop body.
  1.1237 +        RegionNode *r = new (C, 3) RegionNode(3);
  1.1238 +        // Map the old use to the new merge point
  1.1239 +        old_new.map( use->_idx, r );
  1.1240 +        uint dd_r = MIN2(dom_depth(newuse),dom_depth(use));
  1.1241 +        assert( dd_r >= dom_depth(dom_lca(newuse,use)), "" );
  1.1242 +
  1.1243 +        // The original user of 'use' uses 'r' instead.
  1.1244 +        for (DUIterator_Last lmin, l = use->last_outs(lmin); l >= lmin;) {
  1.1245 +          Node* useuse = use->last_out(l);
  1.1246 +          _igvn.hash_delete(useuse);
  1.1247 +          _igvn._worklist.push(useuse);
  1.1248 +          uint uses_found = 0;
  1.1249 +          if( useuse->in(0) == use ) {
  1.1250 +            useuse->set_req(0, r);
  1.1251 +            uses_found++;
  1.1252 +            if( useuse->is_CFG() ) {
  1.1253 +              assert( dom_depth(useuse) > dd_r, "" );
  1.1254 +              set_idom(useuse, r, dom_depth(useuse));
  1.1255 +            }
  1.1256 +          }
  1.1257 +          for( uint k = 1; k < useuse->req(); k++ ) {
  1.1258 +            if( useuse->in(k) == use ) {
  1.1259 +              useuse->set_req(k, r);
  1.1260 +              uses_found++;
  1.1261 +            }
  1.1262 +          }
  1.1263 +          l -= uses_found;    // we deleted 1 or more copies of this edge
  1.1264 +        }
  1.1265 +
  1.1266 +        // Now finish up 'r'
  1.1267 +        r->set_req( 1, newuse );
  1.1268 +        r->set_req( 2,    use );
  1.1269 +        _igvn.register_new_node_with_optimizer(r);
  1.1270 +        set_loop(r, use_loop);
  1.1271 +        set_idom(r, !side_by_side_idom ? newuse->in(0) : side_by_side_idom, dd_r);
  1.1272 +      } // End of if a loop-exit test
  1.1273 +    }
  1.1274 +  }
  1.1275 +
  1.1276 +  // Step 4: If loop-invariant use is not control, it must be dominated by a
  1.1277 +  // loop exit IfFalse/IfTrue.  Find "proper" loop exit.  Make a Region
  1.1278 +  // there if needed.  Make a Phi there merging old and new used values.
  1.1279 +  Node_List *split_if_set = NULL;
  1.1280 +  Node_List *split_bool_set = NULL;
  1.1281 +  Node_List *split_cex_set = NULL;
  1.1282 +  for( i = 0; i < loop->_body.size(); i++ ) {
  1.1283 +    Node* old = loop->_body.at(i);
  1.1284 +    Node* nnn = old_new[old->_idx];
  1.1285 +    // Copy uses to a worklist, so I can munge the def-use info
  1.1286 +    // with impunity.
  1.1287 +    for (DUIterator_Fast jmax, j = old->fast_outs(jmax); j < jmax; j++)
  1.1288 +      worklist.push(old->fast_out(j));
  1.1289 +
  1.1290 +    while( worklist.size() ) {
  1.1291 +      Node *use = worklist.pop();
  1.1292 +      if (!has_node(use))  continue; // Ignore dead nodes
  1.1293 +      if (use->in(0) == C->top())  continue;
  1.1294 +      IdealLoopTree *use_loop = get_loop( has_ctrl(use) ? get_ctrl(use) : use );
  1.1295 +      // Check for data-use outside of loop - at least one of OLD or USE
  1.1296 +      // must not be a CFG node.
  1.1297 +      if( !loop->is_member( use_loop ) && (!old->is_CFG() || !use->is_CFG())) {
  1.1298 +
  1.1299 +        // If the Data use is an IF, that means we have an IF outside of the
  1.1300 +        // loop that is switching on a condition that is set inside of the
  1.1301 +        // loop.  Happens if people set a loop-exit flag; then test the flag
  1.1302 +        // in the loop to break the loop, then test is again outside of the
  1.1303 +        // loop to determine which way the loop exited.
  1.1304 +        if( use->is_If() || use->is_CMove() ) {
  1.1305 +          // Since this code is highly unlikely, we lazily build the worklist
  1.1306 +          // of such Nodes to go split.
  1.1307 +          if( !split_if_set )
  1.1308 +            split_if_set = new Node_List(area);
  1.1309 +          split_if_set->push(use);
  1.1310 +        }
  1.1311 +        if( use->is_Bool() ) {
  1.1312 +          if( !split_bool_set )
  1.1313 +            split_bool_set = new Node_List(area);
  1.1314 +          split_bool_set->push(use);
  1.1315 +        }
  1.1316 +        if( use->Opcode() == Op_CreateEx ) {
  1.1317 +          if( !split_cex_set )
  1.1318 +            split_cex_set = new Node_List(area);
  1.1319 +          split_cex_set->push(use);
  1.1320 +        }
  1.1321 +
  1.1322 +
  1.1323 +        // Get "block" use is in
  1.1324 +        uint idx = 0;
  1.1325 +        while( use->in(idx) != old ) idx++;
  1.1326 +        Node *prev = use->is_CFG() ? use : get_ctrl(use);
  1.1327 +        assert( !loop->is_member( get_loop( prev ) ), "" );
  1.1328 +        Node *cfg = prev->_idx >= new_counter
  1.1329 +          ? prev->in(2)
  1.1330 +          : idom(prev);
  1.1331 +        if( use->is_Phi() )     // Phi use is in prior block
  1.1332 +          cfg = prev->in(idx);  // NOT in block of Phi itself
  1.1333 +        if (cfg->is_top()) {    // Use is dead?
  1.1334 +          _igvn.hash_delete(use);
  1.1335 +          _igvn._worklist.push(use);
  1.1336 +          use->set_req(idx, C->top());
  1.1337 +          continue;
  1.1338 +        }
  1.1339 +
  1.1340 +        while( !loop->is_member( get_loop( cfg ) ) ) {
  1.1341 +          prev = cfg;
  1.1342 +          cfg = cfg->_idx >= new_counter ? cfg->in(2) : idom(cfg);
  1.1343 +        }
  1.1344 +        // If the use occurs after merging several exits from the loop, then
  1.1345 +        // old value must have dominated all those exits.  Since the same old
  1.1346 +        // value was used on all those exits we did not need a Phi at this
  1.1347 +        // merge point.  NOW we do need a Phi here.  Each loop exit value
  1.1348 +        // is now merged with the peeled body exit; each exit gets its own
  1.1349 +        // private Phi and those Phis need to be merged here.
  1.1350 +        Node *phi;
  1.1351 +        if( prev->is_Region() ) {
  1.1352 +          if( idx == 0 ) {      // Updating control edge?
  1.1353 +            phi = prev;         // Just use existing control
  1.1354 +          } else {              // Else need a new Phi
  1.1355 +            phi = PhiNode::make( prev, old );
  1.1356 +            // Now recursively fix up the new uses of old!
  1.1357 +            for( uint i = 1; i < prev->req(); i++ ) {
  1.1358 +              worklist.push(phi); // Onto worklist once for each 'old' input
  1.1359 +            }
  1.1360 +          }
  1.1361 +        } else {
  1.1362 +          // Get new RegionNode merging old and new loop exits
  1.1363 +          prev = old_new[prev->_idx];
  1.1364 +          assert( prev, "just made this in step 7" );
  1.1365 +          if( idx == 0 ) {      // Updating control edge?
  1.1366 +            phi = prev;         // Just use existing control
  1.1367 +          } else {              // Else need a new Phi
  1.1368 +            // Make a new Phi merging data values properly
  1.1369 +            phi = PhiNode::make( prev, old );
  1.1370 +            phi->set_req( 1, nnn );
  1.1371 +          }
  1.1372 +        }
  1.1373 +        // If inserting a new Phi, check for prior hits
  1.1374 +        if( idx != 0 ) {
  1.1375 +          Node *hit = _igvn.hash_find_insert(phi);
  1.1376 +          if( hit == NULL ) {
  1.1377 +           _igvn.register_new_node_with_optimizer(phi); // Register new phi
  1.1378 +          } else {                                      // or
  1.1379 +            // Remove the new phi from the graph and use the hit
  1.1380 +            _igvn.remove_dead_node(phi);
  1.1381 +            phi = hit;                                  // Use existing phi
  1.1382 +          }
  1.1383 +          set_ctrl(phi, prev);
  1.1384 +        }
  1.1385 +        // Make 'use' use the Phi instead of the old loop body exit value
  1.1386 +        _igvn.hash_delete(use);
  1.1387 +        _igvn._worklist.push(use);
  1.1388 +        use->set_req(idx, phi);
  1.1389 +        if( use->_idx >= new_counter ) { // If updating new phis
  1.1390 +          // Not needed for correctness, but prevents a weak assert
  1.1391 +          // in AddPNode from tripping (when we end up with different
  1.1392 +          // base & derived Phis that will become the same after
  1.1393 +          // IGVN does CSE).
  1.1394 +          Node *hit = _igvn.hash_find_insert(use);
  1.1395 +          if( hit )             // Go ahead and re-hash for hits.
  1.1396 +            _igvn.subsume_node( use, hit );
  1.1397 +        }
  1.1398 +
  1.1399 +        // If 'use' was in the loop-exit block, it now needs to be sunk
  1.1400 +        // below the post-loop merge point.
  1.1401 +        sink_use( use, prev );
  1.1402 +      }
  1.1403 +    }
  1.1404 +  }
  1.1405 +
  1.1406 +  // Check for IFs that need splitting/cloning.  Happens if an IF outside of
  1.1407 +  // the loop uses a condition set in the loop.  The original IF probably
  1.1408 +  // takes control from one or more OLD Regions (which in turn get from NEW
  1.1409 +  // Regions).  In any case, there will be a set of Phis for each merge point
  1.1410 +  // from the IF up to where the original BOOL def exists the loop.
  1.1411 +  if( split_if_set ) {
  1.1412 +    while( split_if_set->size() ) {
  1.1413 +      Node *iff = split_if_set->pop();
  1.1414 +      if( iff->in(1)->is_Phi() ) {
  1.1415 +        BoolNode *b = clone_iff( iff->in(1)->as_Phi(), loop );
  1.1416 +        _igvn.hash_delete(iff);
  1.1417 +        _igvn._worklist.push(iff);
  1.1418 +        iff->set_req(1, b);
  1.1419 +      }
  1.1420 +    }
  1.1421 +  }
  1.1422 +  if( split_bool_set ) {
  1.1423 +    while( split_bool_set->size() ) {
  1.1424 +      Node *b = split_bool_set->pop();
  1.1425 +      Node *phi = b->in(1);
  1.1426 +      assert( phi->is_Phi(), "" );
  1.1427 +      CmpNode *cmp = clone_bool( (PhiNode*)phi, loop );
  1.1428 +      _igvn.hash_delete(b);
  1.1429 +      _igvn._worklist.push(b);
  1.1430 +      b->set_req(1, cmp);
  1.1431 +    }
  1.1432 +  }
  1.1433 +  if( split_cex_set ) {
  1.1434 +    while( split_cex_set->size() ) {
  1.1435 +      Node *b = split_cex_set->pop();
  1.1436 +      assert( b->in(0)->is_Region(), "" );
  1.1437 +      assert( b->in(1)->is_Phi(), "" );
  1.1438 +      assert( b->in(0)->in(0) == b->in(1)->in(0), "" );
  1.1439 +      split_up( b, b->in(0), NULL );
  1.1440 +    }
  1.1441 +  }
  1.1442 +
  1.1443 +}
  1.1444 +
  1.1445 +
  1.1446 +//---------------------- stride_of_possible_iv -------------------------------------
  1.1447 +// Looks for an iff/bool/comp with one operand of the compare
  1.1448 +// being a cycle involving an add and a phi,
  1.1449 +// with an optional truncation (left-shift followed by a right-shift)
  1.1450 +// of the add. Returns zero if not an iv.
  1.1451 +int PhaseIdealLoop::stride_of_possible_iv(Node* iff) {
  1.1452 +  Node* trunc1 = NULL;
  1.1453 +  Node* trunc2 = NULL;
  1.1454 +  const TypeInt* ttype = NULL;
  1.1455 +  if (!iff->is_If() || iff->in(1) == NULL || !iff->in(1)->is_Bool()) {
  1.1456 +    return 0;
  1.1457 +  }
  1.1458 +  BoolNode* bl = iff->in(1)->as_Bool();
  1.1459 +  Node* cmp = bl->in(1);
  1.1460 +  if (!cmp || cmp->Opcode() != Op_CmpI && cmp->Opcode() != Op_CmpU) {
  1.1461 +    return 0;
  1.1462 +  }
  1.1463 +  // Must have an invariant operand
  1.1464 +  if (is_member(get_loop(iff), get_ctrl(cmp->in(2)))) {
  1.1465 +    return 0;
  1.1466 +  }
  1.1467 +  Node* add2 = NULL;
  1.1468 +  Node* cmp1 = cmp->in(1);
  1.1469 +  if (cmp1->is_Phi()) {
  1.1470 +    // (If (Bool (CmpX phi:(Phi ...(Optional-trunc(AddI phi add2))) )))
  1.1471 +    Node* phi = cmp1;
  1.1472 +    for (uint i = 1; i < phi->req(); i++) {
  1.1473 +      Node* in = phi->in(i);
  1.1474 +      Node* add = CountedLoopNode::match_incr_with_optional_truncation(in,
  1.1475 +                                &trunc1, &trunc2, &ttype);
  1.1476 +      if (add && add->in(1) == phi) {
  1.1477 +        add2 = add->in(2);
  1.1478 +        break;
  1.1479 +      }
  1.1480 +    }
  1.1481 +  } else {
  1.1482 +    // (If (Bool (CmpX addtrunc:(Optional-trunc((AddI (Phi ...addtrunc...) add2)) )))
  1.1483 +    Node* addtrunc = cmp1;
  1.1484 +    Node* add = CountedLoopNode::match_incr_with_optional_truncation(addtrunc,
  1.1485 +                                &trunc1, &trunc2, &ttype);
  1.1486 +    if (add && add->in(1)->is_Phi()) {
  1.1487 +      Node* phi = add->in(1);
  1.1488 +      for (uint i = 1; i < phi->req(); i++) {
  1.1489 +        if (phi->in(i) == addtrunc) {
  1.1490 +          add2 = add->in(2);
  1.1491 +          break;
  1.1492 +        }
  1.1493 +      }
  1.1494 +    }
  1.1495 +  }
  1.1496 +  if (add2 != NULL) {
  1.1497 +    const TypeInt* add2t = _igvn.type(add2)->is_int();
  1.1498 +    if (add2t->is_con()) {
  1.1499 +      return add2t->get_con();
  1.1500 +    }
  1.1501 +  }
  1.1502 +  return 0;
  1.1503 +}
  1.1504 +
  1.1505 +
  1.1506 +//---------------------- stay_in_loop -------------------------------------
  1.1507 +// Return the (unique) control output node that's in the loop (if it exists.)
  1.1508 +Node* PhaseIdealLoop::stay_in_loop( Node* n, IdealLoopTree *loop) {
  1.1509 +  Node* unique = NULL;
  1.1510 +  if (!n) return NULL;
  1.1511 +  for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
  1.1512 +    Node* use = n->fast_out(i);
  1.1513 +    if (!has_ctrl(use) && loop->is_member(get_loop(use))) {
  1.1514 +      if (unique != NULL) {
  1.1515 +        return NULL;
  1.1516 +      }
  1.1517 +      unique = use;
  1.1518 +    }
  1.1519 +  }
  1.1520 +  return unique;
  1.1521 +}
  1.1522 +
  1.1523 +//------------------------------ register_node -------------------------------------
  1.1524 +// Utility to register node "n" with PhaseIdealLoop
  1.1525 +void PhaseIdealLoop::register_node(Node* n, IdealLoopTree *loop, Node* pred, int ddepth) {
  1.1526 +  _igvn.register_new_node_with_optimizer(n);
  1.1527 +  loop->_body.push(n);
  1.1528 +  if (n->is_CFG()) {
  1.1529 +    set_loop(n, loop);
  1.1530 +    set_idom(n, pred, ddepth);
  1.1531 +  } else {
  1.1532 +    set_ctrl(n, pred);
  1.1533 +  }
  1.1534 +}
  1.1535 +
  1.1536 +//------------------------------ proj_clone -------------------------------------
  1.1537 +// Utility to create an if-projection
  1.1538 +ProjNode* PhaseIdealLoop::proj_clone(ProjNode* p, IfNode* iff) {
  1.1539 +  ProjNode* c = p->clone()->as_Proj();
  1.1540 +  c->set_req(0, iff);
  1.1541 +  return c;
  1.1542 +}
  1.1543 +
  1.1544 +//------------------------------ short_circuit_if -------------------------------------
  1.1545 +// Force the iff control output to be the live_proj
  1.1546 +Node* PhaseIdealLoop::short_circuit_if(IfNode* iff, ProjNode* live_proj) {
  1.1547 +  int proj_con = live_proj->_con;
  1.1548 +  assert(proj_con == 0 || proj_con == 1, "false or true projection");
  1.1549 +  Node *con = _igvn.intcon(proj_con);
  1.1550 +  set_ctrl(con, C->root());
  1.1551 +  if (iff) {
  1.1552 +    iff->set_req(1, con);
  1.1553 +  }
  1.1554 +  return con;
  1.1555 +}
  1.1556 +
  1.1557 +//------------------------------ insert_if_before_proj -------------------------------------
  1.1558 +// Insert a new if before an if projection (* - new node)
  1.1559 +//
  1.1560 +// before
  1.1561 +//           if(test)
  1.1562 +//           /     \
  1.1563 +//          v       v
  1.1564 +//    other-proj   proj (arg)
  1.1565 +//
  1.1566 +// after
  1.1567 +//           if(test)
  1.1568 +//           /     \
  1.1569 +//          /       v
  1.1570 +//         |      * proj-clone
  1.1571 +//         v          |
  1.1572 +//    other-proj      v
  1.1573 +//                * new_if(relop(cmp[IU](left,right)))
  1.1574 +//                  /  \
  1.1575 +//                 v    v
  1.1576 +//         * new-proj  proj
  1.1577 +//         (returned)
  1.1578 +//
  1.1579 +ProjNode* PhaseIdealLoop::insert_if_before_proj(Node* left, bool Signed, BoolTest::mask relop, Node* right, ProjNode* proj) {
  1.1580 +  IfNode* iff = proj->in(0)->as_If();
  1.1581 +  IdealLoopTree *loop = get_loop(proj);
  1.1582 +  ProjNode *other_proj = iff->proj_out(!proj->is_IfTrue())->as_Proj();
  1.1583 +  int ddepth = dom_depth(proj);
  1.1584 +
  1.1585 +  _igvn.hash_delete(iff);
  1.1586 +  _igvn._worklist.push(iff);
  1.1587 +  _igvn.hash_delete(proj);
  1.1588 +  _igvn._worklist.push(proj);
  1.1589 +
  1.1590 +  proj->set_req(0, NULL);  // temporary disconnect
  1.1591 +  ProjNode* proj2 = proj_clone(proj, iff);
  1.1592 +  register_node(proj2, loop, iff, ddepth);
  1.1593 +
  1.1594 +  Node* cmp = Signed ? (Node*) new (C,3)CmpINode(left, right) : (Node*) new (C,3)CmpUNode(left, right);
  1.1595 +  register_node(cmp, loop, proj2, ddepth);
  1.1596 +
  1.1597 +  BoolNode* bol = new (C,2)BoolNode(cmp, relop);
  1.1598 +  register_node(bol, loop, proj2, ddepth);
  1.1599 +
  1.1600 +  IfNode* new_if = new (C,2)IfNode(proj2, bol, iff->_prob, iff->_fcnt);
  1.1601 +  register_node(new_if, loop, proj2, ddepth);
  1.1602 +
  1.1603 +  proj->set_req(0, new_if); // reattach
  1.1604 +  set_idom(proj, new_if, ddepth);
  1.1605 +
  1.1606 +  ProjNode* new_exit = proj_clone(other_proj, new_if)->as_Proj();
  1.1607 +  register_node(new_exit, get_loop(other_proj), new_if, ddepth);
  1.1608 +
  1.1609 +  return new_exit;
  1.1610 +}
  1.1611 +
  1.1612 +//------------------------------ insert_region_before_proj -------------------------------------
  1.1613 +// Insert a region before an if projection (* - new node)
  1.1614 +//
  1.1615 +// before
  1.1616 +//           if(test)
  1.1617 +//          /      |
  1.1618 +//         v       |
  1.1619 +//       proj      v
  1.1620 +//               other-proj
  1.1621 +//
  1.1622 +// after
  1.1623 +//           if(test)
  1.1624 +//          /      |
  1.1625 +//         v       |
  1.1626 +// * proj-clone    v
  1.1627 +//         |     other-proj
  1.1628 +//         v
  1.1629 +// * new-region
  1.1630 +//         |
  1.1631 +//         v
  1.1632 +// *      dum_if
  1.1633 +//       /     \
  1.1634 +//      v       \
  1.1635 +// * dum-proj    v
  1.1636 +//              proj
  1.1637 +//
  1.1638 +RegionNode* PhaseIdealLoop::insert_region_before_proj(ProjNode* proj) {
  1.1639 +  IfNode* iff = proj->in(0)->as_If();
  1.1640 +  IdealLoopTree *loop = get_loop(proj);
  1.1641 +  ProjNode *other_proj = iff->proj_out(!proj->is_IfTrue())->as_Proj();
  1.1642 +  int ddepth = dom_depth(proj);
  1.1643 +
  1.1644 +  _igvn.hash_delete(iff);
  1.1645 +  _igvn._worklist.push(iff);
  1.1646 +  _igvn.hash_delete(proj);
  1.1647 +  _igvn._worklist.push(proj);
  1.1648 +
  1.1649 +  proj->set_req(0, NULL);  // temporary disconnect
  1.1650 +  ProjNode* proj2 = proj_clone(proj, iff);
  1.1651 +  register_node(proj2, loop, iff, ddepth);
  1.1652 +
  1.1653 +  RegionNode* reg = new (C,2)RegionNode(2);
  1.1654 +  reg->set_req(1, proj2);
  1.1655 +  register_node(reg, loop, iff, ddepth);
  1.1656 +
  1.1657 +  IfNode* dum_if = new (C,2)IfNode(reg, short_circuit_if(NULL, proj), iff->_prob, iff->_fcnt);
  1.1658 +  register_node(dum_if, loop, reg, ddepth);
  1.1659 +
  1.1660 +  proj->set_req(0, dum_if); // reattach
  1.1661 +  set_idom(proj, dum_if, ddepth);
  1.1662 +
  1.1663 +  ProjNode* dum_proj = proj_clone(other_proj, dum_if);
  1.1664 +  register_node(dum_proj, loop, dum_if, ddepth);
  1.1665 +
  1.1666 +  return reg;
  1.1667 +}
  1.1668 +
  1.1669 +//------------------------------ insert_cmpi_loop_exit -------------------------------------
  1.1670 +// Clone a signed compare loop exit from an unsigned compare and
  1.1671 +// insert it before the unsigned cmp on the stay-in-loop path.
  1.1672 +// All new nodes inserted in the dominator tree between the original
  1.1673 +// if and it's projections.  The original if test is replaced with
  1.1674 +// a constant to force the stay-in-loop path.
  1.1675 +//
  1.1676 +// This is done to make sure that the original if and it's projections
  1.1677 +// still dominate the same set of control nodes, that the ctrl() relation
  1.1678 +// from data nodes to them is preserved, and that their loop nesting is
  1.1679 +// preserved.
  1.1680 +//
  1.1681 +// before
  1.1682 +//          if(i <u limit)    unsigned compare loop exit
  1.1683 +//         /       |
  1.1684 +//        v        v
  1.1685 +//   exit-proj   stay-in-loop-proj
  1.1686 +//
  1.1687 +// after
  1.1688 +//          if(stay-in-loop-const)  original if
  1.1689 +//         /       |
  1.1690 +//        /        v
  1.1691 +//       /  if(i <  limit)    new signed test
  1.1692 +//      /  /       |
  1.1693 +//     /  /        v
  1.1694 +//    /  /  if(i <u limit)    new cloned unsigned test
  1.1695 +//   /  /   /      |
  1.1696 +//   v  v  v       |
  1.1697 +//    region       |
  1.1698 +//        |        |
  1.1699 +//      dum-if     |
  1.1700 +//     /  |        |
  1.1701 +// ether  |        |
  1.1702 +//        v        v
  1.1703 +//   exit-proj   stay-in-loop-proj
  1.1704 +//
  1.1705 +IfNode* PhaseIdealLoop::insert_cmpi_loop_exit(IfNode* if_cmpu, IdealLoopTree *loop) {
  1.1706 +  const bool Signed   = true;
  1.1707 +  const bool Unsigned = false;
  1.1708 +
  1.1709 +  BoolNode* bol = if_cmpu->in(1)->as_Bool();
  1.1710 +  if (bol->_test._test != BoolTest::lt) return NULL;
  1.1711 +  CmpNode* cmpu = bol->in(1)->as_Cmp();
  1.1712 +  if (cmpu->Opcode() != Op_CmpU) return NULL;
  1.1713 +  int stride = stride_of_possible_iv(if_cmpu);
  1.1714 +  if (stride == 0) return NULL;
  1.1715 +
  1.1716 +  ProjNode* lp_continue = stay_in_loop(if_cmpu, loop)->as_Proj();
  1.1717 +  ProjNode* lp_exit     = if_cmpu->proj_out(!lp_continue->is_IfTrue())->as_Proj();
  1.1718 +
  1.1719 +  Node* limit = NULL;
  1.1720 +  if (stride > 0) {
  1.1721 +    limit = cmpu->in(2);
  1.1722 +  } else {
  1.1723 +    limit = _igvn.makecon(TypeInt::ZERO);
  1.1724 +    set_ctrl(limit, C->root());
  1.1725 +  }
  1.1726 +  // Create a new region on the exit path
  1.1727 +  RegionNode* reg = insert_region_before_proj(lp_exit);
  1.1728 +
  1.1729 +  // Clone the if-cmpu-true-false using a signed compare
  1.1730 +  BoolTest::mask rel_i = stride > 0 ? bol->_test._test : BoolTest::ge;
  1.1731 +  ProjNode* cmpi_exit = insert_if_before_proj(cmpu->in(1), Signed, rel_i, limit, lp_continue);
  1.1732 +  reg->add_req(cmpi_exit);
  1.1733 +
  1.1734 +  // Clone the if-cmpu-true-false
  1.1735 +  BoolTest::mask rel_u = bol->_test._test;
  1.1736 +  ProjNode* cmpu_exit = insert_if_before_proj(cmpu->in(1), Unsigned, rel_u, cmpu->in(2), lp_continue);
  1.1737 +  reg->add_req(cmpu_exit);
  1.1738 +
  1.1739 +  // Force original if to stay in loop.
  1.1740 +  short_circuit_if(if_cmpu, lp_continue);
  1.1741 +
  1.1742 +  return cmpi_exit->in(0)->as_If();
  1.1743 +}
  1.1744 +
  1.1745 +//------------------------------ remove_cmpi_loop_exit -------------------------------------
  1.1746 +// Remove a previously inserted signed compare loop exit.
  1.1747 +void PhaseIdealLoop::remove_cmpi_loop_exit(IfNode* if_cmp, IdealLoopTree *loop) {
  1.1748 +  Node* lp_proj = stay_in_loop(if_cmp, loop);
  1.1749 +  assert(if_cmp->in(1)->in(1)->Opcode() == Op_CmpI &&
  1.1750 +         stay_in_loop(lp_proj, loop)->is_If() &&
  1.1751 +         stay_in_loop(lp_proj, loop)->in(1)->in(1)->Opcode() == Op_CmpU, "inserted cmpi before cmpu");
  1.1752 +  Node *con = _igvn.makecon(lp_proj->is_IfTrue() ? TypeInt::ONE : TypeInt::ZERO);
  1.1753 +  set_ctrl(con, C->root());
  1.1754 +  if_cmp->set_req(1, con);
  1.1755 +}
  1.1756 +
  1.1757 +//------------------------------ scheduled_nodelist -------------------------------------
  1.1758 +// Create a post order schedule of nodes that are in the
  1.1759 +// "member" set.  The list is returned in "sched".
  1.1760 +// The first node in "sched" is the loop head, followed by
  1.1761 +// nodes which have no inputs in the "member" set, and then
  1.1762 +// followed by the nodes that have an immediate input dependence
  1.1763 +// on a node in "sched".
  1.1764 +void PhaseIdealLoop::scheduled_nodelist( IdealLoopTree *loop, VectorSet& member, Node_List &sched ) {
  1.1765 +
  1.1766 +  assert(member.test(loop->_head->_idx), "loop head must be in member set");
  1.1767 +  Arena *a = Thread::current()->resource_area();
  1.1768 +  VectorSet visited(a);
  1.1769 +  Node_Stack nstack(a, loop->_body.size());
  1.1770 +
  1.1771 +  Node* n  = loop->_head;  // top of stack is cached in "n"
  1.1772 +  uint idx = 0;
  1.1773 +  visited.set(n->_idx);
  1.1774 +
  1.1775 +  // Initially push all with no inputs from within member set
  1.1776 +  for(uint i = 0; i < loop->_body.size(); i++ ) {
  1.1777 +    Node *elt = loop->_body.at(i);
  1.1778 +    if (member.test(elt->_idx)) {
  1.1779 +      bool found = false;
  1.1780 +      for (uint j = 0; j < elt->req(); j++) {
  1.1781 +        Node* def = elt->in(j);
  1.1782 +        if (def && member.test(def->_idx) && def != elt) {
  1.1783 +          found = true;
  1.1784 +          break;
  1.1785 +        }
  1.1786 +      }
  1.1787 +      if (!found && elt != loop->_head) {
  1.1788 +        nstack.push(n, idx);
  1.1789 +        n = elt;
  1.1790 +        assert(!visited.test(n->_idx), "not seen yet");
  1.1791 +        visited.set(n->_idx);
  1.1792 +      }
  1.1793 +    }
  1.1794 +  }
  1.1795 +
  1.1796 +  // traverse out's that are in the member set
  1.1797 +  while (true) {
  1.1798 +    if (idx < n->outcnt()) {
  1.1799 +      Node* use = n->raw_out(idx);
  1.1800 +      idx++;
  1.1801 +      if (!visited.test_set(use->_idx)) {
  1.1802 +        if (member.test(use->_idx)) {
  1.1803 +          nstack.push(n, idx);
  1.1804 +          n = use;
  1.1805 +          idx = 0;
  1.1806 +        }
  1.1807 +      }
  1.1808 +    } else {
  1.1809 +      // All outputs processed
  1.1810 +      sched.push(n);
  1.1811 +      if (nstack.is_empty()) break;
  1.1812 +      n   = nstack.node();
  1.1813 +      idx = nstack.index();
  1.1814 +      nstack.pop();
  1.1815 +    }
  1.1816 +  }
  1.1817 +}
  1.1818 +
  1.1819 +
  1.1820 +//------------------------------ has_use_in_set -------------------------------------
  1.1821 +// Has a use in the vector set
  1.1822 +bool PhaseIdealLoop::has_use_in_set( Node* n, VectorSet& vset ) {
  1.1823 +  for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
  1.1824 +    Node* use = n->fast_out(j);
  1.1825 +    if (vset.test(use->_idx)) {
  1.1826 +      return true;
  1.1827 +    }
  1.1828 +  }
  1.1829 +  return false;
  1.1830 +}
  1.1831 +
  1.1832 +
  1.1833 +//------------------------------ has_use_internal_to_set -------------------------------------
  1.1834 +// Has use internal to the vector set (ie. not in a phi at the loop head)
  1.1835 +bool PhaseIdealLoop::has_use_internal_to_set( Node* n, VectorSet& vset, IdealLoopTree *loop ) {
  1.1836 +  Node* head  = loop->_head;
  1.1837 +  for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
  1.1838 +    Node* use = n->fast_out(j);
  1.1839 +    if (vset.test(use->_idx) && !(use->is_Phi() && use->in(0) == head)) {
  1.1840 +      return true;
  1.1841 +    }
  1.1842 +  }
  1.1843 +  return false;
  1.1844 +}
  1.1845 +
  1.1846 +
  1.1847 +//------------------------------ clone_for_use_outside_loop -------------------------------------
  1.1848 +// clone "n" for uses that are outside of loop
  1.1849 +void PhaseIdealLoop::clone_for_use_outside_loop( IdealLoopTree *loop, Node* n, Node_List& worklist ) {
  1.1850 +
  1.1851 +  assert(worklist.size() == 0, "should be empty");
  1.1852 +  for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
  1.1853 +    Node* use = n->fast_out(j);
  1.1854 +    if( !loop->is_member(get_loop(has_ctrl(use) ? get_ctrl(use) : use)) ) {
  1.1855 +      worklist.push(use);
  1.1856 +    }
  1.1857 +  }
  1.1858 +  while( worklist.size() ) {
  1.1859 +    Node *use = worklist.pop();
  1.1860 +    if (!has_node(use) || use->in(0) == C->top()) continue;
  1.1861 +    uint j;
  1.1862 +    for (j = 0; j < use->req(); j++) {
  1.1863 +      if (use->in(j) == n) break;
  1.1864 +    }
  1.1865 +    assert(j < use->req(), "must be there");
  1.1866 +
  1.1867 +    // clone "n" and insert it between the inputs of "n" and the use outside the loop
  1.1868 +    Node* n_clone = n->clone();
  1.1869 +    _igvn.hash_delete(use);
  1.1870 +    use->set_req(j, n_clone);
  1.1871 +    _igvn._worklist.push(use);
  1.1872 +    if (!use->is_Phi()) {
  1.1873 +      Node* use_c = has_ctrl(use) ? get_ctrl(use) : use->in(0);
  1.1874 +      set_ctrl(n_clone, use_c);
  1.1875 +      assert(!loop->is_member(get_loop(use_c)), "should be outside loop");
  1.1876 +      get_loop(use_c)->_body.push(n_clone);
  1.1877 +    } else {
  1.1878 +      // Use in a phi is considered a use in the associated predecessor block
  1.1879 +      Node *prevbb = use->in(0)->in(j);
  1.1880 +      set_ctrl(n_clone, prevbb);
  1.1881 +      assert(!loop->is_member(get_loop(prevbb)), "should be outside loop");
  1.1882 +      get_loop(prevbb)->_body.push(n_clone);
  1.1883 +    }
  1.1884 +    _igvn.register_new_node_with_optimizer(n_clone);
  1.1885 +#if !defined(PRODUCT)
  1.1886 +    if (TracePartialPeeling) {
  1.1887 +      tty->print_cr("loop exit cloning old: %d new: %d newbb: %d", n->_idx, n_clone->_idx, get_ctrl(n_clone)->_idx);
  1.1888 +    }
  1.1889 +#endif
  1.1890 +  }
  1.1891 +}
  1.1892 +
  1.1893 +
  1.1894 +//------------------------------ clone_for_special_use_inside_loop -------------------------------------
  1.1895 +// clone "n" for special uses that are in the not_peeled region.
  1.1896 +// If these def-uses occur in separate blocks, the code generator
  1.1897 +// marks the method as not compilable.  For example, if a "BoolNode"
  1.1898 +// is in a different basic block than the "IfNode" that uses it, then
  1.1899 +// the compilation is aborted in the code generator.
  1.1900 +void PhaseIdealLoop::clone_for_special_use_inside_loop( IdealLoopTree *loop, Node* n,
  1.1901 +                                                        VectorSet& not_peel, Node_List& sink_list, Node_List& worklist ) {
  1.1902 +  if (n->is_Phi() || n->is_Load()) {
  1.1903 +    return;
  1.1904 +  }
  1.1905 +  assert(worklist.size() == 0, "should be empty");
  1.1906 +  for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
  1.1907 +    Node* use = n->fast_out(j);
  1.1908 +    if ( not_peel.test(use->_idx) &&
  1.1909 +         (use->is_If() || use->is_CMove() || use->is_Bool()) &&
  1.1910 +         use->in(1) == n)  {
  1.1911 +      worklist.push(use);
  1.1912 +    }
  1.1913 +  }
  1.1914 +  if (worklist.size() > 0) {
  1.1915 +    // clone "n" and insert it between inputs of "n" and the use
  1.1916 +    Node* n_clone = n->clone();
  1.1917 +    loop->_body.push(n_clone);
  1.1918 +    _igvn.register_new_node_with_optimizer(n_clone);
  1.1919 +    set_ctrl(n_clone, get_ctrl(n));
  1.1920 +    sink_list.push(n_clone);
  1.1921 +    not_peel <<= n_clone->_idx;  // add n_clone to not_peel set.
  1.1922 +#if !defined(PRODUCT)
  1.1923 +    if (TracePartialPeeling) {
  1.1924 +      tty->print_cr("special not_peeled cloning old: %d new: %d", n->_idx, n_clone->_idx);
  1.1925 +    }
  1.1926 +#endif
  1.1927 +    while( worklist.size() ) {
  1.1928 +      Node *use = worklist.pop();
  1.1929 +      _igvn.hash_delete(use);
  1.1930 +      _igvn._worklist.push(use);
  1.1931 +      for (uint j = 1; j < use->req(); j++) {
  1.1932 +        if (use->in(j) == n) {
  1.1933 +          use->set_req(j, n_clone);
  1.1934 +        }
  1.1935 +      }
  1.1936 +    }
  1.1937 +  }
  1.1938 +}
  1.1939 +
  1.1940 +
  1.1941 +//------------------------------ insert_phi_for_loop -------------------------------------
  1.1942 +// Insert phi(lp_entry_val, back_edge_val) at use->in(idx) for loop lp if phi does not already exist
  1.1943 +void PhaseIdealLoop::insert_phi_for_loop( Node* use, uint idx, Node* lp_entry_val, Node* back_edge_val, LoopNode* lp ) {
  1.1944 +  Node *phi = PhiNode::make(lp, back_edge_val);
  1.1945 +  phi->set_req(LoopNode::EntryControl, lp_entry_val);
  1.1946 +  // Use existing phi if it already exists
  1.1947 +  Node *hit = _igvn.hash_find_insert(phi);
  1.1948 +  if( hit == NULL ) {
  1.1949 +    _igvn.register_new_node_with_optimizer(phi);
  1.1950 +    set_ctrl(phi, lp);
  1.1951 +  } else {
  1.1952 +    // Remove the new phi from the graph and use the hit
  1.1953 +    _igvn.remove_dead_node(phi);
  1.1954 +    phi = hit;
  1.1955 +  }
  1.1956 +  _igvn.hash_delete(use);
  1.1957 +  _igvn._worklist.push(use);
  1.1958 +  use->set_req(idx, phi);
  1.1959 +}
  1.1960 +
  1.1961 +#ifdef ASSERT
  1.1962 +//------------------------------ is_valid_loop_partition -------------------------------------
  1.1963 +// Validate the loop partition sets: peel and not_peel
  1.1964 +bool PhaseIdealLoop::is_valid_loop_partition( IdealLoopTree *loop, VectorSet& peel, Node_List& peel_list,
  1.1965 +                                              VectorSet& not_peel ) {
  1.1966 +  uint i;
  1.1967 +  // Check that peel_list entries are in the peel set
  1.1968 +  for (i = 0; i < peel_list.size(); i++) {
  1.1969 +    if (!peel.test(peel_list.at(i)->_idx)) {
  1.1970 +      return false;
  1.1971 +    }
  1.1972 +  }
  1.1973 +  // Check at loop members are in one of peel set or not_peel set
  1.1974 +  for (i = 0; i < loop->_body.size(); i++ ) {
  1.1975 +    Node *def  = loop->_body.at(i);
  1.1976 +    uint di = def->_idx;
  1.1977 +    // Check that peel set elements are in peel_list
  1.1978 +    if (peel.test(di)) {
  1.1979 +      if (not_peel.test(di)) {
  1.1980 +        return false;
  1.1981 +      }
  1.1982 +      // Must be in peel_list also
  1.1983 +      bool found = false;
  1.1984 +      for (uint j = 0; j < peel_list.size(); j++) {
  1.1985 +        if (peel_list.at(j)->_idx == di) {
  1.1986 +          found = true;
  1.1987 +          break;
  1.1988 +        }
  1.1989 +      }
  1.1990 +      if (!found) {
  1.1991 +        return false;
  1.1992 +      }
  1.1993 +    } else if (not_peel.test(di)) {
  1.1994 +      if (peel.test(di)) {
  1.1995 +        return false;
  1.1996 +      }
  1.1997 +    } else {
  1.1998 +      return false;
  1.1999 +    }
  1.2000 +  }
  1.2001 +  return true;
  1.2002 +}
  1.2003 +
  1.2004 +//------------------------------ is_valid_clone_loop_exit_use -------------------------------------
  1.2005 +// Ensure a use outside of loop is of the right form
  1.2006 +bool PhaseIdealLoop::is_valid_clone_loop_exit_use( IdealLoopTree *loop, Node* use, uint exit_idx) {
  1.2007 +  Node *use_c = has_ctrl(use) ? get_ctrl(use) : use;
  1.2008 +  return (use->is_Phi() &&
  1.2009 +          use_c->is_Region() && use_c->req() == 3 &&
  1.2010 +          (use_c->in(exit_idx)->Opcode() == Op_IfTrue ||
  1.2011 +           use_c->in(exit_idx)->Opcode() == Op_IfFalse ||
  1.2012 +           use_c->in(exit_idx)->Opcode() == Op_JumpProj) &&
  1.2013 +          loop->is_member( get_loop( use_c->in(exit_idx)->in(0) ) ) );
  1.2014 +}
  1.2015 +
  1.2016 +//------------------------------ is_valid_clone_loop_form -------------------------------------
  1.2017 +// Ensure that all uses outside of loop are of the right form
  1.2018 +bool PhaseIdealLoop::is_valid_clone_loop_form( IdealLoopTree *loop, Node_List& peel_list,
  1.2019 +                                               uint orig_exit_idx, uint clone_exit_idx) {
  1.2020 +  uint len = peel_list.size();
  1.2021 +  for (uint i = 0; i < len; i++) {
  1.2022 +    Node *def = peel_list.at(i);
  1.2023 +
  1.2024 +    for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
  1.2025 +      Node *use = def->fast_out(j);
  1.2026 +      Node *use_c = has_ctrl(use) ? get_ctrl(use) : use;
  1.2027 +      if (!loop->is_member(get_loop(use_c))) {
  1.2028 +        // use is not in the loop, check for correct structure
  1.2029 +        if (use->in(0) == def) {
  1.2030 +          // Okay
  1.2031 +        } else if (!is_valid_clone_loop_exit_use(loop, use, orig_exit_idx)) {
  1.2032 +          return false;
  1.2033 +        }
  1.2034 +      }
  1.2035 +    }
  1.2036 +  }
  1.2037 +  return true;
  1.2038 +}
  1.2039 +#endif
  1.2040 +
  1.2041 +//------------------------------ partial_peel -------------------------------------
  1.2042 +// Partially peel (aka loop rotation) the top portion of a loop (called
  1.2043 +// the peel section below) by cloning it and placing one copy just before
  1.2044 +// the new loop head and the other copy at the bottom of the new loop.
  1.2045 +//
  1.2046 +//    before                       after                where it came from
  1.2047 +//
  1.2048 +//    stmt1                        stmt1
  1.2049 +//  loop:                          stmt2                     clone
  1.2050 +//    stmt2                        if condA goto exitA       clone
  1.2051 +//    if condA goto exitA        new_loop:                   new
  1.2052 +//    stmt3                        stmt3                     clone
  1.2053 +//    if !condB goto loop          if condB goto exitB       clone
  1.2054 +//  exitB:                         stmt2                     orig
  1.2055 +//    stmt4                        if !condA goto new_loop   orig
  1.2056 +//  exitA:                         goto exitA
  1.2057 +//                               exitB:
  1.2058 +//                                 stmt4
  1.2059 +//                               exitA:
  1.2060 +//
  1.2061 +// Step 1: find the cut point: an exit test on probable
  1.2062 +//         induction variable.
  1.2063 +// Step 2: schedule (with cloning) operations in the peel
  1.2064 +//         section that can be executed after the cut into
  1.2065 +//         the section that is not peeled.  This may need
  1.2066 +//         to clone operations into exit blocks.  For
  1.2067 +//         instance, a reference to A[i] in the not-peel
  1.2068 +//         section and a reference to B[i] in an exit block
  1.2069 +//         may cause a left-shift of i by 2 to be placed
  1.2070 +//         in the peel block.  This step will clone the left
  1.2071 +//         shift into the exit block and sink the left shift
  1.2072 +//         from the peel to the not-peel section.
  1.2073 +// Step 3: clone the loop, retarget the control, and insert
  1.2074 +//         phis for values that are live across the new loop
  1.2075 +//         head.  This is very dependent on the graph structure
  1.2076 +//         from clone_loop.  It creates region nodes for
  1.2077 +//         exit control and associated phi nodes for values
  1.2078 +//         flow out of the loop through that exit.  The region
  1.2079 +//         node is dominated by the clone's control projection.
  1.2080 +//         So the clone's peel section is placed before the
  1.2081 +//         new loop head, and the clone's not-peel section is
  1.2082 +//         forms the top part of the new loop.  The original
  1.2083 +//         peel section forms the tail of the new loop.
  1.2084 +// Step 4: update the dominator tree and recompute the
  1.2085 +//         dominator depth.
  1.2086 +//
  1.2087 +//                   orig
  1.2088 +//
  1.2089 +//                  stmt1
  1.2090 +//                    |
  1.2091 +//                    v
  1.2092 +//                   loop<----+
  1.2093 +//                     |      |
  1.2094 +//                   stmt2    |
  1.2095 +//                     |      |
  1.2096 +//                     v      |
  1.2097 +//                    ifA     |
  1.2098 +//                   / |      |
  1.2099 +//                  v  v      |
  1.2100 +//               false true   ^  <-- last_peel
  1.2101 +//               /     |      |
  1.2102 +//              /   ===|==cut |
  1.2103 +//             /     stmt3    |  <-- first_not_peel
  1.2104 +//            /        |      |
  1.2105 +//            |        v      |
  1.2106 +//            v       ifB     |
  1.2107 +//          exitA:   / \      |
  1.2108 +//                  /   \     |
  1.2109 +//                 v     v    |
  1.2110 +//               false true   |
  1.2111 +//               /       \    |
  1.2112 +//              /         ----+
  1.2113 +//             |
  1.2114 +//             v
  1.2115 +//           exitB:
  1.2116 +//           stmt4
  1.2117 +//
  1.2118 +//
  1.2119 +//            after clone loop
  1.2120 +//
  1.2121 +//                   stmt1
  1.2122 +//                 /       \
  1.2123 +//        clone   /         \   orig
  1.2124 +//               /           \
  1.2125 +//              /             \
  1.2126 +//             v               v
  1.2127 +//   +---->loop                loop<----+
  1.2128 +//   |      |                    |      |
  1.2129 +//   |    stmt2                stmt2    |
  1.2130 +//   |      |                    |      |
  1.2131 +//   |      v                    v      |
  1.2132 +//   |      ifA                 ifA     |
  1.2133 +//   |      | \                / |      |
  1.2134 +//   |      v  v              v  v      |
  1.2135 +//   ^    true  false      false true   ^  <-- last_peel
  1.2136 +//   |      |   ^   \       /    |      |
  1.2137 +//   | cut==|==  \   \     /  ===|==cut |
  1.2138 +//   |    stmt3   \   \   /    stmt3    |  <-- first_not_peel
  1.2139 +//   |      |    dom   | |       |      |
  1.2140 +//   |      v      \  1v v2      v      |
  1.2141 +//   |      ifB     regionA     ifB     |
  1.2142 +//   |      / \        |       / \      |
  1.2143 +//   |     /   \       v      /   \     |
  1.2144 +//   |    v     v    exitA:  v     v    |
  1.2145 +//   |    true  false      false true   |
  1.2146 +//   |    /     ^   \      /       \    |
  1.2147 +//   +----       \   \    /         ----+
  1.2148 +//               dom  \  /
  1.2149 +//                 \  1v v2
  1.2150 +//                  regionB
  1.2151 +//                     |
  1.2152 +//                     v
  1.2153 +//                   exitB:
  1.2154 +//                   stmt4
  1.2155 +//
  1.2156 +//
  1.2157 +//           after partial peel
  1.2158 +//
  1.2159 +//                  stmt1
  1.2160 +//                 /
  1.2161 +//        clone   /             orig
  1.2162 +//               /          TOP
  1.2163 +//              /             \
  1.2164 +//             v               v
  1.2165 +//    TOP->region             region----+
  1.2166 +//          |                    |      |
  1.2167 +//        stmt2                stmt2    |
  1.2168 +//          |                    |      |
  1.2169 +//          v                    v      |
  1.2170 +//          ifA                 ifA     |
  1.2171 +//          | \                / |      |
  1.2172 +//          v  v              v  v      |
  1.2173 +//        true  false      false true   |     <-- last_peel
  1.2174 +//          |   ^   \       /    +------|---+
  1.2175 +//  +->newloop   \   \     /  === ==cut |   |
  1.2176 +//  |     stmt3   \   \   /     TOP     |   |
  1.2177 +//  |       |    dom   | |      stmt3   |   | <-- first_not_peel
  1.2178 +//  |       v      \  1v v2      v      |   |
  1.2179 +//  |       ifB     regionA     ifB     ^   v
  1.2180 +//  |       / \        |       / \      |   |
  1.2181 +//  |      /   \       v      /   \     |   |
  1.2182 +//  |     v     v    exitA:  v     v    |   |
  1.2183 +//  |     true  false      false true   |   |
  1.2184 +//  |     /     ^   \      /       \    |   |
  1.2185 +//  |    |       \   \    /         v   |   |
  1.2186 +//  |    |       dom  \  /         TOP  |   |
  1.2187 +//  |    |         \  1v v2             |   |
  1.2188 +//  ^    v          regionB             |   |
  1.2189 +//  |    |             |                |   |
  1.2190 +//  |    |             v                ^   v
  1.2191 +//  |    |           exitB:             |   |
  1.2192 +//  |    |           stmt4              |   |
  1.2193 +//  |    +------------>-----------------+   |
  1.2194 +//  |                                       |
  1.2195 +//  +-----------------<---------------------+
  1.2196 +//
  1.2197 +//
  1.2198 +//              final graph
  1.2199 +//
  1.2200 +//                  stmt1
  1.2201 +//                    |
  1.2202 +//                    v
  1.2203 +//         ........> ifA clone
  1.2204 +//         :        / |
  1.2205 +//        dom      /  |
  1.2206 +//         :      v   v
  1.2207 +//         :  false   true
  1.2208 +//         :  |       |
  1.2209 +//         :  |     stmt2 clone
  1.2210 +//         :  |       |
  1.2211 +//         :  |       v
  1.2212 +//         :  |    newloop<-----+
  1.2213 +//         :  |        |        |
  1.2214 +//         :  |     stmt3 clone |
  1.2215 +//         :  |        |        |
  1.2216 +//         :  |        v        |
  1.2217 +//         :  |       ifB       |
  1.2218 +//         :  |      / \        |
  1.2219 +//         :  |     v   v       |
  1.2220 +//         :  |  false true     |
  1.2221 +//         :  |   |     |       |
  1.2222 +//         :  |   v    stmt2    |
  1.2223 +//         :  | exitB:  |       |
  1.2224 +//         :  | stmt4   v       |
  1.2225 +//         :  |       ifA orig  |
  1.2226 +//         :  |      /  \       |
  1.2227 +//         :  |     /    \      |
  1.2228 +//         :  |    v     v      |
  1.2229 +//         :  |  false  true    |
  1.2230 +//         :  |  /        \     |
  1.2231 +//         :  v  v         -----+
  1.2232 +//          RegionA
  1.2233 +//             |
  1.2234 +//             v
  1.2235 +//           exitA
  1.2236 +//
  1.2237 +bool PhaseIdealLoop::partial_peel( IdealLoopTree *loop, Node_List &old_new ) {
  1.2238 +
  1.2239 +  LoopNode *head  = loop->_head->as_Loop();
  1.2240 +
  1.2241 +  if (head->is_partial_peel_loop() || head->partial_peel_has_failed()) {
  1.2242 +    return false;
  1.2243 +  }
  1.2244 +
  1.2245 +  // Check for complex exit control
  1.2246 +  for(uint ii = 0; ii < loop->_body.size(); ii++ ) {
  1.2247 +    Node *n = loop->_body.at(ii);
  1.2248 +    int opc = n->Opcode();
  1.2249 +    if (n->is_Call()        ||
  1.2250 +        opc == Op_Catch     ||
  1.2251 +        opc == Op_CatchProj ||
  1.2252 +        opc == Op_Jump      ||
  1.2253 +        opc == Op_JumpProj) {
  1.2254 +#if !defined(PRODUCT)
  1.2255 +      if (TracePartialPeeling) {
  1.2256 +        tty->print_cr("\nExit control too complex: lp: %d", head->_idx);
  1.2257 +      }
  1.2258 +#endif
  1.2259 +      return false;
  1.2260 +    }
  1.2261 +  }
  1.2262 +
  1.2263 +  int dd = dom_depth(head);
  1.2264 +
  1.2265 +  // Step 1: find cut point
  1.2266 +
  1.2267 +  // Walk up dominators to loop head looking for first loop exit
  1.2268 +  // which is executed on every path thru loop.
  1.2269 +  IfNode *peel_if = NULL;
  1.2270 +  IfNode *peel_if_cmpu = NULL;
  1.2271 +
  1.2272 +  Node *iff = loop->tail();
  1.2273 +  while( iff != head ) {
  1.2274 +    if( iff->is_If() ) {
  1.2275 +      Node *ctrl = get_ctrl(iff->in(1));
  1.2276 +      if (ctrl->is_top()) return false; // Dead test on live IF.
  1.2277 +      // If loop-varying exit-test, check for induction variable
  1.2278 +      if( loop->is_member(get_loop(ctrl)) &&
  1.2279 +          loop->is_loop_exit(iff) &&
  1.2280 +          is_possible_iv_test(iff)) {
  1.2281 +        Node* cmp = iff->in(1)->in(1);
  1.2282 +        if (cmp->Opcode() == Op_CmpI) {
  1.2283 +          peel_if = iff->as_If();
  1.2284 +        } else {
  1.2285 +          assert(cmp->Opcode() == Op_CmpU, "must be CmpI or CmpU");
  1.2286 +          peel_if_cmpu = iff->as_If();
  1.2287 +        }
  1.2288 +      }
  1.2289 +    }
  1.2290 +    iff = idom(iff);
  1.2291 +  }
  1.2292 +  // Prefer signed compare over unsigned compare.
  1.2293 +  IfNode* new_peel_if = NULL;
  1.2294 +  if (peel_if == NULL) {
  1.2295 +    if (!PartialPeelAtUnsignedTests || peel_if_cmpu == NULL) {
  1.2296 +      return false;   // No peel point found
  1.2297 +    }
  1.2298 +    new_peel_if = insert_cmpi_loop_exit(peel_if_cmpu, loop);
  1.2299 +    if (new_peel_if == NULL) {
  1.2300 +      return false;   // No peel point found
  1.2301 +    }
  1.2302 +    peel_if = new_peel_if;
  1.2303 +  }
  1.2304 +  Node* last_peel        = stay_in_loop(peel_if, loop);
  1.2305 +  Node* first_not_peeled = stay_in_loop(last_peel, loop);
  1.2306 +  if (first_not_peeled == NULL || first_not_peeled == head) {
  1.2307 +    return false;
  1.2308 +  }
  1.2309 +
  1.2310 +#if !defined(PRODUCT)
  1.2311 +  if (TracePartialPeeling) {
  1.2312 +    tty->print_cr("before partial peel one iteration");
  1.2313 +    Node_List wl;
  1.2314 +    Node* t = head->in(2);
  1.2315 +    while (true) {
  1.2316 +      wl.push(t);
  1.2317 +      if (t == head) break;
  1.2318 +      t = idom(t);
  1.2319 +    }
  1.2320 +    while (wl.size() > 0) {
  1.2321 +      Node* tt = wl.pop();
  1.2322 +      tt->dump();
  1.2323 +      if (tt == last_peel) tty->print_cr("-- cut --");
  1.2324 +    }
  1.2325 +  }
  1.2326 +#endif
  1.2327 +  ResourceArea *area = Thread::current()->resource_area();
  1.2328 +  VectorSet peel(area);
  1.2329 +  VectorSet not_peel(area);
  1.2330 +  Node_List peel_list(area);
  1.2331 +  Node_List worklist(area);
  1.2332 +  Node_List sink_list(area);
  1.2333 +
  1.2334 +  // Set of cfg nodes to peel are those that are executable from
  1.2335 +  // the head through last_peel.
  1.2336 +  assert(worklist.size() == 0, "should be empty");
  1.2337 +  worklist.push(head);
  1.2338 +  peel.set(head->_idx);
  1.2339 +  while (worklist.size() > 0) {
  1.2340 +    Node *n = worklist.pop();
  1.2341 +    if (n != last_peel) {
  1.2342 +      for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
  1.2343 +        Node* use = n->fast_out(j);
  1.2344 +        if (use->is_CFG() &&
  1.2345 +            loop->is_member(get_loop(use)) &&
  1.2346 +            !peel.test_set(use->_idx)) {
  1.2347 +          worklist.push(use);
  1.2348 +        }
  1.2349 +      }
  1.2350 +    }
  1.2351 +  }
  1.2352 +
  1.2353 +  // Set of non-cfg nodes to peel are those that are control
  1.2354 +  // dependent on the cfg nodes.
  1.2355 +  uint i;
  1.2356 +  for(i = 0; i < loop->_body.size(); i++ ) {
  1.2357 +    Node *n = loop->_body.at(i);
  1.2358 +    Node *n_c = has_ctrl(n) ? get_ctrl(n) : n;
  1.2359 +    if (peel.test(n_c->_idx)) {
  1.2360 +      peel.set(n->_idx);
  1.2361 +    } else {
  1.2362 +      not_peel.set(n->_idx);
  1.2363 +    }
  1.2364 +  }
  1.2365 +
  1.2366 +  // Step 2: move operations from the peeled section down into the
  1.2367 +  //         not-peeled section
  1.2368 +
  1.2369 +  // Get a post order schedule of nodes in the peel region
  1.2370 +  // Result in right-most operand.
  1.2371 +  scheduled_nodelist(loop, peel, peel_list );
  1.2372 +
  1.2373 +  assert(is_valid_loop_partition(loop, peel, peel_list, not_peel), "bad partition");
  1.2374 +
  1.2375 +  // For future check for too many new phis
  1.2376 +  uint old_phi_cnt = 0;
  1.2377 +  for (DUIterator_Fast jmax, j = head->fast_outs(jmax); j < jmax; j++) {
  1.2378 +    Node* use = head->fast_out(j);
  1.2379 +    if (use->is_Phi()) old_phi_cnt++;
  1.2380 +  }
  1.2381 +
  1.2382 +#if !defined(PRODUCT)
  1.2383 +  if (TracePartialPeeling) {
  1.2384 +    tty->print_cr("\npeeled list");
  1.2385 +  }
  1.2386 +#endif
  1.2387 +
  1.2388 +  // Evacuate nodes in peel region into the not_peeled region if possible
  1.2389 +  uint new_phi_cnt = 0;
  1.2390 +  for (i = 0; i < peel_list.size();) {
  1.2391 +    Node* n = peel_list.at(i);
  1.2392 +#if !defined(PRODUCT)
  1.2393 +  if (TracePartialPeeling) n->dump();
  1.2394 +#endif
  1.2395 +    bool incr = true;
  1.2396 +    if ( !n->is_CFG() ) {
  1.2397 +
  1.2398 +      if ( has_use_in_set(n, not_peel) ) {
  1.2399 +
  1.2400 +        // If not used internal to the peeled region,
  1.2401 +        // move "n" from peeled to not_peeled region.
  1.2402 +
  1.2403 +        if ( !has_use_internal_to_set(n, peel, loop) ) {
  1.2404 +
  1.2405 +          // if not pinned and not a load (which maybe anti-dependent on a store)
  1.2406 +          // and not a CMove (Matcher expects only bool->cmove).
  1.2407 +          if ( n->in(0) == NULL && !n->is_Load() && !n->is_CMove() ) {
  1.2408 +            clone_for_use_outside_loop( loop, n, worklist );
  1.2409 +
  1.2410 +            sink_list.push(n);
  1.2411 +            peel     >>= n->_idx; // delete n from peel set.
  1.2412 +            not_peel <<= n->_idx; // add n to not_peel set.
  1.2413 +            peel_list.remove(i);
  1.2414 +            incr = false;
  1.2415 +#if !defined(PRODUCT)
  1.2416 +            if (TracePartialPeeling) {
  1.2417 +              tty->print_cr("sink to not_peeled region: %d newbb: %d",
  1.2418 +                            n->_idx, get_ctrl(n)->_idx);
  1.2419 +            }
  1.2420 +#endif
  1.2421 +          }
  1.2422 +        } else {
  1.2423 +          // Otherwise check for special def-use cases that span
  1.2424 +          // the peel/not_peel boundary such as bool->if
  1.2425 +          clone_for_special_use_inside_loop( loop, n, not_peel, sink_list, worklist );
  1.2426 +          new_phi_cnt++;
  1.2427 +        }
  1.2428 +      }
  1.2429 +    }
  1.2430 +    if (incr) i++;
  1.2431 +  }
  1.2432 +
  1.2433 +  if (new_phi_cnt > old_phi_cnt + PartialPeelNewPhiDelta) {
  1.2434 +#if !defined(PRODUCT)
  1.2435 +    if (TracePartialPeeling) {
  1.2436 +      tty->print_cr("\nToo many new phis: %d  old %d new cmpi: %c",
  1.2437 +                    new_phi_cnt, old_phi_cnt, new_peel_if != NULL?'T':'F');
  1.2438 +    }
  1.2439 +#endif
  1.2440 +    if (new_peel_if != NULL) {
  1.2441 +      remove_cmpi_loop_exit(new_peel_if, loop);
  1.2442 +    }
  1.2443 +    // Inhibit more partial peeling on this loop
  1.2444 +    assert(!head->is_partial_peel_loop(), "not partial peeled");
  1.2445 +    head->mark_partial_peel_failed();
  1.2446 +    return false;
  1.2447 +  }
  1.2448 +
  1.2449 +  // Step 3: clone loop, retarget control, and insert new phis
  1.2450 +
  1.2451 +  // Create new loop head for new phis and to hang
  1.2452 +  // the nodes being moved (sinked) from the peel region.
  1.2453 +  LoopNode* new_head = new (C, 3) LoopNode(last_peel, last_peel);
  1.2454 +  _igvn.register_new_node_with_optimizer(new_head);
  1.2455 +  assert(first_not_peeled->in(0) == last_peel, "last_peel <- first_not_peeled");
  1.2456 +  first_not_peeled->set_req(0, new_head);
  1.2457 +  set_loop(new_head, loop);
  1.2458 +  loop->_body.push(new_head);
  1.2459 +  not_peel.set(new_head->_idx);
  1.2460 +  set_idom(new_head, last_peel, dom_depth(first_not_peeled));
  1.2461 +  set_idom(first_not_peeled, new_head, dom_depth(first_not_peeled));
  1.2462 +
  1.2463 +  while (sink_list.size() > 0) {
  1.2464 +    Node* n = sink_list.pop();
  1.2465 +    set_ctrl(n, new_head);
  1.2466 +  }
  1.2467 +
  1.2468 +  assert(is_valid_loop_partition(loop, peel, peel_list, not_peel), "bad partition");
  1.2469 +
  1.2470 +  clone_loop( loop, old_new, dd );
  1.2471 +
  1.2472 +  const uint clone_exit_idx = 1;
  1.2473 +  const uint orig_exit_idx  = 2;
  1.2474 +  assert(is_valid_clone_loop_form( loop, peel_list, orig_exit_idx, clone_exit_idx ), "bad clone loop");
  1.2475 +
  1.2476 +  Node* head_clone             = old_new[head->_idx];
  1.2477 +  LoopNode* new_head_clone     = old_new[new_head->_idx]->as_Loop();
  1.2478 +  Node* orig_tail_clone        = head_clone->in(2);
  1.2479 +
  1.2480 +  // Add phi if "def" node is in peel set and "use" is not
  1.2481 +
  1.2482 +  for(i = 0; i < peel_list.size(); i++ ) {
  1.2483 +    Node *def  = peel_list.at(i);
  1.2484 +    if (!def->is_CFG()) {
  1.2485 +      for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
  1.2486 +        Node *use = def->fast_out(j);
  1.2487 +        if (has_node(use) && use->in(0) != C->top() &&
  1.2488 +            (!peel.test(use->_idx) ||
  1.2489 +             (use->is_Phi() && use->in(0) == head)) ) {
  1.2490 +          worklist.push(use);
  1.2491 +        }
  1.2492 +      }
  1.2493 +      while( worklist.size() ) {
  1.2494 +        Node *use = worklist.pop();
  1.2495 +        for (uint j = 1; j < use->req(); j++) {
  1.2496 +          Node* n = use->in(j);
  1.2497 +          if (n == def) {
  1.2498 +
  1.2499 +            // "def" is in peel set, "use" is not in peel set
  1.2500 +            // or "use" is in the entry boundary (a phi) of the peel set
  1.2501 +
  1.2502 +            Node* use_c = has_ctrl(use) ? get_ctrl(use) : use;
  1.2503 +
  1.2504 +            if ( loop->is_member(get_loop( use_c )) ) {
  1.2505 +              // use is in loop
  1.2506 +              if (old_new[use->_idx] != NULL) { // null for dead code
  1.2507 +                Node* use_clone = old_new[use->_idx];
  1.2508 +                _igvn.hash_delete(use);
  1.2509 +                use->set_req(j, C->top());
  1.2510 +                _igvn._worklist.push(use);
  1.2511 +                insert_phi_for_loop( use_clone, j, old_new[def->_idx], def, new_head_clone );
  1.2512 +              }
  1.2513 +            } else {
  1.2514 +              assert(is_valid_clone_loop_exit_use(loop, use, orig_exit_idx), "clone loop format");
  1.2515 +              // use is not in the loop, check if the live range includes the cut
  1.2516 +              Node* lp_if = use_c->in(orig_exit_idx)->in(0);
  1.2517 +              if (not_peel.test(lp_if->_idx)) {
  1.2518 +                assert(j == orig_exit_idx, "use from original loop");
  1.2519 +                insert_phi_for_loop( use, clone_exit_idx, old_new[def->_idx], def, new_head_clone );
  1.2520 +              }
  1.2521 +            }
  1.2522 +          }
  1.2523 +        }
  1.2524 +      }
  1.2525 +    }
  1.2526 +  }
  1.2527 +
  1.2528 +  // Step 3b: retarget control
  1.2529 +
  1.2530 +  // Redirect control to the new loop head if a cloned node in
  1.2531 +  // the not_peeled region has control that points into the peeled region.
  1.2532 +  // This necessary because the cloned peeled region will be outside
  1.2533 +  // the loop.
  1.2534 +  //                            from    to
  1.2535 +  //          cloned-peeled    <---+
  1.2536 +  //    new_head_clone:            |    <--+
  1.2537 +  //          cloned-not_peeled  in(0)    in(0)
  1.2538 +  //          orig-peeled
  1.2539 +
  1.2540 +  for(i = 0; i < loop->_body.size(); i++ ) {
  1.2541 +    Node *n = loop->_body.at(i);
  1.2542 +    if (!n->is_CFG()           && n->in(0) != NULL        &&
  1.2543 +        not_peel.test(n->_idx) && peel.test(n->in(0)->_idx)) {
  1.2544 +      Node* n_clone = old_new[n->_idx];
  1.2545 +      _igvn.hash_delete(n_clone);
  1.2546 +      n_clone->set_req(0, new_head_clone);
  1.2547 +      _igvn._worklist.push(n_clone);
  1.2548 +    }
  1.2549 +  }
  1.2550 +
  1.2551 +  // Backedge of the surviving new_head (the clone) is original last_peel
  1.2552 +  _igvn.hash_delete(new_head_clone);
  1.2553 +  new_head_clone->set_req(LoopNode::LoopBackControl, last_peel);
  1.2554 +  _igvn._worklist.push(new_head_clone);
  1.2555 +
  1.2556 +  // Cut first node in original not_peel set
  1.2557 +  _igvn.hash_delete(new_head);
  1.2558 +  new_head->set_req(LoopNode::EntryControl, C->top());
  1.2559 +  new_head->set_req(LoopNode::LoopBackControl, C->top());
  1.2560 +  _igvn._worklist.push(new_head);
  1.2561 +
  1.2562 +  // Copy head_clone back-branch info to original head
  1.2563 +  // and remove original head's loop entry and
  1.2564 +  // clone head's back-branch
  1.2565 +  _igvn.hash_delete(head);
  1.2566 +  _igvn.hash_delete(head_clone);
  1.2567 +  head->set_req(LoopNode::EntryControl, head_clone->in(LoopNode::LoopBackControl));
  1.2568 +  head->set_req(LoopNode::LoopBackControl, C->top());
  1.2569 +  head_clone->set_req(LoopNode::LoopBackControl, C->top());
  1.2570 +  _igvn._worklist.push(head);
  1.2571 +  _igvn._worklist.push(head_clone);
  1.2572 +
  1.2573 +  // Similarly modify the phis
  1.2574 +  for (DUIterator_Fast kmax, k = head->fast_outs(kmax); k < kmax; k++) {
  1.2575 +    Node* use = head->fast_out(k);
  1.2576 +    if (use->is_Phi() && use->outcnt() > 0) {
  1.2577 +      Node* use_clone = old_new[use->_idx];
  1.2578 +      _igvn.hash_delete(use);
  1.2579 +      _igvn.hash_delete(use_clone);
  1.2580 +      use->set_req(LoopNode::EntryControl, use_clone->in(LoopNode::LoopBackControl));
  1.2581 +      use->set_req(LoopNode::LoopBackControl, C->top());
  1.2582 +      use_clone->set_req(LoopNode::LoopBackControl, C->top());
  1.2583 +      _igvn._worklist.push(use);
  1.2584 +      _igvn._worklist.push(use_clone);
  1.2585 +    }
  1.2586 +  }
  1.2587 +
  1.2588 +  // Step 4: update dominator tree and dominator depth
  1.2589 +
  1.2590 +  set_idom(head, orig_tail_clone, dd);
  1.2591 +  recompute_dom_depth();
  1.2592 +
  1.2593 +  // Inhibit more partial peeling on this loop
  1.2594 +  new_head_clone->set_partial_peel_loop();
  1.2595 +  C->set_major_progress();
  1.2596 +
  1.2597 +#if !defined(PRODUCT)
  1.2598 +  if (TracePartialPeeling) {
  1.2599 +    tty->print_cr("\nafter partial peel one iteration");
  1.2600 +    Node_List wl(area);
  1.2601 +    Node* t = last_peel;
  1.2602 +    while (true) {
  1.2603 +      wl.push(t);
  1.2604 +      if (t == head_clone) break;
  1.2605 +      t = idom(t);
  1.2606 +    }
  1.2607 +    while (wl.size() > 0) {
  1.2608 +      Node* tt = wl.pop();
  1.2609 +      if (tt == head) tty->print_cr("orig head");
  1.2610 +      else if (tt == new_head_clone) tty->print_cr("new head");
  1.2611 +      else if (tt == head_clone) tty->print_cr("clone head");
  1.2612 +      tt->dump();
  1.2613 +    }
  1.2614 +  }
  1.2615 +#endif
  1.2616 +  return true;
  1.2617 +}
  1.2618 +
  1.2619 +//------------------------------reorg_offsets----------------------------------
  1.2620 +// Reorganize offset computations to lower register pressure.  Mostly
  1.2621 +// prevent loop-fallout uses of the pre-incremented trip counter (which are
  1.2622 +// then alive with the post-incremented trip counter forcing an extra
  1.2623 +// register move)
  1.2624 +void PhaseIdealLoop::reorg_offsets( IdealLoopTree *loop ) {
  1.2625 +
  1.2626 +  CountedLoopNode *cl = loop->_head->as_CountedLoop();
  1.2627 +  CountedLoopEndNode *cle = cl->loopexit();
  1.2628 +  if( !cle ) return;            // The occasional dead loop
  1.2629 +  // Find loop exit control
  1.2630 +  Node *exit = cle->proj_out(false);
  1.2631 +  assert( exit->Opcode() == Op_IfFalse, "" );
  1.2632 +
  1.2633 +  // Check for the special case of folks using the pre-incremented
  1.2634 +  // trip-counter on the fall-out path (forces the pre-incremented
  1.2635 +  // and post-incremented trip counter to be live at the same time).
  1.2636 +  // Fix this by adjusting to use the post-increment trip counter.
  1.2637 +  Node *phi = cl->phi();
  1.2638 +  if( !phi ) return;            // Dead infinite loop
  1.2639 +  bool progress = true;
  1.2640 +  while (progress) {
  1.2641 +    progress = false;
  1.2642 +    for (DUIterator_Fast imax, i = phi->fast_outs(imax); i < imax; i++) {
  1.2643 +      Node* use = phi->fast_out(i);   // User of trip-counter
  1.2644 +      if (!has_ctrl(use))  continue;
  1.2645 +      Node *u_ctrl = get_ctrl(use);
  1.2646 +      if( use->is_Phi() ) {
  1.2647 +        u_ctrl = NULL;
  1.2648 +        for( uint j = 1; j < use->req(); j++ )
  1.2649 +          if( use->in(j) == phi )
  1.2650 +            u_ctrl = dom_lca( u_ctrl, use->in(0)->in(j) );
  1.2651 +      }
  1.2652 +      IdealLoopTree *u_loop = get_loop(u_ctrl);
  1.2653 +      // Look for loop-invariant use
  1.2654 +      if( u_loop == loop ) continue;
  1.2655 +      if( loop->is_member( u_loop ) ) continue;
  1.2656 +      // Check that use is live out the bottom.  Assuming the trip-counter
  1.2657 +      // update is right at the bottom, uses of of the loop middle are ok.
  1.2658 +      if( dom_lca( exit, u_ctrl ) != exit ) continue;
  1.2659 +      // protect against stride not being a constant
  1.2660 +      if( !cle->stride_is_con() ) continue;
  1.2661 +      // Hit!  Refactor use to use the post-incremented tripcounter.
  1.2662 +      // Compute a post-increment tripcounter.
  1.2663 +      Node *opaq = new (C, 2) Opaque2Node( cle->incr() );
  1.2664 +      register_new_node( opaq, u_ctrl );
  1.2665 +      Node *neg_stride = _igvn.intcon(-cle->stride_con());
  1.2666 +      set_ctrl(neg_stride, C->root());
  1.2667 +      Node *post = new (C, 3) AddINode( opaq, neg_stride);
  1.2668 +      register_new_node( post, u_ctrl );
  1.2669 +      _igvn.hash_delete(use);
  1.2670 +      _igvn._worklist.push(use);
  1.2671 +      for( uint j = 1; j < use->req(); j++ )
  1.2672 +        if( use->in(j) == phi )
  1.2673 +          use->set_req(j, post);
  1.2674 +      // Since DU info changed, rerun loop
  1.2675 +      progress = true;
  1.2676 +      break;
  1.2677 +    }
  1.2678 +  }
  1.2679 +
  1.2680 +}

mercurial