src/share/vm/opto/loopopts.cpp

changeset 0:f90c822e73f8
child     6876:710a3c8b516e
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/share/vm/opto/loopopts.cpp	Wed Apr 27 01:25:04 2016 +0800
     1.3 @@ -0,0 +1,2788 @@
     1.4 +/*
     1.5 + * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
     1.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     1.7 + *
     1.8 + * This code is free software; you can redistribute it and/or modify it
     1.9 + * under the terms of the GNU General Public License version 2 only, as
    1.10 + * published by the Free Software Foundation.
    1.11 + *
    1.12 + * This code is distributed in the hope that it will be useful, but WITHOUT
    1.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    1.14 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    1.15 + * version 2 for more details (a copy is included in the LICENSE file that
    1.16 + * accompanied this code).
    1.17 + *
    1.18 + * You should have received a copy of the GNU General Public License version
    1.19 + * 2 along with this work; if not, write to the Free Software Foundation,
    1.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    1.21 + *
    1.22 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    1.23 + * or visit www.oracle.com if you need additional information or have any
    1.24 + * questions.
    1.25 + *
    1.26 + */
    1.27 +
    1.28 +#include "precompiled.hpp"
    1.29 +#include "memory/allocation.inline.hpp"
    1.30 +#include "opto/addnode.hpp"
    1.31 +#include "opto/connode.hpp"
    1.32 +#include "opto/divnode.hpp"
    1.33 +#include "opto/loopnode.hpp"
    1.34 +#include "opto/matcher.hpp"
    1.35 +#include "opto/mulnode.hpp"
    1.36 +#include "opto/rootnode.hpp"
    1.37 +#include "opto/subnode.hpp"
    1.38 +
    1.39 +//=============================================================================
    1.40 +//------------------------------split_thru_phi---------------------------------
    1.41 +// Split Node 'n' through merge point if there is enough win.
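          +// For example, with
          +//   region = Region(A, B);  p = Phi(region, a, b);  n = AddI(p, inv)
          +// the AddI is cloned per path and 'n' is replaced (when enough of the
          +// per-path clones constant-fold, simplify or common up) by
          +//   Phi(region, AddI(a, inv), AddI(b, inv))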
    1.42 +Node *PhaseIdealLoop::split_thru_phi( Node *n, Node *region, int policy ) {
    1.43 +  if (n->Opcode() == Op_ConvI2L && n->bottom_type() != TypeLong::LONG) {
    1.44 +    // ConvI2L may have type information on it which is unsafe to push up
    1.45 +    // so disable this for now
    1.46 +    return NULL;
    1.47 +  }
    1.48 +
    1.49 +  int wins = 0;
    1.50 +  assert(!n->is_CFG(), "");
    1.51 +  assert(region->is_Region(), "");
    1.52 +
    1.53 +  const Type* type = n->bottom_type();
    1.54 +  const TypeOopPtr *t_oop = _igvn.type(n)->isa_oopptr();
    1.55 +  Node *phi;
    1.56 +  if (t_oop != NULL && t_oop->is_known_instance_field()) {
    1.57 +    int iid    = t_oop->instance_id();
    1.58 +    int index  = C->get_alias_index(t_oop);
    1.59 +    int offset = t_oop->offset();
    1.60 +    phi = new (C) PhiNode(region, type, NULL, iid, index, offset);
    1.61 +  } else {
    1.62 +    phi = PhiNode::make_blank(region, n);
    1.63 +  }
    1.64 +  uint old_unique = C->unique();
    1.65 +  for (uint i = 1; i < region->req(); i++) {
    1.66 +    Node *x;
    1.67 +    Node* the_clone = NULL;
    1.68 +    if (region->in(i) == C->top()) {
    1.69 +      x = C->top();             // Dead path?  Use a dead data op
    1.70 +    } else {
    1.71 +      x = n->clone();           // Else clone up the data op
    1.72 +      the_clone = x;            // Remember for possible deletion.
    1.73 +      // Alter data node to use pre-phi inputs
    1.74 +      if (n->in(0) == region)
    1.75 +        x->set_req( 0, region->in(i) );
    1.76 +      for (uint j = 1; j < n->req(); j++) {
    1.77 +        Node *in = n->in(j);
    1.78 +        if (in->is_Phi() && in->in(0) == region)
    1.79 +          x->set_req( j, in->in(i) ); // Use pre-Phi input for the clone
    1.80 +      }
    1.81 +    }
    1.82 +    // Check for a 'win' on some paths
    1.83 +    const Type *t = x->Value(&_igvn);
    1.84 +
    1.85 +    bool singleton = t->singleton();
    1.86 +
    1.87 +    // A TOP singleton indicates that there are no possible values incoming
    1.88 +    // along a particular edge. In most cases, this is OK, and the Phi will
    1.89 +    // be eliminated later in an Ideal call. However, we can't allow this to
    1.90 +    // happen if the singleton occurs on loop entry, as the elimination of
    1.91 +    // the PhiNode may cause the resulting node to migrate back to a previous
    1.92 +    // loop iteration.
    1.93 +    if (singleton && t == Type::TOP) {
    1.94 +      // Is_Loop() == false does not confirm the absence of a loop (e.g., an
    1.95 +      // irreducible loop may not be indicated by an affirmative is_Loop());
    1.96 +      // therefore, the only top we can split thru a phi is on a backedge of
    1.97 +      // a loop.
    1.98 +      singleton &= region->is_Loop() && (i != LoopNode::EntryControl);
    1.99 +    }
   1.100 +
   1.101 +    if (singleton) {
   1.102 +      wins++;
   1.103 +      x = ((PhaseGVN&)_igvn).makecon(t);
   1.104 +    } else {
   1.105 +      // We now call Identity to try to simplify the cloned node.
   1.106 +      // Note that some Identity methods call phase->type(this).
   1.107 +      // Make sure that the type array is big enough for
   1.108 +      // our new node, even though we may throw the node away.
   1.109 +      // (Note: This tweaking with igvn only works because x is a new node.)
   1.110 +      _igvn.set_type(x, t);
   1.111 +      // If x is a TypeNode, capture any more-precise type permanently into Node
    1.112 +      // otherwise it will not be updated during igvn->transform since
   1.113 +      // igvn->type(x) is set to x->Value() already.
   1.114 +      x->raise_bottom_type(t);
   1.115 +      Node *y = x->Identity(&_igvn);
   1.116 +      if (y != x) {
   1.117 +        wins++;
   1.118 +        x = y;
   1.119 +      } else {
   1.120 +        y = _igvn.hash_find(x);
   1.121 +        if (y) {
   1.122 +          wins++;
   1.123 +          x = y;
   1.124 +        } else {
   1.125 +          // Else x is a new node we are keeping
   1.126 +          // We do not need register_new_node_with_optimizer
   1.127 +          // because set_type has already been called.
   1.128 +          _igvn._worklist.push(x);
   1.129 +        }
   1.130 +      }
   1.131 +    }
   1.132 +    if (x != the_clone && the_clone != NULL)
   1.133 +      _igvn.remove_dead_node(the_clone);
   1.134 +    phi->set_req( i, x );
   1.135 +  }
   1.136 +  // Too few wins?
   1.137 +  if (wins <= policy) {
   1.138 +    _igvn.remove_dead_node(phi);
   1.139 +    return NULL;
   1.140 +  }
   1.141 +
   1.142 +  // Record Phi
   1.143 +  register_new_node( phi, region );
   1.144 +
   1.145 +  for (uint i2 = 1; i2 < phi->req(); i2++) {
   1.146 +    Node *x = phi->in(i2);
   1.147 +    // If we commoned up the cloned 'x' with another existing Node,
   1.148 +    // the existing Node picks up a new use.  We need to make the
   1.149 +    // existing Node occur higher up so it dominates its uses.
   1.150 +    Node *old_ctrl;
   1.151 +    IdealLoopTree *old_loop;
   1.152 +
   1.153 +    if (x->is_Con()) {
   1.154 +      // Constant's control is always root.
   1.155 +      set_ctrl(x, C->root());
   1.156 +      continue;
   1.157 +    }
   1.158 +    // The occasional new node
   1.159 +    if (x->_idx >= old_unique) {     // Found a new, unplaced node?
   1.160 +      old_ctrl = NULL;
   1.161 +      old_loop = NULL;               // Not in any prior loop
   1.162 +    } else {
   1.163 +      old_ctrl = get_ctrl(x);
   1.164 +      old_loop = get_loop(old_ctrl); // Get prior loop
   1.165 +    }
   1.166 +    // New late point must dominate new use
   1.167 +    Node *new_ctrl = dom_lca(old_ctrl, region->in(i2));
   1.168 +    if (new_ctrl == old_ctrl) // Nothing is changed
   1.169 +      continue;
   1.170 +
   1.171 +    IdealLoopTree *new_loop = get_loop(new_ctrl);
   1.172 +
   1.173 +    // Don't move x into a loop if its uses are
   1.174 +    // outside of loop. Otherwise x will be cloned
   1.175 +    // for each use outside of this loop.
   1.176 +    IdealLoopTree *use_loop = get_loop(region);
   1.177 +    if (!new_loop->is_member(use_loop) &&
   1.178 +        (old_loop == NULL || !new_loop->is_member(old_loop))) {
   1.179 +      // Take early control, later control will be recalculated
   1.180 +      // during next iteration of loop optimizations.
   1.181 +      new_ctrl = get_early_ctrl(x);
   1.182 +      new_loop = get_loop(new_ctrl);
   1.183 +    }
   1.184 +    // Set new location
   1.185 +    set_ctrl(x, new_ctrl);
   1.186 +    // If changing loop bodies, see if we need to collect into new body
   1.187 +    if (old_loop != new_loop) {
   1.188 +      if (old_loop && !old_loop->_child)
   1.189 +        old_loop->_body.yank(x);
   1.190 +      if (!new_loop->_child)
   1.191 +        new_loop->_body.push(x);  // Collect body info
   1.192 +    }
   1.193 +  }
   1.194 +
   1.195 +  return phi;
   1.196 +}
   1.197 +
   1.198 +//------------------------------dominated_by------------------------------------
   1.199 +// Replace the dominated test with an obvious true or false.  Place it on the
   1.200 +// IGVN worklist for later cleanup.  Move control-dependent data Nodes on the
   1.201 +// live path up to the dominating control.
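          +// For example, if 'prevdom' is the IfTrue projection of a dominating If
          +// that shares the same Bool, the dominated If is fed the constant 1
          +// (or 0 when 'flip' reverses the sense), so IGVN later folds it away and
          +// only the path matching the dominating projection survives.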
   1.202 +void PhaseIdealLoop::dominated_by( Node *prevdom, Node *iff, bool flip, bool exclude_loop_predicate ) {
   1.203 +#ifndef PRODUCT
   1.204 +  if (VerifyLoopOptimizations && PrintOpto) tty->print_cr("dominating test");
   1.205 +#endif
   1.206 +
   1.207 +
   1.208 +  // prevdom is the dominating projection of the dominating test.
   1.209 +  assert( iff->is_If(), "" );
   1.210 +  assert( iff->Opcode() == Op_If || iff->Opcode() == Op_CountedLoopEnd, "Check this code when new subtype is added");
   1.211 +  int pop = prevdom->Opcode();
   1.212 +  assert( pop == Op_IfFalse || pop == Op_IfTrue, "" );
   1.213 +  if (flip) {
   1.214 +    if (pop == Op_IfTrue)
   1.215 +      pop = Op_IfFalse;
   1.216 +    else
   1.217 +      pop = Op_IfTrue;
   1.218 +  }
   1.219 +  // 'con' is set to true or false to kill the dominated test.
   1.220 +  Node *con = _igvn.makecon(pop == Op_IfTrue ? TypeInt::ONE : TypeInt::ZERO);
   1.221 +  set_ctrl(con, C->root()); // Constant gets a new use
   1.222 +  // Hack the dominated test
   1.223 +  _igvn.replace_input_of(iff, 1, con);
   1.224 +
    1.225 +  // If I don't have a reachable TRUE and FALSE path following the IfNode then
   1.226 +  // I can assume this path reaches an infinite loop.  In this case it's not
   1.227 +  // important to optimize the data Nodes - either the whole compilation will
   1.228 +  // be tossed or this path (and all data Nodes) will go dead.
   1.229 +  if (iff->outcnt() != 2) return;
   1.230 +
   1.231 +  // Make control-dependent data Nodes on the live path (path that will remain
   1.232 +  // once the dominated IF is removed) become control-dependent on the
   1.233 +  // dominating projection.
   1.234 +  Node* dp = iff->as_If()->proj_out(pop == Op_IfTrue);
   1.235 +
    1.236 +  // Loop predicates may have dependent checks which should not
    1.237 +  // be skipped. For example, a range check predicate has two checks
   1.238 +  // for lower and upper bounds.
   1.239 +  if (dp == NULL)
   1.240 +    return;
   1.241 +
   1.242 +  ProjNode* dp_proj  = dp->as_Proj();
   1.243 +  ProjNode* unc_proj = iff->as_If()->proj_out(1 - dp_proj->_con)->as_Proj();
   1.244 +  if (exclude_loop_predicate &&
   1.245 +      unc_proj->is_uncommon_trap_proj(Deoptimization::Reason_predicate))
   1.246 +    return; // Let IGVN transformation change control dependence.
   1.247 +
   1.248 +  IdealLoopTree *old_loop = get_loop(dp);
   1.249 +
   1.250 +  for (DUIterator_Fast imax, i = dp->fast_outs(imax); i < imax; i++) {
   1.251 +    Node* cd = dp->fast_out(i); // Control-dependent node
   1.252 +    if (cd->depends_only_on_test()) {
   1.253 +      assert(cd->in(0) == dp, "");
   1.254 +      _igvn.replace_input_of(cd, 0, prevdom);
   1.255 +      set_early_ctrl(cd);
   1.256 +      IdealLoopTree *new_loop = get_loop(get_ctrl(cd));
   1.257 +      if (old_loop != new_loop) {
   1.258 +        if (!old_loop->_child) old_loop->_body.yank(cd);
   1.259 +        if (!new_loop->_child) new_loop->_body.push(cd);
   1.260 +      }
   1.261 +      --i;
   1.262 +      --imax;
   1.263 +    }
   1.264 +  }
   1.265 +}
   1.266 +
   1.267 +//------------------------------has_local_phi_input----------------------------
    1.268 +// Return n's control block if 'n' has Phi inputs from that block and no other
    1.269 +// block-local inputs (all non-local-phi inputs come from earlier blocks).
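          +// For example, AddI(Phi(n_ctrl, a, b), inv) qualifies, but it does not if
          +// the other input is a non-Phi node that also lives in n_ctrl's block
          +// (except for the AddP special case handled below).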
   1.270 +Node *PhaseIdealLoop::has_local_phi_input( Node *n ) {
   1.271 +  Node *n_ctrl = get_ctrl(n);
   1.272 +  // See if some inputs come from a Phi in this block, or from before
   1.273 +  // this block.
   1.274 +  uint i;
   1.275 +  for( i = 1; i < n->req(); i++ ) {
   1.276 +    Node *phi = n->in(i);
   1.277 +    if( phi->is_Phi() && phi->in(0) == n_ctrl )
   1.278 +      break;
   1.279 +  }
   1.280 +  if( i >= n->req() )
   1.281 +    return NULL;                // No Phi inputs; nowhere to clone thru
   1.282 +
   1.283 +  // Check for inputs created between 'n' and the Phi input.  These
   1.284 +  // must split as well; they have already been given the chance
   1.285 +  // (courtesy of a post-order visit) and since they did not we must
   1.286 +  // recover the 'cost' of splitting them by being very profitable
   1.287 +  // when splitting 'n'.  Since this is unlikely we simply give up.
   1.288 +  for( i = 1; i < n->req(); i++ ) {
   1.289 +    Node *m = n->in(i);
   1.290 +    if( get_ctrl(m) == n_ctrl && !m->is_Phi() ) {
   1.291 +      // We allow the special case of AddP's with no local inputs.
   1.292 +      // This allows us to split-up address expressions.
   1.293 +      if (m->is_AddP() &&
   1.294 +          get_ctrl(m->in(2)) != n_ctrl &&
   1.295 +          get_ctrl(m->in(3)) != n_ctrl) {
   1.296 +        // Move the AddP up to dominating point
   1.297 +        set_ctrl_and_loop(m, find_non_split_ctrl(idom(n_ctrl)));
   1.298 +        continue;
   1.299 +      }
   1.300 +      return NULL;
   1.301 +    }
   1.302 +  }
   1.303 +
   1.304 +  return n_ctrl;
   1.305 +}
   1.306 +
   1.307 +//------------------------------remix_address_expressions----------------------
   1.308 +// Rework addressing expressions to get the most loop-invariant stuff
   1.309 +// moved out.  We'd like to do all associative operators, but it's especially
   1.310 +// important (common) to do address expressions.
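          +// For example, with loop-invariant 'inv' and loop-varying 'i',
          +//   (inv + i) << 2   is reshaped into   (i << 2) + (inv << 2)
          +// so the invariant half can be computed once outside the loop.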
   1.311 +Node *PhaseIdealLoop::remix_address_expressions( Node *n ) {
   1.312 +  if (!has_ctrl(n))  return NULL;
   1.313 +  Node *n_ctrl = get_ctrl(n);
   1.314 +  IdealLoopTree *n_loop = get_loop(n_ctrl);
   1.315 +
   1.316 +  // See if 'n' mixes loop-varying and loop-invariant inputs and
   1.317 +  // itself is loop-varying.
   1.318 +
   1.319 +  // Only interested in binary ops (and AddP)
   1.320 +  if( n->req() < 3 || n->req() > 4 ) return NULL;
   1.321 +
   1.322 +  Node *n1_ctrl = get_ctrl(n->in(                    1));
   1.323 +  Node *n2_ctrl = get_ctrl(n->in(                    2));
   1.324 +  Node *n3_ctrl = get_ctrl(n->in(n->req() == 3 ? 2 : 3));
   1.325 +  IdealLoopTree *n1_loop = get_loop( n1_ctrl );
   1.326 +  IdealLoopTree *n2_loop = get_loop( n2_ctrl );
   1.327 +  IdealLoopTree *n3_loop = get_loop( n3_ctrl );
   1.328 +
   1.329 +  // Does one of my inputs spin in a tighter loop than self?
   1.330 +  if( (n_loop->is_member( n1_loop ) && n_loop != n1_loop) ||
   1.331 +      (n_loop->is_member( n2_loop ) && n_loop != n2_loop) ||
   1.332 +      (n_loop->is_member( n3_loop ) && n_loop != n3_loop) )
   1.333 +    return NULL;                // Leave well enough alone
   1.334 +
   1.335 +  // Is at least one of my inputs loop-invariant?
   1.336 +  if( n1_loop == n_loop &&
   1.337 +      n2_loop == n_loop &&
   1.338 +      n3_loop == n_loop )
   1.339 +    return NULL;                // No loop-invariant inputs
   1.340 +
   1.341 +
   1.342 +  int n_op = n->Opcode();
   1.343 +
   1.344 +  // Replace expressions like ((V+I) << 2) with (V<<2 + I<<2).
   1.345 +  if( n_op == Op_LShiftI ) {
   1.346 +    // Scale is loop invariant
   1.347 +    Node *scale = n->in(2);
   1.348 +    Node *scale_ctrl = get_ctrl(scale);
   1.349 +    IdealLoopTree *scale_loop = get_loop(scale_ctrl );
   1.350 +    if( n_loop == scale_loop || !scale_loop->is_member( n_loop ) )
   1.351 +      return NULL;
   1.352 +    const TypeInt *scale_t = scale->bottom_type()->isa_int();
   1.353 +    if( scale_t && scale_t->is_con() && scale_t->get_con() >= 16 )
    1.354 +      return NULL;              // Don't bother with byte/short masking
   1.355 +    // Add must vary with loop (else shift would be loop-invariant)
   1.356 +    Node *add = n->in(1);
   1.357 +    Node *add_ctrl = get_ctrl(add);
   1.358 +    IdealLoopTree *add_loop = get_loop(add_ctrl);
   1.359 +    //assert( n_loop == add_loop, "" );
   1.360 +    if( n_loop != add_loop ) return NULL;  // happens w/ evil ZKM loops
   1.361 +
   1.362 +    // Convert I-V into I+ (0-V); same for V-I
   1.363 +    if( add->Opcode() == Op_SubI &&
   1.364 +        _igvn.type( add->in(1) ) != TypeInt::ZERO ) {
   1.365 +      Node *zero = _igvn.intcon(0);
   1.366 +      set_ctrl(zero, C->root());
   1.367 +      Node *neg = new (C) SubINode( _igvn.intcon(0), add->in(2) );
   1.368 +      register_new_node( neg, get_ctrl(add->in(2) ) );
   1.369 +      add = new (C) AddINode( add->in(1), neg );
   1.370 +      register_new_node( add, add_ctrl );
   1.371 +    }
   1.372 +    if( add->Opcode() != Op_AddI ) return NULL;
   1.373 +    // See if one add input is loop invariant
   1.374 +    Node *add_var = add->in(1);
   1.375 +    Node *add_var_ctrl = get_ctrl(add_var);
   1.376 +    IdealLoopTree *add_var_loop = get_loop(add_var_ctrl );
   1.377 +    Node *add_invar = add->in(2);
   1.378 +    Node *add_invar_ctrl = get_ctrl(add_invar);
   1.379 +    IdealLoopTree *add_invar_loop = get_loop(add_invar_ctrl );
   1.380 +    if( add_var_loop == n_loop ) {
   1.381 +    } else if( add_invar_loop == n_loop ) {
   1.382 +      // Swap to find the invariant part
   1.383 +      add_invar = add_var;
   1.384 +      add_invar_ctrl = add_var_ctrl;
   1.385 +      add_invar_loop = add_var_loop;
   1.386 +      add_var = add->in(2);
    1.387 +      add_var_ctrl = get_ctrl(add_var);
    1.388 +      add_var_loop = get_loop(add_var_ctrl);
   1.389 +    } else                      // Else neither input is loop invariant
   1.390 +      return NULL;
   1.391 +    if( n_loop == add_invar_loop || !add_invar_loop->is_member( n_loop ) )
   1.392 +      return NULL;              // No invariant part of the add?
   1.393 +
   1.394 +    // Yes!  Reshape address expression!
   1.395 +    Node *inv_scale = new (C) LShiftINode( add_invar, scale );
   1.396 +    Node *inv_scale_ctrl =
   1.397 +      dom_depth(add_invar_ctrl) > dom_depth(scale_ctrl) ?
   1.398 +      add_invar_ctrl : scale_ctrl;
   1.399 +    register_new_node( inv_scale, inv_scale_ctrl );
   1.400 +    Node *var_scale = new (C) LShiftINode( add_var, scale );
   1.401 +    register_new_node( var_scale, n_ctrl );
   1.402 +    Node *var_add = new (C) AddINode( var_scale, inv_scale );
   1.403 +    register_new_node( var_add, n_ctrl );
   1.404 +    _igvn.replace_node( n, var_add );
   1.405 +    return var_add;
   1.406 +  }
   1.407 +
   1.408 +  // Replace (I+V) with (V+I)
   1.409 +  if( n_op == Op_AddI ||
   1.410 +      n_op == Op_AddL ||
   1.411 +      n_op == Op_AddF ||
   1.412 +      n_op == Op_AddD ||
   1.413 +      n_op == Op_MulI ||
   1.414 +      n_op == Op_MulL ||
   1.415 +      n_op == Op_MulF ||
   1.416 +      n_op == Op_MulD ) {
   1.417 +    if( n2_loop == n_loop ) {
   1.418 +      assert( n1_loop != n_loop, "" );
   1.419 +      n->swap_edges(1, 2);
   1.420 +    }
   1.421 +  }
   1.422 +
   1.423 +  // Replace ((I1 +p V) +p I2) with ((I1 +p I2) +p V),
   1.424 +  // but not if I2 is a constant.
   1.425 +  if( n_op == Op_AddP ) {
   1.426 +    if( n2_loop == n_loop && n3_loop != n_loop ) {
   1.427 +      if( n->in(2)->Opcode() == Op_AddP && !n->in(3)->is_Con() ) {
   1.428 +        Node *n22_ctrl = get_ctrl(n->in(2)->in(2));
   1.429 +        Node *n23_ctrl = get_ctrl(n->in(2)->in(3));
   1.430 +        IdealLoopTree *n22loop = get_loop( n22_ctrl );
   1.431 +        IdealLoopTree *n23_loop = get_loop( n23_ctrl );
   1.432 +        if( n22loop != n_loop && n22loop->is_member(n_loop) &&
   1.433 +            n23_loop == n_loop ) {
   1.434 +          Node *add1 = new (C) AddPNode( n->in(1), n->in(2)->in(2), n->in(3) );
   1.435 +          // Stuff new AddP in the loop preheader
   1.436 +          register_new_node( add1, n_loop->_head->in(LoopNode::EntryControl) );
   1.437 +          Node *add2 = new (C) AddPNode( n->in(1), add1, n->in(2)->in(3) );
   1.438 +          register_new_node( add2, n_ctrl );
   1.439 +          _igvn.replace_node( n, add2 );
   1.440 +          return add2;
   1.441 +        }
   1.442 +      }
   1.443 +    }
   1.444 +
   1.445 +    // Replace (I1 +p (I2 + V)) with ((I1 +p I2) +p V)
   1.446 +    if( n2_loop != n_loop && n3_loop == n_loop ) {
   1.447 +      if( n->in(3)->Opcode() == Op_AddI ) {
   1.448 +        Node *V = n->in(3)->in(1);
   1.449 +        Node *I = n->in(3)->in(2);
   1.450 +        if( is_member(n_loop,get_ctrl(V)) ) {
   1.451 +        } else {
   1.452 +          Node *tmp = V; V = I; I = tmp;
   1.453 +        }
   1.454 +        if( !is_member(n_loop,get_ctrl(I)) ) {
   1.455 +          Node *add1 = new (C) AddPNode( n->in(1), n->in(2), I );
   1.456 +          // Stuff new AddP in the loop preheader
   1.457 +          register_new_node( add1, n_loop->_head->in(LoopNode::EntryControl) );
   1.458 +          Node *add2 = new (C) AddPNode( n->in(1), add1, V );
   1.459 +          register_new_node( add2, n_ctrl );
   1.460 +          _igvn.replace_node( n, add2 );
   1.461 +          return add2;
   1.462 +        }
   1.463 +      }
   1.464 +    }
   1.465 +  }
   1.466 +
   1.467 +  return NULL;
   1.468 +}
   1.469 +
   1.470 +//------------------------------conditional_move-------------------------------
   1.471 +// Attempt to replace a Phi with a conditional move.  We have some pretty
   1.472 +// strict profitability requirements.  All Phis at the merge point must
   1.473 +// be converted, so we can remove the control flow.  We need to limit the
   1.474 +// number of c-moves to a small handful.  All code that was in the side-arms
   1.475 +// of the CFG diamond is now speculatively executed.  This code has to be
   1.476 +// "cheap enough".  We are pretty much limited to CFG diamonds that merge
   1.477 +// 1 or 2 items with a total of 1 or 2 ops executed speculatively.
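          +// For example, the diamond
          +//   if (tst) { x = a; } else { x = b; }     // x merged by a Phi
          +// is collapsed into  x = CMove(tst, a, b),  with the (cheap) work from
          +// both arms now executed speculatively.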
   1.478 +Node *PhaseIdealLoop::conditional_move( Node *region ) {
   1.479 +
   1.480 +  assert(region->is_Region(), "sanity check");
   1.481 +  if (region->req() != 3) return NULL;
   1.482 +
   1.483 +  // Check for CFG diamond
   1.484 +  Node *lp = region->in(1);
   1.485 +  Node *rp = region->in(2);
   1.486 +  if (!lp || !rp) return NULL;
   1.487 +  Node *lp_c = lp->in(0);
   1.488 +  if (lp_c == NULL || lp_c != rp->in(0) || !lp_c->is_If()) return NULL;
   1.489 +  IfNode *iff = lp_c->as_If();
   1.490 +
   1.491 +  // Check for ops pinned in an arm of the diamond.
   1.492 +  // Can't remove the control flow in this case
   1.493 +  if (lp->outcnt() > 1) return NULL;
   1.494 +  if (rp->outcnt() > 1) return NULL;
   1.495 +
   1.496 +  IdealLoopTree* r_loop = get_loop(region);
   1.497 +  assert(r_loop == get_loop(iff), "sanity");
   1.498 +  // Always convert to CMOVE if all results are used only outside this loop.
   1.499 +  bool used_inside_loop = (r_loop == _ltree_root);
   1.500 +
   1.501 +  // Check profitability
   1.502 +  int cost = 0;
   1.503 +  int phis = 0;
   1.504 +  for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
   1.505 +    Node *out = region->fast_out(i);
   1.506 +    if (!out->is_Phi()) continue; // Ignore other control edges, etc
   1.507 +    phis++;
   1.508 +    PhiNode* phi = out->as_Phi();
   1.509 +    BasicType bt = phi->type()->basic_type();
   1.510 +    switch (bt) {
   1.511 +    case T_FLOAT:
   1.512 +    case T_DOUBLE: {
   1.513 +      cost += Matcher::float_cmove_cost(); // Could be very expensive
   1.514 +      break;
   1.515 +    }
   1.516 +    case T_LONG: {
    1.517 +      cost += Matcher::long_cmove_cost(); // May encode as 2 CMOVs
   1.518 +    }
   1.519 +    case T_INT:                 // These all CMOV fine
   1.520 +    case T_ADDRESS: {           // (RawPtr)
   1.521 +      cost++;
   1.522 +      break;
   1.523 +    }
   1.524 +    case T_NARROWOOP: // Fall through
   1.525 +    case T_OBJECT: {            // Base oops are OK, but not derived oops
   1.526 +      const TypeOopPtr *tp = phi->type()->make_ptr()->isa_oopptr();
   1.527 +      // Derived pointers are Bad (tm): what's the Base (for GC purposes) of a
   1.528 +      // CMOVE'd derived pointer?  It's a CMOVE'd derived base.  Thus
   1.529 +      // CMOVE'ing a derived pointer requires we also CMOVE the base.  If we
   1.530 +      // have a Phi for the base here that we convert to a CMOVE all is well
   1.531 +      // and good.  But if the base is dead, we'll not make a CMOVE.  Later
   1.532 +      // the allocator will have to produce a base by creating a CMOVE of the
   1.533 +      // relevant bases.  This puts the allocator in the business of
   1.534 +      // manufacturing expensive instructions, generally a bad plan.
   1.535 +      // Just Say No to Conditionally-Moved Derived Pointers.
   1.536 +      if (tp && tp->offset() != 0)
   1.537 +        return NULL;
   1.538 +      cost++;
   1.539 +      break;
   1.540 +    }
   1.541 +    default:
   1.542 +      return NULL;              // In particular, can't do memory or I/O
   1.543 +    }
   1.544 +    // Add in cost any speculative ops
   1.545 +    for (uint j = 1; j < region->req(); j++) {
   1.546 +      Node *proj = region->in(j);
   1.547 +      Node *inp = phi->in(j);
   1.548 +      if (get_ctrl(inp) == proj) { // Found local op
   1.549 +        cost++;
   1.550 +        // Check for a chain of dependent ops; these will all become
   1.551 +        // speculative in a CMOV.
   1.552 +        for (uint k = 1; k < inp->req(); k++)
   1.553 +          if (get_ctrl(inp->in(k)) == proj)
   1.554 +            cost += ConditionalMoveLimit; // Too much speculative goo
   1.555 +      }
   1.556 +    }
   1.557 +    // See if the Phi is used by a Cmp or Narrow oop Decode/Encode.
   1.558 +    // This will likely Split-If, a higher-payoff operation.
   1.559 +    for (DUIterator_Fast kmax, k = phi->fast_outs(kmax); k < kmax; k++) {
   1.560 +      Node* use = phi->fast_out(k);
   1.561 +      if (use->is_Cmp() || use->is_DecodeNarrowPtr() || use->is_EncodeNarrowPtr())
   1.562 +        cost += ConditionalMoveLimit;
   1.563 +      // Is there a use inside the loop?
   1.564 +      // Note: check only basic types since CMoveP is pinned.
   1.565 +      if (!used_inside_loop && is_java_primitive(bt)) {
   1.566 +        IdealLoopTree* u_loop = get_loop(has_ctrl(use) ? get_ctrl(use) : use);
   1.567 +        if (r_loop == u_loop || r_loop->is_member(u_loop)) {
   1.568 +          used_inside_loop = true;
   1.569 +        }
   1.570 +      }
   1.571 +    }
   1.572 +  }
   1.573 +  Node* bol = iff->in(1);
   1.574 +  assert(bol->Opcode() == Op_Bool, "");
   1.575 +  int cmp_op = bol->in(1)->Opcode();
   1.576 +  // It is expensive to generate flags from a float compare.
   1.577 +  // Avoid duplicated float compare.
   1.578 +  if (phis > 1 && (cmp_op == Op_CmpF || cmp_op == Op_CmpD)) return NULL;
   1.579 +
   1.580 +  float infrequent_prob = PROB_UNLIKELY_MAG(3);
    1.581 +  // Ignore cost and block frequency if the CMOVE can be moved outside the loop.
   1.582 +  if (used_inside_loop) {
   1.583 +    if (cost >= ConditionalMoveLimit) return NULL; // Too much goo
   1.584 +
    1.585 +    // The BlockLayoutByFrequency optimization moves the infrequent branch
    1.586 +    // off the hot path, so there is no point in CMOV'ing in that case
    1.587 +    // (110 is used instead of 100 to account for float inexactness).
   1.588 +    if (BlockLayoutByFrequency) {
   1.589 +      infrequent_prob = MAX2(infrequent_prob, (float)BlockLayoutMinDiamondPercentage/110.0f);
   1.590 +    }
   1.591 +  }
   1.592 +  // Check for highly predictable branch.  No point in CMOV'ing if
   1.593 +  // we are going to predict accurately all the time.
   1.594 +  if (iff->_prob < infrequent_prob ||
   1.595 +      iff->_prob > (1.0f - infrequent_prob))
   1.596 +    return NULL;
   1.597 +
   1.598 +  // --------------
   1.599 +  // Now replace all Phis with CMOV's
   1.600 +  Node *cmov_ctrl = iff->in(0);
   1.601 +  uint flip = (lp->Opcode() == Op_IfTrue);
   1.602 +  while (1) {
   1.603 +    PhiNode* phi = NULL;
   1.604 +    for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
   1.605 +      Node *out = region->fast_out(i);
   1.606 +      if (out->is_Phi()) {
   1.607 +        phi = out->as_Phi();
   1.608 +        break;
   1.609 +      }
   1.610 +    }
   1.611 +    if (phi == NULL)  break;
   1.612 +#ifndef PRODUCT
   1.613 +    if (PrintOpto && VerifyLoopOptimizations) tty->print_cr("CMOV");
   1.614 +#endif
   1.615 +    // Move speculative ops
   1.616 +    for (uint j = 1; j < region->req(); j++) {
   1.617 +      Node *proj = region->in(j);
   1.618 +      Node *inp = phi->in(j);
   1.619 +      if (get_ctrl(inp) == proj) { // Found local op
   1.620 +#ifndef PRODUCT
   1.621 +        if (PrintOpto && VerifyLoopOptimizations) {
   1.622 +          tty->print("  speculate: ");
   1.623 +          inp->dump();
   1.624 +        }
   1.625 +#endif
   1.626 +        set_ctrl(inp, cmov_ctrl);
   1.627 +      }
   1.628 +    }
   1.629 +    Node *cmov = CMoveNode::make( C, cmov_ctrl, iff->in(1), phi->in(1+flip), phi->in(2-flip), _igvn.type(phi) );
   1.630 +    register_new_node( cmov, cmov_ctrl );
   1.631 +    _igvn.replace_node( phi, cmov );
   1.632 +#ifndef PRODUCT
   1.633 +    if (TraceLoopOpts) {
   1.634 +      tty->print("CMOV  ");
   1.635 +      r_loop->dump_head();
   1.636 +      if (Verbose) {
   1.637 +        bol->in(1)->dump(1);
   1.638 +        cmov->dump(1);
   1.639 +      }
   1.640 +    }
   1.641 +    if (VerifyLoopOptimizations) verify();
   1.642 +#endif
   1.643 +  }
   1.644 +
   1.645 +  // The useless CFG diamond will fold up later; see the optimization in
   1.646 +  // RegionNode::Ideal.
   1.647 +  _igvn._worklist.push(region);
   1.648 +
   1.649 +  return iff->in(1);
   1.650 +}
   1.651 +
   1.652 +//------------------------------split_if_with_blocks_pre-----------------------
   1.653 +// Do the real work in a non-recursive function.  Data nodes want to be
   1.654 +// cloned in the pre-order so they can feed each other nicely.
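          +// (Splitting a def through a Region first replaces it with a Phi there;
          +// a use in that block then has a local Phi input of its own and can be
          +// split as well, feeding straight off the def's per-path clones.)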
   1.655 +Node *PhaseIdealLoop::split_if_with_blocks_pre( Node *n ) {
   1.656 +  // Cloning these guys is unlikely to win
   1.657 +  int n_op = n->Opcode();
   1.658 +  if( n_op == Op_MergeMem ) return n;
   1.659 +  if( n->is_Proj() ) return n;
   1.660 +  // Do not clone-up CmpFXXX variations, as these are always
   1.661 +  // followed by a CmpI
   1.662 +  if( n->is_Cmp() ) return n;
   1.663 +  // Attempt to use a conditional move instead of a phi/branch
   1.664 +  if( ConditionalMoveLimit > 0 && n_op == Op_Region ) {
   1.665 +    Node *cmov = conditional_move( n );
   1.666 +    if( cmov ) return cmov;
   1.667 +  }
   1.668 +  if( n->is_CFG() || n->is_LoadStore() )
   1.669 +    return n;
   1.670 +  if( n_op == Op_Opaque1 ||     // Opaque nodes cannot be mod'd
   1.671 +      n_op == Op_Opaque2 ) {
   1.672 +    if( !C->major_progress() )   // If chance of no more loop opts...
   1.673 +      _igvn._worklist.push(n);  // maybe we'll remove them
   1.674 +    return n;
   1.675 +  }
   1.676 +
   1.677 +  if( n->is_Con() ) return n;   // No cloning for Con nodes
   1.678 +
   1.679 +  Node *n_ctrl = get_ctrl(n);
   1.680 +  if( !n_ctrl ) return n;       // Dead node
   1.681 +
   1.682 +  // Attempt to remix address expressions for loop invariants
   1.683 +  Node *m = remix_address_expressions( n );
   1.684 +  if( m ) return m;
   1.685 +
   1.686 +  // Determine if the Node has inputs from some local Phi.
   1.687 +  // Returns the block to clone thru.
   1.688 +  Node *n_blk = has_local_phi_input( n );
   1.689 +  if( !n_blk ) return n;
   1.690 +  // Do not clone the trip counter through on a CountedLoop
   1.691 +  // (messes up the canonical shape).
   1.692 +  if( n_blk->is_CountedLoop() && n->Opcode() == Op_AddI ) return n;
   1.693 +
   1.694 +  // Check for having no control input; not pinned.  Allow
   1.695 +  // dominating control.
   1.696 +  if( n->in(0) ) {
   1.697 +    Node *dom = idom(n_blk);
   1.698 +    if( dom_lca( n->in(0), dom ) != n->in(0) )
   1.699 +      return n;
   1.700 +  }
   1.701 +  // Policy: when is it profitable.  You must get more wins than
   1.702 +  // policy before it is considered profitable.  Policy is usually 0,
   1.703 +  // so 1 win is considered profitable.  Big merges will require big
   1.704 +  // cloning, so get a larger policy.
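          +  // For example, a 2-path Region (req() == 3) gives policy 0, so a single
          +  // win suffices, while an 8-path Region (req() == 9) gives policy 2 and
          +  // needs at least 3 wins.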
   1.705 +  int policy = n_blk->req() >> 2;
   1.706 +
   1.707 +  // If the loop is a candidate for range check elimination,
    1.708 +  // delay splitting through its phi until a later loop optimization
   1.709 +  if (n_blk->is_CountedLoop()) {
   1.710 +    IdealLoopTree *lp = get_loop(n_blk);
   1.711 +    if (lp && lp->_rce_candidate) {
   1.712 +      return n;
   1.713 +    }
   1.714 +  }
   1.715 +
   1.716 +  // Use same limit as split_if_with_blocks_post
   1.717 +  if( C->unique() > 35000 ) return n; // Method too big
   1.718 +
   1.719 +  // Split 'n' through the merge point if it is profitable
   1.720 +  Node *phi = split_thru_phi( n, n_blk, policy );
   1.721 +  if (!phi) return n;
   1.722 +
   1.723 +  // Found a Phi to split thru!
   1.724 +  // Replace 'n' with the new phi
   1.725 +  _igvn.replace_node( n, phi );
   1.726 +  // Moved a load around the loop, 'en-registering' something.
   1.727 +  if (n_blk->is_Loop() && n->is_Load() &&
   1.728 +      !phi->in(LoopNode::LoopBackControl)->is_Load())
   1.729 +    C->set_major_progress();
   1.730 +
   1.731 +  return phi;
   1.732 +}
   1.733 +
   1.734 +static bool merge_point_too_heavy(Compile* C, Node* region) {
   1.735 +  // Bail out if the region and its phis have too many users.
   1.736 +  int weight = 0;
   1.737 +  for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
   1.738 +    weight += region->fast_out(i)->outcnt();
   1.739 +  }
   1.740 +  int nodes_left = MaxNodeLimit - C->live_nodes();
   1.741 +  if (weight * 8 > nodes_left) {
   1.742 +#ifndef PRODUCT
   1.743 +    if (PrintOpto)
   1.744 +      tty->print_cr("*** Split-if bails out:  %d nodes, region weight %d", C->unique(), weight);
   1.745 +#endif
   1.746 +    return true;
   1.747 +  } else {
   1.748 +    return false;
   1.749 +  }
   1.750 +}
   1.751 +
   1.752 +static bool merge_point_safe(Node* region) {
   1.753 +  // 4799512: Stop split_if_with_blocks from splitting a block with a ConvI2LNode
   1.754 +  // having a PhiNode input. This sidesteps the dangerous case where the split
   1.755 +  // ConvI2LNode may become TOP if the input Value() does not
   1.756 +  // overlap the ConvI2L range, leaving a node which may not dominate its
   1.757 +  // uses.
   1.758 +  // A better fix for this problem can be found in the BugTraq entry, but
   1.759 +  // expediency for Mantis demands this hack.
   1.760 +  // 6855164: If the merge point has a FastLockNode with a PhiNode input, we stop
   1.761 +  // split_if_with_blocks from splitting a block because we could not move around
   1.762 +  // the FastLockNode.
   1.763 +  for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
   1.764 +    Node* n = region->fast_out(i);
   1.765 +    if (n->is_Phi()) {
   1.766 +      for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
   1.767 +        Node* m = n->fast_out(j);
   1.768 +        if (m->is_FastLock())
   1.769 +          return false;
   1.770 +#ifdef _LP64
   1.771 +        if (m->Opcode() == Op_ConvI2L)
   1.772 +          return false;
   1.773 +#endif
   1.774 +      }
   1.775 +    }
   1.776 +  }
   1.777 +  return true;
   1.778 +}
   1.779 +
   1.780 +
   1.781 +//------------------------------place_near_use---------------------------------
   1.782 +// Place some computation next to use but not inside inner loops.
   1.783 +// For inner loop uses move it to the preheader area.
   1.784 +Node *PhaseIdealLoop::place_near_use( Node *useblock ) const {
   1.785 +  IdealLoopTree *u_loop = get_loop( useblock );
   1.786 +  return (u_loop->_irreducible || u_loop->_child)
   1.787 +    ? useblock
   1.788 +    : u_loop->_head->in(LoopNode::EntryControl);
   1.789 +}
   1.790 +
   1.791 +
   1.792 +//------------------------------split_if_with_blocks_post----------------------
   1.793 +// Do the real work in a non-recursive function.  CFG hackery wants to be
   1.794 +// in the post-order, so it can dirty the I-DOM info and not use the dirtied
   1.795 +// info.
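          +//
          +// The classic split-if shape handled first below is
          +//   region: Region(A, B)
          +//   phi:    Phi(region, a, b)
          +//   cmp:    Cmp(phi, q) -> Bool -> If        (all in region's block)
          +// The Cmp and Bool are split into per-path copies through the Phi and
          +// the If itself is then split through the Region (do_split_if).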
   1.796 +void PhaseIdealLoop::split_if_with_blocks_post( Node *n ) {
   1.797 +
   1.798 +  // Cloning Cmp through Phi's involves the split-if transform.
   1.799 +  // FastLock is not used by an If
   1.800 +  if( n->is_Cmp() && !n->is_FastLock() ) {
   1.801 +    if( C->unique() > 35000 ) return; // Method too big
   1.802 +
   1.803 +    // Do not do 'split-if' if irreducible loops are present.
   1.804 +    if( _has_irreducible_loops )
   1.805 +      return;
   1.806 +
   1.807 +    Node *n_ctrl = get_ctrl(n);
   1.808 +    // Determine if the Node has inputs from some local Phi.
   1.809 +    // Returns the block to clone thru.
   1.810 +    Node *n_blk = has_local_phi_input( n );
   1.811 +    if( n_blk != n_ctrl ) return;
   1.812 +
   1.813 +    if( merge_point_too_heavy(C, n_ctrl) )
   1.814 +      return;
   1.815 +
   1.816 +    if( n->outcnt() != 1 ) return; // Multiple bool's from 1 compare?
   1.817 +    Node *bol = n->unique_out();
   1.818 +    assert( bol->is_Bool(), "expect a bool here" );
   1.819 +    if( bol->outcnt() != 1 ) return;// Multiple branches from 1 compare?
   1.820 +    Node *iff = bol->unique_out();
   1.821 +
   1.822 +    // Check some safety conditions
   1.823 +    if( iff->is_If() ) {        // Classic split-if?
   1.824 +      if( iff->in(0) != n_ctrl ) return; // Compare must be in same blk as if
   1.825 +    } else if (iff->is_CMove()) { // Trying to split-up a CMOVE
   1.826 +      // Can't split CMove with different control edge.
   1.827 +      if (iff->in(0) != NULL && iff->in(0) != n_ctrl ) return;
   1.828 +      if( get_ctrl(iff->in(2)) == n_ctrl ||
   1.829 +          get_ctrl(iff->in(3)) == n_ctrl )
   1.830 +        return;                 // Inputs not yet split-up
   1.831 +      if ( get_loop(n_ctrl) != get_loop(get_ctrl(iff)) ) {
   1.832 +        return;                 // Loop-invar test gates loop-varying CMOVE
   1.833 +      }
   1.834 +    } else {
   1.835 +      return;  // some other kind of node, such as an Allocate
   1.836 +    }
   1.837 +
   1.838 +    // Do not do 'split-if' if some paths are dead.  First do dead code
    1.839 +    // elimination and then see if it's still profitable.
   1.840 +    for( uint i = 1; i < n_ctrl->req(); i++ )
   1.841 +      if( n_ctrl->in(i) == C->top() )
   1.842 +        return;
   1.843 +
    1.844 +    // When is split-if profitable?  Every 'win' means some control flow
    1.845 +    // goes dead, so it's almost always a win.
   1.846 +    int policy = 0;
   1.847 +    // If trying to do a 'Split-If' at the loop head, it is only
   1.848 +    // profitable if the cmp folds up on BOTH paths.  Otherwise we
   1.849 +    // risk peeling a loop forever.
   1.850 +
   1.851 +    // CNC - Disabled for now.  Requires careful handling of loop
   1.852 +    // body selection for the cloned code.  Also, make sure we check
   1.853 +    // for any input path not being in the same loop as n_ctrl.  For
   1.854 +    // irreducible loops we cannot check for 'n_ctrl->is_Loop()'
   1.855 +    // because the alternative loop entry points won't be converted
   1.856 +    // into LoopNodes.
   1.857 +    IdealLoopTree *n_loop = get_loop(n_ctrl);
   1.858 +    for( uint j = 1; j < n_ctrl->req(); j++ )
   1.859 +      if( get_loop(n_ctrl->in(j)) != n_loop )
   1.860 +        return;
   1.861 +
   1.862 +    // Check for safety of the merge point.
   1.863 +    if( !merge_point_safe(n_ctrl) ) {
   1.864 +      return;
   1.865 +    }
   1.866 +
   1.867 +    // Split compare 'n' through the merge point if it is profitable
   1.868 +    Node *phi = split_thru_phi( n, n_ctrl, policy );
   1.869 +    if( !phi ) return;
   1.870 +
   1.871 +    // Found a Phi to split thru!
   1.872 +    // Replace 'n' with the new phi
   1.873 +    _igvn.replace_node( n, phi );
   1.874 +
   1.875 +    // Now split the bool up thru the phi
   1.876 +    Node *bolphi = split_thru_phi( bol, n_ctrl, -1 );
   1.877 +    guarantee(bolphi != NULL, "null boolean phi node");
   1.878 +
   1.879 +    _igvn.replace_node( bol, bolphi );
   1.880 +    assert( iff->in(1) == bolphi, "" );
   1.881 +
   1.882 +    if( bolphi->Value(&_igvn)->singleton() )
   1.883 +      return;
   1.884 +
   1.885 +    // Conditional-move?  Must split up now
   1.886 +    if( !iff->is_If() ) {
   1.887 +      Node *cmovphi = split_thru_phi( iff, n_ctrl, -1 );
   1.888 +      _igvn.replace_node( iff, cmovphi );
   1.889 +      return;
   1.890 +    }
   1.891 +
   1.892 +    // Now split the IF
   1.893 +    do_split_if( iff );
   1.894 +    return;
   1.895 +  }
   1.896 +
   1.897 +  // Check for an IF ready to split; one that has its
   1.898 +  // condition codes input coming from a Phi at the block start.
   1.899 +  int n_op = n->Opcode();
   1.900 +
    1.901 +  // Check for an IF being dominated by another IF with the same test
   1.902 +  if( n_op == Op_If ) {
   1.903 +    Node *bol = n->in(1);
   1.904 +    uint max = bol->outcnt();
   1.905 +    // Check for same test used more than once?
   1.906 +    if( n_op == Op_If && max > 1 && bol->is_Bool() ) {
   1.907 +      // Search up IDOMs to see if this IF is dominated.
   1.908 +      Node *cutoff = get_ctrl(bol);
   1.909 +
   1.910 +      // Now search up IDOMs till cutoff, looking for a dominating test
   1.911 +      Node *prevdom = n;
   1.912 +      Node *dom = idom(prevdom);
   1.913 +      while( dom != cutoff ) {
   1.914 +        if( dom->req() > 1 && dom->in(1) == bol && prevdom->in(0) == dom ) {
   1.915 +          // Replace the dominated test with an obvious true or false.
   1.916 +          // Place it on the IGVN worklist for later cleanup.
   1.917 +          C->set_major_progress();
   1.918 +          dominated_by( prevdom, n, false, true );
   1.919 +#ifndef PRODUCT
   1.920 +          if( VerifyLoopOptimizations ) verify();
   1.921 +#endif
   1.922 +          return;
   1.923 +        }
   1.924 +        prevdom = dom;
   1.925 +        dom = idom(prevdom);
   1.926 +      }
   1.927 +    }
   1.928 +  }
   1.929 +
   1.930 +  // See if a shared loop-varying computation has no loop-varying uses.
   1.931 +  // Happens if something is only used for JVM state in uncommon trap exits,
   1.932 +  // like various versions of induction variable+offset.  Clone the
   1.933 +  // computation per usage to allow it to sink out of the loop.
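          +// For example, an 'iv + offset' value computed in the loop but consumed
          +// only by debug info at uncommon-trap exits is cloned once per use and
          +// each clone is placed near its (out-of-loop) use, so nothing of it
          +// remains inside the loop.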
   1.934 +  if (has_ctrl(n) && !n->in(0)) {// n not dead and has no control edge (can float about)
   1.935 +    Node *n_ctrl = get_ctrl(n);
   1.936 +    IdealLoopTree *n_loop = get_loop(n_ctrl);
   1.937 +    if( n_loop != _ltree_root ) {
   1.938 +      DUIterator_Fast imax, i = n->fast_outs(imax);
   1.939 +      for (; i < imax; i++) {
   1.940 +        Node* u = n->fast_out(i);
   1.941 +        if( !has_ctrl(u) )     break; // Found control user
   1.942 +        IdealLoopTree *u_loop = get_loop(get_ctrl(u));
   1.943 +        if( u_loop == n_loop ) break; // Found loop-varying use
   1.944 +        if( n_loop->is_member( u_loop ) ) break; // Found use in inner loop
   1.945 +        if( u->Opcode() == Op_Opaque1 ) break; // Found loop limit, bugfix for 4677003
   1.946 +      }
   1.947 +      bool did_break = (i < imax);  // Did we break out of the previous loop?
   1.948 +      if (!did_break && n->outcnt() > 1) { // All uses in outer loops!
   1.949 +        Node *late_load_ctrl = NULL;
   1.950 +        if (n->is_Load()) {
   1.951 +          // If n is a load, get and save the result from get_late_ctrl(),
   1.952 +          // to be later used in calculating the control for n's clones.
   1.953 +          clear_dom_lca_tags();
   1.954 +          late_load_ctrl = get_late_ctrl(n, n_ctrl);
   1.955 +        }
   1.956 +        // If n is a load, and the late control is the same as the current
   1.957 +        // control, then the cloning of n is a pointless exercise, because
   1.958 +        // GVN will ensure that we end up where we started.
   1.959 +        if (!n->is_Load() || late_load_ctrl != n_ctrl) {
   1.960 +          for (DUIterator_Last jmin, j = n->last_outs(jmin); j >= jmin; ) {
   1.961 +            Node *u = n->last_out(j); // Clone private computation per use
   1.962 +            _igvn.rehash_node_delayed(u);
   1.963 +            Node *x = n->clone(); // Clone computation
   1.964 +            Node *x_ctrl = NULL;
   1.965 +            if( u->is_Phi() ) {
   1.966 +              // Replace all uses of normal nodes.  Replace Phi uses
   1.967 +              // individually, so the separate Nodes can sink down
   1.968 +              // different paths.
   1.969 +              uint k = 1;
   1.970 +              while( u->in(k) != n ) k++;
   1.971 +              u->set_req( k, x );
   1.972 +              // x goes next to Phi input path
   1.973 +              x_ctrl = u->in(0)->in(k);
   1.974 +              --j;
   1.975 +            } else {              // Normal use
   1.976 +              // Replace all uses
   1.977 +              for( uint k = 0; k < u->req(); k++ ) {
   1.978 +                if( u->in(k) == n ) {
   1.979 +                  u->set_req( k, x );
   1.980 +                  --j;
   1.981 +                }
   1.982 +              }
   1.983 +              x_ctrl = get_ctrl(u);
   1.984 +            }
   1.985 +
   1.986 +            // Find control for 'x' next to use but not inside inner loops.
   1.987 +            // For inner loop uses get the preheader area.
   1.988 +            x_ctrl = place_near_use(x_ctrl);
   1.989 +
   1.990 +            if (n->is_Load()) {
   1.991 +              // For loads, add a control edge to a CFG node outside of the loop
   1.992 +              // to force them to not combine and return back inside the loop
   1.993 +              // during GVN optimization (4641526).
   1.994 +              //
   1.995 +              // Because we are setting the actual control input, factor in
   1.996 +              // the result from get_late_ctrl() so we respect any
   1.997 +              // anti-dependences. (6233005).
   1.998 +              x_ctrl = dom_lca(late_load_ctrl, x_ctrl);
   1.999 +
  1.1000 +              // Don't allow the control input to be a CFG splitting node.
  1.1001 +              // Such nodes should only have ProjNodes as outs, e.g. IfNode
  1.1002 +              // should only have IfTrueNode and IfFalseNode (4985384).
  1.1003 +              x_ctrl = find_non_split_ctrl(x_ctrl);
  1.1004 +              assert(dom_depth(n_ctrl) <= dom_depth(x_ctrl), "n is later than its clone");
  1.1005 +
  1.1006 +              x->set_req(0, x_ctrl);
  1.1007 +            }
  1.1008 +            register_new_node(x, x_ctrl);
  1.1009 +
  1.1010 +            // Some institutional knowledge is needed here: 'x' is
  1.1011 +            // yanked because if the optimizer runs GVN on it all the
  1.1012 +            // cloned x's will common up and undo this optimization and
  1.1013 +            // be forced back in the loop.  This is annoying because it
  1.1014 +            // makes +VerifyOpto report false-positives on progress.  I
  1.1015 +            // tried setting control edges on the x's to force them to
  1.1016 +            // not combine, but the matching gets worried when it tries
  1.1017 +            // to fold a StoreP and an AddP together (as part of an
  1.1018 +            // address expression) and the AddP and StoreP have
  1.1019 +            // different controls.
  1.1020 +            if (!x->is_Load() && !x->is_DecodeNarrowPtr()) _igvn._worklist.yank(x);
  1.1021 +          }
  1.1022 +          _igvn.remove_dead_node(n);
  1.1023 +        }
  1.1024 +      }
  1.1025 +    }
  1.1026 +  }
  1.1027 +
   1.1028 +  // Check for Opaque2's whose loop has disappeared - whose input is in the
  1.1029 +  // same loop nest as their output.  Remove 'em, they are no longer useful.
  1.1030 +  if( n_op == Op_Opaque2 &&
  1.1031 +      n->in(1) != NULL &&
  1.1032 +      get_loop(get_ctrl(n)) == get_loop(get_ctrl(n->in(1))) ) {
  1.1033 +    _igvn.replace_node( n, n->in(1) );
  1.1034 +  }
  1.1035 +}
  1.1036 +
  1.1037 +//------------------------------split_if_with_blocks---------------------------
  1.1038 +// Check for aggressive application of 'split-if' optimization,
  1.1039 +// using basic block level info.
  1.1040 +void PhaseIdealLoop::split_if_with_blocks( VectorSet &visited, Node_Stack &nstack ) {
  1.1041 +  Node *n = C->root();
  1.1042 +  visited.set(n->_idx); // first, mark node as visited
  1.1043 +  // Do pre-visit work for root
  1.1044 +  n = split_if_with_blocks_pre( n );
  1.1045 +  uint cnt = n->outcnt();
  1.1046 +  uint i   = 0;
  1.1047 +  while (true) {
  1.1048 +    // Visit all children
  1.1049 +    if (i < cnt) {
  1.1050 +      Node* use = n->raw_out(i);
  1.1051 +      ++i;
  1.1052 +      if (use->outcnt() != 0 && !visited.test_set(use->_idx)) {
  1.1053 +        // Now do pre-visit work for this use
  1.1054 +        use = split_if_with_blocks_pre( use );
  1.1055 +        nstack.push(n, i); // Save parent and next use's index.
  1.1056 +        n   = use;         // Process all children of current use.
  1.1057 +        cnt = use->outcnt();
  1.1058 +        i   = 0;
  1.1059 +      }
  1.1060 +    }
  1.1061 +    else {
  1.1062 +      // All of n's children have been processed, complete post-processing.
  1.1063 +      if (cnt != 0 && !n->is_Con()) {
  1.1064 +        assert(has_node(n), "no dead nodes");
  1.1065 +        split_if_with_blocks_post( n );
  1.1066 +      }
  1.1067 +      if (nstack.is_empty()) {
  1.1068 +        // Finished all nodes on stack.
  1.1069 +        break;
  1.1070 +      }
  1.1071 +      // Get saved parent node and next use's index. Visit the rest of uses.
  1.1072 +      n   = nstack.node();
  1.1073 +      cnt = n->outcnt();
  1.1074 +      i   = nstack.index();
  1.1075 +      nstack.pop();
  1.1076 +    }
  1.1077 +  }
  1.1078 +}
  1.1079 +
  1.1080 +
  1.1081 +//=============================================================================
  1.1082 +//
  1.1083 +//                   C L O N E   A   L O O P   B O D Y
  1.1084 +//
  1.1085 +
  1.1086 +//------------------------------clone_iff--------------------------------------
  1.1087 +// Passed in a Phi merging (recursively) some nearly equivalent Bool/Cmps.
  1.1088 +// "Nearly" because all Nodes have been cloned from the original in the loop,
  1.1089 +// but the fall-in edges to the Cmp are different.  Clone bool/Cmp pairs
  1.1090 +// through the Phi recursively, and return a Bool.
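          +// For example, Phi(R, Bool(Cmp(a1,b1)), Bool(Cmp(a2,b2))) is rewritten to
          +//   Bool(Cmp(Phi(R, a1, a2), Phi(R, b1, b2)))
          +// by cloning the sample Bool/Cmp and pushing the merge down to the
          +// Cmp inputs.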
  1.1091 +BoolNode *PhaseIdealLoop::clone_iff( PhiNode *phi, IdealLoopTree *loop ) {
  1.1092 +
  1.1093 +  // Convert this Phi into a Phi merging Bools
  1.1094 +  uint i;
  1.1095 +  for( i = 1; i < phi->req(); i++ ) {
  1.1096 +    Node *b = phi->in(i);
  1.1097 +    if( b->is_Phi() ) {
  1.1098 +      _igvn.replace_input_of(phi, i, clone_iff( b->as_Phi(), loop ));
  1.1099 +    } else {
  1.1100 +      assert( b->is_Bool(), "" );
  1.1101 +    }
  1.1102 +  }
  1.1103 +
  1.1104 +  Node *sample_bool = phi->in(1);
  1.1105 +  Node *sample_cmp  = sample_bool->in(1);
  1.1106 +
  1.1107 +  // Make Phis to merge the Cmp's inputs.
  1.1108 +  PhiNode *phi1 = new (C) PhiNode( phi->in(0), Type::TOP );
  1.1109 +  PhiNode *phi2 = new (C) PhiNode( phi->in(0), Type::TOP );
  1.1110 +  for( i = 1; i < phi->req(); i++ ) {
  1.1111 +    Node *n1 = phi->in(i)->in(1)->in(1);
  1.1112 +    Node *n2 = phi->in(i)->in(1)->in(2);
  1.1113 +    phi1->set_req( i, n1 );
  1.1114 +    phi2->set_req( i, n2 );
  1.1115 +    phi1->set_type( phi1->type()->meet_speculative(n1->bottom_type()));
  1.1116 +    phi2->set_type( phi2->type()->meet_speculative(n2->bottom_type()));
  1.1117 +  }
  1.1118 +  // See if these Phis have been made before.
  1.1119 +  // Register with optimizer
  1.1120 +  Node *hit1 = _igvn.hash_find_insert(phi1);
  1.1121 +  if( hit1 ) {                  // Hit, toss just made Phi
  1.1122 +    _igvn.remove_dead_node(phi1); // Remove new phi
  1.1123 +    assert( hit1->is_Phi(), "" );
  1.1124 +    phi1 = (PhiNode*)hit1;      // Use existing phi
  1.1125 +  } else {                      // Miss
  1.1126 +    _igvn.register_new_node_with_optimizer(phi1);
  1.1127 +  }
  1.1128 +  Node *hit2 = _igvn.hash_find_insert(phi2);
  1.1129 +  if( hit2 ) {                  // Hit, toss just made Phi
  1.1130 +    _igvn.remove_dead_node(phi2); // Remove new phi
  1.1131 +    assert( hit2->is_Phi(), "" );
  1.1132 +    phi2 = (PhiNode*)hit2;      // Use existing phi
  1.1133 +  } else {                      // Miss
  1.1134 +    _igvn.register_new_node_with_optimizer(phi2);
  1.1135 +  }
  1.1136 +  // Register Phis with loop/block info
  1.1137 +  set_ctrl(phi1, phi->in(0));
  1.1138 +  set_ctrl(phi2, phi->in(0));
  1.1139 +  // Make a new Cmp
  1.1140 +  Node *cmp = sample_cmp->clone();
  1.1141 +  cmp->set_req( 1, phi1 );
  1.1142 +  cmp->set_req( 2, phi2 );
  1.1143 +  _igvn.register_new_node_with_optimizer(cmp);
  1.1144 +  set_ctrl(cmp, phi->in(0));
  1.1145 +
  1.1146 +  // Make a new Bool
  1.1147 +  Node *b = sample_bool->clone();
  1.1148 +  b->set_req(1,cmp);
  1.1149 +  _igvn.register_new_node_with_optimizer(b);
  1.1150 +  set_ctrl(b, phi->in(0));
  1.1151 +
  1.1152 +  assert( b->is_Bool(), "" );
  1.1153 +  return (BoolNode*)b;
  1.1154 +}
  1.1155 +
  1.1156 +//------------------------------clone_bool-------------------------------------
  1.1157 +// Passed in a Phi merging (recursively) some nearly equivalent Bool/Cmps.
  1.1158 +// "Nearly" because all Nodes have been cloned from the original in the loop,
  1.1159 +// but the fall-in edges to the Cmp are different.  Clone bool/Cmp pairs
  1.1160 +// through the Phi recursively, and return a Bool.
  1.1161 +CmpNode *PhaseIdealLoop::clone_bool( PhiNode *phi, IdealLoopTree *loop ) {
  1.1162 +  uint i;
  1.1163 +  // Convert this Phi into a Phi merging Bools
  1.1164 +  for( i = 1; i < phi->req(); i++ ) {
  1.1165 +    Node *b = phi->in(i);
  1.1166 +    if( b->is_Phi() ) {
  1.1167 +      _igvn.replace_input_of(phi, i, clone_bool( b->as_Phi(), loop ));
  1.1168 +    } else {
  1.1169 +      assert( b->is_Cmp() || b->is_top(), "inputs are all Cmp or TOP" );
  1.1170 +    }
  1.1171 +  }
  1.1172 +
  1.1173 +  Node *sample_cmp = phi->in(1);
  1.1174 +
  1.1175 +  // Make Phis to merge the Cmp's inputs.
  1.1176 +  PhiNode *phi1 = new (C) PhiNode( phi->in(0), Type::TOP );
  1.1177 +  PhiNode *phi2 = new (C) PhiNode( phi->in(0), Type::TOP );
  1.1178 +  for( uint j = 1; j < phi->req(); j++ ) {
  1.1179 +    Node *cmp_top = phi->in(j); // Inputs are all Cmp or TOP
  1.1180 +    Node *n1, *n2;
  1.1181 +    if( cmp_top->is_Cmp() ) {
  1.1182 +      n1 = cmp_top->in(1);
  1.1183 +      n2 = cmp_top->in(2);
  1.1184 +    } else {
  1.1185 +      n1 = n2 = cmp_top;
  1.1186 +    }
  1.1187 +    phi1->set_req( j, n1 );
  1.1188 +    phi2->set_req( j, n2 );
  1.1189 +    phi1->set_type(phi1->type()->meet_speculative(n1->bottom_type()));
  1.1190 +    phi2->set_type(phi2->type()->meet_speculative(n2->bottom_type()));
  1.1191 +  }
  1.1192 +
  1.1193 +  // See if these Phis have been made before.
  1.1194 +  // Register with optimizer
  1.1195 +  Node *hit1 = _igvn.hash_find_insert(phi1);
  1.1196 +  if( hit1 ) {                  // Hit, toss just made Phi
  1.1197 +    _igvn.remove_dead_node(phi1); // Remove new phi
  1.1198 +    assert( hit1->is_Phi(), "" );
  1.1199 +    phi1 = (PhiNode*)hit1;      // Use existing phi
  1.1200 +  } else {                      // Miss
  1.1201 +    _igvn.register_new_node_with_optimizer(phi1);
  1.1202 +  }
  1.1203 +  Node *hit2 = _igvn.hash_find_insert(phi2);
  1.1204 +  if( hit2 ) {                  // Hit, toss just made Phi
  1.1205 +    _igvn.remove_dead_node(phi2); // Remove new phi
  1.1206 +    assert( hit2->is_Phi(), "" );
  1.1207 +    phi2 = (PhiNode*)hit2;      // Use existing phi
  1.1208 +  } else {                      // Miss
  1.1209 +    _igvn.register_new_node_with_optimizer(phi2);
  1.1210 +  }
  1.1211 +  // Register Phis with loop/block info
  1.1212 +  set_ctrl(phi1, phi->in(0));
  1.1213 +  set_ctrl(phi2, phi->in(0));
  1.1214 +  // Make a new Cmp
  1.1215 +  Node *cmp = sample_cmp->clone();
  1.1216 +  cmp->set_req( 1, phi1 );
  1.1217 +  cmp->set_req( 2, phi2 );
  1.1218 +  _igvn.register_new_node_with_optimizer(cmp);
  1.1219 +  set_ctrl(cmp, phi->in(0));
  1.1220 +
  1.1221 +  assert( cmp->is_Cmp(), "" );
  1.1222 +  return (CmpNode*)cmp;
  1.1223 +}
  1.1224 +
  1.1225 +//------------------------------sink_use---------------------------------------
  1.1226 +// If 'use' was in the loop-exit block, it now needs to be sunk
  1.1227 +// below the post-loop merge point.
  1.1228 +void PhaseIdealLoop::sink_use( Node *use, Node *post_loop ) {
  1.1229 +  if (!use->is_CFG() && get_ctrl(use) == post_loop->in(2)) {
  1.1230 +    set_ctrl(use, post_loop);
  1.1231 +    for (DUIterator j = use->outs(); use->has_out(j); j++)
  1.1232 +      sink_use(use->out(j), post_loop);
  1.1233 +  }
  1.1234 +}
  1.1235 +
  1.1236 +//------------------------------clone_loop-------------------------------------
  1.1237 +//
  1.1238 +//                   C L O N E   A   L O O P   B O D Y
  1.1239 +//
  1.1240 +// This is the basic building block of the loop optimizations.  It clones an
  1.1241 +// entire loop body.  It makes an old_new loop body mapping; with this mapping
  1.1242 +// you can find the new-loop equivalent to an old-loop node.  All new-loop
  1.1243 +// nodes are exactly equal to their old-loop counterparts, all edges are the
  1.1244 +// same.  All exits from the old-loop now have a RegionNode that merges the
  1.1245 +// equivalent new-loop path.  This is true even for the normal "loop-exit"
  1.1246 +// condition.  All uses of loop-invariant old-loop values now come from (one
  1.1247 +// or more) Phis that merge their new-loop equivalents.
  1.1248 +//
  1.1249 +// This operation leaves the graph in an illegal state: there are two valid
  1.1250 +// control edges coming from the loop pre-header to both loop bodies.  I'll
  1.1251 +// definitely have to hack the graph after running this transform.
  1.1252 +//
  1.1253 +// From this building block I will further edit edges to perform loop peeling
  1.1254 +// or loop unrolling or iteration splitting (Range-Check-Elimination), etc.
  1.1255 +//
   1.1256 +// Parameter side_by_side_idom:
   1.1257 +//   When side_by_side_idom is NULL, the dominator tree is constructed for
  1.1258 +//      the clone loop to dominate the original.  Used in construction of
  1.1259 +//      pre-main-post loop sequence.
  1.1260 +//   When nonnull, the clone and original are side-by-side, both are
  1.1261 +//      dominated by the side_by_side_idom node.  Used in construction of
  1.1262 +//      unswitched loops.
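          +//
          +// Usage sketch (illustrative only; the local names below are hypothetical):
          +// callers hand in an empty Node_List and afterwards look up each clone by
          +// the original node's _idx, e.g.
          +//
          +//   Node_List old_new;
          +//   clone_loop(loop, old_new, dom_depth(loop->_head), NULL);
          +//   Node* cloned_head = old_new[loop->_head->_idx];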
  1.1263 +void PhaseIdealLoop::clone_loop( IdealLoopTree *loop, Node_List &old_new, int dd,
  1.1264 +                                 Node* side_by_side_idom) {
  1.1265 +
  1.1266 +  // Step 1: Clone the loop body.  Make the old->new mapping.
  1.1267 +  uint i;
  1.1268 +  for( i = 0; i < loop->_body.size(); i++ ) {
  1.1269 +    Node *old = loop->_body.at(i);
  1.1270 +    Node *nnn = old->clone();
  1.1271 +    old_new.map( old->_idx, nnn );
  1.1272 +    _igvn.register_new_node_with_optimizer(nnn);
  1.1273 +  }
  1.1274 +
  1.1275 +
  1.1276 +  // Step 2: Fix the edges in the new body.  If the old input is outside the
   1.1277 +  // loop, use it.  If the old input is inside the loop, use the corresponding
  1.1278 +  // new node instead.
  1.1279 +  for( i = 0; i < loop->_body.size(); i++ ) {
  1.1280 +    Node *old = loop->_body.at(i);
  1.1281 +    Node *nnn = old_new[old->_idx];
  1.1282 +    // Fix CFG/Loop controlling the new node
  1.1283 +    if (has_ctrl(old)) {
  1.1284 +      set_ctrl(nnn, old_new[get_ctrl(old)->_idx]);
  1.1285 +    } else {
  1.1286 +      set_loop(nnn, loop->_parent);
  1.1287 +      if (old->outcnt() > 0) {
  1.1288 +        set_idom( nnn, old_new[idom(old)->_idx], dd );
  1.1289 +      }
  1.1290 +    }
  1.1291 +    // Correct edges to the new node
  1.1292 +    for( uint j = 0; j < nnn->req(); j++ ) {
  1.1293 +        Node *n = nnn->in(j);
  1.1294 +        if( n ) {
  1.1295 +          IdealLoopTree *old_in_loop = get_loop( has_ctrl(n) ? get_ctrl(n) : n );
  1.1296 +          if( loop->is_member( old_in_loop ) )
  1.1297 +            nnn->set_req(j, old_new[n->_idx]);
  1.1298 +        }
  1.1299 +    }
  1.1300 +    _igvn.hash_find_insert(nnn);
  1.1301 +  }
  1.1302 +  Node *newhead = old_new[loop->_head->_idx];
  1.1303 +  set_idom(newhead, newhead->in(LoopNode::EntryControl), dd);
  1.1304 +
  1.1305 +
  1.1306 +  // Step 3: Now fix control uses.  Loop varying control uses have already
  1.1307 +  // been fixed up (as part of all input edges in Step 2).  Loop invariant
  1.1308 +  // control uses must be either an IfFalse or an IfTrue.  Make a merge
  1.1309 +  // point to merge the old and new IfFalse/IfTrue nodes; make the use
  1.1310 +  // refer to this.
  1.1311 +  ResourceArea *area = Thread::current()->resource_area();
  1.1312 +  Node_List worklist(area);
  1.1313 +  uint new_counter = C->unique();
  1.1314 +  for( i = 0; i < loop->_body.size(); i++ ) {
  1.1315 +    Node* old = loop->_body.at(i);
  1.1316 +    if( !old->is_CFG() ) continue;
  1.1317 +    Node* nnn = old_new[old->_idx];
  1.1318 +
  1.1319 +    // Copy uses to a worklist, so I can munge the def-use info
  1.1320 +    // with impunity.
  1.1321 +    for (DUIterator_Fast jmax, j = old->fast_outs(jmax); j < jmax; j++)
  1.1322 +      worklist.push(old->fast_out(j));
  1.1323 +
  1.1324 +    while( worklist.size() ) {  // Visit all uses
  1.1325 +      Node *use = worklist.pop();
  1.1326 +      if (!has_node(use))  continue; // Ignore dead nodes
  1.1327 +      IdealLoopTree *use_loop = get_loop( has_ctrl(use) ? get_ctrl(use) : use );
  1.1328 +      if( !loop->is_member( use_loop ) && use->is_CFG() ) {
  1.1329 +        // Both OLD and USE are CFG nodes here.
  1.1330 +        assert( use->is_Proj(), "" );
  1.1331 +
  1.1332 +        // Clone the loop exit control projection
  1.1333 +        Node *newuse = use->clone();
  1.1334 +        newuse->set_req(0,nnn);
  1.1335 +        _igvn.register_new_node_with_optimizer(newuse);
  1.1336 +        set_loop(newuse, use_loop);
  1.1337 +        set_idom(newuse, nnn, dom_depth(nnn) + 1 );
  1.1338 +
  1.1339 +        // We need a Region to merge the exit from the peeled body and the
  1.1340 +        // exit from the old loop body.
  1.1341 +        RegionNode *r = new (C) RegionNode(3);
  1.1342 +        // Map the old use to the new merge point
  1.1343 +        old_new.map( use->_idx, r );
  1.1344 +        uint dd_r = MIN2(dom_depth(newuse),dom_depth(use));
  1.1345 +        assert( dd_r >= dom_depth(dom_lca(newuse,use)), "" );
  1.1346 +
  1.1347 +        // The original user of 'use' uses 'r' instead.
  1.1348 +        for (DUIterator_Last lmin, l = use->last_outs(lmin); l >= lmin;) {
  1.1349 +          Node* useuse = use->last_out(l);
  1.1350 +          _igvn.rehash_node_delayed(useuse);
  1.1351 +          uint uses_found = 0;
  1.1352 +          if( useuse->in(0) == use ) {
  1.1353 +            useuse->set_req(0, r);
  1.1354 +            uses_found++;
  1.1355 +            if( useuse->is_CFG() ) {
  1.1356 +              assert( dom_depth(useuse) > dd_r, "" );
  1.1357 +              set_idom(useuse, r, dom_depth(useuse));
  1.1358 +            }
  1.1359 +          }
  1.1360 +          for( uint k = 1; k < useuse->req(); k++ ) {
  1.1361 +            if( useuse->in(k) == use ) {
  1.1362 +              useuse->set_req(k, r);
  1.1363 +              uses_found++;
  1.1364 +            }
  1.1365 +          }
  1.1366 +          l -= uses_found;    // we deleted 1 or more copies of this edge
  1.1367 +        }
  1.1368 +
  1.1369 +        // Now finish up 'r'
  1.1370 +        r->set_req( 1, newuse );
  1.1371 +        r->set_req( 2,    use );
  1.1372 +        _igvn.register_new_node_with_optimizer(r);
  1.1373 +        set_loop(r, use_loop);
  1.1374 +        set_idom(r, !side_by_side_idom ? newuse->in(0) : side_by_side_idom, dd_r);
  1.1375 +      } // End of if a loop-exit test
  1.1376 +    }
  1.1377 +  }
  1.1378 +
  1.1379 +  // Step 4: If loop-invariant use is not control, it must be dominated by a
  1.1380 +  // loop exit IfFalse/IfTrue.  Find "proper" loop exit.  Make a Region
  1.1381 +  // there if needed.  Make a Phi there merging old and new used values.
  1.1382 +  Node_List *split_if_set = NULL;
  1.1383 +  Node_List *split_bool_set = NULL;
  1.1384 +  Node_List *split_cex_set = NULL;
  1.1385 +  for( i = 0; i < loop->_body.size(); i++ ) {
  1.1386 +    Node* old = loop->_body.at(i);
  1.1387 +    Node* nnn = old_new[old->_idx];
  1.1388 +    // Copy uses to a worklist, so I can munge the def-use info
  1.1389 +    // with impunity.
  1.1390 +    for (DUIterator_Fast jmax, j = old->fast_outs(jmax); j < jmax; j++)
  1.1391 +      worklist.push(old->fast_out(j));
  1.1392 +
  1.1393 +    while( worklist.size() ) {
  1.1394 +      Node *use = worklist.pop();
  1.1395 +      if (!has_node(use))  continue; // Ignore dead nodes
  1.1396 +      if (use->in(0) == C->top())  continue;
  1.1397 +      IdealLoopTree *use_loop = get_loop( has_ctrl(use) ? get_ctrl(use) : use );
  1.1398 +      // Check for data-use outside of loop - at least one of OLD or USE
  1.1399 +      // must not be a CFG node.
  1.1400 +      if( !loop->is_member( use_loop ) && (!old->is_CFG() || !use->is_CFG())) {
  1.1401 +
  1.1402 +        // If the Data use is an IF, that means we have an IF outside of the
  1.1403 +        // loop that is switching on a condition that is set inside of the
  1.1404 +        // loop.  Happens if people set a loop-exit flag; then test the flag
   1.1405 +        // in the loop to break the loop, then test it again outside of the
  1.1406 +        // loop to determine which way the loop exited.
  1.1407 +        // Loop predicate If node connects to Bool node through Opaque1 node.
  1.1408 +        if (use->is_If() || use->is_CMove() || C->is_predicate_opaq(use)) {
   1.1409 +          // Since this case is highly unlikely, we lazily build the worklist
   1.1410 +          // of such Nodes to split.
  1.1411 +          if( !split_if_set )
  1.1412 +            split_if_set = new Node_List(area);
  1.1413 +          split_if_set->push(use);
  1.1414 +        }
  1.1415 +        if( use->is_Bool() ) {
  1.1416 +          if( !split_bool_set )
  1.1417 +            split_bool_set = new Node_List(area);
  1.1418 +          split_bool_set->push(use);
  1.1419 +        }
  1.1420 +        if( use->Opcode() == Op_CreateEx ) {
  1.1421 +          if( !split_cex_set )
  1.1422 +            split_cex_set = new Node_List(area);
  1.1423 +          split_cex_set->push(use);
  1.1424 +        }
  1.1425 +
  1.1426 +
  1.1427 +        // Get "block" use is in
  1.1428 +        uint idx = 0;
  1.1429 +        while( use->in(idx) != old ) idx++;
  1.1430 +        Node *prev = use->is_CFG() ? use : get_ctrl(use);
  1.1431 +        assert( !loop->is_member( get_loop( prev ) ), "" );
  1.1432 +        Node *cfg = prev->_idx >= new_counter
  1.1433 +          ? prev->in(2)
  1.1434 +          : idom(prev);
  1.1435 +        if( use->is_Phi() )     // Phi use is in prior block
  1.1436 +          cfg = prev->in(idx);  // NOT in block of Phi itself
  1.1437 +        if (cfg->is_top()) {    // Use is dead?
  1.1438 +          _igvn.replace_input_of(use, idx, C->top());
  1.1439 +          continue;
  1.1440 +        }
  1.1441 +
  1.1442 +        while( !loop->is_member( get_loop( cfg ) ) ) {
  1.1443 +          prev = cfg;
  1.1444 +          cfg = cfg->_idx >= new_counter ? cfg->in(2) : idom(cfg);
  1.1445 +        }
  1.1446 +        // If the use occurs after merging several exits from the loop, then
  1.1447 +        // old value must have dominated all those exits.  Since the same old
  1.1448 +        // value was used on all those exits we did not need a Phi at this
  1.1449 +        // merge point.  NOW we do need a Phi here.  Each loop exit value
  1.1450 +        // is now merged with the peeled body exit; each exit gets its own
  1.1451 +        // private Phi and those Phis need to be merged here.
  1.1452 +        Node *phi;
  1.1453 +        if( prev->is_Region() ) {
  1.1454 +          if( idx == 0 ) {      // Updating control edge?
  1.1455 +            phi = prev;         // Just use existing control
  1.1456 +          } else {              // Else need a new Phi
  1.1457 +            phi = PhiNode::make( prev, old );
  1.1458 +            // Now recursively fix up the new uses of old!
  1.1459 +            for( uint i = 1; i < prev->req(); i++ ) {
  1.1460 +              worklist.push(phi); // Onto worklist once for each 'old' input
  1.1461 +            }
  1.1462 +          }
  1.1463 +        } else {
  1.1464 +          // Get new RegionNode merging old and new loop exits
  1.1465 +          prev = old_new[prev->_idx];
  1.1466 +          assert( prev, "just made this in step 7" );
  1.1467 +          if( idx == 0 ) {      // Updating control edge?
  1.1468 +            phi = prev;         // Just use existing control
  1.1469 +          } else {              // Else need a new Phi
  1.1470 +            // Make a new Phi merging data values properly
  1.1471 +            phi = PhiNode::make( prev, old );
  1.1472 +            phi->set_req( 1, nnn );
  1.1473 +          }
  1.1474 +        }
  1.1475 +        // If inserting a new Phi, check for prior hits
  1.1476 +        if( idx != 0 ) {
  1.1477 +          Node *hit = _igvn.hash_find_insert(phi);
  1.1478 +          if( hit == NULL ) {
  1.1479 +           _igvn.register_new_node_with_optimizer(phi); // Register new phi
  1.1480 +          } else {                                      // or
  1.1481 +            // Remove the new phi from the graph and use the hit
  1.1482 +            _igvn.remove_dead_node(phi);
  1.1483 +            phi = hit;                                  // Use existing phi
  1.1484 +          }
  1.1485 +          set_ctrl(phi, prev);
  1.1486 +        }
  1.1487 +        // Make 'use' use the Phi instead of the old loop body exit value
  1.1488 +        _igvn.replace_input_of(use, idx, phi);
  1.1489 +        if( use->_idx >= new_counter ) { // If updating new phis
  1.1490 +          // Not needed for correctness, but prevents a weak assert
  1.1491 +          // in AddPNode from tripping (when we end up with different
  1.1492 +          // base & derived Phis that will become the same after
  1.1493 +          // IGVN does CSE).
  1.1494 +          Node *hit = _igvn.hash_find_insert(use);
  1.1495 +          if( hit )             // Go ahead and re-hash for hits.
  1.1496 +            _igvn.replace_node( use, hit );
  1.1497 +        }
  1.1498 +
  1.1499 +        // If 'use' was in the loop-exit block, it now needs to be sunk
  1.1500 +        // below the post-loop merge point.
  1.1501 +        sink_use( use, prev );
  1.1502 +      }
  1.1503 +    }
  1.1504 +  }
  1.1505 +
  1.1506 +  // Check for IFs that need splitting/cloning.  Happens if an IF outside of
  1.1507 +  // the loop uses a condition set in the loop.  The original IF probably
  1.1508 +  // takes control from one or more OLD Regions (which in turn get from NEW
  1.1509 +  // Regions).  In any case, there will be a set of Phis for each merge point
   1.1510 +// from the IF up to where the original BOOL def exits the loop.
  1.1511 +  if( split_if_set ) {
  1.1512 +    while( split_if_set->size() ) {
  1.1513 +      Node *iff = split_if_set->pop();
  1.1514 +      if( iff->in(1)->is_Phi() ) {
  1.1515 +        BoolNode *b = clone_iff( iff->in(1)->as_Phi(), loop );
  1.1516 +        _igvn.replace_input_of(iff, 1, b);
  1.1517 +      }
  1.1518 +    }
  1.1519 +  }
  1.1520 +  if( split_bool_set ) {
  1.1521 +    while( split_bool_set->size() ) {
  1.1522 +      Node *b = split_bool_set->pop();
  1.1523 +      Node *phi = b->in(1);
  1.1524 +      assert( phi->is_Phi(), "" );
  1.1525 +      CmpNode *cmp = clone_bool( (PhiNode*)phi, loop );
  1.1526 +      _igvn.replace_input_of(b, 1, cmp);
  1.1527 +    }
  1.1528 +  }
  1.1529 +  if( split_cex_set ) {
  1.1530 +    while( split_cex_set->size() ) {
  1.1531 +      Node *b = split_cex_set->pop();
  1.1532 +      assert( b->in(0)->is_Region(), "" );
  1.1533 +      assert( b->in(1)->is_Phi(), "" );
  1.1534 +      assert( b->in(0)->in(0) == b->in(1)->in(0), "" );
  1.1535 +      split_up( b, b->in(0), NULL );
  1.1536 +    }
  1.1537 +  }
  1.1538 +
  1.1539 +}
  1.1540 +
  1.1541 +
  1.1542 +//---------------------- stride_of_possible_iv -------------------------------------
  1.1543 +// Looks for an iff/bool/comp with one operand of the compare
  1.1544 +// being a cycle involving an add and a phi,
  1.1545 +// with an optional truncation (left-shift followed by a right-shift)
  1.1546 +// of the add. Returns zero if not an iv.
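          +//
          +// Informal example (hypothetical source, not taken from this file): for a
          +// loop written as  while (i < limit) { ...; i += 4; }  the exit test has
          +// the shape (If (Bool (CmpI (Phi i (AddI Phi 4)) limit))) and this routine
          +// returns 4; if the increment is not a compile-time constant it returns 0.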
  1.1547 +int PhaseIdealLoop::stride_of_possible_iv(Node* iff) {
  1.1548 +  Node* trunc1 = NULL;
  1.1549 +  Node* trunc2 = NULL;
  1.1550 +  const TypeInt* ttype = NULL;
  1.1551 +  if (!iff->is_If() || iff->in(1) == NULL || !iff->in(1)->is_Bool()) {
  1.1552 +    return 0;
  1.1553 +  }
  1.1554 +  BoolNode* bl = iff->in(1)->as_Bool();
  1.1555 +  Node* cmp = bl->in(1);
   1.1556 +  if (!cmp || (cmp->Opcode() != Op_CmpI && cmp->Opcode() != Op_CmpU)) {
  1.1557 +    return 0;
  1.1558 +  }
  1.1559 +  // Must have an invariant operand
  1.1560 +  if (is_member(get_loop(iff), get_ctrl(cmp->in(2)))) {
  1.1561 +    return 0;
  1.1562 +  }
  1.1563 +  Node* add2 = NULL;
  1.1564 +  Node* cmp1 = cmp->in(1);
  1.1565 +  if (cmp1->is_Phi()) {
  1.1566 +    // (If (Bool (CmpX phi:(Phi ...(Optional-trunc(AddI phi add2))) )))
  1.1567 +    Node* phi = cmp1;
  1.1568 +    for (uint i = 1; i < phi->req(); i++) {
  1.1569 +      Node* in = phi->in(i);
  1.1570 +      Node* add = CountedLoopNode::match_incr_with_optional_truncation(in,
  1.1571 +                                &trunc1, &trunc2, &ttype);
  1.1572 +      if (add && add->in(1) == phi) {
  1.1573 +        add2 = add->in(2);
  1.1574 +        break;
  1.1575 +      }
  1.1576 +    }
  1.1577 +  } else {
  1.1578 +    // (If (Bool (CmpX addtrunc:(Optional-trunc((AddI (Phi ...addtrunc...) add2)) )))
  1.1579 +    Node* addtrunc = cmp1;
  1.1580 +    Node* add = CountedLoopNode::match_incr_with_optional_truncation(addtrunc,
  1.1581 +                                &trunc1, &trunc2, &ttype);
  1.1582 +    if (add && add->in(1)->is_Phi()) {
  1.1583 +      Node* phi = add->in(1);
  1.1584 +      for (uint i = 1; i < phi->req(); i++) {
  1.1585 +        if (phi->in(i) == addtrunc) {
  1.1586 +          add2 = add->in(2);
  1.1587 +          break;
  1.1588 +        }
  1.1589 +      }
  1.1590 +    }
  1.1591 +  }
  1.1592 +  if (add2 != NULL) {
  1.1593 +    const TypeInt* add2t = _igvn.type(add2)->is_int();
  1.1594 +    if (add2t->is_con()) {
  1.1595 +      return add2t->get_con();
  1.1596 +    }
  1.1597 +  }
  1.1598 +  return 0;
  1.1599 +}
  1.1600 +
  1.1601 +
  1.1602 +//---------------------- stay_in_loop -------------------------------------
   1.1603 +// Return the (unique) control output node that's in the loop (if it exists).
  1.1604 +Node* PhaseIdealLoop::stay_in_loop( Node* n, IdealLoopTree *loop) {
  1.1605 +  Node* unique = NULL;
  1.1606 +  if (!n) return NULL;
  1.1607 +  for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
  1.1608 +    Node* use = n->fast_out(i);
  1.1609 +    if (!has_ctrl(use) && loop->is_member(get_loop(use))) {
  1.1610 +      if (unique != NULL) {
  1.1611 +        return NULL;
  1.1612 +      }
  1.1613 +      unique = use;
  1.1614 +    }
  1.1615 +  }
  1.1616 +  return unique;
  1.1617 +}
  1.1618 +
  1.1619 +//------------------------------ register_node -------------------------------------
  1.1620 +// Utility to register node "n" with PhaseIdealLoop
  1.1621 +void PhaseIdealLoop::register_node(Node* n, IdealLoopTree *loop, Node* pred, int ddepth) {
  1.1622 +  _igvn.register_new_node_with_optimizer(n);
  1.1623 +  loop->_body.push(n);
  1.1624 +  if (n->is_CFG()) {
  1.1625 +    set_loop(n, loop);
  1.1626 +    set_idom(n, pred, ddepth);
  1.1627 +  } else {
  1.1628 +    set_ctrl(n, pred);
  1.1629 +  }
  1.1630 +}
  1.1631 +
  1.1632 +//------------------------------ proj_clone -------------------------------------
  1.1633 +// Utility to create an if-projection
  1.1634 +ProjNode* PhaseIdealLoop::proj_clone(ProjNode* p, IfNode* iff) {
  1.1635 +  ProjNode* c = p->clone()->as_Proj();
  1.1636 +  c->set_req(0, iff);
  1.1637 +  return c;
  1.1638 +}
  1.1639 +
  1.1640 +//------------------------------ short_circuit_if -------------------------------------
  1.1641 +// Force the iff control output to be the live_proj
  1.1642 +Node* PhaseIdealLoop::short_circuit_if(IfNode* iff, ProjNode* live_proj) {
  1.1643 +  guarantee(live_proj != NULL, "null projection");
  1.1644 +  int proj_con = live_proj->_con;
  1.1645 +  assert(proj_con == 0 || proj_con == 1, "false or true projection");
  1.1646 +  Node *con = _igvn.intcon(proj_con);
  1.1647 +  set_ctrl(con, C->root());
  1.1648 +  if (iff) {
  1.1649 +    iff->set_req(1, con);
  1.1650 +  }
  1.1651 +  return con;
  1.1652 +}
  1.1653 +
  1.1654 +//------------------------------ insert_if_before_proj -------------------------------------
  1.1655 +// Insert a new if before an if projection (* - new node)
  1.1656 +//
  1.1657 +// before
  1.1658 +//           if(test)
  1.1659 +//           /     \
  1.1660 +//          v       v
  1.1661 +//    other-proj   proj (arg)
  1.1662 +//
  1.1663 +// after
  1.1664 +//           if(test)
  1.1665 +//           /     \
  1.1666 +//          /       v
  1.1667 +//         |      * proj-clone
  1.1668 +//         v          |
  1.1669 +//    other-proj      v
  1.1670 +//                * new_if(relop(cmp[IU](left,right)))
  1.1671 +//                  /  \
  1.1672 +//                 v    v
  1.1673 +//         * new-proj  proj
  1.1674 +//         (returned)
  1.1675 +//
  1.1676 +ProjNode* PhaseIdealLoop::insert_if_before_proj(Node* left, bool Signed, BoolTest::mask relop, Node* right, ProjNode* proj) {
  1.1677 +  IfNode* iff = proj->in(0)->as_If();
  1.1678 +  IdealLoopTree *loop = get_loop(proj);
  1.1679 +  ProjNode *other_proj = iff->proj_out(!proj->is_IfTrue())->as_Proj();
  1.1680 +  int ddepth = dom_depth(proj);
  1.1681 +
  1.1682 +  _igvn.rehash_node_delayed(iff);
  1.1683 +  _igvn.rehash_node_delayed(proj);
  1.1684 +
  1.1685 +  proj->set_req(0, NULL);  // temporary disconnect
  1.1686 +  ProjNode* proj2 = proj_clone(proj, iff);
  1.1687 +  register_node(proj2, loop, iff, ddepth);
  1.1688 +
  1.1689 +  Node* cmp = Signed ? (Node*) new (C)CmpINode(left, right) : (Node*) new (C)CmpUNode(left, right);
  1.1690 +  register_node(cmp, loop, proj2, ddepth);
  1.1691 +
  1.1692 +  BoolNode* bol = new (C)BoolNode(cmp, relop);
  1.1693 +  register_node(bol, loop, proj2, ddepth);
  1.1694 +
  1.1695 +  IfNode* new_if = new (C)IfNode(proj2, bol, iff->_prob, iff->_fcnt);
  1.1696 +  register_node(new_if, loop, proj2, ddepth);
  1.1697 +
  1.1698 +  proj->set_req(0, new_if); // reattach
  1.1699 +  set_idom(proj, new_if, ddepth);
  1.1700 +
  1.1701 +  ProjNode* new_exit = proj_clone(other_proj, new_if)->as_Proj();
  1.1702 +  guarantee(new_exit != NULL, "null exit node");
  1.1703 +  register_node(new_exit, get_loop(other_proj), new_if, ddepth);
  1.1704 +
  1.1705 +  return new_exit;
  1.1706 +}
  1.1707 +
  1.1708 +//------------------------------ insert_region_before_proj -------------------------------------
  1.1709 +// Insert a region before an if projection (* - new node)
  1.1710 +//
  1.1711 +// before
  1.1712 +//           if(test)
  1.1713 +//          /      |
  1.1714 +//         v       |
  1.1715 +//       proj      v
  1.1716 +//               other-proj
  1.1717 +//
  1.1718 +// after
  1.1719 +//           if(test)
  1.1720 +//          /      |
  1.1721 +//         v       |
  1.1722 +// * proj-clone    v
  1.1723 +//         |     other-proj
  1.1724 +//         v
  1.1725 +// * new-region
  1.1726 +//         |
  1.1727 +//         v
  1.1728 +// *      dum_if
  1.1729 +//       /     \
  1.1730 +//      v       \
  1.1731 +// * dum-proj    v
  1.1732 +//              proj
  1.1733 +//
  1.1734 +RegionNode* PhaseIdealLoop::insert_region_before_proj(ProjNode* proj) {
  1.1735 +  IfNode* iff = proj->in(0)->as_If();
  1.1736 +  IdealLoopTree *loop = get_loop(proj);
  1.1737 +  ProjNode *other_proj = iff->proj_out(!proj->is_IfTrue())->as_Proj();
  1.1738 +  int ddepth = dom_depth(proj);
  1.1739 +
  1.1740 +  _igvn.rehash_node_delayed(iff);
  1.1741 +  _igvn.rehash_node_delayed(proj);
  1.1742 +
  1.1743 +  proj->set_req(0, NULL);  // temporary disconnect
  1.1744 +  ProjNode* proj2 = proj_clone(proj, iff);
  1.1745 +  register_node(proj2, loop, iff, ddepth);
  1.1746 +
  1.1747 +  RegionNode* reg = new (C)RegionNode(2);
  1.1748 +  reg->set_req(1, proj2);
  1.1749 +  register_node(reg, loop, iff, ddepth);
  1.1750 +
  1.1751 +  IfNode* dum_if = new (C)IfNode(reg, short_circuit_if(NULL, proj), iff->_prob, iff->_fcnt);
  1.1752 +  register_node(dum_if, loop, reg, ddepth);
  1.1753 +
  1.1754 +  proj->set_req(0, dum_if); // reattach
  1.1755 +  set_idom(proj, dum_if, ddepth);
  1.1756 +
  1.1757 +  ProjNode* dum_proj = proj_clone(other_proj, dum_if);
  1.1758 +  register_node(dum_proj, loop, dum_if, ddepth);
  1.1759 +
  1.1760 +  return reg;
  1.1761 +}
  1.1762 +
  1.1763 +//------------------------------ insert_cmpi_loop_exit -------------------------------------
  1.1764 +// Clone a signed compare loop exit from an unsigned compare and
  1.1765 +// insert it before the unsigned cmp on the stay-in-loop path.
   1.1766 +// All new nodes are inserted in the dominator tree between the original
   1.1767 +// if and its projections.  The original if test is replaced with
  1.1768 +// a constant to force the stay-in-loop path.
  1.1769 +//
   1.1770 +// This is done to make sure that the original if and its projections
  1.1771 +// still dominate the same set of control nodes, that the ctrl() relation
  1.1772 +// from data nodes to them is preserved, and that their loop nesting is
  1.1773 +// preserved.
  1.1774 +//
  1.1775 +// before
  1.1776 +//          if(i <u limit)    unsigned compare loop exit
  1.1777 +//         /       |
  1.1778 +//        v        v
  1.1779 +//   exit-proj   stay-in-loop-proj
  1.1780 +//
  1.1781 +// after
  1.1782 +//          if(stay-in-loop-const)  original if
  1.1783 +//         /       |
  1.1784 +//        /        v
  1.1785 +//       /  if(i <  limit)    new signed test
  1.1786 +//      /  /       |
  1.1787 +//     /  /        v
  1.1788 +//    /  /  if(i <u limit)    new cloned unsigned test
  1.1789 +//   /  /   /      |
  1.1790 +//   v  v  v       |
  1.1791 +//    region       |
  1.1792 +//        |        |
  1.1793 +//      dum-if     |
  1.1794 +//     /  |        |
  1.1795 +// ether  |        |
  1.1796 +//        v        v
  1.1797 +//   exit-proj   stay-in-loop-proj
  1.1798 +//
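          +// Informal example (hypothetical): for an exit test  if (i <u a.length)
          +// with a positive stride, the inserted signed test is  if (i < a.length)
          +// on the stay-in-loop path; partial peeling can then use the CmpI-based
          +// exit as its cut point, while the original unsigned If is forced to the
          +// stay-in-loop projection by short_circuit_if().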
  1.1799 +IfNode* PhaseIdealLoop::insert_cmpi_loop_exit(IfNode* if_cmpu, IdealLoopTree *loop) {
  1.1800 +  const bool Signed   = true;
  1.1801 +  const bool Unsigned = false;
  1.1802 +
  1.1803 +  BoolNode* bol = if_cmpu->in(1)->as_Bool();
  1.1804 +  if (bol->_test._test != BoolTest::lt) return NULL;
  1.1805 +  CmpNode* cmpu = bol->in(1)->as_Cmp();
  1.1806 +  if (cmpu->Opcode() != Op_CmpU) return NULL;
  1.1807 +  int stride = stride_of_possible_iv(if_cmpu);
  1.1808 +  if (stride == 0) return NULL;
  1.1809 +
  1.1810 +  Node* lp_proj = stay_in_loop(if_cmpu, loop);
  1.1811 +  guarantee(lp_proj != NULL, "null loop node");
  1.1812 +
  1.1813 +  ProjNode* lp_continue = lp_proj->as_Proj();
  1.1814 +  ProjNode* lp_exit     = if_cmpu->proj_out(!lp_continue->is_IfTrue())->as_Proj();
  1.1815 +
  1.1816 +  Node* limit = NULL;
  1.1817 +  if (stride > 0) {
  1.1818 +    limit = cmpu->in(2);
  1.1819 +  } else {
  1.1820 +    limit = _igvn.makecon(TypeInt::ZERO);
  1.1821 +    set_ctrl(limit, C->root());
  1.1822 +  }
  1.1823 +  // Create a new region on the exit path
  1.1824 +  RegionNode* reg = insert_region_before_proj(lp_exit);
  1.1825 +  guarantee(reg != NULL, "null region node");
  1.1826 +
  1.1827 +  // Clone the if-cmpu-true-false using a signed compare
  1.1828 +  BoolTest::mask rel_i = stride > 0 ? bol->_test._test : BoolTest::ge;
  1.1829 +  ProjNode* cmpi_exit = insert_if_before_proj(cmpu->in(1), Signed, rel_i, limit, lp_continue);
  1.1830 +  reg->add_req(cmpi_exit);
  1.1831 +
   1.1832 +  // Clone the if-cmpu-true-false using the original unsigned compare
  1.1833 +  BoolTest::mask rel_u = bol->_test._test;
  1.1834 +  ProjNode* cmpu_exit = insert_if_before_proj(cmpu->in(1), Unsigned, rel_u, cmpu->in(2), lp_continue);
  1.1835 +  reg->add_req(cmpu_exit);
  1.1836 +
  1.1837 +  // Force original if to stay in loop.
  1.1838 +  short_circuit_if(if_cmpu, lp_continue);
  1.1839 +
  1.1840 +  return cmpi_exit->in(0)->as_If();
  1.1841 +}
  1.1842 +
  1.1843 +//------------------------------ remove_cmpi_loop_exit -------------------------------------
  1.1844 +// Remove a previously inserted signed compare loop exit.
  1.1845 +void PhaseIdealLoop::remove_cmpi_loop_exit(IfNode* if_cmp, IdealLoopTree *loop) {
  1.1846 +  Node* lp_proj = stay_in_loop(if_cmp, loop);
  1.1847 +  assert(if_cmp->in(1)->in(1)->Opcode() == Op_CmpI &&
  1.1848 +         stay_in_loop(lp_proj, loop)->is_If() &&
  1.1849 +         stay_in_loop(lp_proj, loop)->in(1)->in(1)->Opcode() == Op_CmpU, "inserted cmpi before cmpu");
  1.1850 +  Node *con = _igvn.makecon(lp_proj->is_IfTrue() ? TypeInt::ONE : TypeInt::ZERO);
  1.1851 +  set_ctrl(con, C->root());
  1.1852 +  if_cmp->set_req(1, con);
  1.1853 +}
  1.1854 +
  1.1855 +//------------------------------ scheduled_nodelist -------------------------------------
  1.1856 +// Create a post order schedule of nodes that are in the
  1.1857 +// "member" set.  The list is returned in "sched".
  1.1858 +// The first node in "sched" is the loop head, followed by
  1.1859 +// nodes which have no inputs in the "member" set, and then
  1.1860 +// followed by the nodes that have an immediate input dependence
  1.1861 +// on a node in "sched".
  1.1862 +void PhaseIdealLoop::scheduled_nodelist( IdealLoopTree *loop, VectorSet& member, Node_List &sched ) {
  1.1863 +
  1.1864 +  assert(member.test(loop->_head->_idx), "loop head must be in member set");
  1.1865 +  Arena *a = Thread::current()->resource_area();
  1.1866 +  VectorSet visited(a);
  1.1867 +  Node_Stack nstack(a, loop->_body.size());
  1.1868 +
  1.1869 +  Node* n  = loop->_head;  // top of stack is cached in "n"
  1.1870 +  uint idx = 0;
  1.1871 +  visited.set(n->_idx);
  1.1872 +
  1.1873 +  // Initially push all with no inputs from within member set
  1.1874 +  for(uint i = 0; i < loop->_body.size(); i++ ) {
  1.1875 +    Node *elt = loop->_body.at(i);
  1.1876 +    if (member.test(elt->_idx)) {
  1.1877 +      bool found = false;
  1.1878 +      for (uint j = 0; j < elt->req(); j++) {
  1.1879 +        Node* def = elt->in(j);
  1.1880 +        if (def && member.test(def->_idx) && def != elt) {
  1.1881 +          found = true;
  1.1882 +          break;
  1.1883 +        }
  1.1884 +      }
  1.1885 +      if (!found && elt != loop->_head) {
  1.1886 +        nstack.push(n, idx);
  1.1887 +        n = elt;
  1.1888 +        assert(!visited.test(n->_idx), "not seen yet");
  1.1889 +        visited.set(n->_idx);
  1.1890 +      }
  1.1891 +    }
  1.1892 +  }
  1.1893 +
  1.1894 +  // traverse out's that are in the member set
  1.1895 +  while (true) {
  1.1896 +    if (idx < n->outcnt()) {
  1.1897 +      Node* use = n->raw_out(idx);
  1.1898 +      idx++;
  1.1899 +      if (!visited.test_set(use->_idx)) {
  1.1900 +        if (member.test(use->_idx)) {
  1.1901 +          nstack.push(n, idx);
  1.1902 +          n = use;
  1.1903 +          idx = 0;
  1.1904 +        }
  1.1905 +      }
  1.1906 +    } else {
  1.1907 +      // All outputs processed
  1.1908 +      sched.push(n);
  1.1909 +      if (nstack.is_empty()) break;
  1.1910 +      n   = nstack.node();
  1.1911 +      idx = nstack.index();
  1.1912 +      nstack.pop();
  1.1913 +    }
  1.1914 +  }
  1.1915 +}
  1.1916 +
  1.1917 +
  1.1918 +//------------------------------ has_use_in_set -------------------------------------
  1.1919 +// Has a use in the vector set
  1.1920 +bool PhaseIdealLoop::has_use_in_set( Node* n, VectorSet& vset ) {
  1.1921 +  for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
  1.1922 +    Node* use = n->fast_out(j);
  1.1923 +    if (vset.test(use->_idx)) {
  1.1924 +      return true;
  1.1925 +    }
  1.1926 +  }
  1.1927 +  return false;
  1.1928 +}
  1.1929 +
  1.1930 +
  1.1931 +//------------------------------ has_use_internal_to_set -------------------------------------
   1.1932 +// Has a use internal to the vector set (i.e. not in a phi at the loop head)
  1.1933 +bool PhaseIdealLoop::has_use_internal_to_set( Node* n, VectorSet& vset, IdealLoopTree *loop ) {
  1.1934 +  Node* head  = loop->_head;
  1.1935 +  for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
  1.1936 +    Node* use = n->fast_out(j);
  1.1937 +    if (vset.test(use->_idx) && !(use->is_Phi() && use->in(0) == head)) {
  1.1938 +      return true;
  1.1939 +    }
  1.1940 +  }
  1.1941 +  return false;
  1.1942 +}
  1.1943 +
  1.1944 +
  1.1945 +//------------------------------ clone_for_use_outside_loop -------------------------------------
  1.1946 +// clone "n" for uses that are outside of loop
  1.1947 +int PhaseIdealLoop::clone_for_use_outside_loop( IdealLoopTree *loop, Node* n, Node_List& worklist ) {
  1.1948 +  int cloned = 0;
  1.1949 +  assert(worklist.size() == 0, "should be empty");
  1.1950 +  for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
  1.1951 +    Node* use = n->fast_out(j);
  1.1952 +    if( !loop->is_member(get_loop(has_ctrl(use) ? get_ctrl(use) : use)) ) {
  1.1953 +      worklist.push(use);
  1.1954 +    }
  1.1955 +  }
  1.1956 +  while( worklist.size() ) {
  1.1957 +    Node *use = worklist.pop();
  1.1958 +    if (!has_node(use) || use->in(0) == C->top()) continue;
  1.1959 +    uint j;
  1.1960 +    for (j = 0; j < use->req(); j++) {
  1.1961 +      if (use->in(j) == n) break;
  1.1962 +    }
  1.1963 +    assert(j < use->req(), "must be there");
  1.1964 +
  1.1965 +    // clone "n" and insert it between the inputs of "n" and the use outside the loop
  1.1966 +    Node* n_clone = n->clone();
  1.1967 +    _igvn.replace_input_of(use, j, n_clone);
  1.1968 +    cloned++;
  1.1969 +    Node* use_c;
  1.1970 +    if (!use->is_Phi()) {
  1.1971 +      use_c = has_ctrl(use) ? get_ctrl(use) : use->in(0);
  1.1972 +    } else {
  1.1973 +      // Use in a phi is considered a use in the associated predecessor block
  1.1974 +      use_c = use->in(0)->in(j);
  1.1975 +    }
  1.1976 +    set_ctrl(n_clone, use_c);
  1.1977 +    assert(!loop->is_member(get_loop(use_c)), "should be outside loop");
  1.1978 +    get_loop(use_c)->_body.push(n_clone);
  1.1979 +    _igvn.register_new_node_with_optimizer(n_clone);
  1.1980 +#if !defined(PRODUCT)
  1.1981 +    if (TracePartialPeeling) {
  1.1982 +      tty->print_cr("loop exit cloning old: %d new: %d newbb: %d", n->_idx, n_clone->_idx, get_ctrl(n_clone)->_idx);
  1.1983 +    }
  1.1984 +#endif
  1.1985 +  }
  1.1986 +  return cloned;
  1.1987 +}
  1.1988 +
  1.1989 +
  1.1990 +//------------------------------ clone_for_special_use_inside_loop -------------------------------------
  1.1991 +// clone "n" for special uses that are in the not_peeled region.
  1.1992 +// If these def-uses occur in separate blocks, the code generator
  1.1993 +// marks the method as not compilable.  For example, if a "BoolNode"
  1.1994 +// is in a different basic block than the "IfNode" that uses it, then
  1.1995 +// the compilation is aborted in the code generator.
  1.1996 +void PhaseIdealLoop::clone_for_special_use_inside_loop( IdealLoopTree *loop, Node* n,
  1.1997 +                                                        VectorSet& not_peel, Node_List& sink_list, Node_List& worklist ) {
  1.1998 +  if (n->is_Phi() || n->is_Load()) {
  1.1999 +    return;
  1.2000 +  }
  1.2001 +  assert(worklist.size() == 0, "should be empty");
  1.2002 +  for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
  1.2003 +    Node* use = n->fast_out(j);
  1.2004 +    if ( not_peel.test(use->_idx) &&
  1.2005 +         (use->is_If() || use->is_CMove() || use->is_Bool()) &&
  1.2006 +         use->in(1) == n)  {
  1.2007 +      worklist.push(use);
  1.2008 +    }
  1.2009 +  }
  1.2010 +  if (worklist.size() > 0) {
  1.2011 +    // clone "n" and insert it between inputs of "n" and the use
  1.2012 +    Node* n_clone = n->clone();
  1.2013 +    loop->_body.push(n_clone);
  1.2014 +    _igvn.register_new_node_with_optimizer(n_clone);
  1.2015 +    set_ctrl(n_clone, get_ctrl(n));
  1.2016 +    sink_list.push(n_clone);
  1.2017 +    not_peel <<= n_clone->_idx;  // add n_clone to not_peel set.
  1.2018 +#if !defined(PRODUCT)
  1.2019 +    if (TracePartialPeeling) {
  1.2020 +      tty->print_cr("special not_peeled cloning old: %d new: %d", n->_idx, n_clone->_idx);
  1.2021 +    }
  1.2022 +#endif
  1.2023 +    while( worklist.size() ) {
  1.2024 +      Node *use = worklist.pop();
  1.2025 +      _igvn.rehash_node_delayed(use);
  1.2026 +      for (uint j = 1; j < use->req(); j++) {
  1.2027 +        if (use->in(j) == n) {
  1.2028 +          use->set_req(j, n_clone);
  1.2029 +        }
  1.2030 +      }
  1.2031 +    }
  1.2032 +  }
  1.2033 +}
  1.2034 +
  1.2035 +
  1.2036 +//------------------------------ insert_phi_for_loop -------------------------------------
  1.2037 +// Insert phi(lp_entry_val, back_edge_val) at use->in(idx) for loop lp if phi does not already exist
  1.2038 +void PhaseIdealLoop::insert_phi_for_loop( Node* use, uint idx, Node* lp_entry_val, Node* back_edge_val, LoopNode* lp ) {
  1.2039 +  Node *phi = PhiNode::make(lp, back_edge_val);
  1.2040 +  phi->set_req(LoopNode::EntryControl, lp_entry_val);
  1.2041 +  // Use existing phi if it already exists
  1.2042 +  Node *hit = _igvn.hash_find_insert(phi);
  1.2043 +  if( hit == NULL ) {
  1.2044 +    _igvn.register_new_node_with_optimizer(phi);
  1.2045 +    set_ctrl(phi, lp);
  1.2046 +  } else {
  1.2047 +    // Remove the new phi from the graph and use the hit
  1.2048 +    _igvn.remove_dead_node(phi);
  1.2049 +    phi = hit;
  1.2050 +  }
  1.2051 +  _igvn.replace_input_of(use, idx, phi);
  1.2052 +}
  1.2053 +
  1.2054 +#ifdef ASSERT
  1.2055 +//------------------------------ is_valid_loop_partition -------------------------------------
  1.2056 +// Validate the loop partition sets: peel and not_peel
  1.2057 +bool PhaseIdealLoop::is_valid_loop_partition( IdealLoopTree *loop, VectorSet& peel, Node_List& peel_list,
  1.2058 +                                              VectorSet& not_peel ) {
  1.2059 +  uint i;
  1.2060 +  // Check that peel_list entries are in the peel set
  1.2061 +  for (i = 0; i < peel_list.size(); i++) {
  1.2062 +    if (!peel.test(peel_list.at(i)->_idx)) {
  1.2063 +      return false;
  1.2064 +    }
  1.2065 +  }
   1.2066 +  // Check that loop members are in either the peel set or the not_peel set
  1.2067 +  for (i = 0; i < loop->_body.size(); i++ ) {
  1.2068 +    Node *def  = loop->_body.at(i);
  1.2069 +    uint di = def->_idx;
  1.2070 +    // Check that peel set elements are in peel_list
  1.2071 +    if (peel.test(di)) {
  1.2072 +      if (not_peel.test(di)) {
  1.2073 +        return false;
  1.2074 +      }
  1.2075 +      // Must be in peel_list also
  1.2076 +      bool found = false;
  1.2077 +      for (uint j = 0; j < peel_list.size(); j++) {
  1.2078 +        if (peel_list.at(j)->_idx == di) {
  1.2079 +          found = true;
  1.2080 +          break;
  1.2081 +        }
  1.2082 +      }
  1.2083 +      if (!found) {
  1.2084 +        return false;
  1.2085 +      }
  1.2086 +    } else if (not_peel.test(di)) {
  1.2087 +      if (peel.test(di)) {
  1.2088 +        return false;
  1.2089 +      }
  1.2090 +    } else {
  1.2091 +      return false;
  1.2092 +    }
  1.2093 +  }
  1.2094 +  return true;
  1.2095 +}
  1.2096 +
  1.2097 +//------------------------------ is_valid_clone_loop_exit_use -------------------------------------
   1.2098 +// Ensure a use outside of the loop is of the right form
  1.2099 +bool PhaseIdealLoop::is_valid_clone_loop_exit_use( IdealLoopTree *loop, Node* use, uint exit_idx) {
  1.2100 +  Node *use_c = has_ctrl(use) ? get_ctrl(use) : use;
  1.2101 +  return (use->is_Phi() &&
  1.2102 +          use_c->is_Region() && use_c->req() == 3 &&
  1.2103 +          (use_c->in(exit_idx)->Opcode() == Op_IfTrue ||
  1.2104 +           use_c->in(exit_idx)->Opcode() == Op_IfFalse ||
  1.2105 +           use_c->in(exit_idx)->Opcode() == Op_JumpProj) &&
  1.2106 +          loop->is_member( get_loop( use_c->in(exit_idx)->in(0) ) ) );
  1.2107 +}
  1.2108 +
  1.2109 +//------------------------------ is_valid_clone_loop_form -------------------------------------
   1.2110 +// Ensure that all uses outside of the loop are of the right form
  1.2111 +bool PhaseIdealLoop::is_valid_clone_loop_form( IdealLoopTree *loop, Node_List& peel_list,
  1.2112 +                                               uint orig_exit_idx, uint clone_exit_idx) {
  1.2113 +  uint len = peel_list.size();
  1.2114 +  for (uint i = 0; i < len; i++) {
  1.2115 +    Node *def = peel_list.at(i);
  1.2116 +
  1.2117 +    for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
  1.2118 +      Node *use = def->fast_out(j);
  1.2119 +      Node *use_c = has_ctrl(use) ? get_ctrl(use) : use;
  1.2120 +      if (!loop->is_member(get_loop(use_c))) {
  1.2121 +        // use is not in the loop, check for correct structure
  1.2122 +        if (use->in(0) == def) {
  1.2123 +          // Okay
  1.2124 +        } else if (!is_valid_clone_loop_exit_use(loop, use, orig_exit_idx)) {
  1.2125 +          return false;
  1.2126 +        }
  1.2127 +      }
  1.2128 +    }
  1.2129 +  }
  1.2130 +  return true;
  1.2131 +}
  1.2132 +#endif
  1.2133 +
  1.2134 +//------------------------------ partial_peel -------------------------------------
  1.2135 +// Partially peel (aka loop rotation) the top portion of a loop (called
  1.2136 +// the peel section below) by cloning it and placing one copy just before
  1.2137 +// the new loop head and the other copy at the bottom of the new loop.
  1.2138 +//
  1.2139 +//    before                       after                where it came from
  1.2140 +//
  1.2141 +//    stmt1                        stmt1
  1.2142 +//  loop:                          stmt2                     clone
  1.2143 +//    stmt2                        if condA goto exitA       clone
  1.2144 +//    if condA goto exitA        new_loop:                   new
  1.2145 +//    stmt3                        stmt3                     clone
  1.2146 +//    if !condB goto loop          if condB goto exitB       clone
  1.2147 +//  exitB:                         stmt2                     orig
  1.2148 +//    stmt4                        if !condA goto new_loop   orig
  1.2149 +//  exitA:                         goto exitA
  1.2150 +//                               exitB:
  1.2151 +//                                 stmt4
  1.2152 +//                               exitA:
  1.2153 +//
   1.2154 +// Step 1: find the cut point: an exit test on a probable
  1.2155 +//         induction variable.
  1.2156 +// Step 2: schedule (with cloning) operations in the peel
  1.2157 +//         section that can be executed after the cut into
  1.2158 +//         the section that is not peeled.  This may need
  1.2159 +//         to clone operations into exit blocks.  For
  1.2160 +//         instance, a reference to A[i] in the not-peel
  1.2161 +//         section and a reference to B[i] in an exit block
  1.2162 +//         may cause a left-shift of i by 2 to be placed
  1.2163 +//         in the peel block.  This step will clone the left
  1.2164 +//         shift into the exit block and sink the left shift
  1.2165 +//         from the peel to the not-peel section.
  1.2166 +// Step 3: clone the loop, retarget the control, and insert
  1.2167 +//         phis for values that are live across the new loop
  1.2168 +//         head.  This is very dependent on the graph structure
  1.2169 +//         from clone_loop.  It creates region nodes for
  1.2170 +//         exit control and associated phi nodes for values
  1.2171 +//         flow out of the loop through that exit.  The region
  1.2172 +//         node is dominated by the clone's control projection.
  1.2173 +//         So the clone's peel section is placed before the
   1.2174 +//         new loop head, and the clone's not-peel section
   1.2175 +//         forms the top part of the new loop.  The original
  1.2176 +//         peel section forms the tail of the new loop.
  1.2177 +// Step 4: update the dominator tree and recompute the
  1.2178 +//         dominator depth.
  1.2179 +//
  1.2180 +//                   orig
  1.2181 +//
  1.2182 +//                   stmt1
  1.2183 +//                     |
  1.2184 +//                     v
  1.2185 +//               loop predicate
  1.2186 +//                     |
  1.2187 +//                     v
  1.2188 +//                   loop<----+
  1.2189 +//                     |      |
  1.2190 +//                   stmt2    |
  1.2191 +//                     |      |
  1.2192 +//                     v      |
  1.2193 +//                    ifA     |
  1.2194 +//                   / |      |
  1.2195 +//                  v  v      |
  1.2196 +//               false true   ^  <-- last_peel
  1.2197 +//               /     |      |
  1.2198 +//              /   ===|==cut |
  1.2199 +//             /     stmt3    |  <-- first_not_peel
  1.2200 +//            /        |      |
  1.2201 +//            |        v      |
  1.2202 +//            v       ifB     |
  1.2203 +//          exitA:   / \      |
  1.2204 +//                  /   \     |
  1.2205 +//                 v     v    |
  1.2206 +//               false true   |
  1.2207 +//               /       \    |
  1.2208 +//              /         ----+
  1.2209 +//             |
  1.2210 +//             v
  1.2211 +//           exitB:
  1.2212 +//           stmt4
  1.2213 +//
  1.2214 +//
  1.2215 +//            after clone loop
  1.2216 +//
  1.2217 +//                   stmt1
  1.2218 +//                     |
  1.2219 +//                     v
  1.2220 +//               loop predicate
  1.2221 +//                 /       \
  1.2222 +//        clone   /         \   orig
  1.2223 +//               /           \
  1.2224 +//              /             \
  1.2225 +//             v               v
  1.2226 +//   +---->loop                loop<----+
  1.2227 +//   |      |                    |      |
  1.2228 +//   |    stmt2                stmt2    |
  1.2229 +//   |      |                    |      |
  1.2230 +//   |      v                    v      |
  1.2231 +//   |      ifA                 ifA     |
  1.2232 +//   |      | \                / |      |
  1.2233 +//   |      v  v              v  v      |
  1.2234 +//   ^    true  false      false true   ^  <-- last_peel
  1.2235 +//   |      |   ^   \       /    |      |
  1.2236 +//   | cut==|==  \   \     /  ===|==cut |
  1.2237 +//   |    stmt3   \   \   /    stmt3    |  <-- first_not_peel
  1.2238 +//   |      |    dom   | |       |      |
  1.2239 +//   |      v      \  1v v2      v      |
  1.2240 +//   |      ifB     regionA     ifB     |
  1.2241 +//   |      / \        |       / \      |
  1.2242 +//   |     /   \       v      /   \     |
  1.2243 +//   |    v     v    exitA:  v     v    |
  1.2244 +//   |    true  false      false true   |
  1.2245 +//   |    /     ^   \      /       \    |
  1.2246 +//   +----       \   \    /         ----+
  1.2247 +//               dom  \  /
  1.2248 +//                 \  1v v2
  1.2249 +//                  regionB
  1.2250 +//                     |
  1.2251 +//                     v
  1.2252 +//                   exitB:
  1.2253 +//                   stmt4
  1.2254 +//
  1.2255 +//
  1.2256 +//           after partial peel
  1.2257 +//
  1.2258 +//                  stmt1
  1.2259 +//                     |
  1.2260 +//                     v
  1.2261 +//               loop predicate
  1.2262 +//                 /
  1.2263 +//        clone   /             orig
  1.2264 +//               /          TOP
  1.2265 +//              /             \
  1.2266 +//             v               v
  1.2267 +//    TOP->loop                loop----+
  1.2268 +//          |                    |      |
  1.2269 +//        stmt2                stmt2    |
  1.2270 +//          |                    |      |
  1.2271 +//          v                    v      |
  1.2272 +//          ifA                 ifA     |
  1.2273 +//          | \                / |      |
  1.2274 +//          v  v              v  v      |
  1.2275 +//        true  false      false true   |     <-- last_peel
  1.2276 +//          |   ^   \       /    +------|---+
  1.2277 +//  +->newloop   \   \     /  === ==cut |   |
  1.2278 +//  |     stmt3   \   \   /     TOP     |   |
  1.2279 +//  |       |    dom   | |      stmt3   |   | <-- first_not_peel
  1.2280 +//  |       v      \  1v v2      v      |   |
  1.2281 +//  |       ifB     regionA     ifB     ^   v
  1.2282 +//  |       / \        |       / \      |   |
  1.2283 +//  |      /   \       v      /   \     |   |
  1.2284 +//  |     v     v    exitA:  v     v    |   |
  1.2285 +//  |     true  false      false true   |   |
  1.2286 +//  |     /     ^   \      /       \    |   |
  1.2287 +//  |    |       \   \    /         v   |   |
  1.2288 +//  |    |       dom  \  /         TOP  |   |
  1.2289 +//  |    |         \  1v v2             |   |
  1.2290 +//  ^    v          regionB             |   |
  1.2291 +//  |    |             |                |   |
  1.2292 +//  |    |             v                ^   v
  1.2293 +//  |    |           exitB:             |   |
  1.2294 +//  |    |           stmt4              |   |
  1.2295 +//  |    +------------>-----------------+   |
  1.2296 +//  |                                       |
  1.2297 +//  +-----------------<---------------------+
  1.2298 +//
  1.2299 +//
  1.2300 +//              final graph
  1.2301 +//
  1.2302 +//                  stmt1
  1.2303 +//                    |
  1.2304 +//                    v
  1.2305 +//               loop predicate
  1.2306 +//                    |
  1.2307 +//                    v
  1.2308 +//                  stmt2 clone
  1.2309 +//                    |
  1.2310 +//                    v
  1.2311 +//         ........> ifA clone
  1.2312 +//         :        / |
  1.2313 +//        dom      /  |
  1.2314 +//         :      v   v
  1.2315 +//         :  false   true
  1.2316 +//         :  |       |
  1.2317 +//         :  |       v
  1.2318 +//         :  |    newloop<-----+
  1.2319 +//         :  |        |        |
  1.2320 +//         :  |     stmt3 clone |
  1.2321 +//         :  |        |        |
  1.2322 +//         :  |        v        |
  1.2323 +//         :  |       ifB       |
  1.2324 +//         :  |      / \        |
  1.2325 +//         :  |     v   v       |
  1.2326 +//         :  |  false true     |
  1.2327 +//         :  |   |     |       |
  1.2328 +//         :  |   v    stmt2    |
  1.2329 +//         :  | exitB:  |       |
  1.2330 +//         :  | stmt4   v       |
  1.2331 +//         :  |       ifA orig  |
  1.2332 +//         :  |      /  \       |
  1.2333 +//         :  |     /    \      |
  1.2334 +//         :  |    v     v      |
  1.2335 +//         :  |  false  true    |
  1.2336 +//         :  |  /        \     |
  1.2337 +//         :  v  v         -----+
  1.2338 +//          RegionA
  1.2339 +//             |
  1.2340 +//             v
  1.2341 +//           exitA
  1.2342 +//
  1.2343 +bool PhaseIdealLoop::partial_peel( IdealLoopTree *loop, Node_List &old_new ) {
  1.2344 +
  1.2345 +  assert(!loop->_head->is_CountedLoop(), "Non-counted loop only");
   1.2346 +  if (!loop->_head->is_Loop()) {
   1.2347 +    return false;
          +  }
  1.2348 +
  1.2349 +  LoopNode *head  = loop->_head->as_Loop();
  1.2350 +
  1.2351 +  if (head->is_partial_peel_loop() || head->partial_peel_has_failed()) {
  1.2352 +    return false;
  1.2353 +  }
  1.2354 +
  1.2355 +  // Check for complex exit control
  1.2356 +  for(uint ii = 0; ii < loop->_body.size(); ii++ ) {
  1.2357 +    Node *n = loop->_body.at(ii);
  1.2358 +    int opc = n->Opcode();
  1.2359 +    if (n->is_Call()        ||
  1.2360 +        opc == Op_Catch     ||
  1.2361 +        opc == Op_CatchProj ||
  1.2362 +        opc == Op_Jump      ||
  1.2363 +        opc == Op_JumpProj) {
  1.2364 +#if !defined(PRODUCT)
  1.2365 +      if (TracePartialPeeling) {
  1.2366 +        tty->print_cr("\nExit control too complex: lp: %d", head->_idx);
  1.2367 +      }
  1.2368 +#endif
  1.2369 +      return false;
  1.2370 +    }
  1.2371 +  }
  1.2372 +
  1.2373 +  int dd = dom_depth(head);
  1.2374 +
  1.2375 +  // Step 1: find cut point
  1.2376 +
  1.2377 +  // Walk up dominators to loop head looking for first loop exit
  1.2378 +  // which is executed on every path thru loop.
  1.2379 +  IfNode *peel_if = NULL;
  1.2380 +  IfNode *peel_if_cmpu = NULL;
  1.2381 +
  1.2382 +  Node *iff = loop->tail();
  1.2383 +  while( iff != head ) {
  1.2384 +    if( iff->is_If() ) {
  1.2385 +      Node *ctrl = get_ctrl(iff->in(1));
  1.2386 +      if (ctrl->is_top()) return false; // Dead test on live IF.
  1.2387 +      // If loop-varying exit-test, check for induction variable
  1.2388 +      if( loop->is_member(get_loop(ctrl)) &&
  1.2389 +          loop->is_loop_exit(iff) &&
  1.2390 +          is_possible_iv_test(iff)) {
  1.2391 +        Node* cmp = iff->in(1)->in(1);
  1.2392 +        if (cmp->Opcode() == Op_CmpI) {
  1.2393 +          peel_if = iff->as_If();
  1.2394 +        } else {
  1.2395 +          assert(cmp->Opcode() == Op_CmpU, "must be CmpI or CmpU");
  1.2396 +          peel_if_cmpu = iff->as_If();
  1.2397 +        }
  1.2398 +      }
  1.2399 +    }
  1.2400 +    iff = idom(iff);
  1.2401 +  }
  1.2402 +  // Prefer signed compare over unsigned compare.
  1.2403 +  IfNode* new_peel_if = NULL;
  1.2404 +  if (peel_if == NULL) {
  1.2405 +    if (!PartialPeelAtUnsignedTests || peel_if_cmpu == NULL) {
  1.2406 +      return false;   // No peel point found
  1.2407 +    }
  1.2408 +    new_peel_if = insert_cmpi_loop_exit(peel_if_cmpu, loop);
  1.2409 +    if (new_peel_if == NULL) {
  1.2410 +      return false;   // No peel point found
  1.2411 +    }
  1.2412 +    peel_if = new_peel_if;
  1.2413 +  }
  1.2414 +  Node* last_peel        = stay_in_loop(peel_if, loop);
  1.2415 +  Node* first_not_peeled = stay_in_loop(last_peel, loop);
  1.2416 +  if (first_not_peeled == NULL || first_not_peeled == head) {
  1.2417 +    return false;
  1.2418 +  }
  1.2419 +
  1.2420 +#if !defined(PRODUCT)
  1.2421 +  if (TraceLoopOpts) {
  1.2422 +    tty->print("PartialPeel  ");
  1.2423 +    loop->dump_head();
  1.2424 +  }
  1.2425 +
  1.2426 +  if (TracePartialPeeling) {
  1.2427 +    tty->print_cr("before partial peel one iteration");
  1.2428 +    Node_List wl;
  1.2429 +    Node* t = head->in(2);
  1.2430 +    while (true) {
  1.2431 +      wl.push(t);
  1.2432 +      if (t == head) break;
  1.2433 +      t = idom(t);
  1.2434 +    }
  1.2435 +    while (wl.size() > 0) {
  1.2436 +      Node* tt = wl.pop();
  1.2437 +      tt->dump();
  1.2438 +      if (tt == last_peel) tty->print_cr("-- cut --");
  1.2439 +    }
  1.2440 +  }
  1.2441 +#endif
  1.2442 +  ResourceArea *area = Thread::current()->resource_area();
  1.2443 +  VectorSet peel(area);
  1.2444 +  VectorSet not_peel(area);
  1.2445 +  Node_List peel_list(area);
  1.2446 +  Node_List worklist(area);
  1.2447 +  Node_List sink_list(area);
  1.2448 +
  1.2449 +  // The cfg nodes to peel are those reachable from the head
  1.2450 +  // without passing beyond last_peel.
  1.2451 +  assert(worklist.size() == 0, "should be empty");
  1.2452 +  worklist.push(head);
  1.2453 +  peel.set(head->_idx);
  1.2454 +  while (worklist.size() > 0) {
  1.2455 +    Node *n = worklist.pop();
  1.2456 +    if (n != last_peel) {
  1.2457 +      for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
  1.2458 +        Node* use = n->fast_out(j);
  1.2459 +        if (use->is_CFG() &&
  1.2460 +            loop->is_member(get_loop(use)) &&
  1.2461 +            !peel.test_set(use->_idx)) {
  1.2462 +          worklist.push(use);
  1.2463 +        }
  1.2464 +      }
  1.2465 +    }
  1.2466 +  }
  1.2467 +
  1.2468 +  // The non-cfg nodes to peel are those whose controlling
  1.2469 +  // cfg node is in the peel set.
  1.2470 +  uint i;
  1.2471 +  for(i = 0; i < loop->_body.size(); i++ ) {
  1.2472 +    Node *n = loop->_body.at(i);
  1.2473 +    Node *n_c = has_ctrl(n) ? get_ctrl(n) : n;
  1.2474 +    if (peel.test(n_c->_idx)) {
  1.2475 +      peel.set(n->_idx);
  1.2476 +    } else {
  1.2477 +      not_peel.set(n->_idx);
  1.2478 +    }
  1.2479 +  }
  1.2480 +
  1.2481 +  // Step 2: move operations from the peeled section down into the
  1.2482 +  //         not-peeled section
  1.2483 +
  1.2484 +  // Get a post order schedule of nodes in the peel region
  1.2485 +  // Result in right-most operand.
  1.2486 +  scheduled_nodelist(loop, peel, peel_list );
  1.2487 +
  1.2488 +  assert(is_valid_loop_partition(loop, peel, peel_list, not_peel), "bad partition");
  1.2489 +
  1.2490 +  // Count the phis on the loop head now, for a later check against creating too many new ones
  1.2491 +  uint old_phi_cnt = 0;
  1.2492 +  for (DUIterator_Fast jmax, j = head->fast_outs(jmax); j < jmax; j++) {
  1.2493 +    Node* use = head->fast_out(j);
  1.2494 +    if (use->is_Phi()) old_phi_cnt++;
  1.2495 +  }
  1.2496 +
  1.2497 +#if !defined(PRODUCT)
  1.2498 +  if (TracePartialPeeling) {
  1.2499 +    tty->print_cr("\npeeled list");
  1.2500 +  }
  1.2501 +#endif
  1.2502 +
  1.2503 +  // Evacuate nodes in peel region into the not_peeled region if possible
  1.2504 +  uint new_phi_cnt = 0;
  1.2505 +  uint cloned_for_outside_use = 0;
  1.2506 +  for (i = 0; i < peel_list.size();) {
  1.2507 +    Node* n = peel_list.at(i);
  1.2508 +#if !defined(PRODUCT)
  1.2509 +    if (TracePartialPeeling) n->dump();
  1.2510 +#endif
  1.2511 +    bool incr = true;
  1.2512 +    if ( !n->is_CFG() ) {
  1.2513 +
  1.2514 +      if ( has_use_in_set(n, not_peel) ) {
  1.2515 +
  1.2516 +        // If "n" is not also used inside the peeled region,
  1.2517 +        // move it from the peeled to the not_peeled region.
  1.2518 +
  1.2519 +        if ( !has_use_internal_to_set(n, peel, loop) ) {
  1.2520 +
  1.2521 +          // if not pinned and not a load (which may be anti-dependent on a store)
  1.2522 +          // and not a CMove (Matcher expects only bool->cmove).
  1.2523 +          if ( n->in(0) == NULL && !n->is_Load() && !n->is_CMove() ) {
  1.2524 +            cloned_for_outside_use += clone_for_use_outside_loop( loop, n, worklist );
  1.2525 +            sink_list.push(n);
  1.2526 +            peel     >>= n->_idx; // delete n from peel set.
  1.2527 +            not_peel <<= n->_idx; // add n to not_peel set.
  1.2528 +            peel_list.remove(i);
  1.2529 +            incr = false;
  1.2530 +#if !defined(PRODUCT)
  1.2531 +            if (TracePartialPeeling) {
  1.2532 +              tty->print_cr("sink to not_peeled region: %d newbb: %d",
  1.2533 +                            n->_idx, get_ctrl(n)->_idx);
  1.2534 +            }
  1.2535 +#endif
  1.2536 +          }
  1.2537 +        } else {
  1.2538 +          // Otherwise check for special def-use cases that span
  1.2539 +          // the peel/not_peel boundary such as bool->if
  1.2540 +          clone_for_special_use_inside_loop( loop, n, not_peel, sink_list, worklist );
  1.2541 +          new_phi_cnt++;
  1.2542 +        }
  1.2543 +      }
  1.2544 +    }
  1.2545 +    if (incr) i++;
  1.2546 +  }
  1.2547 +
  1.2548 +  if (new_phi_cnt > old_phi_cnt + PartialPeelNewPhiDelta) {
  1.2549 +#if !defined(PRODUCT)
  1.2550 +    if (TracePartialPeeling) {
  1.2551 +      tty->print_cr("\nToo many new phis: %d  old %d new cmpi: %c",
  1.2552 +                    new_phi_cnt, old_phi_cnt, new_peel_if != NULL?'T':'F');
  1.2553 +    }
  1.2554 +#endif
  1.2555 +    if (new_peel_if != NULL) {
  1.2556 +      remove_cmpi_loop_exit(new_peel_if, loop);
  1.2557 +    }
  1.2558 +    // Inhibit more partial peeling on this loop
  1.2559 +    assert(!head->is_partial_peel_loop(), "not partial peeled");
  1.2560 +    head->mark_partial_peel_failed();
  1.2561 +    if (cloned_for_outside_use > 0) {
  1.2562 +      // Terminate this round of loop opts because
  1.2563 +      // the graph outside this loop was changed.
  1.2564 +      C->set_major_progress();
  1.2565 +      return true;
  1.2566 +    }
  1.2567 +    return false;
  1.2568 +  }
  1.2569 +
  1.2570 +  // Step 3: clone loop, retarget control, and insert new phis
  1.2571 +
  1.2572 +  // Create new loop head for new phis and to hang
  1.2573 +  // the nodes being moved (sunk) from the peel region.
  1.2574 +  LoopNode* new_head = new (C) LoopNode(last_peel, last_peel);
  1.2575 +  new_head->set_unswitch_count(head->unswitch_count()); // Preserve
  1.2576 +  _igvn.register_new_node_with_optimizer(new_head);
  1.2577 +  assert(first_not_peeled->in(0) == last_peel, "last_peel <- first_not_peeled");
  1.2578 +  first_not_peeled->set_req(0, new_head);
  1.2579 +  set_loop(new_head, loop);
  1.2580 +  loop->_body.push(new_head);
  1.2581 +  not_peel.set(new_head->_idx);
  1.2582 +  set_idom(new_head, last_peel, dom_depth(first_not_peeled));
  1.2583 +  set_idom(first_not_peeled, new_head, dom_depth(first_not_peeled));
  1.2584 +
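         +  // The sunk nodes now take their control from the new loop head.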
  1.2585 +  while (sink_list.size() > 0) {
  1.2586 +    Node* n = sink_list.pop();
  1.2587 +    set_ctrl(n, new_head);
  1.2588 +  }
  1.2589 +
  1.2590 +  assert(is_valid_loop_partition(loop, peel, peel_list, not_peel), "bad partition");
  1.2591 +
  1.2592 +  clone_loop( loop, old_new, dd );
  1.2593 +
  1.2594 +  const uint clone_exit_idx = 1;
  1.2595 +  const uint orig_exit_idx  = 2;
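         +  // clone_loop() merges each loop exit through a Region (and Phis) whose
         +  // input 1 is the cloned exit and input 2 is the original exit
         +  // (checked by is_valid_clone_loop_form below).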
  1.2596 +  assert(is_valid_clone_loop_form( loop, peel_list, orig_exit_idx, clone_exit_idx ), "bad clone loop");
  1.2597 +
  1.2598 +  Node* head_clone             = old_new[head->_idx];
  1.2599 +  LoopNode* new_head_clone     = old_new[new_head->_idx]->as_Loop();
  1.2600 +  Node* orig_tail_clone        = head_clone->in(2);
  1.2601 +
  1.2602 +  // Add phi if "def" node is in peel set and "use" is not
  1.2603 +
  1.2604 +  for(i = 0; i < peel_list.size(); i++ ) {
  1.2605 +    Node *def  = peel_list.at(i);
  1.2606 +    if (!def->is_CFG()) {
  1.2607 +      for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
  1.2608 +        Node *use = def->fast_out(j);
  1.2609 +        if (has_node(use) && use->in(0) != C->top() &&
  1.2610 +            (!peel.test(use->_idx) ||
  1.2611 +             (use->is_Phi() && use->in(0) == head)) ) {
  1.2612 +          worklist.push(use);
  1.2613 +        }
  1.2614 +      }
  1.2615 +      while( worklist.size() ) {
  1.2616 +        Node *use = worklist.pop();
  1.2617 +        for (uint j = 1; j < use->req(); j++) {
  1.2618 +          Node* n = use->in(j);
  1.2619 +          if (n == def) {
  1.2620 +
  1.2621 +            // "def" is in peel set, "use" is not in peel set
  1.2622 +            // or "use" is in the entry boundary (a phi) of the peel set
  1.2623 +
  1.2624 +            Node* use_c = has_ctrl(use) ? get_ctrl(use) : use;
  1.2625 +
  1.2626 +            if ( loop->is_member(get_loop( use_c )) ) {
  1.2627 +              // use is in loop
  1.2628 +              if (old_new[use->_idx] != NULL) { // null for dead code
  1.2629 +                Node* use_clone = old_new[use->_idx];
  1.2630 +                _igvn.replace_input_of(use, j, C->top());
  1.2631 +                insert_phi_for_loop( use_clone, j, old_new[def->_idx], def, new_head_clone );
  1.2632 +              }
  1.2633 +            } else {
  1.2634 +              assert(is_valid_clone_loop_exit_use(loop, use, orig_exit_idx), "clone loop format");
  1.2635 +              // use is not in the loop, check if the live range includes the cut
  1.2636 +              Node* lp_if = use_c->in(orig_exit_idx)->in(0);
  1.2637 +              if (not_peel.test(lp_if->_idx)) {
  1.2638 +                assert(j == orig_exit_idx, "use from original loop");
  1.2639 +                insert_phi_for_loop( use, clone_exit_idx, old_new[def->_idx], def, new_head_clone );
  1.2640 +              }
  1.2641 +            }
  1.2642 +          }
  1.2643 +        }
  1.2644 +      }
  1.2645 +    }
  1.2646 +  }
  1.2647 +
  1.2648 +  // Step 3b: retarget control
  1.2649 +
  1.2650 +  // Redirect control to the new loop head if a cloned node in
  1.2651 +  // the not_peeled region has control that points into the peeled region.
  1.2652 +  // This is necessary because the cloned peeled region will be outside
  1.2653 +  // the loop.
  1.2654 +  //                            from    to
  1.2655 +  //          cloned-peeled    <---+
  1.2656 +  //    new_head_clone:            |    <--+
  1.2657 +  //          cloned-not_peeled  in(0)    in(0)
  1.2658 +  //          orig-peeled
  1.2659 +
  1.2660 +  for(i = 0; i < loop->_body.size(); i++ ) {
  1.2661 +    Node *n = loop->_body.at(i);
  1.2662 +    if (!n->is_CFG()           && n->in(0) != NULL        &&
  1.2663 +        not_peel.test(n->_idx) && peel.test(n->in(0)->_idx)) {
  1.2664 +      Node* n_clone = old_new[n->_idx];
  1.2665 +      _igvn.replace_input_of(n_clone, 0, new_head_clone);
  1.2666 +    }
  1.2667 +  }
  1.2668 +
  1.2669 +  // Backedge of the surviving new_head (the clone) is the original last_peel
  1.2670 +  _igvn.replace_input_of(new_head_clone, LoopNode::LoopBackControl, last_peel);
  1.2671 +
  1.2672 +  // Cut first node in original not_peel set
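         +  // (The original new_head becomes dead here; its clone, new_head_clone,
         +  // survives as the head of the not-peeled loop.)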
  1.2673 +  _igvn.rehash_node_delayed(new_head);                     // Multiple edge updates:
  1.2674 +  new_head->set_req(LoopNode::EntryControl,    C->top());  //   use rehash_node_delayed / set_req instead of
  1.2675 +  new_head->set_req(LoopNode::LoopBackControl, C->top());  //   multiple replace_input_of calls
  1.2676 +
  1.2677 +  // Copy head_clone back-branch info to original head
  1.2678 +  // and remove original head's loop entry and
  1.2679 +  // clone head's back-branch
  1.2680 +  _igvn.rehash_node_delayed(head); // Multiple edge updates
  1.2681 +  head->set_req(LoopNode::EntryControl,    head_clone->in(LoopNode::LoopBackControl));
  1.2682 +  head->set_req(LoopNode::LoopBackControl, C->top());
  1.2683 +  _igvn.replace_input_of(head_clone, LoopNode::LoopBackControl, C->top());
  1.2684 +
  1.2685 +  // Similarly modify the phis
  1.2686 +  for (DUIterator_Fast kmax, k = head->fast_outs(kmax); k < kmax; k++) {
  1.2687 +    Node* use = head->fast_out(k);
  1.2688 +    if (use->is_Phi() && use->outcnt() > 0) {
  1.2689 +      Node* use_clone = old_new[use->_idx];
  1.2690 +      _igvn.rehash_node_delayed(use); // Multiple edge updates
  1.2691 +      use->set_req(LoopNode::EntryControl,    use_clone->in(LoopNode::LoopBackControl));
  1.2692 +      use->set_req(LoopNode::LoopBackControl, C->top());
  1.2693 +      _igvn.replace_input_of(use_clone, LoopNode::LoopBackControl, C->top());
  1.2694 +    }
  1.2695 +  }
  1.2696 +
  1.2697 +  // Step 4: update dominator tree and dominator depth
  1.2698 +
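         +  // head's entry control was rewired above to the clone's back-branch,
         +  // i.e. orig_tail_clone, so that is now its immediate dominator.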
  1.2699 +  set_idom(head, orig_tail_clone, dd);
  1.2700 +  recompute_dom_depth();
  1.2701 +
  1.2702 +  // Inhibit more partial peeling on this loop
  1.2703 +  new_head_clone->set_partial_peel_loop();
  1.2704 +  C->set_major_progress();
  1.2705 +  loop->record_for_igvn();
  1.2706 +
  1.2707 +#if !defined(PRODUCT)
  1.2708 +  if (TracePartialPeeling) {
  1.2709 +    tty->print_cr("\nafter partial peel one iteration");
  1.2710 +    Node_List wl(area);
  1.2711 +    Node* t = last_peel;
  1.2712 +    while (true) {
  1.2713 +      wl.push(t);
  1.2714 +      if (t == head_clone) break;
  1.2715 +      t = idom(t);
  1.2716 +    }
  1.2717 +    while (wl.size() > 0) {
  1.2718 +      Node* tt = wl.pop();
  1.2719 +      if (tt == head) tty->print_cr("orig head");
  1.2720 +      else if (tt == new_head_clone) tty->print_cr("new head");
  1.2721 +      else if (tt == head_clone) tty->print_cr("clone head");
  1.2722 +      tt->dump();
  1.2723 +    }
  1.2724 +  }
  1.2725 +#endif
  1.2726 +  return true;
  1.2727 +}
  1.2728 +
  1.2729 +//------------------------------reorg_offsets----------------------------------
  1.2730 +// Reorganize offset computations to lower register pressure.  Mostly
  1.2731 +// prevent loop-fallout uses of the pre-incremented trip counter, which would
  1.2732 +// otherwise be live at the same time as the post-incremented trip counter and
  1.2733 +// force an extra register move.
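         +//
         +// A use of the trip-counter Phi that is reached only through the bottom
         +// exit keeps both the Phi and the incremented counter live across the
         +// exit; below, such a use is rewritten as (incr - stride), which has the
         +// same value but is derived from the post-incremented counter only.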
  1.2734 +void PhaseIdealLoop::reorg_offsets(IdealLoopTree *loop) {
  1.2735 +  // Perform it only for canonical counted loops.
  1.2736 +  // Loop's shape could be messed up by iteration_split_impl.
  1.2737 +  if (!loop->_head->is_CountedLoop())
  1.2738 +    return;
  1.2739 +  if (!loop->_head->as_Loop()->is_valid_counted_loop())
  1.2740 +    return;
  1.2741 +
  1.2742 +  CountedLoopNode *cl = loop->_head->as_CountedLoop();
  1.2743 +  CountedLoopEndNode *cle = cl->loopexit();
  1.2744 +  Node *exit = cle->proj_out(false);
  1.2745 +  Node *phi = cl->phi();
  1.2746 +
  1.2747 +  // Check for the special case of folks using the pre-incremented
  1.2748 +  // trip-counter on the fall-out path (forces the pre-incremented
  1.2749 +  // and post-incremented trip counter to be live at the same time).
  1.2750 +  // Fix this by adjusting to use the post-increment trip counter.
  1.2751 +
  1.2752 +  bool progress = true;
  1.2753 +  while (progress) {
  1.2754 +    progress = false;
  1.2755 +    for (DUIterator_Fast imax, i = phi->fast_outs(imax); i < imax; i++) {
  1.2756 +      Node* use = phi->fast_out(i);   // User of trip-counter
  1.2757 +      if (!has_ctrl(use))  continue;
  1.2758 +      Node *u_ctrl = get_ctrl(use);
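         +      // For a Phi use, take the dominator-LCA of the control predecessors
         +      // on the paths that feed the trip counter into it.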
  1.2759 +      if (use->is_Phi()) {
  1.2760 +        u_ctrl = NULL;
  1.2761 +        for (uint j = 1; j < use->req(); j++)
  1.2762 +          if (use->in(j) == phi)
  1.2763 +            u_ctrl = dom_lca(u_ctrl, use->in(0)->in(j));
  1.2764 +      }
  1.2765 +      IdealLoopTree *u_loop = get_loop(u_ctrl);
  1.2766 +      // Look for loop-invariant use
  1.2767 +      if (u_loop == loop) continue;
  1.2768 +      if (loop->is_member(u_loop)) continue;
  1.2769 +      // Check that the use is live out the bottom exit.  Assuming the trip-counter
  1.2770 +      // update is right at the bottom, uses out of the loop middle are ok.
  1.2771 +      if (dom_lca(exit, u_ctrl) != exit) continue;
  1.2772 +      // Hit!  Refactor use to use the post-incremented tripcounter.
  1.2773 +      // Compute a post-increment tripcounter.
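         +      // Note: incr - stride has the same value as the Phi; the Opaque2
         +      // node keeps igvn from folding the expression straight back into
         +      // the Phi, which would undo this transformation.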
  1.2774 +      Node *opaq = new (C) Opaque2Node( C, cle->incr() );
  1.2775 +      register_new_node(opaq, exit);
  1.2776 +      Node *neg_stride = _igvn.intcon(-cle->stride_con());
  1.2777 +      set_ctrl(neg_stride, C->root());
  1.2778 +      Node *post = new (C) AddINode( opaq, neg_stride);
  1.2779 +      register_new_node(post, exit);
  1.2780 +      _igvn.rehash_node_delayed(use);
  1.2781 +      for (uint j = 1; j < use->req(); j++) {
  1.2782 +        if (use->in(j) == phi)
  1.2783 +          use->set_req(j, post);
  1.2784 +      }
  1.2785 +      // Since DU info changed, rerun loop
  1.2786 +      progress = true;
  1.2787 +      break;
  1.2788 +    }
  1.2789 +  }
  1.2790 +
  1.2791 +}
