6894779: Loop Predication for Loop Optimizer in C2

Tue, 12 Jan 2010 14:37:35 -0800

author
cfang
date
Tue, 12 Jan 2010 14:37:35 -0800
changeset 1607
b2b6a9bf6238
parent 1595
f24201449cac
child 1608
73b22f919c34

6894779: Loop Predication for Loop Optimizer in C2
Summary: Loop predication implementation
Reviewed-by: never, kvn

src/share/vm/includeDB_compiler2 file | annotate | diff | comparison | revisions
src/share/vm/opto/c2_globals.hpp file | annotate | diff | comparison | revisions
src/share/vm/opto/compile.cpp file | annotate | diff | comparison | revisions
src/share/vm/opto/compile.hpp file | annotate | diff | comparison | revisions
src/share/vm/opto/loopTransform.cpp file | annotate | diff | comparison | revisions
src/share/vm/opto/loopnode.cpp file | annotate | diff | comparison | revisions
src/share/vm/opto/loopnode.hpp file | annotate | diff | comparison | revisions
src/share/vm/opto/parse.hpp file | annotate | diff | comparison | revisions
src/share/vm/opto/parse1.cpp file | annotate | diff | comparison | revisions
src/share/vm/opto/parse2.cpp file | annotate | diff | comparison | revisions
src/share/vm/opto/split_if.cpp file | annotate | diff | comparison | revisions
src/share/vm/runtime/deoptimization.cpp file | annotate | diff | comparison | revisions
src/share/vm/runtime/deoptimization.hpp file | annotate | diff | comparison | revisions
     1.1 --- a/src/share/vm/includeDB_compiler2	Sat Jan 09 00:59:35 2010 -0800
     1.2 +++ b/src/share/vm/includeDB_compiler2	Tue Jan 12 14:37:35 2010 -0800
     1.3 @@ -601,6 +601,7 @@
     1.4  
     1.5  loopTransform.cpp                       addnode.hpp
     1.6  loopTransform.cpp                       allocation.inline.hpp
     1.7 +loopTransform.cpp                       callnode.hpp
     1.8  loopTransform.cpp                       connode.hpp
     1.9  loopTransform.cpp                       compileLog.hpp
    1.10  loopTransform.cpp                       divnode.hpp
     2.1 --- a/src/share/vm/opto/c2_globals.hpp	Sat Jan 09 00:59:35 2010 -0800
     2.2 +++ b/src/share/vm/opto/c2_globals.hpp	Tue Jan 12 14:37:35 2010 -0800
     2.3 @@ -154,6 +154,12 @@
     2.4    notproduct(bool, TraceProfileTripCount, false,                            \
     2.5            "Trace profile loop trip count information")                      \
     2.6                                                                              \
     2.7 +  product(bool, UseLoopPredicate, true,                                     \
     2.8 +          "Generate a predicate to select fast/slow loop versions")         \
     2.9 +                                                                            \
    2.10 +  develop(bool, TraceLoopPredicate, false,                                  \
    2.11 +          "Trace generation of loop predicates")                            \
    2.12 +                                                                            \
    2.13    develop(bool, OptoCoalesce, true,                                         \
    2.14            "Use Conservative Copy Coalescing in the Register Allocator")     \
    2.15                                                                              \
     3.1 --- a/src/share/vm/opto/compile.cpp	Sat Jan 09 00:59:35 2010 -0800
     3.2 +++ b/src/share/vm/opto/compile.cpp	Tue Jan 12 14:37:35 2010 -0800
     3.3 @@ -932,6 +932,7 @@
     3.4  
     3.5    _intrinsics = NULL;
     3.6    _macro_nodes = new GrowableArray<Node*>(comp_arena(), 8,  0, NULL);
     3.7 +  _predicate_opaqs = new GrowableArray<Node*>(comp_arena(), 8,  0, NULL);
     3.8    register_library_intrinsics();
     3.9  }
    3.10  
    3.11 @@ -1553,6 +1554,19 @@
    3.12    }
    3.13  }
    3.14  
    3.15 +//---------------------cleanup_loop_predicates-----------------------
    3.16 +// Remove the opaque nodes that protect the predicates so that all unused
    3.17 +// checks and uncommon_traps will be eliminated from the ideal graph
    3.18 +void Compile::cleanup_loop_predicates(PhaseIterGVN &igvn) {
    3.19 +  if (predicate_count()==0) return;
    3.20 +  for (int i = predicate_count(); i > 0; i--) {
    3.21 +    Node * n = predicate_opaque1_node(i-1);
    3.22 +    assert(n->Opcode() == Op_Opaque1, "must be");
    3.23 +    igvn.replace_node(n, n->in(1));
    3.24 +  }
    3.25 +  assert(predicate_count()==0, "should be clean!");
    3.26 +  igvn.optimize();
    3.27 +}
    3.28  
    3.29  //------------------------------Optimize---------------------------------------
    3.30  // Given a graph, optimize it.
    3.31 @@ -1594,7 +1608,7 @@
    3.32    if((loop_opts_cnt > 0) && (has_loops() || has_split_ifs())) {
    3.33      {
    3.34        TracePhase t2("idealLoop", &_t_idealLoop, true);
    3.35 -      PhaseIdealLoop ideal_loop( igvn, true );
    3.36 +      PhaseIdealLoop ideal_loop( igvn, true, UseLoopPredicate);
    3.37        loop_opts_cnt--;
    3.38        if (major_progress()) print_method("PhaseIdealLoop 1", 2);
    3.39        if (failing())  return;
    3.40 @@ -1602,7 +1616,7 @@
    3.41      // Loop opts pass if partial peeling occurred in previous pass
    3.42      if(PartialPeelLoop && major_progress() && (loop_opts_cnt > 0)) {
    3.43        TracePhase t3("idealLoop", &_t_idealLoop, true);
    3.44 -      PhaseIdealLoop ideal_loop( igvn, false );
    3.45 +      PhaseIdealLoop ideal_loop( igvn, false, UseLoopPredicate);
    3.46        loop_opts_cnt--;
    3.47        if (major_progress()) print_method("PhaseIdealLoop 2", 2);
    3.48        if (failing())  return;
    3.49 @@ -1610,7 +1624,7 @@
    3.50      // Loop opts pass for loop-unrolling before CCP
    3.51      if(major_progress() && (loop_opts_cnt > 0)) {
    3.52        TracePhase t4("idealLoop", &_t_idealLoop, true);
    3.53 -      PhaseIdealLoop ideal_loop( igvn, false );
    3.54 +      PhaseIdealLoop ideal_loop( igvn, false, UseLoopPredicate);
    3.55        loop_opts_cnt--;
    3.56        if (major_progress()) print_method("PhaseIdealLoop 3", 2);
    3.57      }
    3.58 @@ -1648,13 +1662,21 @@
    3.59    // peeling, unrolling, etc.
    3.60    if(loop_opts_cnt > 0) {
    3.61      debug_only( int cnt = 0; );
    3.62 +    bool loop_predication = UseLoopPredicate;
    3.63      while(major_progress() && (loop_opts_cnt > 0)) {
    3.64        TracePhase t2("idealLoop", &_t_idealLoop, true);
    3.65        assert( cnt++ < 40, "infinite cycle in loop optimization" );
    3.66 -      PhaseIdealLoop ideal_loop( igvn, true );
    3.67 +      PhaseIdealLoop ideal_loop( igvn, true, loop_predication);
    3.68        loop_opts_cnt--;
    3.69        if (major_progress()) print_method("PhaseIdealLoop iterations", 2);
    3.70        if (failing())  return;
    3.71 +      // Perform loop predication optimization during first iteration after CCP.
    3.72 +      // After that switch it off and cleanup unused loop predicates.
    3.73 +      if (loop_predication) {
    3.74 +        loop_predication = false;
    3.75 +        cleanup_loop_predicates(igvn);
    3.76 +        if (failing())  return;
    3.77 +      }
    3.78      }
    3.79    }
    3.80  
     4.1 --- a/src/share/vm/opto/compile.hpp	Sat Jan 09 00:59:35 2010 -0800
     4.2 +++ b/src/share/vm/opto/compile.hpp	Tue Jan 12 14:37:35 2010 -0800
     4.3 @@ -38,6 +38,7 @@
     4.4  class OptoReg;
     4.5  class PhaseCFG;
     4.6  class PhaseGVN;
     4.7 +class PhaseIterGVN;
     4.8  class PhaseRegAlloc;
     4.9  class PhaseCCP;
    4.10  class PhaseCCP_DCE;
    4.11 @@ -172,6 +173,7 @@
    4.12    const char*           _failure_reason;        // for record_failure/failing pattern
    4.13    GrowableArray<CallGenerator*>* _intrinsics;   // List of intrinsics.
    4.14    GrowableArray<Node*>* _macro_nodes;           // List of nodes which need to be expanded before matching.
    4.15 +  GrowableArray<Node*>* _predicate_opaqs;       // List of Opaque1 nodes for the loop predicates.
    4.16    ConnectionGraph*      _congraph;
    4.17  #ifndef PRODUCT
    4.18    IdealGraphPrinter*    _printer;
    4.19 @@ -351,7 +353,9 @@
    4.20    }
    4.21  
    4.22    int           macro_count()                   { return _macro_nodes->length(); }
    4.23 +  int           predicate_count()               { return _predicate_opaqs->length();}
    4.24    Node*         macro_node(int idx)             { return _macro_nodes->at(idx); }
    4.25 +  Node*         predicate_opaque1_node(int idx) { return _predicate_opaqs->at(idx);}
    4.26    ConnectionGraph* congraph()                   { return _congraph;}
    4.27    void add_macro_node(Node * n) {
    4.28      //assert(n->is_macro(), "must be a macro node");
    4.29 @@ -363,7 +367,19 @@
    4.30      // that the node is in the array before attempting to remove it
    4.31      if (_macro_nodes->contains(n))
    4.32        _macro_nodes->remove(n);
    4.33 +    // remove from _predicate_opaqs list also if it is there
    4.34 +    if (predicate_count() > 0 && _predicate_opaqs->contains(n)){
    4.35 +      _predicate_opaqs->remove(n);
    4.36 +    }
    4.37    }
    4.38 +  void add_predicate_opaq(Node * n) {
    4.39 +    assert(!_predicate_opaqs->contains(n), " duplicate entry in predicate opaque1");
    4.40 +    assert(_macro_nodes->contains(n), "should have already been in macro list");
    4.41 +    _predicate_opaqs->append(n);
    4.42 +  }
    4.43 +  // remove the opaque nodes that protect the predicates so that the unused checks and
    4.44 +  // uncommon traps will be eliminated from the graph.
    4.45 +  void cleanup_loop_predicates(PhaseIterGVN &igvn);
    4.46  
    4.47    // Compilation environment.
    4.48    Arena*            comp_arena()                { return &_comp_arena; }
     5.1 --- a/src/share/vm/opto/loopTransform.cpp	Sat Jan 09 00:59:35 2010 -0800
     5.2 +++ b/src/share/vm/opto/loopTransform.cpp	Tue Jan 12 14:37:35 2010 -0800
     5.3 @@ -549,6 +549,10 @@
     5.4        // Comparing trip+off vs limit
     5.5        Node *bol = iff->in(1);
     5.6        if( bol->req() != 2 ) continue; // dead constant test
     5.7 +      if (!bol->is_Bool()) {
     5.8 +        assert(UseLoopPredicate && bol->Opcode() == Op_Conv2B, "predicate check only");
     5.9 +        continue;
    5.10 +      }
    5.11        Node *cmp = bol->in(1);
    5.12  
    5.13        Node *rc_exp = cmp->in(1);
    5.14 @@ -875,7 +879,7 @@
    5.15  //------------------------------is_invariant-----------------------------
    5.16  // Return true if n is invariant
    5.17  bool IdealLoopTree::is_invariant(Node* n) const {
    5.18 -  Node *n_c = _phase->get_ctrl(n);
    5.19 +  Node *n_c = _phase->has_ctrl(n) ? _phase->get_ctrl(n) : n;
    5.20    if (n_c->is_top()) return false;
    5.21    return !is_member(_phase->get_loop(n_c));
    5.22  }
    5.23 @@ -1594,7 +1598,7 @@
    5.24  bool IdealLoopTree::iteration_split_impl( PhaseIdealLoop *phase, Node_List &old_new ) {
    5.25    // Check and remove empty loops (spam micro-benchmarks)
    5.26    if( policy_do_remove_empty_loop(phase) )
    5.27 -    return true;                     // Here we removed an empty loop
    5.28 +    return true;  // Here we removed an empty loop
    5.29  
    5.30    bool should_peel = policy_peeling(phase); // Should we peel?
    5.31  
    5.32 @@ -1688,8 +1692,8 @@
    5.33      // an even number of trips).  If we are peeling, we might enable some RCE
    5.34      // and we'd rather unroll the post-RCE'd loop SO... do not unroll if
    5.35      // peeling.
    5.36 -    if( should_unroll && !should_peel )
    5.37 -      phase->do_unroll(this,old_new, true);
    5.38 +      if( should_unroll && !should_peel )
    5.39 +        phase->do_unroll(this,old_new, true);
    5.40  
    5.41      // Adjust the pre-loop limits to align the main body
    5.42      // iterations.
    5.43 @@ -1731,9 +1735,9 @@
    5.44        _allow_optimizations &&
    5.45        !tail()->is_top() ) {     // Also ignore the occasional dead backedge
    5.46      if (!_has_call) {
    5.47 -      if (!iteration_split_impl( phase, old_new )) {
    5.48 -        return false;
    5.49 -      }
    5.50 +        if (!iteration_split_impl( phase, old_new )) {
    5.51 +          return false;
    5.52 +        }
    5.53      } else if (policy_unswitching(phase)) {
    5.54        phase->do_unswitching(this, old_new);
    5.55      }
    5.56 @@ -1746,3 +1750,576 @@
    5.57      return false;
    5.58    return true;
    5.59  }
    5.60 +
    5.61 +//-------------------------------is_uncommon_trap_proj----------------------------
    5.62 +// Return true if proj is the form of "proj->[region->..]call_uct"
    5.63 +bool PhaseIdealLoop::is_uncommon_trap_proj(ProjNode* proj, bool must_reason_predicate) {
    5.64 +  int path_limit = 10;
    5.65 +  assert(proj, "invalid argument");
    5.66 +  Node* out = proj;
    5.67 +  for (int ct = 0; ct < path_limit; ct++) {
    5.68 +    out = out->unique_ctrl_out();
    5.69 +    if (out == NULL || out->is_Root() || out->is_Start())
    5.70 +      return false;
    5.71 +    if (out->is_CallStaticJava()) {
    5.72 +      int req = out->as_CallStaticJava()->uncommon_trap_request();
    5.73 +      if (req != 0) {
    5.74 +        Deoptimization::DeoptReason reason = Deoptimization::trap_request_reason(req);
    5.75 +        if (!must_reason_predicate || reason == Deoptimization::Reason_predicate){
    5.76 +           return true;
    5.77 +        }
    5.78 +      }
    5.79 +      return false; // don't do further after call
    5.80 +    }
    5.81 +  }
    5.82 +  return false;
    5.83 +}
    5.84 +
    5.85 +//-------------------------------is_uncommon_trap_if_pattern-------------------------
    5.86 +// Return true  for "if(test)-> proj -> ...
    5.87 +//                          |
    5.88 +//                          V
    5.89 +//                      other_proj->[region->..]call_uct"
    5.90 +//
    5.91 +// "must_reason_predicate" means the uct reason must be Reason_predicate
    5.92 +bool PhaseIdealLoop::is_uncommon_trap_if_pattern(ProjNode *proj, bool must_reason_predicate) {
    5.93 +  Node *in0 = proj->in(0);
    5.94 +  if (!in0->is_If()) return false;
    5.95 +  IfNode* iff = in0->as_If();
    5.96 +
    5.97 +  // we need "If(Conv2B(Opaque1(...)))" pattern for must_reason_predicate
    5.98 +  if (must_reason_predicate) {
    5.99 +    if (iff->in(1)->Opcode() != Op_Conv2B ||
   5.100 +       iff->in(1)->in(1)->Opcode() != Op_Opaque1) {
   5.101 +      return false;
   5.102 +    }
   5.103 +  }
   5.104 +
   5.105 +  ProjNode* other_proj = iff->proj_out(1-proj->_con)->as_Proj();
   5.106 +  return is_uncommon_trap_proj(other_proj, must_reason_predicate);
   5.107 +}
   5.108 +
   5.109 +//------------------------------create_new_if_for_predicate------------------------
   5.110 +// create a new if above the uct_if_pattern for the predicate to be promoted.
   5.111 +//
   5.112 +//          before                                after
   5.113 +//        ----------                           ----------
   5.114 +//           ctrl                                 ctrl
   5.115 +//            |                                     |
   5.116 +//            |                                     |
   5.117 +//            v                                     v
   5.118 +//           iff                                 new_iff
   5.119 +//          /    \                                /      \
   5.120 +//         /      \                              /        \
   5.121 +//        v        v                            v          v
   5.122 +//  uncommon_proj cont_proj                   if_uct     if_cont
   5.123 +// \      |        |                           |          |
   5.124 +//  \     |        |                           |          |
   5.125 +//   v    v        v                           |          v
   5.126 +//     rgn       loop                          |         iff
   5.127 +//      |                                      |        /     \
   5.128 +//      |                                      |       /       \
   5.129 +//      v                                      |      v         v
   5.130 +// uncommon_trap                               | uncommon_proj cont_proj
   5.131 +//                                           \  \    |           |
   5.132 +//                                            \  \   |           |
   5.133 +//                                             v  v  v           v
   5.134 +//                                               rgn           loop
   5.135 +//                                                |
   5.136 +//                                                |
   5.137 +//                                                v
   5.138 +//                                           uncommon_trap
   5.139 +//
   5.140 +//
   5.141 +// We will create a region to guard the uct call if there is no one there.
   5.142 +// The true projecttion (if_cont) of the new_iff is returned.
   5.143 +ProjNode* PhaseIdealLoop::create_new_if_for_predicate(ProjNode* cont_proj) {
   5.144 +  assert(is_uncommon_trap_if_pattern(cont_proj, true), "must be a uct if pattern!");
   5.145 +  IfNode* iff = cont_proj->in(0)->as_If();
   5.146 +
   5.147 +  ProjNode *uncommon_proj = iff->proj_out(1 - cont_proj->_con);
   5.148 +  Node     *rgn   = uncommon_proj->unique_ctrl_out();
   5.149 +  assert(rgn->is_Region() || rgn->is_Call(), "must be a region or call uct");
   5.150 +
   5.151 +  if (!rgn->is_Region()) { // create a region to guard the call
   5.152 +    assert(rgn->is_Call(), "must be call uct");
   5.153 +    CallNode* call = rgn->as_Call();
   5.154 +    rgn = new (C, 1) RegionNode(1);
   5.155 +    _igvn.set_type(rgn, rgn->bottom_type());
   5.156 +    rgn->add_req(uncommon_proj);
   5.157 +    set_idom(rgn, idom(uncommon_proj), dom_depth(uncommon_proj)+1);
   5.158 +    _igvn.hash_delete(call);
   5.159 +    call->set_req(0, rgn);
   5.160 +  }
   5.161 +
   5.162 +  // Create new_iff
   5.163 +  uint  iffdd  = dom_depth(iff);
   5.164 +  IdealLoopTree* lp = get_loop(iff);
   5.165 +  IfNode *new_iff = new (C, 2) IfNode(iff->in(0), NULL, iff->_prob, iff->_fcnt);
   5.166 +  register_node(new_iff, lp, idom(iff), iffdd);
   5.167 +  Node *if_cont = new (C, 1) IfTrueNode(new_iff);
   5.168 +  Node *if_uct  = new (C, 1) IfFalseNode(new_iff);
   5.169 +  if (cont_proj->is_IfFalse()) {
   5.170 +    // Swap
   5.171 +    Node* tmp = if_uct; if_uct = if_cont; if_cont = tmp;
   5.172 +  }
   5.173 +  register_node(if_cont, lp, new_iff, iffdd);
   5.174 +  register_node(if_uct, get_loop(rgn), new_iff, iffdd);
   5.175 +
   5.176 +  // if_cont to iff
   5.177 +  _igvn.hash_delete(iff);
   5.178 +  iff->set_req(0, if_cont);
   5.179 +  set_idom(iff, if_cont, dom_depth(iff));
   5.180 +
   5.181 +  // if_uct to rgn
   5.182 +  _igvn.hash_delete(rgn);
   5.183 +  rgn->add_req(if_uct);
   5.184 +  Node* ridom = idom(rgn);
   5.185 +  Node* nrdom = dom_lca(ridom, new_iff);
   5.186 +  set_idom(rgn, nrdom, dom_depth(rgn));
   5.187 +
   5.188 +  // rgn must have no phis
   5.189 +  assert(!rgn->as_Region()->has_phi(), "region must have no phis");
   5.190 +
   5.191 +  return if_cont->as_Proj();
   5.192 +}
   5.193 +
   5.194 +//------------------------------find_predicate_insertion_point--------------------------
   5.195 +// Find a good location to insert a predicate
   5.196 +ProjNode* PhaseIdealLoop::find_predicate_insertion_point(Node* start_c) {
   5.197 +  if (start_c == C->root() || !start_c->is_Proj())
   5.198 +    return NULL;
   5.199 +  if (is_uncommon_trap_if_pattern(start_c->as_Proj(), true/*Reason_Predicate*/)) {
   5.200 +    return start_c->as_Proj();
   5.201 +  }
   5.202 +  return NULL;
   5.203 +}
   5.204 +
   5.205 +//------------------------------Invariance-----------------------------------
   5.206 +// Helper class for loop_predication_impl to compute invariance on the fly and
   5.207 +// clone invariants.
   5.208 +class Invariance : public StackObj {
   5.209 +  VectorSet _visited, _invariant;
   5.210 +  Node_Stack _stack;
   5.211 +  VectorSet _clone_visited;
   5.212 +  Node_List _old_new; // map of old to new (clone)
   5.213 +  IdealLoopTree* _lpt;
   5.214 +  PhaseIdealLoop* _phase;
   5.215 +
   5.216 +  // Helper function to set up the invariance for invariance computation
   5.217 +  // If n is a known invariant, set up directly. Otherwise, look up the
   5.218 +  // the possibility to push n onto the stack for further processing.
   5.219 +  void visit(Node* use, Node* n) {
   5.220 +    if (_lpt->is_invariant(n)) { // known invariant
   5.221 +      _invariant.set(n->_idx);
   5.222 +    } else if (!n->is_CFG()) {
   5.223 +      Node *n_ctrl = _phase->ctrl_or_self(n);
   5.224 +      Node *u_ctrl = _phase->ctrl_or_self(use); // self if use is a CFG
   5.225 +      if (_phase->is_dominator(n_ctrl, u_ctrl)) {
   5.226 +        _stack.push(n, n->in(0) == NULL ? 1 : 0);
   5.227 +      }
   5.228 +    }
   5.229 +  }
   5.230 +
   5.231 +  // Compute invariance for "the_node" and (possibly) all its inputs recursively
   5.232 +  // on the fly
   5.233 +  void compute_invariance(Node* n) {
   5.234 +    assert(_visited.test(n->_idx), "must be");
   5.235 +    visit(n, n);
   5.236 +    while (_stack.is_nonempty()) {
   5.237 +      Node*  n = _stack.node();
   5.238 +      uint idx = _stack.index();
   5.239 +      if (idx == n->req()) { // all inputs are processed
   5.240 +        _stack.pop();
   5.241 +        // n is invariant if it's inputs are all invariant
   5.242 +        bool all_inputs_invariant = true;
   5.243 +        for (uint i = 0; i < n->req(); i++) {
   5.244 +          Node* in = n->in(i);
   5.245 +          if (in == NULL) continue;
   5.246 +          assert(_visited.test(in->_idx), "must have visited input");
   5.247 +          if (!_invariant.test(in->_idx)) { // bad guy
   5.248 +            all_inputs_invariant = false;
   5.249 +            break;
   5.250 +          }
   5.251 +        }
   5.252 +        if (all_inputs_invariant) {
   5.253 +          _invariant.set(n->_idx); // I am a invariant too
   5.254 +        }
   5.255 +      } else { // process next input
   5.256 +        _stack.set_index(idx + 1);
   5.257 +        Node* m = n->in(idx);
   5.258 +        if (m != NULL && !_visited.test_set(m->_idx)) {
   5.259 +          visit(n, m);
   5.260 +        }
   5.261 +      }
   5.262 +    }
   5.263 +  }
   5.264 +
   5.265 +  // Helper function to set up _old_new map for clone_nodes.
   5.266 +  // If n is a known invariant, set up directly ("clone" of n == n).
   5.267 +  // Otherwise, push n onto the stack for real cloning.
   5.268 +  void clone_visit(Node* n) {
   5.269 +    assert(_invariant.test(n->_idx), "must be invariant");
   5.270 +    if (_lpt->is_invariant(n)) { // known invariant
   5.271 +      _old_new.map(n->_idx, n);
   5.272 +    } else{ // to be cloned
   5.273 +      assert (!n->is_CFG(), "should not see CFG here");
   5.274 +      _stack.push(n, n->in(0) == NULL ? 1 : 0);
   5.275 +    }
   5.276 +  }
   5.277 +
   5.278 +  // Clone "n" and (possibly) all its inputs recursively
   5.279 +  void clone_nodes(Node* n, Node* ctrl) {
   5.280 +    clone_visit(n);
   5.281 +    while (_stack.is_nonempty()) {
   5.282 +      Node*  n = _stack.node();
   5.283 +      uint idx = _stack.index();
   5.284 +      if (idx == n->req()) { // all inputs processed, clone n!
   5.285 +        _stack.pop();
   5.286 +        // clone invariant node
   5.287 +        Node* n_cl = n->clone();
   5.288 +        _old_new.map(n->_idx, n_cl);
   5.289 +        _phase->register_new_node(n_cl, ctrl);
   5.290 +        for (uint i = 0; i < n->req(); i++) {
   5.291 +          Node* in = n_cl->in(i);
   5.292 +          if (in == NULL) continue;
   5.293 +          n_cl->set_req(i, _old_new[in->_idx]);
   5.294 +        }
   5.295 +      } else { // process next input
   5.296 +        _stack.set_index(idx + 1);
   5.297 +        Node* m = n->in(idx);
   5.298 +        if (m != NULL && !_clone_visited.test_set(m->_idx)) {
   5.299 +          clone_visit(m); // visit the input
   5.300 +        }
   5.301 +      }
   5.302 +    }
   5.303 +  }
   5.304 +
   5.305 + public:
   5.306 +  Invariance(Arena* area, IdealLoopTree* lpt) :
   5.307 +    _lpt(lpt), _phase(lpt->_phase),
   5.308 +    _visited(area), _invariant(area), _stack(area, 10 /* guess */),
   5.309 +    _clone_visited(area), _old_new(area)
   5.310 +  {}
   5.311 +
   5.312 +  // Map old to n for invariance computation and clone
   5.313 +  void map_ctrl(Node* old, Node* n) {
   5.314 +    assert(old->is_CFG() && n->is_CFG(), "must be");
   5.315 +    _old_new.map(old->_idx, n); // "clone" of old is n
   5.316 +    _invariant.set(old->_idx);  // old is invariant
   5.317 +    _clone_visited.set(old->_idx);
   5.318 +  }
   5.319 +
   5.320 +  // Driver function to compute invariance
   5.321 +  bool is_invariant(Node* n) {
   5.322 +    if (!_visited.test_set(n->_idx))
   5.323 +      compute_invariance(n);
   5.324 +    return (_invariant.test(n->_idx) != 0);
   5.325 +  }
   5.326 +
   5.327 +  // Driver function to clone invariant
   5.328 +  Node* clone(Node* n, Node* ctrl) {
   5.329 +    assert(ctrl->is_CFG(), "must be");
   5.330 +    assert(_invariant.test(n->_idx), "must be an invariant");
   5.331 +    if (!_clone_visited.test(n->_idx))
   5.332 +      clone_nodes(n, ctrl);
   5.333 +    return _old_new[n->_idx];
   5.334 +  }
   5.335 +};
   5.336 +
   5.337 +//------------------------------is_range_check_if -----------------------------------
   5.338 +// Returns true if the predicate of iff is in "scale*iv + offset u< load_range(ptr)" format
   5.339 +// Note: this function is particularly designed for loop predication. We require load_range
   5.340 +//       and offset to be loop invariant computed on the fly by "invar"
   5.341 +bool IdealLoopTree::is_range_check_if(IfNode *iff, PhaseIdealLoop *phase, Invariance& invar) const {
   5.342 +  if (!is_loop_exit(iff)) {
   5.343 +    return false;
   5.344 +  }
   5.345 +  if (!iff->in(1)->is_Bool()) {
   5.346 +    return false;
   5.347 +  }
   5.348 +  const BoolNode *bol = iff->in(1)->as_Bool();
   5.349 +  if (bol->_test._test != BoolTest::lt) {
   5.350 +    return false;
   5.351 +  }
   5.352 +  if (!bol->in(1)->is_Cmp()) {
   5.353 +    return false;
   5.354 +  }
   5.355 +  const CmpNode *cmp = bol->in(1)->as_Cmp();
   5.356 +  if (cmp->Opcode() != Op_CmpU ) {
   5.357 +    return false;
   5.358 +  }
   5.359 +  if (cmp->in(2)->Opcode() != Op_LoadRange) {
   5.360 +    return false;
   5.361 +  }
   5.362 +  LoadRangeNode* lr = (LoadRangeNode*)cmp->in(2);
   5.363 +  if (!invar.is_invariant(lr)) { // loadRange must be invariant
   5.364 +    return false;
   5.365 +  }
   5.366 +  Node *iv     = _head->as_CountedLoop()->phi();
   5.367 +  int   scale  = 0;
   5.368 +  Node *offset = NULL;
   5.369 +  if (!phase->is_scaled_iv_plus_offset(cmp->in(1), iv, &scale, &offset)) {
   5.370 +    return false;
   5.371 +  }
   5.372 +  if(offset && !invar.is_invariant(offset)) { // offset must be invariant
   5.373 +    return false;
   5.374 +  }
   5.375 +  return true;
   5.376 +}
   5.377 +
   5.378 +//------------------------------rc_predicate-----------------------------------
   5.379 +// Create a range check predicate
   5.380 +//
   5.381 +// for (i = init; i < limit; i += stride) {
   5.382 +//    a[scale*i+offset]
   5.383 +// }
   5.384 +//
   5.385 +// Compute max(scale*i + offset) for init <= i < limit and build the predicate
   5.386 +// as "max(scale*i + offset) u< a.length".
   5.387 +//
   5.388 +// There are two cases for max(scale*i + offset):
   5.389 +// (1) stride*scale > 0
   5.390 +//   max(scale*i + offset) = scale*(limit-stride) + offset
   5.391 +// (2) stride*scale < 0
   5.392 +//   max(scale*i + offset) = scale*init + offset
   5.393 +BoolNode* PhaseIdealLoop::rc_predicate(Node* ctrl,
   5.394 +                                       int scale, Node* offset,
   5.395 +                                       Node* init, Node* limit, Node* stride,
   5.396 +                                       Node* range) {
   5.397 +  Node* max_idx_expr  = init;
   5.398 +  int stride_con = stride->get_int();
   5.399 +  if ((stride_con > 0) == (scale > 0)) {
   5.400 +    max_idx_expr = new (C, 3) SubINode(limit, stride);
   5.401 +    register_new_node(max_idx_expr, ctrl);
   5.402 +  }
   5.403 +
   5.404 +  if (scale != 1) {
   5.405 +    ConNode* con_scale = _igvn.intcon(scale);
   5.406 +    max_idx_expr = new (C, 3) MulINode(max_idx_expr, con_scale);
   5.407 +    register_new_node(max_idx_expr, ctrl);
   5.408 +  }
   5.409 +
   5.410 +  if (offset && (!offset->is_Con() || offset->get_int() != 0)){
   5.411 +    max_idx_expr = new (C, 3) AddINode(max_idx_expr, offset);
   5.412 +    register_new_node(max_idx_expr, ctrl);
   5.413 +  }
   5.414 +
   5.415 +  CmpUNode* cmp = new (C, 3) CmpUNode(max_idx_expr, range);
   5.416 +  register_new_node(cmp, ctrl);
   5.417 +  BoolNode* bol = new (C, 2) BoolNode(cmp, BoolTest::lt);
   5.418 +  register_new_node(bol, ctrl);
   5.419 +  return bol;
   5.420 +}
   5.421 +
   5.422 +//------------------------------ loop_predication_impl--------------------------
   5.423 +// Insert loop predicates for null checks and range checks
   5.424 +bool PhaseIdealLoop::loop_predication_impl(IdealLoopTree *loop) {
   5.425 +  if (!UseLoopPredicate) return false;
   5.426 +
   5.427 +  // Too many traps seen?
   5.428 +  bool tmt = C->too_many_traps(C->method(), 0, Deoptimization::Reason_predicate);
   5.429 +  int tc = C->trap_count(Deoptimization::Reason_predicate);
   5.430 +  if (tmt || tc > 0) {
   5.431 +    if (TraceLoopPredicate) {
   5.432 +      tty->print_cr("too many predicate traps: %d", tc);
   5.433 +      C->method()->print(); // which method has too many predicate traps
   5.434 +      tty->print_cr("");
   5.435 +    }
   5.436 +    return false;
   5.437 +  }
   5.438 +
   5.439 +  CountedLoopNode *cl = NULL;
   5.440 +  if (loop->_head->is_CountedLoop()) {
   5.441 +    cl = loop->_head->as_CountedLoop();
   5.442 +    // do nothing for iteration-splitted loops
   5.443 +    if(!cl->is_normal_loop()) return false;
   5.444 +  }
   5.445 +
   5.446 +  LoopNode *lpn  = loop->_head->as_Loop();
   5.447 +  Node* entry = lpn->in(LoopNode::EntryControl);
   5.448 +
   5.449 +  ProjNode *predicate_proj = find_predicate_insertion_point(entry);
   5.450 +  if (!predicate_proj){
   5.451 +#ifndef PRODUCT
   5.452 +    if (TraceLoopPredicate) {
   5.453 +      tty->print("missing predicate:");
   5.454 +      loop->dump_head();
   5.455 +    }
   5.456 +#endif
   5.457 +    return false;
   5.458 +  }
   5.459 +
   5.460 +  ConNode* zero = _igvn.intcon(0);
   5.461 +  set_ctrl(zero, C->root());
   5.462 +  Node *cond_false = new (C, 2) Conv2BNode(zero);
   5.463 +  register_new_node(cond_false, C->root());
   5.464 +  ConNode* one = _igvn.intcon(1);
   5.465 +  set_ctrl(one, C->root());
   5.466 +  Node *cond_true = new (C, 2) Conv2BNode(one);
   5.467 +  register_new_node(cond_true, C->root());
   5.468 +
   5.469 +  ResourceArea *area = Thread::current()->resource_area();
   5.470 +  Invariance invar(area, loop);
   5.471 +
   5.472 +  // Create list of if-projs such that a newer proj dominates all older
   5.473 +  // projs in the list, and they all dominate loop->tail()
   5.474 +  Node_List if_proj_list(area);
   5.475 +  LoopNode *head  = loop->_head->as_Loop();
   5.476 +  Node *current_proj = loop->tail(); //start from tail
   5.477 +  while ( current_proj != head ) {
   5.478 +    if (loop == get_loop(current_proj) && // still in the loop ?
   5.479 +        current_proj->is_Proj()        && // is a projection  ?
   5.480 +        current_proj->in(0)->Opcode() == Op_If) { // is a if projection ?
   5.481 +      if_proj_list.push(current_proj);
   5.482 +    }
   5.483 +    current_proj = idom(current_proj);
   5.484 +  }
   5.485 +
   5.486 +  bool hoisted = false; // true if at least one proj is promoted
   5.487 +  while (if_proj_list.size() > 0) {
   5.488 +    // Following are changed to nonnull when a predicate can be hoisted
   5.489 +    ProjNode* new_predicate_proj = NULL;
   5.490 +    BoolNode* new_predicate_bol   = NULL;
   5.491 +
   5.492 +    ProjNode* proj = if_proj_list.pop()->as_Proj();
   5.493 +    IfNode*   iff  = proj->in(0)->as_If();
   5.494 +
   5.495 +    if (!is_uncommon_trap_if_pattern(proj)) {
   5.496 +      if (loop->is_loop_exit(iff)) {
   5.497 +        // stop processing the remaining projs in the list because the execution of them
   5.498 +        // depends on the condition of "iff" (iff->in(1)).
   5.499 +        break;
   5.500 +      } else {
   5.501 +        // Both arms are inside the loop. There are two cases:
   5.502 +        // (1) there is one backward branch. In this case, any remaining proj
   5.503 +        //     in the if_proj list post-dominates "iff". So, the condition of "iff"
   5.504 +        //     does not determine the execution the remining projs directly, and we
   5.505 +        //     can safely continue.
   5.506 +        // (2) both arms are forwarded, i.e. a diamond shape. In this case, "proj"
   5.507 +        //     does not dominate loop->tail(), so it can not be in the if_proj list.
   5.508 +        continue;
   5.509 +      }
   5.510 +    }
   5.511 +
   5.512 +    Node*     test = iff->in(1);
   5.513 +    if (!test->is_Bool()){ //Conv2B, ...
   5.514 +      continue;
   5.515 +    }
   5.516 +    BoolNode* bol = test->as_Bool();
   5.517 +    if (invar.is_invariant(bol)) {
   5.518 +      // Invariant test
   5.519 +      new_predicate_proj = create_new_if_for_predicate(predicate_proj);
   5.520 +      Node* ctrl = new_predicate_proj->in(0)->as_If()->in(0);
   5.521 +      new_predicate_bol  = invar.clone(bol, ctrl)->as_Bool();
   5.522 +      if (TraceLoopPredicate) tty->print("invariant");
   5.523 +    } else if (cl != NULL && loop->is_range_check_if(iff, this, invar)) {
   5.524 +      // Range check (only for counted loops)
   5.525 +      new_predicate_proj = create_new_if_for_predicate(predicate_proj);
   5.526 +      Node *ctrl = new_predicate_proj->in(0)->as_If()->in(0);
   5.527 +      const Node*    cmp    = bol->in(1)->as_Cmp();
   5.528 +      Node*          idx    = cmp->in(1);
   5.529 +      assert(!invar.is_invariant(idx), "index is variant");
   5.530 +      assert(cmp->in(2)->Opcode() == Op_LoadRange, "must be");
   5.531 +      LoadRangeNode* ld_rng = (LoadRangeNode*)cmp->in(2); // LoadRangeNode
   5.532 +      assert(invar.is_invariant(ld_rng), "load range must be invariant");
   5.533 +      ld_rng = (LoadRangeNode*)invar.clone(ld_rng, ctrl);
   5.534 +      int scale    = 1;
   5.535 +      Node* offset = zero;
   5.536 +      bool ok = is_scaled_iv_plus_offset(idx, cl->phi(), &scale, &offset);
   5.537 +      assert(ok, "must be index expression");
   5.538 +      if (offset && offset != zero) {
   5.539 +        assert(invar.is_invariant(offset), "offset must be loop invariant");
   5.540 +        offset = invar.clone(offset, ctrl);
   5.541 +      }
   5.542 +      Node* init    = cl->init_trip();
   5.543 +      Node* limit   = cl->limit();
   5.544 +      Node* stride  = cl->stride();
   5.545 +      new_predicate_bol = rc_predicate(ctrl, scale, offset, init, limit, stride, ld_rng);
   5.546 +      if (TraceLoopPredicate) tty->print("range check");
   5.547 +    }
   5.548 +
   5.549 +    if (new_predicate_proj == NULL) {
   5.550 +      // The other proj of the "iff" is a uncommon trap projection, and we can assume
   5.551 +      // the other proj will not be executed ("executed" means uct raised).
   5.552 +      continue;
   5.553 +    } else {
   5.554 +      // Success - attach condition (new_predicate_bol) to predicate if
   5.555 +      invar.map_ctrl(proj, new_predicate_proj); // so that invariance test can be appropriate
   5.556 +      IfNode* new_iff = new_predicate_proj->in(0)->as_If();
   5.557 +
   5.558 +      // Negate test if necessary
   5.559 +      if (proj->_con != predicate_proj->_con) {
   5.560 +        new_predicate_bol = new (C, 2) BoolNode(new_predicate_bol->in(1), new_predicate_bol->_test.negate());
   5.561 +        register_new_node(new_predicate_bol, new_iff->in(0));
   5.562 +        if (TraceLoopPredicate) tty->print_cr(" if negated: %d", iff->_idx);
   5.563 +      } else {
   5.564 +        if (TraceLoopPredicate) tty->print_cr(" if: %d", iff->_idx);
   5.565 +      }
   5.566 +
   5.567 +      _igvn.hash_delete(new_iff);
   5.568 +      new_iff->set_req(1, new_predicate_bol);
   5.569 +
   5.570 +      _igvn.hash_delete(iff);
   5.571 +      iff->set_req(1, proj->is_IfFalse() ? cond_false : cond_true);
   5.572 +
   5.573 +      Node* ctrl = new_predicate_proj; // new control
   5.574 +      ProjNode* dp = proj;     // old control
   5.575 +      assert(get_loop(dp) == loop, "guarenteed at the time of collecting proj");
   5.576 +      // Find nodes (depends only on the test) off the surviving projection;
   5.577 +      // move them outside the loop with the control of proj_clone
   5.578 +      for (DUIterator_Fast imax, i = dp->fast_outs(imax); i < imax; i++) {
   5.579 +        Node* cd = dp->fast_out(i); // Control-dependent node
   5.580 +        if (cd->depends_only_on_test()) {
   5.581 +          assert(cd->in(0) == dp, "");
   5.582 +          _igvn.hash_delete(cd);
   5.583 +          cd->set_req(0, ctrl); // ctrl, not NULL
   5.584 +          set_early_ctrl(cd);
   5.585 +          _igvn._worklist.push(cd);
   5.586 +          IdealLoopTree *new_loop = get_loop(get_ctrl(cd));
   5.587 +          if (new_loop != loop) {
   5.588 +            if (!loop->_child) loop->_body.yank(cd);
   5.589 +            if (!new_loop->_child ) new_loop->_body.push(cd);
   5.590 +          }
   5.591 +          --i;
   5.592 +          --imax;
   5.593 +        }
   5.594 +      }
   5.595 +
   5.596 +      hoisted = true;
   5.597 +      C->set_major_progress();
   5.598 +    }
   5.599 +  } // end while
   5.600 +
   5.601 +#ifndef PRODUCT
   5.602 +    // report that the loop predication has been actually performed
   5.603 +    // for this loop
   5.604 +    if (TraceLoopPredicate && hoisted) {
   5.605 +      tty->print("Loop Predication Performed:");
   5.606 +      loop->dump_head();
   5.607 +    }
   5.608 +#endif
   5.609 +
   5.610 +  return hoisted;
   5.611 +}
   5.612 +
   5.613 +//------------------------------loop_predication--------------------------------
   5.614 +// driver routine for loop predication optimization
   5.615 +bool IdealLoopTree::loop_predication( PhaseIdealLoop *phase) {
   5.616 +  bool hoisted = false;
   5.617 +  // Recursively promote predicates
   5.618 +  if ( _child ) {
   5.619 +    hoisted = _child->loop_predication( phase);
   5.620 +  }
   5.621 +
   5.622 +  // self
   5.623 +  if (!_irreducible && !tail()->is_top()) {
   5.624 +    hoisted |= phase->loop_predication_impl(this);
   5.625 +  }
   5.626 +
   5.627 +  if ( _next ) { //sibling
   5.628 +    hoisted |= _next->loop_predication( phase);
   5.629 +  }
   5.630 +
   5.631 +  return hoisted;
   5.632 +}
     6.1 --- a/src/share/vm/opto/loopnode.cpp	Sat Jan 09 00:59:35 2010 -0800
     6.2 +++ b/src/share/vm/opto/loopnode.cpp	Tue Jan 12 14:37:35 2010 -0800
     6.3 @@ -1420,11 +1420,57 @@
     6.4    }
     6.5  }
     6.6  
     6.7 +//---------------------collect_potentially_useful_predicates-----------------------
     6.8 +// Helper function to collect potentially useful predicates to prevent them from
     6.9 +// being eliminated by PhaseIdealLoop::eliminate_useless_predicates
    6.10 +void PhaseIdealLoop::collect_potentially_useful_predicates(
    6.11 +                         IdealLoopTree * loop, Unique_Node_List &useful_predicates) {
    6.12 +  if (loop->_child) { // child
    6.13 +    collect_potentially_useful_predicates(loop->_child, useful_predicates);
    6.14 +  }
    6.15 +
    6.16 +  // self (only loops that we can apply loop predication may use their predicates)
    6.17 +  if (loop->_head->is_Loop()     &&
    6.18 +      !loop->_irreducible        &&
    6.19 +      !loop->tail()->is_top()) {
    6.20 +    LoopNode *lpn  = loop->_head->as_Loop();
    6.21 +    Node* entry = lpn->in(LoopNode::EntryControl);
    6.22 +    ProjNode *predicate_proj = find_predicate_insertion_point(entry);
    6.23 +    if (predicate_proj != NULL ) { // right pattern that can be used by loop predication
    6.24 +      assert(entry->in(0)->in(1)->in(1)->Opcode()==Op_Opaque1, "must be");
    6.25 +      useful_predicates.push(entry->in(0)->in(1)->in(1)); // good one
    6.26 +    }
    6.27 +  }
    6.28 +
    6.29 +  if ( loop->_next ) { // sibling
    6.30 +    collect_potentially_useful_predicates(loop->_next, useful_predicates);
    6.31 +  }
    6.32 +}
    6.33 +
    6.34 +//------------------------eliminate_useless_predicates-----------------------------
    6.35 +// Eliminate all inserted predicates if they could not be used by loop predication.
    6.36 +void PhaseIdealLoop::eliminate_useless_predicates() {
    6.37 +  if (C->predicate_count() == 0) return; // no predicate left
    6.38 +
    6.39 +  Unique_Node_List useful_predicates; // to store useful predicates
    6.40 +  if (C->has_loops()) {
    6.41 +    collect_potentially_useful_predicates(_ltree_root->_child, useful_predicates);
    6.42 +  }
    6.43 +
    6.44 +  for (int i = C->predicate_count(); i > 0; i--) {
    6.45 +     Node * n = C->predicate_opaque1_node(i-1);
    6.46 +     assert(n->Opcode() == Op_Opaque1, "must be");
    6.47 +     if (!useful_predicates.member(n)) { // not in the useful list
    6.48 +       _igvn.replace_node(n, n->in(1));
    6.49 +     }
    6.50 +  }
    6.51 +}
    6.52 +
    6.53  //=============================================================================
    6.54  //----------------------------build_and_optimize-------------------------------
    6.55  // Create a PhaseLoop.  Build the ideal Loop tree.  Map each Ideal Node to
    6.56  // its corresponding LoopNode.  If 'optimize' is true, do some loop cleanups.
    6.57 -void PhaseIdealLoop::build_and_optimize(bool do_split_ifs) {
    6.58 +void PhaseIdealLoop::build_and_optimize(bool do_split_ifs, bool do_loop_pred) {
    6.59    int old_progress = C->major_progress();
    6.60  
    6.61    // Reset major-progress flag for the driver's heuristics
    6.62 @@ -1577,6 +1623,12 @@
    6.63      return;
    6.64    }
    6.65  
    6.66 +  // some parser-inserted loop predicates could never be used by loop
    6.67 +  // predication. Eliminate them before loop optimization
    6.68 +  if (UseLoopPredicate) {
    6.69 +    eliminate_useless_predicates();
    6.70 +  }
    6.71 +
    6.72    // clear out the dead code
    6.73    while(_deadlist.size()) {
    6.74      _igvn.remove_globally_dead_node(_deadlist.pop());
    6.75 @@ -1603,7 +1655,7 @@
    6.76        // Because RCE opportunities can be masked by split_thru_phi,
    6.77        // look for RCE candidates and inhibit split_thru_phi
    6.78        // on just their loop-phi's for this pass of loop opts
    6.79 -      if( SplitIfBlocks && do_split_ifs ) {
    6.80 +      if (SplitIfBlocks && do_split_ifs) {
    6.81          if (lpt->policy_range_check(this)) {
    6.82            lpt->_rce_candidate = 1; // = true
    6.83          }
    6.84 @@ -1619,12 +1671,17 @@
    6.85      NOT_PRODUCT( if( VerifyLoopOptimizations ) verify(); );
    6.86    }
    6.87  
    6.88 +  // Perform loop predication before iteration splitting
    6.89 +  if (do_loop_pred && C->has_loops() && !C->major_progress()) {
    6.90 +    _ltree_root->_child->loop_predication(this);
    6.91 +  }
    6.92 +
    6.93    // Perform iteration-splitting on inner loops.  Split iterations to avoid
    6.94    // range checks or one-shot null checks.
    6.95  
    6.96    // If split-if's didn't hack the graph too bad (no CFG changes)
    6.97    // then do loop opts.
    6.98 -  if( C->has_loops() && !C->major_progress() ) {
    6.99 +  if (C->has_loops() && !C->major_progress()) {
   6.100      memset( worklist.adr(), 0, worklist.Size()*sizeof(Node*) );
   6.101      _ltree_root->_child->iteration_split( this, worklist );
   6.102      // No verify after peeling!  GCM has hoisted code out of the loop.
   6.103 @@ -1636,7 +1693,7 @@
   6.104    // Do verify graph edges in any case
   6.105    NOT_PRODUCT( C->verify_graph_edges(); );
   6.106  
   6.107 -  if( !do_split_ifs ) {
   6.108 +  if (!do_split_ifs) {
   6.109      // We saw major progress in Split-If to get here.  We forced a
   6.110      // pass with unrolling and not split-if, however more split-if's
   6.111      // might make progress.  If the unrolling didn't make progress
   6.112 @@ -2763,6 +2820,22 @@
   6.113    Node *legal = LCA;            // Walk 'legal' up the IDOM chain
   6.114    Node *least = legal;          // Best legal position so far
   6.115    while( early != legal ) {     // While not at earliest legal
   6.116 +#ifdef ASSERT
   6.117 +    if (legal->is_Start() && !early->is_Root()) {
   6.118 +      // Bad graph. Print idom path and fail.
   6.119 +      tty->print_cr( "Bad graph detected in build_loop_late");
   6.120 +      tty->print("n: ");n->dump(); tty->cr();
   6.121 +      tty->print("early: ");early->dump(); tty->cr();
   6.122 +      int ct = 0;
   6.123 +      Node *dbg_legal = LCA;
   6.124 +      while(!dbg_legal->is_Start() && ct < 100) {
   6.125 +        tty->print("idom[%d] ",ct); dbg_legal->dump(); tty->cr();
   6.126 +        ct++;
   6.127 +        dbg_legal = idom(dbg_legal);
   6.128 +      }
   6.129 +      assert(false, "Bad graph detected in build_loop_late");
   6.130 +    }
   6.131 +#endif
   6.132      // Find least loop nesting depth
   6.133      legal = idom(legal);        // Bump up the IDOM tree
   6.134      // Check for lower nesting depth
     7.1 --- a/src/share/vm/opto/loopnode.hpp	Sat Jan 09 00:59:35 2010 -0800
     7.2 +++ b/src/share/vm/opto/loopnode.hpp	Tue Jan 12 14:37:35 2010 -0800
     7.3 @@ -30,6 +30,7 @@
     7.4  class Node;
     7.5  class PhaseIdealLoop;
     7.6  class VectorSet;
     7.7 +class Invariance;
     7.8  struct small_cache;
     7.9  
    7.10  //
    7.11 @@ -325,6 +326,10 @@
    7.12    // Returns TRUE if loop tree is structurally changed.
    7.13    bool beautify_loops( PhaseIdealLoop *phase );
    7.14  
    7.15 +  // Perform optimization to use the loop predicates for null checks and range checks.
    7.16 +  // Applies to any loop level (not just the innermost one)
    7.17 +  bool loop_predication( PhaseIdealLoop *phase);
    7.18 +
    7.19    // Perform iteration-splitting on inner loops.  Split iterations to
    7.20    // avoid range checks or one-shot null checks.  Returns false if the
    7.21    // current round of loop opts should stop.
    7.22 @@ -395,6 +400,9 @@
    7.23    // into longer memory ops, we may want to increase alignment.
    7.24    bool policy_align( PhaseIdealLoop *phase ) const;
    7.25  
    7.26 +  // Return TRUE if "iff" is a range check.
    7.27 +  bool is_range_check_if(IfNode *iff, PhaseIdealLoop *phase, Invariance& invar) const;
    7.28 +
    7.29    // Compute loop trip count from profile data
    7.30    void compute_profile_trip_cnt( PhaseIdealLoop *phase );
    7.31  
    7.32 @@ -521,9 +529,6 @@
    7.33    }
    7.34    Node *dom_lca_for_get_late_ctrl_internal( Node *lca, Node *n, Node *tag );
    7.35  
    7.36 -  // true if CFG node d dominates CFG node n
    7.37 -  bool is_dominator(Node *d, Node *n);
    7.38 -
    7.39    // Helper function for directing control inputs away from CFG split
    7.40    // points.
    7.41    Node *find_non_split_ctrl( Node *ctrl ) const {
    7.42 @@ -572,6 +577,17 @@
    7.43      assert(n == find_non_split_ctrl(n), "must return legal ctrl" );
    7.44      return n;
    7.45    }
    7.46 +  // true if CFG node d dominates CFG node n
    7.47 +  bool is_dominator(Node *d, Node *n);
    7.48 +  // return get_ctrl for a data node and self(n) for a CFG node
    7.49 +  Node* ctrl_or_self(Node* n) {
    7.50 +    if (has_ctrl(n))
    7.51 +      return get_ctrl(n);
    7.52 +    else {
    7.53 +      assert (n->is_CFG(), "must be a CFG node");
    7.54 +      return n;
    7.55 +    }
    7.56 +  }
    7.57  
    7.58  private:
    7.59    Node *get_ctrl_no_update( Node *i ) const {
    7.60 @@ -600,7 +616,7 @@
    7.61    // Lazy-dazy update of 'get_ctrl' and 'idom_at' mechanisms.  Replace
    7.62    // the 'old_node' with 'new_node'.  Kill old-node.  Add a reference
    7.63    // from old_node to new_node to support the lazy update.  Reference
    7.64 -  // replaces loop reference, since that is not neede for dead node.
    7.65 +  // replaces loop reference, since that is not needed for dead node.
    7.66  public:
    7.67    void lazy_update( Node *old_node, Node *new_node ) {
    7.68      assert( old_node != new_node, "no cycles please" );
    7.69 @@ -679,11 +695,11 @@
    7.70      _dom_lca_tags(C->comp_arena()),
    7.71      _verify_me(NULL),
    7.72      _verify_only(true) {
    7.73 -    build_and_optimize(false);
    7.74 +    build_and_optimize(false, false);
    7.75    }
    7.76  
    7.77    // build the loop tree and perform any requested optimizations
    7.78 -  void build_and_optimize(bool do_split_if);
    7.79 +  void build_and_optimize(bool do_split_if, bool do_loop_pred);
    7.80  
    7.81  public:
    7.82    // Dominators for the sea of nodes
    7.83 @@ -694,13 +710,13 @@
    7.84    Node *dom_lca_internal( Node *n1, Node *n2 ) const;
    7.85  
    7.86    // Compute the Ideal Node to Loop mapping
    7.87 -  PhaseIdealLoop( PhaseIterGVN &igvn, bool do_split_ifs) :
    7.88 +  PhaseIdealLoop( PhaseIterGVN &igvn, bool do_split_ifs, bool do_loop_pred) :
    7.89      PhaseTransform(Ideal_Loop),
    7.90      _igvn(igvn),
    7.91      _dom_lca_tags(C->comp_arena()),
    7.92      _verify_me(NULL),
    7.93      _verify_only(false) {
    7.94 -    build_and_optimize(do_split_ifs);
    7.95 +    build_and_optimize(do_split_ifs, do_loop_pred);
    7.96    }
    7.97  
    7.98    // Verify that verify_me made the same decisions as a fresh run.
    7.99 @@ -710,7 +726,7 @@
   7.100      _dom_lca_tags(C->comp_arena()),
   7.101      _verify_me(verify_me),
   7.102      _verify_only(false) {
   7.103 -    build_and_optimize(false);
   7.104 +    build_and_optimize(false, false);
   7.105    }
   7.106  
   7.107    // Build and verify the loop tree without modifying the graph.  This
   7.108 @@ -790,6 +806,30 @@
   7.109    // Return true if exp is a scaled induction var plus (or minus) constant
   7.110    bool is_scaled_iv_plus_offset(Node* exp, Node* iv, int* p_scale, Node** p_offset, int depth = 0);
   7.111  
   7.112 +  // Return true if proj is for "proj->[region->..]call_uct"
   7.113 +  bool is_uncommon_trap_proj(ProjNode* proj, bool must_reason_predicate = false);
   7.114 +  // Return true for    "if(test)-> proj -> ...
   7.115 +  //                          |
   7.116 +  //                          V
   7.117 +  //                      other_proj->[region->..]call_uct"
   7.118 +  bool is_uncommon_trap_if_pattern(ProjNode* proj, bool must_reason_predicate = false);
   7.119 +  // Create a new if above the uncommon_trap_if_pattern for the predicate to be promoted
   7.120 +  ProjNode* create_new_if_for_predicate(ProjNode* cont_proj);
   7.121 +  // Find a good location to insert a predicate
   7.122 +  ProjNode* find_predicate_insertion_point(Node* start_c);
   7.123 +  // Construct a range check for a predicate if
   7.124 +  BoolNode* rc_predicate(Node* ctrl,
   7.125 +                         int scale, Node* offset,
   7.126 +                         Node* init, Node* limit, Node* stride,
   7.127 +                         Node* range);
   7.128 +
   7.129 +  // Implementation of the loop predication to promote checks outside the loop
   7.130 +  bool loop_predication_impl(IdealLoopTree *loop);
   7.131 +
   7.132 +  // Helper function to collect predicate for eliminating the useless ones
   7.133 +  void collect_potentially_useful_predicates(IdealLoopTree *loop, Unique_Node_List &predicate_opaque1);
   7.134 +  void eliminate_useless_predicates();
   7.135 +
   7.136    // Eliminate range-checks and other trip-counter vs loop-invariant tests.
   7.137    void do_range_check( IdealLoopTree *loop, Node_List &old_new );
   7.138  
   7.139 @@ -906,7 +946,6 @@
   7.140    const TypeInt* filtered_type_from_dominators( Node* val, Node *val_ctrl);
   7.141  
   7.142    // Helper functions
   7.143 -  void register_new_node( Node *n, Node *blk );
   7.144    Node *spinup( Node *iff, Node *new_false, Node *new_true, Node *region, Node *phi, small_cache *cache );
   7.145    Node *find_use_block( Node *use, Node *def, Node *old_false, Node *new_false, Node *old_true, Node *new_true );
   7.146    void handle_use( Node *use, Node *def, small_cache *cache, Node *region_dom, Node *new_false, Node *new_true, Node *old_false, Node *old_true );
   7.147 @@ -918,6 +957,7 @@
   7.148  public:
   7.149    void set_created_loop_node() { _created_loop_node = true; }
   7.150    bool created_loop_node()     { return _created_loop_node; }
   7.151 +  void register_new_node( Node *n, Node *blk );
   7.152  
   7.153  #ifndef PRODUCT
   7.154    void dump( ) const;
     8.1 --- a/src/share/vm/opto/parse.hpp	Sat Jan 09 00:59:35 2010 -0800
     8.2 +++ b/src/share/vm/opto/parse.hpp	Tue Jan 12 14:37:35 2010 -0800
     8.3 @@ -430,6 +430,11 @@
     8.4      }
     8.5    }
     8.6  
     8.7 +  // Return true if the parser should add a loop predicate
     8.8 +  bool should_add_predicate(int target_bci);
     8.9 +  // Insert a loop predicate into the graph
    8.10 +  void add_predicate();
    8.11 +
    8.12    // Note:  Intrinsic generation routines may be found in library_call.cpp.
    8.13  
    8.14    // Helper function to setup Ideal Call nodes
    8.15 @@ -491,7 +496,7 @@
    8.16  
    8.17    void    do_ifnull(BoolTest::mask btest, Node* c);
    8.18    void    do_if(BoolTest::mask btest, Node* c);
    8.19 -  void    repush_if_args();
    8.20 +  int     repush_if_args();
    8.21    void    adjust_map_after_if(BoolTest::mask btest, Node* c, float prob,
    8.22                                Block* path, Block* other_path);
    8.23    IfNode* jump_if_fork_int(Node* a, Node* b, BoolTest::mask mask);
     9.1 --- a/src/share/vm/opto/parse1.cpp	Sat Jan 09 00:59:35 2010 -0800
     9.2 +++ b/src/share/vm/opto/parse1.cpp	Tue Jan 12 14:37:35 2010 -0800
     9.3 @@ -1383,6 +1383,10 @@
     9.4      set_parse_bci(iter().cur_bci());
     9.5  
     9.6      if (bci() == block()->limit()) {
     9.7 +      // insert a predicate if it falls through to a loop head block
     9.8 +      if (should_add_predicate(bci())){
     9.9 +        add_predicate();
    9.10 +      }
    9.11        // Do not walk into the next block until directed by do_all_blocks.
    9.12        merge(bci());
    9.13        break;
    9.14 @@ -2083,6 +2087,37 @@
    9.15    }
    9.16  }
    9.17  
    9.18 +//------------------------------should_add_predicate--------------------------
    9.19 +bool Parse::should_add_predicate(int target_bci) {
    9.20 +  if (!UseLoopPredicate) return false;
    9.21 +  Block* target = successor_for_bci(target_bci);
    9.22 +  if (target != NULL          &&
    9.23 +      target->is_loop_head()  &&
    9.24 +      block()->rpo() < target->rpo()) {
    9.25 +    return true;
    9.26 +  }
    9.27 +  return false;
    9.28 +}
    9.29 +
    9.30 +//------------------------------add_predicate---------------------------------
    9.31 +void Parse::add_predicate() {
    9.32 +  assert(UseLoopPredicate,"use only for loop predicate");
    9.33 +  Node *cont    = _gvn.intcon(1);
    9.34 +  Node* opq     = _gvn.transform(new (C, 2) Opaque1Node(C, cont));
    9.35 +  Node *bol     = _gvn.transform(new (C, 2) Conv2BNode(opq));
    9.36 +  IfNode* iff   = create_and_map_if(control(), bol, PROB_MAX, COUNT_UNKNOWN);
    9.37 +  Node* iffalse = _gvn.transform(new (C, 1) IfFalseNode(iff));
    9.38 +  C->add_predicate_opaq(opq);
    9.39 +  {
    9.40 +    PreserveJVMState pjvms(this);
    9.41 +    set_control(iffalse);
    9.42 +    uncommon_trap(Deoptimization::Reason_predicate,
    9.43 +                  Deoptimization::Action_maybe_recompile);
    9.44 +  }
    9.45 +  Node* iftrue = _gvn.transform(new (C, 1) IfTrueNode(iff));
    9.46 +  set_control(iftrue);
    9.47 +}
    9.48 +
    9.49  #ifndef PRODUCT
    9.50  //------------------------show_parse_info--------------------------------------
    9.51  void Parse::show_parse_info() {
    10.1 --- a/src/share/vm/opto/parse2.cpp	Sat Jan 09 00:59:35 2010 -0800
    10.2 +++ b/src/share/vm/opto/parse2.cpp	Tue Jan 12 14:37:35 2010 -0800
    10.3 @@ -278,6 +278,11 @@
    10.4    if (len < 1) {
    10.5      // If this is a backward branch, add safepoint
    10.6      maybe_add_safepoint(default_dest);
    10.7 +    if (should_add_predicate(default_dest)){
    10.8 +      _sp += 1; // set original stack for use by uncommon_trap
    10.9 +      add_predicate();
   10.10 +      _sp -= 1;
   10.11 +    }
   10.12      merge(default_dest);
   10.13      return;
   10.14    }
   10.15 @@ -324,6 +329,11 @@
   10.16  
   10.17    if (len < 1) {    // If this is a backward branch, add safepoint
   10.18      maybe_add_safepoint(default_dest);
   10.19 +    if (should_add_predicate(default_dest)){
   10.20 +      _sp += 1; // set original stack for use by uncommon_trap
   10.21 +      add_predicate();
   10.22 +      _sp -= 1;
   10.23 +    }
   10.24      merge(default_dest);
   10.25      return;
   10.26    }
   10.27 @@ -731,6 +741,9 @@
   10.28    push(_gvn.makecon(ret_addr));
   10.29  
   10.30    // Flow to the jsr.
   10.31 +  if (should_add_predicate(jsr_bci)){
   10.32 +    add_predicate();
   10.33 +  }
   10.34    merge(jsr_bci);
   10.35  }
   10.36  
   10.37 @@ -881,7 +894,7 @@
   10.38  
   10.39  //-------------------------------repush_if_args--------------------------------
   10.40  // Push arguments of an "if" bytecode back onto the stack by adjusting _sp.
   10.41 -inline void Parse::repush_if_args() {
   10.42 +inline int Parse::repush_if_args() {
   10.43  #ifndef PRODUCT
   10.44    if (PrintOpto && WizardMode) {
   10.45      tty->print("defending against excessive implicit null exceptions on %s @%d in ",
   10.46 @@ -895,6 +908,7 @@
   10.47    assert(argument(0) != NULL, "must exist");
   10.48    assert(bc_depth == 1 || argument(1) != NULL, "two must exist");
   10.49    _sp += bc_depth;
   10.50 +  return bc_depth;
   10.51  }
   10.52  
   10.53  //----------------------------------do_ifnull----------------------------------
   10.54 @@ -954,8 +968,14 @@
   10.55        // Update method data
   10.56        profile_taken_branch(target_bci);
   10.57        adjust_map_after_if(btest, c, prob, branch_block, next_block);
   10.58 -      if (!stopped())
   10.59 +      if (!stopped()) {
   10.60 +        if (should_add_predicate(target_bci)){ // add a predicate if it branches to a loop
   10.61 +          int nargs = repush_if_args(); // set original stack for uncommon_trap
   10.62 +          add_predicate();
   10.63 +          _sp -= nargs;
   10.64 +        }
   10.65          merge(target_bci);
   10.66 +      }
   10.67      }
   10.68    }
   10.69  
   10.70 @@ -1076,8 +1096,14 @@
   10.71        // Update method data
   10.72        profile_taken_branch(target_bci);
   10.73        adjust_map_after_if(taken_btest, c, prob, branch_block, next_block);
   10.74 -      if (!stopped())
   10.75 +      if (!stopped()) {
   10.76 +        if (should_add_predicate(target_bci)){ // add a predicate if it branches to a loop
   10.77 +          int nargs = repush_if_args(); // set original stack for the uncommon_trap
   10.78 +          add_predicate();
   10.79 +          _sp -= nargs;
   10.80 +        }
   10.81          merge(target_bci);
   10.82 +      }
   10.83      }
   10.84    }
   10.85  
   10.86 @@ -2080,6 +2106,10 @@
   10.87      // Update method data
   10.88      profile_taken_branch(target_bci);
   10.89  
   10.90 +    // Add loop predicate if it goes to a loop
   10.91 +    if (should_add_predicate(target_bci)){
   10.92 +      add_predicate();
   10.93 +    }
   10.94      // Merge the current control into the target basic block
   10.95      merge(target_bci);
   10.96  
    11.1 --- a/src/share/vm/opto/split_if.cpp	Sat Jan 09 00:59:35 2010 -0800
    11.2 +++ b/src/share/vm/opto/split_if.cpp	Tue Jan 12 14:37:35 2010 -0800
    11.3 @@ -219,6 +219,7 @@
    11.4  
    11.5  //------------------------------register_new_node------------------------------
    11.6  void PhaseIdealLoop::register_new_node( Node *n, Node *blk ) {
    11.7 +  assert(!n->is_CFG(), "must be data node");
    11.8    _igvn.register_new_node_with_optimizer(n);
    11.9    set_ctrl(n, blk);
   11.10    IdealLoopTree *loop = get_loop(blk);
    12.1 --- a/src/share/vm/runtime/deoptimization.cpp	Sat Jan 09 00:59:35 2010 -0800
    12.2 +++ b/src/share/vm/runtime/deoptimization.cpp	Tue Jan 12 14:37:35 2010 -0800
    12.3 @@ -1672,7 +1672,8 @@
    12.4    "unhandled",
    12.5    "constraint",
    12.6    "div0_check",
    12.7 -  "age"
    12.8 +  "age",
    12.9 +  "predicate"
   12.10  };
   12.11  const char* Deoptimization::_trap_action_name[Action_LIMIT] = {
   12.12    // Note:  Keep this in sync. with enum DeoptAction.
    13.1 --- a/src/share/vm/runtime/deoptimization.hpp	Sat Jan 09 00:59:35 2010 -0800
    13.2 +++ b/src/share/vm/runtime/deoptimization.hpp	Tue Jan 12 14:37:35 2010 -0800
    13.3 @@ -46,6 +46,7 @@
    13.4      Reason_constraint,            // arbitrary runtime constraint violated
    13.5      Reason_div0_check,            // a null_check due to division by zero
    13.6      Reason_age,                   // nmethod too old; tier threshold reached
    13.7 +    Reason_predicate,             // compiler generated predicate failed
    13.8      Reason_LIMIT,
    13.9      // Note:  Keep this enum in sync. with _trap_reason_name.
   13.10      Reason_RECORDED_LIMIT = Reason_unloaded   // some are not recorded per bc

mercurial