6862956: PhaseIdealLoop should have a CFG verification mode

Fri, 14 Aug 2009 00:02:12 -0700

author
never
date
Fri, 14 Aug 2009 00:02:12 -0700
changeset 1356
046932b72aa2
parent 1345
10d8c0d0d60e
child 1357
1a81ea4b45d4
child 1359
55784fd95fe3

6862956: PhaseIdealLoop should have a CFG verification mode
Reviewed-by: kvn, twisti

src/share/vm/opto/compile.cpp file | annotate | diff | comparison | revisions
src/share/vm/opto/domgraph.cpp file | annotate | diff | comparison | revisions
src/share/vm/opto/loopnode.cpp file | annotate | diff | comparison | revisions
src/share/vm/opto/loopnode.hpp file | annotate | diff | comparison | revisions
src/share/vm/opto/phase.cpp file | annotate | diff | comparison | revisions
src/share/vm/opto/phase.hpp file | annotate | diff | comparison | revisions
     1.1 --- a/src/share/vm/opto/compile.cpp	Wed Aug 12 14:27:54 2009 -0700
     1.2 +++ b/src/share/vm/opto/compile.cpp	Fri Aug 14 00:02:12 2009 -0700
     1.3 @@ -1545,7 +1545,7 @@
     1.4    if((loop_opts_cnt > 0) && (has_loops() || has_split_ifs())) {
     1.5      {
     1.6        TracePhase t2("idealLoop", &_t_idealLoop, true);
     1.7 -      PhaseIdealLoop ideal_loop( igvn, NULL, true );
     1.8 +      PhaseIdealLoop ideal_loop( igvn, true );
     1.9        loop_opts_cnt--;
    1.10        if (major_progress()) print_method("PhaseIdealLoop 1", 2);
    1.11        if (failing())  return;
    1.12 @@ -1553,7 +1553,7 @@
    1.13      // Loop opts pass if partial peeling occurred in previous pass
    1.14      if(PartialPeelLoop && major_progress() && (loop_opts_cnt > 0)) {
    1.15        TracePhase t3("idealLoop", &_t_idealLoop, true);
    1.16 -      PhaseIdealLoop ideal_loop( igvn, NULL, false );
    1.17 +      PhaseIdealLoop ideal_loop( igvn, false );
    1.18        loop_opts_cnt--;
    1.19        if (major_progress()) print_method("PhaseIdealLoop 2", 2);
    1.20        if (failing())  return;
    1.21 @@ -1561,10 +1561,15 @@
    1.22      // Loop opts pass for loop-unrolling before CCP
    1.23      if(major_progress() && (loop_opts_cnt > 0)) {
    1.24        TracePhase t4("idealLoop", &_t_idealLoop, true);
    1.25 -      PhaseIdealLoop ideal_loop( igvn, NULL, false );
    1.26 +      PhaseIdealLoop ideal_loop( igvn, false );
    1.27        loop_opts_cnt--;
    1.28        if (major_progress()) print_method("PhaseIdealLoop 3", 2);
    1.29      }
    1.30 +    if (!failing()) {
    1.31 +      // Verify that last round of loop opts produced a valid graph
    1.32 +      NOT_PRODUCT( TracePhase t2("idealLoopVerify", &_t_idealLoopVerify, TimeCompiler); )
    1.33 +      PhaseIdealLoop::verify(igvn);
    1.34 +    }
    1.35    }
    1.36    if (failing())  return;
    1.37  
    1.38 @@ -1597,12 +1602,20 @@
    1.39      while(major_progress() && (loop_opts_cnt > 0)) {
    1.40        TracePhase t2("idealLoop", &_t_idealLoop, true);
    1.41        assert( cnt++ < 40, "infinite cycle in loop optimization" );
    1.42 -      PhaseIdealLoop ideal_loop( igvn, NULL, true );
    1.43 +      PhaseIdealLoop ideal_loop( igvn, true );
    1.44        loop_opts_cnt--;
    1.45        if (major_progress()) print_method("PhaseIdealLoop iterations", 2);
    1.46        if (failing())  return;
    1.47      }
    1.48    }
    1.49 +
    1.50 +  {
    1.51 +    // Verify that all previous optimizations produced a valid graph
    1.52 +    // at least to this point, even if no loop optimizations were done.
    1.53 +    NOT_PRODUCT( TracePhase t2("idealLoopVerify", &_t_idealLoopVerify, TimeCompiler); )
    1.54 +    PhaseIdealLoop::verify(igvn);
    1.55 +  }
    1.56 +
    1.57    {
    1.58      NOT_PRODUCT( TracePhase t2("macroExpand", &_t_macroExpand, TimeCompiler); )
    1.59      PhaseMacroExpand  mex(igvn);
     2.1 --- a/src/share/vm/opto/domgraph.cpp	Wed Aug 12 14:27:54 2009 -0700
     2.2 +++ b/src/share/vm/opto/domgraph.cpp	Fri Aug 14 00:02:12 2009 -0700
     2.3 @@ -1,5 +1,5 @@
     2.4  /*
     2.5 - * Copyright 1997-2005 Sun Microsystems, Inc.  All Rights Reserved.
     2.6 + * Copyright 1997-2009 Sun Microsystems, Inc.  All Rights Reserved.
     2.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     2.8   *
     2.9   * This code is free software; you can redistribute it and/or modify it
    2.10 @@ -396,7 +396,7 @@
    2.11  // nodes (using the is_CFG() call) and places them in a dominator tree.  Thus,
    2.12  // it needs a count of the CFG nodes for the mapping table. This is the
    2.13  // Lengauer & Tarjan O(E-alpha(E,V)) algorithm.
    2.14 -void PhaseIdealLoop::Dominators( ) {
    2.15 +void PhaseIdealLoop::Dominators() {
    2.16    ResourceMark rm;
    2.17    // Setup mappings from my Graph to Tarjan's stuff and back
    2.18    // Note: Tarjan uses 1-based arrays
    2.19 @@ -454,7 +454,7 @@
    2.20      // flow into the main graph (and hence into ROOT) but are not reachable
    2.21      // from above.  Such code is dead, but requires a global pass to detect
    2.22      // it; this global pass was the 'build_loop_tree' pass run just prior.
    2.23 -    if( whead->is_Region() ) {
    2.24 +    if( !_verify_only && whead->is_Region() ) {
    2.25        for( uint i = 1; i < whead->req(); i++ ) {
    2.26          if (!has_node(whead->in(i))) {
    2.27            // Kill dead input path
     3.1 --- a/src/share/vm/opto/loopnode.cpp	Wed Aug 12 14:27:54 2009 -0700
     3.2 +++ b/src/share/vm/opto/loopnode.cpp	Fri Aug 14 00:02:12 2009 -0700
     3.3 @@ -1420,13 +1420,12 @@
     3.4  }
     3.5  
     3.6  //=============================================================================
     3.7 -//------------------------------PhaseIdealLoop---------------------------------
     3.8 +//----------------------------build_and_optimize-------------------------------
     3.9  // Create a PhaseLoop.  Build the ideal Loop tree.  Map each Ideal Node to
    3.10  // its corresponding LoopNode.  If 'optimize' is true, do some loop cleanups.
    3.11 -PhaseIdealLoop::PhaseIdealLoop( PhaseIterGVN &igvn, const PhaseIdealLoop *verify_me, bool do_split_ifs )
    3.12 -  : PhaseTransform(Ideal_Loop),
    3.13 -    _igvn(igvn),
    3.14 -    _dom_lca_tags(C->comp_arena()) {
    3.15 +void PhaseIdealLoop::build_and_optimize(bool do_split_ifs) {
    3.16 +  int old_progress = C->major_progress();
    3.17 +
    3.18    // Reset major-progress flag for the driver's heuristics
    3.19    C->clear_major_progress();
    3.20  
    3.21 @@ -1465,18 +1464,20 @@
    3.22    }
    3.23  
    3.24    // No loops after all
    3.25 -  if( !_ltree_root->_child ) C->set_has_loops(false);
    3.26 +  if( !_ltree_root->_child && !_verify_only ) C->set_has_loops(false);
    3.27  
    3.28    // There should always be an outer loop containing the Root and Return nodes.
    3.29    // If not, we have a degenerate empty program.  Bail out in this case.
    3.30    if (!has_node(C->root())) {
    3.31 -    C->clear_major_progress();
    3.32 -    C->record_method_not_compilable("empty program detected during loop optimization");
    3.33 +    if (!_verify_only) {
    3.34 +      C->clear_major_progress();
    3.35 +      C->record_method_not_compilable("empty program detected during loop optimization");
    3.36 +    }
    3.37      return;
    3.38    }
    3.39  
    3.40    // Nothing to do, so get out
    3.41 -  if( !C->has_loops() && !do_split_ifs && !verify_me) {
    3.42 +  if( !C->has_loops() && !do_split_ifs && !_verify_me && !_verify_only ) {
    3.43      _igvn.optimize();           // Cleanup NeverBranches
    3.44      return;
    3.45    }
    3.46 @@ -1486,7 +1487,7 @@
    3.47  
    3.48    // Split shared headers and insert loop landing pads.
    3.49    // Do not bother doing this on the Root loop of course.
    3.50 -  if( !verify_me && _ltree_root->_child ) {
    3.51 +  if( !_verify_me && !_verify_only && _ltree_root->_child ) {
    3.52      if( _ltree_root->_child->beautify_loops( this ) ) {
    3.53        // Re-build loop tree!
    3.54        _ltree_root->_child = NULL;
    3.55 @@ -1515,24 +1516,26 @@
    3.56  
    3.57    Dominators();
    3.58  
    3.59 -  // As a side effect, Dominators removed any unreachable CFG paths
    3.60 -  // into RegionNodes.  It doesn't do this test against Root, so
    3.61 -  // we do it here.
    3.62 -  for( uint i = 1; i < C->root()->req(); i++ ) {
    3.63 -    if( !_nodes[C->root()->in(i)->_idx] ) {    // Dead path into Root?
    3.64 -      _igvn.hash_delete(C->root());
    3.65 -      C->root()->del_req(i);
    3.66 -      _igvn._worklist.push(C->root());
    3.67 -      i--;                      // Rerun same iteration on compressed edges
    3.68 +  if (!_verify_only) {
    3.69 +    // As a side effect, Dominators removed any unreachable CFG paths
    3.70 +    // into RegionNodes.  It doesn't do this test against Root, so
    3.71 +    // we do it here.
    3.72 +    for( uint i = 1; i < C->root()->req(); i++ ) {
    3.73 +      if( !_nodes[C->root()->in(i)->_idx] ) {    // Dead path into Root?
    3.74 +        _igvn.hash_delete(C->root());
    3.75 +        C->root()->del_req(i);
    3.76 +        _igvn._worklist.push(C->root());
    3.77 +        i--;                      // Rerun same iteration on compressed edges
    3.78 +      }
    3.79      }
    3.80 +
    3.81 +    // Given dominators, try to find inner loops with calls that must
    3.82 +    // always be executed (call dominates loop tail).  These loops do
    3.83 +    // not need a separate safepoint.
    3.84 +    Node_List cisstack(a);
    3.85 +    _ltree_root->check_safepts(visited, cisstack);
    3.86    }
    3.87  
    3.88 -  // Given dominators, try to find inner loops with calls that must
    3.89 -  // always be executed (call dominates loop tail).  These loops do
    3.90 -  // not need a separate safepoint.
    3.91 -  Node_List cisstack(a);
    3.92 -  _ltree_root->check_safepts(visited, cisstack);
    3.93 -
    3.94    // Walk the DATA nodes and place into loops.  Find earliest control
    3.95    // node.  For CFG nodes, the _nodes array starts out and remains
    3.96    // holding the associated IdealLoopTree pointer.  For DATA nodes, the
    3.97 @@ -1548,11 +1551,11 @@
    3.98    // it will be processed among C->top() inputs
    3.99    worklist.push( C->top() );
   3.100    visited.set( C->top()->_idx ); // Set C->top() as visited now
   3.101 -  build_loop_early( visited, worklist, nstack, verify_me );
   3.102 +  build_loop_early( visited, worklist, nstack );
   3.103  
   3.104    // Given early legal placement, try finding counted loops.  This placement
   3.105    // is good enough to discover most loop invariants.
   3.106 -  if( !verify_me )
   3.107 +  if( !_verify_me && !_verify_only )
   3.108      _ltree_root->counted_loop( this );
   3.109  
   3.110    // Find latest loop placement.  Find ideal loop placement.
   3.111 @@ -1562,16 +1565,25 @@
   3.112    worklist.push( C->root() );
   3.113    NOT_PRODUCT( C->verify_graph_edges(); )
   3.114    worklist.push( C->top() );
   3.115 -  build_loop_late( visited, worklist, nstack, verify_me );
   3.116 +  build_loop_late( visited, worklist, nstack );
   3.117 +
   3.118 +  if (_verify_only) {
   3.119 +    // restore major progress flag
   3.120 +    for (int i = 0; i < old_progress; i++)
   3.121 +      C->set_major_progress();
   3.122 +    assert(C->unique() == unique, "verification mode made Nodes? ? ?");
   3.123 +    assert(_igvn._worklist.size() == 0, "shouldn't push anything");
   3.124 +    return;
   3.125 +  }
   3.126  
   3.127    // clear out the dead code
   3.128    while(_deadlist.size()) {
   3.129 -    igvn.remove_globally_dead_node(_deadlist.pop());
   3.130 +    _igvn.remove_globally_dead_node(_deadlist.pop());
   3.131    }
   3.132  
   3.133  #ifndef PRODUCT
   3.134    C->verify_graph_edges();
   3.135 -  if( verify_me ) {             // Nested verify pass?
   3.136 +  if( _verify_me ) {             // Nested verify pass?
   3.137      // Check to see if the verify mode is broken
   3.138      assert(C->unique() == unique, "non-optimize mode made Nodes? ? ?");
   3.139      return;
   3.140 @@ -1678,7 +1690,7 @@
   3.141  void PhaseIdealLoop::verify() const {
   3.142    int old_progress = C->major_progress();
   3.143    ResourceMark rm;
   3.144 -  PhaseIdealLoop loop_verify( _igvn, this, false );
   3.145 +  PhaseIdealLoop loop_verify( _igvn, this );
   3.146    VectorSet visited(Thread::current()->resource_area());
   3.147  
   3.148    fail = 0;
   3.149 @@ -2138,54 +2150,58 @@
   3.150          // optimizing an infinite loop?
   3.151          l = _ltree_root;        // Oops, found infinite loop
   3.152  
   3.153 -        // Insert the NeverBranch between 'm' and it's control user.
   3.154 -        NeverBranchNode *iff = new (C, 1) NeverBranchNode( m );
   3.155 -        _igvn.register_new_node_with_optimizer(iff);
   3.156 -        set_loop(iff, l);
   3.157 -        Node *if_t = new (C, 1) CProjNode( iff, 0 );
   3.158 -        _igvn.register_new_node_with_optimizer(if_t);
   3.159 -        set_loop(if_t, l);
   3.160 +        if (!_verify_only) {
   3.161 +          // Insert the NeverBranch between 'm' and it's control user.
   3.162 +          NeverBranchNode *iff = new (C, 1) NeverBranchNode( m );
   3.163 +          _igvn.register_new_node_with_optimizer(iff);
   3.164 +          set_loop(iff, l);
   3.165 +          Node *if_t = new (C, 1) CProjNode( iff, 0 );
   3.166 +          _igvn.register_new_node_with_optimizer(if_t);
   3.167 +          set_loop(if_t, l);
   3.168  
   3.169 -        Node* cfg = NULL;       // Find the One True Control User of m
   3.170 -        for (DUIterator_Fast jmax, j = m->fast_outs(jmax); j < jmax; j++) {
   3.171 -          Node* x = m->fast_out(j);
   3.172 -          if (x->is_CFG() && x != m && x != iff)
   3.173 -            { cfg = x; break; }
   3.174 +          Node* cfg = NULL;       // Find the One True Control User of m
   3.175 +          for (DUIterator_Fast jmax, j = m->fast_outs(jmax); j < jmax; j++) {
   3.176 +            Node* x = m->fast_out(j);
   3.177 +            if (x->is_CFG() && x != m && x != iff)
   3.178 +              { cfg = x; break; }
   3.179 +          }
   3.180 +          assert(cfg != NULL, "must find the control user of m");
   3.181 +          uint k = 0;             // Probably cfg->in(0)
   3.182 +          while( cfg->in(k) != m ) k++; // But check incase cfg is a Region
   3.183 +          cfg->set_req( k, if_t ); // Now point to NeverBranch
   3.184 +
   3.185 +          // Now create the never-taken loop exit
   3.186 +          Node *if_f = new (C, 1) CProjNode( iff, 1 );
   3.187 +          _igvn.register_new_node_with_optimizer(if_f);
   3.188 +          set_loop(if_f, l);
   3.189 +          // Find frame ptr for Halt.  Relies on the optimizer
   3.190 +          // V-N'ing.  Easier and quicker than searching through
   3.191 +          // the program structure.
   3.192 +          Node *frame = new (C, 1) ParmNode( C->start(), TypeFunc::FramePtr );
   3.193 +          _igvn.register_new_node_with_optimizer(frame);
   3.194 +          // Halt & Catch Fire
   3.195 +          Node *halt = new (C, TypeFunc::Parms) HaltNode( if_f, frame );
   3.196 +          _igvn.register_new_node_with_optimizer(halt);
   3.197 +          set_loop(halt, l);
   3.198 +          C->root()->add_req(halt);
   3.199          }
   3.200 -        assert(cfg != NULL, "must find the control user of m");
   3.201 -        uint k = 0;             // Probably cfg->in(0)
   3.202 -        while( cfg->in(k) != m ) k++; // But check incase cfg is a Region
   3.203 -        cfg->set_req( k, if_t ); // Now point to NeverBranch
   3.204 -
   3.205 -        // Now create the never-taken loop exit
   3.206 -        Node *if_f = new (C, 1) CProjNode( iff, 1 );
   3.207 -        _igvn.register_new_node_with_optimizer(if_f);
   3.208 -        set_loop(if_f, l);
   3.209 -        // Find frame ptr for Halt.  Relies on the optimizer
   3.210 -        // V-N'ing.  Easier and quicker than searching through
   3.211 -        // the program structure.
   3.212 -        Node *frame = new (C, 1) ParmNode( C->start(), TypeFunc::FramePtr );
   3.213 -        _igvn.register_new_node_with_optimizer(frame);
   3.214 -        // Halt & Catch Fire
   3.215 -        Node *halt = new (C, TypeFunc::Parms) HaltNode( if_f, frame );
   3.216 -        _igvn.register_new_node_with_optimizer(halt);
   3.217 -        set_loop(halt, l);
   3.218 -        C->root()->add_req(halt);
   3.219          set_loop(C->root(), _ltree_root);
   3.220        }
   3.221      }
   3.222      // Weeny check for irreducible.  This child was already visited (this
   3.223      // IS the post-work phase).  Is this child's loop header post-visited
   3.224      // as well?  If so, then I found another entry into the loop.
   3.225 -    while( is_postvisited(l->_head) ) {
   3.226 -      // found irreducible
   3.227 -      l->_irreducible = 1; // = true
   3.228 -      l = l->_parent;
   3.229 -      _has_irreducible_loops = true;
   3.230 -      // Check for bad CFG here to prevent crash, and bailout of compile
   3.231 -      if (l == NULL) {
   3.232 -        C->record_method_not_compilable("unhandled CFG detected during loop optimization");
   3.233 -        return pre_order;
   3.234 +    if (!_verify_only) {
   3.235 +      while( is_postvisited(l->_head) ) {
   3.236 +        // found irreducible
   3.237 +        l->_irreducible = 1; // = true
   3.238 +        l = l->_parent;
   3.239 +        _has_irreducible_loops = true;
   3.240 +        // Check for bad CFG here to prevent crash, and bailout of compile
   3.241 +        if (l == NULL) {
   3.242 +          C->record_method_not_compilable("unhandled CFG detected during loop optimization");
   3.243 +          return pre_order;
   3.244 +        }
   3.245        }
   3.246      }
   3.247  
   3.248 @@ -2253,7 +2269,7 @@
   3.249  // Put Data nodes into some loop nest, by setting the _nodes[]->loop mapping.
   3.250  // First pass computes the earliest controlling node possible.  This is the
   3.251  // controlling input with the deepest dominating depth.
   3.252 -void PhaseIdealLoop::build_loop_early( VectorSet &visited, Node_List &worklist, Node_Stack &nstack, const PhaseIdealLoop *verify_me ) {
   3.253 +void PhaseIdealLoop::build_loop_early( VectorSet &visited, Node_List &worklist, Node_Stack &nstack ) {
   3.254    while (worklist.size() != 0) {
   3.255      // Use local variables nstack_top_n & nstack_top_i to cache values
   3.256      // on nstack's top.
   3.257 @@ -2285,7 +2301,7 @@
   3.258            // (the old code here would yank a 2nd safepoint after seeing a
   3.259            // first one, even though the 1st did not dominate in the loop body
   3.260            // and thus could be avoided indefinitely)
   3.261 -          if( !verify_me && ilt->_has_sfpt && n->Opcode() == Op_SafePoint &&
   3.262 +          if( !_verify_only && !_verify_me && ilt->_has_sfpt && n->Opcode() == Op_SafePoint &&
   3.263                is_deleteable_safept(n)) {
   3.264              Node *in = n->in(TypeFunc::Control);
   3.265              lazy_replace(n,in);       // Pull safepoint now
   3.266 @@ -2408,12 +2424,31 @@
   3.267    return LCA;
   3.268  }
   3.269  
   3.270 -//------------------------------get_late_ctrl----------------------------------
   3.271 -// Compute latest legal control.
   3.272 -Node *PhaseIdealLoop::get_late_ctrl( Node *n, Node *early ) {
   3.273 -  assert(early != NULL, "early control should not be NULL");
   3.274 +bool PhaseIdealLoop::verify_dominance(Node* n, Node* use, Node* LCA, Node* early) {
   3.275 +  bool had_error = false;
   3.276 +#ifdef ASSERT
   3.277 +  if (early != C->root()) {
   3.278 +    // Make sure that there's a dominance path from use to LCA
   3.279 +    Node* d = use;
   3.280 +    while (d != LCA) {
   3.281 +      d = idom(d);
   3.282 +      if (d == C->root()) {
   3.283 +        tty->print_cr("*** Use %d isn't dominated by def %s", use->_idx, n->_idx);
   3.284 +        n->dump();
   3.285 +        use->dump();
   3.286 +        had_error = true;
   3.287 +        break;
   3.288 +      }
   3.289 +    }
   3.290 +  }
   3.291 +#endif
   3.292 +  return had_error;
   3.293 +}
   3.294  
   3.295 +
   3.296 +Node* PhaseIdealLoop::compute_lca_of_uses(Node* n, Node* early, bool verify) {
   3.297    // Compute LCA over list of uses
   3.298 +  bool had_error = false;
   3.299    Node *LCA = NULL;
   3.300    for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax && LCA != early; i++) {
   3.301      Node* c = n->fast_out(i);
   3.302 @@ -2423,15 +2458,34 @@
   3.303        for( uint j=1; j<c->req(); j++ ) {// For all inputs
   3.304          if( c->in(j) == n ) {   // Found matching input?
   3.305            Node *use = c->in(0)->in(j);
   3.306 +          if (_verify_only && use->is_top()) continue;
   3.307            LCA = dom_lca_for_get_late_ctrl( LCA, use, n );
   3.308 +          if (verify) had_error = verify_dominance(n, use, LCA, early) || had_error;
   3.309          }
   3.310        }
   3.311      } else {
   3.312        // For CFG data-users, use is in the block just prior
   3.313        Node *use = has_ctrl(c) ? get_ctrl(c) : c->in(0);
   3.314        LCA = dom_lca_for_get_late_ctrl( LCA, use, n );
   3.315 +      if (verify) had_error = verify_dominance(n, use, LCA, early) || had_error;
   3.316      }
   3.317    }
   3.318 +  assert(!had_error, "bad dominance");
   3.319 +  return LCA;
   3.320 +}
   3.321 +
   3.322 +//------------------------------get_late_ctrl----------------------------------
   3.323 +// Compute latest legal control.
   3.324 +Node *PhaseIdealLoop::get_late_ctrl( Node *n, Node *early ) {
   3.325 +  assert(early != NULL, "early control should not be NULL");
   3.326 +
   3.327 +  Node* LCA = compute_lca_of_uses(n, early);
   3.328 +#ifdef ASSERT
   3.329 +  if (LCA == C->root() && LCA != early) {
   3.330 +    // def doesn't dominate uses so print some useful debugging output
   3.331 +    compute_lca_of_uses(n, early, true);
   3.332 +  }
   3.333 +#endif
   3.334  
   3.335    // if this is a load, check for anti-dependent stores
   3.336    // We use a conservative algorithm to identify potential interfering
   3.337 @@ -2576,7 +2630,7 @@
   3.338  //------------------------------build_loop_late--------------------------------
   3.339  // Put Data nodes into some loop nest, by setting the _nodes[]->loop mapping.
   3.340  // Second pass finds latest legal placement, and ideal loop placement.
   3.341 -void PhaseIdealLoop::build_loop_late( VectorSet &visited, Node_List &worklist, Node_Stack &nstack, const PhaseIdealLoop *verify_me ) {
   3.342 +void PhaseIdealLoop::build_loop_late( VectorSet &visited, Node_List &worklist, Node_Stack &nstack ) {
   3.343    while (worklist.size() != 0) {
   3.344      Node *n = worklist.pop();
   3.345      // Only visit once
   3.346 @@ -2612,7 +2666,7 @@
   3.347          }
   3.348        } else {
   3.349          // All of n's children have been processed, complete post-processing.
   3.350 -        build_loop_late_post(n, verify_me);
   3.351 +        build_loop_late_post(n);
   3.352          if (nstack.is_empty()) {
   3.353            // Finished all nodes on stack.
   3.354            // Process next node on the worklist.
   3.355 @@ -2631,9 +2685,9 @@
   3.356  //------------------------------build_loop_late_post---------------------------
   3.357  // Put Data nodes into some loop nest, by setting the _nodes[]->loop mapping.
   3.358  // Second pass finds latest legal placement, and ideal loop placement.
   3.359 -void PhaseIdealLoop::build_loop_late_post( Node *n, const PhaseIdealLoop *verify_me ) {
   3.360 +void PhaseIdealLoop::build_loop_late_post( Node *n ) {
   3.361  
   3.362 -  if (n->req() == 2 && n->Opcode() == Op_ConvI2L && !C->major_progress()) {
   3.363 +  if (n->req() == 2 && n->Opcode() == Op_ConvI2L && !C->major_progress() && !_verify_only) {
   3.364      _igvn._worklist.push(n);  // Maybe we'll normalize it, if no more loops.
   3.365    }
   3.366  
   3.367 @@ -2714,6 +2768,7 @@
   3.368      if( get_loop(legal)->_nest < get_loop(least)->_nest )
   3.369        least = legal;
   3.370    }
   3.371 +  assert(early == legal || legal != C->root(), "bad dominance of inputs");
   3.372  
   3.373    // Try not to place code on a loop entry projection
   3.374    // which can inhibit range check elimination.
   3.375 @@ -2731,8 +2786,8 @@
   3.376  #ifdef ASSERT
   3.377    // If verifying, verify that 'verify_me' has a legal location
   3.378    // and choose it as our location.
   3.379 -  if( verify_me ) {
   3.380 -    Node *v_ctrl = verify_me->get_ctrl_no_update(n);
   3.381 +  if( _verify_me ) {
   3.382 +    Node *v_ctrl = _verify_me->get_ctrl_no_update(n);
   3.383      Node *legal = LCA;
   3.384      while( early != legal ) {   // While not at earliest legal
   3.385        if( legal == v_ctrl ) break;  // Check for prior good location
     4.1 --- a/src/share/vm/opto/loopnode.hpp	Wed Aug 12 14:27:54 2009 -0700
     4.2 +++ b/src/share/vm/opto/loopnode.hpp	Fri Aug 14 00:02:12 2009 -0700
     4.3 @@ -1,5 +1,5 @@
     4.4  /*
     4.5 - * Copyright 1998-2008 Sun Microsystems, Inc.  All Rights Reserved.
     4.6 + * Copyright 1998-2009 Sun Microsystems, Inc.  All Rights Reserved.
     4.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4.8   *
     4.9   * This code is free software; you can redistribute it and/or modify it
    4.10 @@ -442,6 +442,9 @@
    4.11    uint *_preorders;
    4.12    uint _max_preorder;
    4.13  
    4.14 +  const PhaseIdealLoop* _verify_me;
    4.15 +  bool _verify_only;
    4.16 +
    4.17    // Allocate _preorders[] array
    4.18    void allocate_preorders() {
    4.19      _max_preorder = C->unique()+8;
    4.20 @@ -497,6 +500,12 @@
    4.21    Node_Array _dom_lca_tags;
    4.22    void   init_dom_lca_tags();
    4.23    void   clear_dom_lca_tags();
    4.24 +
    4.25 +  // Helper for debugging bad dominance relationships
    4.26 +  bool verify_dominance(Node* n, Node* use, Node* LCA, Node* early);
    4.27 +
    4.28 +  Node* compute_lca_of_uses(Node* n, Node* early, bool verify = false);
    4.29 +
    4.30    // Inline wrapper for frequent cases:
    4.31    // 1) only one use
    4.32    // 2) a use is the same as the current LCA passed as 'n1'
    4.33 @@ -511,6 +520,7 @@
    4.34      return find_non_split_ctrl(n);
    4.35    }
    4.36    Node *dom_lca_for_get_late_ctrl_internal( Node *lca, Node *n, Node *tag );
    4.37 +
    4.38    // true if CFG node d dominates CFG node n
    4.39    bool is_dominator(Node *d, Node *n);
    4.40  
    4.41 @@ -621,9 +631,9 @@
    4.42    IdealLoopTree *sort( IdealLoopTree *loop, IdealLoopTree *innermost );
    4.43  
    4.44    // Place Data nodes in some loop nest
    4.45 -  void build_loop_early( VectorSet &visited, Node_List &worklist, Node_Stack &nstack, const PhaseIdealLoop *verify_me );
    4.46 -  void build_loop_late ( VectorSet &visited, Node_List &worklist, Node_Stack &nstack, const PhaseIdealLoop *verify_me );
    4.47 -  void build_loop_late_post ( Node* n, const PhaseIdealLoop *verify_me );
    4.48 +  void build_loop_early( VectorSet &visited, Node_List &worklist, Node_Stack &nstack );
    4.49 +  void build_loop_late ( VectorSet &visited, Node_List &worklist, Node_Stack &nstack );
    4.50 +  void build_loop_late_post ( Node* n );
    4.51  
    4.52    // Array of immediate dominance info for each CFG node indexed by node idx
    4.53  private:
    4.54 @@ -662,6 +672,19 @@
    4.55    // Is safept not required by an outer loop?
    4.56    bool is_deleteable_safept(Node* sfpt);
    4.57  
    4.58 +  // Perform verification that the graph is valid.
    4.59 +  PhaseIdealLoop( PhaseIterGVN &igvn) :
    4.60 +    PhaseTransform(Ideal_Loop),
    4.61 +    _igvn(igvn),
    4.62 +    _dom_lca_tags(C->comp_arena()),
    4.63 +    _verify_me(NULL),
    4.64 +    _verify_only(true) {
    4.65 +    build_and_optimize(false);
    4.66 +  }
    4.67 +
    4.68 +  // build the loop tree and perform any requested optimizations
    4.69 +  void build_and_optimize(bool do_split_if);
    4.70 +
    4.71  public:
    4.72    // Dominators for the sea of nodes
    4.73    void Dominators();
    4.74 @@ -671,7 +694,32 @@
    4.75    Node *dom_lca_internal( Node *n1, Node *n2 ) const;
    4.76  
    4.77    // Compute the Ideal Node to Loop mapping
    4.78 -  PhaseIdealLoop( PhaseIterGVN &igvn, const PhaseIdealLoop *verify_me, bool do_split_ifs );
    4.79 +  PhaseIdealLoop( PhaseIterGVN &igvn, bool do_split_ifs) :
    4.80 +    PhaseTransform(Ideal_Loop),
    4.81 +    _igvn(igvn),
    4.82 +    _dom_lca_tags(C->comp_arena()),
    4.83 +    _verify_me(NULL),
    4.84 +    _verify_only(false) {
    4.85 +    build_and_optimize(do_split_ifs);
    4.86 +  }
    4.87 +
    4.88 +  // Verify that verify_me made the same decisions as a fresh run.
    4.89 +  PhaseIdealLoop( PhaseIterGVN &igvn, const PhaseIdealLoop *verify_me) :
    4.90 +    PhaseTransform(Ideal_Loop),
    4.91 +    _igvn(igvn),
    4.92 +    _dom_lca_tags(C->comp_arena()),
    4.93 +    _verify_me(verify_me),
    4.94 +    _verify_only(false) {
    4.95 +    build_and_optimize(false);
    4.96 +  }
    4.97 +
    4.98 +  // Build and verify the loop tree without modifying the graph.  This
    4.99 +  // is useful to verify that all inputs properly dominate their uses.
   4.100 +  static void verify(PhaseIterGVN& igvn) {
   4.101 +#ifdef ASSERT
   4.102 +    PhaseIdealLoop v(igvn);
   4.103 +#endif
   4.104 +  }
   4.105  
   4.106    // True if the method has at least 1 irreducible loop
   4.107    bool _has_irreducible_loops;
     5.1 --- a/src/share/vm/opto/phase.cpp	Wed Aug 12 14:27:54 2009 -0700
     5.2 +++ b/src/share/vm/opto/phase.cpp	Fri Aug 14 00:02:12 2009 -0700
     5.3 @@ -1,5 +1,5 @@
     5.4  /*
     5.5 - * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
     5.6 + * Copyright 1997-2009 Sun Microsystems, Inc.  All Rights Reserved.
     5.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     5.8   *
     5.9   * This code is free software; you can redistribute it and/or modify it
    5.10 @@ -53,6 +53,7 @@
    5.11  elapsedTimer Phase::_t_registerMethod;
    5.12  elapsedTimer Phase::_t_temporaryTimer1;
    5.13  elapsedTimer Phase::_t_temporaryTimer2;
    5.14 +elapsedTimer Phase::_t_idealLoopVerify;
    5.15  
    5.16  // Subtimers for _t_optimizer
    5.17  elapsedTimer   Phase::_t_iterGVN;
    5.18 @@ -88,51 +89,52 @@
    5.19    tty->print_cr ("Accumulated compiler times:");
    5.20    tty->print_cr ("---------------------------");
    5.21    tty->print_cr ("  Total compilation: %3.3f sec.", Phase::_t_totalCompilation.seconds());
    5.22 -  tty->print    ("    method compilation : %3.3f sec", Phase::_t_methodCompilation.seconds());
    5.23 +  tty->print    ("    method compilation   : %3.3f sec", Phase::_t_methodCompilation.seconds());
    5.24    tty->print    ("/%d bytes",_total_bytes_compiled);
    5.25    tty->print_cr (" (%3.0f bytes per sec) ", Phase::_total_bytes_compiled / Phase::_t_methodCompilation.seconds());
    5.26 -  tty->print_cr ("    stub compilation   : %3.3f sec.", Phase::_t_stubCompilation.seconds());
    5.27 +  tty->print_cr ("    stub compilation     : %3.3f sec.", Phase::_t_stubCompilation.seconds());
    5.28    tty->print_cr ("  Phases:");
    5.29 -  tty->print_cr ("    parse        : %3.3f sec", Phase::_t_parser.seconds());
    5.30 +  tty->print_cr ("    parse          : %3.3f sec", Phase::_t_parser.seconds());
    5.31    if (DoEscapeAnalysis) {
    5.32 -    tty->print_cr ("    escape analysis : %3.3f sec", Phase::_t_escapeAnalysis.seconds());
    5.33 +    tty->print_cr ("    escape analysis   : %3.3f sec", Phase::_t_escapeAnalysis.seconds());
    5.34    }
    5.35 -  tty->print_cr ("    optimizer    : %3.3f sec", Phase::_t_optimizer.seconds());
    5.36 +  tty->print_cr ("    optimizer      : %3.3f sec", Phase::_t_optimizer.seconds());
    5.37    if( Verbose || WizardMode ) {
    5.38 -    tty->print_cr ("      iterGVN      : %3.3f sec", Phase::_t_iterGVN.seconds());
    5.39 -    tty->print_cr ("      idealLoop    : %3.3f sec", Phase::_t_idealLoop.seconds());
    5.40 -    tty->print_cr ("      ccp          : %3.3f sec", Phase::_t_ccp.seconds());
    5.41 -    tty->print_cr ("      iterGVN2     : %3.3f sec", Phase::_t_iterGVN2.seconds());
    5.42 -    tty->print_cr ("      graphReshape : %3.3f sec", Phase::_t_graphReshaping.seconds());
    5.43 +    tty->print_cr ("      iterGVN        : %3.3f sec", Phase::_t_iterGVN.seconds());
    5.44 +    tty->print_cr ("      idealLoop      : %3.3f sec", Phase::_t_idealLoop.seconds());
    5.45 +    tty->print_cr ("      idealLoopVerify: %3.3f sec", Phase::_t_idealLoopVerify.seconds());
    5.46 +    tty->print_cr ("      ccp            : %3.3f sec", Phase::_t_ccp.seconds());
    5.47 +    tty->print_cr ("      iterGVN2       : %3.3f sec", Phase::_t_iterGVN2.seconds());
    5.48 +    tty->print_cr ("      graphReshape   : %3.3f sec", Phase::_t_graphReshaping.seconds());
    5.49      double optimizer_subtotal = Phase::_t_iterGVN.seconds() +
    5.50        Phase::_t_idealLoop.seconds() + Phase::_t_ccp.seconds() +
    5.51        Phase::_t_graphReshaping.seconds();
    5.52      double percent_of_optimizer = ((optimizer_subtotal == 0.0) ? 0.0 : (optimizer_subtotal / Phase::_t_optimizer.seconds() * 100.0));
    5.53 -    tty->print_cr ("      subtotal     : %3.3f sec,  %3.2f %%", optimizer_subtotal, percent_of_optimizer);
    5.54 +    tty->print_cr ("      subtotal       : %3.3f sec,  %3.2f %%", optimizer_subtotal, percent_of_optimizer);
    5.55    }
    5.56 -  tty->print_cr ("    matcher      : %3.3f sec", Phase::_t_matcher.seconds());
    5.57 -  tty->print_cr ("    scheduler    : %3.3f sec", Phase::_t_scheduler.seconds());
    5.58 -  tty->print_cr ("    regalloc     : %3.3f sec", Phase::_t_registerAllocation.seconds());
    5.59 +  tty->print_cr ("    matcher        : %3.3f sec", Phase::_t_matcher.seconds());
    5.60 +  tty->print_cr ("    scheduler      : %3.3f sec", Phase::_t_scheduler.seconds());
    5.61 +  tty->print_cr ("    regalloc       : %3.3f sec", Phase::_t_registerAllocation.seconds());
    5.62    if( Verbose || WizardMode ) {
    5.63 -    tty->print_cr ("      ctorChaitin  : %3.3f sec", Phase::_t_ctorChaitin.seconds());
    5.64 -    tty->print_cr ("      buildIFG     : %3.3f sec", Phase::_t_buildIFGphysical.seconds());
    5.65 -    tty->print_cr ("      computeLive  : %3.3f sec", Phase::_t_computeLive.seconds());
    5.66 -    tty->print_cr ("      regAllocSplit: %3.3f sec", Phase::_t_regAllocSplit.seconds());
    5.67 +    tty->print_cr ("      ctorChaitin    : %3.3f sec", Phase::_t_ctorChaitin.seconds());
    5.68 +    tty->print_cr ("      buildIFG       : %3.3f sec", Phase::_t_buildIFGphysical.seconds());
    5.69 +    tty->print_cr ("      computeLive    : %3.3f sec", Phase::_t_computeLive.seconds());
    5.70 +    tty->print_cr ("      regAllocSplit  : %3.3f sec", Phase::_t_regAllocSplit.seconds());
    5.71      tty->print_cr ("      postAllocCopyRemoval: %3.3f sec", Phase::_t_postAllocCopyRemoval.seconds());
    5.72 -    tty->print_cr ("      fixupSpills  : %3.3f sec", Phase::_t_fixupSpills.seconds());
    5.73 +    tty->print_cr ("      fixupSpills    : %3.3f sec", Phase::_t_fixupSpills.seconds());
    5.74      double regalloc_subtotal = Phase::_t_ctorChaitin.seconds() +
    5.75        Phase::_t_buildIFGphysical.seconds() + Phase::_t_computeLive.seconds() +
    5.76        Phase::_t_regAllocSplit.seconds()    + Phase::_t_fixupSpills.seconds() +
    5.77        Phase::_t_postAllocCopyRemoval.seconds();
    5.78      double percent_of_regalloc = ((regalloc_subtotal == 0.0) ? 0.0 : (regalloc_subtotal / Phase::_t_registerAllocation.seconds() * 100.0));
    5.79 -    tty->print_cr ("      subtotal     : %3.3f sec,  %3.2f %%", regalloc_subtotal, percent_of_regalloc);
    5.80 +    tty->print_cr ("      subtotal       : %3.3f sec,  %3.2f %%", regalloc_subtotal, percent_of_regalloc);
    5.81    }
    5.82 -  tty->print_cr ("    macroExpand  : %3.3f sec", Phase::_t_macroExpand.seconds());
    5.83 -  tty->print_cr ("    blockOrdering: %3.3f sec", Phase::_t_blockOrdering.seconds());
    5.84 -  tty->print_cr ("    peephole     : %3.3f sec", Phase::_t_peephole.seconds());
    5.85 -  tty->print_cr ("    codeGen      : %3.3f sec", Phase::_t_codeGeneration.seconds());
    5.86 -  tty->print_cr ("    install_code : %3.3f sec", Phase::_t_registerMethod.seconds());
    5.87 -  tty->print_cr ("    ------------ : ----------");
    5.88 +  tty->print_cr ("    macroExpand    : %3.3f sec", Phase::_t_macroExpand.seconds());
    5.89 +  tty->print_cr ("    blockOrdering  : %3.3f sec", Phase::_t_blockOrdering.seconds());
    5.90 +  tty->print_cr ("    peephole       : %3.3f sec", Phase::_t_peephole.seconds());
    5.91 +  tty->print_cr ("    codeGen        : %3.3f sec", Phase::_t_codeGeneration.seconds());
    5.92 +  tty->print_cr ("    install_code   : %3.3f sec", Phase::_t_registerMethod.seconds());
    5.93 +  tty->print_cr ("    -------------- : ----------");
    5.94    double phase_subtotal = Phase::_t_parser.seconds() +
    5.95      (DoEscapeAnalysis ? Phase::_t_escapeAnalysis.seconds() : 0.0) +
    5.96      Phase::_t_optimizer.seconds() + Phase::_t_graphReshaping.seconds() +
    5.97 @@ -143,7 +145,7 @@
    5.98    double percent_of_method_compile = ((phase_subtotal == 0.0) ? 0.0 : phase_subtotal / Phase::_t_methodCompilation.seconds()) * 100.0;
    5.99    // counters inside Compile::CodeGen include time for adapters and stubs
   5.100    // so phase-total can be greater than 100%
   5.101 -  tty->print_cr ("    total        : %3.3f sec,  %3.2f %%", phase_subtotal, percent_of_method_compile);
   5.102 +  tty->print_cr ("    total          : %3.3f sec,  %3.2f %%", phase_subtotal, percent_of_method_compile);
   5.103  
   5.104    assert( percent_of_method_compile > expected_method_compile_coverage ||
   5.105            phase_subtotal < minimum_meaningful_method_compile,
   5.106 @@ -157,8 +159,8 @@
   5.107      tty->cr();
   5.108      tty->print_cr ("    temporaryTimer2: %3.3f sec", Phase::_t_temporaryTimer2.seconds());
   5.109    }
   5.110 -  tty->print_cr ("    output    : %3.3f sec", Phase::_t_output.seconds());
   5.111 -  tty->print_cr ("      isched    : %3.3f sec", Phase::_t_instrSched.seconds());
   5.112 -  tty->print_cr ("      bldOopMaps: %3.3f sec", Phase::_t_buildOopMaps.seconds());
   5.113 +  tty->print_cr ("    output         : %3.3f sec", Phase::_t_output.seconds());
   5.114 +  tty->print_cr ("      isched         : %3.3f sec", Phase::_t_instrSched.seconds());
   5.115 +  tty->print_cr ("      bldOopMaps     : %3.3f sec", Phase::_t_buildOopMaps.seconds());
   5.116  }
   5.117  #endif
     6.1 --- a/src/share/vm/opto/phase.hpp	Wed Aug 12 14:27:54 2009 -0700
     6.2 +++ b/src/share/vm/opto/phase.hpp	Fri Aug 14 00:02:12 2009 -0700
     6.3 @@ -1,5 +1,5 @@
     6.4  /*
     6.5 - * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
     6.6 + * Copyright 1997-2009 Sun Microsystems, Inc.  All Rights Reserved.
     6.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     6.8   *
     6.9   * This code is free software; you can redistribute it and/or modify it
    6.10 @@ -83,6 +83,7 @@
    6.11    static elapsedTimer _t_registerMethod;
    6.12    static elapsedTimer _t_temporaryTimer1;
    6.13    static elapsedTimer _t_temporaryTimer2;
    6.14 +  static elapsedTimer _t_idealLoopVerify;
    6.15  
    6.16  // Subtimers for _t_optimizer
    6.17    static elapsedTimer   _t_iterGVN;

mercurial