Fri, 14 Aug 2009 00:02:12 -0700
6862956: PhaseIdealLoop should have a CFG verification mode
Reviewed-by: kvn, twisti
1.1 --- a/src/share/vm/opto/compile.cpp Wed Aug 12 14:27:54 2009 -0700 1.2 +++ b/src/share/vm/opto/compile.cpp Fri Aug 14 00:02:12 2009 -0700 1.3 @@ -1545,7 +1545,7 @@ 1.4 if((loop_opts_cnt > 0) && (has_loops() || has_split_ifs())) { 1.5 { 1.6 TracePhase t2("idealLoop", &_t_idealLoop, true); 1.7 - PhaseIdealLoop ideal_loop( igvn, NULL, true ); 1.8 + PhaseIdealLoop ideal_loop( igvn, true ); 1.9 loop_opts_cnt--; 1.10 if (major_progress()) print_method("PhaseIdealLoop 1", 2); 1.11 if (failing()) return; 1.12 @@ -1553,7 +1553,7 @@ 1.13 // Loop opts pass if partial peeling occurred in previous pass 1.14 if(PartialPeelLoop && major_progress() && (loop_opts_cnt > 0)) { 1.15 TracePhase t3("idealLoop", &_t_idealLoop, true); 1.16 - PhaseIdealLoop ideal_loop( igvn, NULL, false ); 1.17 + PhaseIdealLoop ideal_loop( igvn, false ); 1.18 loop_opts_cnt--; 1.19 if (major_progress()) print_method("PhaseIdealLoop 2", 2); 1.20 if (failing()) return; 1.21 @@ -1561,10 +1561,15 @@ 1.22 // Loop opts pass for loop-unrolling before CCP 1.23 if(major_progress() && (loop_opts_cnt > 0)) { 1.24 TracePhase t4("idealLoop", &_t_idealLoop, true); 1.25 - PhaseIdealLoop ideal_loop( igvn, NULL, false ); 1.26 + PhaseIdealLoop ideal_loop( igvn, false ); 1.27 loop_opts_cnt--; 1.28 if (major_progress()) print_method("PhaseIdealLoop 3", 2); 1.29 } 1.30 + if (!failing()) { 1.31 + // Verify that last round of loop opts produced a valid graph 1.32 + NOT_PRODUCT( TracePhase t2("idealLoopVerify", &_t_idealLoopVerify, TimeCompiler); ) 1.33 + PhaseIdealLoop::verify(igvn); 1.34 + } 1.35 } 1.36 if (failing()) return; 1.37 1.38 @@ -1597,12 +1602,20 @@ 1.39 while(major_progress() && (loop_opts_cnt > 0)) { 1.40 TracePhase t2("idealLoop", &_t_idealLoop, true); 1.41 assert( cnt++ < 40, "infinite cycle in loop optimization" ); 1.42 - PhaseIdealLoop ideal_loop( igvn, NULL, true ); 1.43 + PhaseIdealLoop ideal_loop( igvn, true ); 1.44 loop_opts_cnt--; 1.45 if (major_progress()) print_method("PhaseIdealLoop iterations", 2); 1.46 if (failing()) return; 1.47 } 1.48 } 1.49 + 1.50 + { 1.51 + // Verify that all previous optimizations produced a valid graph 1.52 + // at least to this point, even if no loop optimizations were done. 1.53 + NOT_PRODUCT( TracePhase t2("idealLoopVerify", &_t_idealLoopVerify, TimeCompiler); ) 1.54 + PhaseIdealLoop::verify(igvn); 1.55 + } 1.56 + 1.57 { 1.58 NOT_PRODUCT( TracePhase t2("macroExpand", &_t_macroExpand, TimeCompiler); ) 1.59 PhaseMacroExpand mex(igvn);
2.1 --- a/src/share/vm/opto/domgraph.cpp Wed Aug 12 14:27:54 2009 -0700 2.2 +++ b/src/share/vm/opto/domgraph.cpp Fri Aug 14 00:02:12 2009 -0700 2.3 @@ -1,5 +1,5 @@ 2.4 /* 2.5 - * Copyright 1997-2005 Sun Microsystems, Inc. All Rights Reserved. 2.6 + * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved. 2.7 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 2.8 * 2.9 * This code is free software; you can redistribute it and/or modify it 2.10 @@ -396,7 +396,7 @@ 2.11 // nodes (using the is_CFG() call) and places them in a dominator tree. Thus, 2.12 // it needs a count of the CFG nodes for the mapping table. This is the 2.13 // Lengauer & Tarjan O(E-alpha(E,V)) algorithm. 2.14 -void PhaseIdealLoop::Dominators( ) { 2.15 +void PhaseIdealLoop::Dominators() { 2.16 ResourceMark rm; 2.17 // Setup mappings from my Graph to Tarjan's stuff and back 2.18 // Note: Tarjan uses 1-based arrays 2.19 @@ -454,7 +454,7 @@ 2.20 // flow into the main graph (and hence into ROOT) but are not reachable 2.21 // from above. Such code is dead, but requires a global pass to detect 2.22 // it; this global pass was the 'build_loop_tree' pass run just prior. 2.23 - if( whead->is_Region() ) { 2.24 + if( !_verify_only && whead->is_Region() ) { 2.25 for( uint i = 1; i < whead->req(); i++ ) { 2.26 if (!has_node(whead->in(i))) { 2.27 // Kill dead input path
3.1 --- a/src/share/vm/opto/loopnode.cpp Wed Aug 12 14:27:54 2009 -0700 3.2 +++ b/src/share/vm/opto/loopnode.cpp Fri Aug 14 00:02:12 2009 -0700 3.3 @@ -1420,13 +1420,12 @@ 3.4 } 3.5 3.6 //============================================================================= 3.7 -//------------------------------PhaseIdealLoop--------------------------------- 3.8 +//----------------------------build_and_optimize------------------------------- 3.9 // Create a PhaseLoop. Build the ideal Loop tree. Map each Ideal Node to 3.10 // its corresponding LoopNode. If 'optimize' is true, do some loop cleanups. 3.11 -PhaseIdealLoop::PhaseIdealLoop( PhaseIterGVN &igvn, const PhaseIdealLoop *verify_me, bool do_split_ifs ) 3.12 - : PhaseTransform(Ideal_Loop), 3.13 - _igvn(igvn), 3.14 - _dom_lca_tags(C->comp_arena()) { 3.15 +void PhaseIdealLoop::build_and_optimize(bool do_split_ifs) { 3.16 + int old_progress = C->major_progress(); 3.17 + 3.18 // Reset major-progress flag for the driver's heuristics 3.19 C->clear_major_progress(); 3.20 3.21 @@ -1465,18 +1464,20 @@ 3.22 } 3.23 3.24 // No loops after all 3.25 - if( !_ltree_root->_child ) C->set_has_loops(false); 3.26 + if( !_ltree_root->_child && !_verify_only ) C->set_has_loops(false); 3.27 3.28 // There should always be an outer loop containing the Root and Return nodes. 3.29 // If not, we have a degenerate empty program. Bail out in this case. 3.30 if (!has_node(C->root())) { 3.31 - C->clear_major_progress(); 3.32 - C->record_method_not_compilable("empty program detected during loop optimization"); 3.33 + if (!_verify_only) { 3.34 + C->clear_major_progress(); 3.35 + C->record_method_not_compilable("empty program detected during loop optimization"); 3.36 + } 3.37 return; 3.38 } 3.39 3.40 // Nothing to do, so get out 3.41 - if( !C->has_loops() && !do_split_ifs && !verify_me) { 3.42 + if( !C->has_loops() && !do_split_ifs && !_verify_me && !_verify_only ) { 3.43 _igvn.optimize(); // Cleanup NeverBranches 3.44 return; 3.45 } 3.46 @@ -1486,7 +1487,7 @@ 3.47 3.48 // Split shared headers and insert loop landing pads. 3.49 // Do not bother doing this on the Root loop of course. 3.50 - if( !verify_me && _ltree_root->_child ) { 3.51 + if( !_verify_me && !_verify_only && _ltree_root->_child ) { 3.52 if( _ltree_root->_child->beautify_loops( this ) ) { 3.53 // Re-build loop tree! 3.54 _ltree_root->_child = NULL; 3.55 @@ -1515,24 +1516,26 @@ 3.56 3.57 Dominators(); 3.58 3.59 - // As a side effect, Dominators removed any unreachable CFG paths 3.60 - // into RegionNodes. It doesn't do this test against Root, so 3.61 - // we do it here. 3.62 - for( uint i = 1; i < C->root()->req(); i++ ) { 3.63 - if( !_nodes[C->root()->in(i)->_idx] ) { // Dead path into Root? 3.64 - _igvn.hash_delete(C->root()); 3.65 - C->root()->del_req(i); 3.66 - _igvn._worklist.push(C->root()); 3.67 - i--; // Rerun same iteration on compressed edges 3.68 + if (!_verify_only) { 3.69 + // As a side effect, Dominators removed any unreachable CFG paths 3.70 + // into RegionNodes. It doesn't do this test against Root, so 3.71 + // we do it here. 3.72 + for( uint i = 1; i < C->root()->req(); i++ ) { 3.73 + if( !_nodes[C->root()->in(i)->_idx] ) { // Dead path into Root? 3.74 + _igvn.hash_delete(C->root()); 3.75 + C->root()->del_req(i); 3.76 + _igvn._worklist.push(C->root()); 3.77 + i--; // Rerun same iteration on compressed edges 3.78 + } 3.79 } 3.80 + 3.81 + // Given dominators, try to find inner loops with calls that must 3.82 + // always be executed (call dominates loop tail). These loops do 3.83 + // not need a separate safepoint. 3.84 + Node_List cisstack(a); 3.85 + _ltree_root->check_safepts(visited, cisstack); 3.86 } 3.87 3.88 - // Given dominators, try to find inner loops with calls that must 3.89 - // always be executed (call dominates loop tail). These loops do 3.90 - // not need a separate safepoint. 3.91 - Node_List cisstack(a); 3.92 - _ltree_root->check_safepts(visited, cisstack); 3.93 - 3.94 // Walk the DATA nodes and place into loops. Find earliest control 3.95 // node. For CFG nodes, the _nodes array starts out and remains 3.96 // holding the associated IdealLoopTree pointer. For DATA nodes, the 3.97 @@ -1548,11 +1551,11 @@ 3.98 // it will be processed among C->top() inputs 3.99 worklist.push( C->top() ); 3.100 visited.set( C->top()->_idx ); // Set C->top() as visited now 3.101 - build_loop_early( visited, worklist, nstack, verify_me ); 3.102 + build_loop_early( visited, worklist, nstack ); 3.103 3.104 // Given early legal placement, try finding counted loops. This placement 3.105 // is good enough to discover most loop invariants. 3.106 - if( !verify_me ) 3.107 + if( !_verify_me && !_verify_only ) 3.108 _ltree_root->counted_loop( this ); 3.109 3.110 // Find latest loop placement. Find ideal loop placement. 3.111 @@ -1562,16 +1565,25 @@ 3.112 worklist.push( C->root() ); 3.113 NOT_PRODUCT( C->verify_graph_edges(); ) 3.114 worklist.push( C->top() ); 3.115 - build_loop_late( visited, worklist, nstack, verify_me ); 3.116 + build_loop_late( visited, worklist, nstack ); 3.117 + 3.118 + if (_verify_only) { 3.119 + // restore major progress flag 3.120 + for (int i = 0; i < old_progress; i++) 3.121 + C->set_major_progress(); 3.122 + assert(C->unique() == unique, "verification mode made Nodes? ? ?"); 3.123 + assert(_igvn._worklist.size() == 0, "shouldn't push anything"); 3.124 + return; 3.125 + } 3.126 3.127 // clear out the dead code 3.128 while(_deadlist.size()) { 3.129 - igvn.remove_globally_dead_node(_deadlist.pop()); 3.130 + _igvn.remove_globally_dead_node(_deadlist.pop()); 3.131 } 3.132 3.133 #ifndef PRODUCT 3.134 C->verify_graph_edges(); 3.135 - if( verify_me ) { // Nested verify pass? 3.136 + if( _verify_me ) { // Nested verify pass? 3.137 // Check to see if the verify mode is broken 3.138 assert(C->unique() == unique, "non-optimize mode made Nodes? ? ?"); 3.139 return; 3.140 @@ -1678,7 +1690,7 @@ 3.141 void PhaseIdealLoop::verify() const { 3.142 int old_progress = C->major_progress(); 3.143 ResourceMark rm; 3.144 - PhaseIdealLoop loop_verify( _igvn, this, false ); 3.145 + PhaseIdealLoop loop_verify( _igvn, this ); 3.146 VectorSet visited(Thread::current()->resource_area()); 3.147 3.148 fail = 0; 3.149 @@ -2138,54 +2150,58 @@ 3.150 // optimizing an infinite loop? 3.151 l = _ltree_root; // Oops, found infinite loop 3.152 3.153 - // Insert the NeverBranch between 'm' and it's control user. 3.154 - NeverBranchNode *iff = new (C, 1) NeverBranchNode( m ); 3.155 - _igvn.register_new_node_with_optimizer(iff); 3.156 - set_loop(iff, l); 3.157 - Node *if_t = new (C, 1) CProjNode( iff, 0 ); 3.158 - _igvn.register_new_node_with_optimizer(if_t); 3.159 - set_loop(if_t, l); 3.160 + if (!_verify_only) { 3.161 + // Insert the NeverBranch between 'm' and it's control user. 3.162 + NeverBranchNode *iff = new (C, 1) NeverBranchNode( m ); 3.163 + _igvn.register_new_node_with_optimizer(iff); 3.164 + set_loop(iff, l); 3.165 + Node *if_t = new (C, 1) CProjNode( iff, 0 ); 3.166 + _igvn.register_new_node_with_optimizer(if_t); 3.167 + set_loop(if_t, l); 3.168 3.169 - Node* cfg = NULL; // Find the One True Control User of m 3.170 - for (DUIterator_Fast jmax, j = m->fast_outs(jmax); j < jmax; j++) { 3.171 - Node* x = m->fast_out(j); 3.172 - if (x->is_CFG() && x != m && x != iff) 3.173 - { cfg = x; break; } 3.174 + Node* cfg = NULL; // Find the One True Control User of m 3.175 + for (DUIterator_Fast jmax, j = m->fast_outs(jmax); j < jmax; j++) { 3.176 + Node* x = m->fast_out(j); 3.177 + if (x->is_CFG() && x != m && x != iff) 3.178 + { cfg = x; break; } 3.179 + } 3.180 + assert(cfg != NULL, "must find the control user of m"); 3.181 + uint k = 0; // Probably cfg->in(0) 3.182 + while( cfg->in(k) != m ) k++; // But check incase cfg is a Region 3.183 + cfg->set_req( k, if_t ); // Now point to NeverBranch 3.184 + 3.185 + // Now create the never-taken loop exit 3.186 + Node *if_f = new (C, 1) CProjNode( iff, 1 ); 3.187 + _igvn.register_new_node_with_optimizer(if_f); 3.188 + set_loop(if_f, l); 3.189 + // Find frame ptr for Halt. Relies on the optimizer 3.190 + // V-N'ing. Easier and quicker than searching through 3.191 + // the program structure. 3.192 + Node *frame = new (C, 1) ParmNode( C->start(), TypeFunc::FramePtr ); 3.193 + _igvn.register_new_node_with_optimizer(frame); 3.194 + // Halt & Catch Fire 3.195 + Node *halt = new (C, TypeFunc::Parms) HaltNode( if_f, frame ); 3.196 + _igvn.register_new_node_with_optimizer(halt); 3.197 + set_loop(halt, l); 3.198 + C->root()->add_req(halt); 3.199 } 3.200 - assert(cfg != NULL, "must find the control user of m"); 3.201 - uint k = 0; // Probably cfg->in(0) 3.202 - while( cfg->in(k) != m ) k++; // But check incase cfg is a Region 3.203 - cfg->set_req( k, if_t ); // Now point to NeverBranch 3.204 - 3.205 - // Now create the never-taken loop exit 3.206 - Node *if_f = new (C, 1) CProjNode( iff, 1 ); 3.207 - _igvn.register_new_node_with_optimizer(if_f); 3.208 - set_loop(if_f, l); 3.209 - // Find frame ptr for Halt. Relies on the optimizer 3.210 - // V-N'ing. Easier and quicker than searching through 3.211 - // the program structure. 3.212 - Node *frame = new (C, 1) ParmNode( C->start(), TypeFunc::FramePtr ); 3.213 - _igvn.register_new_node_with_optimizer(frame); 3.214 - // Halt & Catch Fire 3.215 - Node *halt = new (C, TypeFunc::Parms) HaltNode( if_f, frame ); 3.216 - _igvn.register_new_node_with_optimizer(halt); 3.217 - set_loop(halt, l); 3.218 - C->root()->add_req(halt); 3.219 set_loop(C->root(), _ltree_root); 3.220 } 3.221 } 3.222 // Weeny check for irreducible. This child was already visited (this 3.223 // IS the post-work phase). Is this child's loop header post-visited 3.224 // as well? If so, then I found another entry into the loop. 3.225 - while( is_postvisited(l->_head) ) { 3.226 - // found irreducible 3.227 - l->_irreducible = 1; // = true 3.228 - l = l->_parent; 3.229 - _has_irreducible_loops = true; 3.230 - // Check for bad CFG here to prevent crash, and bailout of compile 3.231 - if (l == NULL) { 3.232 - C->record_method_not_compilable("unhandled CFG detected during loop optimization"); 3.233 - return pre_order; 3.234 + if (!_verify_only) { 3.235 + while( is_postvisited(l->_head) ) { 3.236 + // found irreducible 3.237 + l->_irreducible = 1; // = true 3.238 + l = l->_parent; 3.239 + _has_irreducible_loops = true; 3.240 + // Check for bad CFG here to prevent crash, and bailout of compile 3.241 + if (l == NULL) { 3.242 + C->record_method_not_compilable("unhandled CFG detected during loop optimization"); 3.243 + return pre_order; 3.244 + } 3.245 } 3.246 } 3.247 3.248 @@ -2253,7 +2269,7 @@ 3.249 // Put Data nodes into some loop nest, by setting the _nodes[]->loop mapping. 3.250 // First pass computes the earliest controlling node possible. This is the 3.251 // controlling input with the deepest dominating depth. 3.252 -void PhaseIdealLoop::build_loop_early( VectorSet &visited, Node_List &worklist, Node_Stack &nstack, const PhaseIdealLoop *verify_me ) { 3.253 +void PhaseIdealLoop::build_loop_early( VectorSet &visited, Node_List &worklist, Node_Stack &nstack ) { 3.254 while (worklist.size() != 0) { 3.255 // Use local variables nstack_top_n & nstack_top_i to cache values 3.256 // on nstack's top. 3.257 @@ -2285,7 +2301,7 @@ 3.258 // (the old code here would yank a 2nd safepoint after seeing a 3.259 // first one, even though the 1st did not dominate in the loop body 3.260 // and thus could be avoided indefinitely) 3.261 - if( !verify_me && ilt->_has_sfpt && n->Opcode() == Op_SafePoint && 3.262 + if( !_verify_only && !_verify_me && ilt->_has_sfpt && n->Opcode() == Op_SafePoint && 3.263 is_deleteable_safept(n)) { 3.264 Node *in = n->in(TypeFunc::Control); 3.265 lazy_replace(n,in); // Pull safepoint now 3.266 @@ -2408,12 +2424,31 @@ 3.267 return LCA; 3.268 } 3.269 3.270 -//------------------------------get_late_ctrl---------------------------------- 3.271 -// Compute latest legal control. 3.272 -Node *PhaseIdealLoop::get_late_ctrl( Node *n, Node *early ) { 3.273 - assert(early != NULL, "early control should not be NULL"); 3.274 +bool PhaseIdealLoop::verify_dominance(Node* n, Node* use, Node* LCA, Node* early) { 3.275 + bool had_error = false; 3.276 +#ifdef ASSERT 3.277 + if (early != C->root()) { 3.278 + // Make sure that there's a dominance path from use to LCA 3.279 + Node* d = use; 3.280 + while (d != LCA) { 3.281 + d = idom(d); 3.282 + if (d == C->root()) { 3.283 + tty->print_cr("*** Use %d isn't dominated by def %s", use->_idx, n->_idx); 3.284 + n->dump(); 3.285 + use->dump(); 3.286 + had_error = true; 3.287 + break; 3.288 + } 3.289 + } 3.290 + } 3.291 +#endif 3.292 + return had_error; 3.293 +} 3.294 3.295 + 3.296 +Node* PhaseIdealLoop::compute_lca_of_uses(Node* n, Node* early, bool verify) { 3.297 // Compute LCA over list of uses 3.298 + bool had_error = false; 3.299 Node *LCA = NULL; 3.300 for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax && LCA != early; i++) { 3.301 Node* c = n->fast_out(i); 3.302 @@ -2423,15 +2458,34 @@ 3.303 for( uint j=1; j<c->req(); j++ ) {// For all inputs 3.304 if( c->in(j) == n ) { // Found matching input? 3.305 Node *use = c->in(0)->in(j); 3.306 + if (_verify_only && use->is_top()) continue; 3.307 LCA = dom_lca_for_get_late_ctrl( LCA, use, n ); 3.308 + if (verify) had_error = verify_dominance(n, use, LCA, early) || had_error; 3.309 } 3.310 } 3.311 } else { 3.312 // For CFG data-users, use is in the block just prior 3.313 Node *use = has_ctrl(c) ? get_ctrl(c) : c->in(0); 3.314 LCA = dom_lca_for_get_late_ctrl( LCA, use, n ); 3.315 + if (verify) had_error = verify_dominance(n, use, LCA, early) || had_error; 3.316 } 3.317 } 3.318 + assert(!had_error, "bad dominance"); 3.319 + return LCA; 3.320 +} 3.321 + 3.322 +//------------------------------get_late_ctrl---------------------------------- 3.323 +// Compute latest legal control. 3.324 +Node *PhaseIdealLoop::get_late_ctrl( Node *n, Node *early ) { 3.325 + assert(early != NULL, "early control should not be NULL"); 3.326 + 3.327 + Node* LCA = compute_lca_of_uses(n, early); 3.328 +#ifdef ASSERT 3.329 + if (LCA == C->root() && LCA != early) { 3.330 + // def doesn't dominate uses so print some useful debugging output 3.331 + compute_lca_of_uses(n, early, true); 3.332 + } 3.333 +#endif 3.334 3.335 // if this is a load, check for anti-dependent stores 3.336 // We use a conservative algorithm to identify potential interfering 3.337 @@ -2576,7 +2630,7 @@ 3.338 //------------------------------build_loop_late-------------------------------- 3.339 // Put Data nodes into some loop nest, by setting the _nodes[]->loop mapping. 3.340 // Second pass finds latest legal placement, and ideal loop placement. 3.341 -void PhaseIdealLoop::build_loop_late( VectorSet &visited, Node_List &worklist, Node_Stack &nstack, const PhaseIdealLoop *verify_me ) { 3.342 +void PhaseIdealLoop::build_loop_late( VectorSet &visited, Node_List &worklist, Node_Stack &nstack ) { 3.343 while (worklist.size() != 0) { 3.344 Node *n = worklist.pop(); 3.345 // Only visit once 3.346 @@ -2612,7 +2666,7 @@ 3.347 } 3.348 } else { 3.349 // All of n's children have been processed, complete post-processing. 3.350 - build_loop_late_post(n, verify_me); 3.351 + build_loop_late_post(n); 3.352 if (nstack.is_empty()) { 3.353 // Finished all nodes on stack. 3.354 // Process next node on the worklist. 3.355 @@ -2631,9 +2685,9 @@ 3.356 //------------------------------build_loop_late_post--------------------------- 3.357 // Put Data nodes into some loop nest, by setting the _nodes[]->loop mapping. 3.358 // Second pass finds latest legal placement, and ideal loop placement. 3.359 -void PhaseIdealLoop::build_loop_late_post( Node *n, const PhaseIdealLoop *verify_me ) { 3.360 +void PhaseIdealLoop::build_loop_late_post( Node *n ) { 3.361 3.362 - if (n->req() == 2 && n->Opcode() == Op_ConvI2L && !C->major_progress()) { 3.363 + if (n->req() == 2 && n->Opcode() == Op_ConvI2L && !C->major_progress() && !_verify_only) { 3.364 _igvn._worklist.push(n); // Maybe we'll normalize it, if no more loops. 3.365 } 3.366 3.367 @@ -2714,6 +2768,7 @@ 3.368 if( get_loop(legal)->_nest < get_loop(least)->_nest ) 3.369 least = legal; 3.370 } 3.371 + assert(early == legal || legal != C->root(), "bad dominance of inputs"); 3.372 3.373 // Try not to place code on a loop entry projection 3.374 // which can inhibit range check elimination. 3.375 @@ -2731,8 +2786,8 @@ 3.376 #ifdef ASSERT 3.377 // If verifying, verify that 'verify_me' has a legal location 3.378 // and choose it as our location. 3.379 - if( verify_me ) { 3.380 - Node *v_ctrl = verify_me->get_ctrl_no_update(n); 3.381 + if( _verify_me ) { 3.382 + Node *v_ctrl = _verify_me->get_ctrl_no_update(n); 3.383 Node *legal = LCA; 3.384 while( early != legal ) { // While not at earliest legal 3.385 if( legal == v_ctrl ) break; // Check for prior good location
4.1 --- a/src/share/vm/opto/loopnode.hpp Wed Aug 12 14:27:54 2009 -0700 4.2 +++ b/src/share/vm/opto/loopnode.hpp Fri Aug 14 00:02:12 2009 -0700 4.3 @@ -1,5 +1,5 @@ 4.4 /* 4.5 - * Copyright 1998-2008 Sun Microsystems, Inc. All Rights Reserved. 4.6 + * Copyright 1998-2009 Sun Microsystems, Inc. All Rights Reserved. 4.7 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4.8 * 4.9 * This code is free software; you can redistribute it and/or modify it 4.10 @@ -442,6 +442,9 @@ 4.11 uint *_preorders; 4.12 uint _max_preorder; 4.13 4.14 + const PhaseIdealLoop* _verify_me; 4.15 + bool _verify_only; 4.16 + 4.17 // Allocate _preorders[] array 4.18 void allocate_preorders() { 4.19 _max_preorder = C->unique()+8; 4.20 @@ -497,6 +500,12 @@ 4.21 Node_Array _dom_lca_tags; 4.22 void init_dom_lca_tags(); 4.23 void clear_dom_lca_tags(); 4.24 + 4.25 + // Helper for debugging bad dominance relationships 4.26 + bool verify_dominance(Node* n, Node* use, Node* LCA, Node* early); 4.27 + 4.28 + Node* compute_lca_of_uses(Node* n, Node* early, bool verify = false); 4.29 + 4.30 // Inline wrapper for frequent cases: 4.31 // 1) only one use 4.32 // 2) a use is the same as the current LCA passed as 'n1' 4.33 @@ -511,6 +520,7 @@ 4.34 return find_non_split_ctrl(n); 4.35 } 4.36 Node *dom_lca_for_get_late_ctrl_internal( Node *lca, Node *n, Node *tag ); 4.37 + 4.38 // true if CFG node d dominates CFG node n 4.39 bool is_dominator(Node *d, Node *n); 4.40 4.41 @@ -621,9 +631,9 @@ 4.42 IdealLoopTree *sort( IdealLoopTree *loop, IdealLoopTree *innermost ); 4.43 4.44 // Place Data nodes in some loop nest 4.45 - void build_loop_early( VectorSet &visited, Node_List &worklist, Node_Stack &nstack, const PhaseIdealLoop *verify_me ); 4.46 - void build_loop_late ( VectorSet &visited, Node_List &worklist, Node_Stack &nstack, const PhaseIdealLoop *verify_me ); 4.47 - void build_loop_late_post ( Node* n, const PhaseIdealLoop *verify_me ); 4.48 + void build_loop_early( VectorSet &visited, Node_List &worklist, Node_Stack &nstack ); 4.49 + void build_loop_late ( VectorSet &visited, Node_List &worklist, Node_Stack &nstack ); 4.50 + void build_loop_late_post ( Node* n ); 4.51 4.52 // Array of immediate dominance info for each CFG node indexed by node idx 4.53 private: 4.54 @@ -662,6 +672,19 @@ 4.55 // Is safept not required by an outer loop? 4.56 bool is_deleteable_safept(Node* sfpt); 4.57 4.58 + // Perform verification that the graph is valid. 4.59 + PhaseIdealLoop( PhaseIterGVN &igvn) : 4.60 + PhaseTransform(Ideal_Loop), 4.61 + _igvn(igvn), 4.62 + _dom_lca_tags(C->comp_arena()), 4.63 + _verify_me(NULL), 4.64 + _verify_only(true) { 4.65 + build_and_optimize(false); 4.66 + } 4.67 + 4.68 + // build the loop tree and perform any requested optimizations 4.69 + void build_and_optimize(bool do_split_if); 4.70 + 4.71 public: 4.72 // Dominators for the sea of nodes 4.73 void Dominators(); 4.74 @@ -671,7 +694,32 @@ 4.75 Node *dom_lca_internal( Node *n1, Node *n2 ) const; 4.76 4.77 // Compute the Ideal Node to Loop mapping 4.78 - PhaseIdealLoop( PhaseIterGVN &igvn, const PhaseIdealLoop *verify_me, bool do_split_ifs ); 4.79 + PhaseIdealLoop( PhaseIterGVN &igvn, bool do_split_ifs) : 4.80 + PhaseTransform(Ideal_Loop), 4.81 + _igvn(igvn), 4.82 + _dom_lca_tags(C->comp_arena()), 4.83 + _verify_me(NULL), 4.84 + _verify_only(false) { 4.85 + build_and_optimize(do_split_ifs); 4.86 + } 4.87 + 4.88 + // Verify that verify_me made the same decisions as a fresh run. 4.89 + PhaseIdealLoop( PhaseIterGVN &igvn, const PhaseIdealLoop *verify_me) : 4.90 + PhaseTransform(Ideal_Loop), 4.91 + _igvn(igvn), 4.92 + _dom_lca_tags(C->comp_arena()), 4.93 + _verify_me(verify_me), 4.94 + _verify_only(false) { 4.95 + build_and_optimize(false); 4.96 + } 4.97 + 4.98 + // Build and verify the loop tree without modifying the graph. This 4.99 + // is useful to verify that all inputs properly dominate their uses. 4.100 + static void verify(PhaseIterGVN& igvn) { 4.101 +#ifdef ASSERT 4.102 + PhaseIdealLoop v(igvn); 4.103 +#endif 4.104 + } 4.105 4.106 // True if the method has at least 1 irreducible loop 4.107 bool _has_irreducible_loops;
5.1 --- a/src/share/vm/opto/phase.cpp Wed Aug 12 14:27:54 2009 -0700 5.2 +++ b/src/share/vm/opto/phase.cpp Fri Aug 14 00:02:12 2009 -0700 5.3 @@ -1,5 +1,5 @@ 5.4 /* 5.5 - * Copyright 1997-2008 Sun Microsystems, Inc. All Rights Reserved. 5.6 + * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved. 5.7 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 5.8 * 5.9 * This code is free software; you can redistribute it and/or modify it 5.10 @@ -53,6 +53,7 @@ 5.11 elapsedTimer Phase::_t_registerMethod; 5.12 elapsedTimer Phase::_t_temporaryTimer1; 5.13 elapsedTimer Phase::_t_temporaryTimer2; 5.14 +elapsedTimer Phase::_t_idealLoopVerify; 5.15 5.16 // Subtimers for _t_optimizer 5.17 elapsedTimer Phase::_t_iterGVN; 5.18 @@ -88,51 +89,52 @@ 5.19 tty->print_cr ("Accumulated compiler times:"); 5.20 tty->print_cr ("---------------------------"); 5.21 tty->print_cr (" Total compilation: %3.3f sec.", Phase::_t_totalCompilation.seconds()); 5.22 - tty->print (" method compilation : %3.3f sec", Phase::_t_methodCompilation.seconds()); 5.23 + tty->print (" method compilation : %3.3f sec", Phase::_t_methodCompilation.seconds()); 5.24 tty->print ("/%d bytes",_total_bytes_compiled); 5.25 tty->print_cr (" (%3.0f bytes per sec) ", Phase::_total_bytes_compiled / Phase::_t_methodCompilation.seconds()); 5.26 - tty->print_cr (" stub compilation : %3.3f sec.", Phase::_t_stubCompilation.seconds()); 5.27 + tty->print_cr (" stub compilation : %3.3f sec.", Phase::_t_stubCompilation.seconds()); 5.28 tty->print_cr (" Phases:"); 5.29 - tty->print_cr (" parse : %3.3f sec", Phase::_t_parser.seconds()); 5.30 + tty->print_cr (" parse : %3.3f sec", Phase::_t_parser.seconds()); 5.31 if (DoEscapeAnalysis) { 5.32 - tty->print_cr (" escape analysis : %3.3f sec", Phase::_t_escapeAnalysis.seconds()); 5.33 + tty->print_cr (" escape analysis : %3.3f sec", Phase::_t_escapeAnalysis.seconds()); 5.34 } 5.35 - tty->print_cr (" optimizer : %3.3f sec", Phase::_t_optimizer.seconds()); 5.36 + tty->print_cr (" optimizer : %3.3f sec", Phase::_t_optimizer.seconds()); 5.37 if( Verbose || WizardMode ) { 5.38 - tty->print_cr (" iterGVN : %3.3f sec", Phase::_t_iterGVN.seconds()); 5.39 - tty->print_cr (" idealLoop : %3.3f sec", Phase::_t_idealLoop.seconds()); 5.40 - tty->print_cr (" ccp : %3.3f sec", Phase::_t_ccp.seconds()); 5.41 - tty->print_cr (" iterGVN2 : %3.3f sec", Phase::_t_iterGVN2.seconds()); 5.42 - tty->print_cr (" graphReshape : %3.3f sec", Phase::_t_graphReshaping.seconds()); 5.43 + tty->print_cr (" iterGVN : %3.3f sec", Phase::_t_iterGVN.seconds()); 5.44 + tty->print_cr (" idealLoop : %3.3f sec", Phase::_t_idealLoop.seconds()); 5.45 + tty->print_cr (" idealLoopVerify: %3.3f sec", Phase::_t_idealLoopVerify.seconds()); 5.46 + tty->print_cr (" ccp : %3.3f sec", Phase::_t_ccp.seconds()); 5.47 + tty->print_cr (" iterGVN2 : %3.3f sec", Phase::_t_iterGVN2.seconds()); 5.48 + tty->print_cr (" graphReshape : %3.3f sec", Phase::_t_graphReshaping.seconds()); 5.49 double optimizer_subtotal = Phase::_t_iterGVN.seconds() + 5.50 Phase::_t_idealLoop.seconds() + Phase::_t_ccp.seconds() + 5.51 Phase::_t_graphReshaping.seconds(); 5.52 double percent_of_optimizer = ((optimizer_subtotal == 0.0) ? 0.0 : (optimizer_subtotal / Phase::_t_optimizer.seconds() * 100.0)); 5.53 - tty->print_cr (" subtotal : %3.3f sec, %3.2f %%", optimizer_subtotal, percent_of_optimizer); 5.54 + tty->print_cr (" subtotal : %3.3f sec, %3.2f %%", optimizer_subtotal, percent_of_optimizer); 5.55 } 5.56 - tty->print_cr (" matcher : %3.3f sec", Phase::_t_matcher.seconds()); 5.57 - tty->print_cr (" scheduler : %3.3f sec", Phase::_t_scheduler.seconds()); 5.58 - tty->print_cr (" regalloc : %3.3f sec", Phase::_t_registerAllocation.seconds()); 5.59 + tty->print_cr (" matcher : %3.3f sec", Phase::_t_matcher.seconds()); 5.60 + tty->print_cr (" scheduler : %3.3f sec", Phase::_t_scheduler.seconds()); 5.61 + tty->print_cr (" regalloc : %3.3f sec", Phase::_t_registerAllocation.seconds()); 5.62 if( Verbose || WizardMode ) { 5.63 - tty->print_cr (" ctorChaitin : %3.3f sec", Phase::_t_ctorChaitin.seconds()); 5.64 - tty->print_cr (" buildIFG : %3.3f sec", Phase::_t_buildIFGphysical.seconds()); 5.65 - tty->print_cr (" computeLive : %3.3f sec", Phase::_t_computeLive.seconds()); 5.66 - tty->print_cr (" regAllocSplit: %3.3f sec", Phase::_t_regAllocSplit.seconds()); 5.67 + tty->print_cr (" ctorChaitin : %3.3f sec", Phase::_t_ctorChaitin.seconds()); 5.68 + tty->print_cr (" buildIFG : %3.3f sec", Phase::_t_buildIFGphysical.seconds()); 5.69 + tty->print_cr (" computeLive : %3.3f sec", Phase::_t_computeLive.seconds()); 5.70 + tty->print_cr (" regAllocSplit : %3.3f sec", Phase::_t_regAllocSplit.seconds()); 5.71 tty->print_cr (" postAllocCopyRemoval: %3.3f sec", Phase::_t_postAllocCopyRemoval.seconds()); 5.72 - tty->print_cr (" fixupSpills : %3.3f sec", Phase::_t_fixupSpills.seconds()); 5.73 + tty->print_cr (" fixupSpills : %3.3f sec", Phase::_t_fixupSpills.seconds()); 5.74 double regalloc_subtotal = Phase::_t_ctorChaitin.seconds() + 5.75 Phase::_t_buildIFGphysical.seconds() + Phase::_t_computeLive.seconds() + 5.76 Phase::_t_regAllocSplit.seconds() + Phase::_t_fixupSpills.seconds() + 5.77 Phase::_t_postAllocCopyRemoval.seconds(); 5.78 double percent_of_regalloc = ((regalloc_subtotal == 0.0) ? 0.0 : (regalloc_subtotal / Phase::_t_registerAllocation.seconds() * 100.0)); 5.79 - tty->print_cr (" subtotal : %3.3f sec, %3.2f %%", regalloc_subtotal, percent_of_regalloc); 5.80 + tty->print_cr (" subtotal : %3.3f sec, %3.2f %%", regalloc_subtotal, percent_of_regalloc); 5.81 } 5.82 - tty->print_cr (" macroExpand : %3.3f sec", Phase::_t_macroExpand.seconds()); 5.83 - tty->print_cr (" blockOrdering: %3.3f sec", Phase::_t_blockOrdering.seconds()); 5.84 - tty->print_cr (" peephole : %3.3f sec", Phase::_t_peephole.seconds()); 5.85 - tty->print_cr (" codeGen : %3.3f sec", Phase::_t_codeGeneration.seconds()); 5.86 - tty->print_cr (" install_code : %3.3f sec", Phase::_t_registerMethod.seconds()); 5.87 - tty->print_cr (" ------------ : ----------"); 5.88 + tty->print_cr (" macroExpand : %3.3f sec", Phase::_t_macroExpand.seconds()); 5.89 + tty->print_cr (" blockOrdering : %3.3f sec", Phase::_t_blockOrdering.seconds()); 5.90 + tty->print_cr (" peephole : %3.3f sec", Phase::_t_peephole.seconds()); 5.91 + tty->print_cr (" codeGen : %3.3f sec", Phase::_t_codeGeneration.seconds()); 5.92 + tty->print_cr (" install_code : %3.3f sec", Phase::_t_registerMethod.seconds()); 5.93 + tty->print_cr (" -------------- : ----------"); 5.94 double phase_subtotal = Phase::_t_parser.seconds() + 5.95 (DoEscapeAnalysis ? Phase::_t_escapeAnalysis.seconds() : 0.0) + 5.96 Phase::_t_optimizer.seconds() + Phase::_t_graphReshaping.seconds() + 5.97 @@ -143,7 +145,7 @@ 5.98 double percent_of_method_compile = ((phase_subtotal == 0.0) ? 0.0 : phase_subtotal / Phase::_t_methodCompilation.seconds()) * 100.0; 5.99 // counters inside Compile::CodeGen include time for adapters and stubs 5.100 // so phase-total can be greater than 100% 5.101 - tty->print_cr (" total : %3.3f sec, %3.2f %%", phase_subtotal, percent_of_method_compile); 5.102 + tty->print_cr (" total : %3.3f sec, %3.2f %%", phase_subtotal, percent_of_method_compile); 5.103 5.104 assert( percent_of_method_compile > expected_method_compile_coverage || 5.105 phase_subtotal < minimum_meaningful_method_compile, 5.106 @@ -157,8 +159,8 @@ 5.107 tty->cr(); 5.108 tty->print_cr (" temporaryTimer2: %3.3f sec", Phase::_t_temporaryTimer2.seconds()); 5.109 } 5.110 - tty->print_cr (" output : %3.3f sec", Phase::_t_output.seconds()); 5.111 - tty->print_cr (" isched : %3.3f sec", Phase::_t_instrSched.seconds()); 5.112 - tty->print_cr (" bldOopMaps: %3.3f sec", Phase::_t_buildOopMaps.seconds()); 5.113 + tty->print_cr (" output : %3.3f sec", Phase::_t_output.seconds()); 5.114 + tty->print_cr (" isched : %3.3f sec", Phase::_t_instrSched.seconds()); 5.115 + tty->print_cr (" bldOopMaps : %3.3f sec", Phase::_t_buildOopMaps.seconds()); 5.116 } 5.117 #endif
6.1 --- a/src/share/vm/opto/phase.hpp Wed Aug 12 14:27:54 2009 -0700 6.2 +++ b/src/share/vm/opto/phase.hpp Fri Aug 14 00:02:12 2009 -0700 6.3 @@ -1,5 +1,5 @@ 6.4 /* 6.5 - * Copyright 1997-2008 Sun Microsystems, Inc. All Rights Reserved. 6.6 + * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved. 6.7 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 6.8 * 6.9 * This code is free software; you can redistribute it and/or modify it 6.10 @@ -83,6 +83,7 @@ 6.11 static elapsedTimer _t_registerMethod; 6.12 static elapsedTimer _t_temporaryTimer1; 6.13 static elapsedTimer _t_temporaryTimer2; 6.14 + static elapsedTimer _t_idealLoopVerify; 6.15 6.16 // Subtimers for _t_optimizer 6.17 static elapsedTimer _t_iterGVN;