Tue, 12 Jan 2010 14:37:35 -0800
6894779: Loop Predication for Loop Optimizer in C2
Summary: Loop predication implementation
Reviewed-by: never, kvn
1.1 --- a/src/share/vm/includeDB_compiler2 Sat Jan 09 00:59:35 2010 -0800 1.2 +++ b/src/share/vm/includeDB_compiler2 Tue Jan 12 14:37:35 2010 -0800 1.3 @@ -601,6 +601,7 @@ 1.4 1.5 loopTransform.cpp addnode.hpp 1.6 loopTransform.cpp allocation.inline.hpp 1.7 +loopTransform.cpp callnode.hpp 1.8 loopTransform.cpp connode.hpp 1.9 loopTransform.cpp compileLog.hpp 1.10 loopTransform.cpp divnode.hpp
2.1 --- a/src/share/vm/opto/c2_globals.hpp Sat Jan 09 00:59:35 2010 -0800 2.2 +++ b/src/share/vm/opto/c2_globals.hpp Tue Jan 12 14:37:35 2010 -0800 2.3 @@ -154,6 +154,12 @@ 2.4 notproduct(bool, TraceProfileTripCount, false, \ 2.5 "Trace profile loop trip count information") \ 2.6 \ 2.7 + product(bool, UseLoopPredicate, true, \ 2.8 + "Generate a predicate to select fast/slow loop versions") \ 2.9 + \ 2.10 + develop(bool, TraceLoopPredicate, false, \ 2.11 + "Trace generation of loop predicates") \ 2.12 + \ 2.13 develop(bool, OptoCoalesce, true, \ 2.14 "Use Conservative Copy Coalescing in the Register Allocator") \ 2.15 \
3.1 --- a/src/share/vm/opto/compile.cpp Sat Jan 09 00:59:35 2010 -0800 3.2 +++ b/src/share/vm/opto/compile.cpp Tue Jan 12 14:37:35 2010 -0800 3.3 @@ -932,6 +932,7 @@ 3.4 3.5 _intrinsics = NULL; 3.6 _macro_nodes = new GrowableArray<Node*>(comp_arena(), 8, 0, NULL); 3.7 + _predicate_opaqs = new GrowableArray<Node*>(comp_arena(), 8, 0, NULL); 3.8 register_library_intrinsics(); 3.9 } 3.10 3.11 @@ -1553,6 +1554,19 @@ 3.12 } 3.13 } 3.14 3.15 +//---------------------cleanup_loop_predicates----------------------- 3.16 +// Remove the opaque nodes that protect the predicates so that all unused 3.17 +// checks and uncommon_traps will be eliminated from the ideal graph 3.18 +void Compile::cleanup_loop_predicates(PhaseIterGVN &igvn) { 3.19 + if (predicate_count()==0) return; 3.20 + for (int i = predicate_count(); i > 0; i--) { 3.21 + Node * n = predicate_opaque1_node(i-1); 3.22 + assert(n->Opcode() == Op_Opaque1, "must be"); 3.23 + igvn.replace_node(n, n->in(1)); 3.24 + } 3.25 + assert(predicate_count()==0, "should be clean!"); 3.26 + igvn.optimize(); 3.27 +} 3.28 3.29 //------------------------------Optimize--------------------------------------- 3.30 // Given a graph, optimize it. 
3.31 @@ -1594,7 +1608,7 @@ 3.32 if((loop_opts_cnt > 0) && (has_loops() || has_split_ifs())) { 3.33 { 3.34 TracePhase t2("idealLoop", &_t_idealLoop, true); 3.35 - PhaseIdealLoop ideal_loop( igvn, true ); 3.36 + PhaseIdealLoop ideal_loop( igvn, true, UseLoopPredicate); 3.37 loop_opts_cnt--; 3.38 if (major_progress()) print_method("PhaseIdealLoop 1", 2); 3.39 if (failing()) return; 3.40 @@ -1602,7 +1616,7 @@ 3.41 // Loop opts pass if partial peeling occurred in previous pass 3.42 if(PartialPeelLoop && major_progress() && (loop_opts_cnt > 0)) { 3.43 TracePhase t3("idealLoop", &_t_idealLoop, true); 3.44 - PhaseIdealLoop ideal_loop( igvn, false ); 3.45 + PhaseIdealLoop ideal_loop( igvn, false, UseLoopPredicate); 3.46 loop_opts_cnt--; 3.47 if (major_progress()) print_method("PhaseIdealLoop 2", 2); 3.48 if (failing()) return; 3.49 @@ -1610,7 +1624,7 @@ 3.50 // Loop opts pass for loop-unrolling before CCP 3.51 if(major_progress() && (loop_opts_cnt > 0)) { 3.52 TracePhase t4("idealLoop", &_t_idealLoop, true); 3.53 - PhaseIdealLoop ideal_loop( igvn, false ); 3.54 + PhaseIdealLoop ideal_loop( igvn, false, UseLoopPredicate); 3.55 loop_opts_cnt--; 3.56 if (major_progress()) print_method("PhaseIdealLoop 3", 2); 3.57 } 3.58 @@ -1648,13 +1662,21 @@ 3.59 // peeling, unrolling, etc. 3.60 if(loop_opts_cnt > 0) { 3.61 debug_only( int cnt = 0; ); 3.62 + bool loop_predication = UseLoopPredicate; 3.63 while(major_progress() && (loop_opts_cnt > 0)) { 3.64 TracePhase t2("idealLoop", &_t_idealLoop, true); 3.65 assert( cnt++ < 40, "infinite cycle in loop optimization" ); 3.66 - PhaseIdealLoop ideal_loop( igvn, true ); 3.67 + PhaseIdealLoop ideal_loop( igvn, true, loop_predication); 3.68 loop_opts_cnt--; 3.69 if (major_progress()) print_method("PhaseIdealLoop iterations", 2); 3.70 if (failing()) return; 3.71 + // Perform loop predication optimization during first iteration after CCP. 3.72 + // After that switch it off and cleanup unused loop predicates. 
3.73 + if (loop_predication) { 3.74 + loop_predication = false; 3.75 + cleanup_loop_predicates(igvn); 3.76 + if (failing()) return; 3.77 + } 3.78 } 3.79 } 3.80
4.1 --- a/src/share/vm/opto/compile.hpp Sat Jan 09 00:59:35 2010 -0800 4.2 +++ b/src/share/vm/opto/compile.hpp Tue Jan 12 14:37:35 2010 -0800 4.3 @@ -38,6 +38,7 @@ 4.4 class OptoReg; 4.5 class PhaseCFG; 4.6 class PhaseGVN; 4.7 +class PhaseIterGVN; 4.8 class PhaseRegAlloc; 4.9 class PhaseCCP; 4.10 class PhaseCCP_DCE; 4.11 @@ -172,6 +173,7 @@ 4.12 const char* _failure_reason; // for record_failure/failing pattern 4.13 GrowableArray<CallGenerator*>* _intrinsics; // List of intrinsics. 4.14 GrowableArray<Node*>* _macro_nodes; // List of nodes which need to be expanded before matching. 4.15 + GrowableArray<Node*>* _predicate_opaqs; // List of Opaque1 nodes for the loop predicates. 4.16 ConnectionGraph* _congraph; 4.17 #ifndef PRODUCT 4.18 IdealGraphPrinter* _printer; 4.19 @@ -351,7 +353,9 @@ 4.20 } 4.21 4.22 int macro_count() { return _macro_nodes->length(); } 4.23 + int predicate_count() { return _predicate_opaqs->length();} 4.24 Node* macro_node(int idx) { return _macro_nodes->at(idx); } 4.25 + Node* predicate_opaque1_node(int idx) { return _predicate_opaqs->at(idx);} 4.26 ConnectionGraph* congraph() { return _congraph;} 4.27 void add_macro_node(Node * n) { 4.28 //assert(n->is_macro(), "must be a macro node"); 4.29 @@ -363,7 +367,19 @@ 4.30 // that the node is in the array before attempting to remove it 4.31 if (_macro_nodes->contains(n)) 4.32 _macro_nodes->remove(n); 4.33 + // remove from _predicate_opaqs list also if it is there 4.34 + if (predicate_count() > 0 && _predicate_opaqs->contains(n)){ 4.35 + _predicate_opaqs->remove(n); 4.36 + } 4.37 } 4.38 + void add_predicate_opaq(Node * n) { 4.39 + assert(!_predicate_opaqs->contains(n), " duplicate entry in predicate opaque1"); 4.40 + assert(_macro_nodes->contains(n), "should have already been in macro list"); 4.41 + _predicate_opaqs->append(n); 4.42 + } 4.43 + // remove the opaque nodes that protect the predicates so that the unused checks and 4.44 + // uncommon traps will be eliminated from the graph. 
4.45 + void cleanup_loop_predicates(PhaseIterGVN &igvn); 4.46 4.47 // Compilation environment. 4.48 Arena* comp_arena() { return &_comp_arena; }
5.1 --- a/src/share/vm/opto/loopTransform.cpp Sat Jan 09 00:59:35 2010 -0800 5.2 +++ b/src/share/vm/opto/loopTransform.cpp Tue Jan 12 14:37:35 2010 -0800 5.3 @@ -549,6 +549,10 @@ 5.4 // Comparing trip+off vs limit 5.5 Node *bol = iff->in(1); 5.6 if( bol->req() != 2 ) continue; // dead constant test 5.7 + if (!bol->is_Bool()) { 5.8 + assert(UseLoopPredicate && bol->Opcode() == Op_Conv2B, "predicate check only"); 5.9 + continue; 5.10 + } 5.11 Node *cmp = bol->in(1); 5.12 5.13 Node *rc_exp = cmp->in(1); 5.14 @@ -875,7 +879,7 @@ 5.15 //------------------------------is_invariant----------------------------- 5.16 // Return true if n is invariant 5.17 bool IdealLoopTree::is_invariant(Node* n) const { 5.18 - Node *n_c = _phase->get_ctrl(n); 5.19 + Node *n_c = _phase->has_ctrl(n) ? _phase->get_ctrl(n) : n; 5.20 if (n_c->is_top()) return false; 5.21 return !is_member(_phase->get_loop(n_c)); 5.22 } 5.23 @@ -1594,7 +1598,7 @@ 5.24 bool IdealLoopTree::iteration_split_impl( PhaseIdealLoop *phase, Node_List &old_new ) { 5.25 // Check and remove empty loops (spam micro-benchmarks) 5.26 if( policy_do_remove_empty_loop(phase) ) 5.27 - return true; // Here we removed an empty loop 5.28 + return true; // Here we removed an empty loop 5.29 5.30 bool should_peel = policy_peeling(phase); // Should we peel? 5.31 5.32 @@ -1688,8 +1692,8 @@ 5.33 // an even number of trips). If we are peeling, we might enable some RCE 5.34 // and we'd rather unroll the post-RCE'd loop SO... do not unroll if 5.35 // peeling. 5.36 - if( should_unroll && !should_peel ) 5.37 - phase->do_unroll(this,old_new, true); 5.38 + if( should_unroll && !should_peel ) 5.39 + phase->do_unroll(this,old_new, true); 5.40 5.41 // Adjust the pre-loop limits to align the main body 5.42 // iterations. 
5.43 @@ -1731,9 +1735,9 @@ 5.44 _allow_optimizations && 5.45 !tail()->is_top() ) { // Also ignore the occasional dead backedge 5.46 if (!_has_call) { 5.47 - if (!iteration_split_impl( phase, old_new )) { 5.48 - return false; 5.49 - } 5.50 + if (!iteration_split_impl( phase, old_new )) { 5.51 + return false; 5.52 + } 5.53 } else if (policy_unswitching(phase)) { 5.54 phase->do_unswitching(this, old_new); 5.55 } 5.56 @@ -1746,3 +1750,576 @@ 5.57 return false; 5.58 return true; 5.59 } 5.60 + 5.61 +//-------------------------------is_uncommon_trap_proj---------------------------- 5.62 +// Return true if proj is the form of "proj->[region->..]call_uct" 5.63 +bool PhaseIdealLoop::is_uncommon_trap_proj(ProjNode* proj, bool must_reason_predicate) { 5.64 + int path_limit = 10; 5.65 + assert(proj, "invalid argument"); 5.66 + Node* out = proj; 5.67 + for (int ct = 0; ct < path_limit; ct++) { 5.68 + out = out->unique_ctrl_out(); 5.69 + if (out == NULL || out->is_Root() || out->is_Start()) 5.70 + return false; 5.71 + if (out->is_CallStaticJava()) { 5.72 + int req = out->as_CallStaticJava()->uncommon_trap_request(); 5.73 + if (req != 0) { 5.74 + Deoptimization::DeoptReason reason = Deoptimization::trap_request_reason(req); 5.75 + if (!must_reason_predicate || reason == Deoptimization::Reason_predicate){ 5.76 + return true; 5.77 + } 5.78 + } 5.79 + return false; // don't do further after call 5.80 + } 5.81 + } 5.82 + return false; 5.83 +} 5.84 + 5.85 +//-------------------------------is_uncommon_trap_if_pattern------------------------- 5.86 +// Return true for "if(test)-> proj -> ... 
5.87 +// | 5.88 +// V 5.89 +// other_proj->[region->..]call_uct" 5.90 +// 5.91 +// "must_reason_predicate" means the uct reason must be Reason_predicate 5.92 +bool PhaseIdealLoop::is_uncommon_trap_if_pattern(ProjNode *proj, bool must_reason_predicate) { 5.93 + Node *in0 = proj->in(0); 5.94 + if (!in0->is_If()) return false; 5.95 + IfNode* iff = in0->as_If(); 5.96 + 5.97 + // we need "If(Conv2B(Opaque1(...)))" pattern for must_reason_predicate 5.98 + if (must_reason_predicate) { 5.99 + if (iff->in(1)->Opcode() != Op_Conv2B || 5.100 + iff->in(1)->in(1)->Opcode() != Op_Opaque1) { 5.101 + return false; 5.102 + } 5.103 + } 5.104 + 5.105 + ProjNode* other_proj = iff->proj_out(1-proj->_con)->as_Proj(); 5.106 + return is_uncommon_trap_proj(other_proj, must_reason_predicate); 5.107 +} 5.108 + 5.109 +//------------------------------create_new_if_for_predicate------------------------ 5.110 +// create a new if above the uct_if_pattern for the predicate to be promoted. 5.111 +// 5.112 +// before after 5.113 +// ---------- ---------- 5.114 +// ctrl ctrl 5.115 +// | | 5.116 +// | | 5.117 +// v v 5.118 +// iff new_iff 5.119 +// / \ / \ 5.120 +// / \ / \ 5.121 +// v v v v 5.122 +// uncommon_proj cont_proj if_uct if_cont 5.123 +// \ | | | | 5.124 +// \ | | | | 5.125 +// v v v | v 5.126 +// rgn loop | iff 5.127 +// | | / \ 5.128 +// | | / \ 5.129 +// v | v v 5.130 +// uncommon_trap | uncommon_proj cont_proj 5.131 +// \ \ | | 5.132 +// \ \ | | 5.133 +// v v v v 5.134 +// rgn loop 5.135 +// | 5.136 +// | 5.137 +// v 5.138 +// uncommon_trap 5.139 +// 5.140 +// 5.141 +// We will create a region to guard the uct call if there is no one there. 5.142 +// The true projecttion (if_cont) of the new_iff is returned. 
5.143 +ProjNode* PhaseIdealLoop::create_new_if_for_predicate(ProjNode* cont_proj) { 5.144 + assert(is_uncommon_trap_if_pattern(cont_proj, true), "must be a uct if pattern!"); 5.145 + IfNode* iff = cont_proj->in(0)->as_If(); 5.146 + 5.147 + ProjNode *uncommon_proj = iff->proj_out(1 - cont_proj->_con); 5.148 + Node *rgn = uncommon_proj->unique_ctrl_out(); 5.149 + assert(rgn->is_Region() || rgn->is_Call(), "must be a region or call uct"); 5.150 + 5.151 + if (!rgn->is_Region()) { // create a region to guard the call 5.152 + assert(rgn->is_Call(), "must be call uct"); 5.153 + CallNode* call = rgn->as_Call(); 5.154 + rgn = new (C, 1) RegionNode(1); 5.155 + _igvn.set_type(rgn, rgn->bottom_type()); 5.156 + rgn->add_req(uncommon_proj); 5.157 + set_idom(rgn, idom(uncommon_proj), dom_depth(uncommon_proj)+1); 5.158 + _igvn.hash_delete(call); 5.159 + call->set_req(0, rgn); 5.160 + } 5.161 + 5.162 + // Create new_iff 5.163 + uint iffdd = dom_depth(iff); 5.164 + IdealLoopTree* lp = get_loop(iff); 5.165 + IfNode *new_iff = new (C, 2) IfNode(iff->in(0), NULL, iff->_prob, iff->_fcnt); 5.166 + register_node(new_iff, lp, idom(iff), iffdd); 5.167 + Node *if_cont = new (C, 1) IfTrueNode(new_iff); 5.168 + Node *if_uct = new (C, 1) IfFalseNode(new_iff); 5.169 + if (cont_proj->is_IfFalse()) { 5.170 + // Swap 5.171 + Node* tmp = if_uct; if_uct = if_cont; if_cont = tmp; 5.172 + } 5.173 + register_node(if_cont, lp, new_iff, iffdd); 5.174 + register_node(if_uct, get_loop(rgn), new_iff, iffdd); 5.175 + 5.176 + // if_cont to iff 5.177 + _igvn.hash_delete(iff); 5.178 + iff->set_req(0, if_cont); 5.179 + set_idom(iff, if_cont, dom_depth(iff)); 5.180 + 5.181 + // if_uct to rgn 5.182 + _igvn.hash_delete(rgn); 5.183 + rgn->add_req(if_uct); 5.184 + Node* ridom = idom(rgn); 5.185 + Node* nrdom = dom_lca(ridom, new_iff); 5.186 + set_idom(rgn, nrdom, dom_depth(rgn)); 5.187 + 5.188 + // rgn must have no phis 5.189 + assert(!rgn->as_Region()->has_phi(), "region must have no phis"); 5.190 + 5.191 + return 
if_cont->as_Proj(); 5.192 +} 5.193 + 5.194 +//------------------------------find_predicate_insertion_point-------------------------- 5.195 +// Find a good location to insert a predicate 5.196 +ProjNode* PhaseIdealLoop::find_predicate_insertion_point(Node* start_c) { 5.197 + if (start_c == C->root() || !start_c->is_Proj()) 5.198 + return NULL; 5.199 + if (is_uncommon_trap_if_pattern(start_c->as_Proj(), true/*Reason_Predicate*/)) { 5.200 + return start_c->as_Proj(); 5.201 + } 5.202 + return NULL; 5.203 +} 5.204 + 5.205 +//------------------------------Invariance----------------------------------- 5.206 +// Helper class for loop_predication_impl to compute invariance on the fly and 5.207 +// clone invariants. 5.208 +class Invariance : public StackObj { 5.209 + VectorSet _visited, _invariant; 5.210 + Node_Stack _stack; 5.211 + VectorSet _clone_visited; 5.212 + Node_List _old_new; // map of old to new (clone) 5.213 + IdealLoopTree* _lpt; 5.214 + PhaseIdealLoop* _phase; 5.215 + 5.216 + // Helper function to set up the invariance for invariance computation 5.217 + // If n is a known invariant, set up directly. Otherwise, look up the 5.218 + // the possibility to push n onto the stack for further processing. 5.219 + void visit(Node* use, Node* n) { 5.220 + if (_lpt->is_invariant(n)) { // known invariant 5.221 + _invariant.set(n->_idx); 5.222 + } else if (!n->is_CFG()) { 5.223 + Node *n_ctrl = _phase->ctrl_or_self(n); 5.224 + Node *u_ctrl = _phase->ctrl_or_self(use); // self if use is a CFG 5.225 + if (_phase->is_dominator(n_ctrl, u_ctrl)) { 5.226 + _stack.push(n, n->in(0) == NULL ? 
1 : 0); 5.227 + } 5.228 + } 5.229 + } 5.230 + 5.231 + // Compute invariance for "the_node" and (possibly) all its inputs recursively 5.232 + // on the fly 5.233 + void compute_invariance(Node* n) { 5.234 + assert(_visited.test(n->_idx), "must be"); 5.235 + visit(n, n); 5.236 + while (_stack.is_nonempty()) { 5.237 + Node* n = _stack.node(); 5.238 + uint idx = _stack.index(); 5.239 + if (idx == n->req()) { // all inputs are processed 5.240 + _stack.pop(); 5.241 + // n is invariant if it's inputs are all invariant 5.242 + bool all_inputs_invariant = true; 5.243 + for (uint i = 0; i < n->req(); i++) { 5.244 + Node* in = n->in(i); 5.245 + if (in == NULL) continue; 5.246 + assert(_visited.test(in->_idx), "must have visited input"); 5.247 + if (!_invariant.test(in->_idx)) { // bad guy 5.248 + all_inputs_invariant = false; 5.249 + break; 5.250 + } 5.251 + } 5.252 + if (all_inputs_invariant) { 5.253 + _invariant.set(n->_idx); // I am a invariant too 5.254 + } 5.255 + } else { // process next input 5.256 + _stack.set_index(idx + 1); 5.257 + Node* m = n->in(idx); 5.258 + if (m != NULL && !_visited.test_set(m->_idx)) { 5.259 + visit(n, m); 5.260 + } 5.261 + } 5.262 + } 5.263 + } 5.264 + 5.265 + // Helper function to set up _old_new map for clone_nodes. 5.266 + // If n is a known invariant, set up directly ("clone" of n == n). 5.267 + // Otherwise, push n onto the stack for real cloning. 5.268 + void clone_visit(Node* n) { 5.269 + assert(_invariant.test(n->_idx), "must be invariant"); 5.270 + if (_lpt->is_invariant(n)) { // known invariant 5.271 + _old_new.map(n->_idx, n); 5.272 + } else{ // to be cloned 5.273 + assert (!n->is_CFG(), "should not see CFG here"); 5.274 + _stack.push(n, n->in(0) == NULL ? 
1 : 0); 5.275 + } 5.276 + } 5.277 + 5.278 + // Clone "n" and (possibly) all its inputs recursively 5.279 + void clone_nodes(Node* n, Node* ctrl) { 5.280 + clone_visit(n); 5.281 + while (_stack.is_nonempty()) { 5.282 + Node* n = _stack.node(); 5.283 + uint idx = _stack.index(); 5.284 + if (idx == n->req()) { // all inputs processed, clone n! 5.285 + _stack.pop(); 5.286 + // clone invariant node 5.287 + Node* n_cl = n->clone(); 5.288 + _old_new.map(n->_idx, n_cl); 5.289 + _phase->register_new_node(n_cl, ctrl); 5.290 + for (uint i = 0; i < n->req(); i++) { 5.291 + Node* in = n_cl->in(i); 5.292 + if (in == NULL) continue; 5.293 + n_cl->set_req(i, _old_new[in->_idx]); 5.294 + } 5.295 + } else { // process next input 5.296 + _stack.set_index(idx + 1); 5.297 + Node* m = n->in(idx); 5.298 + if (m != NULL && !_clone_visited.test_set(m->_idx)) { 5.299 + clone_visit(m); // visit the input 5.300 + } 5.301 + } 5.302 + } 5.303 + } 5.304 + 5.305 + public: 5.306 + Invariance(Arena* area, IdealLoopTree* lpt) : 5.307 + _lpt(lpt), _phase(lpt->_phase), 5.308 + _visited(area), _invariant(area), _stack(area, 10 /* guess */), 5.309 + _clone_visited(area), _old_new(area) 5.310 + {} 5.311 + 5.312 + // Map old to n for invariance computation and clone 5.313 + void map_ctrl(Node* old, Node* n) { 5.314 + assert(old->is_CFG() && n->is_CFG(), "must be"); 5.315 + _old_new.map(old->_idx, n); // "clone" of old is n 5.316 + _invariant.set(old->_idx); // old is invariant 5.317 + _clone_visited.set(old->_idx); 5.318 + } 5.319 + 5.320 + // Driver function to compute invariance 5.321 + bool is_invariant(Node* n) { 5.322 + if (!_visited.test_set(n->_idx)) 5.323 + compute_invariance(n); 5.324 + return (_invariant.test(n->_idx) != 0); 5.325 + } 5.326 + 5.327 + // Driver function to clone invariant 5.328 + Node* clone(Node* n, Node* ctrl) { 5.329 + assert(ctrl->is_CFG(), "must be"); 5.330 + assert(_invariant.test(n->_idx), "must be an invariant"); 5.331 + if (!_clone_visited.test(n->_idx)) 5.332 + 
clone_nodes(n, ctrl); 5.333 + return _old_new[n->_idx]; 5.334 + } 5.335 +}; 5.336 + 5.337 +//------------------------------is_range_check_if ----------------------------------- 5.338 +// Returns true if the predicate of iff is in "scale*iv + offset u< load_range(ptr)" format 5.339 +// Note: this function is particularly designed for loop predication. We require load_range 5.340 +// and offset to be loop invariant computed on the fly by "invar" 5.341 +bool IdealLoopTree::is_range_check_if(IfNode *iff, PhaseIdealLoop *phase, Invariance& invar) const { 5.342 + if (!is_loop_exit(iff)) { 5.343 + return false; 5.344 + } 5.345 + if (!iff->in(1)->is_Bool()) { 5.346 + return false; 5.347 + } 5.348 + const BoolNode *bol = iff->in(1)->as_Bool(); 5.349 + if (bol->_test._test != BoolTest::lt) { 5.350 + return false; 5.351 + } 5.352 + if (!bol->in(1)->is_Cmp()) { 5.353 + return false; 5.354 + } 5.355 + const CmpNode *cmp = bol->in(1)->as_Cmp(); 5.356 + if (cmp->Opcode() != Op_CmpU ) { 5.357 + return false; 5.358 + } 5.359 + if (cmp->in(2)->Opcode() != Op_LoadRange) { 5.360 + return false; 5.361 + } 5.362 + LoadRangeNode* lr = (LoadRangeNode*)cmp->in(2); 5.363 + if (!invar.is_invariant(lr)) { // loadRange must be invariant 5.364 + return false; 5.365 + } 5.366 + Node *iv = _head->as_CountedLoop()->phi(); 5.367 + int scale = 0; 5.368 + Node *offset = NULL; 5.369 + if (!phase->is_scaled_iv_plus_offset(cmp->in(1), iv, &scale, &offset)) { 5.370 + return false; 5.371 + } 5.372 + if(offset && !invar.is_invariant(offset)) { // offset must be invariant 5.373 + return false; 5.374 + } 5.375 + return true; 5.376 +} 5.377 + 5.378 +//------------------------------rc_predicate----------------------------------- 5.379 +// Create a range check predicate 5.380 +// 5.381 +// for (i = init; i < limit; i += stride) { 5.382 +// a[scale*i+offset] 5.383 +// } 5.384 +// 5.385 +// Compute max(scale*i + offset) for init <= i < limit and build the predicate 5.386 +// as "max(scale*i + offset) u< 
a.length". 5.387 +// 5.388 +// There are two cases for max(scale*i + offset): 5.389 +// (1) stride*scale > 0 5.390 +// max(scale*i + offset) = scale*(limit-stride) + offset 5.391 +// (2) stride*scale < 0 5.392 +// max(scale*i + offset) = scale*init + offset 5.393 +BoolNode* PhaseIdealLoop::rc_predicate(Node* ctrl, 5.394 + int scale, Node* offset, 5.395 + Node* init, Node* limit, Node* stride, 5.396 + Node* range) { 5.397 + Node* max_idx_expr = init; 5.398 + int stride_con = stride->get_int(); 5.399 + if ((stride_con > 0) == (scale > 0)) { 5.400 + max_idx_expr = new (C, 3) SubINode(limit, stride); 5.401 + register_new_node(max_idx_expr, ctrl); 5.402 + } 5.403 + 5.404 + if (scale != 1) { 5.405 + ConNode* con_scale = _igvn.intcon(scale); 5.406 + max_idx_expr = new (C, 3) MulINode(max_idx_expr, con_scale); 5.407 + register_new_node(max_idx_expr, ctrl); 5.408 + } 5.409 + 5.410 + if (offset && (!offset->is_Con() || offset->get_int() != 0)){ 5.411 + max_idx_expr = new (C, 3) AddINode(max_idx_expr, offset); 5.412 + register_new_node(max_idx_expr, ctrl); 5.413 + } 5.414 + 5.415 + CmpUNode* cmp = new (C, 3) CmpUNode(max_idx_expr, range); 5.416 + register_new_node(cmp, ctrl); 5.417 + BoolNode* bol = new (C, 2) BoolNode(cmp, BoolTest::lt); 5.418 + register_new_node(bol, ctrl); 5.419 + return bol; 5.420 +} 5.421 + 5.422 +//------------------------------ loop_predication_impl-------------------------- 5.423 +// Insert loop predicates for null checks and range checks 5.424 +bool PhaseIdealLoop::loop_predication_impl(IdealLoopTree *loop) { 5.425 + if (!UseLoopPredicate) return false; 5.426 + 5.427 + // Too many traps seen? 
5.428 + bool tmt = C->too_many_traps(C->method(), 0, Deoptimization::Reason_predicate); 5.429 + int tc = C->trap_count(Deoptimization::Reason_predicate); 5.430 + if (tmt || tc > 0) { 5.431 + if (TraceLoopPredicate) { 5.432 + tty->print_cr("too many predicate traps: %d", tc); 5.433 + C->method()->print(); // which method has too many predicate traps 5.434 + tty->print_cr(""); 5.435 + } 5.436 + return false; 5.437 + } 5.438 + 5.439 + CountedLoopNode *cl = NULL; 5.440 + if (loop->_head->is_CountedLoop()) { 5.441 + cl = loop->_head->as_CountedLoop(); 5.442 + // do nothing for iteration-splitted loops 5.443 + if(!cl->is_normal_loop()) return false; 5.444 + } 5.445 + 5.446 + LoopNode *lpn = loop->_head->as_Loop(); 5.447 + Node* entry = lpn->in(LoopNode::EntryControl); 5.448 + 5.449 + ProjNode *predicate_proj = find_predicate_insertion_point(entry); 5.450 + if (!predicate_proj){ 5.451 +#ifndef PRODUCT 5.452 + if (TraceLoopPredicate) { 5.453 + tty->print("missing predicate:"); 5.454 + loop->dump_head(); 5.455 + } 5.456 +#endif 5.457 + return false; 5.458 + } 5.459 + 5.460 + ConNode* zero = _igvn.intcon(0); 5.461 + set_ctrl(zero, C->root()); 5.462 + Node *cond_false = new (C, 2) Conv2BNode(zero); 5.463 + register_new_node(cond_false, C->root()); 5.464 + ConNode* one = _igvn.intcon(1); 5.465 + set_ctrl(one, C->root()); 5.466 + Node *cond_true = new (C, 2) Conv2BNode(one); 5.467 + register_new_node(cond_true, C->root()); 5.468 + 5.469 + ResourceArea *area = Thread::current()->resource_area(); 5.470 + Invariance invar(area, loop); 5.471 + 5.472 + // Create list of if-projs such that a newer proj dominates all older 5.473 + // projs in the list, and they all dominate loop->tail() 5.474 + Node_List if_proj_list(area); 5.475 + LoopNode *head = loop->_head->as_Loop(); 5.476 + Node *current_proj = loop->tail(); //start from tail 5.477 + while ( current_proj != head ) { 5.478 + if (loop == get_loop(current_proj) && // still in the loop ? 
5.479 + current_proj->is_Proj() && // is a projection ? 5.480 + current_proj->in(0)->Opcode() == Op_If) { // is a if projection ? 5.481 + if_proj_list.push(current_proj); 5.482 + } 5.483 + current_proj = idom(current_proj); 5.484 + } 5.485 + 5.486 + bool hoisted = false; // true if at least one proj is promoted 5.487 + while (if_proj_list.size() > 0) { 5.488 + // Following are changed to nonnull when a predicate can be hoisted 5.489 + ProjNode* new_predicate_proj = NULL; 5.490 + BoolNode* new_predicate_bol = NULL; 5.491 + 5.492 + ProjNode* proj = if_proj_list.pop()->as_Proj(); 5.493 + IfNode* iff = proj->in(0)->as_If(); 5.494 + 5.495 + if (!is_uncommon_trap_if_pattern(proj)) { 5.496 + if (loop->is_loop_exit(iff)) { 5.497 + // stop processing the remaining projs in the list because the execution of them 5.498 + // depends on the condition of "iff" (iff->in(1)). 5.499 + break; 5.500 + } else { 5.501 + // Both arms are inside the loop. There are two cases: 5.502 + // (1) there is one backward branch. In this case, any remaining proj 5.503 + // in the if_proj list post-dominates "iff". So, the condition of "iff" 5.504 + // does not determine the execution the remining projs directly, and we 5.505 + // can safely continue. 5.506 + // (2) both arms are forwarded, i.e. a diamond shape. In this case, "proj" 5.507 + // does not dominate loop->tail(), so it can not be in the if_proj list. 5.508 + continue; 5.509 + } 5.510 + } 5.511 + 5.512 + Node* test = iff->in(1); 5.513 + if (!test->is_Bool()){ //Conv2B, ... 
5.514 + continue; 5.515 + } 5.516 + BoolNode* bol = test->as_Bool(); 5.517 + if (invar.is_invariant(bol)) { 5.518 + // Invariant test 5.519 + new_predicate_proj = create_new_if_for_predicate(predicate_proj); 5.520 + Node* ctrl = new_predicate_proj->in(0)->as_If()->in(0); 5.521 + new_predicate_bol = invar.clone(bol, ctrl)->as_Bool(); 5.522 + if (TraceLoopPredicate) tty->print("invariant"); 5.523 + } else if (cl != NULL && loop->is_range_check_if(iff, this, invar)) { 5.524 + // Range check (only for counted loops) 5.525 + new_predicate_proj = create_new_if_for_predicate(predicate_proj); 5.526 + Node *ctrl = new_predicate_proj->in(0)->as_If()->in(0); 5.527 + const Node* cmp = bol->in(1)->as_Cmp(); 5.528 + Node* idx = cmp->in(1); 5.529 + assert(!invar.is_invariant(idx), "index is variant"); 5.530 + assert(cmp->in(2)->Opcode() == Op_LoadRange, "must be"); 5.531 + LoadRangeNode* ld_rng = (LoadRangeNode*)cmp->in(2); // LoadRangeNode 5.532 + assert(invar.is_invariant(ld_rng), "load range must be invariant"); 5.533 + ld_rng = (LoadRangeNode*)invar.clone(ld_rng, ctrl); 5.534 + int scale = 1; 5.535 + Node* offset = zero; 5.536 + bool ok = is_scaled_iv_plus_offset(idx, cl->phi(), &scale, &offset); 5.537 + assert(ok, "must be index expression"); 5.538 + if (offset && offset != zero) { 5.539 + assert(invar.is_invariant(offset), "offset must be loop invariant"); 5.540 + offset = invar.clone(offset, ctrl); 5.541 + } 5.542 + Node* init = cl->init_trip(); 5.543 + Node* limit = cl->limit(); 5.544 + Node* stride = cl->stride(); 5.545 + new_predicate_bol = rc_predicate(ctrl, scale, offset, init, limit, stride, ld_rng); 5.546 + if (TraceLoopPredicate) tty->print("range check"); 5.547 + } 5.548 + 5.549 + if (new_predicate_proj == NULL) { 5.550 + // The other proj of the "iff" is a uncommon trap projection, and we can assume 5.551 + // the other proj will not be executed ("executed" means uct raised). 
5.552 + continue; 5.553 + } else { 5.554 + // Success - attach condition (new_predicate_bol) to predicate if 5.555 + invar.map_ctrl(proj, new_predicate_proj); // so that invariance test can be appropriate 5.556 + IfNode* new_iff = new_predicate_proj->in(0)->as_If(); 5.557 + 5.558 + // Negate test if necessary 5.559 + if (proj->_con != predicate_proj->_con) { 5.560 + new_predicate_bol = new (C, 2) BoolNode(new_predicate_bol->in(1), new_predicate_bol->_test.negate()); 5.561 + register_new_node(new_predicate_bol, new_iff->in(0)); 5.562 + if (TraceLoopPredicate) tty->print_cr(" if negated: %d", iff->_idx); 5.563 + } else { 5.564 + if (TraceLoopPredicate) tty->print_cr(" if: %d", iff->_idx); 5.565 + } 5.566 + 5.567 + _igvn.hash_delete(new_iff); 5.568 + new_iff->set_req(1, new_predicate_bol); 5.569 + 5.570 + _igvn.hash_delete(iff); 5.571 + iff->set_req(1, proj->is_IfFalse() ? cond_false : cond_true); 5.572 + 5.573 + Node* ctrl = new_predicate_proj; // new control 5.574 + ProjNode* dp = proj; // old control 5.575 + assert(get_loop(dp) == loop, "guarenteed at the time of collecting proj"); 5.576 + // Find nodes (depends only on the test) off the surviving projection; 5.577 + // move them outside the loop with the control of proj_clone 5.578 + for (DUIterator_Fast imax, i = dp->fast_outs(imax); i < imax; i++) { 5.579 + Node* cd = dp->fast_out(i); // Control-dependent node 5.580 + if (cd->depends_only_on_test()) { 5.581 + assert(cd->in(0) == dp, ""); 5.582 + _igvn.hash_delete(cd); 5.583 + cd->set_req(0, ctrl); // ctrl, not NULL 5.584 + set_early_ctrl(cd); 5.585 + _igvn._worklist.push(cd); 5.586 + IdealLoopTree *new_loop = get_loop(get_ctrl(cd)); 5.587 + if (new_loop != loop) { 5.588 + if (!loop->_child) loop->_body.yank(cd); 5.589 + if (!new_loop->_child ) new_loop->_body.push(cd); 5.590 + } 5.591 + --i; 5.592 + --imax; 5.593 + } 5.594 + } 5.595 + 5.596 + hoisted = true; 5.597 + C->set_major_progress(); 5.598 + } 5.599 + } // end while 5.600 + 5.601 +#ifndef PRODUCT 5.602 + 
// report that the loop predication has actually been performed 5.603 + // for this loop 5.604 + if (TraceLoopPredicate && hoisted) { 5.605 + tty->print("Loop Predication Performed:"); 5.606 + loop->dump_head(); 5.607 + } 5.608 +#endif 5.609 + 5.610 + return hoisted; 5.611 +} 5.612 + 5.613 +//------------------------------loop_predication-------------------------------- 5.614 +// driver routine for loop predication optimization 5.615 +bool IdealLoopTree::loop_predication( PhaseIdealLoop *phase) { 5.616 + bool hoisted = false; 5.617 + // Recursively promote predicates 5.618 + if ( _child ) { 5.619 + hoisted = _child->loop_predication( phase); 5.620 + } 5.621 + 5.622 + // self 5.623 + if (!_irreducible && !tail()->is_top()) { 5.624 + hoisted |= phase->loop_predication_impl(this); 5.625 + } 5.626 + 5.627 + if ( _next ) { // sibling 5.628 + hoisted |= _next->loop_predication( phase); 5.629 + } 5.630 + 5.631 + return hoisted; 5.632 +}
6.1 --- a/src/share/vm/opto/loopnode.cpp Sat Jan 09 00:59:35 2010 -0800 6.2 +++ b/src/share/vm/opto/loopnode.cpp Tue Jan 12 14:37:35 2010 -0800 6.3 @@ -1420,11 +1420,57 @@ 6.4 } 6.5 } 6.6 6.7 +//---------------------collect_potentially_useful_predicates----------------------- 6.8 +// Helper function to collect potentially useful predicates to prevent them from 6.9 +// being eliminated by PhaseIdealLoop::eliminate_useless_predicates 6.10 +void PhaseIdealLoop::collect_potentially_useful_predicates( 6.11 + IdealLoopTree * loop, Unique_Node_List &useful_predicates) { 6.12 + if (loop->_child) { // child 6.13 + collect_potentially_useful_predicates(loop->_child, useful_predicates); 6.14 + } 6.15 + 6.16 + // self (only loops that we can apply loop predication may use their predicates) 6.17 + if (loop->_head->is_Loop() && 6.18 + !loop->_irreducible && 6.19 + !loop->tail()->is_top()) { 6.20 + LoopNode *lpn = loop->_head->as_Loop(); 6.21 + Node* entry = lpn->in(LoopNode::EntryControl); 6.22 + ProjNode *predicate_proj = find_predicate_insertion_point(entry); 6.23 + if (predicate_proj != NULL ) { // right pattern that can be used by loop predication 6.24 + assert(entry->in(0)->in(1)->in(1)->Opcode()==Op_Opaque1, "must be"); 6.25 + useful_predicates.push(entry->in(0)->in(1)->in(1)); // good one 6.26 + } 6.27 + } 6.28 + 6.29 + if ( loop->_next ) { // sibling 6.30 + collect_potentially_useful_predicates(loop->_next, useful_predicates); 6.31 + } 6.32 +} 6.33 + 6.34 +//------------------------eliminate_useless_predicates----------------------------- 6.35 +// Eliminate all inserted predicates if they could not be used by loop predication. 
6.36 +void PhaseIdealLoop::eliminate_useless_predicates() { 6.37 + if (C->predicate_count() == 0) return; // no predicate left 6.38 + 6.39 + Unique_Node_List useful_predicates; // to store useful predicates 6.40 + if (C->has_loops()) { 6.41 + collect_potentially_useful_predicates(_ltree_root->_child, useful_predicates); 6.42 + } 6.43 + 6.44 + for (int i = C->predicate_count(); i > 0; i--) { 6.45 + Node * n = C->predicate_opaque1_node(i-1); 6.46 + assert(n->Opcode() == Op_Opaque1, "must be"); 6.47 + if (!useful_predicates.member(n)) { // not in the useful list 6.48 + _igvn.replace_node(n, n->in(1)); 6.49 + } 6.50 + } 6.51 +} 6.52 + 6.53 //============================================================================= 6.54 //----------------------------build_and_optimize------------------------------- 6.55 // Create a PhaseLoop. Build the ideal Loop tree. Map each Ideal Node to 6.56 // its corresponding LoopNode. If 'optimize' is true, do some loop cleanups. 6.57 -void PhaseIdealLoop::build_and_optimize(bool do_split_ifs) { 6.58 +void PhaseIdealLoop::build_and_optimize(bool do_split_ifs, bool do_loop_pred) { 6.59 int old_progress = C->major_progress(); 6.60 6.61 // Reset major-progress flag for the driver's heuristics 6.62 @@ -1577,6 +1623,12 @@ 6.63 return; 6.64 } 6.65 6.66 + // some parser-inserted loop predicates could never be used by loop 6.67 + // predication. 
Eliminate them before loop optimization 6.68 + if (UseLoopPredicate) { 6.69 + eliminate_useless_predicates(); 6.70 + } 6.71 + 6.72 // clear out the dead code 6.73 while(_deadlist.size()) { 6.74 _igvn.remove_globally_dead_node(_deadlist.pop()); 6.75 @@ -1603,7 +1655,7 @@ 6.76 // Because RCE opportunities can be masked by split_thru_phi, 6.77 // look for RCE candidates and inhibit split_thru_phi 6.78 // on just their loop-phi's for this pass of loop opts 6.79 - if( SplitIfBlocks && do_split_ifs ) { 6.80 + if (SplitIfBlocks && do_split_ifs) { 6.81 if (lpt->policy_range_check(this)) { 6.82 lpt->_rce_candidate = 1; // = true 6.83 } 6.84 @@ -1619,12 +1671,17 @@ 6.85 NOT_PRODUCT( if( VerifyLoopOptimizations ) verify(); ); 6.86 } 6.87 6.88 + // Perform loop predication before iteration splitting 6.89 + if (do_loop_pred && C->has_loops() && !C->major_progress()) { 6.90 + _ltree_root->_child->loop_predication(this); 6.91 + } 6.92 + 6.93 // Perform iteration-splitting on inner loops. Split iterations to avoid 6.94 // range checks or one-shot null checks. 6.95 6.96 // If split-if's didn't hack the graph too bad (no CFG changes) 6.97 // then do loop opts. 6.98 - if( C->has_loops() && !C->major_progress() ) { 6.99 + if (C->has_loops() && !C->major_progress()) { 6.100 memset( worklist.adr(), 0, worklist.Size()*sizeof(Node*) ); 6.101 _ltree_root->_child->iteration_split( this, worklist ); 6.102 // No verify after peeling! GCM has hoisted code out of the loop. 6.103 @@ -1636,7 +1693,7 @@ 6.104 // Do verify graph edges in any case 6.105 NOT_PRODUCT( C->verify_graph_edges(); ); 6.106 6.107 - if( !do_split_ifs ) { 6.108 + if (!do_split_ifs) { 6.109 // We saw major progress in Split-If to get here. We forced a 6.110 // pass with unrolling and not split-if, however more split-if's 6.111 // might make progress. 
If the unrolling didn't make progress 6.112 @@ -2763,6 +2820,22 @@ 6.113 Node *legal = LCA; // Walk 'legal' up the IDOM chain 6.114 Node *least = legal; // Best legal position so far 6.115 while( early != legal ) { // While not at earliest legal 6.116 +#ifdef ASSERT 6.117 + if (legal->is_Start() && !early->is_Root()) { 6.118 + // Bad graph. Print idom path and fail. 6.119 + tty->print_cr( "Bad graph detected in build_loop_late"); 6.120 + tty->print("n: ");n->dump(); tty->cr(); 6.121 + tty->print("early: ");early->dump(); tty->cr(); 6.122 + int ct = 0; 6.123 + Node *dbg_legal = LCA; 6.124 + while(!dbg_legal->is_Start() && ct < 100) { 6.125 + tty->print("idom[%d] ",ct); dbg_legal->dump(); tty->cr(); 6.126 + ct++; 6.127 + dbg_legal = idom(dbg_legal); 6.128 + } 6.129 + assert(false, "Bad graph detected in build_loop_late"); 6.130 + } 6.131 +#endif 6.132 // Find least loop nesting depth 6.133 legal = idom(legal); // Bump up the IDOM tree 6.134 // Check for lower nesting depth
7.1 --- a/src/share/vm/opto/loopnode.hpp Sat Jan 09 00:59:35 2010 -0800 7.2 +++ b/src/share/vm/opto/loopnode.hpp Tue Jan 12 14:37:35 2010 -0800 7.3 @@ -30,6 +30,7 @@ 7.4 class Node; 7.5 class PhaseIdealLoop; 7.6 class VectorSet; 7.7 +class Invariance; 7.8 struct small_cache; 7.9 7.10 // 7.11 @@ -325,6 +326,10 @@ 7.12 // Returns TRUE if loop tree is structurally changed. 7.13 bool beautify_loops( PhaseIdealLoop *phase ); 7.14 7.15 + // Perform optimization to use the loop predicates for null checks and range checks. 7.16 + // Applies to any loop level (not just the innermost one) 7.17 + bool loop_predication( PhaseIdealLoop *phase); 7.18 + 7.19 // Perform iteration-splitting on inner loops. Split iterations to 7.20 // avoid range checks or one-shot null checks. Returns false if the 7.21 // current round of loop opts should stop. 7.22 @@ -395,6 +400,9 @@ 7.23 // into longer memory ops, we may want to increase alignment. 7.24 bool policy_align( PhaseIdealLoop *phase ) const; 7.25 7.26 + // Return TRUE if "iff" is a range check. 7.27 + bool is_range_check_if(IfNode *iff, PhaseIdealLoop *phase, Invariance& invar) const; 7.28 + 7.29 // Compute loop trip count from profile data 7.30 void compute_profile_trip_cnt( PhaseIdealLoop *phase ); 7.31 7.32 @@ -521,9 +529,6 @@ 7.33 } 7.34 Node *dom_lca_for_get_late_ctrl_internal( Node *lca, Node *n, Node *tag ); 7.35 7.36 - // true if CFG node d dominates CFG node n 7.37 - bool is_dominator(Node *d, Node *n); 7.38 - 7.39 // Helper function for directing control inputs away from CFG split 7.40 // points. 
7.41 Node *find_non_split_ctrl( Node *ctrl ) const { 7.42 @@ -572,6 +577,17 @@ 7.43 assert(n == find_non_split_ctrl(n), "must return legal ctrl" ); 7.44 return n; 7.45 } 7.46 + // true if CFG node d dominates CFG node n 7.47 + bool is_dominator(Node *d, Node *n); 7.48 + // return get_ctrl for a data node and self(n) for a CFG node 7.49 + Node* ctrl_or_self(Node* n) { 7.50 + if (has_ctrl(n)) 7.51 + return get_ctrl(n); 7.52 + else { 7.53 + assert (n->is_CFG(), "must be a CFG node"); 7.54 + return n; 7.55 + } 7.56 + } 7.57 7.58 private: 7.59 Node *get_ctrl_no_update( Node *i ) const { 7.60 @@ -600,7 +616,7 @@ 7.61 // Lazy-dazy update of 'get_ctrl' and 'idom_at' mechanisms. Replace 7.62 // the 'old_node' with 'new_node'. Kill old-node. Add a reference 7.63 // from old_node to new_node to support the lazy update. Reference 7.64 - // replaces loop reference, since that is not neede for dead node. 7.65 + // replaces loop reference, since that is not needed for dead node. 7.66 public: 7.67 void lazy_update( Node *old_node, Node *new_node ) { 7.68 assert( old_node != new_node, "no cycles please" ); 7.69 @@ -679,11 +695,11 @@ 7.70 _dom_lca_tags(C->comp_arena()), 7.71 _verify_me(NULL), 7.72 _verify_only(true) { 7.73 - build_and_optimize(false); 7.74 + build_and_optimize(false, false); 7.75 } 7.76 7.77 // build the loop tree and perform any requested optimizations 7.78 - void build_and_optimize(bool do_split_if); 7.79 + void build_and_optimize(bool do_split_if, bool do_loop_pred); 7.80 7.81 public: 7.82 // Dominators for the sea of nodes 7.83 @@ -694,13 +710,13 @@ 7.84 Node *dom_lca_internal( Node *n1, Node *n2 ) const; 7.85 7.86 // Compute the Ideal Node to Loop mapping 7.87 - PhaseIdealLoop( PhaseIterGVN &igvn, bool do_split_ifs) : 7.88 + PhaseIdealLoop( PhaseIterGVN &igvn, bool do_split_ifs, bool do_loop_pred) : 7.89 PhaseTransform(Ideal_Loop), 7.90 _igvn(igvn), 7.91 _dom_lca_tags(C->comp_arena()), 7.92 _verify_me(NULL), 7.93 _verify_only(false) { 7.94 - 
build_and_optimize(do_split_ifs); 7.95 + build_and_optimize(do_split_ifs, do_loop_pred); 7.96 } 7.97 7.98 // Verify that verify_me made the same decisions as a fresh run. 7.99 @@ -710,7 +726,7 @@ 7.100 _dom_lca_tags(C->comp_arena()), 7.101 _verify_me(verify_me), 7.102 _verify_only(false) { 7.103 - build_and_optimize(false); 7.104 + build_and_optimize(false, false); 7.105 } 7.106 7.107 // Build and verify the loop tree without modifying the graph. This 7.108 @@ -790,6 +806,30 @@ 7.109 // Return true if exp is a scaled induction var plus (or minus) constant 7.110 bool is_scaled_iv_plus_offset(Node* exp, Node* iv, int* p_scale, Node** p_offset, int depth = 0); 7.111 7.112 + // Return true if proj is for "proj->[region->..]call_uct" 7.113 + bool is_uncommon_trap_proj(ProjNode* proj, bool must_reason_predicate = false); 7.114 + // Return true for "if(test)-> proj -> ... 7.115 + // | 7.116 + // V 7.117 + // other_proj->[region->..]call_uct" 7.118 + bool is_uncommon_trap_if_pattern(ProjNode* proj, bool must_reason_predicate = false); 7.119 + // Create a new if above the uncommon_trap_if_pattern for the predicate to be promoted 7.120 + ProjNode* create_new_if_for_predicate(ProjNode* cont_proj); 7.121 + // Find a good location to insert a predicate 7.122 + ProjNode* find_predicate_insertion_point(Node* start_c); 7.123 + // Construct a range check for a predicate if 7.124 + BoolNode* rc_predicate(Node* ctrl, 7.125 + int scale, Node* offset, 7.126 + Node* init, Node* limit, Node* stride, 7.127 + Node* range); 7.128 + 7.129 + // Implementation of the loop predication to promote checks outside the loop 7.130 + bool loop_predication_impl(IdealLoopTree *loop); 7.131 + 7.132 + // Helper function to collect predicate for eliminating the useless ones 7.133 + void collect_potentially_useful_predicates(IdealLoopTree *loop, Unique_Node_List &predicate_opaque1); 7.134 + void eliminate_useless_predicates(); 7.135 + 7.136 // Eliminate range-checks and other trip-counter vs loop-invariant 
tests. 7.137 void do_range_check( IdealLoopTree *loop, Node_List &old_new ); 7.138 7.139 @@ -906,7 +946,6 @@ 7.140 const TypeInt* filtered_type_from_dominators( Node* val, Node *val_ctrl); 7.141 7.142 // Helper functions 7.143 - void register_new_node( Node *n, Node *blk ); 7.144 Node *spinup( Node *iff, Node *new_false, Node *new_true, Node *region, Node *phi, small_cache *cache ); 7.145 Node *find_use_block( Node *use, Node *def, Node *old_false, Node *new_false, Node *old_true, Node *new_true ); 7.146 void handle_use( Node *use, Node *def, small_cache *cache, Node *region_dom, Node *new_false, Node *new_true, Node *old_false, Node *old_true ); 7.147 @@ -918,6 +957,7 @@ 7.148 public: 7.149 void set_created_loop_node() { _created_loop_node = true; } 7.150 bool created_loop_node() { return _created_loop_node; } 7.151 + void register_new_node( Node *n, Node *blk ); 7.152 7.153 #ifndef PRODUCT 7.154 void dump( ) const;
8.1 --- a/src/share/vm/opto/parse.hpp Sat Jan 09 00:59:35 2010 -0800 8.2 +++ b/src/share/vm/opto/parse.hpp Tue Jan 12 14:37:35 2010 -0800 8.3 @@ -430,6 +430,11 @@ 8.4 } 8.5 } 8.6 8.7 + // Return true if the parser should add a loop predicate 8.8 + bool should_add_predicate(int target_bci); 8.9 + // Insert a loop predicate into the graph 8.10 + void add_predicate(); 8.11 + 8.12 // Note: Intrinsic generation routines may be found in library_call.cpp. 8.13 8.14 // Helper function to setup Ideal Call nodes 8.15 @@ -491,7 +496,7 @@ 8.16 8.17 void do_ifnull(BoolTest::mask btest, Node* c); 8.18 void do_if(BoolTest::mask btest, Node* c); 8.19 - void repush_if_args(); 8.20 + int repush_if_args(); 8.21 void adjust_map_after_if(BoolTest::mask btest, Node* c, float prob, 8.22 Block* path, Block* other_path); 8.23 IfNode* jump_if_fork_int(Node* a, Node* b, BoolTest::mask mask);
9.1 --- a/src/share/vm/opto/parse1.cpp Sat Jan 09 00:59:35 2010 -0800 9.2 +++ b/src/share/vm/opto/parse1.cpp Tue Jan 12 14:37:35 2010 -0800 9.3 @@ -1383,6 +1383,10 @@ 9.4 set_parse_bci(iter().cur_bci()); 9.5 9.6 if (bci() == block()->limit()) { 9.7 + // insert a predicate if it falls through to a loop head block 9.8 + if (should_add_predicate(bci())){ 9.9 + add_predicate(); 9.10 + } 9.11 // Do not walk into the next block until directed by do_all_blocks. 9.12 merge(bci()); 9.13 break; 9.14 @@ -2083,6 +2087,37 @@ 9.15 } 9.16 } 9.17 9.18 +//------------------------------should_add_predicate-------------------------- 9.19 +bool Parse::should_add_predicate(int target_bci) { 9.20 + if (!UseLoopPredicate) return false; 9.21 + Block* target = successor_for_bci(target_bci); 9.22 + if (target != NULL && 9.23 + target->is_loop_head() && 9.24 + block()->rpo() < target->rpo()) { 9.25 + return true; 9.26 + } 9.27 + return false; 9.28 +} 9.29 + 9.30 +//------------------------------add_predicate--------------------------------- 9.31 +void Parse::add_predicate() { 9.32 + assert(UseLoopPredicate,"use only for loop predicate"); 9.33 + Node *cont = _gvn.intcon(1); 9.34 + Node* opq = _gvn.transform(new (C, 2) Opaque1Node(C, cont)); 9.35 + Node *bol = _gvn.transform(new (C, 2) Conv2BNode(opq)); 9.36 + IfNode* iff = create_and_map_if(control(), bol, PROB_MAX, COUNT_UNKNOWN); 9.37 + Node* iffalse = _gvn.transform(new (C, 1) IfFalseNode(iff)); 9.38 + C->add_predicate_opaq(opq); 9.39 + { 9.40 + PreserveJVMState pjvms(this); 9.41 + set_control(iffalse); 9.42 + uncommon_trap(Deoptimization::Reason_predicate, 9.43 + Deoptimization::Action_maybe_recompile); 9.44 + } 9.45 + Node* iftrue = _gvn.transform(new (C, 1) IfTrueNode(iff)); 9.46 + set_control(iftrue); 9.47 +} 9.48 + 9.49 #ifndef PRODUCT 9.50 //------------------------show_parse_info-------------------------------------- 9.51 void Parse::show_parse_info() {
10.1 --- a/src/share/vm/opto/parse2.cpp Sat Jan 09 00:59:35 2010 -0800 10.2 +++ b/src/share/vm/opto/parse2.cpp Tue Jan 12 14:37:35 2010 -0800 10.3 @@ -278,6 +278,11 @@ 10.4 if (len < 1) { 10.5 // If this is a backward branch, add safepoint 10.6 maybe_add_safepoint(default_dest); 10.7 + if (should_add_predicate(default_dest)){ 10.8 + _sp += 1; // set original stack for use by uncommon_trap 10.9 + add_predicate(); 10.10 + _sp -= 1; 10.11 + } 10.12 merge(default_dest); 10.13 return; 10.14 } 10.15 @@ -324,6 +329,11 @@ 10.16 10.17 if (len < 1) { // If this is a backward branch, add safepoint 10.18 maybe_add_safepoint(default_dest); 10.19 + if (should_add_predicate(default_dest)){ 10.20 + _sp += 1; // set original stack for use by uncommon_trap 10.21 + add_predicate(); 10.22 + _sp -= 1; 10.23 + } 10.24 merge(default_dest); 10.25 return; 10.26 } 10.27 @@ -731,6 +741,9 @@ 10.28 push(_gvn.makecon(ret_addr)); 10.29 10.30 // Flow to the jsr. 10.31 + if (should_add_predicate(jsr_bci)){ 10.32 + add_predicate(); 10.33 + } 10.34 merge(jsr_bci); 10.35 } 10.36 10.37 @@ -881,7 +894,7 @@ 10.38 10.39 //-------------------------------repush_if_args-------------------------------- 10.40 // Push arguments of an "if" bytecode back onto the stack by adjusting _sp. 
10.41 -inline void Parse::repush_if_args() { 10.42 +inline int Parse::repush_if_args() { 10.43 #ifndef PRODUCT 10.44 if (PrintOpto && WizardMode) { 10.45 tty->print("defending against excessive implicit null exceptions on %s @%d in ", 10.46 @@ -895,6 +908,7 @@ 10.47 assert(argument(0) != NULL, "must exist"); 10.48 assert(bc_depth == 1 || argument(1) != NULL, "two must exist"); 10.49 _sp += bc_depth; 10.50 + return bc_depth; 10.51 } 10.52 10.53 //----------------------------------do_ifnull---------------------------------- 10.54 @@ -954,8 +968,14 @@ 10.55 // Update method data 10.56 profile_taken_branch(target_bci); 10.57 adjust_map_after_if(btest, c, prob, branch_block, next_block); 10.58 - if (!stopped()) 10.59 + if (!stopped()) { 10.60 + if (should_add_predicate(target_bci)){ // add a predicate if it branches to a loop 10.61 + int nargs = repush_if_args(); // set original stack for uncommon_trap 10.62 + add_predicate(); 10.63 + _sp -= nargs; 10.64 + } 10.65 merge(target_bci); 10.66 + } 10.67 } 10.68 } 10.69 10.70 @@ -1076,8 +1096,14 @@ 10.71 // Update method data 10.72 profile_taken_branch(target_bci); 10.73 adjust_map_after_if(taken_btest, c, prob, branch_block, next_block); 10.74 - if (!stopped()) 10.75 + if (!stopped()) { 10.76 + if (should_add_predicate(target_bci)){ // add a predicate if it branches to a loop 10.77 + int nargs = repush_if_args(); // set original stack for the uncommon_trap 10.78 + add_predicate(); 10.79 + _sp -= nargs; 10.80 + } 10.81 merge(target_bci); 10.82 + } 10.83 } 10.84 } 10.85 10.86 @@ -2080,6 +2106,10 @@ 10.87 // Update method data 10.88 profile_taken_branch(target_bci); 10.89 10.90 + // Add loop predicate if it goes to a loop 10.91 + if (should_add_predicate(target_bci)){ 10.92 + add_predicate(); 10.93 + } 10.94 // Merge the current control into the target basic block 10.95 merge(target_bci); 10.96
11.1 --- a/src/share/vm/opto/split_if.cpp Sat Jan 09 00:59:35 2010 -0800 11.2 +++ b/src/share/vm/opto/split_if.cpp Tue Jan 12 14:37:35 2010 -0800 11.3 @@ -219,6 +219,7 @@ 11.4 11.5 //------------------------------register_new_node------------------------------ 11.6 void PhaseIdealLoop::register_new_node( Node *n, Node *blk ) { 11.7 + assert(!n->is_CFG(), "must be data node"); 11.8 _igvn.register_new_node_with_optimizer(n); 11.9 set_ctrl(n, blk); 11.10 IdealLoopTree *loop = get_loop(blk);
12.1 --- a/src/share/vm/runtime/deoptimization.cpp Sat Jan 09 00:59:35 2010 -0800 12.2 +++ b/src/share/vm/runtime/deoptimization.cpp Tue Jan 12 14:37:35 2010 -0800 12.3 @@ -1672,7 +1672,8 @@ 12.4 "unhandled", 12.5 "constraint", 12.6 "div0_check", 12.7 - "age" 12.8 + "age", 12.9 + "predicate" 12.10 }; 12.11 const char* Deoptimization::_trap_action_name[Action_LIMIT] = { 12.12 // Note: Keep this in sync. with enum DeoptAction.
13.1 --- a/src/share/vm/runtime/deoptimization.hpp Sat Jan 09 00:59:35 2010 -0800 13.2 +++ b/src/share/vm/runtime/deoptimization.hpp Tue Jan 12 14:37:35 2010 -0800 13.3 @@ -46,6 +46,7 @@ 13.4 Reason_constraint, // arbitrary runtime constraint violated 13.5 Reason_div0_check, // a null_check due to division by zero 13.6 Reason_age, // nmethod too old; tier threshold reached 13.7 + Reason_predicate, // compiler-generated predicate failed 13.8 Reason_LIMIT, 13.9 // Note: Keep this enum in sync. with _trap_reason_name. 13.10 Reason_RECORDED_LIMIT = Reason_unloaded // some are not recorded per bc