1.1 --- a/src/share/vm/opto/loopTransform.cpp Sat Apr 02 09:49:27 2011 -0700 1.2 +++ b/src/share/vm/opto/loopTransform.cpp Sat Apr 02 10:54:15 2011 -0700 1.3 @@ -301,6 +301,132 @@ 1.4 // peeled-loop backedge has 2 users. 1.5 // Step 3: Cut the backedge on the clone (so its not a loop) and remove the 1.6 // extra backedge user. 1.7 +// 1.8 +// orig 1.9 +// 1.10 +// stmt1 1.11 +// | 1.12 +// v 1.13 +// loop predicate 1.14 +// | 1.15 +// v 1.16 +// loop<----+ 1.17 +// | | 1.18 +// stmt2 | 1.19 +// | | 1.20 +// v | 1.21 +// if ^ 1.22 +// / \ | 1.23 +// / \ | 1.24 +// v v | 1.25 +// false true | 1.26 +// / \ | 1.27 +// / ----+ 1.28 +// | 1.29 +// v 1.30 +// exit 1.31 +// 1.32 +// 1.33 +// after clone loop 1.34 +// 1.35 +// stmt1 1.36 +// | 1.37 +// v 1.38 +// loop predicate 1.39 +// / \ 1.40 +// clone / \ orig 1.41 +// / \ 1.42 +// / \ 1.43 +// v v 1.44 +// +---->loop clone loop<----+ 1.45 +// | | | | 1.46 +// | stmt2 clone stmt2 | 1.47 +// | | | | 1.48 +// | v v | 1.49 +// ^ if clone If ^ 1.50 +// | / \ / \ | 1.51 +// | / \ / \ | 1.52 +// | v v v v | 1.53 +// | true false false true | 1.54 +// | / \ / \ | 1.55 +// +---- \ / ----+ 1.56 +// \ / 1.57 +// 1v v2 1.58 +// region 1.59 +// | 1.60 +// v 1.61 +// exit 1.62 +// 1.63 +// 1.64 +// after peel and predicate move 1.65 +// 1.66 +// stmt1 1.67 +// / 1.68 +// / 1.69 +// clone / orig 1.70 +// / 1.71 +// / +----------+ 1.72 +// / | | 1.73 +// / loop predicate | 1.74 +// / | | 1.75 +// v v | 1.76 +// TOP-->loop clone loop<----+ | 1.77 +// | | | | 1.78 +// stmt2 clone stmt2 | | 1.79 +// | | | ^ 1.80 +// v v | | 1.81 +// if clone If ^ | 1.82 +// / \ / \ | | 1.83 +// / \ / \ | | 1.84 +// v v v v | | 1.85 +// true false false true | | 1.86 +// | \ / \ | | 1.87 +// | \ / ----+ ^ 1.88 +// | \ / | 1.89 +// | 1v v2 | 1.90 +// v region | 1.91 +// | | | 1.92 +// | v | 1.93 +// | exit | 1.94 +// | | 1.95 +// +--------------->-----------------+ 1.96 +// 1.97 +// 1.98 +// final graph 1.99 +// 1.100 +// stmt1 1.101 +// | 1.102 +// v 1.103 +// stmt2 clone 1.104 +// | 1.105 +// v 1.106 +// if clone 1.107 +// / | 1.108 +// / | 1.109 +// v v 1.110 +// false true 1.111 +// | | 1.112 +// | v 1.113 +// | loop predicate 1.114 +// | | 1.115 +// | v 1.116 +// | loop<----+ 1.117 +// | | | 1.118 +// | stmt2 | 1.119 +// | | | 1.120 +// | v | 1.121 +// v if ^ 1.122 +// | / \ | 1.123 +// | / \ | 1.124 +// | v v | 1.125 +// | false true | 1.126 +// | | \ | 1.127 +// v v --+ 1.128 +// region 1.129 +// | 1.130 +// v 1.131 +// exit 1.132 +// 1.133 void PhaseIdealLoop::do_peeling( IdealLoopTree *loop, Node_List &old_new ) { 1.134 1.135 C->set_major_progress(); 1.136 @@ -315,9 +441,10 @@ 1.137 loop->dump_head(); 1.138 } 1.139 #endif 1.140 - Node *h = loop->_head; 1.141 - if (h->is_CountedLoop()) { 1.142 - CountedLoopNode *cl = h->as_CountedLoop(); 1.143 + Node* head = loop->_head; 1.144 + bool counted_loop = head->is_CountedLoop(); 1.145 + if (counted_loop) { 1.146 + CountedLoopNode *cl = head->as_CountedLoop(); 1.147 assert(cl->trip_count() > 0, "peeling a fully unrolled loop"); 1.148 cl->set_trip_count(cl->trip_count() - 1); 1.149 if (cl->is_main_loop()) { 1.150 @@ -330,11 +457,11 @@ 1.151 #endif 1.152 } 1.153 } 1.154 + Node* entry = head->in(LoopNode::EntryControl); 1.155 1.156 // Step 1: Clone the loop body. The clone becomes the peeled iteration. 1.157 // The pre-loop illegally has 2 control users (old & new loops). 1.158 - clone_loop( loop, old_new, dom_depth(loop->_head) ); 1.159 - 1.160 + clone_loop( loop, old_new, dom_depth(head) ); 1.161 1.162 // Step 2: Make the old-loop fall-in edges point to the peeled iteration. 1.163 // Do this by making the old-loop fall-in edges act as if they came 1.164 @@ -342,12 +469,15 @@ 1.165 // backedges) and then map to the new peeled iteration. This leaves 1.166 // the pre-loop with only 1 user (the new peeled iteration), but the 1.167 // peeled-loop backedge has 2 users. 1.168 - for (DUIterator_Fast jmax, j = loop->_head->fast_outs(jmax); j < jmax; j++) { 1.169 - Node* old = loop->_head->fast_out(j); 1.170 - if( old->in(0) == loop->_head && old->req() == 3 && 1.171 - (old->is_Loop() || old->is_Phi()) ) { 1.172 - Node *new_exit_value = old_new[old->in(LoopNode::LoopBackControl)->_idx]; 1.173 - if( !new_exit_value ) // Backedge value is ALSO loop invariant? 1.174 + Node* new_exit_value = old_new[head->in(LoopNode::LoopBackControl)->_idx]; 1.175 + new_exit_value = move_loop_predicates(entry, new_exit_value); 1.176 + _igvn.hash_delete(head); 1.177 + head->set_req(LoopNode::EntryControl, new_exit_value); 1.178 + for (DUIterator_Fast jmax, j = head->fast_outs(jmax); j < jmax; j++) { 1.179 + Node* old = head->fast_out(j); 1.180 + if (old->in(0) == loop->_head && old->req() == 3 && old->is_Phi()) { 1.181 + new_exit_value = old_new[old->in(LoopNode::LoopBackControl)->_idx]; 1.182 + if (!new_exit_value ) // Backedge value is ALSO loop invariant? 1.183 // Then loop body backedge value remains the same. 1.184 new_exit_value = old->in(LoopNode::LoopBackControl); 1.185 _igvn.hash_delete(old); 1.186 @@ -358,12 +488,12 @@ 1.187 1.188 // Step 3: Cut the backedge on the clone (so its not a loop) and remove the 1.189 // extra backedge user. 1.190 - Node *nnn = old_new[loop->_head->_idx]; 1.191 - _igvn.hash_delete(nnn); 1.192 - nnn->set_req(LoopNode::LoopBackControl, C->top()); 1.193 - for (DUIterator_Fast j2max, j2 = nnn->fast_outs(j2max); j2 < j2max; j2++) { 1.194 - Node* use = nnn->fast_out(j2); 1.195 - if( use->in(0) == nnn && use->req() == 3 && use->is_Phi() ) { 1.196 + Node* new_head = old_new[head->_idx]; 1.197 + _igvn.hash_delete(new_head); 1.198 + new_head->set_req(LoopNode::LoopBackControl, C->top()); 1.199 + for (DUIterator_Fast j2max, j2 = new_head->fast_outs(j2max); j2 < j2max; j2++) { 1.200 + Node* use = new_head->fast_out(j2); 1.201 + if (use->in(0) == new_head && use->req() == 3 && use->is_Phi()) { 1.202 _igvn.hash_delete(use); 1.203 use->set_req(LoopNode::LoopBackControl, C->top()); 1.204 } 1.205 @@ -371,15 +501,15 @@ 1.206 1.207 1.208 // Step 4: Correct dom-depth info. Set to loop-head depth. 1.209 - int dd = dom_depth(loop->_head); 1.210 - set_idom(loop->_head, loop->_head->in(1), dd); 1.211 + int dd = dom_depth(head); 1.212 + set_idom(head, head->in(1), dd); 1.213 for (uint j3 = 0; j3 < loop->_body.size(); j3++) { 1.214 Node *old = loop->_body.at(j3); 1.215 Node *nnn = old_new[old->_idx]; 1.216 if (!has_ctrl(nnn)) 1.217 set_idom(nnn, idom(nnn), dd-1); 1.218 // While we're at it, remove any SafePoints from the peeled code 1.219 - if( old->Opcode() == Op_SafePoint ) { 1.220 + if (old->Opcode() == Op_SafePoint) { 1.221 Node *nnn = old_new[old->_idx]; 1.222 lazy_replace(nnn,nnn->in(TypeFunc::Control)); 1.223 } 1.224 @@ -1659,7 +1789,7 @@ 1.225 bool needs_guard = !cl->is_main_loop() && !cl->is_post_loop(); 1.226 if (needs_guard) { 1.227 // Check for an obvious zero trip guard. 1.228 - Node* inctrl = cl->in(LoopNode::EntryControl); 1.229 + Node* inctrl = PhaseIdealLoop::skip_loop_predicates(cl->in(LoopNode::EntryControl)); 1.230 if (inctrl->Opcode() == Op_IfTrue) { 1.231 // The test should look like just the backedge of a CountedLoop 1.232 Node* iff = inctrl->in(0); 1.233 @@ -1861,651 +1991,8 @@ 1.234 return true; 1.235 } 1.236 1.237 -//-------------------------------is_uncommon_trap_proj---------------------------- 1.238 -// Return true if proj is the form of "proj->[region->..]call_uct" 1.239 -bool PhaseIdealLoop::is_uncommon_trap_proj(ProjNode* proj, Deoptimization::DeoptReason reason) { 1.240 - int path_limit = 10; 1.241 - assert(proj, "invalid argument"); 1.242 - Node* out = proj; 1.243 - for (int ct = 0; ct < path_limit; ct++) { 1.244 - out = out->unique_ctrl_out(); 1.245 - if (out == NULL || out->is_Root() || out->is_Start()) 1.246 - return false; 1.247 - if (out->is_CallStaticJava()) { 1.248 - int req = out->as_CallStaticJava()->uncommon_trap_request(); 1.249 - if (req != 0) { 1.250 - Deoptimization::DeoptReason trap_reason = Deoptimization::trap_request_reason(req); 1.251 - if (trap_reason == reason || reason == Deoptimization::Reason_none) { 1.252 - return true; 1.253 - } 1.254 - } 1.255 - return false; // don't do further after call 1.256 - } 1.257 - } 1.258 - return false; 1.259 -} 1.260 1.261 -//-------------------------------is_uncommon_trap_if_pattern------------------------- 1.262 -// Return true for "if(test)-> proj -> ... 1.263 -// | 1.264 -// V 1.265 -// other_proj->[region->..]call_uct" 1.266 -// 1.267 -// "must_reason_predicate" means the uct reason must be Reason_predicate 1.268 -bool PhaseIdealLoop::is_uncommon_trap_if_pattern(ProjNode *proj, Deoptimization::DeoptReason reason) { 1.269 - Node *in0 = proj->in(0); 1.270 - if (!in0->is_If()) return false; 1.271 - // Variation of a dead If node. 1.272 - if (in0->outcnt() < 2) return false; 1.273 - IfNode* iff = in0->as_If(); 1.274 - 1.275 - // we need "If(Conv2B(Opaque1(...)))" pattern for reason_predicate 1.276 - if (reason != Deoptimization::Reason_none) { 1.277 - if (iff->in(1)->Opcode() != Op_Conv2B || 1.278 - iff->in(1)->in(1)->Opcode() != Op_Opaque1) { 1.279 - return false; 1.280 - } 1.281 - } 1.282 - 1.283 - ProjNode* other_proj = iff->proj_out(1-proj->_con)->as_Proj(); 1.284 - return is_uncommon_trap_proj(other_proj, reason); 1.285 -} 1.286 - 1.287 -//-------------------------------register_control------------------------- 1.288 -void PhaseIdealLoop::register_control(Node* n, IdealLoopTree *loop, Node* pred) { 1.289 - assert(n->is_CFG(), "must be control node"); 1.290 - _igvn.register_new_node_with_optimizer(n); 1.291 - loop->_body.push(n); 1.292 - set_loop(n, loop); 1.293 - // When called from beautify_loops() idom is not constructed yet. 1.294 - if (_idom != NULL) { 1.295 - set_idom(n, pred, dom_depth(pred)); 1.296 - } 1.297 -} 1.298 - 1.299 -//------------------------------create_new_if_for_predicate------------------------ 1.300 -// create a new if above the uct_if_pattern for the predicate to be promoted. 1.301 -// 1.302 -// before after 1.303 -// ---------- ---------- 1.304 -// ctrl ctrl 1.305 -// | | 1.306 -// | | 1.307 -// v v 1.308 -// iff new_iff 1.309 -// / \ / \ 1.310 -// / \ / \ 1.311 -// v v v v 1.312 -// uncommon_proj cont_proj if_uct if_cont 1.313 -// \ | | | | 1.314 -// \ | | | | 1.315 -// v v v | v 1.316 -// rgn loop | iff 1.317 -// | | / \ 1.318 -// | | / \ 1.319 -// v | v v 1.320 -// uncommon_trap | uncommon_proj cont_proj 1.321 -// \ \ | | 1.322 -// \ \ | | 1.323 -// v v v v 1.324 -// rgn loop 1.325 -// | 1.326 -// | 1.327 -// v 1.328 -// uncommon_trap 1.329 -// 1.330 -// 1.331 -// We will create a region to guard the uct call if there is no one there. 1.332 -// The true projecttion (if_cont) of the new_iff is returned. 1.333 -// This code is also used to clone predicates to clonned loops. 1.334 -ProjNode* PhaseIdealLoop::create_new_if_for_predicate(ProjNode* cont_proj, Node* new_entry, 1.335 - Deoptimization::DeoptReason reason) { 1.336 - assert(is_uncommon_trap_if_pattern(cont_proj, reason), "must be a uct if pattern!"); 1.337 - IfNode* iff = cont_proj->in(0)->as_If(); 1.338 - 1.339 - ProjNode *uncommon_proj = iff->proj_out(1 - cont_proj->_con); 1.340 - Node *rgn = uncommon_proj->unique_ctrl_out(); 1.341 - assert(rgn->is_Region() || rgn->is_Call(), "must be a region or call uct"); 1.342 - 1.343 - if (!rgn->is_Region()) { // create a region to guard the call 1.344 - assert(rgn->is_Call(), "must be call uct"); 1.345 - CallNode* call = rgn->as_Call(); 1.346 - IdealLoopTree* loop = get_loop(call); 1.347 - rgn = new (C, 1) RegionNode(1); 1.348 - rgn->add_req(uncommon_proj); 1.349 - register_control(rgn, loop, uncommon_proj); 1.350 - _igvn.hash_delete(call); 1.351 - call->set_req(0, rgn); 1.352 - // When called from beautify_loops() idom is not constructed yet. 1.353 - if (_idom != NULL) { 1.354 - set_idom(call, rgn, dom_depth(rgn)); 1.355 - } 1.356 - } 1.357 - 1.358 - Node* entry = iff->in(0); 1.359 - if (new_entry != NULL) { 1.360 - // Clonning the predicate to new location. 1.361 - entry = new_entry; 1.362 - } 1.363 - // Create new_iff 1.364 - IdealLoopTree* lp = get_loop(entry); 1.365 - IfNode *new_iff = new (C, 2) IfNode(entry, NULL, iff->_prob, iff->_fcnt); 1.366 - register_control(new_iff, lp, entry); 1.367 - Node *if_cont = new (C, 1) IfTrueNode(new_iff); 1.368 - Node *if_uct = new (C, 1) IfFalseNode(new_iff); 1.369 - if (cont_proj->is_IfFalse()) { 1.370 - // Swap 1.371 - Node* tmp = if_uct; if_uct = if_cont; if_cont = tmp; 1.372 - } 1.373 - register_control(if_cont, lp, new_iff); 1.374 - register_control(if_uct, get_loop(rgn), new_iff); 1.375 - 1.376 - // if_uct to rgn 1.377 - _igvn.hash_delete(rgn); 1.378 - rgn->add_req(if_uct); 1.379 - // When called from beautify_loops() idom is not constructed yet. 1.380 - if (_idom != NULL) { 1.381 - Node* ridom = idom(rgn); 1.382 - Node* nrdom = dom_lca(ridom, new_iff); 1.383 - set_idom(rgn, nrdom, dom_depth(rgn)); 1.384 - } 1.385 - // rgn must have no phis 1.386 - assert(!rgn->as_Region()->has_phi(), "region must have no phis"); 1.387 - 1.388 - if (new_entry == NULL) { 1.389 - // Attach if_cont to iff 1.390 - _igvn.hash_delete(iff); 1.391 - iff->set_req(0, if_cont); 1.392 - if (_idom != NULL) { 1.393 - set_idom(iff, if_cont, dom_depth(iff)); 1.394 - } 1.395 - } 1.396 - return if_cont->as_Proj(); 1.397 -} 1.398 - 1.399 -//--------------------------find_predicate_insertion_point------------------- 1.400 -// Find a good location to insert a predicate 1.401 -ProjNode* PhaseIdealLoop::find_predicate_insertion_point(Node* start_c, Deoptimization::DeoptReason reason) { 1.402 - if (start_c == NULL || !start_c->is_Proj()) 1.403 - return NULL; 1.404 - if (is_uncommon_trap_if_pattern(start_c->as_Proj(), reason)) { 1.405 - return start_c->as_Proj(); 1.406 - } 1.407 - return NULL; 1.408 -} 1.409 - 1.410 -//--------------------------find_predicate------------------------------------ 1.411 -// Find a predicate 1.412 -Node* PhaseIdealLoop::find_predicate(Node* entry) { 1.413 - Node* predicate = NULL; 1.414 - if (UseLoopPredicate) { 1.415 - predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate); 1.416 - if (predicate != NULL) { // right pattern that can be used by loop predication 1.417 - assert(entry->in(0)->in(1)->in(1)->Opcode()==Op_Opaque1, "must be"); 1.418 - return entry; 1.419 - } 1.420 - } 1.421 - return NULL; 1.422 -} 1.423 - 1.424 -//------------------------------Invariance----------------------------------- 1.425 -// Helper class for loop_predication_impl to compute invariance on the fly and 1.426 -// clone invariants. 1.427 -class Invariance : public StackObj { 1.428 - VectorSet _visited, _invariant; 1.429 - Node_Stack _stack; 1.430 - VectorSet _clone_visited; 1.431 - Node_List _old_new; // map of old to new (clone) 1.432 - IdealLoopTree* _lpt; 1.433 - PhaseIdealLoop* _phase; 1.434 - 1.435 - // Helper function to set up the invariance for invariance computation 1.436 - // If n is a known invariant, set up directly. Otherwise, look up the 1.437 - // the possibility to push n onto the stack for further processing. 1.438 - void visit(Node* use, Node* n) { 1.439 - if (_lpt->is_invariant(n)) { // known invariant 1.440 - _invariant.set(n->_idx); 1.441 - } else if (!n->is_CFG()) { 1.442 - Node *n_ctrl = _phase->ctrl_or_self(n); 1.443 - Node *u_ctrl = _phase->ctrl_or_self(use); // self if use is a CFG 1.444 - if (_phase->is_dominator(n_ctrl, u_ctrl)) { 1.445 - _stack.push(n, n->in(0) == NULL ? 1 : 0); 1.446 - } 1.447 - } 1.448 - } 1.449 - 1.450 - // Compute invariance for "the_node" and (possibly) all its inputs recursively 1.451 - // on the fly 1.452 - void compute_invariance(Node* n) { 1.453 - assert(_visited.test(n->_idx), "must be"); 1.454 - visit(n, n); 1.455 - while (_stack.is_nonempty()) { 1.456 - Node* n = _stack.node(); 1.457 - uint idx = _stack.index(); 1.458 - if (idx == n->req()) { // all inputs are processed 1.459 - _stack.pop(); 1.460 - // n is invariant if it's inputs are all invariant 1.461 - bool all_inputs_invariant = true; 1.462 - for (uint i = 0; i < n->req(); i++) { 1.463 - Node* in = n->in(i); 1.464 - if (in == NULL) continue; 1.465 - assert(_visited.test(in->_idx), "must have visited input"); 1.466 - if (!_invariant.test(in->_idx)) { // bad guy 1.467 - all_inputs_invariant = false; 1.468 - break; 1.469 - } 1.470 - } 1.471 - if (all_inputs_invariant) { 1.472 - _invariant.set(n->_idx); // I am a invariant too 1.473 - } 1.474 - } else { // process next input 1.475 - _stack.set_index(idx + 1); 1.476 - Node* m = n->in(idx); 1.477 - if (m != NULL && !_visited.test_set(m->_idx)) { 1.478 - visit(n, m); 1.479 - } 1.480 - } 1.481 - } 1.482 - } 1.483 - 1.484 - // Helper function to set up _old_new map for clone_nodes. 1.485 - // If n is a known invariant, set up directly ("clone" of n == n). 1.486 - // Otherwise, push n onto the stack for real cloning. 1.487 - void clone_visit(Node* n) { 1.488 - assert(_invariant.test(n->_idx), "must be invariant"); 1.489 - if (_lpt->is_invariant(n)) { // known invariant 1.490 - _old_new.map(n->_idx, n); 1.491 - } else{ // to be cloned 1.492 - assert (!n->is_CFG(), "should not see CFG here"); 1.493 - _stack.push(n, n->in(0) == NULL ? 1 : 0); 1.494 - } 1.495 - } 1.496 - 1.497 - // Clone "n" and (possibly) all its inputs recursively 1.498 - void clone_nodes(Node* n, Node* ctrl) { 1.499 - clone_visit(n); 1.500 - while (_stack.is_nonempty()) { 1.501 - Node* n = _stack.node(); 1.502 - uint idx = _stack.index(); 1.503 - if (idx == n->req()) { // all inputs processed, clone n! 1.504 - _stack.pop(); 1.505 - // clone invariant node 1.506 - Node* n_cl = n->clone(); 1.507 - _old_new.map(n->_idx, n_cl); 1.508 - _phase->register_new_node(n_cl, ctrl); 1.509 - for (uint i = 0; i < n->req(); i++) { 1.510 - Node* in = n_cl->in(i); 1.511 - if (in == NULL) continue; 1.512 - n_cl->set_req(i, _old_new[in->_idx]); 1.513 - } 1.514 - } else { // process next input 1.515 - _stack.set_index(idx + 1); 1.516 - Node* m = n->in(idx); 1.517 - if (m != NULL && !_clone_visited.test_set(m->_idx)) { 1.518 - clone_visit(m); // visit the input 1.519 - } 1.520 - } 1.521 - } 1.522 - } 1.523 - 1.524 - public: 1.525 - Invariance(Arena* area, IdealLoopTree* lpt) : 1.526 - _lpt(lpt), _phase(lpt->_phase), 1.527 - _visited(area), _invariant(area), _stack(area, 10 /* guess */), 1.528 - _clone_visited(area), _old_new(area) 1.529 - {} 1.530 - 1.531 - // Map old to n for invariance computation and clone 1.532 - void map_ctrl(Node* old, Node* n) { 1.533 - assert(old->is_CFG() && n->is_CFG(), "must be"); 1.534 - _old_new.map(old->_idx, n); // "clone" of old is n 1.535 - _invariant.set(old->_idx); // old is invariant 1.536 - _clone_visited.set(old->_idx); 1.537 - } 1.538 - 1.539 - // Driver function to compute invariance 1.540 - bool is_invariant(Node* n) { 1.541 - if (!_visited.test_set(n->_idx)) 1.542 - compute_invariance(n); 1.543 - return (_invariant.test(n->_idx) != 0); 1.544 - } 1.545 - 1.546 - // Driver function to clone invariant 1.547 - Node* clone(Node* n, Node* ctrl) { 1.548 - assert(ctrl->is_CFG(), "must be"); 1.549 - assert(_invariant.test(n->_idx), "must be an invariant"); 1.550 - if (!_clone_visited.test(n->_idx)) 1.551 - clone_nodes(n, ctrl); 1.552 - return _old_new[n->_idx]; 1.553 - } 1.554 -}; 1.555 - 1.556 -//------------------------------is_range_check_if ----------------------------------- 1.557 -// Returns true if the predicate of iff is in "scale*iv + offset u< load_range(ptr)" format 1.558 -// Note: this function is particularly designed for loop predication. We require load_range 1.559 -// and offset to be loop invariant computed on the fly by "invar" 1.560 -bool IdealLoopTree::is_range_check_if(IfNode *iff, PhaseIdealLoop *phase, Invariance& invar) const { 1.561 - if (!is_loop_exit(iff)) { 1.562 - return false; 1.563 - } 1.564 - if (!iff->in(1)->is_Bool()) { 1.565 - return false; 1.566 - } 1.567 - const BoolNode *bol = iff->in(1)->as_Bool(); 1.568 - if (bol->_test._test != BoolTest::lt) { 1.569 - return false; 1.570 - } 1.571 - if (!bol->in(1)->is_Cmp()) { 1.572 - return false; 1.573 - } 1.574 - const CmpNode *cmp = bol->in(1)->as_Cmp(); 1.575 - if (cmp->Opcode() != Op_CmpU ) { 1.576 - return false; 1.577 - } 1.578 - Node* range = cmp->in(2); 1.579 - if (range->Opcode() != Op_LoadRange) { 1.580 - const TypeInt* tint = phase->_igvn.type(range)->isa_int(); 1.581 - if (!OptimizeFill || tint == NULL || tint->empty() || tint->_lo < 0) { 1.582 - // Allow predication on positive values that aren't LoadRanges. 1.583 - // This allows optimization of loops where the length of the 1.584 - // array is a known value and doesn't need to be loaded back 1.585 - // from the array. 1.586 - return false; 1.587 - } 1.588 - } 1.589 - if (!invar.is_invariant(range)) { 1.590 - return false; 1.591 - } 1.592 - Node *iv = _head->as_CountedLoop()->phi(); 1.593 - int scale = 0; 1.594 - Node *offset = NULL; 1.595 - if (!phase->is_scaled_iv_plus_offset(cmp->in(1), iv, &scale, &offset)) { 1.596 - return false; 1.597 - } 1.598 - if(offset && !invar.is_invariant(offset)) { // offset must be invariant 1.599 - return false; 1.600 - } 1.601 - return true; 1.602 -} 1.603 - 1.604 -//------------------------------rc_predicate----------------------------------- 1.605 -// Create a range check predicate 1.606 -// 1.607 -// for (i = init; i < limit; i += stride) { 1.608 -// a[scale*i+offset] 1.609 -// } 1.610 -// 1.611 -// Compute max(scale*i + offset) for init <= i < limit and build the predicate 1.612 -// as "max(scale*i + offset) u< a.length". 1.613 -// 1.614 -// There are two cases for max(scale*i + offset): 1.615 -// (1) stride*scale > 0 1.616 -// max(scale*i + offset) = scale*(limit-stride) + offset 1.617 -// (2) stride*scale < 0 1.618 -// max(scale*i + offset) = scale*init + offset 1.619 -BoolNode* PhaseIdealLoop::rc_predicate(Node* ctrl, 1.620 - int scale, Node* offset, 1.621 - Node* init, Node* limit, Node* stride, 1.622 - Node* range, bool upper) { 1.623 - DEBUG_ONLY(ttyLocker ttyl); 1.624 - if (TraceLoopPredicate) tty->print("rc_predicate "); 1.625 - 1.626 - Node* max_idx_expr = init; 1.627 - int stride_con = stride->get_int(); 1.628 - if ((stride_con > 0) == (scale > 0) == upper) { 1.629 - max_idx_expr = new (C, 3) SubINode(limit, stride); 1.630 - register_new_node(max_idx_expr, ctrl); 1.631 - if (TraceLoopPredicate) tty->print("(limit - stride) "); 1.632 - } else { 1.633 - if (TraceLoopPredicate) tty->print("init "); 1.634 - } 1.635 - 1.636 - if (scale != 1) { 1.637 - ConNode* con_scale = _igvn.intcon(scale); 1.638 - max_idx_expr = new (C, 3) MulINode(max_idx_expr, con_scale); 1.639 - register_new_node(max_idx_expr, ctrl); 1.640 - if (TraceLoopPredicate) tty->print("* %d ", scale); 1.641 - } 1.642 - 1.643 - if (offset && (!offset->is_Con() || offset->get_int() != 0)){ 1.644 - max_idx_expr = new (C, 3) AddINode(max_idx_expr, offset); 1.645 - register_new_node(max_idx_expr, ctrl); 1.646 - if (TraceLoopPredicate) 1.647 - if (offset->is_Con()) tty->print("+ %d ", offset->get_int()); 1.648 - else tty->print("+ offset "); 1.649 - } 1.650 - 1.651 - CmpUNode* cmp = new (C, 3) CmpUNode(max_idx_expr, range); 1.652 - register_new_node(cmp, ctrl); 1.653 - BoolNode* bol = new (C, 2) BoolNode(cmp, BoolTest::lt); 1.654 - register_new_node(bol, ctrl); 1.655 - 1.656 - if (TraceLoopPredicate) tty->print_cr("<u range"); 1.657 - return bol; 1.658 -} 1.659 - 1.660 -//------------------------------ loop_predication_impl-------------------------- 1.661 -// Insert loop predicates for null checks and range checks 1.662 -bool PhaseIdealLoop::loop_predication_impl(IdealLoopTree *loop) { 1.663 - if (!UseLoopPredicate) return false; 1.664 - 1.665 - if (!loop->_head->is_Loop()) { 1.666 - // Could be a simple region when irreducible loops are present. 1.667 - return false; 1.668 - } 1.669 - 1.670 - if (loop->_head->unique_ctrl_out()->Opcode() == Op_NeverBranch) { 1.671 - // do nothing for infinite loops 1.672 - return false; 1.673 - } 1.674 - 1.675 - CountedLoopNode *cl = NULL; 1.676 - if (loop->_head->is_CountedLoop()) { 1.677 - cl = loop->_head->as_CountedLoop(); 1.678 - // do nothing for iteration-splitted loops 1.679 - if (!cl->is_normal_loop()) return false; 1.680 - } 1.681 - 1.682 - LoopNode *lpn = loop->_head->as_Loop(); 1.683 - Node* entry = lpn->in(LoopNode::EntryControl); 1.684 - 1.685 - ProjNode *predicate_proj = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate); 1.686 - if (!predicate_proj) { 1.687 -#ifndef PRODUCT 1.688 - if (TraceLoopPredicate) { 1.689 - tty->print("missing predicate:"); 1.690 - loop->dump_head(); 1.691 - lpn->dump(1); 1.692 - } 1.693 -#endif 1.694 - return false; 1.695 - } 1.696 - ConNode* zero = _igvn.intcon(0); 1.697 - set_ctrl(zero, C->root()); 1.698 - 1.699 - ResourceArea *area = Thread::current()->resource_area(); 1.700 - Invariance invar(area, loop); 1.701 - 1.702 - // Create list of if-projs such that a newer proj dominates all older 1.703 - // projs in the list, and they all dominate loop->tail() 1.704 - Node_List if_proj_list(area); 1.705 - LoopNode *head = loop->_head->as_Loop(); 1.706 - Node *current_proj = loop->tail(); //start from tail 1.707 - while ( current_proj != head ) { 1.708 - if (loop == get_loop(current_proj) && // still in the loop ? 1.709 - current_proj->is_Proj() && // is a projection ? 1.710 - current_proj->in(0)->Opcode() == Op_If) { // is a if projection ? 1.711 - if_proj_list.push(current_proj); 1.712 - } 1.713 - current_proj = idom(current_proj); 1.714 - } 1.715 - 1.716 - bool hoisted = false; // true if at least one proj is promoted 1.717 - while (if_proj_list.size() > 0) { 1.718 - // Following are changed to nonnull when a predicate can be hoisted 1.719 - ProjNode* new_predicate_proj = NULL; 1.720 - 1.721 - ProjNode* proj = if_proj_list.pop()->as_Proj(); 1.722 - IfNode* iff = proj->in(0)->as_If(); 1.723 - 1.724 - if (!is_uncommon_trap_if_pattern(proj, Deoptimization::Reason_none)) { 1.725 - if (loop->is_loop_exit(iff)) { 1.726 - // stop processing the remaining projs in the list because the execution of them 1.727 - // depends on the condition of "iff" (iff->in(1)). 1.728 - break; 1.729 - } else { 1.730 - // Both arms are inside the loop. There are two cases: 1.731 - // (1) there is one backward branch. In this case, any remaining proj 1.732 - // in the if_proj list post-dominates "iff". So, the condition of "iff" 1.733 - // does not determine the execution the remining projs directly, and we 1.734 - // can safely continue. 1.735 - // (2) both arms are forwarded, i.e. a diamond shape. In this case, "proj" 1.736 - // does not dominate loop->tail(), so it can not be in the if_proj list. 1.737 - continue; 1.738 - } 1.739 - } 1.740 - 1.741 - Node* test = iff->in(1); 1.742 - if (!test->is_Bool()){ //Conv2B, ... 1.743 - continue; 1.744 - } 1.745 - BoolNode* bol = test->as_Bool(); 1.746 - if (invar.is_invariant(bol)) { 1.747 - // Invariant test 1.748 - new_predicate_proj = create_new_if_for_predicate(predicate_proj, NULL, 1.749 - Deoptimization::Reason_predicate); 1.750 - Node* ctrl = new_predicate_proj->in(0)->as_If()->in(0); 1.751 - BoolNode* new_predicate_bol = invar.clone(bol, ctrl)->as_Bool(); 1.752 - 1.753 - // Negate test if necessary 1.754 - bool negated = false; 1.755 - if (proj->_con != predicate_proj->_con) { 1.756 - new_predicate_bol = new (C, 2) BoolNode(new_predicate_bol->in(1), new_predicate_bol->_test.negate()); 1.757 - register_new_node(new_predicate_bol, ctrl); 1.758 - negated = true; 1.759 - } 1.760 - IfNode* new_predicate_iff = new_predicate_proj->in(0)->as_If(); 1.761 - _igvn.hash_delete(new_predicate_iff); 1.762 - new_predicate_iff->set_req(1, new_predicate_bol); 1.763 -#ifndef PRODUCT 1.764 - if (TraceLoopPredicate) { 1.765 - tty->print("Predicate invariant if%s: %d ", negated ? " negated" : "", new_predicate_iff->_idx); 1.766 - loop->dump_head(); 1.767 - } else if (TraceLoopOpts) { 1.768 - tty->print("Predicate IC "); 1.769 - loop->dump_head(); 1.770 - } 1.771 -#endif 1.772 - } else if (cl != NULL && loop->is_range_check_if(iff, this, invar)) { 1.773 - assert(proj->_con == predicate_proj->_con, "must match"); 1.774 - 1.775 - // Range check for counted loops 1.776 - const Node* cmp = bol->in(1)->as_Cmp(); 1.777 - Node* idx = cmp->in(1); 1.778 - assert(!invar.is_invariant(idx), "index is variant"); 1.779 - assert(cmp->in(2)->Opcode() == Op_LoadRange || OptimizeFill, "must be"); 1.780 - Node* rng = cmp->in(2); 1.781 - assert(invar.is_invariant(rng), "range must be invariant"); 1.782 - int scale = 1; 1.783 - Node* offset = zero; 1.784 - bool ok = is_scaled_iv_plus_offset(idx, cl->phi(), &scale, &offset); 1.785 - assert(ok, "must be index expression"); 1.786 - 1.787 - Node* init = cl->init_trip(); 1.788 - Node* limit = cl->limit(); 1.789 - Node* stride = cl->stride(); 1.790 - 1.791 - // Build if's for the upper and lower bound tests. The 1.792 - // lower_bound test will dominate the upper bound test and all 1.793 - // cloned or created nodes will use the lower bound test as 1.794 - // their declared control. 1.795 - ProjNode* lower_bound_proj = create_new_if_for_predicate(predicate_proj, NULL, Deoptimization::Reason_predicate); 1.796 - ProjNode* upper_bound_proj = create_new_if_for_predicate(predicate_proj, NULL, Deoptimization::Reason_predicate); 1.797 - assert(upper_bound_proj->in(0)->as_If()->in(0) == lower_bound_proj, "should dominate"); 1.798 - Node *ctrl = lower_bound_proj->in(0)->as_If()->in(0); 1.799 - 1.800 - // Perform cloning to keep Invariance state correct since the 1.801 - // late schedule will place invariant things in the loop. 1.802 - rng = invar.clone(rng, ctrl); 1.803 - if (offset && offset != zero) { 1.804 - assert(invar.is_invariant(offset), "offset must be loop invariant"); 1.805 - offset = invar.clone(offset, ctrl); 1.806 - } 1.807 - 1.808 - // Test the lower bound 1.809 - Node* lower_bound_bol = rc_predicate(ctrl, scale, offset, init, limit, stride, rng, false); 1.810 - IfNode* lower_bound_iff = lower_bound_proj->in(0)->as_If(); 1.811 - _igvn.hash_delete(lower_bound_iff); 1.812 - lower_bound_iff->set_req(1, lower_bound_bol); 1.813 - if (TraceLoopPredicate) tty->print_cr("lower bound check if: %d", lower_bound_iff->_idx); 1.814 - 1.815 - // Test the upper bound 1.816 - Node* upper_bound_bol = rc_predicate(ctrl, scale, offset, init, limit, stride, rng, true); 1.817 - IfNode* upper_bound_iff = upper_bound_proj->in(0)->as_If(); 1.818 - _igvn.hash_delete(upper_bound_iff); 1.819 - upper_bound_iff->set_req(1, upper_bound_bol); 1.820 - if (TraceLoopPredicate) tty->print_cr("upper bound check if: %d", lower_bound_iff->_idx); 1.821 - 1.822 - // Fall through into rest of the clean up code which will move 1.823 - // any dependent nodes onto the upper bound test. 1.824 - new_predicate_proj = upper_bound_proj; 1.825 - 1.826 -#ifndef PRODUCT 1.827 - if (TraceLoopOpts && !TraceLoopPredicate) { 1.828 - tty->print("Predicate RC "); 1.829 - loop->dump_head(); 1.830 - } 1.831 -#endif 1.832 - } else { 1.833 - // Loop variant check (for example, range check in non-counted loop) 1.834 - // with uncommon trap. 1.835 - continue; 1.836 - } 1.837 - assert(new_predicate_proj != NULL, "sanity"); 1.838 - // Success - attach condition (new_predicate_bol) to predicate if 1.839 - invar.map_ctrl(proj, new_predicate_proj); // so that invariance test can be appropriate 1.840 - 1.841 - // Eliminate the old If in the loop body 1.842 - dominated_by( new_predicate_proj, iff, proj->_con != new_predicate_proj->_con ); 1.843 - 1.844 - hoisted = true; 1.845 - C->set_major_progress(); 1.846 - } // end while 1.847 - 1.848 -#ifndef PRODUCT 1.849 - // report that the loop predication has been actually performed 1.850 - // for this loop 1.851 - if (TraceLoopPredicate && hoisted) { 1.852 - tty->print("Loop Predication Performed:"); 1.853 - loop->dump_head(); 1.854 - } 1.855 -#endif 1.856 - 1.857 - return hoisted; 1.858 -} 1.859 - 1.860 -//------------------------------loop_predication-------------------------------- 1.861 -// driver routine for loop predication optimization 1.862 -bool IdealLoopTree::loop_predication( PhaseIdealLoop *phase) { 1.863 - bool hoisted = false; 1.864 - // Recursively promote predicates 1.865 - if ( _child ) { 1.866 - hoisted = _child->loop_predication( phase); 1.867 - } 1.868 - 1.869 - // self 1.870 - if (!_irreducible && !tail()->is_top()) { 1.871 - hoisted |= phase->loop_predication_impl(this); 1.872 - } 1.873 - 1.874 - if ( _next ) { //sibling 1.875 - hoisted |= _next->loop_predication( phase); 1.876 - } 1.877 - 1.878 - return hoisted; 1.879 -} 1.880 - 1.881 - 1.882 +//============================================================================= 1.883 // Process all the loops in the loop tree and replace any fill 1.884 // patterns with an intrisc version. 1.885 bool PhaseIdealLoop::do_intrinsify_fill() { 1.886 @@ -2762,6 +2249,13 @@ 1.887 return false; 1.888 } 1.889 1.890 +#ifndef PRODUCT 1.891 + if (TraceLoopOpts) { 1.892 + tty->print("ArrayFill "); 1.893 + lpt->dump_head(); 1.894 + } 1.895 +#endif 1.896 + 1.897 // Now replace the whole loop body by a call to a fill routine that 1.898 // covers the same region as the loop. 1.899 Node* base = store->in(MemNode::Address)->as_AddP()->in(AddPNode::Base);