1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/src/share/vm/opto/loopPredicate.cpp Wed Apr 27 01:25:04 2016 +0800 1.3 @@ -0,0 +1,874 @@ 1.4 +/* 1.5 + * Copyright (c) 2011, 2014, Oracle and/or its affiliates. All rights reserved. 1.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 1.7 + * 1.8 + * This code is free software; you can redistribute it and/or modify it 1.9 + * under the terms of the GNU General Public License version 2 only, as 1.10 + * published by the Free Software Foundation. 1.11 + * 1.12 + * This code is distributed in the hope that it will be useful, but WITHOUT 1.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 1.14 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 1.15 + * version 2 for more details (a copy is included in the LICENSE file that 1.16 + * accompanied this code). 1.17 + * 1.18 + * You should have received a copy of the GNU General Public License version 1.19 + * 2 along with this work; if not, write to the Free Software Foundation, 1.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 1.21 + * 1.22 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 1.23 + * or visit www.oracle.com if you need additional information or have any 1.24 + * questions. 1.25 + * 1.26 + */ 1.27 + 1.28 +#include "precompiled.hpp" 1.29 +#include "opto/loopnode.hpp" 1.30 +#include "opto/addnode.hpp" 1.31 +#include "opto/callnode.hpp" 1.32 +#include "opto/connode.hpp" 1.33 +#include "opto/loopnode.hpp" 1.34 +#include "opto/mulnode.hpp" 1.35 +#include "opto/rootnode.hpp" 1.36 +#include "opto/subnode.hpp" 1.37 + 1.38 +/* 1.39 + * The general idea of Loop Predication is to insert a predicate on the entry 1.40 + * path to a loop, and raise a uncommon trap if the check of the condition fails. 1.41 + * The condition checks are promoted from inside the loop body, and thus 1.42 + * the checks inside the loop could be eliminated. Currently, loop predication 1.43 + * optimization has been applied to remove array range check and loop invariant 1.44 + * checks (such as null checks). 1.45 +*/ 1.46 + 1.47 +//-------------------------------register_control------------------------- 1.48 +void PhaseIdealLoop::register_control(Node* n, IdealLoopTree *loop, Node* pred) { 1.49 + assert(n->is_CFG(), "must be control node"); 1.50 + _igvn.register_new_node_with_optimizer(n); 1.51 + loop->_body.push(n); 1.52 + set_loop(n, loop); 1.53 + // When called from beautify_loops() idom is not constructed yet. 1.54 + if (_idom != NULL) { 1.55 + set_idom(n, pred, dom_depth(pred)); 1.56 + } 1.57 +} 1.58 + 1.59 +//------------------------------create_new_if_for_predicate------------------------ 1.60 +// create a new if above the uct_if_pattern for the predicate to be promoted. 1.61 +// 1.62 +// before after 1.63 +// ---------- ---------- 1.64 +// ctrl ctrl 1.65 +// | | 1.66 +// | | 1.67 +// v v 1.68 +// iff new_iff 1.69 +// / \ / \ 1.70 +// / \ / \ 1.71 +// v v v v 1.72 +// uncommon_proj cont_proj if_uct if_cont 1.73 +// \ | | | | 1.74 +// \ | | | | 1.75 +// v v v | v 1.76 +// rgn loop | iff 1.77 +// | | / \ 1.78 +// | | / \ 1.79 +// v | v v 1.80 +// uncommon_trap | uncommon_proj cont_proj 1.81 +// \ \ | | 1.82 +// \ \ | | 1.83 +// v v v v 1.84 +// rgn loop 1.85 +// | 1.86 +// | 1.87 +// v 1.88 +// uncommon_trap 1.89 +// 1.90 +// 1.91 +// We will create a region to guard the uct call if there is no one there. 1.92 +// The true projecttion (if_cont) of the new_iff is returned. 1.93 +// This code is also used to clone predicates to clonned loops. 1.94 +ProjNode* PhaseIdealLoop::create_new_if_for_predicate(ProjNode* cont_proj, Node* new_entry, 1.95 + Deoptimization::DeoptReason reason) { 1.96 + assert(cont_proj->is_uncommon_trap_if_pattern(reason), "must be a uct if pattern!"); 1.97 + IfNode* iff = cont_proj->in(0)->as_If(); 1.98 + 1.99 + ProjNode *uncommon_proj = iff->proj_out(1 - cont_proj->_con); 1.100 + Node *rgn = uncommon_proj->unique_ctrl_out(); 1.101 + assert(rgn->is_Region() || rgn->is_Call(), "must be a region or call uct"); 1.102 + 1.103 + uint proj_index = 1; // region's edge corresponding to uncommon_proj 1.104 + if (!rgn->is_Region()) { // create a region to guard the call 1.105 + assert(rgn->is_Call(), "must be call uct"); 1.106 + CallNode* call = rgn->as_Call(); 1.107 + IdealLoopTree* loop = get_loop(call); 1.108 + rgn = new (C) RegionNode(1); 1.109 + rgn->add_req(uncommon_proj); 1.110 + register_control(rgn, loop, uncommon_proj); 1.111 + _igvn.hash_delete(call); 1.112 + call->set_req(0, rgn); 1.113 + // When called from beautify_loops() idom is not constructed yet. 1.114 + if (_idom != NULL) { 1.115 + set_idom(call, rgn, dom_depth(rgn)); 1.116 + } 1.117 + } else { 1.118 + // Find region's edge corresponding to uncommon_proj 1.119 + for (; proj_index < rgn->req(); proj_index++) 1.120 + if (rgn->in(proj_index) == uncommon_proj) break; 1.121 + assert(proj_index < rgn->req(), "sanity"); 1.122 + } 1.123 + 1.124 + Node* entry = iff->in(0); 1.125 + if (new_entry != NULL) { 1.126 + // Clonning the predicate to new location. 1.127 + entry = new_entry; 1.128 + } 1.129 + // Create new_iff 1.130 + IdealLoopTree* lp = get_loop(entry); 1.131 + IfNode *new_iff = iff->clone()->as_If(); 1.132 + new_iff->set_req(0, entry); 1.133 + register_control(new_iff, lp, entry); 1.134 + Node *if_cont = new (C) IfTrueNode(new_iff); 1.135 + Node *if_uct = new (C) IfFalseNode(new_iff); 1.136 + if (cont_proj->is_IfFalse()) { 1.137 + // Swap 1.138 + Node* tmp = if_uct; if_uct = if_cont; if_cont = tmp; 1.139 + } 1.140 + register_control(if_cont, lp, new_iff); 1.141 + register_control(if_uct, get_loop(rgn), new_iff); 1.142 + 1.143 + // if_uct to rgn 1.144 + _igvn.hash_delete(rgn); 1.145 + rgn->add_req(if_uct); 1.146 + // When called from beautify_loops() idom is not constructed yet. 1.147 + if (_idom != NULL) { 1.148 + Node* ridom = idom(rgn); 1.149 + Node* nrdom = dom_lca(ridom, new_iff); 1.150 + set_idom(rgn, nrdom, dom_depth(rgn)); 1.151 + } 1.152 + 1.153 + // If rgn has phis add new edges which has the same 1.154 + // value as on original uncommon_proj pass. 1.155 + assert(rgn->in(rgn->req() -1) == if_uct, "new edge should be last"); 1.156 + bool has_phi = false; 1.157 + for (DUIterator_Fast imax, i = rgn->fast_outs(imax); i < imax; i++) { 1.158 + Node* use = rgn->fast_out(i); 1.159 + if (use->is_Phi() && use->outcnt() > 0) { 1.160 + assert(use->in(0) == rgn, ""); 1.161 + _igvn.rehash_node_delayed(use); 1.162 + use->add_req(use->in(proj_index)); 1.163 + has_phi = true; 1.164 + } 1.165 + } 1.166 + assert(!has_phi || rgn->req() > 3, "no phis when region is created"); 1.167 + 1.168 + if (new_entry == NULL) { 1.169 + // Attach if_cont to iff 1.170 + _igvn.hash_delete(iff); 1.171 + iff->set_req(0, if_cont); 1.172 + if (_idom != NULL) { 1.173 + set_idom(iff, if_cont, dom_depth(iff)); 1.174 + } 1.175 + } 1.176 + return if_cont->as_Proj(); 1.177 +} 1.178 + 1.179 +//------------------------------create_new_if_for_predicate------------------------ 1.180 +// Create a new if below new_entry for the predicate to be cloned (IGVN optimization) 1.181 +ProjNode* PhaseIterGVN::create_new_if_for_predicate(ProjNode* cont_proj, Node* new_entry, 1.182 + Deoptimization::DeoptReason reason) { 1.183 + assert(new_entry != 0, "only used for clone predicate"); 1.184 + assert(cont_proj->is_uncommon_trap_if_pattern(reason), "must be a uct if pattern!"); 1.185 + IfNode* iff = cont_proj->in(0)->as_If(); 1.186 + 1.187 + ProjNode *uncommon_proj = iff->proj_out(1 - cont_proj->_con); 1.188 + Node *rgn = uncommon_proj->unique_ctrl_out(); 1.189 + assert(rgn->is_Region() || rgn->is_Call(), "must be a region or call uct"); 1.190 + 1.191 + uint proj_index = 1; // region's edge corresponding to uncommon_proj 1.192 + if (!rgn->is_Region()) { // create a region to guard the call 1.193 + assert(rgn->is_Call(), "must be call uct"); 1.194 + CallNode* call = rgn->as_Call(); 1.195 + rgn = new (C) RegionNode(1); 1.196 + register_new_node_with_optimizer(rgn); 1.197 + rgn->add_req(uncommon_proj); 1.198 + hash_delete(call); 1.199 + call->set_req(0, rgn); 1.200 + } else { 1.201 + // Find region's edge corresponding to uncommon_proj 1.202 + for (; proj_index < rgn->req(); proj_index++) 1.203 + if (rgn->in(proj_index) == uncommon_proj) break; 1.204 + assert(proj_index < rgn->req(), "sanity"); 1.205 + } 1.206 + 1.207 + // Create new_iff in new location. 1.208 + IfNode *new_iff = iff->clone()->as_If(); 1.209 + new_iff->set_req(0, new_entry); 1.210 + 1.211 + register_new_node_with_optimizer(new_iff); 1.212 + Node *if_cont = new (C) IfTrueNode(new_iff); 1.213 + Node *if_uct = new (C) IfFalseNode(new_iff); 1.214 + if (cont_proj->is_IfFalse()) { 1.215 + // Swap 1.216 + Node* tmp = if_uct; if_uct = if_cont; if_cont = tmp; 1.217 + } 1.218 + register_new_node_with_optimizer(if_cont); 1.219 + register_new_node_with_optimizer(if_uct); 1.220 + 1.221 + // if_uct to rgn 1.222 + hash_delete(rgn); 1.223 + rgn->add_req(if_uct); 1.224 + 1.225 + // If rgn has phis add corresponding new edges which has the same 1.226 + // value as on original uncommon_proj pass. 1.227 + assert(rgn->in(rgn->req() -1) == if_uct, "new edge should be last"); 1.228 + bool has_phi = false; 1.229 + for (DUIterator_Fast imax, i = rgn->fast_outs(imax); i < imax; i++) { 1.230 + Node* use = rgn->fast_out(i); 1.231 + if (use->is_Phi() && use->outcnt() > 0) { 1.232 + rehash_node_delayed(use); 1.233 + use->add_req(use->in(proj_index)); 1.234 + has_phi = true; 1.235 + } 1.236 + } 1.237 + assert(!has_phi || rgn->req() > 3, "no phis when region is created"); 1.238 + 1.239 + return if_cont->as_Proj(); 1.240 +} 1.241 + 1.242 +//--------------------------clone_predicate----------------------- 1.243 +ProjNode* PhaseIdealLoop::clone_predicate(ProjNode* predicate_proj, Node* new_entry, 1.244 + Deoptimization::DeoptReason reason, 1.245 + PhaseIdealLoop* loop_phase, 1.246 + PhaseIterGVN* igvn) { 1.247 + ProjNode* new_predicate_proj; 1.248 + if (loop_phase != NULL) { 1.249 + new_predicate_proj = loop_phase->create_new_if_for_predicate(predicate_proj, new_entry, reason); 1.250 + } else { 1.251 + new_predicate_proj = igvn->create_new_if_for_predicate(predicate_proj, new_entry, reason); 1.252 + } 1.253 + IfNode* iff = new_predicate_proj->in(0)->as_If(); 1.254 + Node* ctrl = iff->in(0); 1.255 + 1.256 + // Match original condition since predicate's projections could be swapped. 1.257 + assert(predicate_proj->in(0)->in(1)->in(1)->Opcode()==Op_Opaque1, "must be"); 1.258 + Node* opq = new (igvn->C) Opaque1Node(igvn->C, predicate_proj->in(0)->in(1)->in(1)->in(1)); 1.259 + igvn->C->add_predicate_opaq(opq); 1.260 + 1.261 + Node* bol = new (igvn->C) Conv2BNode(opq); 1.262 + if (loop_phase != NULL) { 1.263 + loop_phase->register_new_node(opq, ctrl); 1.264 + loop_phase->register_new_node(bol, ctrl); 1.265 + } else { 1.266 + igvn->register_new_node_with_optimizer(opq); 1.267 + igvn->register_new_node_with_optimizer(bol); 1.268 + } 1.269 + igvn->hash_delete(iff); 1.270 + iff->set_req(1, bol); 1.271 + return new_predicate_proj; 1.272 +} 1.273 + 1.274 + 1.275 +//--------------------------clone_loop_predicates----------------------- 1.276 +// Interface from IGVN 1.277 +Node* PhaseIterGVN::clone_loop_predicates(Node* old_entry, Node* new_entry, bool clone_limit_check) { 1.278 + return PhaseIdealLoop::clone_loop_predicates(old_entry, new_entry, clone_limit_check, NULL, this); 1.279 +} 1.280 + 1.281 +// Interface from PhaseIdealLoop 1.282 +Node* PhaseIdealLoop::clone_loop_predicates(Node* old_entry, Node* new_entry, bool clone_limit_check) { 1.283 + return clone_loop_predicates(old_entry, new_entry, clone_limit_check, this, &this->_igvn); 1.284 +} 1.285 + 1.286 +// Clone loop predicates to cloned loops (peeled, unswitched, split_if). 1.287 +Node* PhaseIdealLoop::clone_loop_predicates(Node* old_entry, Node* new_entry, 1.288 + bool clone_limit_check, 1.289 + PhaseIdealLoop* loop_phase, 1.290 + PhaseIterGVN* igvn) { 1.291 +#ifdef ASSERT 1.292 + if (new_entry == NULL || !(new_entry->is_Proj() || new_entry->is_Region() || new_entry->is_SafePoint())) { 1.293 + if (new_entry != NULL) 1.294 + new_entry->dump(); 1.295 + assert(false, "not IfTrue, IfFalse, Region or SafePoint"); 1.296 + } 1.297 +#endif 1.298 + // Search original predicates 1.299 + Node* entry = old_entry; 1.300 + ProjNode* limit_check_proj = NULL; 1.301 + if (LoopLimitCheck) { 1.302 + limit_check_proj = find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check); 1.303 + if (limit_check_proj != NULL) { 1.304 + entry = entry->in(0)->in(0); 1.305 + } 1.306 + } 1.307 + if (UseLoopPredicate) { 1.308 + ProjNode* predicate_proj = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate); 1.309 + if (predicate_proj != NULL) { // right pattern that can be used by loop predication 1.310 + // clone predicate 1.311 + new_entry = clone_predicate(predicate_proj, new_entry, 1.312 + Deoptimization::Reason_predicate, 1.313 + loop_phase, igvn); 1.314 + assert(new_entry != NULL && new_entry->is_Proj(), "IfTrue or IfFalse after clone predicate"); 1.315 + if (TraceLoopPredicate) { 1.316 + tty->print("Loop Predicate cloned: "); 1.317 + debug_only( new_entry->in(0)->dump(); ) 1.318 + } 1.319 + } 1.320 + } 1.321 + if (limit_check_proj != NULL && clone_limit_check) { 1.322 + // Clone loop limit check last to insert it before loop. 1.323 + // Don't clone a limit check which was already finalized 1.324 + // for this counted loop (only one limit check is needed). 1.325 + new_entry = clone_predicate(limit_check_proj, new_entry, 1.326 + Deoptimization::Reason_loop_limit_check, 1.327 + loop_phase, igvn); 1.328 + assert(new_entry != NULL && new_entry->is_Proj(), "IfTrue or IfFalse after clone limit check"); 1.329 + if (TraceLoopLimitCheck) { 1.330 + tty->print("Loop Limit Check cloned: "); 1.331 + debug_only( new_entry->in(0)->dump(); ) 1.332 + } 1.333 + } 1.334 + return new_entry; 1.335 +} 1.336 + 1.337 +//--------------------------skip_loop_predicates------------------------------ 1.338 +// Skip related predicates. 1.339 +Node* PhaseIdealLoop::skip_loop_predicates(Node* entry) { 1.340 + Node* predicate = NULL; 1.341 + if (LoopLimitCheck) { 1.342 + predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check); 1.343 + if (predicate != NULL) { 1.344 + entry = entry->in(0)->in(0); 1.345 + } 1.346 + } 1.347 + if (UseLoopPredicate) { 1.348 + predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate); 1.349 + if (predicate != NULL) { // right pattern that can be used by loop predication 1.350 + IfNode* iff = entry->in(0)->as_If(); 1.351 + ProjNode* uncommon_proj = iff->proj_out(1 - entry->as_Proj()->_con); 1.352 + Node* rgn = uncommon_proj->unique_ctrl_out(); 1.353 + assert(rgn->is_Region() || rgn->is_Call(), "must be a region or call uct"); 1.354 + entry = entry->in(0)->in(0); 1.355 + while (entry != NULL && entry->is_Proj() && entry->in(0)->is_If()) { 1.356 + uncommon_proj = entry->in(0)->as_If()->proj_out(1 - entry->as_Proj()->_con); 1.357 + if (uncommon_proj->unique_ctrl_out() != rgn) 1.358 + break; 1.359 + entry = entry->in(0)->in(0); 1.360 + } 1.361 + } 1.362 + } 1.363 + return entry; 1.364 +} 1.365 + 1.366 +//--------------------------find_predicate_insertion_point------------------- 1.367 +// Find a good location to insert a predicate 1.368 +ProjNode* PhaseIdealLoop::find_predicate_insertion_point(Node* start_c, Deoptimization::DeoptReason reason) { 1.369 + if (start_c == NULL || !start_c->is_Proj()) 1.370 + return NULL; 1.371 + if (start_c->as_Proj()->is_uncommon_trap_if_pattern(reason)) { 1.372 + return start_c->as_Proj(); 1.373 + } 1.374 + return NULL; 1.375 +} 1.376 + 1.377 +//--------------------------find_predicate------------------------------------ 1.378 +// Find a predicate 1.379 +Node* PhaseIdealLoop::find_predicate(Node* entry) { 1.380 + Node* predicate = NULL; 1.381 + if (LoopLimitCheck) { 1.382 + predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check); 1.383 + if (predicate != NULL) { // right pattern that can be used by loop predication 1.384 + return entry; 1.385 + } 1.386 + } 1.387 + if (UseLoopPredicate) { 1.388 + predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate); 1.389 + if (predicate != NULL) { // right pattern that can be used by loop predication 1.390 + return entry; 1.391 + } 1.392 + } 1.393 + return NULL; 1.394 +} 1.395 + 1.396 +//------------------------------Invariance----------------------------------- 1.397 +// Helper class for loop_predication_impl to compute invariance on the fly and 1.398 +// clone invariants. 1.399 +class Invariance : public StackObj { 1.400 + VectorSet _visited, _invariant; 1.401 + Node_Stack _stack; 1.402 + VectorSet _clone_visited; 1.403 + Node_List _old_new; // map of old to new (clone) 1.404 + IdealLoopTree* _lpt; 1.405 + PhaseIdealLoop* _phase; 1.406 + 1.407 + // Helper function to set up the invariance for invariance computation 1.408 + // If n is a known invariant, set up directly. Otherwise, look up the 1.409 + // the possibility to push n onto the stack for further processing. 1.410 + void visit(Node* use, Node* n) { 1.411 + if (_lpt->is_invariant(n)) { // known invariant 1.412 + _invariant.set(n->_idx); 1.413 + } else if (!n->is_CFG()) { 1.414 + Node *n_ctrl = _phase->ctrl_or_self(n); 1.415 + Node *u_ctrl = _phase->ctrl_or_self(use); // self if use is a CFG 1.416 + if (_phase->is_dominator(n_ctrl, u_ctrl)) { 1.417 + _stack.push(n, n->in(0) == NULL ? 1 : 0); 1.418 + } 1.419 + } 1.420 + } 1.421 + 1.422 + // Compute invariance for "the_node" and (possibly) all its inputs recursively 1.423 + // on the fly 1.424 + void compute_invariance(Node* n) { 1.425 + assert(_visited.test(n->_idx), "must be"); 1.426 + visit(n, n); 1.427 + while (_stack.is_nonempty()) { 1.428 + Node* n = _stack.node(); 1.429 + uint idx = _stack.index(); 1.430 + if (idx == n->req()) { // all inputs are processed 1.431 + _stack.pop(); 1.432 + // n is invariant if it's inputs are all invariant 1.433 + bool all_inputs_invariant = true; 1.434 + for (uint i = 0; i < n->req(); i++) { 1.435 + Node* in = n->in(i); 1.436 + if (in == NULL) continue; 1.437 + assert(_visited.test(in->_idx), "must have visited input"); 1.438 + if (!_invariant.test(in->_idx)) { // bad guy 1.439 + all_inputs_invariant = false; 1.440 + break; 1.441 + } 1.442 + } 1.443 + if (all_inputs_invariant) { 1.444 + _invariant.set(n->_idx); // I am a invariant too 1.445 + } 1.446 + } else { // process next input 1.447 + _stack.set_index(idx + 1); 1.448 + Node* m = n->in(idx); 1.449 + if (m != NULL && !_visited.test_set(m->_idx)) { 1.450 + visit(n, m); 1.451 + } 1.452 + } 1.453 + } 1.454 + } 1.455 + 1.456 + // Helper function to set up _old_new map for clone_nodes. 1.457 + // If n is a known invariant, set up directly ("clone" of n == n). 1.458 + // Otherwise, push n onto the stack for real cloning. 1.459 + void clone_visit(Node* n) { 1.460 + assert(_invariant.test(n->_idx), "must be invariant"); 1.461 + if (_lpt->is_invariant(n)) { // known invariant 1.462 + _old_new.map(n->_idx, n); 1.463 + } else { // to be cloned 1.464 + assert(!n->is_CFG(), "should not see CFG here"); 1.465 + _stack.push(n, n->in(0) == NULL ? 1 : 0); 1.466 + } 1.467 + } 1.468 + 1.469 + // Clone "n" and (possibly) all its inputs recursively 1.470 + void clone_nodes(Node* n, Node* ctrl) { 1.471 + clone_visit(n); 1.472 + while (_stack.is_nonempty()) { 1.473 + Node* n = _stack.node(); 1.474 + uint idx = _stack.index(); 1.475 + if (idx == n->req()) { // all inputs processed, clone n! 1.476 + _stack.pop(); 1.477 + // clone invariant node 1.478 + Node* n_cl = n->clone(); 1.479 + _old_new.map(n->_idx, n_cl); 1.480 + _phase->register_new_node(n_cl, ctrl); 1.481 + for (uint i = 0; i < n->req(); i++) { 1.482 + Node* in = n_cl->in(i); 1.483 + if (in == NULL) continue; 1.484 + n_cl->set_req(i, _old_new[in->_idx]); 1.485 + } 1.486 + } else { // process next input 1.487 + _stack.set_index(idx + 1); 1.488 + Node* m = n->in(idx); 1.489 + if (m != NULL && !_clone_visited.test_set(m->_idx)) { 1.490 + clone_visit(m); // visit the input 1.491 + } 1.492 + } 1.493 + } 1.494 + } 1.495 + 1.496 + public: 1.497 + Invariance(Arena* area, IdealLoopTree* lpt) : 1.498 + _lpt(lpt), _phase(lpt->_phase), 1.499 + _visited(area), _invariant(area), _stack(area, 10 /* guess */), 1.500 + _clone_visited(area), _old_new(area) 1.501 + {} 1.502 + 1.503 + // Map old to n for invariance computation and clone 1.504 + void map_ctrl(Node* old, Node* n) { 1.505 + assert(old->is_CFG() && n->is_CFG(), "must be"); 1.506 + _old_new.map(old->_idx, n); // "clone" of old is n 1.507 + _invariant.set(old->_idx); // old is invariant 1.508 + _clone_visited.set(old->_idx); 1.509 + } 1.510 + 1.511 + // Driver function to compute invariance 1.512 + bool is_invariant(Node* n) { 1.513 + if (!_visited.test_set(n->_idx)) 1.514 + compute_invariance(n); 1.515 + return (_invariant.test(n->_idx) != 0); 1.516 + } 1.517 + 1.518 + // Driver function to clone invariant 1.519 + Node* clone(Node* n, Node* ctrl) { 1.520 + assert(ctrl->is_CFG(), "must be"); 1.521 + assert(_invariant.test(n->_idx), "must be an invariant"); 1.522 + if (!_clone_visited.test(n->_idx)) 1.523 + clone_nodes(n, ctrl); 1.524 + return _old_new[n->_idx]; 1.525 + } 1.526 +}; 1.527 + 1.528 +//------------------------------is_range_check_if ----------------------------------- 1.529 +// Returns true if the predicate of iff is in "scale*iv + offset u< load_range(ptr)" format 1.530 +// Note: this function is particularly designed for loop predication. We require load_range 1.531 +// and offset to be loop invariant computed on the fly by "invar" 1.532 +bool IdealLoopTree::is_range_check_if(IfNode *iff, PhaseIdealLoop *phase, Invariance& invar) const { 1.533 + if (!is_loop_exit(iff)) { 1.534 + return false; 1.535 + } 1.536 + if (!iff->in(1)->is_Bool()) { 1.537 + return false; 1.538 + } 1.539 + const BoolNode *bol = iff->in(1)->as_Bool(); 1.540 + if (bol->_test._test != BoolTest::lt) { 1.541 + return false; 1.542 + } 1.543 + if (!bol->in(1)->is_Cmp()) { 1.544 + return false; 1.545 + } 1.546 + const CmpNode *cmp = bol->in(1)->as_Cmp(); 1.547 + if (cmp->Opcode() != Op_CmpU) { 1.548 + return false; 1.549 + } 1.550 + Node* range = cmp->in(2); 1.551 + if (range->Opcode() != Op_LoadRange) { 1.552 + const TypeInt* tint = phase->_igvn.type(range)->isa_int(); 1.553 + if (tint == NULL || tint->empty() || tint->_lo < 0) { 1.554 + // Allow predication on positive values that aren't LoadRanges. 1.555 + // This allows optimization of loops where the length of the 1.556 + // array is a known value and doesn't need to be loaded back 1.557 + // from the array. 1.558 + return false; 1.559 + } 1.560 + } 1.561 + if (!invar.is_invariant(range)) { 1.562 + return false; 1.563 + } 1.564 + Node *iv = _head->as_CountedLoop()->phi(); 1.565 + int scale = 0; 1.566 + Node *offset = NULL; 1.567 + if (!phase->is_scaled_iv_plus_offset(cmp->in(1), iv, &scale, &offset)) { 1.568 + return false; 1.569 + } 1.570 + if (offset && !invar.is_invariant(offset)) { // offset must be invariant 1.571 + return false; 1.572 + } 1.573 + return true; 1.574 +} 1.575 + 1.576 +//------------------------------rc_predicate----------------------------------- 1.577 +// Create a range check predicate 1.578 +// 1.579 +// for (i = init; i < limit; i += stride) { 1.580 +// a[scale*i+offset] 1.581 +// } 1.582 +// 1.583 +// Compute max(scale*i + offset) for init <= i < limit and build the predicate 1.584 +// as "max(scale*i + offset) u< a.length". 1.585 +// 1.586 +// There are two cases for max(scale*i + offset): 1.587 +// (1) stride*scale > 0 1.588 +// max(scale*i + offset) = scale*(limit-stride) + offset 1.589 +// (2) stride*scale < 0 1.590 +// max(scale*i + offset) = scale*init + offset 1.591 +BoolNode* PhaseIdealLoop::rc_predicate(IdealLoopTree *loop, Node* ctrl, 1.592 + int scale, Node* offset, 1.593 + Node* init, Node* limit, Node* stride, 1.594 + Node* range, bool upper) { 1.595 + stringStream* predString = NULL; 1.596 + if (TraceLoopPredicate) { 1.597 + predString = new stringStream(); 1.598 + predString->print("rc_predicate "); 1.599 + } 1.600 + 1.601 + Node* max_idx_expr = init; 1.602 + int stride_con = stride->get_int(); 1.603 + if ((stride_con > 0) == (scale > 0) == upper) { 1.604 + if (LoopLimitCheck) { 1.605 + // With LoopLimitCheck limit is not exact. 1.606 + // Calculate exact limit here. 1.607 + // Note, counted loop's test is '<' or '>'. 1.608 + limit = exact_limit(loop); 1.609 + max_idx_expr = new (C) SubINode(limit, stride); 1.610 + register_new_node(max_idx_expr, ctrl); 1.611 + if (TraceLoopPredicate) predString->print("(limit - stride) "); 1.612 + } else { 1.613 + max_idx_expr = new (C) SubINode(limit, stride); 1.614 + register_new_node(max_idx_expr, ctrl); 1.615 + if (TraceLoopPredicate) predString->print("(limit - stride) "); 1.616 + } 1.617 + } else { 1.618 + if (TraceLoopPredicate) predString->print("init "); 1.619 + } 1.620 + 1.621 + if (scale != 1) { 1.622 + ConNode* con_scale = _igvn.intcon(scale); 1.623 + max_idx_expr = new (C) MulINode(max_idx_expr, con_scale); 1.624 + register_new_node(max_idx_expr, ctrl); 1.625 + if (TraceLoopPredicate) predString->print("* %d ", scale); 1.626 + } 1.627 + 1.628 + if (offset && (!offset->is_Con() || offset->get_int() != 0)){ 1.629 + max_idx_expr = new (C) AddINode(max_idx_expr, offset); 1.630 + register_new_node(max_idx_expr, ctrl); 1.631 + if (TraceLoopPredicate) 1.632 + if (offset->is_Con()) predString->print("+ %d ", offset->get_int()); 1.633 + else predString->print("+ offset "); 1.634 + } 1.635 + 1.636 + CmpUNode* cmp = new (C) CmpUNode(max_idx_expr, range); 1.637 + register_new_node(cmp, ctrl); 1.638 + BoolNode* bol = new (C) BoolNode(cmp, BoolTest::lt); 1.639 + register_new_node(bol, ctrl); 1.640 + 1.641 + if (TraceLoopPredicate) { 1.642 + predString->print_cr("<u range"); 1.643 + tty->print("%s", predString->as_string()); 1.644 + } 1.645 + return bol; 1.646 +} 1.647 + 1.648 +//------------------------------ loop_predication_impl-------------------------- 1.649 +// Insert loop predicates for null checks and range checks 1.650 +bool PhaseIdealLoop::loop_predication_impl(IdealLoopTree *loop) { 1.651 + if (!UseLoopPredicate) return false; 1.652 + 1.653 + if (!loop->_head->is_Loop()) { 1.654 + // Could be a simple region when irreducible loops are present. 1.655 + return false; 1.656 + } 1.657 + LoopNode* head = loop->_head->as_Loop(); 1.658 + 1.659 + if (head->unique_ctrl_out()->Opcode() == Op_NeverBranch) { 1.660 + // do nothing for infinite loops 1.661 + return false; 1.662 + } 1.663 + 1.664 + CountedLoopNode *cl = NULL; 1.665 + if (head->is_valid_counted_loop()) { 1.666 + cl = head->as_CountedLoop(); 1.667 + // do nothing for iteration-splitted loops 1.668 + if (!cl->is_normal_loop()) return false; 1.669 + // Avoid RCE if Counted loop's test is '!='. 1.670 + BoolTest::mask bt = cl->loopexit()->test_trip(); 1.671 + if (bt != BoolTest::lt && bt != BoolTest::gt) 1.672 + cl = NULL; 1.673 + } 1.674 + 1.675 + Node* entry = head->in(LoopNode::EntryControl); 1.676 + ProjNode *predicate_proj = NULL; 1.677 + // Loop limit check predicate should be near the loop. 1.678 + if (LoopLimitCheck) { 1.679 + predicate_proj = find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check); 1.680 + if (predicate_proj != NULL) 1.681 + entry = predicate_proj->in(0)->in(0); 1.682 + } 1.683 + 1.684 + predicate_proj = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate); 1.685 + if (!predicate_proj) { 1.686 +#ifndef PRODUCT 1.687 + if (TraceLoopPredicate) { 1.688 + tty->print("missing predicate:"); 1.689 + loop->dump_head(); 1.690 + head->dump(1); 1.691 + } 1.692 +#endif 1.693 + return false; 1.694 + } 1.695 + ConNode* zero = _igvn.intcon(0); 1.696 + set_ctrl(zero, C->root()); 1.697 + 1.698 + ResourceArea *area = Thread::current()->resource_area(); 1.699 + Invariance invar(area, loop); 1.700 + 1.701 + // Create list of if-projs such that a newer proj dominates all older 1.702 + // projs in the list, and they all dominate loop->tail() 1.703 + Node_List if_proj_list(area); 1.704 + Node *current_proj = loop->tail(); //start from tail 1.705 + while (current_proj != head) { 1.706 + if (loop == get_loop(current_proj) && // still in the loop ? 1.707 + current_proj->is_Proj() && // is a projection ? 1.708 + current_proj->in(0)->Opcode() == Op_If) { // is a if projection ? 1.709 + if_proj_list.push(current_proj); 1.710 + } 1.711 + current_proj = idom(current_proj); 1.712 + } 1.713 + 1.714 + bool hoisted = false; // true if at least one proj is promoted 1.715 + while (if_proj_list.size() > 0) { 1.716 + // Following are changed to nonnull when a predicate can be hoisted 1.717 + ProjNode* new_predicate_proj = NULL; 1.718 + 1.719 + ProjNode* proj = if_proj_list.pop()->as_Proj(); 1.720 + IfNode* iff = proj->in(0)->as_If(); 1.721 + 1.722 + if (!proj->is_uncommon_trap_if_pattern(Deoptimization::Reason_none)) { 1.723 + if (loop->is_loop_exit(iff)) { 1.724 + // stop processing the remaining projs in the list because the execution of them 1.725 + // depends on the condition of "iff" (iff->in(1)). 1.726 + break; 1.727 + } else { 1.728 + // Both arms are inside the loop. There are two cases: 1.729 + // (1) there is one backward branch. In this case, any remaining proj 1.730 + // in the if_proj list post-dominates "iff". So, the condition of "iff" 1.731 + // does not determine the execution the remining projs directly, and we 1.732 + // can safely continue. 1.733 + // (2) both arms are forwarded, i.e. a diamond shape. In this case, "proj" 1.734 + // does not dominate loop->tail(), so it can not be in the if_proj list. 1.735 + continue; 1.736 + } 1.737 + } 1.738 + 1.739 + Node* test = iff->in(1); 1.740 + if (!test->is_Bool()){ //Conv2B, ... 1.741 + continue; 1.742 + } 1.743 + BoolNode* bol = test->as_Bool(); 1.744 + if (invar.is_invariant(bol)) { 1.745 + // Invariant test 1.746 + new_predicate_proj = create_new_if_for_predicate(predicate_proj, NULL, 1.747 + Deoptimization::Reason_predicate); 1.748 + Node* ctrl = new_predicate_proj->in(0)->as_If()->in(0); 1.749 + BoolNode* new_predicate_bol = invar.clone(bol, ctrl)->as_Bool(); 1.750 + 1.751 + // Negate test if necessary 1.752 + bool negated = false; 1.753 + if (proj->_con != predicate_proj->_con) { 1.754 + new_predicate_bol = new (C) BoolNode(new_predicate_bol->in(1), new_predicate_bol->_test.negate()); 1.755 + register_new_node(new_predicate_bol, ctrl); 1.756 + negated = true; 1.757 + } 1.758 + IfNode* new_predicate_iff = new_predicate_proj->in(0)->as_If(); 1.759 + _igvn.hash_delete(new_predicate_iff); 1.760 + new_predicate_iff->set_req(1, new_predicate_bol); 1.761 +#ifndef PRODUCT 1.762 + if (TraceLoopPredicate) { 1.763 + tty->print("Predicate invariant if%s: %d ", negated ? " negated" : "", new_predicate_iff->_idx); 1.764 + loop->dump_head(); 1.765 + } else if (TraceLoopOpts) { 1.766 + tty->print("Predicate IC "); 1.767 + loop->dump_head(); 1.768 + } 1.769 +#endif 1.770 + } else if ((cl != NULL) && (proj->_con == predicate_proj->_con) && 1.771 + loop->is_range_check_if(iff, this, invar)) { 1.772 + 1.773 + // Range check for counted loops 1.774 + const Node* cmp = bol->in(1)->as_Cmp(); 1.775 + Node* idx = cmp->in(1); 1.776 + assert(!invar.is_invariant(idx), "index is variant"); 1.777 + Node* rng = cmp->in(2); 1.778 + assert(rng->Opcode() == Op_LoadRange || _igvn.type(rng)->is_int() >= 0, "must be"); 1.779 + assert(invar.is_invariant(rng), "range must be invariant"); 1.780 + int scale = 1; 1.781 + Node* offset = zero; 1.782 + bool ok = is_scaled_iv_plus_offset(idx, cl->phi(), &scale, &offset); 1.783 + assert(ok, "must be index expression"); 1.784 + 1.785 + Node* init = cl->init_trip(); 1.786 + Node* limit = cl->limit(); 1.787 + Node* stride = cl->stride(); 1.788 + 1.789 + // Build if's for the upper and lower bound tests. The 1.790 + // lower_bound test will dominate the upper bound test and all 1.791 + // cloned or created nodes will use the lower bound test as 1.792 + // their declared control. 1.793 + ProjNode* lower_bound_proj = create_new_if_for_predicate(predicate_proj, NULL, Deoptimization::Reason_predicate); 1.794 + ProjNode* upper_bound_proj = create_new_if_for_predicate(predicate_proj, NULL, Deoptimization::Reason_predicate); 1.795 + assert(upper_bound_proj->in(0)->as_If()->in(0) == lower_bound_proj, "should dominate"); 1.796 + Node *ctrl = lower_bound_proj->in(0)->as_If()->in(0); 1.797 + 1.798 + // Perform cloning to keep Invariance state correct since the 1.799 + // late schedule will place invariant things in the loop. 1.800 + rng = invar.clone(rng, ctrl); 1.801 + if (offset && offset != zero) { 1.802 + assert(invar.is_invariant(offset), "offset must be loop invariant"); 1.803 + offset = invar.clone(offset, ctrl); 1.804 + } 1.805 + 1.806 + // Test the lower bound 1.807 + Node* lower_bound_bol = rc_predicate(loop, ctrl, scale, offset, init, limit, stride, rng, false); 1.808 + IfNode* lower_bound_iff = lower_bound_proj->in(0)->as_If(); 1.809 + _igvn.hash_delete(lower_bound_iff); 1.810 + lower_bound_iff->set_req(1, lower_bound_bol); 1.811 + if (TraceLoopPredicate) tty->print_cr("lower bound check if: %d", lower_bound_iff->_idx); 1.812 + 1.813 + // Test the upper bound 1.814 + Node* upper_bound_bol = rc_predicate(loop, lower_bound_proj, scale, offset, init, limit, stride, rng, true); 1.815 + IfNode* upper_bound_iff = upper_bound_proj->in(0)->as_If(); 1.816 + _igvn.hash_delete(upper_bound_iff); 1.817 + upper_bound_iff->set_req(1, upper_bound_bol); 1.818 + if (TraceLoopPredicate) tty->print_cr("upper bound check if: %d", lower_bound_iff->_idx); 1.819 + 1.820 + // Fall through into rest of the clean up code which will move 1.821 + // any dependent nodes onto the upper bound test. 1.822 + new_predicate_proj = upper_bound_proj; 1.823 + 1.824 +#ifndef PRODUCT 1.825 + if (TraceLoopOpts && !TraceLoopPredicate) { 1.826 + tty->print("Predicate RC "); 1.827 + loop->dump_head(); 1.828 + } 1.829 +#endif 1.830 + } else { 1.831 + // Loop variant check (for example, range check in non-counted loop) 1.832 + // with uncommon trap. 1.833 + continue; 1.834 + } 1.835 + assert(new_predicate_proj != NULL, "sanity"); 1.836 + // Success - attach condition (new_predicate_bol) to predicate if 1.837 + invar.map_ctrl(proj, new_predicate_proj); // so that invariance test can be appropriate 1.838 + 1.839 + // Eliminate the old If in the loop body 1.840 + dominated_by( new_predicate_proj, iff, proj->_con != new_predicate_proj->_con ); 1.841 + 1.842 + hoisted = true; 1.843 + C->set_major_progress(); 1.844 + } // end while 1.845 + 1.846 +#ifndef PRODUCT 1.847 + // report that the loop predication has been actually performed 1.848 + // for this loop 1.849 + if (TraceLoopPredicate && hoisted) { 1.850 + tty->print("Loop Predication Performed:"); 1.851 + loop->dump_head(); 1.852 + } 1.853 +#endif 1.854 + 1.855 + return hoisted; 1.856 +} 1.857 + 1.858 +//------------------------------loop_predication-------------------------------- 1.859 +// driver routine for loop predication optimization 1.860 +bool IdealLoopTree::loop_predication( PhaseIdealLoop *phase) { 1.861 + bool hoisted = false; 1.862 + // Recursively promote predicates 1.863 + if (_child) { 1.864 + hoisted = _child->loop_predication( phase); 1.865 + } 1.866 + 1.867 + // self 1.868 + if (!_irreducible && !tail()->is_top()) { 1.869 + hoisted |= phase->loop_predication_impl(this); 1.870 + } 1.871 + 1.872 + if (_next) { //sibling 1.873 + hoisted |= _next->loop_predication( phase); 1.874 + } 1.875 + 1.876 + return hoisted; 1.877 +}