1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/src/share/vm/opto/loopPredicate.cpp Sat Apr 02 10:54:15 2011 -0700 1.3 @@ -0,0 +1,960 @@ 1.4 +/* 1.5 + * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved. 1.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 1.7 + * 1.8 + * This code is free software; you can redistribute it and/or modify it 1.9 + * under the terms of the GNU General Public License version 2 only, as 1.10 + * published by the Free Software Foundation. 1.11 + * 1.12 + * This code is distributed in the hope that it will be useful, but WITHOUT 1.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 1.14 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 1.15 + * version 2 for more details (a copy is included in the LICENSE file that 1.16 + * accompanied this code). 1.17 + * 1.18 + * You should have received a copy of the GNU General Public License version 1.19 + * 2 along with this work; if not, write to the Free Software Foundation, 1.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 1.21 + * 1.22 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 1.23 + * or visit www.oracle.com if you need additional information or have any 1.24 + * questions. 1.25 + * 1.26 + */ 1.27 + 1.28 +#include "precompiled.hpp" 1.29 +#include "opto/loopnode.hpp" 1.30 +#include "opto/addnode.hpp" 1.31 +#include "opto/callnode.hpp" 1.32 +#include "opto/connode.hpp" 1.33 +#include "opto/loopnode.hpp" 1.34 +#include "opto/mulnode.hpp" 1.35 +#include "opto/rootnode.hpp" 1.36 +#include "opto/subnode.hpp" 1.37 + 1.38 +/* 1.39 + * The general idea of Loop Predication is to insert a predicate on the entry 1.40 + * path to a loop, and raise a uncommon trap if the check of the condition fails. 1.41 + * The condition checks are promoted from inside the loop body, and thus 1.42 + * the checks inside the loop could be eliminated. Currently, loop predication 1.43 + * optimization has been applied to remove array range check and loop invariant 1.44 + * checks (such as null checks). 1.45 +*/ 1.46 + 1.47 +//-------------------------------is_uncommon_trap_proj---------------------------- 1.48 +// Return true if proj is the form of "proj->[region->..]call_uct" 1.49 +bool PhaseIdealLoop::is_uncommon_trap_proj(ProjNode* proj, Deoptimization::DeoptReason reason) { 1.50 + int path_limit = 10; 1.51 + assert(proj, "invalid argument"); 1.52 + Node* out = proj; 1.53 + for (int ct = 0; ct < path_limit; ct++) { 1.54 + out = out->unique_ctrl_out(); 1.55 + if (out == NULL) 1.56 + return false; 1.57 + if (out->is_CallStaticJava()) { 1.58 + int req = out->as_CallStaticJava()->uncommon_trap_request(); 1.59 + if (req != 0) { 1.60 + Deoptimization::DeoptReason trap_reason = Deoptimization::trap_request_reason(req); 1.61 + if (trap_reason == reason || reason == Deoptimization::Reason_none) { 1.62 + return true; 1.63 + } 1.64 + } 1.65 + return false; // don't do further after call 1.66 + } 1.67 + if (out->Opcode() != Op_Region) 1.68 + return false; 1.69 + } 1.70 + return false; 1.71 +} 1.72 + 1.73 +//-------------------------------is_uncommon_trap_if_pattern------------------------- 1.74 +// Return true for "if(test)-> proj -> ... 1.75 +// | 1.76 +// V 1.77 +// other_proj->[region->..]call_uct" 1.78 +// 1.79 +// "must_reason_predicate" means the uct reason must be Reason_predicate 1.80 +bool PhaseIdealLoop::is_uncommon_trap_if_pattern(ProjNode *proj, Deoptimization::DeoptReason reason) { 1.81 + Node *in0 = proj->in(0); 1.82 + if (!in0->is_If()) return false; 1.83 + // Variation of a dead If node. 1.84 + if (in0->outcnt() < 2) return false; 1.85 + IfNode* iff = in0->as_If(); 1.86 + 1.87 + // we need "If(Conv2B(Opaque1(...)))" pattern for reason_predicate 1.88 + if (reason != Deoptimization::Reason_none) { 1.89 + if (iff->in(1)->Opcode() != Op_Conv2B || 1.90 + iff->in(1)->in(1)->Opcode() != Op_Opaque1) { 1.91 + return false; 1.92 + } 1.93 + } 1.94 + 1.95 + ProjNode* other_proj = iff->proj_out(1-proj->_con)->as_Proj(); 1.96 + if (is_uncommon_trap_proj(other_proj, reason)) { 1.97 + assert(reason == Deoptimization::Reason_none || 1.98 + Compile::current()->is_predicate_opaq(iff->in(1)->in(1)), "should be on the list"); 1.99 + return true; 1.100 + } 1.101 + return false; 1.102 +} 1.103 + 1.104 +//-------------------------------register_control------------------------- 1.105 +void PhaseIdealLoop::register_control(Node* n, IdealLoopTree *loop, Node* pred) { 1.106 + assert(n->is_CFG(), "must be control node"); 1.107 + _igvn.register_new_node_with_optimizer(n); 1.108 + loop->_body.push(n); 1.109 + set_loop(n, loop); 1.110 + // When called from beautify_loops() idom is not constructed yet. 1.111 + if (_idom != NULL) { 1.112 + set_idom(n, pred, dom_depth(pred)); 1.113 + } 1.114 +} 1.115 + 1.116 +//------------------------------create_new_if_for_predicate------------------------ 1.117 +// create a new if above the uct_if_pattern for the predicate to be promoted. 1.118 +// 1.119 +// before after 1.120 +// ---------- ---------- 1.121 +// ctrl ctrl 1.122 +// | | 1.123 +// | | 1.124 +// v v 1.125 +// iff new_iff 1.126 +// / \ / \ 1.127 +// / \ / \ 1.128 +// v v v v 1.129 +// uncommon_proj cont_proj if_uct if_cont 1.130 +// \ | | | | 1.131 +// \ | | | | 1.132 +// v v v | v 1.133 +// rgn loop | iff 1.134 +// | | / \ 1.135 +// | | / \ 1.136 +// v | v v 1.137 +// uncommon_trap | uncommon_proj cont_proj 1.138 +// \ \ | | 1.139 +// \ \ | | 1.140 +// v v v v 1.141 +// rgn loop 1.142 +// | 1.143 +// | 1.144 +// v 1.145 +// uncommon_trap 1.146 +// 1.147 +// 1.148 +// We will create a region to guard the uct call if there is no one there. 1.149 +// The true projecttion (if_cont) of the new_iff is returned. 1.150 +// This code is also used to clone predicates to clonned loops. 1.151 +ProjNode* PhaseIdealLoop::create_new_if_for_predicate(ProjNode* cont_proj, Node* new_entry, 1.152 + Deoptimization::DeoptReason reason) { 1.153 + assert(is_uncommon_trap_if_pattern(cont_proj, reason), "must be a uct if pattern!"); 1.154 + IfNode* iff = cont_proj->in(0)->as_If(); 1.155 + 1.156 + ProjNode *uncommon_proj = iff->proj_out(1 - cont_proj->_con); 1.157 + Node *rgn = uncommon_proj->unique_ctrl_out(); 1.158 + assert(rgn->is_Region() || rgn->is_Call(), "must be a region or call uct"); 1.159 + 1.160 + uint proj_index = 1; // region's edge corresponding to uncommon_proj 1.161 + if (!rgn->is_Region()) { // create a region to guard the call 1.162 + assert(rgn->is_Call(), "must be call uct"); 1.163 + CallNode* call = rgn->as_Call(); 1.164 + IdealLoopTree* loop = get_loop(call); 1.165 + rgn = new (C, 1) RegionNode(1); 1.166 + rgn->add_req(uncommon_proj); 1.167 + register_control(rgn, loop, uncommon_proj); 1.168 + _igvn.hash_delete(call); 1.169 + call->set_req(0, rgn); 1.170 + // When called from beautify_loops() idom is not constructed yet. 1.171 + if (_idom != NULL) { 1.172 + set_idom(call, rgn, dom_depth(rgn)); 1.173 + } 1.174 + } else { 1.175 + // Find region's edge corresponding to uncommon_proj 1.176 + for (; proj_index < rgn->req(); proj_index++) 1.177 + if (rgn->in(proj_index) == uncommon_proj) break; 1.178 + assert(proj_index < rgn->req(), "sanity"); 1.179 + } 1.180 + 1.181 + Node* entry = iff->in(0); 1.182 + if (new_entry != NULL) { 1.183 + // Clonning the predicate to new location. 1.184 + entry = new_entry; 1.185 + } 1.186 + // Create new_iff 1.187 + IdealLoopTree* lp = get_loop(entry); 1.188 + IfNode *new_iff = iff->clone()->as_If(); 1.189 + new_iff->set_req(0, entry); 1.190 + register_control(new_iff, lp, entry); 1.191 + Node *if_cont = new (C, 1) IfTrueNode(new_iff); 1.192 + Node *if_uct = new (C, 1) IfFalseNode(new_iff); 1.193 + if (cont_proj->is_IfFalse()) { 1.194 + // Swap 1.195 + Node* tmp = if_uct; if_uct = if_cont; if_cont = tmp; 1.196 + } 1.197 + register_control(if_cont, lp, new_iff); 1.198 + register_control(if_uct, get_loop(rgn), new_iff); 1.199 + 1.200 + // if_uct to rgn 1.201 + _igvn.hash_delete(rgn); 1.202 + rgn->add_req(if_uct); 1.203 + // When called from beautify_loops() idom is not constructed yet. 1.204 + if (_idom != NULL) { 1.205 + Node* ridom = idom(rgn); 1.206 + Node* nrdom = dom_lca(ridom, new_iff); 1.207 + set_idom(rgn, nrdom, dom_depth(rgn)); 1.208 + } 1.209 + 1.210 + // If rgn has phis add new edges which has the same 1.211 + // value as on original uncommon_proj pass. 1.212 + assert(rgn->in(rgn->req() -1) == if_uct, "new edge should be last"); 1.213 + bool has_phi = false; 1.214 + for (DUIterator_Fast imax, i = rgn->fast_outs(imax); i < imax; i++) { 1.215 + Node* use = rgn->fast_out(i); 1.216 + if (use->is_Phi() && use->outcnt() > 0) { 1.217 + assert(use->in(0) == rgn, ""); 1.218 + _igvn.hash_delete(use); 1.219 + use->add_req(use->in(proj_index)); 1.220 + _igvn._worklist.push(use); 1.221 + has_phi = true; 1.222 + } 1.223 + } 1.224 + assert(!has_phi || rgn->req() > 3, "no phis when region is created"); 1.225 + 1.226 + if (new_entry == NULL) { 1.227 + // Attach if_cont to iff 1.228 + _igvn.hash_delete(iff); 1.229 + iff->set_req(0, if_cont); 1.230 + if (_idom != NULL) { 1.231 + set_idom(iff, if_cont, dom_depth(iff)); 1.232 + } 1.233 + } 1.234 + return if_cont->as_Proj(); 1.235 +} 1.236 + 1.237 +//------------------------------create_new_if_for_predicate------------------------ 1.238 +// Create a new if below new_entry for the predicate to be cloned (IGVN optimization) 1.239 +ProjNode* PhaseIterGVN::create_new_if_for_predicate(ProjNode* cont_proj, Node* new_entry, 1.240 + Deoptimization::DeoptReason reason) { 1.241 + assert(new_entry != 0, "only used for clone predicate"); 1.242 + assert(PhaseIdealLoop::is_uncommon_trap_if_pattern(cont_proj, reason), "must be a uct if pattern!"); 1.243 + IfNode* iff = cont_proj->in(0)->as_If(); 1.244 + 1.245 + ProjNode *uncommon_proj = iff->proj_out(1 - cont_proj->_con); 1.246 + Node *rgn = uncommon_proj->unique_ctrl_out(); 1.247 + assert(rgn->is_Region() || rgn->is_Call(), "must be a region or call uct"); 1.248 + 1.249 + uint proj_index = 1; // region's edge corresponding to uncommon_proj 1.250 + if (!rgn->is_Region()) { // create a region to guard the call 1.251 + assert(rgn->is_Call(), "must be call uct"); 1.252 + CallNode* call = rgn->as_Call(); 1.253 + rgn = new (C, 1) RegionNode(1); 1.254 + register_new_node_with_optimizer(rgn); 1.255 + rgn->add_req(uncommon_proj); 1.256 + hash_delete(call); 1.257 + call->set_req(0, rgn); 1.258 + } else { 1.259 + // Find region's edge corresponding to uncommon_proj 1.260 + for (; proj_index < rgn->req(); proj_index++) 1.261 + if (rgn->in(proj_index) == uncommon_proj) break; 1.262 + assert(proj_index < rgn->req(), "sanity"); 1.263 + } 1.264 + 1.265 + // Create new_iff in new location. 1.266 + IfNode *new_iff = iff->clone()->as_If(); 1.267 + new_iff->set_req(0, new_entry); 1.268 + 1.269 + register_new_node_with_optimizer(new_iff); 1.270 + Node *if_cont = new (C, 1) IfTrueNode(new_iff); 1.271 + Node *if_uct = new (C, 1) IfFalseNode(new_iff); 1.272 + if (cont_proj->is_IfFalse()) { 1.273 + // Swap 1.274 + Node* tmp = if_uct; if_uct = if_cont; if_cont = tmp; 1.275 + } 1.276 + register_new_node_with_optimizer(if_cont); 1.277 + register_new_node_with_optimizer(if_uct); 1.278 + 1.279 + // if_uct to rgn 1.280 + hash_delete(rgn); 1.281 + rgn->add_req(if_uct); 1.282 + 1.283 + // If rgn has phis add corresponding new edges which has the same 1.284 + // value as on original uncommon_proj pass. 1.285 + assert(rgn->in(rgn->req() -1) == if_uct, "new edge should be last"); 1.286 + bool has_phi = false; 1.287 + for (DUIterator_Fast imax, i = rgn->fast_outs(imax); i < imax; i++) { 1.288 + Node* use = rgn->fast_out(i); 1.289 + if (use->is_Phi() && use->outcnt() > 0) { 1.290 + hash_delete(use); 1.291 + use->add_req(use->in(proj_index)); 1.292 + _worklist.push(use); 1.293 + has_phi = true; 1.294 + } 1.295 + } 1.296 + assert(!has_phi || rgn->req() > 3, "no phis when region is created"); 1.297 + 1.298 + return if_cont->as_Proj(); 1.299 +} 1.300 + 1.301 +//--------------------------clone_predicate----------------------- 1.302 +ProjNode* PhaseIdealLoop::clone_predicate(ProjNode* predicate_proj, Node* new_entry, 1.303 + Deoptimization::DeoptReason reason, 1.304 + PhaseIdealLoop* loop_phase, 1.305 + PhaseIterGVN* igvn) { 1.306 + ProjNode* new_predicate_proj; 1.307 + if (loop_phase != NULL) { 1.308 + new_predicate_proj = loop_phase->create_new_if_for_predicate(predicate_proj, new_entry, reason); 1.309 + } else { 1.310 + new_predicate_proj = igvn->create_new_if_for_predicate(predicate_proj, new_entry, reason); 1.311 + } 1.312 + IfNode* iff = new_predicate_proj->in(0)->as_If(); 1.313 + Node* ctrl = iff->in(0); 1.314 + 1.315 + // Match original condition since predicate's projections could be swapped. 1.316 + assert(predicate_proj->in(0)->in(1)->in(1)->Opcode()==Op_Opaque1, "must be"); 1.317 + Node* opq = new (igvn->C, 2) Opaque1Node(igvn->C, predicate_proj->in(0)->in(1)->in(1)->in(1)); 1.318 + igvn->C->add_predicate_opaq(opq); 1.319 + 1.320 + Node* bol = new (igvn->C, 2) Conv2BNode(opq); 1.321 + if (loop_phase != NULL) { 1.322 + loop_phase->register_new_node(opq, ctrl); 1.323 + loop_phase->register_new_node(bol, ctrl); 1.324 + } else { 1.325 + igvn->register_new_node_with_optimizer(opq); 1.326 + igvn->register_new_node_with_optimizer(bol); 1.327 + } 1.328 + igvn->hash_delete(iff); 1.329 + iff->set_req(1, bol); 1.330 + return new_predicate_proj; 1.331 +} 1.332 + 1.333 +//--------------------------move_predicate----------------------- 1.334 +// Cut predicate from old place and move it to new. 1.335 +ProjNode* PhaseIdealLoop::move_predicate(ProjNode* predicate_proj, Node* new_entry, 1.336 + Deoptimization::DeoptReason reason, 1.337 + PhaseIdealLoop* loop_phase, 1.338 + PhaseIterGVN* igvn) { 1.339 + assert(new_entry != NULL, "must be"); 1.340 + assert(predicate_proj->in(0)->in(1)->in(1)->Opcode()==Op_Opaque1, "must be"); 1.341 + IfNode* iff = predicate_proj->in(0)->as_If(); 1.342 + Node* old_entry = iff->in(0); 1.343 + 1.344 + // Cut predicate from old place. 1.345 + Node* old = predicate_proj; 1.346 + igvn->_worklist.push(old); 1.347 + for (DUIterator_Last imin, i = old->last_outs(imin); i >= imin; ) { 1.348 + Node* use = old->last_out(i); // for each use... 1.349 + igvn->hash_delete(use); 1.350 + igvn->_worklist.push(use); 1.351 + // Update use-def info 1.352 + uint uses_found = 0; 1.353 + for (uint j = 0; j < use->req(); j++) { 1.354 + if (use->in(j) == old) { 1.355 + use->set_req(j, old_entry); 1.356 + uses_found++; 1.357 + if (loop_phase != NULL) { 1.358 + if (use->is_CFG()) { 1.359 + // When called from beautify_loops() idom is not constructed yet. 1.360 + if (loop_phase->_idom != NULL) 1.361 + loop_phase->set_idom(use, old_entry, loop_phase->dom_depth(use)); 1.362 + } else { 1.363 + loop_phase->set_ctrl(use, old_entry); 1.364 + } 1.365 + } 1.366 + } 1.367 + } 1.368 + i -= uses_found; // we deleted 1 or more copies of this edge 1.369 + } 1.370 + 1.371 + // Move predicate. 1.372 + igvn->hash_delete(iff); 1.373 + iff->set_req(0, new_entry); 1.374 + igvn->_worklist.push(iff); 1.375 + 1.376 + if (loop_phase != NULL) { 1.377 + // Fix up idom and ctrl. 1.378 + loop_phase->set_ctrl(iff->in(1), new_entry); 1.379 + loop_phase->set_ctrl(iff->in(1)->in(1), new_entry); 1.380 + // When called from beautify_loops() idom is not constructed yet. 1.381 + if (loop_phase->_idom != NULL) 1.382 + loop_phase->set_idom(iff, new_entry, loop_phase->dom_depth(iff)); 1.383 + } 1.384 + 1.385 + return predicate_proj; 1.386 +} 1.387 + 1.388 +//--------------------------clone_loop_predicates----------------------- 1.389 +// Interface from IGVN 1.390 +Node* PhaseIterGVN::clone_loop_predicates(Node* old_entry, Node* new_entry) { 1.391 + return PhaseIdealLoop::clone_loop_predicates(old_entry, new_entry, false, NULL, this); 1.392 +} 1.393 +Node* PhaseIterGVN::move_loop_predicates(Node* old_entry, Node* new_entry) { 1.394 + return PhaseIdealLoop::clone_loop_predicates(old_entry, new_entry, true, NULL, this); 1.395 +} 1.396 + 1.397 +// Interface from PhaseIdealLoop 1.398 +Node* PhaseIdealLoop::clone_loop_predicates(Node* old_entry, Node* new_entry) { 1.399 + return clone_loop_predicates(old_entry, new_entry, false, this, &this->_igvn); 1.400 +} 1.401 +Node* PhaseIdealLoop::move_loop_predicates(Node* old_entry, Node* new_entry) { 1.402 + return clone_loop_predicates(old_entry, new_entry, true, this, &this->_igvn); 1.403 +} 1.404 + 1.405 +// Clone loop predicates to cloned loops (peeled, unswitched, split_if). 1.406 +Node* PhaseIdealLoop::clone_loop_predicates(Node* old_entry, Node* new_entry, 1.407 + bool move_predicates, 1.408 + PhaseIdealLoop* loop_phase, 1.409 + PhaseIterGVN* igvn) { 1.410 +#ifdef ASSERT 1.411 + if (new_entry == NULL || !(new_entry->is_Proj() || new_entry->is_Region() || new_entry->is_SafePoint())) { 1.412 + if (new_entry != NULL) 1.413 + new_entry->dump(); 1.414 + assert(false, "not IfTrue, IfFalse, Region or SafePoint"); 1.415 + } 1.416 +#endif 1.417 + // Search original predicates 1.418 + Node* entry = old_entry; 1.419 + if (UseLoopPredicate) { 1.420 + ProjNode* predicate_proj = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate); 1.421 + if (predicate_proj != NULL) { // right pattern that can be used by loop predication 1.422 + assert(entry->in(0)->in(1)->in(1)->Opcode()==Op_Opaque1, "must be"); 1.423 + if (move_predicates) { 1.424 + new_entry = move_predicate(predicate_proj, new_entry, 1.425 + Deoptimization::Reason_predicate, 1.426 + loop_phase, igvn); 1.427 + assert(new_entry == predicate_proj, "old predicate fall through projection"); 1.428 + } else { 1.429 + // clone predicate 1.430 + new_entry = clone_predicate(predicate_proj, new_entry, 1.431 + Deoptimization::Reason_predicate, 1.432 + loop_phase, igvn); 1.433 + assert(new_entry != NULL && new_entry->is_Proj(), "IfTrue or IfFalse after clone predicate"); 1.434 + } 1.435 + if (TraceLoopPredicate) { 1.436 + tty->print_cr("Loop Predicate %s: ", move_predicates ? "moved" : "cloned"); 1.437 + debug_only( new_entry->in(0)->dump(); ) 1.438 + } 1.439 + } 1.440 + } 1.441 + return new_entry; 1.442 +} 1.443 + 1.444 +//--------------------------eliminate_loop_predicates----------------------- 1.445 +void PhaseIdealLoop::eliminate_loop_predicates(Node* entry) { 1.446 + if (UseLoopPredicate) { 1.447 + ProjNode* predicate_proj = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate); 1.448 + if (predicate_proj != NULL) { // right pattern that can be used by loop predication 1.449 + Node* n = entry->in(0)->in(1)->in(1); 1.450 + assert(n->Opcode()==Op_Opaque1, "must be"); 1.451 + // Remove Opaque1 node from predicates list. 1.452 + // IGVN will remove this predicate check. 1.453 + _igvn.replace_node(n, n->in(1)); 1.454 + } 1.455 + } 1.456 +} 1.457 + 1.458 +//--------------------------skip_loop_predicates------------------------------ 1.459 +// Skip related predicates. 1.460 +Node* PhaseIdealLoop::skip_loop_predicates(Node* entry) { 1.461 + Node* predicate = NULL; 1.462 + if (UseLoopPredicate) { 1.463 + predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate); 1.464 + if (predicate != NULL) { // right pattern that can be used by loop predication 1.465 + assert(entry->is_Proj() && entry->in(0)->in(1)->in(1)->Opcode()==Op_Opaque1, "must be"); 1.466 + IfNode* iff = entry->in(0)->as_If(); 1.467 + ProjNode* uncommon_proj = iff->proj_out(1 - entry->as_Proj()->_con); 1.468 + Node* rgn = uncommon_proj->unique_ctrl_out(); 1.469 + assert(rgn->is_Region() || rgn->is_Call(), "must be a region or call uct"); 1.470 + entry = entry->in(0)->in(0); 1.471 + while (entry != NULL && entry->is_Proj() && entry->in(0)->is_If()) { 1.472 + uncommon_proj = entry->in(0)->as_If()->proj_out(1 - entry->as_Proj()->_con); 1.473 + if (uncommon_proj->unique_ctrl_out() != rgn) 1.474 + break; 1.475 + entry = entry->in(0)->in(0); 1.476 + } 1.477 + } 1.478 + } 1.479 + return entry; 1.480 +} 1.481 + 1.482 +//--------------------------find_predicate_insertion_point------------------- 1.483 +// Find a good location to insert a predicate 1.484 +ProjNode* PhaseIdealLoop::find_predicate_insertion_point(Node* start_c, Deoptimization::DeoptReason reason) { 1.485 + if (start_c == NULL || !start_c->is_Proj()) 1.486 + return NULL; 1.487 + if (is_uncommon_trap_if_pattern(start_c->as_Proj(), reason)) { 1.488 + return start_c->as_Proj(); 1.489 + } 1.490 + return NULL; 1.491 +} 1.492 + 1.493 +//--------------------------find_predicate------------------------------------ 1.494 +// Find a predicate 1.495 +Node* PhaseIdealLoop::find_predicate(Node* entry) { 1.496 + Node* predicate = NULL; 1.497 + if (UseLoopPredicate) { 1.498 + predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate); 1.499 + if (predicate != NULL) { // right pattern that can be used by loop predication 1.500 + assert(entry->in(0)->in(1)->in(1)->Opcode()==Op_Opaque1, "must be"); 1.501 + return entry; 1.502 + } 1.503 + } 1.504 + return NULL; 1.505 +} 1.506 + 1.507 +//------------------------------Invariance----------------------------------- 1.508 +// Helper class for loop_predication_impl to compute invariance on the fly and 1.509 +// clone invariants. 1.510 +class Invariance : public StackObj { 1.511 + VectorSet _visited, _invariant; 1.512 + Node_Stack _stack; 1.513 + VectorSet _clone_visited; 1.514 + Node_List _old_new; // map of old to new (clone) 1.515 + IdealLoopTree* _lpt; 1.516 + PhaseIdealLoop* _phase; 1.517 + 1.518 + // Helper function to set up the invariance for invariance computation 1.519 + // If n is a known invariant, set up directly. Otherwise, look up the 1.520 + // the possibility to push n onto the stack for further processing. 1.521 + void visit(Node* use, Node* n) { 1.522 + if (_lpt->is_invariant(n)) { // known invariant 1.523 + _invariant.set(n->_idx); 1.524 + } else if (!n->is_CFG()) { 1.525 + Node *n_ctrl = _phase->ctrl_or_self(n); 1.526 + Node *u_ctrl = _phase->ctrl_or_self(use); // self if use is a CFG 1.527 + if (_phase->is_dominator(n_ctrl, u_ctrl)) { 1.528 + _stack.push(n, n->in(0) == NULL ? 1 : 0); 1.529 + } 1.530 + } 1.531 + } 1.532 + 1.533 + // Compute invariance for "the_node" and (possibly) all its inputs recursively 1.534 + // on the fly 1.535 + void compute_invariance(Node* n) { 1.536 + assert(_visited.test(n->_idx), "must be"); 1.537 + visit(n, n); 1.538 + while (_stack.is_nonempty()) { 1.539 + Node* n = _stack.node(); 1.540 + uint idx = _stack.index(); 1.541 + if (idx == n->req()) { // all inputs are processed 1.542 + _stack.pop(); 1.543 + // n is invariant if it's inputs are all invariant 1.544 + bool all_inputs_invariant = true; 1.545 + for (uint i = 0; i < n->req(); i++) { 1.546 + Node* in = n->in(i); 1.547 + if (in == NULL) continue; 1.548 + assert(_visited.test(in->_idx), "must have visited input"); 1.549 + if (!_invariant.test(in->_idx)) { // bad guy 1.550 + all_inputs_invariant = false; 1.551 + break; 1.552 + } 1.553 + } 1.554 + if (all_inputs_invariant) { 1.555 + _invariant.set(n->_idx); // I am a invariant too 1.556 + } 1.557 + } else { // process next input 1.558 + _stack.set_index(idx + 1); 1.559 + Node* m = n->in(idx); 1.560 + if (m != NULL && !_visited.test_set(m->_idx)) { 1.561 + visit(n, m); 1.562 + } 1.563 + } 1.564 + } 1.565 + } 1.566 + 1.567 + // Helper function to set up _old_new map for clone_nodes. 1.568 + // If n is a known invariant, set up directly ("clone" of n == n). 1.569 + // Otherwise, push n onto the stack for real cloning. 1.570 + void clone_visit(Node* n) { 1.571 + assert(_invariant.test(n->_idx), "must be invariant"); 1.572 + if (_lpt->is_invariant(n)) { // known invariant 1.573 + _old_new.map(n->_idx, n); 1.574 + } else { // to be cloned 1.575 + assert(!n->is_CFG(), "should not see CFG here"); 1.576 + _stack.push(n, n->in(0) == NULL ? 1 : 0); 1.577 + } 1.578 + } 1.579 + 1.580 + // Clone "n" and (possibly) all its inputs recursively 1.581 + void clone_nodes(Node* n, Node* ctrl) { 1.582 + clone_visit(n); 1.583 + while (_stack.is_nonempty()) { 1.584 + Node* n = _stack.node(); 1.585 + uint idx = _stack.index(); 1.586 + if (idx == n->req()) { // all inputs processed, clone n! 1.587 + _stack.pop(); 1.588 + // clone invariant node 1.589 + Node* n_cl = n->clone(); 1.590 + _old_new.map(n->_idx, n_cl); 1.591 + _phase->register_new_node(n_cl, ctrl); 1.592 + for (uint i = 0; i < n->req(); i++) { 1.593 + Node* in = n_cl->in(i); 1.594 + if (in == NULL) continue; 1.595 + n_cl->set_req(i, _old_new[in->_idx]); 1.596 + } 1.597 + } else { // process next input 1.598 + _stack.set_index(idx + 1); 1.599 + Node* m = n->in(idx); 1.600 + if (m != NULL && !_clone_visited.test_set(m->_idx)) { 1.601 + clone_visit(m); // visit the input 1.602 + } 1.603 + } 1.604 + } 1.605 + } 1.606 + 1.607 + public: 1.608 + Invariance(Arena* area, IdealLoopTree* lpt) : 1.609 + _lpt(lpt), _phase(lpt->_phase), 1.610 + _visited(area), _invariant(area), _stack(area, 10 /* guess */), 1.611 + _clone_visited(area), _old_new(area) 1.612 + {} 1.613 + 1.614 + // Map old to n for invariance computation and clone 1.615 + void map_ctrl(Node* old, Node* n) { 1.616 + assert(old->is_CFG() && n->is_CFG(), "must be"); 1.617 + _old_new.map(old->_idx, n); // "clone" of old is n 1.618 + _invariant.set(old->_idx); // old is invariant 1.619 + _clone_visited.set(old->_idx); 1.620 + } 1.621 + 1.622 + // Driver function to compute invariance 1.623 + bool is_invariant(Node* n) { 1.624 + if (!_visited.test_set(n->_idx)) 1.625 + compute_invariance(n); 1.626 + return (_invariant.test(n->_idx) != 0); 1.627 + } 1.628 + 1.629 + // Driver function to clone invariant 1.630 + Node* clone(Node* n, Node* ctrl) { 1.631 + assert(ctrl->is_CFG(), "must be"); 1.632 + assert(_invariant.test(n->_idx), "must be an invariant"); 1.633 + if (!_clone_visited.test(n->_idx)) 1.634 + clone_nodes(n, ctrl); 1.635 + return _old_new[n->_idx]; 1.636 + } 1.637 +}; 1.638 + 1.639 +//------------------------------is_range_check_if ----------------------------------- 1.640 +// Returns true if the predicate of iff is in "scale*iv + offset u< load_range(ptr)" format 1.641 +// Note: this function is particularly designed for loop predication. We require load_range 1.642 +// and offset to be loop invariant computed on the fly by "invar" 1.643 +bool IdealLoopTree::is_range_check_if(IfNode *iff, PhaseIdealLoop *phase, Invariance& invar) const { 1.644 + if (!is_loop_exit(iff)) { 1.645 + return false; 1.646 + } 1.647 + if (!iff->in(1)->is_Bool()) { 1.648 + return false; 1.649 + } 1.650 + const BoolNode *bol = iff->in(1)->as_Bool(); 1.651 + if (bol->_test._test != BoolTest::lt) { 1.652 + return false; 1.653 + } 1.654 + if (!bol->in(1)->is_Cmp()) { 1.655 + return false; 1.656 + } 1.657 + const CmpNode *cmp = bol->in(1)->as_Cmp(); 1.658 + if (cmp->Opcode() != Op_CmpU) { 1.659 + return false; 1.660 + } 1.661 + Node* range = cmp->in(2); 1.662 + if (range->Opcode() != Op_LoadRange) { 1.663 + const TypeInt* tint = phase->_igvn.type(range)->isa_int(); 1.664 + if (!OptimizeFill || tint == NULL || tint->empty() || tint->_lo < 0) { 1.665 + // Allow predication on positive values that aren't LoadRanges. 1.666 + // This allows optimization of loops where the length of the 1.667 + // array is a known value and doesn't need to be loaded back 1.668 + // from the array. 1.669 + return false; 1.670 + } 1.671 + } 1.672 + if (!invar.is_invariant(range)) { 1.673 + return false; 1.674 + } 1.675 + Node *iv = _head->as_CountedLoop()->phi(); 1.676 + int scale = 0; 1.677 + Node *offset = NULL; 1.678 + if (!phase->is_scaled_iv_plus_offset(cmp->in(1), iv, &scale, &offset)) { 1.679 + return false; 1.680 + } 1.681 + if (offset && !invar.is_invariant(offset)) { // offset must be invariant 1.682 + return false; 1.683 + } 1.684 + return true; 1.685 +} 1.686 + 1.687 +//------------------------------rc_predicate----------------------------------- 1.688 +// Create a range check predicate 1.689 +// 1.690 +// for (i = init; i < limit; i += stride) { 1.691 +// a[scale*i+offset] 1.692 +// } 1.693 +// 1.694 +// Compute max(scale*i + offset) for init <= i < limit and build the predicate 1.695 +// as "max(scale*i + offset) u< a.length". 1.696 +// 1.697 +// There are two cases for max(scale*i + offset): 1.698 +// (1) stride*scale > 0 1.699 +// max(scale*i + offset) = scale*(limit-stride) + offset 1.700 +// (2) stride*scale < 0 1.701 +// max(scale*i + offset) = scale*init + offset 1.702 +BoolNode* PhaseIdealLoop::rc_predicate(Node* ctrl, 1.703 + int scale, Node* offset, 1.704 + Node* init, Node* limit, Node* stride, 1.705 + Node* range, bool upper) { 1.706 + DEBUG_ONLY(ttyLocker ttyl); 1.707 + if (TraceLoopPredicate) tty->print("rc_predicate "); 1.708 + 1.709 + Node* max_idx_expr = init; 1.710 + int stride_con = stride->get_int(); 1.711 + if ((stride_con > 0) == (scale > 0) == upper) { 1.712 + max_idx_expr = new (C, 3) SubINode(limit, stride); 1.713 + register_new_node(max_idx_expr, ctrl); 1.714 + if (TraceLoopPredicate) tty->print("(limit - stride) "); 1.715 + } else { 1.716 + if (TraceLoopPredicate) tty->print("init "); 1.717 + } 1.718 + 1.719 + if (scale != 1) { 1.720 + ConNode* con_scale = _igvn.intcon(scale); 1.721 + max_idx_expr = new (C, 3) MulINode(max_idx_expr, con_scale); 1.722 + register_new_node(max_idx_expr, ctrl); 1.723 + if (TraceLoopPredicate) tty->print("* %d ", scale); 1.724 + } 1.725 + 1.726 + if (offset && (!offset->is_Con() || offset->get_int() != 0)){ 1.727 + max_idx_expr = new (C, 3) AddINode(max_idx_expr, offset); 1.728 + register_new_node(max_idx_expr, ctrl); 1.729 + if (TraceLoopPredicate) 1.730 + if (offset->is_Con()) tty->print("+ %d ", offset->get_int()); 1.731 + else tty->print("+ offset "); 1.732 + } 1.733 + 1.734 + CmpUNode* cmp = new (C, 3) CmpUNode(max_idx_expr, range); 1.735 + register_new_node(cmp, ctrl); 1.736 + BoolNode* bol = new (C, 2) BoolNode(cmp, BoolTest::lt); 1.737 + register_new_node(bol, ctrl); 1.738 + 1.739 + if (TraceLoopPredicate) tty->print_cr("<u range"); 1.740 + return bol; 1.741 +} 1.742 + 1.743 +//------------------------------ loop_predication_impl-------------------------- 1.744 +// Insert loop predicates for null checks and range checks 1.745 +bool PhaseIdealLoop::loop_predication_impl(IdealLoopTree *loop) { 1.746 + if (!UseLoopPredicate) return false; 1.747 + 1.748 + if (!loop->_head->is_Loop()) { 1.749 + // Could be a simple region when irreducible loops are present. 1.750 + return false; 1.751 + } 1.752 + 1.753 + if (loop->_head->unique_ctrl_out()->Opcode() == Op_NeverBranch) { 1.754 + // do nothing for infinite loops 1.755 + return false; 1.756 + } 1.757 + 1.758 + CountedLoopNode *cl = NULL; 1.759 + if (loop->_head->is_CountedLoop()) { 1.760 + cl = loop->_head->as_CountedLoop(); 1.761 + // do nothing for iteration-splitted loops 1.762 + if (!cl->is_normal_loop()) return false; 1.763 + } 1.764 + 1.765 + LoopNode *lpn = loop->_head->as_Loop(); 1.766 + Node* entry = lpn->in(LoopNode::EntryControl); 1.767 + 1.768 + ProjNode *predicate_proj = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate); 1.769 + if (!predicate_proj) { 1.770 +#ifndef PRODUCT 1.771 + if (TraceLoopPredicate) { 1.772 + tty->print("missing predicate:"); 1.773 + loop->dump_head(); 1.774 + lpn->dump(1); 1.775 + } 1.776 +#endif 1.777 + return false; 1.778 + } 1.779 + ConNode* zero = _igvn.intcon(0); 1.780 + set_ctrl(zero, C->root()); 1.781 + 1.782 + ResourceArea *area = Thread::current()->resource_area(); 1.783 + Invariance invar(area, loop); 1.784 + 1.785 + // Create list of if-projs such that a newer proj dominates all older 1.786 + // projs in the list, and they all dominate loop->tail() 1.787 + Node_List if_proj_list(area); 1.788 + LoopNode *head = loop->_head->as_Loop(); 1.789 + Node *current_proj = loop->tail(); //start from tail 1.790 + while (current_proj != head) { 1.791 + if (loop == get_loop(current_proj) && // still in the loop ? 1.792 + current_proj->is_Proj() && // is a projection ? 1.793 + current_proj->in(0)->Opcode() == Op_If) { // is a if projection ? 1.794 + if_proj_list.push(current_proj); 1.795 + } 1.796 + current_proj = idom(current_proj); 1.797 + } 1.798 + 1.799 + bool hoisted = false; // true if at least one proj is promoted 1.800 + while (if_proj_list.size() > 0) { 1.801 + // Following are changed to nonnull when a predicate can be hoisted 1.802 + ProjNode* new_predicate_proj = NULL; 1.803 + 1.804 + ProjNode* proj = if_proj_list.pop()->as_Proj(); 1.805 + IfNode* iff = proj->in(0)->as_If(); 1.806 + 1.807 + if (!is_uncommon_trap_if_pattern(proj, Deoptimization::Reason_none)) { 1.808 + if (loop->is_loop_exit(iff)) { 1.809 + // stop processing the remaining projs in the list because the execution of them 1.810 + // depends on the condition of "iff" (iff->in(1)). 1.811 + break; 1.812 + } else { 1.813 + // Both arms are inside the loop. There are two cases: 1.814 + // (1) there is one backward branch. In this case, any remaining proj 1.815 + // in the if_proj list post-dominates "iff". So, the condition of "iff" 1.816 + // does not determine the execution the remining projs directly, and we 1.817 + // can safely continue. 1.818 + // (2) both arms are forwarded, i.e. a diamond shape. In this case, "proj" 1.819 + // does not dominate loop->tail(), so it can not be in the if_proj list. 1.820 + continue; 1.821 + } 1.822 + } 1.823 + 1.824 + Node* test = iff->in(1); 1.825 + if (!test->is_Bool()){ //Conv2B, ... 1.826 + continue; 1.827 + } 1.828 + BoolNode* bol = test->as_Bool(); 1.829 + if (invar.is_invariant(bol)) { 1.830 + // Invariant test 1.831 + new_predicate_proj = create_new_if_for_predicate(predicate_proj, NULL, 1.832 + Deoptimization::Reason_predicate); 1.833 + Node* ctrl = new_predicate_proj->in(0)->as_If()->in(0); 1.834 + BoolNode* new_predicate_bol = invar.clone(bol, ctrl)->as_Bool(); 1.835 + 1.836 + // Negate test if necessary 1.837 + bool negated = false; 1.838 + if (proj->_con != predicate_proj->_con) { 1.839 + new_predicate_bol = new (C, 2) BoolNode(new_predicate_bol->in(1), new_predicate_bol->_test.negate()); 1.840 + register_new_node(new_predicate_bol, ctrl); 1.841 + negated = true; 1.842 + } 1.843 + IfNode* new_predicate_iff = new_predicate_proj->in(0)->as_If(); 1.844 + _igvn.hash_delete(new_predicate_iff); 1.845 + new_predicate_iff->set_req(1, new_predicate_bol); 1.846 +#ifndef PRODUCT 1.847 + if (TraceLoopPredicate) { 1.848 + tty->print("Predicate invariant if%s: %d ", negated ? " negated" : "", new_predicate_iff->_idx); 1.849 + loop->dump_head(); 1.850 + } else if (TraceLoopOpts) { 1.851 + tty->print("Predicate IC "); 1.852 + loop->dump_head(); 1.853 + } 1.854 +#endif 1.855 + } else if (cl != NULL && loop->is_range_check_if(iff, this, invar)) { 1.856 + assert(proj->_con == predicate_proj->_con, "must match"); 1.857 + 1.858 + // Range check for counted loops 1.859 + const Node* cmp = bol->in(1)->as_Cmp(); 1.860 + Node* idx = cmp->in(1); 1.861 + assert(!invar.is_invariant(idx), "index is variant"); 1.862 + assert(cmp->in(2)->Opcode() == Op_LoadRange || OptimizeFill, "must be"); 1.863 + Node* rng = cmp->in(2); 1.864 + assert(invar.is_invariant(rng), "range must be invariant"); 1.865 + int scale = 1; 1.866 + Node* offset = zero; 1.867 + bool ok = is_scaled_iv_plus_offset(idx, cl->phi(), &scale, &offset); 1.868 + assert(ok, "must be index expression"); 1.869 + 1.870 + Node* init = cl->init_trip(); 1.871 + Node* limit = cl->limit(); 1.872 + Node* stride = cl->stride(); 1.873 + 1.874 + // Build if's for the upper and lower bound tests. The 1.875 + // lower_bound test will dominate the upper bound test and all 1.876 + // cloned or created nodes will use the lower bound test as 1.877 + // their declared control. 1.878 + ProjNode* lower_bound_proj = create_new_if_for_predicate(predicate_proj, NULL, Deoptimization::Reason_predicate); 1.879 + ProjNode* upper_bound_proj = create_new_if_for_predicate(predicate_proj, NULL, Deoptimization::Reason_predicate); 1.880 + assert(upper_bound_proj->in(0)->as_If()->in(0) == lower_bound_proj, "should dominate"); 1.881 + Node *ctrl = lower_bound_proj->in(0)->as_If()->in(0); 1.882 + 1.883 + // Perform cloning to keep Invariance state correct since the 1.884 + // late schedule will place invariant things in the loop. 1.885 + rng = invar.clone(rng, ctrl); 1.886 + if (offset && offset != zero) { 1.887 + assert(invar.is_invariant(offset), "offset must be loop invariant"); 1.888 + offset = invar.clone(offset, ctrl); 1.889 + } 1.890 + 1.891 + // Test the lower bound 1.892 + Node* lower_bound_bol = rc_predicate(ctrl, scale, offset, init, limit, stride, rng, false); 1.893 + IfNode* lower_bound_iff = lower_bound_proj->in(0)->as_If(); 1.894 + _igvn.hash_delete(lower_bound_iff); 1.895 + lower_bound_iff->set_req(1, lower_bound_bol); 1.896 + if (TraceLoopPredicate) tty->print_cr("lower bound check if: %d", lower_bound_iff->_idx); 1.897 + 1.898 + // Test the upper bound 1.899 + Node* upper_bound_bol = rc_predicate(ctrl, scale, offset, init, limit, stride, rng, true); 1.900 + IfNode* upper_bound_iff = upper_bound_proj->in(0)->as_If(); 1.901 + _igvn.hash_delete(upper_bound_iff); 1.902 + upper_bound_iff->set_req(1, upper_bound_bol); 1.903 + if (TraceLoopPredicate) tty->print_cr("upper bound check if: %d", lower_bound_iff->_idx); 1.904 + 1.905 + // Fall through into rest of the clean up code which will move 1.906 + // any dependent nodes onto the upper bound test. 1.907 + new_predicate_proj = upper_bound_proj; 1.908 + 1.909 +#ifndef PRODUCT 1.910 + if (TraceLoopOpts && !TraceLoopPredicate) { 1.911 + tty->print("Predicate RC "); 1.912 + loop->dump_head(); 1.913 + } 1.914 +#endif 1.915 + } else { 1.916 + // Loop variant check (for example, range check in non-counted loop) 1.917 + // with uncommon trap. 1.918 + continue; 1.919 + } 1.920 + assert(new_predicate_proj != NULL, "sanity"); 1.921 + // Success - attach condition (new_predicate_bol) to predicate if 1.922 + invar.map_ctrl(proj, new_predicate_proj); // so that invariance test can be appropriate 1.923 + 1.924 + // Eliminate the old If in the loop body 1.925 + dominated_by( new_predicate_proj, iff, proj->_con != new_predicate_proj->_con ); 1.926 + 1.927 + hoisted = true; 1.928 + C->set_major_progress(); 1.929 + } // end while 1.930 + 1.931 +#ifndef PRODUCT 1.932 + // report that the loop predication has been actually performed 1.933 + // for this loop 1.934 + if (TraceLoopPredicate && hoisted) { 1.935 + tty->print("Loop Predication Performed:"); 1.936 + loop->dump_head(); 1.937 + } 1.938 +#endif 1.939 + 1.940 + return hoisted; 1.941 +} 1.942 + 1.943 +//------------------------------loop_predication-------------------------------- 1.944 +// driver routine for loop predication optimization 1.945 +bool IdealLoopTree::loop_predication( PhaseIdealLoop *phase) { 1.946 + bool hoisted = false; 1.947 + // Recursively promote predicates 1.948 + if (_child) { 1.949 + hoisted = _child->loop_predication( phase); 1.950 + } 1.951 + 1.952 + // self 1.953 + if (!_irreducible && !tail()->is_top()) { 1.954 + hoisted |= phase->loop_predication_impl(this); 1.955 + } 1.956 + 1.957 + if (_next) { //sibling 1.958 + hoisted |= _next->loop_predication( phase); 1.959 + } 1.960 + 1.961 + return hoisted; 1.962 +} 1.963 +