1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/src/share/vm/opto/loopnode.hpp Wed Apr 27 01:25:04 2016 +0800 1.3 @@ -0,0 +1,1118 @@ 1.4 +/* 1.5 + * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved. 1.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 1.7 + * 1.8 + * This code is free software; you can redistribute it and/or modify it 1.9 + * under the terms of the GNU General Public License version 2 only, as 1.10 + * published by the Free Software Foundation. 1.11 + * 1.12 + * This code is distributed in the hope that it will be useful, but WITHOUT 1.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 1.14 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 1.15 + * version 2 for more details (a copy is included in the LICENSE file that 1.16 + * accompanied this code). 1.17 + * 1.18 + * You should have received a copy of the GNU General Public License version 1.19 + * 2 along with this work; if not, write to the Free Software Foundation, 1.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 1.21 + * 1.22 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 1.23 + * or visit www.oracle.com if you need additional information or have any 1.24 + * questions. 1.25 + * 1.26 + */ 1.27 + 1.28 +#ifndef SHARE_VM_OPTO_LOOPNODE_HPP 1.29 +#define SHARE_VM_OPTO_LOOPNODE_HPP 1.30 + 1.31 +#include "opto/cfgnode.hpp" 1.32 +#include "opto/multnode.hpp" 1.33 +#include "opto/phaseX.hpp" 1.34 +#include "opto/subnode.hpp" 1.35 +#include "opto/type.hpp" 1.36 + 1.37 +class CmpNode; 1.38 +class CountedLoopEndNode; 1.39 +class CountedLoopNode; 1.40 +class IdealLoopTree; 1.41 +class LoopNode; 1.42 +class Node; 1.43 +class PhaseIdealLoop; 1.44 +class VectorSet; 1.45 +class Invariance; 1.46 +struct small_cache; 1.47 + 1.48 +// 1.49 +// I D E A L I Z E D L O O P S 1.50 +// 1.51 +// Idealized loops are the set of loops I perform more interesting 1.52 +// transformations on, beyond simple hoisting. 1.53 + 1.54 +//------------------------------LoopNode--------------------------------------- 1.55 +// Simple loop header. Fall in path on left, loop-back path on right. 1.56 +class LoopNode : public RegionNode { 1.57 + // Size is bigger to hold the flags. However, the flags do not change 1.58 + // the semantics so it does not appear in the hash & cmp functions. 
1.59 +  virtual uint size_of() const { return sizeof(*this); }
1.60 +protected:
1.61 +  short _loop_flags;
1.62 +  // Names for flag bitfields
1.63 +  enum { Normal=0, Pre=1, Main=2, Post=3, PreMainPostFlagsMask=3,
1.64 +         MainHasNoPreLoop=4,
1.65 +         HasExactTripCount=8,
1.66 +         InnerLoop=16,
1.67 +         PartialPeelLoop=32,
1.68 +         PartialPeelFailed=64 };
1.69 +  char _unswitch_count;
1.70 +  enum { _unswitch_max=3 };
1.71 +
1.72 +public:
1.73 +  // Names for edge indices
1.74 +  enum { Self=0, EntryControl, LoopBackControl };
1.75 +
1.76 +  int is_inner_loop() const { return _loop_flags & InnerLoop; }
1.77 +  void set_inner_loop() { _loop_flags |= InnerLoop; }
1.78 +
1.79 +  int is_partial_peel_loop() const { return _loop_flags & PartialPeelLoop; }
1.80 +  void set_partial_peel_loop() { _loop_flags |= PartialPeelLoop; }
1.81 +  int partial_peel_has_failed() const { return _loop_flags & PartialPeelFailed; }
1.82 +  void mark_partial_peel_failed() { _loop_flags |= PartialPeelFailed; }
1.83 +
1.84 +  int unswitch_max() { return _unswitch_max; }
1.85 +  int unswitch_count() { return _unswitch_count; }
1.86 +  void set_unswitch_count(int val) {
1.87 +    assert (val <= unswitch_max(), "too many unswitches");
1.88 +    _unswitch_count = val;
1.89 +  }
1.90 +
1.91 +  LoopNode( Node *entry, Node *backedge ) : RegionNode(3), _loop_flags(0), _unswitch_count(0) {
1.92 +    init_class_id(Class_Loop);
1.93 +    init_req(EntryControl, entry);
1.94 +    init_req(LoopBackControl, backedge);
1.95 +  }
1.96 +
1.97 +  virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
1.98 +  virtual int Opcode() const;
1.99 +  bool can_be_counted_loop(PhaseTransform* phase) const {
1.100 +    return req() == 3 && in(0) != NULL &&
1.101 +           in(1) != NULL && phase->type(in(1)) != Type::TOP &&
1.102 +           in(2) != NULL && phase->type(in(2)) != Type::TOP;
1.103 +  }
1.104 +  bool is_valid_counted_loop() const;
1.105 +#ifndef PRODUCT
1.106 +  virtual void dump_spec(outputStream *st) const;
1.107 +#endif
1.108 +};
1.109 +
1.110 +//------------------------------Counted Loops----------------------------------
1.111 +// Counted loops are all trip-counted loops, with exactly 1 trip-counter exit
1.112 +// path (and maybe some other exit paths). The trip-counter exit is always
1.113 +// last in the loop. The trip-counter has to stride by a constant;
1.114 +// the exit value is also loop invariant.
1.115 +
1.116 +// CountedLoopNodes and CountedLoopEndNodes come in matched pairs. The
1.117 +// CountedLoopNode has the incoming loop control and the loop-back-control
1.118 +// which is always the IfTrue before the matching CountedLoopEndNode. The
1.119 +// CountedLoopEndNode has an incoming control (possibly not the
1.120 +// CountedLoopNode if there is control flow in the loop), the post-increment
1.121 +// trip-counter value, and the limit. The trip-counter value is always of
1.122 +// the form (Op old-trip-counter stride). The old-trip-counter is produced
1.123 +// by a Phi connected to the CountedLoopNode. The stride is constant.
1.124 +// The Op is any commutative opcode, including Add, Mul, Xor. The
1.125 +// CountedLoopEndNode also takes in the loop-invariant limit value.
1.126 +
1.127 +// From a CountedLoopNode I can reach the matching CountedLoopEndNode via the
1.128 +// loop-back control. From CountedLoopEndNodes I can reach CountedLoopNodes
1.129 +// via the old-trip-counter from the Op node.
1.130 +
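// A minimal sketch of the ideal-graph shape being described, for a loop of the
// form "for (int i = init; i < limit; i += stride)" (editorial illustration
// only; variable names are made up, node spellings follow the accessors
// declared below):
//
//   iv   = Phi(CountedLoop, init, incr)     // old trip counter
//   incr = AddI(iv, stride)                 // post-increment trip counter (the "Op")
//   cmp  = CmpI(incr, limit)
//   bol  = Bool(cmp, BoolTest::lt)
//   end  = CountedLoopEnd(ctrl, bol)
//   CountedLoop.in(LoopBackControl) = IfTrue(end)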
1.131 +//------------------------------CountedLoopNode--------------------------------
1.132 +// CountedLoopNodes head simple counted loops. CountedLoopNodes have as
1.133 +// inputs the incoming loop-start control and the loop-back control, so they
1.134 +// act like RegionNodes. They also take in the initial trip counter, the
1.135 +// loop-invariant stride and the loop-invariant limit value. CountedLoopNodes
1.136 +// produce a loop-body control and the trip counter value. Since
1.137 +// CountedLoopNodes behave like RegionNodes I still have a standard CFG model.
1.138 +
1.139 +class CountedLoopNode : public LoopNode {
1.140 +  // Size is bigger to hold _main_idx. However, _main_idx does not change
1.141 +  // the semantics so it does not appear in the hash & cmp functions.
1.142 +  virtual uint size_of() const { return sizeof(*this); }
1.143 +
1.144 +  // For Pre- and Post-loops during debugging ONLY, this holds the index of
1.145 +  // the Main CountedLoop. Used to assert that we understand the graph shape.
1.146 +  node_idx_t _main_idx;
1.147 +
1.148 +  // Known trip count calculated by compute_exact_trip_count()
1.149 +  uint _trip_count;
1.150 +
1.151 +  // Expected trip count from profile data
1.152 +  float _profile_trip_cnt;
1.153 +
1.154 +  // Log2 of original loop bodies in unrolled loop
1.155 +  int _unrolled_count_log2;
1.156 +
1.157 +  // Node count prior to last unrolling - used to decide if
1.158 +  // unroll,optimize,unroll,optimize,... is making progress
1.159 +  int _node_count_before_unroll;
1.160 +
1.161 +public:
1.162 +  CountedLoopNode( Node *entry, Node *backedge )
1.163 +    : LoopNode(entry, backedge), _main_idx(0), _trip_count(max_juint),
1.164 +      _profile_trip_cnt(COUNT_UNKNOWN), _unrolled_count_log2(0),
1.165 +      _node_count_before_unroll(0) {
1.166 +    init_class_id(Class_CountedLoop);
1.167 +    // Initialize _trip_count to the largest possible value.
1.168 +    // Will be reset (lower) if the loop's trip count is known.
1.169 +  }
1.170 +
1.171 +  virtual int Opcode() const;
1.172 +  virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
1.173 +
1.174 +  Node *init_control() const { return in(EntryControl); }
1.175 +  Node *back_control() const { return in(LoopBackControl); }
1.176 +  CountedLoopEndNode *loopexit() const;
1.177 +  Node *init_trip() const;
1.178 +  Node *stride() const;
1.179 +  int stride_con() const;
1.180 +  bool stride_is_con() const;
1.181 +  Node *limit() const;
1.182 +  Node *incr() const;
1.183 +  Node *phi() const;
1.184 +
1.185 +  // Match increment with optional truncation
1.186 +  static Node* match_incr_with_optional_truncation(Node* expr, Node** trunc1, Node** trunc2, const TypeInt** trunc_type);
1.187 +
1.188 +  // A 'main' loop has a pre-loop and a post-loop. The 'main' loop
1.189 +  // can run short a few iterations and may start a few iterations in.
1.190 +  // It will be RCE'd and unrolled and aligned.
1.191 +
1.192 +  // A following 'post' loop will run any remaining iterations. Used
1.193 +  // during Range Check Elimination, the 'post' loop will do any final
1.194 +  // iterations with full checks. Also used by Loop Unrolling, where
1.195 +  // the 'post' loop will do any epilog iterations needed. Basically,
1.196 +  // a 'post' loop can not profitably be further unrolled or RCE'd.
1.197 +
1.198 +  // A preceding 'pre' loop will run at least 1 iteration (to do peeling),
1.199 +  // it may do under-flow checks for RCE and may do alignment iterations
1.200 +  // so the following main loop 'knows' that it is striding down cache
1.201 +  // lines.
1.202 +
1.203 +  // A 'main' loop that is ONLY unrolled or peeled, never RCE'd or
1.204 +  // Aligned, may be missing its pre-loop.
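// A sketch of the pre/main/post shape described above for a simple loop
// "for (int i = init; i < limit; i++) a[i] = v;" (editorial illustration only;
// the limits and unroll factor U are chosen by the loop optimizations, and the
// names used here are made up):
//
//   for (i = init; i < pre_limit;  i++   ) a[i] = v;   // 'pre'  : >= 1 iteration, underflow/alignment work
//   for (;         i < main_limit; i += U) { ... }     // 'main' : unrolled by U, range checks eliminated
//   for (;         i < limit;      i++   ) a[i] = v;   // 'post' : remaining iterations, full checks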
1.205 + int is_normal_loop() const { return (_loop_flags&PreMainPostFlagsMask) == Normal; } 1.206 + int is_pre_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Pre; } 1.207 + int is_main_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Main; } 1.208 + int is_post_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Post; } 1.209 + int is_main_no_pre_loop() const { return _loop_flags & MainHasNoPreLoop; } 1.210 + void set_main_no_pre_loop() { _loop_flags |= MainHasNoPreLoop; } 1.211 + 1.212 + int main_idx() const { return _main_idx; } 1.213 + 1.214 + 1.215 + void set_pre_loop (CountedLoopNode *main) { assert(is_normal_loop(),""); _loop_flags |= Pre ; _main_idx = main->_idx; } 1.216 + void set_main_loop ( ) { assert(is_normal_loop(),""); _loop_flags |= Main; } 1.217 + void set_post_loop (CountedLoopNode *main) { assert(is_normal_loop(),""); _loop_flags |= Post; _main_idx = main->_idx; } 1.218 + void set_normal_loop( ) { _loop_flags &= ~PreMainPostFlagsMask; } 1.219 + 1.220 + void set_trip_count(uint tc) { _trip_count = tc; } 1.221 + uint trip_count() { return _trip_count; } 1.222 + 1.223 + bool has_exact_trip_count() const { return (_loop_flags & HasExactTripCount) != 0; } 1.224 + void set_exact_trip_count(uint tc) { 1.225 + _trip_count = tc; 1.226 + _loop_flags |= HasExactTripCount; 1.227 + } 1.228 + void set_nonexact_trip_count() { 1.229 + _loop_flags &= ~HasExactTripCount; 1.230 + } 1.231 + 1.232 + void set_profile_trip_cnt(float ptc) { _profile_trip_cnt = ptc; } 1.233 + float profile_trip_cnt() { return _profile_trip_cnt; } 1.234 + 1.235 + void double_unrolled_count() { _unrolled_count_log2++; } 1.236 + int unrolled_count() { return 1 << MIN2(_unrolled_count_log2, BitsPerInt-3); } 1.237 + 1.238 + void set_node_count_before_unroll(int ct) { _node_count_before_unroll = ct; } 1.239 + int node_count_before_unroll() { return _node_count_before_unroll; } 1.240 + 1.241 +#ifndef PRODUCT 1.242 + virtual void dump_spec(outputStream *st) const; 1.243 +#endif 1.244 +}; 1.245 + 1.246 +//------------------------------CountedLoopEndNode----------------------------- 1.247 +// CountedLoopEndNodes end simple trip counted loops. They act much like 1.248 +// IfNodes. 1.249 +class CountedLoopEndNode : public IfNode { 1.250 +public: 1.251 + enum { TestControl, TestValue }; 1.252 + 1.253 + CountedLoopEndNode( Node *control, Node *test, float prob, float cnt ) 1.254 + : IfNode( control, test, prob, cnt) { 1.255 + init_class_id(Class_CountedLoopEnd); 1.256 + } 1.257 + virtual int Opcode() const; 1.258 + 1.259 + Node *cmp_node() const { return (in(TestValue)->req() >=2) ? in(TestValue)->in(1) : NULL; } 1.260 + Node *incr() const { Node *tmp = cmp_node(); return (tmp && tmp->req()==3) ? tmp->in(1) : NULL; } 1.261 + Node *limit() const { Node *tmp = cmp_node(); return (tmp && tmp->req()==3) ? tmp->in(2) : NULL; } 1.262 + Node *stride() const { Node *tmp = incr (); return (tmp && tmp->req()==3) ? tmp->in(2) : NULL; } 1.263 + Node *phi() const { Node *tmp = incr (); return (tmp && tmp->req()==3) ? tmp->in(1) : NULL; } 1.264 + Node *init_trip() const { Node *tmp = phi (); return (tmp && tmp->req()==3) ? 
tmp->in(1) : NULL; } 1.265 + int stride_con() const; 1.266 + bool stride_is_con() const { Node *tmp = stride (); return (tmp != NULL && tmp->is_Con()); } 1.267 + BoolTest::mask test_trip() const { return in(TestValue)->as_Bool()->_test._test; } 1.268 + CountedLoopNode *loopnode() const { 1.269 + // The CountedLoopNode that goes with this CountedLoopEndNode may 1.270 + // have been optimized out by the IGVN so be cautious with the 1.271 + // pattern matching on the graph 1.272 + if (phi() == NULL) { 1.273 + return NULL; 1.274 + } 1.275 + Node *ln = phi()->in(0); 1.276 + if (ln->is_CountedLoop() && ln->as_CountedLoop()->loopexit() == this) { 1.277 + return (CountedLoopNode*)ln; 1.278 + } 1.279 + return NULL; 1.280 + } 1.281 + 1.282 +#ifndef PRODUCT 1.283 + virtual void dump_spec(outputStream *st) const; 1.284 +#endif 1.285 +}; 1.286 + 1.287 + 1.288 +inline CountedLoopEndNode *CountedLoopNode::loopexit() const { 1.289 + Node *bc = back_control(); 1.290 + if( bc == NULL ) return NULL; 1.291 + Node *le = bc->in(0); 1.292 + if( le->Opcode() != Op_CountedLoopEnd ) 1.293 + return NULL; 1.294 + return (CountedLoopEndNode*)le; 1.295 +} 1.296 +inline Node *CountedLoopNode::init_trip() const { return loopexit() ? loopexit()->init_trip() : NULL; } 1.297 +inline Node *CountedLoopNode::stride() const { return loopexit() ? loopexit()->stride() : NULL; } 1.298 +inline int CountedLoopNode::stride_con() const { return loopexit() ? loopexit()->stride_con() : 0; } 1.299 +inline bool CountedLoopNode::stride_is_con() const { return loopexit() && loopexit()->stride_is_con(); } 1.300 +inline Node *CountedLoopNode::limit() const { return loopexit() ? loopexit()->limit() : NULL; } 1.301 +inline Node *CountedLoopNode::incr() const { return loopexit() ? loopexit()->incr() : NULL; } 1.302 +inline Node *CountedLoopNode::phi() const { return loopexit() ? loopexit()->phi() : NULL; } 1.303 + 1.304 +//------------------------------LoopLimitNode----------------------------- 1.305 +// Counted Loop limit node which represents exact final iterator value: 1.306 +// trip_count = (limit - init_trip + stride - 1)/stride 1.307 +// final_value= trip_count * stride + init_trip. 1.308 +// Use HW instructions to calculate it when it can overflow in integer. 1.309 +// Note, final_value should fit into integer since counted loop has 1.310 +// limit check: limit <= max_int-stride. 1.311 +class LoopLimitNode : public Node { 1.312 + enum { Init=1, Limit=2, Stride=3 }; 1.313 + public: 1.314 + LoopLimitNode( Compile* C, Node *init, Node *limit, Node *stride ) : Node(0,init,limit,stride) { 1.315 + // Put it on the Macro nodes list to optimize during macro nodes expansion. 1.316 + init_flags(Flag_is_macro); 1.317 + C->add_macro_node(this); 1.318 + } 1.319 + virtual int Opcode() const; 1.320 + virtual const Type *bottom_type() const { return TypeInt::INT; } 1.321 + virtual uint ideal_reg() const { return Op_RegI; } 1.322 + virtual const Type *Value( PhaseTransform *phase ) const; 1.323 + virtual Node *Ideal(PhaseGVN *phase, bool can_reshape); 1.324 + virtual Node *Identity( PhaseTransform *phase ); 1.325 +}; 1.326 + 1.327 +// -----------------------------IdealLoopTree---------------------------------- 1.328 +class IdealLoopTree : public ResourceObj { 1.329 +public: 1.330 + IdealLoopTree *_parent; // Parent in loop tree 1.331 + IdealLoopTree *_next; // Next sibling in loop tree 1.332 + IdealLoopTree *_child; // First child in loop tree 1.333 + 1.334 + // The head-tail backedge defines the loop. 
1.335 + // If tail is NULL then this loop has multiple backedges as part of the 1.336 + // same loop. During cleanup I'll peel off the multiple backedges; merge 1.337 + // them at the loop bottom and flow 1 real backedge into the loop. 1.338 + Node *_head; // Head of loop 1.339 + Node *_tail; // Tail of loop 1.340 + inline Node *tail(); // Handle lazy update of _tail field 1.341 + PhaseIdealLoop* _phase; 1.342 + 1.343 + Node_List _body; // Loop body for inner loops 1.344 + 1.345 + uint8 _nest; // Nesting depth 1.346 + uint8 _irreducible:1, // True if irreducible 1.347 + _has_call:1, // True if has call safepoint 1.348 + _has_sfpt:1, // True if has non-call safepoint 1.349 + _rce_candidate:1; // True if candidate for range check elimination 1.350 + 1.351 + Node_List* _safepts; // List of safepoints in this loop 1.352 + Node_List* _required_safept; // A inner loop cannot delete these safepts; 1.353 + bool _allow_optimizations; // Allow loop optimizations 1.354 + 1.355 + IdealLoopTree( PhaseIdealLoop* phase, Node *head, Node *tail ) 1.356 + : _parent(0), _next(0), _child(0), 1.357 + _head(head), _tail(tail), 1.358 + _phase(phase), 1.359 + _safepts(NULL), 1.360 + _required_safept(NULL), 1.361 + _allow_optimizations(true), 1.362 + _nest(0), _irreducible(0), _has_call(0), _has_sfpt(0), _rce_candidate(0) 1.363 + { } 1.364 + 1.365 + // Is 'l' a member of 'this'? 1.366 + int is_member( const IdealLoopTree *l ) const; // Test for nested membership 1.367 + 1.368 + // Set loop nesting depth. Accumulate has_call bits. 1.369 + int set_nest( uint depth ); 1.370 + 1.371 + // Split out multiple fall-in edges from the loop header. Move them to a 1.372 + // private RegionNode before the loop. This becomes the loop landing pad. 1.373 + void split_fall_in( PhaseIdealLoop *phase, int fall_in_cnt ); 1.374 + 1.375 + // Split out the outermost loop from this shared header. 1.376 + void split_outer_loop( PhaseIdealLoop *phase ); 1.377 + 1.378 + // Merge all the backedges from the shared header into a private Region. 1.379 + // Feed that region as the one backedge to this loop. 1.380 + void merge_many_backedges( PhaseIdealLoop *phase ); 1.381 + 1.382 + // Split shared headers and insert loop landing pads. 1.383 + // Insert a LoopNode to replace the RegionNode. 1.384 + // Returns TRUE if loop tree is structurally changed. 1.385 + bool beautify_loops( PhaseIdealLoop *phase ); 1.386 + 1.387 + // Perform optimization to use the loop predicates for null checks and range checks. 1.388 + // Applies to any loop level (not just the innermost one) 1.389 + bool loop_predication( PhaseIdealLoop *phase); 1.390 + 1.391 + // Perform iteration-splitting on inner loops. Split iterations to 1.392 + // avoid range checks or one-shot null checks. Returns false if the 1.393 + // current round of loop opts should stop. 1.394 + bool iteration_split( PhaseIdealLoop *phase, Node_List &old_new ); 1.395 + 1.396 + // Driver for various flavors of iteration splitting. Returns false 1.397 + // if the current round of loop opts should stop. 1.398 + bool iteration_split_impl( PhaseIdealLoop *phase, Node_List &old_new ); 1.399 + 1.400 + // Given dominators, try to find loops with calls that must always be 1.401 + // executed (call dominates loop tail). These loops do not need non-call 1.402 + // safepoints (ncsfpt). 1.403 + void check_safepts(VectorSet &visited, Node_List &stack); 1.404 + 1.405 + // Allpaths backwards scan from loop tail, terminating each path at first safepoint 1.406 + // encountered. 
1.407 + void allpaths_check_safepts(VectorSet &visited, Node_List &stack); 1.408 + 1.409 + // Convert to counted loops where possible 1.410 + void counted_loop( PhaseIdealLoop *phase ); 1.411 + 1.412 + // Check for Node being a loop-breaking test 1.413 + Node *is_loop_exit(Node *iff) const; 1.414 + 1.415 + // Returns true if ctrl is executed on every complete iteration 1.416 + bool dominates_backedge(Node* ctrl); 1.417 + 1.418 + // Remove simplistic dead code from loop body 1.419 + void DCE_loop_body(); 1.420 + 1.421 + // Look for loop-exit tests with my 50/50 guesses from the Parsing stage. 1.422 + // Replace with a 1-in-10 exit guess. 1.423 + void adjust_loop_exit_prob( PhaseIdealLoop *phase ); 1.424 + 1.425 + // Return TRUE or FALSE if the loop should never be RCE'd or aligned. 1.426 + // Useful for unrolling loops with NO array accesses. 1.427 + bool policy_peel_only( PhaseIdealLoop *phase ) const; 1.428 + 1.429 + // Return TRUE or FALSE if the loop should be unswitched -- clone 1.430 + // loop with an invariant test 1.431 + bool policy_unswitching( PhaseIdealLoop *phase ) const; 1.432 + 1.433 + // Micro-benchmark spamming. Remove empty loops. 1.434 + bool policy_do_remove_empty_loop( PhaseIdealLoop *phase ); 1.435 + 1.436 + // Convert one iteration loop into normal code. 1.437 + bool policy_do_one_iteration_loop( PhaseIdealLoop *phase ); 1.438 + 1.439 + // Return TRUE or FALSE if the loop should be peeled or not. Peel if we can 1.440 + // make some loop-invariant test (usually a null-check) happen before the 1.441 + // loop. 1.442 + bool policy_peeling( PhaseIdealLoop *phase ) const; 1.443 + 1.444 + // Return TRUE or FALSE if the loop should be maximally unrolled. Stash any 1.445 + // known trip count in the counted loop node. 1.446 + bool policy_maximally_unroll( PhaseIdealLoop *phase ) const; 1.447 + 1.448 + // Return TRUE or FALSE if the loop should be unrolled or not. Unroll if 1.449 + // the loop is a CountedLoop and the body is small enough. 1.450 + bool policy_unroll( PhaseIdealLoop *phase ) const; 1.451 + 1.452 + // Return TRUE or FALSE if the loop should be range-check-eliminated. 1.453 + // Gather a list of IF tests that are dominated by iteration splitting; 1.454 + // also gather the end of the first split and the start of the 2nd split. 1.455 + bool policy_range_check( PhaseIdealLoop *phase ) const; 1.456 + 1.457 + // Return TRUE or FALSE if the loop should be cache-line aligned. 1.458 + // Gather the expression that does the alignment. Note that only 1.459 + // one array base can be aligned in a loop (unless the VM guarantees 1.460 + // mutual alignment). Note that if we vectorize short memory ops 1.461 + // into longer memory ops, we may want to increase alignment. 1.462 + bool policy_align( PhaseIdealLoop *phase ) const; 1.463 + 1.464 + // Return TRUE if "iff" is a range check. 1.465 + bool is_range_check_if(IfNode *iff, PhaseIdealLoop *phase, Invariance& invar) const; 1.466 + 1.467 + // Compute loop exact trip count if possible 1.468 + void compute_exact_trip_count( PhaseIdealLoop *phase ); 1.469 + 1.470 + // Compute loop trip count from profile data 1.471 + void compute_profile_trip_cnt( PhaseIdealLoop *phase ); 1.472 + 1.473 + // Reassociate invariant expressions. 1.474 + void reassociate_invariants(PhaseIdealLoop *phase); 1.475 + // Reassociate invariant add and subtract expressions. 
1.476 + Node* reassociate_add_sub(Node* n1, PhaseIdealLoop *phase); 1.477 + // Return nonzero index of invariant operand if invariant and variant 1.478 + // are combined with an Add or Sub. Helper for reassociate_invariants. 1.479 + int is_invariant_addition(Node* n, PhaseIdealLoop *phase); 1.480 + 1.481 + // Return true if n is invariant 1.482 + bool is_invariant(Node* n) const; 1.483 + 1.484 + // Put loop body on igvn work list 1.485 + void record_for_igvn(); 1.486 + 1.487 + bool is_loop() { return !_irreducible && _tail && !_tail->is_top(); } 1.488 + bool is_inner() { return is_loop() && _child == NULL; } 1.489 + bool is_counted() { return is_loop() && _head != NULL && _head->is_CountedLoop(); } 1.490 + 1.491 +#ifndef PRODUCT 1.492 + void dump_head( ) const; // Dump loop head only 1.493 + void dump() const; // Dump this loop recursively 1.494 + void verify_tree(IdealLoopTree *loop, const IdealLoopTree *parent) const; 1.495 +#endif 1.496 + 1.497 +}; 1.498 + 1.499 +// -----------------------------PhaseIdealLoop--------------------------------- 1.500 +// Computes the mapping from Nodes to IdealLoopTrees. Organizes IdealLoopTrees into a 1.501 +// loop tree. Drives the loop-based transformations on the ideal graph. 1.502 +class PhaseIdealLoop : public PhaseTransform { 1.503 + friend class IdealLoopTree; 1.504 + friend class SuperWord; 1.505 + // Pre-computed def-use info 1.506 + PhaseIterGVN &_igvn; 1.507 + 1.508 + // Head of loop tree 1.509 + IdealLoopTree *_ltree_root; 1.510 + 1.511 + // Array of pre-order numbers, plus post-visited bit. 1.512 + // ZERO for not pre-visited. EVEN for pre-visited but not post-visited. 1.513 + // ODD for post-visited. Other bits are the pre-order number. 1.514 + uint *_preorders; 1.515 + uint _max_preorder; 1.516 + 1.517 + const PhaseIdealLoop* _verify_me; 1.518 + bool _verify_only; 1.519 + 1.520 + // Allocate _preorders[] array 1.521 + void allocate_preorders() { 1.522 + _max_preorder = C->unique()+8; 1.523 + _preorders = NEW_RESOURCE_ARRAY(uint, _max_preorder); 1.524 + memset(_preorders, 0, sizeof(uint) * _max_preorder); 1.525 + } 1.526 + 1.527 + // Allocate _preorders[] array 1.528 + void reallocate_preorders() { 1.529 + if ( _max_preorder < C->unique() ) { 1.530 + _preorders = REALLOC_RESOURCE_ARRAY(uint, _preorders, _max_preorder, C->unique()); 1.531 + _max_preorder = C->unique(); 1.532 + } 1.533 + memset(_preorders, 0, sizeof(uint) * _max_preorder); 1.534 + } 1.535 + 1.536 + // Check to grow _preorders[] array for the case when build_loop_tree_impl() 1.537 + // adds new nodes. 1.538 + void check_grow_preorders( ) { 1.539 + if ( _max_preorder < C->unique() ) { 1.540 + uint newsize = _max_preorder<<1; // double size of array 1.541 + _preorders = REALLOC_RESOURCE_ARRAY(uint, _preorders, _max_preorder, newsize); 1.542 + memset(&_preorders[_max_preorder],0,sizeof(uint)*(newsize-_max_preorder)); 1.543 + _max_preorder = newsize; 1.544 + } 1.545 + } 1.546 + // Check for pre-visited. Zero for NOT visited; non-zero for visited. 1.547 + int is_visited( Node *n ) const { return _preorders[n->_idx]; } 1.548 + // Pre-order numbers are written to the Nodes array as low-bit-set values. 1.549 + void set_preorder_visited( Node *n, int pre_order ) { 1.550 + assert( !is_visited( n ), "already set" ); 1.551 + _preorders[n->_idx] = (pre_order<<1); 1.552 + }; 1.553 + // Return pre-order number. 1.554 + int get_preorder( Node *n ) const { assert( is_visited(n), "" ); return _preorders[n->_idx]>>1; } 1.555 + 1.556 + // Check for being post-visited. 
1.557 + // Should be previsited already (checked with assert(is_visited(n))). 1.558 + int is_postvisited( Node *n ) const { assert( is_visited(n), "" ); return _preorders[n->_idx]&1; } 1.559 + 1.560 + // Mark as post visited 1.561 + void set_postvisited( Node *n ) { assert( !is_postvisited( n ), "" ); _preorders[n->_idx] |= 1; } 1.562 + 1.563 + // Set/get control node out. Set lower bit to distinguish from IdealLoopTree 1.564 + // Returns true if "n" is a data node, false if it's a control node. 1.565 + bool has_ctrl( Node *n ) const { return ((intptr_t)_nodes[n->_idx]) & 1; } 1.566 + 1.567 + // clear out dead code after build_loop_late 1.568 + Node_List _deadlist; 1.569 + 1.570 + // Support for faster execution of get_late_ctrl()/dom_lca() 1.571 + // when a node has many uses and dominator depth is deep. 1.572 + Node_Array _dom_lca_tags; 1.573 + void init_dom_lca_tags(); 1.574 + void clear_dom_lca_tags(); 1.575 + 1.576 + // Helper for debugging bad dominance relationships 1.577 + bool verify_dominance(Node* n, Node* use, Node* LCA, Node* early); 1.578 + 1.579 + Node* compute_lca_of_uses(Node* n, Node* early, bool verify = false); 1.580 + 1.581 + // Inline wrapper for frequent cases: 1.582 + // 1) only one use 1.583 + // 2) a use is the same as the current LCA passed as 'n1' 1.584 + Node *dom_lca_for_get_late_ctrl( Node *lca, Node *n, Node *tag ) { 1.585 + assert( n->is_CFG(), "" ); 1.586 + // Fast-path NULL lca 1.587 + if( lca != NULL && lca != n ) { 1.588 + assert( lca->is_CFG(), "" ); 1.589 + // find LCA of all uses 1.590 + n = dom_lca_for_get_late_ctrl_internal( lca, n, tag ); 1.591 + } 1.592 + return find_non_split_ctrl(n); 1.593 + } 1.594 + Node *dom_lca_for_get_late_ctrl_internal( Node *lca, Node *n, Node *tag ); 1.595 + 1.596 + // Helper function for directing control inputs away from CFG split 1.597 + // points. 1.598 + Node *find_non_split_ctrl( Node *ctrl ) const { 1.599 + if (ctrl != NULL) { 1.600 + if (ctrl->is_MultiBranch()) { 1.601 + ctrl = ctrl->in(0); 1.602 + } 1.603 + assert(ctrl->is_CFG(), "CFG"); 1.604 + } 1.605 + return ctrl; 1.606 + } 1.607 + 1.608 +public: 1.609 + bool has_node( Node* n ) const { 1.610 + guarantee(n != NULL, "No Node."); 1.611 + return _nodes[n->_idx] != NULL; 1.612 + } 1.613 + // check if transform created new nodes that need _ctrl recorded 1.614 + Node *get_late_ctrl( Node *n, Node *early ); 1.615 + Node *get_early_ctrl( Node *n ); 1.616 + Node *get_early_ctrl_for_expensive(Node *n, Node* earliest); 1.617 + void set_early_ctrl( Node *n ); 1.618 + void set_subtree_ctrl( Node *root ); 1.619 + void set_ctrl( Node *n, Node *ctrl ) { 1.620 + assert( !has_node(n) || has_ctrl(n), "" ); 1.621 + assert( ctrl->in(0), "cannot set dead control node" ); 1.622 + assert( ctrl == find_non_split_ctrl(ctrl), "must set legal crtl" ); 1.623 + _nodes.map( n->_idx, (Node*)((intptr_t)ctrl + 1) ); 1.624 + } 1.625 + // Set control and update loop membership 1.626 + void set_ctrl_and_loop(Node* n, Node* ctrl) { 1.627 + IdealLoopTree* old_loop = get_loop(get_ctrl(n)); 1.628 + IdealLoopTree* new_loop = get_loop(ctrl); 1.629 + if (old_loop != new_loop) { 1.630 + if (old_loop->_child == NULL) old_loop->_body.yank(n); 1.631 + if (new_loop->_child == NULL) new_loop->_body.push(n); 1.632 + } 1.633 + set_ctrl(n, ctrl); 1.634 + } 1.635 + // Control nodes can be replaced or subsumed. During this pass they 1.636 + // get their replacement Node in slot 1. Instead of updating the block 1.637 + // location of all Nodes in the subsumed block, we lazily do it. 
As we 1.638 + // pull such a subsumed block out of the array, we write back the final 1.639 + // correct block. 1.640 + Node *get_ctrl( Node *i ) { 1.641 + assert(has_node(i), ""); 1.642 + Node *n = get_ctrl_no_update(i); 1.643 + _nodes.map( i->_idx, (Node*)((intptr_t)n + 1) ); 1.644 + assert(has_node(i) && has_ctrl(i), ""); 1.645 + assert(n == find_non_split_ctrl(n), "must return legal ctrl" ); 1.646 + return n; 1.647 + } 1.648 + // true if CFG node d dominates CFG node n 1.649 + bool is_dominator(Node *d, Node *n); 1.650 + // return get_ctrl for a data node and self(n) for a CFG node 1.651 + Node* ctrl_or_self(Node* n) { 1.652 + if (has_ctrl(n)) 1.653 + return get_ctrl(n); 1.654 + else { 1.655 + assert (n->is_CFG(), "must be a CFG node"); 1.656 + return n; 1.657 + } 1.658 + } 1.659 + 1.660 +private: 1.661 + Node *get_ctrl_no_update( Node *i ) const { 1.662 + assert( has_ctrl(i), "" ); 1.663 + Node *n = (Node*)(((intptr_t)_nodes[i->_idx]) & ~1); 1.664 + if (!n->in(0)) { 1.665 + // Skip dead CFG nodes 1.666 + do { 1.667 + n = (Node*)(((intptr_t)_nodes[n->_idx]) & ~1); 1.668 + } while (!n->in(0)); 1.669 + n = find_non_split_ctrl(n); 1.670 + } 1.671 + return n; 1.672 + } 1.673 + 1.674 + // Check for loop being set 1.675 + // "n" must be a control node. Returns true if "n" is known to be in a loop. 1.676 + bool has_loop( Node *n ) const { 1.677 + assert(!has_node(n) || !has_ctrl(n), ""); 1.678 + return has_node(n); 1.679 + } 1.680 + // Set loop 1.681 + void set_loop( Node *n, IdealLoopTree *loop ) { 1.682 + _nodes.map(n->_idx, (Node*)loop); 1.683 + } 1.684 + // Lazy-dazy update of 'get_ctrl' and 'idom_at' mechanisms. Replace 1.685 + // the 'old_node' with 'new_node'. Kill old-node. Add a reference 1.686 + // from old_node to new_node to support the lazy update. Reference 1.687 + // replaces loop reference, since that is not needed for dead node. 1.688 +public: 1.689 + void lazy_update( Node *old_node, Node *new_node ) { 1.690 + assert( old_node != new_node, "no cycles please" ); 1.691 + //old_node->set_req( 1, new_node /*NO DU INFO*/ ); 1.692 + // Nodes always have DU info now, so re-use the side array slot 1.693 + // for this node to provide the forwarding pointer. 1.694 + _nodes.map( old_node->_idx, (Node*)((intptr_t)new_node + 1) ); 1.695 + } 1.696 + void lazy_replace( Node *old_node, Node *new_node ) { 1.697 + _igvn.replace_node( old_node, new_node ); 1.698 + lazy_update( old_node, new_node ); 1.699 + } 1.700 + void lazy_replace_proj( Node *old_node, Node *new_node ) { 1.701 + assert( old_node->req() == 1, "use this for Projs" ); 1.702 + _igvn.hash_delete(old_node); // Must hash-delete before hacking edges 1.703 + old_node->add_req( NULL ); 1.704 + lazy_replace( old_node, new_node ); 1.705 + } 1.706 + 1.707 +private: 1.708 + 1.709 + // Place 'n' in some loop nest, where 'n' is a CFG node 1.710 + void build_loop_tree(); 1.711 + int build_loop_tree_impl( Node *n, int pre_order ); 1.712 + // Insert loop into the existing loop tree. 'innermost' is a leaf of the 1.713 + // loop tree, not the root. 
1.714 + IdealLoopTree *sort( IdealLoopTree *loop, IdealLoopTree *innermost ); 1.715 + 1.716 + // Place Data nodes in some loop nest 1.717 + void build_loop_early( VectorSet &visited, Node_List &worklist, Node_Stack &nstack ); 1.718 + void build_loop_late ( VectorSet &visited, Node_List &worklist, Node_Stack &nstack ); 1.719 + void build_loop_late_post ( Node* n ); 1.720 + 1.721 + // Array of immediate dominance info for each CFG node indexed by node idx 1.722 +private: 1.723 + uint _idom_size; 1.724 + Node **_idom; // Array of immediate dominators 1.725 + uint *_dom_depth; // Used for fast LCA test 1.726 + GrowableArray<uint>* _dom_stk; // For recomputation of dom depth 1.727 + 1.728 + Node* idom_no_update(Node* d) const { 1.729 + assert(d->_idx < _idom_size, "oob"); 1.730 + Node* n = _idom[d->_idx]; 1.731 + assert(n != NULL,"Bad immediate dominator info."); 1.732 + while (n->in(0) == NULL) { // Skip dead CFG nodes 1.733 + //n = n->in(1); 1.734 + n = (Node*)(((intptr_t)_nodes[n->_idx]) & ~1); 1.735 + assert(n != NULL,"Bad immediate dominator info."); 1.736 + } 1.737 + return n; 1.738 + } 1.739 + Node *idom(Node* d) const { 1.740 + uint didx = d->_idx; 1.741 + Node *n = idom_no_update(d); 1.742 + _idom[didx] = n; // Lazily remove dead CFG nodes from table. 1.743 + return n; 1.744 + } 1.745 + uint dom_depth(Node* d) const { 1.746 + guarantee(d != NULL, "Null dominator info."); 1.747 + guarantee(d->_idx < _idom_size, ""); 1.748 + return _dom_depth[d->_idx]; 1.749 + } 1.750 + void set_idom(Node* d, Node* n, uint dom_depth); 1.751 + // Locally compute IDOM using dom_lca call 1.752 + Node *compute_idom( Node *region ) const; 1.753 + // Recompute dom_depth 1.754 + void recompute_dom_depth(); 1.755 + 1.756 + // Is safept not required by an outer loop? 1.757 + bool is_deleteable_safept(Node* sfpt); 1.758 + 1.759 + // Replace parallel induction variable (parallel to trip counter) 1.760 + void replace_parallel_iv(IdealLoopTree *loop); 1.761 + 1.762 + // Perform verification that the graph is valid. 1.763 + PhaseIdealLoop( PhaseIterGVN &igvn) : 1.764 + PhaseTransform(Ideal_Loop), 1.765 + _igvn(igvn), 1.766 + _dom_lca_tags(arena()), // Thread::resource_area 1.767 + _verify_me(NULL), 1.768 + _verify_only(true) { 1.769 + build_and_optimize(false, false); 1.770 + } 1.771 + 1.772 + // build the loop tree and perform any requested optimizations 1.773 + void build_and_optimize(bool do_split_if, bool skip_loop_opts); 1.774 + 1.775 +public: 1.776 + // Dominators for the sea of nodes 1.777 + void Dominators(); 1.778 + Node *dom_lca( Node *n1, Node *n2 ) const { 1.779 + return find_non_split_ctrl(dom_lca_internal(n1, n2)); 1.780 + } 1.781 + Node *dom_lca_internal( Node *n1, Node *n2 ) const; 1.782 + 1.783 + // Compute the Ideal Node to Loop mapping 1.784 + PhaseIdealLoop( PhaseIterGVN &igvn, bool do_split_ifs, bool skip_loop_opts = false) : 1.785 + PhaseTransform(Ideal_Loop), 1.786 + _igvn(igvn), 1.787 + _dom_lca_tags(arena()), // Thread::resource_area 1.788 + _verify_me(NULL), 1.789 + _verify_only(false) { 1.790 + build_and_optimize(do_split_ifs, skip_loop_opts); 1.791 + } 1.792 + 1.793 + // Verify that verify_me made the same decisions as a fresh run. 
1.794 + PhaseIdealLoop( PhaseIterGVN &igvn, const PhaseIdealLoop *verify_me) : 1.795 + PhaseTransform(Ideal_Loop), 1.796 + _igvn(igvn), 1.797 + _dom_lca_tags(arena()), // Thread::resource_area 1.798 + _verify_me(verify_me), 1.799 + _verify_only(false) { 1.800 + build_and_optimize(false, false); 1.801 + } 1.802 + 1.803 + // Build and verify the loop tree without modifying the graph. This 1.804 + // is useful to verify that all inputs properly dominate their uses. 1.805 + static void verify(PhaseIterGVN& igvn) { 1.806 +#ifdef ASSERT 1.807 + PhaseIdealLoop v(igvn); 1.808 +#endif 1.809 + } 1.810 + 1.811 + // True if the method has at least 1 irreducible loop 1.812 + bool _has_irreducible_loops; 1.813 + 1.814 + // Per-Node transform 1.815 + virtual Node *transform( Node *a_node ) { return 0; } 1.816 + 1.817 + bool is_counted_loop( Node *x, IdealLoopTree *loop ); 1.818 + 1.819 + Node* exact_limit( IdealLoopTree *loop ); 1.820 + 1.821 + // Return a post-walked LoopNode 1.822 + IdealLoopTree *get_loop( Node *n ) const { 1.823 + // Dead nodes have no loop, so return the top level loop instead 1.824 + if (!has_node(n)) return _ltree_root; 1.825 + assert(!has_ctrl(n), ""); 1.826 + return (IdealLoopTree*)_nodes[n->_idx]; 1.827 + } 1.828 + 1.829 + // Is 'n' a (nested) member of 'loop'? 1.830 + int is_member( const IdealLoopTree *loop, Node *n ) const { 1.831 + return loop->is_member(get_loop(n)); } 1.832 + 1.833 + // This is the basic building block of the loop optimizations. It clones an 1.834 + // entire loop body. It makes an old_new loop body mapping; with this 1.835 + // mapping you can find the new-loop equivalent to an old-loop node. All 1.836 + // new-loop nodes are exactly equal to their old-loop counterparts, all 1.837 + // edges are the same. All exits from the old-loop now have a RegionNode 1.838 + // that merges the equivalent new-loop path. This is true even for the 1.839 + // normal "loop-exit" condition. All uses of loop-invariant old-loop values 1.840 + // now come from (one or more) Phis that merge their new-loop equivalents. 1.841 + // Parameter side_by_side_idom: 1.842 + // When side_by_size_idom is NULL, the dominator tree is constructed for 1.843 + // the clone loop to dominate the original. Used in construction of 1.844 + // pre-main-post loop sequence. 1.845 + // When nonnull, the clone and original are side-by-side, both are 1.846 + // dominated by the passed in side_by_side_idom node. Used in 1.847 + // construction of unswitched loops. 1.848 + void clone_loop( IdealLoopTree *loop, Node_List &old_new, int dom_depth, 1.849 + Node* side_by_side_idom = NULL); 1.850 + 1.851 + // If we got the effect of peeling, either by actually peeling or by 1.852 + // making a pre-loop which must execute at least once, we can remove 1.853 + // all loop-invariant dominated tests in the main body. 1.854 + void peeled_dom_test_elim( IdealLoopTree *loop, Node_List &old_new ); 1.855 + 1.856 + // Generate code to do a loop peel for the given loop (and body). 1.857 + // old_new is a temp array. 1.858 + void do_peeling( IdealLoopTree *loop, Node_List &old_new ); 1.859 + 1.860 + // Add pre and post loops around the given loop. These loops are used 1.861 + // during RCE, unrolling and aligning loops. 1.862 + void insert_pre_post_loops( IdealLoopTree *loop, Node_List &old_new, bool peel_only ); 1.863 + // If Node n lives in the back_ctrl block, we clone a private version of n 1.864 + // in preheader_ctrl block and return that, otherwise return n. 
1.865 + Node *clone_up_backedge_goo( Node *back_ctrl, Node *preheader_ctrl, Node *n, VectorSet &visited, Node_Stack &clones ); 1.866 + 1.867 + // Take steps to maximally unroll the loop. Peel any odd iterations, then 1.868 + // unroll to do double iterations. The next round of major loop transforms 1.869 + // will repeat till the doubled loop body does all remaining iterations in 1 1.870 + // pass. 1.871 + void do_maximally_unroll( IdealLoopTree *loop, Node_List &old_new ); 1.872 + 1.873 + // Unroll the loop body one step - make each trip do 2 iterations. 1.874 + void do_unroll( IdealLoopTree *loop, Node_List &old_new, bool adjust_min_trip ); 1.875 + 1.876 + // Return true if exp is a constant times an induction var 1.877 + bool is_scaled_iv(Node* exp, Node* iv, int* p_scale); 1.878 + 1.879 + // Return true if exp is a scaled induction var plus (or minus) constant 1.880 + bool is_scaled_iv_plus_offset(Node* exp, Node* iv, int* p_scale, Node** p_offset, int depth = 0); 1.881 + 1.882 + // Create a new if above the uncommon_trap_if_pattern for the predicate to be promoted 1.883 + ProjNode* create_new_if_for_predicate(ProjNode* cont_proj, Node* new_entry, 1.884 + Deoptimization::DeoptReason reason); 1.885 + void register_control(Node* n, IdealLoopTree *loop, Node* pred); 1.886 + 1.887 + // Clone loop predicates to cloned loops (peeled, unswitched) 1.888 + static ProjNode* clone_predicate(ProjNode* predicate_proj, Node* new_entry, 1.889 + Deoptimization::DeoptReason reason, 1.890 + PhaseIdealLoop* loop_phase, 1.891 + PhaseIterGVN* igvn); 1.892 + 1.893 + static Node* clone_loop_predicates(Node* old_entry, Node* new_entry, 1.894 + bool clone_limit_check, 1.895 + PhaseIdealLoop* loop_phase, 1.896 + PhaseIterGVN* igvn); 1.897 + Node* clone_loop_predicates(Node* old_entry, Node* new_entry, bool clone_limit_check); 1.898 + 1.899 + static Node* skip_loop_predicates(Node* entry); 1.900 + 1.901 + // Find a good location to insert a predicate 1.902 + static ProjNode* find_predicate_insertion_point(Node* start_c, Deoptimization::DeoptReason reason); 1.903 + // Find a predicate 1.904 + static Node* find_predicate(Node* entry); 1.905 + // Construct a range check for a predicate if 1.906 + BoolNode* rc_predicate(IdealLoopTree *loop, Node* ctrl, 1.907 + int scale, Node* offset, 1.908 + Node* init, Node* limit, Node* stride, 1.909 + Node* range, bool upper); 1.910 + 1.911 + // Implementation of the loop predication to promote checks outside the loop 1.912 + bool loop_predication_impl(IdealLoopTree *loop); 1.913 + 1.914 + // Helper function to collect predicate for eliminating the useless ones 1.915 + void collect_potentially_useful_predicates(IdealLoopTree *loop, Unique_Node_List &predicate_opaque1); 1.916 + void eliminate_useless_predicates(); 1.917 + 1.918 + // Change the control input of expensive nodes to allow commoning by 1.919 + // IGVN when it is guaranteed to not result in a more frequent 1.920 + // execution of the expensive node. Return true if progress. 1.921 + bool process_expensive_nodes(); 1.922 + 1.923 + // Check whether node has become unreachable 1.924 + bool is_node_unreachable(Node *n) const { 1.925 + return !has_node(n) || n->is_unreachable(_igvn); 1.926 + } 1.927 + 1.928 + // Eliminate range-checks and other trip-counter vs loop-invariant tests. 1.929 + void do_range_check( IdealLoopTree *loop, Node_List &old_new ); 1.930 + 1.931 + // Create a slow version of the loop by cloning the loop 1.932 + // and inserting an if to select fast-slow versions. 
1.933 + ProjNode* create_slow_version_of_loop(IdealLoopTree *loop, 1.934 + Node_List &old_new); 1.935 + 1.936 + // Clone loop with an invariant test (that does not exit) and 1.937 + // insert a clone of the test that selects which version to 1.938 + // execute. 1.939 + void do_unswitching (IdealLoopTree *loop, Node_List &old_new); 1.940 + 1.941 + // Find candidate "if" for unswitching 1.942 + IfNode* find_unswitching_candidate(const IdealLoopTree *loop) const; 1.943 + 1.944 + // Range Check Elimination uses this function! 1.945 + // Constrain the main loop iterations so the affine function: 1.946 + // low_limit <= scale_con * I + offset < upper_limit 1.947 + // always holds true. That is, either increase the number of iterations in 1.948 + // the pre-loop or the post-loop until the condition holds true in the main 1.949 + // loop. Scale_con, offset and limit are all loop invariant. 1.950 + void add_constraint( int stride_con, int scale_con, Node *offset, Node *low_limit, Node *upper_limit, Node *pre_ctrl, Node **pre_limit, Node **main_limit ); 1.951 + // Helper function for add_constraint(). 1.952 + Node* adjust_limit( int stride_con, Node * scale, Node *offset, Node *rc_limit, Node *loop_limit, Node *pre_ctrl ); 1.953 + 1.954 + // Partially peel loop up through last_peel node. 1.955 + bool partial_peel( IdealLoopTree *loop, Node_List &old_new ); 1.956 + 1.957 + // Create a scheduled list of nodes control dependent on ctrl set. 1.958 + void scheduled_nodelist( IdealLoopTree *loop, VectorSet& ctrl, Node_List &sched ); 1.959 + // Has a use in the vector set 1.960 + bool has_use_in_set( Node* n, VectorSet& vset ); 1.961 + // Has use internal to the vector set (ie. not in a phi at the loop head) 1.962 + bool has_use_internal_to_set( Node* n, VectorSet& vset, IdealLoopTree *loop ); 1.963 + // clone "n" for uses that are outside of loop 1.964 + int clone_for_use_outside_loop( IdealLoopTree *loop, Node* n, Node_List& worklist ); 1.965 + // clone "n" for special uses that are in the not_peeled region 1.966 + void clone_for_special_use_inside_loop( IdealLoopTree *loop, Node* n, 1.967 + VectorSet& not_peel, Node_List& sink_list, Node_List& worklist ); 1.968 + // Insert phi(lp_entry_val, back_edge_val) at use->in(idx) for loop lp if phi does not already exist 1.969 + void insert_phi_for_loop( Node* use, uint idx, Node* lp_entry_val, Node* back_edge_val, LoopNode* lp ); 1.970 +#ifdef ASSERT 1.971 + // Validate the loop partition sets: peel and not_peel 1.972 + bool is_valid_loop_partition( IdealLoopTree *loop, VectorSet& peel, Node_List& peel_list, VectorSet& not_peel ); 1.973 + // Ensure that uses outside of loop are of the right form 1.974 + bool is_valid_clone_loop_form( IdealLoopTree *loop, Node_List& peel_list, 1.975 + uint orig_exit_idx, uint clone_exit_idx); 1.976 + bool is_valid_clone_loop_exit_use( IdealLoopTree *loop, Node* use, uint exit_idx); 1.977 +#endif 1.978 + 1.979 + // Returns nonzero constant stride if-node is a possible iv test (otherwise returns zero.) 1.980 + int stride_of_possible_iv( Node* iff ); 1.981 + bool is_possible_iv_test( Node* iff ) { return stride_of_possible_iv(iff) != 0; } 1.982 + // Return the (unique) control output node that's in the loop (if it exists.) 1.983 + Node* stay_in_loop( Node* n, IdealLoopTree *loop); 1.984 + // Insert a signed compare loop exit cloned from an unsigned compare. 
1.985 + IfNode* insert_cmpi_loop_exit(IfNode* if_cmpu, IdealLoopTree *loop); 1.986 + void remove_cmpi_loop_exit(IfNode* if_cmp, IdealLoopTree *loop); 1.987 + // Utility to register node "n" with PhaseIdealLoop 1.988 + void register_node(Node* n, IdealLoopTree *loop, Node* pred, int ddepth); 1.989 + // Utility to create an if-projection 1.990 + ProjNode* proj_clone(ProjNode* p, IfNode* iff); 1.991 + // Force the iff control output to be the live_proj 1.992 + Node* short_circuit_if(IfNode* iff, ProjNode* live_proj); 1.993 + // Insert a region before an if projection 1.994 + RegionNode* insert_region_before_proj(ProjNode* proj); 1.995 + // Insert a new if before an if projection 1.996 + ProjNode* insert_if_before_proj(Node* left, bool Signed, BoolTest::mask relop, Node* right, ProjNode* proj); 1.997 + 1.998 + // Passed in a Phi merging (recursively) some nearly equivalent Bool/Cmps. 1.999 + // "Nearly" because all Nodes have been cloned from the original in the loop, 1.1000 + // but the fall-in edges to the Cmp are different. Clone bool/Cmp pairs 1.1001 + // through the Phi recursively, and return a Bool. 1.1002 + BoolNode *clone_iff( PhiNode *phi, IdealLoopTree *loop ); 1.1003 + CmpNode *clone_bool( PhiNode *phi, IdealLoopTree *loop ); 1.1004 + 1.1005 + 1.1006 + // Rework addressing expressions to get the most loop-invariant stuff 1.1007 + // moved out. We'd like to do all associative operators, but it's especially 1.1008 + // important (common) to do address expressions. 1.1009 + Node *remix_address_expressions( Node *n ); 1.1010 + 1.1011 + // Attempt to use a conditional move instead of a phi/branch 1.1012 + Node *conditional_move( Node *n ); 1.1013 + 1.1014 + // Reorganize offset computations to lower register pressure. 1.1015 + // Mostly prevent loop-fallout uses of the pre-incremented trip counter 1.1016 + // (which are then alive with the post-incremented trip counter 1.1017 + // forcing an extra register move) 1.1018 + void reorg_offsets( IdealLoopTree *loop ); 1.1019 + 1.1020 + // Check for aggressive application of 'split-if' optimization, 1.1021 + // using basic block level info. 1.1022 + void split_if_with_blocks ( VectorSet &visited, Node_Stack &nstack ); 1.1023 + Node *split_if_with_blocks_pre ( Node *n ); 1.1024 + void split_if_with_blocks_post( Node *n ); 1.1025 + Node *has_local_phi_input( Node *n ); 1.1026 + // Mark an IfNode as being dominated by a prior test, 1.1027 + // without actually altering the CFG (and hence IDOM info). 1.1028 + void dominated_by( Node *prevdom, Node *iff, bool flip = false, bool exclude_loop_predicate = false ); 1.1029 + 1.1030 + // Split Node 'n' through merge point 1.1031 + Node *split_thru_region( Node *n, Node *region ); 1.1032 + // Split Node 'n' through merge point if there is enough win. 1.1033 + Node *split_thru_phi( Node *n, Node *region, int policy ); 1.1034 + // Found an If getting its condition-code input from a Phi in the 1.1035 + // same block. Split thru the Region. 
1.1036 + void do_split_if( Node *iff ); 1.1037 + 1.1038 + // Conversion of fill/copy patterns into intrisic versions 1.1039 + bool do_intrinsify_fill(); 1.1040 + bool intrinsify_fill(IdealLoopTree* lpt); 1.1041 + bool match_fill_loop(IdealLoopTree* lpt, Node*& store, Node*& store_value, 1.1042 + Node*& shift, Node*& offset); 1.1043 + 1.1044 +private: 1.1045 + // Return a type based on condition control flow 1.1046 + const TypeInt* filtered_type( Node *n, Node* n_ctrl); 1.1047 + const TypeInt* filtered_type( Node *n ) { return filtered_type(n, NULL); } 1.1048 + // Helpers for filtered type 1.1049 + const TypeInt* filtered_type_from_dominators( Node* val, Node *val_ctrl); 1.1050 + 1.1051 + // Helper functions 1.1052 + Node *spinup( Node *iff, Node *new_false, Node *new_true, Node *region, Node *phi, small_cache *cache ); 1.1053 + Node *find_use_block( Node *use, Node *def, Node *old_false, Node *new_false, Node *old_true, Node *new_true ); 1.1054 + void handle_use( Node *use, Node *def, small_cache *cache, Node *region_dom, Node *new_false, Node *new_true, Node *old_false, Node *old_true ); 1.1055 + bool split_up( Node *n, Node *blk1, Node *blk2 ); 1.1056 + void sink_use( Node *use, Node *post_loop ); 1.1057 + Node *place_near_use( Node *useblock ) const; 1.1058 + 1.1059 + bool _created_loop_node; 1.1060 +public: 1.1061 + void set_created_loop_node() { _created_loop_node = true; } 1.1062 + bool created_loop_node() { return _created_loop_node; } 1.1063 + void register_new_node( Node *n, Node *blk ); 1.1064 + 1.1065 +#ifdef ASSERT 1.1066 + void dump_bad_graph(const char* msg, Node* n, Node* early, Node* LCA); 1.1067 +#endif 1.1068 + 1.1069 +#ifndef PRODUCT 1.1070 + void dump( ) const; 1.1071 + void dump( IdealLoopTree *loop, uint rpo_idx, Node_List &rpo_list ) const; 1.1072 + void rpo( Node *start, Node_Stack &stk, VectorSet &visited, Node_List &rpo_list ) const; 1.1073 + void verify() const; // Major slow :-) 1.1074 + void verify_compare( Node *n, const PhaseIdealLoop *loop_verify, VectorSet &visited ) const; 1.1075 + IdealLoopTree *get_loop_idx(Node* n) const { 1.1076 + // Dead nodes have no loop, so return the top level loop instead 1.1077 + return _nodes[n->_idx] ? (IdealLoopTree*)_nodes[n->_idx] : _ltree_root; 1.1078 + } 1.1079 + // Print some stats 1.1080 + static void print_statistics(); 1.1081 + static int _loop_invokes; // Count of PhaseIdealLoop invokes 1.1082 + static int _loop_work; // Sum of PhaseIdealLoop x _unique 1.1083 +#endif 1.1084 +}; 1.1085 + 1.1086 +inline Node* IdealLoopTree::tail() { 1.1087 +// Handle lazy update of _tail field 1.1088 + Node *n = _tail; 1.1089 + //while( !n->in(0) ) // Skip dead CFG nodes 1.1090 + //n = n->in(1); 1.1091 + if (n->in(0) == NULL) 1.1092 + n = _phase->get_ctrl(n); 1.1093 + _tail = n; 1.1094 + return n; 1.1095 +} 1.1096 + 1.1097 + 1.1098 +// Iterate over the loop tree using a preorder, left-to-right traversal. 1.1099 +// 1.1100 +// Example that visits all counted loops from within PhaseIdealLoop 1.1101 +// 1.1102 +// for (LoopTreeIterator iter(_ltree_root); !iter.done(); iter.next()) { 1.1103 +// IdealLoopTree* lpt = iter.current(); 1.1104 +// if (!lpt->is_counted()) continue; 1.1105 +// ... 1.1106 +class LoopTreeIterator : public StackObj { 1.1107 +private: 1.1108 + IdealLoopTree* _root; 1.1109 + IdealLoopTree* _curnt; 1.1110 + 1.1111 +public: 1.1112 + LoopTreeIterator(IdealLoopTree* root) : _root(root), _curnt(root) {} 1.1113 + 1.1114 + bool done() { return _curnt == NULL; } // Finished iterating? 
1.1115 + 1.1116 + void next(); // Advance to next loop tree 1.1117 + 1.1118 + IdealLoopTree* current() { return _curnt; } // Return current value of iterator. 1.1119 +}; 1.1120 + 1.1121 +#endif // SHARE_VM_OPTO_LOOPNODE_HPP
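// A minimal usage sketch combining the LoopTreeIterator example above with the
// CountedLoopNode accessors declared in this header (editorial illustration
// only; the helper name is made up and assumes it runs inside PhaseIdealLoop
// within the HotSpot C2 build environment):
//
//   void PhaseIdealLoop::print_counted_loops_sketch() {
//     for (LoopTreeIterator iter(_ltree_root); !iter.done(); iter.next()) {
//       IdealLoopTree* lpt = iter.current();
//       if (!lpt->is_counted())  continue;              // only trip-counted loops
//       CountedLoopNode* cl = lpt->_head->as_CountedLoop();
//       if (cl->is_main_loop() && cl->stride_is_con()) {
//         tty->print_cr("main loop %u: stride_con=%d trip_count=%u",
//                       cl->_idx, cl->stride_con(), cl->trip_count());
//       }
//     }
//   }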