1.1 --- a/src/share/vm/opto/loopTransform.cpp Thu Apr 07 21:32:23 2011 -0700 1.2 +++ b/src/share/vm/opto/loopTransform.cpp Fri Apr 08 14:56:22 2011 -0700 1.3 @@ -63,6 +63,46 @@ 1.4 } 1.5 } 1.6 1.7 +//------------------------------compute_exact_trip_count----------------------- 1.8 +// Compute loop exact trip count if possible. Do not recalculate trip count for 1.9 +// split loops (pre-main-post) which have their limits and inits behind Opaque node. 1.10 +void IdealLoopTree::compute_exact_trip_count( PhaseIdealLoop *phase ) { 1.11 + if (!_head->as_Loop()->is_valid_counted_loop()) { 1.12 + return; 1.13 + } 1.14 + CountedLoopNode* cl = _head->as_CountedLoop(); 1.15 + // Trip count may become nonexact for iteration split loops since 1.16 + // RCE modifies limits. Note, _trip_count value is not reset since 1.17 + // it is used to limit unrolling of main loop. 1.18 + cl->set_nonexact_trip_count(); 1.19 + 1.20 + // Loop's test should be part of loop. 1.21 + if (!phase->is_member(this, phase->get_ctrl(cl->loopexit()->in(CountedLoopEndNode::TestValue)))) 1.22 + return; // Infinite loop 1.23 + 1.24 +#ifdef ASSERT 1.25 + BoolTest::mask bt = cl->loopexit()->test_trip(); 1.26 + assert(bt == BoolTest::lt || bt == BoolTest::gt || 1.27 + bt == BoolTest::ne, "canonical test is expected"); 1.28 +#endif 1.29 + 1.30 + Node* init_n = cl->init_trip(); 1.31 + Node* limit_n = cl->limit(); 1.32 + if (init_n != NULL && init_n->is_Con() && 1.33 + limit_n != NULL && limit_n->is_Con()) { 1.34 + // Use longs to avoid integer overflow. 1.35 + int stride_con = cl->stride_con(); 1.36 + long init_con = cl->init_trip()->get_int(); 1.37 + long limit_con = cl->limit()->get_int(); 1.38 + int stride_m = stride_con - (stride_con > 0 ? 1 : -1); 1.39 + long trip_count = (limit_con - init_con + stride_m)/stride_con; 1.40 + if (trip_count > 0 && (julong)trip_count < (julong)max_juint) { 1.41 + // Set exact trip count. 1.42 + cl->set_exact_trip_count((uint)trip_count); 1.43 + } 1.44 + } 1.45 +} 1.46 + 1.47 //------------------------------compute_profile_trip_cnt---------------------------- 1.48 // Compute loop trip count from profile data as 1.49 // (backedge_count + loop_exit_count) / loop_exit_count 1.50 @@ -533,29 +573,15 @@ 1.51 if (!cl->is_valid_counted_loop()) 1.52 return false; // Malformed counted loop 1.53 1.54 - Node *init_n = cl->init_trip(); 1.55 - Node *limit_n = cl->limit(); 1.56 - 1.57 - // Non-constant bounds 1.58 - if (init_n == NULL || !init_n->is_Con() || 1.59 - limit_n == NULL || !limit_n->is_Con()) { 1.60 + if (!cl->has_exact_trip_count()) { 1.61 + // Trip count is not exact. 1.62 return false; 1.63 } 1.64 - // Use longs to avoid integer overflow. 1.65 - int stride_con = cl->stride_con(); 1.66 - long init_con = cl->init_trip()->get_int(); 1.67 - long limit_con = cl->limit()->get_int(); 1.68 - int stride_m = stride_con - (stride_con > 0 ? 1 : -1); 1.69 - long trip_cnt = (limit_con - init_con + stride_m)/stride_con; 1.70 1.71 - // Note, max_jint is used to indicate unknown trip count. 1.72 - if (trip_cnt <= 0 || trip_cnt >= (long)max_jint) { 1.73 - // return a false (no maximally unroll) and the regular unroll/peel 1.74 - // route will make a small mess which CCP will fold away. 1.75 - return false; 1.76 - } 1.77 - uint trip_count = (uint)trip_cnt; 1.78 - cl->set_trip_count(trip_count); 1.79 + uint trip_count = cl->trip_count(); 1.80 + // Note, max_juint is used to indicate unknown trip count. 1.81 + assert(trip_count > 1, "one iteration loop should be optimized out already"); 1.82 + assert(trip_count < max_juint, "exact trip_count should be less than max_uint."); 1.83 1.84 // Real policy: if we maximally unroll, does it get too big? 1.85 // Allow the unrolled mess to get larger than standard loop 1.86 @@ -1096,7 +1122,7 @@ 1.87 tty->print("Unrolling "); 1.88 loop->dump_head(); 1.89 } else if (TraceLoopOpts) { 1.90 - if (loop_head->trip_count() < LoopUnrollLimit) { 1.91 + if (loop_head->trip_count() < (uint)LoopUnrollLimit) { 1.92 tty->print("Unroll %d(%2d) ", loop_head->unrolled_count()*2, loop_head->trip_count()); 1.93 } else { 1.94 tty->print("Unroll %d ", loop_head->unrolled_count()*2); 1.95 @@ -1802,6 +1828,17 @@ 1.96 // main and post loops have explicitly created zero trip guard 1.97 bool needs_guard = !cl->is_main_loop() && !cl->is_post_loop(); 1.98 if (needs_guard) { 1.99 + // Skip guard if values not overlap. 1.100 + const TypeInt* init_t = phase->_igvn.type(cl->init_trip())->is_int(); 1.101 + const TypeInt* limit_t = phase->_igvn.type(cl->limit())->is_int(); 1.102 + int stride_con = cl->stride_con(); 1.103 + if (stride_con > 0) { 1.104 + needs_guard = (init_t->_hi >= limit_t->_lo); 1.105 + } else { 1.106 + needs_guard = (init_t->_lo <= limit_t->_hi); 1.107 + } 1.108 + } 1.109 + if (needs_guard) { 1.110 // Check for an obvious zero trip guard. 1.111 Node* inctrl = PhaseIdealLoop::skip_loop_predicates(cl->in(LoopNode::EntryControl)); 1.112 if (inctrl->Opcode() == Op_IfTrue) { 1.113 @@ -1846,12 +1883,49 @@ 1.114 return true; 1.115 } 1.116 1.117 +//------------------------------policy_do_one_iteration_loop------------------- 1.118 +// Convert one iteration loop into normal code. 1.119 +bool IdealLoopTree::policy_do_one_iteration_loop( PhaseIdealLoop *phase ) { 1.120 + if (!_head->as_Loop()->is_valid_counted_loop()) 1.121 + return false; // Only for counted loop 1.122 + 1.123 + CountedLoopNode *cl = _head->as_CountedLoop(); 1.124 + if (!cl->has_exact_trip_count() || cl->trip_count() != 1) { 1.125 + return false; 1.126 + } 1.127 + 1.128 +#ifndef PRODUCT 1.129 + if(TraceLoopOpts) { 1.130 + tty->print("OneIteration "); 1.131 + this->dump_head(); 1.132 + } 1.133 +#endif 1.134 + 1.135 + Node *init_n = cl->init_trip(); 1.136 +#ifdef ASSERT 1.137 + // Loop boundaries should be constant since trip count is exact. 1.138 + assert(init_n->get_int() + cl->stride_con() >= cl->limit()->get_int(), "should be one iteration"); 1.139 +#endif 1.140 + // Replace the phi at loop head with the value of the init_trip. 1.141 + // Then the CountedLoopEnd will collapse (backedge will not be taken) 1.142 + // and all loop-invariant uses of the exit values will be correct. 1.143 + phase->_igvn.replace_node(cl->phi(), cl->init_trip()); 1.144 + phase->C->set_major_progress(); 1.145 + return true; 1.146 +} 1.147 1.148 //============================================================================= 1.149 //------------------------------iteration_split_impl--------------------------- 1.150 bool IdealLoopTree::iteration_split_impl( PhaseIdealLoop *phase, Node_List &old_new ) { 1.151 + // Compute exact loop trip count if possible. 1.152 + compute_exact_trip_count(phase); 1.153 + 1.154 + // Convert one iteration loop into normal code. 1.155 + if (policy_do_one_iteration_loop(phase)) 1.156 + return true; 1.157 + 1.158 // Check and remove empty loops (spam micro-benchmarks) 1.159 - if( policy_do_remove_empty_loop(phase) ) 1.160 + if (policy_do_remove_empty_loop(phase)) 1.161 return true; // Here we removed an empty loop 1.162 1.163 bool should_peel = policy_peeling(phase); // Should we peel? 1.164 @@ -1860,40 +1934,40 @@ 1.165 1.166 // Non-counted loops may be peeled; exactly 1 iteration is peeled. 1.167 // This removes loop-invariant tests (usually null checks). 1.168 - if( !_head->is_CountedLoop() ) { // Non-counted loop 1.169 + if (!_head->is_CountedLoop()) { // Non-counted loop 1.170 if (PartialPeelLoop && phase->partial_peel(this, old_new)) { 1.171 // Partial peel succeeded so terminate this round of loop opts 1.172 return false; 1.173 } 1.174 - if( should_peel ) { // Should we peel? 1.175 + if (should_peel) { // Should we peel? 1.176 #ifndef PRODUCT 1.177 if (PrintOpto) tty->print_cr("should_peel"); 1.178 #endif 1.179 phase->do_peeling(this,old_new); 1.180 - } else if( should_unswitch ) { 1.181 + } else if (should_unswitch) { 1.182 phase->do_unswitching(this, old_new); 1.183 } 1.184 return true; 1.185 } 1.186 CountedLoopNode *cl = _head->as_CountedLoop(); 1.187 1.188 - if( !cl->loopexit() ) return true; // Ignore various kinds of broken loops 1.189 + if (!cl->loopexit()) return true; // Ignore various kinds of broken loops 1.190 1.191 // Do nothing special to pre- and post- loops 1.192 - if( cl->is_pre_loop() || cl->is_post_loop() ) return true; 1.193 + if (cl->is_pre_loop() || cl->is_post_loop()) return true; 1.194 1.195 // Compute loop trip count from profile data 1.196 compute_profile_trip_cnt(phase); 1.197 1.198 // Before attempting fancy unrolling, RCE or alignment, see if we want 1.199 // to completely unroll this loop or do loop unswitching. 1.200 - if( cl->is_normal_loop() ) { 1.201 + if (cl->is_normal_loop()) { 1.202 if (should_unswitch) { 1.203 phase->do_unswitching(this, old_new); 1.204 return true; 1.205 } 1.206 bool should_maximally_unroll = policy_maximally_unroll(phase); 1.207 - if( should_maximally_unroll ) { 1.208 + if (should_maximally_unroll) { 1.209 // Here we did some unrolling and peeling. Eventually we will 1.210 // completely unroll this loop and it will no longer be a loop. 1.211 phase->do_maximally_unroll(this,old_new); 1.212 @@ -1937,14 +2011,14 @@ 1.213 // If we have any of these conditions (RCE, alignment, unrolling) met, then 1.214 // we switch to the pre-/main-/post-loop model. This model also covers 1.215 // peeling. 1.216 - if( should_rce || should_align || should_unroll ) { 1.217 - if( cl->is_normal_loop() ) // Convert to 'pre/main/post' loops 1.218 + if (should_rce || should_align || should_unroll) { 1.219 + if (cl->is_normal_loop()) // Convert to 'pre/main/post' loops 1.220 phase->insert_pre_post_loops(this,old_new, !may_rce_align); 1.221 1.222 // Adjust the pre- and main-loop limits to let the pre and post loops run 1.223 // with full checks, but the main-loop with no checks. Remove said 1.224 // checks from the main body. 1.225 - if( should_rce ) 1.226 + if (should_rce) 1.227 phase->do_range_check(this,old_new); 1.228 1.229 // Double loop body for unrolling. Adjust the minimum-trip test (will do 1.230 @@ -1952,16 +2026,16 @@ 1.231 // an even number of trips). If we are peeling, we might enable some RCE 1.232 // and we'd rather unroll the post-RCE'd loop SO... do not unroll if 1.233 // peeling. 1.234 - if( should_unroll && !should_peel ) 1.235 - phase->do_unroll(this,old_new, true); 1.236 + if (should_unroll && !should_peel) 1.237 + phase->do_unroll(this,old_new, true); 1.238 1.239 // Adjust the pre-loop limits to align the main body 1.240 // iterations. 1.241 - if( should_align ) 1.242 + if (should_align) 1.243 Unimplemented(); 1.244 1.245 } else { // Else we have an unchanged counted loop 1.246 - if( should_peel ) // Might want to peel but do nothing else 1.247 + if (should_peel) // Might want to peel but do nothing else 1.248 phase->do_peeling(this,old_new); 1.249 } 1.250 return true;