1.1 --- a/src/share/vm/opto/loopTransform.cpp Fri Apr 08 16:18:48 2011 -0700 1.2 +++ b/src/share/vm/opto/loopTransform.cpp Sat Apr 09 21:16:12 2011 -0700 1.3 @@ -63,6 +63,46 @@ 1.4 } 1.5 } 1.6 1.7 +//------------------------------compute_exact_trip_count----------------------- 1.8 +// Compute loop exact trip count if possible. Do not recalculate trip count for 1.9 +// split loops (pre-main-post) which have their limits and inits behind Opaque node. 1.10 +void IdealLoopTree::compute_exact_trip_count( PhaseIdealLoop *phase ) { 1.11 + if (!_head->as_Loop()->is_valid_counted_loop()) { 1.12 + return; 1.13 + } 1.14 + CountedLoopNode* cl = _head->as_CountedLoop(); 1.15 + // Trip count may become nonexact for iteration split loops since 1.16 + // RCE modifies limits. Note, _trip_count value is not reset since 1.17 + // it is used to limit unrolling of main loop. 1.18 + cl->set_nonexact_trip_count(); 1.19 + 1.20 + // Loop's test should be part of loop. 1.21 + if (!phase->is_member(this, phase->get_ctrl(cl->loopexit()->in(CountedLoopEndNode::TestValue)))) 1.22 + return; // Infinite loop 1.23 + 1.24 +#ifdef ASSERT 1.25 + BoolTest::mask bt = cl->loopexit()->test_trip(); 1.26 + assert(bt == BoolTest::lt || bt == BoolTest::gt || 1.27 + bt == BoolTest::ne, "canonical test is expected"); 1.28 +#endif 1.29 + 1.30 + Node* init_n = cl->init_trip(); 1.31 + Node* limit_n = cl->limit(); 1.32 + if (init_n != NULL && init_n->is_Con() && 1.33 + limit_n != NULL && limit_n->is_Con()) { 1.34 + // Use longs to avoid integer overflow. 1.35 + int stride_con = cl->stride_con(); 1.36 + long init_con = cl->init_trip()->get_int(); 1.37 + long limit_con = cl->limit()->get_int(); 1.38 + int stride_m = stride_con - (stride_con > 0 ? 1 : -1); 1.39 + long trip_count = (limit_con - init_con + stride_m)/stride_con; 1.40 + if (trip_count > 0 && (julong)trip_count < (julong)max_juint) { 1.41 + // Set exact trip count. 1.42 + cl->set_exact_trip_count((uint)trip_count); 1.43 + } 1.44 + } 1.45 +} 1.46 + 1.47 //------------------------------compute_profile_trip_cnt---------------------------- 1.48 // Compute loop trip count from profile data as 1.49 // (backedge_count + loop_exit_count) / loop_exit_count 1.50 @@ -301,6 +341,132 @@ 1.51 // peeled-loop backedge has 2 users. 1.52 // Step 3: Cut the backedge on the clone (so its not a loop) and remove the 1.53 // extra backedge user. 1.54 +// 1.55 +// orig 1.56 +// 1.57 +// stmt1 1.58 +// | 1.59 +// v 1.60 +// loop predicate 1.61 +// | 1.62 +// v 1.63 +// loop<----+ 1.64 +// | | 1.65 +// stmt2 | 1.66 +// | | 1.67 +// v | 1.68 +// if ^ 1.69 +// / \ | 1.70 +// / \ | 1.71 +// v v | 1.72 +// false true | 1.73 +// / \ | 1.74 +// / ----+ 1.75 +// | 1.76 +// v 1.77 +// exit 1.78 +// 1.79 +// 1.80 +// after clone loop 1.81 +// 1.82 +// stmt1 1.83 +// | 1.84 +// v 1.85 +// loop predicate 1.86 +// / \ 1.87 +// clone / \ orig 1.88 +// / \ 1.89 +// / \ 1.90 +// v v 1.91 +// +---->loop clone loop<----+ 1.92 +// | | | | 1.93 +// | stmt2 clone stmt2 | 1.94 +// | | | | 1.95 +// | v v | 1.96 +// ^ if clone If ^ 1.97 +// | / \ / \ | 1.98 +// | / \ / \ | 1.99 +// | v v v v | 1.100 +// | true false false true | 1.101 +// | / \ / \ | 1.102 +// +---- \ / ----+ 1.103 +// \ / 1.104 +// 1v v2 1.105 +// region 1.106 +// | 1.107 +// v 1.108 +// exit 1.109 +// 1.110 +// 1.111 +// after peel and predicate move 1.112 +// 1.113 +// stmt1 1.114 +// / 1.115 +// / 1.116 +// clone / orig 1.117 +// / 1.118 +// / +----------+ 1.119 +// / | | 1.120 +// / loop predicate | 1.121 +// / | | 1.122 +// v v | 1.123 +// TOP-->loop clone loop<----+ | 1.124 +// | | | | 1.125 +// stmt2 clone stmt2 | | 1.126 +// | | | ^ 1.127 +// v v | | 1.128 +// if clone If ^ | 1.129 +// / \ / \ | | 1.130 +// / \ / \ | | 1.131 +// v v v v | | 1.132 +// true false false true | | 1.133 +// | \ / \ | | 1.134 +// | \ / ----+ ^ 1.135 +// | \ / | 1.136 +// | 1v v2 | 1.137 +// v region | 1.138 +// | | | 1.139 +// | v | 1.140 +// | exit | 1.141 +// | | 1.142 +// +--------------->-----------------+ 1.143 +// 1.144 +// 1.145 +// final graph 1.146 +// 1.147 +// stmt1 1.148 +// | 1.149 +// v 1.150 +// stmt2 clone 1.151 +// | 1.152 +// v 1.153 +// if clone 1.154 +// / | 1.155 +// / | 1.156 +// v v 1.157 +// false true 1.158 +// | | 1.159 +// | v 1.160 +// | loop predicate 1.161 +// | | 1.162 +// | v 1.163 +// | loop<----+ 1.164 +// | | | 1.165 +// | stmt2 | 1.166 +// | | | 1.167 +// | v | 1.168 +// v if ^ 1.169 +// | / \ | 1.170 +// | / \ | 1.171 +// | v v | 1.172 +// | false true | 1.173 +// | | \ | 1.174 +// v v --+ 1.175 +// region 1.176 +// | 1.177 +// v 1.178 +// exit 1.179 +// 1.180 void PhaseIdealLoop::do_peeling( IdealLoopTree *loop, Node_List &old_new ) { 1.181 1.182 C->set_major_progress(); 1.183 @@ -315,9 +481,10 @@ 1.184 loop->dump_head(); 1.185 } 1.186 #endif 1.187 - Node *h = loop->_head; 1.188 - if (h->is_CountedLoop()) { 1.189 - CountedLoopNode *cl = h->as_CountedLoop(); 1.190 + Node* head = loop->_head; 1.191 + bool counted_loop = head->is_CountedLoop(); 1.192 + if (counted_loop) { 1.193 + CountedLoopNode *cl = head->as_CountedLoop(); 1.194 assert(cl->trip_count() > 0, "peeling a fully unrolled loop"); 1.195 cl->set_trip_count(cl->trip_count() - 1); 1.196 if (cl->is_main_loop()) { 1.197 @@ -330,11 +497,11 @@ 1.198 #endif 1.199 } 1.200 } 1.201 + Node* entry = head->in(LoopNode::EntryControl); 1.202 1.203 // Step 1: Clone the loop body. The clone becomes the peeled iteration. 1.204 // The pre-loop illegally has 2 control users (old & new loops). 1.205 - clone_loop( loop, old_new, dom_depth(loop->_head) ); 1.206 - 1.207 + clone_loop( loop, old_new, dom_depth(head) ); 1.208 1.209 // Step 2: Make the old-loop fall-in edges point to the peeled iteration. 1.210 // Do this by making the old-loop fall-in edges act as if they came 1.211 @@ -342,12 +509,15 @@ 1.212 // backedges) and then map to the new peeled iteration. This leaves 1.213 // the pre-loop with only 1 user (the new peeled iteration), but the 1.214 // peeled-loop backedge has 2 users. 1.215 - for (DUIterator_Fast jmax, j = loop->_head->fast_outs(jmax); j < jmax; j++) { 1.216 - Node* old = loop->_head->fast_out(j); 1.217 - if( old->in(0) == loop->_head && old->req() == 3 && 1.218 - (old->is_Loop() || old->is_Phi()) ) { 1.219 - Node *new_exit_value = old_new[old->in(LoopNode::LoopBackControl)->_idx]; 1.220 - if( !new_exit_value ) // Backedge value is ALSO loop invariant? 1.221 + Node* new_exit_value = old_new[head->in(LoopNode::LoopBackControl)->_idx]; 1.222 + new_exit_value = move_loop_predicates(entry, new_exit_value); 1.223 + _igvn.hash_delete(head); 1.224 + head->set_req(LoopNode::EntryControl, new_exit_value); 1.225 + for (DUIterator_Fast jmax, j = head->fast_outs(jmax); j < jmax; j++) { 1.226 + Node* old = head->fast_out(j); 1.227 + if (old->in(0) == loop->_head && old->req() == 3 && old->is_Phi()) { 1.228 + new_exit_value = old_new[old->in(LoopNode::LoopBackControl)->_idx]; 1.229 + if (!new_exit_value ) // Backedge value is ALSO loop invariant? 1.230 // Then loop body backedge value remains the same. 1.231 new_exit_value = old->in(LoopNode::LoopBackControl); 1.232 _igvn.hash_delete(old); 1.233 @@ -358,12 +528,12 @@ 1.234 1.235 // Step 3: Cut the backedge on the clone (so its not a loop) and remove the 1.236 // extra backedge user. 1.237 - Node *nnn = old_new[loop->_head->_idx]; 1.238 - _igvn.hash_delete(nnn); 1.239 - nnn->set_req(LoopNode::LoopBackControl, C->top()); 1.240 - for (DUIterator_Fast j2max, j2 = nnn->fast_outs(j2max); j2 < j2max; j2++) { 1.241 - Node* use = nnn->fast_out(j2); 1.242 - if( use->in(0) == nnn && use->req() == 3 && use->is_Phi() ) { 1.243 + Node* new_head = old_new[head->_idx]; 1.244 + _igvn.hash_delete(new_head); 1.245 + new_head->set_req(LoopNode::LoopBackControl, C->top()); 1.246 + for (DUIterator_Fast j2max, j2 = new_head->fast_outs(j2max); j2 < j2max; j2++) { 1.247 + Node* use = new_head->fast_out(j2); 1.248 + if (use->in(0) == new_head && use->req() == 3 && use->is_Phi()) { 1.249 _igvn.hash_delete(use); 1.250 use->set_req(LoopNode::LoopBackControl, C->top()); 1.251 } 1.252 @@ -371,15 +541,15 @@ 1.253 1.254 1.255 // Step 4: Correct dom-depth info. Set to loop-head depth. 1.256 - int dd = dom_depth(loop->_head); 1.257 - set_idom(loop->_head, loop->_head->in(1), dd); 1.258 + int dd = dom_depth(head); 1.259 + set_idom(head, head->in(1), dd); 1.260 for (uint j3 = 0; j3 < loop->_body.size(); j3++) { 1.261 Node *old = loop->_body.at(j3); 1.262 Node *nnn = old_new[old->_idx]; 1.263 if (!has_ctrl(nnn)) 1.264 set_idom(nnn, idom(nnn), dd-1); 1.265 // While we're at it, remove any SafePoints from the peeled code 1.266 - if( old->Opcode() == Op_SafePoint ) { 1.267 + if (old->Opcode() == Op_SafePoint) { 1.268 Node *nnn = old_new[old->_idx]; 1.269 lazy_replace(nnn,nnn->in(TypeFunc::Control)); 1.270 } 1.271 @@ -392,34 +562,26 @@ 1.272 loop->record_for_igvn(); 1.273 } 1.274 1.275 +#define EMPTY_LOOP_SIZE 7 // number of nodes in an empty loop 1.276 + 1.277 //------------------------------policy_maximally_unroll------------------------ 1.278 -// Return exact loop trip count, or 0 if not maximally unrolling 1.279 +// Calculate exact loop trip count and return true if loop can be maximally 1.280 +// unrolled. 1.281 bool IdealLoopTree::policy_maximally_unroll( PhaseIdealLoop *phase ) const { 1.282 CountedLoopNode *cl = _head->as_CountedLoop(); 1.283 assert(cl->is_normal_loop(), ""); 1.284 + if (!cl->is_valid_counted_loop()) 1.285 + return false; // Malformed counted loop 1.286 1.287 - Node *init_n = cl->init_trip(); 1.288 - Node *limit_n = cl->limit(); 1.289 - 1.290 - // Non-constant bounds 1.291 - if (init_n == NULL || !init_n->is_Con() || 1.292 - limit_n == NULL || !limit_n->is_Con() || 1.293 - // protect against stride not being a constant 1.294 - !cl->stride_is_con()) { 1.295 + if (!cl->has_exact_trip_count()) { 1.296 + // Trip count is not exact. 1.297 return false; 1.298 } 1.299 - int init = init_n->get_int(); 1.300 - int limit = limit_n->get_int(); 1.301 - int span = limit - init; 1.302 - int stride = cl->stride_con(); 1.303 1.304 - if (init >= limit || stride > span) { 1.305 - // return a false (no maximally unroll) and the regular unroll/peel 1.306 - // route will make a small mess which CCP will fold away. 1.307 - return false; 1.308 - } 1.309 - uint trip_count = span/stride; // trip_count can be greater than 2 Gig. 1.310 - assert( (int)trip_count*stride == span, "must divide evenly" ); 1.311 + uint trip_count = cl->trip_count(); 1.312 + // Note, max_juint is used to indicate unknown trip count. 1.313 + assert(trip_count > 1, "one iteration loop should be optimized out already"); 1.314 + assert(trip_count < max_juint, "exact trip_count should be less than max_uint."); 1.315 1.316 // Real policy: if we maximally unroll, does it get too big? 1.317 // Allow the unrolled mess to get larger than standard loop 1.318 @@ -427,15 +589,29 @@ 1.319 uint body_size = _body.size(); 1.320 uint unroll_limit = (uint)LoopUnrollLimit * 4; 1.321 assert( (intx)unroll_limit == LoopUnrollLimit * 4, "LoopUnrollLimit must fit in 32bits"); 1.322 - cl->set_trip_count(trip_count); 1.323 if (trip_count > unroll_limit || body_size > unroll_limit) { 1.324 return false; 1.325 } 1.326 1.327 + // Take into account that after unroll conjoined heads and tails will fold, 1.328 + // otherwise policy_unroll() may allow more unrolling than max unrolling. 1.329 + uint new_body_size = EMPTY_LOOP_SIZE + (body_size - EMPTY_LOOP_SIZE) * trip_count; 1.330 + uint tst_body_size = (new_body_size - EMPTY_LOOP_SIZE) / trip_count + EMPTY_LOOP_SIZE; 1.331 + if (body_size != tst_body_size) // Check for int overflow 1.332 + return false; 1.333 + if (new_body_size > unroll_limit || 1.334 + // Unrolling can result in a large amount of node construction 1.335 + new_body_size >= MaxNodeLimit - phase->C->unique()) { 1.336 + return false; 1.337 + } 1.338 + 1.339 // Currently we don't have policy to optimize one iteration loops. 1.340 // Maximally unrolling transformation is used for that: 1.341 // it is peeled and the original loop become non reachable (dead). 1.342 - if (trip_count == 1) 1.343 + // Also fully unroll a loop with few iterations regardless next 1.344 + // conditions since following loop optimizations will split 1.345 + // such loop anyway (pre-main-post). 1.346 + if (trip_count <= 3) 1.347 return true; 1.348 1.349 // Do not unroll a loop with String intrinsics code. 1.350 @@ -452,17 +628,7 @@ 1.351 } // switch 1.352 } 1.353 1.354 - if (body_size <= unroll_limit) { 1.355 - uint new_body_size = body_size * trip_count; 1.356 - if (new_body_size <= unroll_limit && 1.357 - body_size == new_body_size / trip_count && 1.358 - // Unrolling can result in a large amount of node construction 1.359 - new_body_size < MaxNodeLimit - phase->C->unique()) { 1.360 - return true; // maximally unroll 1.361 - } 1.362 - } 1.363 - 1.364 - return false; // Do not maximally unroll 1.365 + return true; // Do maximally unroll 1.366 } 1.367 1.368 1.369 @@ -474,12 +640,15 @@ 1.370 CountedLoopNode *cl = _head->as_CountedLoop(); 1.371 assert(cl->is_normal_loop() || cl->is_main_loop(), ""); 1.372 1.373 - // protect against stride not being a constant 1.374 - if (!cl->stride_is_con()) return false; 1.375 + if (!cl->is_valid_counted_loop()) 1.376 + return false; // Malformed counted loop 1.377 1.378 // protect against over-unrolling 1.379 if (cl->trip_count() <= 1) return false; 1.380 1.381 + // Check for stride being a small enough constant 1.382 + if (abs(cl->stride_con()) > (1<<3)) return false; 1.383 + 1.384 int future_unroll_ct = cl->unrolled_count() * 2; 1.385 1.386 // Don't unroll if the next round of unrolling would push us 1.387 @@ -560,9 +729,6 @@ 1.388 return false; 1.389 } 1.390 1.391 - // Check for stride being a small enough constant 1.392 - if (abs(cl->stride_con()) > (1<<3)) return false; 1.393 - 1.394 // Unroll once! (Each trip will soon do double iterations) 1.395 return true; 1.396 } 1.397 @@ -956,7 +1122,11 @@ 1.398 tty->print("Unrolling "); 1.399 loop->dump_head(); 1.400 } else if (TraceLoopOpts) { 1.401 - tty->print("Unroll %d ", loop_head->unrolled_count()*2); 1.402 + if (loop_head->trip_count() < (uint)LoopUnrollLimit) { 1.403 + tty->print("Unroll %d(%2d) ", loop_head->unrolled_count()*2, loop_head->trip_count()); 1.404 + } else { 1.405 + tty->print("Unroll %d ", loop_head->unrolled_count()*2); 1.406 + } 1.407 loop->dump_head(); 1.408 } 1.409 #endif 1.410 @@ -1631,7 +1801,7 @@ 1.411 // have on the last iteration. This will break the loop. 1.412 bool IdealLoopTree::policy_do_remove_empty_loop( PhaseIdealLoop *phase ) { 1.413 // Minimum size must be empty loop 1.414 - if (_body.size() > 7/*number of nodes in an empty loop*/) 1.415 + if (_body.size() > EMPTY_LOOP_SIZE) 1.416 return false; 1.417 1.418 if (!_head->is_CountedLoop()) 1.419 @@ -1658,8 +1828,19 @@ 1.420 // main and post loops have explicitly created zero trip guard 1.421 bool needs_guard = !cl->is_main_loop() && !cl->is_post_loop(); 1.422 if (needs_guard) { 1.423 + // Skip guard if values not overlap. 1.424 + const TypeInt* init_t = phase->_igvn.type(cl->init_trip())->is_int(); 1.425 + const TypeInt* limit_t = phase->_igvn.type(cl->limit())->is_int(); 1.426 + int stride_con = cl->stride_con(); 1.427 + if (stride_con > 0) { 1.428 + needs_guard = (init_t->_hi >= limit_t->_lo); 1.429 + } else { 1.430 + needs_guard = (init_t->_lo <= limit_t->_hi); 1.431 + } 1.432 + } 1.433 + if (needs_guard) { 1.434 // Check for an obvious zero trip guard. 1.435 - Node* inctrl = cl->in(LoopNode::EntryControl); 1.436 + Node* inctrl = PhaseIdealLoop::skip_loop_predicates(cl->in(LoopNode::EntryControl)); 1.437 if (inctrl->Opcode() == Op_IfTrue) { 1.438 // The test should look like just the backedge of a CountedLoop 1.439 Node* iff = inctrl->in(0); 1.440 @@ -1702,12 +1883,49 @@ 1.441 return true; 1.442 } 1.443 1.444 +//------------------------------policy_do_one_iteration_loop------------------- 1.445 +// Convert one iteration loop into normal code. 1.446 +bool IdealLoopTree::policy_do_one_iteration_loop( PhaseIdealLoop *phase ) { 1.447 + if (!_head->as_Loop()->is_valid_counted_loop()) 1.448 + return false; // Only for counted loop 1.449 + 1.450 + CountedLoopNode *cl = _head->as_CountedLoop(); 1.451 + if (!cl->has_exact_trip_count() || cl->trip_count() != 1) { 1.452 + return false; 1.453 + } 1.454 + 1.455 +#ifndef PRODUCT 1.456 + if(TraceLoopOpts) { 1.457 + tty->print("OneIteration "); 1.458 + this->dump_head(); 1.459 + } 1.460 +#endif 1.461 + 1.462 + Node *init_n = cl->init_trip(); 1.463 +#ifdef ASSERT 1.464 + // Loop boundaries should be constant since trip count is exact. 1.465 + assert(init_n->get_int() + cl->stride_con() >= cl->limit()->get_int(), "should be one iteration"); 1.466 +#endif 1.467 + // Replace the phi at loop head with the value of the init_trip. 1.468 + // Then the CountedLoopEnd will collapse (backedge will not be taken) 1.469 + // and all loop-invariant uses of the exit values will be correct. 1.470 + phase->_igvn.replace_node(cl->phi(), cl->init_trip()); 1.471 + phase->C->set_major_progress(); 1.472 + return true; 1.473 +} 1.474 1.475 //============================================================================= 1.476 //------------------------------iteration_split_impl--------------------------- 1.477 bool IdealLoopTree::iteration_split_impl( PhaseIdealLoop *phase, Node_List &old_new ) { 1.478 + // Compute exact loop trip count if possible. 1.479 + compute_exact_trip_count(phase); 1.480 + 1.481 + // Convert one iteration loop into normal code. 1.482 + if (policy_do_one_iteration_loop(phase)) 1.483 + return true; 1.484 + 1.485 // Check and remove empty loops (spam micro-benchmarks) 1.486 - if( policy_do_remove_empty_loop(phase) ) 1.487 + if (policy_do_remove_empty_loop(phase)) 1.488 return true; // Here we removed an empty loop 1.489 1.490 bool should_peel = policy_peeling(phase); // Should we peel? 1.491 @@ -1716,40 +1934,40 @@ 1.492 1.493 // Non-counted loops may be peeled; exactly 1 iteration is peeled. 1.494 // This removes loop-invariant tests (usually null checks). 1.495 - if( !_head->is_CountedLoop() ) { // Non-counted loop 1.496 + if (!_head->is_CountedLoop()) { // Non-counted loop 1.497 if (PartialPeelLoop && phase->partial_peel(this, old_new)) { 1.498 // Partial peel succeeded so terminate this round of loop opts 1.499 return false; 1.500 } 1.501 - if( should_peel ) { // Should we peel? 1.502 + if (should_peel) { // Should we peel? 1.503 #ifndef PRODUCT 1.504 if (PrintOpto) tty->print_cr("should_peel"); 1.505 #endif 1.506 phase->do_peeling(this,old_new); 1.507 - } else if( should_unswitch ) { 1.508 + } else if (should_unswitch) { 1.509 phase->do_unswitching(this, old_new); 1.510 } 1.511 return true; 1.512 } 1.513 CountedLoopNode *cl = _head->as_CountedLoop(); 1.514 1.515 - if( !cl->loopexit() ) return true; // Ignore various kinds of broken loops 1.516 + if (!cl->loopexit()) return true; // Ignore various kinds of broken loops 1.517 1.518 // Do nothing special to pre- and post- loops 1.519 - if( cl->is_pre_loop() || cl->is_post_loop() ) return true; 1.520 + if (cl->is_pre_loop() || cl->is_post_loop()) return true; 1.521 1.522 // Compute loop trip count from profile data 1.523 compute_profile_trip_cnt(phase); 1.524 1.525 // Before attempting fancy unrolling, RCE or alignment, see if we want 1.526 // to completely unroll this loop or do loop unswitching. 1.527 - if( cl->is_normal_loop() ) { 1.528 + if (cl->is_normal_loop()) { 1.529 if (should_unswitch) { 1.530 phase->do_unswitching(this, old_new); 1.531 return true; 1.532 } 1.533 bool should_maximally_unroll = policy_maximally_unroll(phase); 1.534 - if( should_maximally_unroll ) { 1.535 + if (should_maximally_unroll) { 1.536 // Here we did some unrolling and peeling. Eventually we will 1.537 // completely unroll this loop and it will no longer be a loop. 1.538 phase->do_maximally_unroll(this,old_new); 1.539 @@ -1757,6 +1975,12 @@ 1.540 } 1.541 } 1.542 1.543 + // Skip next optimizations if running low on nodes. Note that 1.544 + // policy_unswitching and policy_maximally_unroll have this check. 1.545 + uint nodes_left = MaxNodeLimit - phase->C->unique(); 1.546 + if ((2 * _body.size()) > nodes_left) { 1.547 + return true; 1.548 + } 1.549 1.550 // Counted loops may be peeled, may need some iterations run up 1.551 // front for RCE, and may want to align loop refs to a cache 1.552 @@ -1787,14 +2011,14 @@ 1.553 // If we have any of these conditions (RCE, alignment, unrolling) met, then 1.554 // we switch to the pre-/main-/post-loop model. This model also covers 1.555 // peeling. 1.556 - if( should_rce || should_align || should_unroll ) { 1.557 - if( cl->is_normal_loop() ) // Convert to 'pre/main/post' loops 1.558 + if (should_rce || should_align || should_unroll) { 1.559 + if (cl->is_normal_loop()) // Convert to 'pre/main/post' loops 1.560 phase->insert_pre_post_loops(this,old_new, !may_rce_align); 1.561 1.562 // Adjust the pre- and main-loop limits to let the pre and post loops run 1.563 // with full checks, but the main-loop with no checks. Remove said 1.564 // checks from the main body. 1.565 - if( should_rce ) 1.566 + if (should_rce) 1.567 phase->do_range_check(this,old_new); 1.568 1.569 // Double loop body for unrolling. Adjust the minimum-trip test (will do 1.570 @@ -1802,16 +2026,16 @@ 1.571 // an even number of trips). If we are peeling, we might enable some RCE 1.572 // and we'd rather unroll the post-RCE'd loop SO... do not unroll if 1.573 // peeling. 1.574 - if( should_unroll && !should_peel ) 1.575 - phase->do_unroll(this,old_new, true); 1.576 + if (should_unroll && !should_peel) 1.577 + phase->do_unroll(this,old_new, true); 1.578 1.579 // Adjust the pre-loop limits to align the main body 1.580 // iterations. 1.581 - if( should_align ) 1.582 + if (should_align) 1.583 Unimplemented(); 1.584 1.585 } else { // Else we have an unchanged counted loop 1.586 - if( should_peel ) // Might want to peel but do nothing else 1.587 + if (should_peel) // Might want to peel but do nothing else 1.588 phase->do_peeling(this,old_new); 1.589 } 1.590 return true; 1.591 @@ -1861,651 +2085,8 @@ 1.592 return true; 1.593 } 1.594 1.595 -//-------------------------------is_uncommon_trap_proj---------------------------- 1.596 -// Return true if proj is the form of "proj->[region->..]call_uct" 1.597 -bool PhaseIdealLoop::is_uncommon_trap_proj(ProjNode* proj, Deoptimization::DeoptReason reason) { 1.598 - int path_limit = 10; 1.599 - assert(proj, "invalid argument"); 1.600 - Node* out = proj; 1.601 - for (int ct = 0; ct < path_limit; ct++) { 1.602 - out = out->unique_ctrl_out(); 1.603 - if (out == NULL || out->is_Root() || out->is_Start()) 1.604 - return false; 1.605 - if (out->is_CallStaticJava()) { 1.606 - int req = out->as_CallStaticJava()->uncommon_trap_request(); 1.607 - if (req != 0) { 1.608 - Deoptimization::DeoptReason trap_reason = Deoptimization::trap_request_reason(req); 1.609 - if (trap_reason == reason || reason == Deoptimization::Reason_none) { 1.610 - return true; 1.611 - } 1.612 - } 1.613 - return false; // don't do further after call 1.614 - } 1.615 - } 1.616 - return false; 1.617 -} 1.618 1.619 -//-------------------------------is_uncommon_trap_if_pattern------------------------- 1.620 -// Return true for "if(test)-> proj -> ... 1.621 -// | 1.622 -// V 1.623 -// other_proj->[region->..]call_uct" 1.624 -// 1.625 -// "must_reason_predicate" means the uct reason must be Reason_predicate 1.626 -bool PhaseIdealLoop::is_uncommon_trap_if_pattern(ProjNode *proj, Deoptimization::DeoptReason reason) { 1.627 - Node *in0 = proj->in(0); 1.628 - if (!in0->is_If()) return false; 1.629 - // Variation of a dead If node. 1.630 - if (in0->outcnt() < 2) return false; 1.631 - IfNode* iff = in0->as_If(); 1.632 - 1.633 - // we need "If(Conv2B(Opaque1(...)))" pattern for reason_predicate 1.634 - if (reason != Deoptimization::Reason_none) { 1.635 - if (iff->in(1)->Opcode() != Op_Conv2B || 1.636 - iff->in(1)->in(1)->Opcode() != Op_Opaque1) { 1.637 - return false; 1.638 - } 1.639 - } 1.640 - 1.641 - ProjNode* other_proj = iff->proj_out(1-proj->_con)->as_Proj(); 1.642 - return is_uncommon_trap_proj(other_proj, reason); 1.643 -} 1.644 - 1.645 -//-------------------------------register_control------------------------- 1.646 -void PhaseIdealLoop::register_control(Node* n, IdealLoopTree *loop, Node* pred) { 1.647 - assert(n->is_CFG(), "must be control node"); 1.648 - _igvn.register_new_node_with_optimizer(n); 1.649 - loop->_body.push(n); 1.650 - set_loop(n, loop); 1.651 - // When called from beautify_loops() idom is not constructed yet. 1.652 - if (_idom != NULL) { 1.653 - set_idom(n, pred, dom_depth(pred)); 1.654 - } 1.655 -} 1.656 - 1.657 -//------------------------------create_new_if_for_predicate------------------------ 1.658 -// create a new if above the uct_if_pattern for the predicate to be promoted. 1.659 -// 1.660 -// before after 1.661 -// ---------- ---------- 1.662 -// ctrl ctrl 1.663 -// | | 1.664 -// | | 1.665 -// v v 1.666 -// iff new_iff 1.667 -// / \ / \ 1.668 -// / \ / \ 1.669 -// v v v v 1.670 -// uncommon_proj cont_proj if_uct if_cont 1.671 -// \ | | | | 1.672 -// \ | | | | 1.673 -// v v v | v 1.674 -// rgn loop | iff 1.675 -// | | / \ 1.676 -// | | / \ 1.677 -// v | v v 1.678 -// uncommon_trap | uncommon_proj cont_proj 1.679 -// \ \ | | 1.680 -// \ \ | | 1.681 -// v v v v 1.682 -// rgn loop 1.683 -// | 1.684 -// | 1.685 -// v 1.686 -// uncommon_trap 1.687 -// 1.688 -// 1.689 -// We will create a region to guard the uct call if there is no one there. 1.690 -// The true projecttion (if_cont) of the new_iff is returned. 1.691 -// This code is also used to clone predicates to clonned loops. 1.692 -ProjNode* PhaseIdealLoop::create_new_if_for_predicate(ProjNode* cont_proj, Node* new_entry, 1.693 - Deoptimization::DeoptReason reason) { 1.694 - assert(is_uncommon_trap_if_pattern(cont_proj, reason), "must be a uct if pattern!"); 1.695 - IfNode* iff = cont_proj->in(0)->as_If(); 1.696 - 1.697 - ProjNode *uncommon_proj = iff->proj_out(1 - cont_proj->_con); 1.698 - Node *rgn = uncommon_proj->unique_ctrl_out(); 1.699 - assert(rgn->is_Region() || rgn->is_Call(), "must be a region or call uct"); 1.700 - 1.701 - if (!rgn->is_Region()) { // create a region to guard the call 1.702 - assert(rgn->is_Call(), "must be call uct"); 1.703 - CallNode* call = rgn->as_Call(); 1.704 - IdealLoopTree* loop = get_loop(call); 1.705 - rgn = new (C, 1) RegionNode(1); 1.706 - rgn->add_req(uncommon_proj); 1.707 - register_control(rgn, loop, uncommon_proj); 1.708 - _igvn.hash_delete(call); 1.709 - call->set_req(0, rgn); 1.710 - // When called from beautify_loops() idom is not constructed yet. 1.711 - if (_idom != NULL) { 1.712 - set_idom(call, rgn, dom_depth(rgn)); 1.713 - } 1.714 - } 1.715 - 1.716 - Node* entry = iff->in(0); 1.717 - if (new_entry != NULL) { 1.718 - // Clonning the predicate to new location. 1.719 - entry = new_entry; 1.720 - } 1.721 - // Create new_iff 1.722 - IdealLoopTree* lp = get_loop(entry); 1.723 - IfNode *new_iff = new (C, 2) IfNode(entry, NULL, iff->_prob, iff->_fcnt); 1.724 - register_control(new_iff, lp, entry); 1.725 - Node *if_cont = new (C, 1) IfTrueNode(new_iff); 1.726 - Node *if_uct = new (C, 1) IfFalseNode(new_iff); 1.727 - if (cont_proj->is_IfFalse()) { 1.728 - // Swap 1.729 - Node* tmp = if_uct; if_uct = if_cont; if_cont = tmp; 1.730 - } 1.731 - register_control(if_cont, lp, new_iff); 1.732 - register_control(if_uct, get_loop(rgn), new_iff); 1.733 - 1.734 - // if_uct to rgn 1.735 - _igvn.hash_delete(rgn); 1.736 - rgn->add_req(if_uct); 1.737 - // When called from beautify_loops() idom is not constructed yet. 1.738 - if (_idom != NULL) { 1.739 - Node* ridom = idom(rgn); 1.740 - Node* nrdom = dom_lca(ridom, new_iff); 1.741 - set_idom(rgn, nrdom, dom_depth(rgn)); 1.742 - } 1.743 - // rgn must have no phis 1.744 - assert(!rgn->as_Region()->has_phi(), "region must have no phis"); 1.745 - 1.746 - if (new_entry == NULL) { 1.747 - // Attach if_cont to iff 1.748 - _igvn.hash_delete(iff); 1.749 - iff->set_req(0, if_cont); 1.750 - if (_idom != NULL) { 1.751 - set_idom(iff, if_cont, dom_depth(iff)); 1.752 - } 1.753 - } 1.754 - return if_cont->as_Proj(); 1.755 -} 1.756 - 1.757 -//--------------------------find_predicate_insertion_point------------------- 1.758 -// Find a good location to insert a predicate 1.759 -ProjNode* PhaseIdealLoop::find_predicate_insertion_point(Node* start_c, Deoptimization::DeoptReason reason) { 1.760 - if (start_c == NULL || !start_c->is_Proj()) 1.761 - return NULL; 1.762 - if (is_uncommon_trap_if_pattern(start_c->as_Proj(), reason)) { 1.763 - return start_c->as_Proj(); 1.764 - } 1.765 - return NULL; 1.766 -} 1.767 - 1.768 -//--------------------------find_predicate------------------------------------ 1.769 -// Find a predicate 1.770 -Node* PhaseIdealLoop::find_predicate(Node* entry) { 1.771 - Node* predicate = NULL; 1.772 - if (UseLoopPredicate) { 1.773 - predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate); 1.774 - if (predicate != NULL) { // right pattern that can be used by loop predication 1.775 - assert(entry->in(0)->in(1)->in(1)->Opcode()==Op_Opaque1, "must be"); 1.776 - return entry; 1.777 - } 1.778 - } 1.779 - return NULL; 1.780 -} 1.781 - 1.782 -//------------------------------Invariance----------------------------------- 1.783 -// Helper class for loop_predication_impl to compute invariance on the fly and 1.784 -// clone invariants. 1.785 -class Invariance : public StackObj { 1.786 - VectorSet _visited, _invariant; 1.787 - Node_Stack _stack; 1.788 - VectorSet _clone_visited; 1.789 - Node_List _old_new; // map of old to new (clone) 1.790 - IdealLoopTree* _lpt; 1.791 - PhaseIdealLoop* _phase; 1.792 - 1.793 - // Helper function to set up the invariance for invariance computation 1.794 - // If n is a known invariant, set up directly. Otherwise, look up the 1.795 - // the possibility to push n onto the stack for further processing. 1.796 - void visit(Node* use, Node* n) { 1.797 - if (_lpt->is_invariant(n)) { // known invariant 1.798 - _invariant.set(n->_idx); 1.799 - } else if (!n->is_CFG()) { 1.800 - Node *n_ctrl = _phase->ctrl_or_self(n); 1.801 - Node *u_ctrl = _phase->ctrl_or_self(use); // self if use is a CFG 1.802 - if (_phase->is_dominator(n_ctrl, u_ctrl)) { 1.803 - _stack.push(n, n->in(0) == NULL ? 1 : 0); 1.804 - } 1.805 - } 1.806 - } 1.807 - 1.808 - // Compute invariance for "the_node" and (possibly) all its inputs recursively 1.809 - // on the fly 1.810 - void compute_invariance(Node* n) { 1.811 - assert(_visited.test(n->_idx), "must be"); 1.812 - visit(n, n); 1.813 - while (_stack.is_nonempty()) { 1.814 - Node* n = _stack.node(); 1.815 - uint idx = _stack.index(); 1.816 - if (idx == n->req()) { // all inputs are processed 1.817 - _stack.pop(); 1.818 - // n is invariant if it's inputs are all invariant 1.819 - bool all_inputs_invariant = true; 1.820 - for (uint i = 0; i < n->req(); i++) { 1.821 - Node* in = n->in(i); 1.822 - if (in == NULL) continue; 1.823 - assert(_visited.test(in->_idx), "must have visited input"); 1.824 - if (!_invariant.test(in->_idx)) { // bad guy 1.825 - all_inputs_invariant = false; 1.826 - break; 1.827 - } 1.828 - } 1.829 - if (all_inputs_invariant) { 1.830 - _invariant.set(n->_idx); // I am a invariant too 1.831 - } 1.832 - } else { // process next input 1.833 - _stack.set_index(idx + 1); 1.834 - Node* m = n->in(idx); 1.835 - if (m != NULL && !_visited.test_set(m->_idx)) { 1.836 - visit(n, m); 1.837 - } 1.838 - } 1.839 - } 1.840 - } 1.841 - 1.842 - // Helper function to set up _old_new map for clone_nodes. 1.843 - // If n is a known invariant, set up directly ("clone" of n == n). 1.844 - // Otherwise, push n onto the stack for real cloning. 1.845 - void clone_visit(Node* n) { 1.846 - assert(_invariant.test(n->_idx), "must be invariant"); 1.847 - if (_lpt->is_invariant(n)) { // known invariant 1.848 - _old_new.map(n->_idx, n); 1.849 - } else{ // to be cloned 1.850 - assert (!n->is_CFG(), "should not see CFG here"); 1.851 - _stack.push(n, n->in(0) == NULL ? 1 : 0); 1.852 - } 1.853 - } 1.854 - 1.855 - // Clone "n" and (possibly) all its inputs recursively 1.856 - void clone_nodes(Node* n, Node* ctrl) { 1.857 - clone_visit(n); 1.858 - while (_stack.is_nonempty()) { 1.859 - Node* n = _stack.node(); 1.860 - uint idx = _stack.index(); 1.861 - if (idx == n->req()) { // all inputs processed, clone n! 1.862 - _stack.pop(); 1.863 - // clone invariant node 1.864 - Node* n_cl = n->clone(); 1.865 - _old_new.map(n->_idx, n_cl); 1.866 - _phase->register_new_node(n_cl, ctrl); 1.867 - for (uint i = 0; i < n->req(); i++) { 1.868 - Node* in = n_cl->in(i); 1.869 - if (in == NULL) continue; 1.870 - n_cl->set_req(i, _old_new[in->_idx]); 1.871 - } 1.872 - } else { // process next input 1.873 - _stack.set_index(idx + 1); 1.874 - Node* m = n->in(idx); 1.875 - if (m != NULL && !_clone_visited.test_set(m->_idx)) { 1.876 - clone_visit(m); // visit the input 1.877 - } 1.878 - } 1.879 - } 1.880 - } 1.881 - 1.882 - public: 1.883 - Invariance(Arena* area, IdealLoopTree* lpt) : 1.884 - _lpt(lpt), _phase(lpt->_phase), 1.885 - _visited(area), _invariant(area), _stack(area, 10 /* guess */), 1.886 - _clone_visited(area), _old_new(area) 1.887 - {} 1.888 - 1.889 - // Map old to n for invariance computation and clone 1.890 - void map_ctrl(Node* old, Node* n) { 1.891 - assert(old->is_CFG() && n->is_CFG(), "must be"); 1.892 - _old_new.map(old->_idx, n); // "clone" of old is n 1.893 - _invariant.set(old->_idx); // old is invariant 1.894 - _clone_visited.set(old->_idx); 1.895 - } 1.896 - 1.897 - // Driver function to compute invariance 1.898 - bool is_invariant(Node* n) { 1.899 - if (!_visited.test_set(n->_idx)) 1.900 - compute_invariance(n); 1.901 - return (_invariant.test(n->_idx) != 0); 1.902 - } 1.903 - 1.904 - // Driver function to clone invariant 1.905 - Node* clone(Node* n, Node* ctrl) { 1.906 - assert(ctrl->is_CFG(), "must be"); 1.907 - assert(_invariant.test(n->_idx), "must be an invariant"); 1.908 - if (!_clone_visited.test(n->_idx)) 1.909 - clone_nodes(n, ctrl); 1.910 - return _old_new[n->_idx]; 1.911 - } 1.912 -}; 1.913 - 1.914 -//------------------------------is_range_check_if ----------------------------------- 1.915 -// Returns true if the predicate of iff is in "scale*iv + offset u< load_range(ptr)" format 1.916 -// Note: this function is particularly designed for loop predication. We require load_range 1.917 -// and offset to be loop invariant computed on the fly by "invar" 1.918 -bool IdealLoopTree::is_range_check_if(IfNode *iff, PhaseIdealLoop *phase, Invariance& invar) const { 1.919 - if (!is_loop_exit(iff)) { 1.920 - return false; 1.921 - } 1.922 - if (!iff->in(1)->is_Bool()) { 1.923 - return false; 1.924 - } 1.925 - const BoolNode *bol = iff->in(1)->as_Bool(); 1.926 - if (bol->_test._test != BoolTest::lt) { 1.927 - return false; 1.928 - } 1.929 - if (!bol->in(1)->is_Cmp()) { 1.930 - return false; 1.931 - } 1.932 - const CmpNode *cmp = bol->in(1)->as_Cmp(); 1.933 - if (cmp->Opcode() != Op_CmpU ) { 1.934 - return false; 1.935 - } 1.936 - Node* range = cmp->in(2); 1.937 - if (range->Opcode() != Op_LoadRange) { 1.938 - const TypeInt* tint = phase->_igvn.type(range)->isa_int(); 1.939 - if (!OptimizeFill || tint == NULL || tint->empty() || tint->_lo < 0) { 1.940 - // Allow predication on positive values that aren't LoadRanges. 1.941 - // This allows optimization of loops where the length of the 1.942 - // array is a known value and doesn't need to be loaded back 1.943 - // from the array. 1.944 - return false; 1.945 - } 1.946 - } 1.947 - if (!invar.is_invariant(range)) { 1.948 - return false; 1.949 - } 1.950 - Node *iv = _head->as_CountedLoop()->phi(); 1.951 - int scale = 0; 1.952 - Node *offset = NULL; 1.953 - if (!phase->is_scaled_iv_plus_offset(cmp->in(1), iv, &scale, &offset)) { 1.954 - return false; 1.955 - } 1.956 - if(offset && !invar.is_invariant(offset)) { // offset must be invariant 1.957 - return false; 1.958 - } 1.959 - return true; 1.960 -} 1.961 - 1.962 -//------------------------------rc_predicate----------------------------------- 1.963 -// Create a range check predicate 1.964 -// 1.965 -// for (i = init; i < limit; i += stride) { 1.966 -// a[scale*i+offset] 1.967 -// } 1.968 -// 1.969 -// Compute max(scale*i + offset) for init <= i < limit and build the predicate 1.970 -// as "max(scale*i + offset) u< a.length". 1.971 -// 1.972 -// There are two cases for max(scale*i + offset): 1.973 -// (1) stride*scale > 0 1.974 -// max(scale*i + offset) = scale*(limit-stride) + offset 1.975 -// (2) stride*scale < 0 1.976 -// max(scale*i + offset) = scale*init + offset 1.977 -BoolNode* PhaseIdealLoop::rc_predicate(Node* ctrl, 1.978 - int scale, Node* offset, 1.979 - Node* init, Node* limit, Node* stride, 1.980 - Node* range, bool upper) { 1.981 - DEBUG_ONLY(ttyLocker ttyl); 1.982 - if (TraceLoopPredicate) tty->print("rc_predicate "); 1.983 - 1.984 - Node* max_idx_expr = init; 1.985 - int stride_con = stride->get_int(); 1.986 - if ((stride_con > 0) == (scale > 0) == upper) { 1.987 - max_idx_expr = new (C, 3) SubINode(limit, stride); 1.988 - register_new_node(max_idx_expr, ctrl); 1.989 - if (TraceLoopPredicate) tty->print("(limit - stride) "); 1.990 - } else { 1.991 - if (TraceLoopPredicate) tty->print("init "); 1.992 - } 1.993 - 1.994 - if (scale != 1) { 1.995 - ConNode* con_scale = _igvn.intcon(scale); 1.996 - max_idx_expr = new (C, 3) MulINode(max_idx_expr, con_scale); 1.997 - register_new_node(max_idx_expr, ctrl); 1.998 - if (TraceLoopPredicate) tty->print("* %d ", scale); 1.999 - } 1.1000 - 1.1001 - if (offset && (!offset->is_Con() || offset->get_int() != 0)){ 1.1002 - max_idx_expr = new (C, 3) AddINode(max_idx_expr, offset); 1.1003 - register_new_node(max_idx_expr, ctrl); 1.1004 - if (TraceLoopPredicate) 1.1005 - if (offset->is_Con()) tty->print("+ %d ", offset->get_int()); 1.1006 - else tty->print("+ offset "); 1.1007 - } 1.1008 - 1.1009 - CmpUNode* cmp = new (C, 3) CmpUNode(max_idx_expr, range); 1.1010 - register_new_node(cmp, ctrl); 1.1011 - BoolNode* bol = new (C, 2) BoolNode(cmp, BoolTest::lt); 1.1012 - register_new_node(bol, ctrl); 1.1013 - 1.1014 - if (TraceLoopPredicate) tty->print_cr("<u range"); 1.1015 - return bol; 1.1016 -} 1.1017 - 1.1018 -//------------------------------ loop_predication_impl-------------------------- 1.1019 -// Insert loop predicates for null checks and range checks 1.1020 -bool PhaseIdealLoop::loop_predication_impl(IdealLoopTree *loop) { 1.1021 - if (!UseLoopPredicate) return false; 1.1022 - 1.1023 - if (!loop->_head->is_Loop()) { 1.1024 - // Could be a simple region when irreducible loops are present. 1.1025 - return false; 1.1026 - } 1.1027 - 1.1028 - if (loop->_head->unique_ctrl_out()->Opcode() == Op_NeverBranch) { 1.1029 - // do nothing for infinite loops 1.1030 - return false; 1.1031 - } 1.1032 - 1.1033 - CountedLoopNode *cl = NULL; 1.1034 - if (loop->_head->is_CountedLoop()) { 1.1035 - cl = loop->_head->as_CountedLoop(); 1.1036 - // do nothing for iteration-splitted loops 1.1037 - if (!cl->is_normal_loop()) return false; 1.1038 - } 1.1039 - 1.1040 - LoopNode *lpn = loop->_head->as_Loop(); 1.1041 - Node* entry = lpn->in(LoopNode::EntryControl); 1.1042 - 1.1043 - ProjNode *predicate_proj = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate); 1.1044 - if (!predicate_proj) { 1.1045 -#ifndef PRODUCT 1.1046 - if (TraceLoopPredicate) { 1.1047 - tty->print("missing predicate:"); 1.1048 - loop->dump_head(); 1.1049 - lpn->dump(1); 1.1050 - } 1.1051 -#endif 1.1052 - return false; 1.1053 - } 1.1054 - ConNode* zero = _igvn.intcon(0); 1.1055 - set_ctrl(zero, C->root()); 1.1056 - 1.1057 - ResourceArea *area = Thread::current()->resource_area(); 1.1058 - Invariance invar(area, loop); 1.1059 - 1.1060 - // Create list of if-projs such that a newer proj dominates all older 1.1061 - // projs in the list, and they all dominate loop->tail() 1.1062 - Node_List if_proj_list(area); 1.1063 - LoopNode *head = loop->_head->as_Loop(); 1.1064 - Node *current_proj = loop->tail(); //start from tail 1.1065 - while ( current_proj != head ) { 1.1066 - if (loop == get_loop(current_proj) && // still in the loop ? 1.1067 - current_proj->is_Proj() && // is a projection ? 1.1068 - current_proj->in(0)->Opcode() == Op_If) { // is a if projection ? 1.1069 - if_proj_list.push(current_proj); 1.1070 - } 1.1071 - current_proj = idom(current_proj); 1.1072 - } 1.1073 - 1.1074 - bool hoisted = false; // true if at least one proj is promoted 1.1075 - while (if_proj_list.size() > 0) { 1.1076 - // Following are changed to nonnull when a predicate can be hoisted 1.1077 - ProjNode* new_predicate_proj = NULL; 1.1078 - 1.1079 - ProjNode* proj = if_proj_list.pop()->as_Proj(); 1.1080 - IfNode* iff = proj->in(0)->as_If(); 1.1081 - 1.1082 - if (!is_uncommon_trap_if_pattern(proj, Deoptimization::Reason_none)) { 1.1083 - if (loop->is_loop_exit(iff)) { 1.1084 - // stop processing the remaining projs in the list because the execution of them 1.1085 - // depends on the condition of "iff" (iff->in(1)). 1.1086 - break; 1.1087 - } else { 1.1088 - // Both arms are inside the loop. There are two cases: 1.1089 - // (1) there is one backward branch. In this case, any remaining proj 1.1090 - // in the if_proj list post-dominates "iff". So, the condition of "iff" 1.1091 - // does not determine the execution the remining projs directly, and we 1.1092 - // can safely continue. 1.1093 - // (2) both arms are forwarded, i.e. a diamond shape. In this case, "proj" 1.1094 - // does not dominate loop->tail(), so it can not be in the if_proj list. 1.1095 - continue; 1.1096 - } 1.1097 - } 1.1098 - 1.1099 - Node* test = iff->in(1); 1.1100 - if (!test->is_Bool()){ //Conv2B, ... 1.1101 - continue; 1.1102 - } 1.1103 - BoolNode* bol = test->as_Bool(); 1.1104 - if (invar.is_invariant(bol)) { 1.1105 - // Invariant test 1.1106 - new_predicate_proj = create_new_if_for_predicate(predicate_proj, NULL, 1.1107 - Deoptimization::Reason_predicate); 1.1108 - Node* ctrl = new_predicate_proj->in(0)->as_If()->in(0); 1.1109 - BoolNode* new_predicate_bol = invar.clone(bol, ctrl)->as_Bool(); 1.1110 - 1.1111 - // Negate test if necessary 1.1112 - bool negated = false; 1.1113 - if (proj->_con != predicate_proj->_con) { 1.1114 - new_predicate_bol = new (C, 2) BoolNode(new_predicate_bol->in(1), new_predicate_bol->_test.negate()); 1.1115 - register_new_node(new_predicate_bol, ctrl); 1.1116 - negated = true; 1.1117 - } 1.1118 - IfNode* new_predicate_iff = new_predicate_proj->in(0)->as_If(); 1.1119 - _igvn.hash_delete(new_predicate_iff); 1.1120 - new_predicate_iff->set_req(1, new_predicate_bol); 1.1121 -#ifndef PRODUCT 1.1122 - if (TraceLoopPredicate) { 1.1123 - tty->print("Predicate invariant if%s: %d ", negated ? " negated" : "", new_predicate_iff->_idx); 1.1124 - loop->dump_head(); 1.1125 - } else if (TraceLoopOpts) { 1.1126 - tty->print("Predicate IC "); 1.1127 - loop->dump_head(); 1.1128 - } 1.1129 -#endif 1.1130 - } else if (cl != NULL && loop->is_range_check_if(iff, this, invar)) { 1.1131 - assert(proj->_con == predicate_proj->_con, "must match"); 1.1132 - 1.1133 - // Range check for counted loops 1.1134 - const Node* cmp = bol->in(1)->as_Cmp(); 1.1135 - Node* idx = cmp->in(1); 1.1136 - assert(!invar.is_invariant(idx), "index is variant"); 1.1137 - assert(cmp->in(2)->Opcode() == Op_LoadRange || OptimizeFill, "must be"); 1.1138 - Node* rng = cmp->in(2); 1.1139 - assert(invar.is_invariant(rng), "range must be invariant"); 1.1140 - int scale = 1; 1.1141 - Node* offset = zero; 1.1142 - bool ok = is_scaled_iv_plus_offset(idx, cl->phi(), &scale, &offset); 1.1143 - assert(ok, "must be index expression"); 1.1144 - 1.1145 - Node* init = cl->init_trip(); 1.1146 - Node* limit = cl->limit(); 1.1147 - Node* stride = cl->stride(); 1.1148 - 1.1149 - // Build if's for the upper and lower bound tests. The 1.1150 - // lower_bound test will dominate the upper bound test and all 1.1151 - // cloned or created nodes will use the lower bound test as 1.1152 - // their declared control. 1.1153 - ProjNode* lower_bound_proj = create_new_if_for_predicate(predicate_proj, NULL, Deoptimization::Reason_predicate); 1.1154 - ProjNode* upper_bound_proj = create_new_if_for_predicate(predicate_proj, NULL, Deoptimization::Reason_predicate); 1.1155 - assert(upper_bound_proj->in(0)->as_If()->in(0) == lower_bound_proj, "should dominate"); 1.1156 - Node *ctrl = lower_bound_proj->in(0)->as_If()->in(0); 1.1157 - 1.1158 - // Perform cloning to keep Invariance state correct since the 1.1159 - // late schedule will place invariant things in the loop. 1.1160 - rng = invar.clone(rng, ctrl); 1.1161 - if (offset && offset != zero) { 1.1162 - assert(invar.is_invariant(offset), "offset must be loop invariant"); 1.1163 - offset = invar.clone(offset, ctrl); 1.1164 - } 1.1165 - 1.1166 - // Test the lower bound 1.1167 - Node* lower_bound_bol = rc_predicate(ctrl, scale, offset, init, limit, stride, rng, false); 1.1168 - IfNode* lower_bound_iff = lower_bound_proj->in(0)->as_If(); 1.1169 - _igvn.hash_delete(lower_bound_iff); 1.1170 - lower_bound_iff->set_req(1, lower_bound_bol); 1.1171 - if (TraceLoopPredicate) tty->print_cr("lower bound check if: %d", lower_bound_iff->_idx); 1.1172 - 1.1173 - // Test the upper bound 1.1174 - Node* upper_bound_bol = rc_predicate(ctrl, scale, offset, init, limit, stride, rng, true); 1.1175 - IfNode* upper_bound_iff = upper_bound_proj->in(0)->as_If(); 1.1176 - _igvn.hash_delete(upper_bound_iff); 1.1177 - upper_bound_iff->set_req(1, upper_bound_bol); 1.1178 - if (TraceLoopPredicate) tty->print_cr("upper bound check if: %d", lower_bound_iff->_idx); 1.1179 - 1.1180 - // Fall through into rest of the clean up code which will move 1.1181 - // any dependent nodes onto the upper bound test. 1.1182 - new_predicate_proj = upper_bound_proj; 1.1183 - 1.1184 -#ifndef PRODUCT 1.1185 - if (TraceLoopOpts && !TraceLoopPredicate) { 1.1186 - tty->print("Predicate RC "); 1.1187 - loop->dump_head(); 1.1188 - } 1.1189 -#endif 1.1190 - } else { 1.1191 - // Loop variant check (for example, range check in non-counted loop) 1.1192 - // with uncommon trap. 1.1193 - continue; 1.1194 - } 1.1195 - assert(new_predicate_proj != NULL, "sanity"); 1.1196 - // Success - attach condition (new_predicate_bol) to predicate if 1.1197 - invar.map_ctrl(proj, new_predicate_proj); // so that invariance test can be appropriate 1.1198 - 1.1199 - // Eliminate the old If in the loop body 1.1200 - dominated_by( new_predicate_proj, iff, proj->_con != new_predicate_proj->_con ); 1.1201 - 1.1202 - hoisted = true; 1.1203 - C->set_major_progress(); 1.1204 - } // end while 1.1205 - 1.1206 -#ifndef PRODUCT 1.1207 - // report that the loop predication has been actually performed 1.1208 - // for this loop 1.1209 - if (TraceLoopPredicate && hoisted) { 1.1210 - tty->print("Loop Predication Performed:"); 1.1211 - loop->dump_head(); 1.1212 - } 1.1213 -#endif 1.1214 - 1.1215 - return hoisted; 1.1216 -} 1.1217 - 1.1218 -//------------------------------loop_predication-------------------------------- 1.1219 -// driver routine for loop predication optimization 1.1220 -bool IdealLoopTree::loop_predication( PhaseIdealLoop *phase) { 1.1221 - bool hoisted = false; 1.1222 - // Recursively promote predicates 1.1223 - if ( _child ) { 1.1224 - hoisted = _child->loop_predication( phase); 1.1225 - } 1.1226 - 1.1227 - // self 1.1228 - if (!_irreducible && !tail()->is_top()) { 1.1229 - hoisted |= phase->loop_predication_impl(this); 1.1230 - } 1.1231 - 1.1232 - if ( _next ) { //sibling 1.1233 - hoisted |= _next->loop_predication( phase); 1.1234 - } 1.1235 - 1.1236 - return hoisted; 1.1237 -} 1.1238 - 1.1239 - 1.1240 +//============================================================================= 1.1241 // Process all the loops in the loop tree and replace any fill 1.1242 // patterns with an intrisc version. 1.1243 bool PhaseIdealLoop::do_intrinsify_fill() { 1.1244 @@ -2625,9 +2206,12 @@ 1.1245 if (value != head->phi()) { 1.1246 msg = "unhandled shift in address"; 1.1247 } else { 1.1248 - found_index = true; 1.1249 - shift = n; 1.1250 - assert(type2aelembytes(store->as_Mem()->memory_type(), true) == 1 << shift->in(2)->get_int(), "scale should match"); 1.1251 + if (type2aelembytes(store->as_Mem()->memory_type(), true) != (1 << n->in(2)->get_int())) { 1.1252 + msg = "scale doesn't match"; 1.1253 + } else { 1.1254 + found_index = true; 1.1255 + shift = n; 1.1256 + } 1.1257 } 1.1258 } else if (n->Opcode() == Op_ConvI2L && conv == NULL) { 1.1259 if (n->in(1) == head->phi()) { 1.1260 @@ -2762,6 +2346,13 @@ 1.1261 return false; 1.1262 } 1.1263 1.1264 +#ifndef PRODUCT 1.1265 + if (TraceLoopOpts) { 1.1266 + tty->print("ArrayFill "); 1.1267 + lpt->dump_head(); 1.1268 + } 1.1269 +#endif 1.1270 + 1.1271 // Now replace the whole loop body by a call to a fill routine that 1.1272 // covers the same region as the loop. 1.1273 Node* base = store->in(MemNode::Address)->as_AddP()->in(AddPNode::Base);