src/share/vm/opto/loopTransform.cpp

changeset 2747
3af54845df98
parent 2735
d7a3fed1c1c9
child 2750
6c97c830fb6f
     1.1 --- a/src/share/vm/opto/loopTransform.cpp	Thu Apr 07 21:32:23 2011 -0700
     1.2 +++ b/src/share/vm/opto/loopTransform.cpp	Fri Apr 08 14:56:22 2011 -0700
     1.3 @@ -63,6 +63,46 @@
     1.4    }
     1.5  }
     1.6  
     1.7 +//------------------------------compute_exact_trip_count-----------------------
     1.8 +// Compute loop exact trip count if possible. Do not recalculate trip count for
     1.9 +// split loops (pre-main-post) which have their limits and inits behind Opaque node.
    1.10 +void IdealLoopTree::compute_exact_trip_count( PhaseIdealLoop *phase ) {
    1.11 +  if (!_head->as_Loop()->is_valid_counted_loop()) {
    1.12 +    return;
    1.13 +  }
    1.14 +  CountedLoopNode* cl = _head->as_CountedLoop();
    1.15 +  // Trip count may become nonexact for iteration split loops since
    1.16 +  // RCE modifies limits. Note, _trip_count value is not reset since
    1.17 +  // it is used to limit unrolling of main loop.
    1.18 +  cl->set_nonexact_trip_count();
    1.19 +
    1.20 +  // Loop's test should be part of loop.
    1.21 +  if (!phase->is_member(this, phase->get_ctrl(cl->loopexit()->in(CountedLoopEndNode::TestValue))))
    1.22 +    return; // Infinite loop
    1.23 +
    1.24 +#ifdef ASSERT
    1.25 +  BoolTest::mask bt = cl->loopexit()->test_trip();
    1.26 +  assert(bt == BoolTest::lt || bt == BoolTest::gt ||
    1.27 +         bt == BoolTest::ne, "canonical test is expected");
    1.28 +#endif
    1.29 +
    1.30 +  Node* init_n = cl->init_trip();
    1.31 +  Node* limit_n = cl->limit();
    1.32 +  if (init_n  != NULL &&  init_n->is_Con() &&
    1.33 +      limit_n != NULL && limit_n->is_Con()) {
    1.34 +    // Use longs to avoid integer overflow.
    1.35 +    int stride_con  = cl->stride_con();
    1.36 +    long init_con   = cl->init_trip()->get_int();
    1.37 +    long limit_con  = cl->limit()->get_int();
    1.38 +    int stride_m    = stride_con - (stride_con > 0 ? 1 : -1);
    1.39 +    long trip_count = (limit_con - init_con + stride_m)/stride_con;
    1.40 +    if (trip_count > 0 && (julong)trip_count < (julong)max_juint) {
    1.41 +      // Set exact trip count.
    1.42 +      cl->set_exact_trip_count((uint)trip_count);
    1.43 +    }
    1.44 +  }
    1.45 +}
    1.46 +
    1.47  //------------------------------compute_profile_trip_cnt----------------------------
    1.48  // Compute loop trip count from profile data as
    1.49  //    (backedge_count + loop_exit_count) / loop_exit_count
    1.50 @@ -533,29 +573,15 @@
    1.51    if (!cl->is_valid_counted_loop())
    1.52      return false; // Malformed counted loop
    1.53  
    1.54 -  Node *init_n = cl->init_trip();
    1.55 -  Node *limit_n = cl->limit();
    1.56 -
    1.57 -  // Non-constant bounds
    1.58 -  if (init_n   == NULL || !init_n->is_Con()  ||
    1.59 -      limit_n  == NULL || !limit_n->is_Con()) {
    1.60 +  if (!cl->has_exact_trip_count()) {
    1.61 +    // Trip count is not exact.
    1.62      return false;
    1.63    }
    1.64 -  // Use longs to avoid integer overflow.
    1.65 -  int  stride_con = cl->stride_con();
    1.66 -  long init_con   = cl->init_trip()->get_int();
    1.67 -  long limit_con  = cl->limit()->get_int();
    1.68 -  int  stride_m   = stride_con - (stride_con > 0 ? 1 : -1);
    1.69 -  long trip_cnt   = (limit_con - init_con + stride_m)/stride_con;
    1.70  
    1.71 -  // Note, max_jint is used to indicate unknown trip count.
    1.72 -  if (trip_cnt <= 0 || trip_cnt >= (long)max_jint) {
    1.73 -    // return a false (no maximally unroll) and the regular unroll/peel
    1.74 -    // route will make a small mess which CCP will fold away.
    1.75 -    return false;
    1.76 -  }
    1.77 -  uint trip_count = (uint)trip_cnt;
    1.78 -  cl->set_trip_count(trip_count);
    1.79 +  uint trip_count = cl->trip_count();
    1.80 +  // Note, max_juint is used to indicate unknown trip count.
    1.81 +  assert(trip_count > 1, "one iteration loop should be optimized out already");
    1.82 +  assert(trip_count < max_juint, "exact trip_count should be less than max_uint.");
    1.83  
    1.84    // Real policy: if we maximally unroll, does it get too big?
    1.85    // Allow the unrolled mess to get larger than standard loop
    1.86 @@ -1096,7 +1122,7 @@
    1.87      tty->print("Unrolling ");
    1.88      loop->dump_head();
    1.89    } else if (TraceLoopOpts) {
    1.90 -    if (loop_head->trip_count() < LoopUnrollLimit) {
    1.91 +    if (loop_head->trip_count() < (uint)LoopUnrollLimit) {
    1.92        tty->print("Unroll  %d(%2d) ", loop_head->unrolled_count()*2, loop_head->trip_count());
    1.93      } else {
    1.94        tty->print("Unroll  %d     ", loop_head->unrolled_count()*2);
    1.95 @@ -1802,6 +1828,17 @@
    1.96    // main and post loops have explicitly created zero trip guard
    1.97    bool needs_guard = !cl->is_main_loop() && !cl->is_post_loop();
    1.98    if (needs_guard) {
    1.99 +    // Skip guard if values not overlap.
   1.100 +    const TypeInt* init_t = phase->_igvn.type(cl->init_trip())->is_int();
   1.101 +    const TypeInt* limit_t = phase->_igvn.type(cl->limit())->is_int();
   1.102 +    int  stride_con = cl->stride_con();
   1.103 +    if (stride_con > 0) {
   1.104 +      needs_guard = (init_t->_hi >= limit_t->_lo);
   1.105 +    } else {
   1.106 +      needs_guard = (init_t->_lo <= limit_t->_hi);
   1.107 +    }
   1.108 +  }
   1.109 +  if (needs_guard) {
   1.110      // Check for an obvious zero trip guard.
   1.111      Node* inctrl = PhaseIdealLoop::skip_loop_predicates(cl->in(LoopNode::EntryControl));
   1.112      if (inctrl->Opcode() == Op_IfTrue) {
   1.113 @@ -1846,12 +1883,49 @@
   1.114    return true;
   1.115  }
   1.116  
   1.117 +//------------------------------policy_do_one_iteration_loop-------------------
   1.118 +// Convert one iteration loop into normal code.
   1.119 +bool IdealLoopTree::policy_do_one_iteration_loop( PhaseIdealLoop *phase ) {
   1.120 +  if (!_head->as_Loop()->is_valid_counted_loop())
   1.121 +    return false; // Only for counted loop
   1.122 +
   1.123 +  CountedLoopNode *cl = _head->as_CountedLoop();
   1.124 +  if (!cl->has_exact_trip_count() || cl->trip_count() != 1) {
   1.125 +    return false;
   1.126 +  }
   1.127 +
   1.128 +#ifndef PRODUCT
   1.129 +  if(TraceLoopOpts) {
   1.130 +    tty->print("OneIteration ");
   1.131 +    this->dump_head();
   1.132 +  }
   1.133 +#endif
   1.134 +
   1.135 +  Node *init_n = cl->init_trip();
   1.136 +#ifdef ASSERT
   1.137 +  // Loop boundaries should be constant since trip count is exact.
   1.138 +  assert(init_n->get_int() + cl->stride_con() >= cl->limit()->get_int(), "should be one iteration");
   1.139 +#endif
   1.140 +  // Replace the phi at loop head with the value of the init_trip.
   1.141 +  // Then the CountedLoopEnd will collapse (backedge will not be taken)
   1.142 +  // and all loop-invariant uses of the exit values will be correct.
   1.143 +  phase->_igvn.replace_node(cl->phi(), cl->init_trip());
   1.144 +  phase->C->set_major_progress();
   1.145 +  return true;
   1.146 +}
   1.147  
   1.148  //=============================================================================
   1.149  //------------------------------iteration_split_impl---------------------------
   1.150  bool IdealLoopTree::iteration_split_impl( PhaseIdealLoop *phase, Node_List &old_new ) {
   1.151 +  // Compute exact loop trip count if possible.
   1.152 +  compute_exact_trip_count(phase);
   1.153 +
   1.154 +  // Convert one iteration loop into normal code.
   1.155 +  if (policy_do_one_iteration_loop(phase))
   1.156 +    return true;
   1.157 +
   1.158    // Check and remove empty loops (spam micro-benchmarks)
   1.159 -  if( policy_do_remove_empty_loop(phase) )
   1.160 +  if (policy_do_remove_empty_loop(phase))
   1.161      return true;  // Here we removed an empty loop
   1.162  
   1.163    bool should_peel = policy_peeling(phase); // Should we peel?
   1.164 @@ -1860,40 +1934,40 @@
   1.165  
   1.166    // Non-counted loops may be peeled; exactly 1 iteration is peeled.
   1.167    // This removes loop-invariant tests (usually null checks).
   1.168 -  if( !_head->is_CountedLoop() ) { // Non-counted loop
   1.169 +  if (!_head->is_CountedLoop()) { // Non-counted loop
   1.170      if (PartialPeelLoop && phase->partial_peel(this, old_new)) {
   1.171        // Partial peel succeeded so terminate this round of loop opts
   1.172        return false;
   1.173      }
   1.174 -    if( should_peel ) {            // Should we peel?
   1.175 +    if (should_peel) {            // Should we peel?
   1.176  #ifndef PRODUCT
   1.177        if (PrintOpto) tty->print_cr("should_peel");
   1.178  #endif
   1.179        phase->do_peeling(this,old_new);
   1.180 -    } else if( should_unswitch ) {
   1.181 +    } else if (should_unswitch) {
   1.182        phase->do_unswitching(this, old_new);
   1.183      }
   1.184      return true;
   1.185    }
   1.186    CountedLoopNode *cl = _head->as_CountedLoop();
   1.187  
   1.188 -  if( !cl->loopexit() ) return true; // Ignore various kinds of broken loops
   1.189 +  if (!cl->loopexit()) return true; // Ignore various kinds of broken loops
   1.190  
   1.191    // Do nothing special to pre- and post- loops
   1.192 -  if( cl->is_pre_loop() || cl->is_post_loop() ) return true;
   1.193 +  if (cl->is_pre_loop() || cl->is_post_loop()) return true;
   1.194  
   1.195    // Compute loop trip count from profile data
   1.196    compute_profile_trip_cnt(phase);
   1.197  
   1.198    // Before attempting fancy unrolling, RCE or alignment, see if we want
   1.199    // to completely unroll this loop or do loop unswitching.
   1.200 -  if( cl->is_normal_loop() ) {
   1.201 +  if (cl->is_normal_loop()) {
   1.202      if (should_unswitch) {
   1.203        phase->do_unswitching(this, old_new);
   1.204        return true;
   1.205      }
   1.206      bool should_maximally_unroll =  policy_maximally_unroll(phase);
   1.207 -    if( should_maximally_unroll ) {
   1.208 +    if (should_maximally_unroll) {
   1.209        // Here we did some unrolling and peeling.  Eventually we will
   1.210        // completely unroll this loop and it will no longer be a loop.
   1.211        phase->do_maximally_unroll(this,old_new);
   1.212 @@ -1937,14 +2011,14 @@
   1.213    // If we have any of these conditions (RCE, alignment, unrolling) met, then
   1.214    // we switch to the pre-/main-/post-loop model.  This model also covers
   1.215    // peeling.
   1.216 -  if( should_rce || should_align || should_unroll ) {
   1.217 -    if( cl->is_normal_loop() )  // Convert to 'pre/main/post' loops
   1.218 +  if (should_rce || should_align || should_unroll) {
   1.219 +    if (cl->is_normal_loop())  // Convert to 'pre/main/post' loops
   1.220        phase->insert_pre_post_loops(this,old_new, !may_rce_align);
   1.221  
   1.222      // Adjust the pre- and main-loop limits to let the pre and post loops run
   1.223      // with full checks, but the main-loop with no checks.  Remove said
   1.224      // checks from the main body.
   1.225 -    if( should_rce )
   1.226 +    if (should_rce)
   1.227        phase->do_range_check(this,old_new);
   1.228  
   1.229      // Double loop body for unrolling.  Adjust the minimum-trip test (will do
   1.230 @@ -1952,16 +2026,16 @@
   1.231      // an even number of trips).  If we are peeling, we might enable some RCE
   1.232      // and we'd rather unroll the post-RCE'd loop SO... do not unroll if
   1.233      // peeling.
   1.234 -      if( should_unroll && !should_peel )
   1.235 -        phase->do_unroll(this,old_new, true);
   1.236 +    if (should_unroll && !should_peel)
   1.237 +      phase->do_unroll(this,old_new, true);
   1.238  
   1.239      // Adjust the pre-loop limits to align the main body
   1.240      // iterations.
   1.241 -    if( should_align )
   1.242 +    if (should_align)
   1.243        Unimplemented();
   1.244  
   1.245    } else {                      // Else we have an unchanged counted loop
   1.246 -    if( should_peel )           // Might want to peel but do nothing else
   1.247 +    if (should_peel)           // Might want to peel but do nothing else
   1.248        phase->do_peeling(this,old_new);
   1.249    }
   1.250    return true;

mercurial