8030976: Untaken paths should be more vigorously pruned at highest optimization level

Wed, 10 Sep 2014 12:39:11 +0200

author
rbackman
date
Wed, 10 Sep 2014 12:39:11 +0200
changeset 7153
f6f9aec27858
parent 7152
166d744df0de
child 7154
42460b71ba70

8030976: Untaken paths should be more vigorously pruned at highest optimization level
Reviewed-by: roland, vlivanov

src/share/vm/oops/methodData.hpp file | annotate | diff | comparison | revisions
src/share/vm/opto/parse2.cpp file | annotate | diff | comparison | revisions
src/share/vm/runtime/deoptimization.cpp file | annotate | diff | comparison | revisions
src/share/vm/runtime/deoptimization.hpp file | annotate | diff | comparison | revisions
src/share/vm/runtime/vmStructs.cpp file | annotate | diff | comparison | revisions
     1.1 --- a/src/share/vm/oops/methodData.hpp	Tue Sep 02 12:48:45 2014 -0700
     1.2 +++ b/src/share/vm/oops/methodData.hpp	Wed Sep 10 12:39:11 2014 +0200
     1.3 @@ -2057,7 +2057,7 @@
     1.4  
     1.5    // Whole-method sticky bits and flags
     1.6    enum {
     1.7 -    _trap_hist_limit    = 19,   // decoupled from Deoptimization::Reason_LIMIT
     1.8 +    _trap_hist_limit    = 20,   // decoupled from Deoptimization::Reason_LIMIT
     1.9      _trap_hist_mask     = max_jubyte,
    1.10      _extra_data_count   = 4     // extra DataLayout headers, for trap history
    1.11    }; // Public flag values
     2.1 --- a/src/share/vm/opto/parse2.cpp	Tue Sep 02 12:48:45 2014 -0700
     2.2 +++ b/src/share/vm/opto/parse2.cpp	Wed Sep 10 12:39:11 2014 +0200
     2.3 @@ -895,53 +895,12 @@
     2.4  // if a path is never taken, its controlling comparison is
     2.5  // already acting in a stable fashion.  If the comparison
     2.6  // seems stable, we will put an expensive uncommon trap
     2.7 -// on the untaken path.  To be conservative, and to allow
     2.8 -// partially executed counted loops to be compiled fully,
     2.9 -// we will plant uncommon traps only after pointer comparisons.
    2.10 +// on the untaken path, unless this bci has already hit too many unstable_if traps.
    2.11  bool Parse::seems_stable_comparison(BoolTest::mask btest, Node* cmp) {
    2.12 -  for (int depth = 4; depth > 0; depth--) {
    2.13 -    // The following switch can find CmpP here over half the time for
    2.14 -    // dynamic language code rich with type tests.
    2.15 -    // Code using counted loops or array manipulations (typical
    2.16 -    // of benchmarks) will have many (>80%) CmpI instructions.
    2.17 -    switch (cmp->Opcode()) {
    2.18 -    case Op_CmpP:
    2.19 -      // A never-taken null check looks like CmpP/BoolTest::eq.
    2.20 -      // These certainly should be closed off as uncommon traps.
    2.21 -      if (btest == BoolTest::eq)
    2.22 -        return true;
    2.23 -      // A never-failed type check looks like CmpP/BoolTest::ne.
    2.24 -      // Let's put traps on those, too, so that we don't have to compile
    2.25 -      // unused paths with indeterminate dynamic type information.
    2.26 -      if (ProfileDynamicTypes)
    2.27 -        return true;
    2.28 -      return false;
    2.29 -
    2.30 -    case Op_CmpI:
    2.31 -      // A small minority (< 10%) of CmpP are masked as CmpI,
    2.32 -      // as if by boolean conversion ((p == q? 1: 0) != 0).
    2.33 -      // Detect that here, even if it hasn't optimized away yet.
    2.34 -      // Specifically, this covers the 'instanceof' operator.
    2.35 -      if (btest == BoolTest::ne || btest == BoolTest::eq) {
    2.36 -        if (_gvn.type(cmp->in(2))->singleton() &&
    2.37 -            cmp->in(1)->is_Phi()) {
    2.38 -          PhiNode* phi = cmp->in(1)->as_Phi();
    2.39 -          int true_path = phi->is_diamond_phi();
    2.40 -          if (true_path > 0 &&
    2.41 -              _gvn.type(phi->in(1))->singleton() &&
    2.42 -              _gvn.type(phi->in(2))->singleton()) {
    2.43 -            // phi->region->if_proj->ifnode->bool->cmp
    2.44 -            BoolNode* bol = phi->in(0)->in(1)->in(0)->in(1)->as_Bool();
    2.45 -            btest = bol->_test._test;
    2.46 -            cmp = bol->in(1);
    2.47 -            continue;
    2.48 -          }
    2.49 -        }
    2.50 -      }
    2.51 -      return false;
    2.52 -    }
    2.53 +  if (C->too_many_traps(method(), bci(), Deoptimization::Reason_unstable_if)) {
    2.54 +    return false;
    2.55    }
    2.56 -  return false;
    2.57 +  return true;
    2.58  }
    2.59  
    2.60  //-------------------------------repush_if_args--------------------------------
    2.61 @@ -1180,32 +1139,8 @@
    2.62    bool is_fallthrough = (path == successor_for_bci(iter().next_bci()));
    2.63  
    2.64    if (seems_never_taken(prob) && seems_stable_comparison(btest, c)) {
    2.65 -    // If this might possibly turn into an implicit null check,
    2.66 -    // and the null has never yet been seen, we need to generate
    2.67 -    // an uncommon trap, so as to recompile instead of suffering
    2.68 -    // with very slow branches.  (We'll get the slow branches if
    2.69 -    // the program ever changes phase and starts seeing nulls here.)
    2.70 -    //
    2.71 -    // We do not inspect for a null constant, since a node may
    2.72 -    // optimize to 'null' later on.
    2.73 -    //
    2.74 -    // Null checks, and other tests which expect inequality,
    2.75 -    // show btest == BoolTest::eq along the non-taken branch.
    2.76 -    // On the other hand, type tests, must-be-null tests,
    2.77 -    // and other tests which expect pointer equality,
    2.78 -    // show btest == BoolTest::ne along the non-taken branch.
    2.79 -    // We prune both types of branches if they look unused.
    2.80      repush_if_args();
    2.81 -    // We need to mark this branch as taken so that if we recompile we will
    2.82 -    // see that it is possible. In the tiered system the interpreter doesn't
    2.83 -    // do profiling and by the time we get to the lower tier from the interpreter
    2.84 -    // the path may be cold again. Make sure it doesn't look untaken
    2.85 -    if (is_fallthrough) {
    2.86 -      profile_not_taken_branch(!ProfileInterpreter);
    2.87 -    } else {
    2.88 -      profile_taken_branch(iter().get_dest(), !ProfileInterpreter);
    2.89 -    }
    2.90 -    uncommon_trap(Deoptimization::Reason_unreached,
    2.91 +    uncommon_trap(Deoptimization::Reason_unstable_if,
    2.92                    Deoptimization::Action_reinterpret,
    2.93                    NULL,
    2.94                    (is_fallthrough ? "taken always" : "taken never"));
     3.1 --- a/src/share/vm/runtime/deoptimization.cpp	Tue Sep 02 12:48:45 2014 -0700
     3.2 +++ b/src/share/vm/runtime/deoptimization.cpp	Wed Sep 10 12:39:11 2014 +0200
     3.3 @@ -1835,7 +1835,8 @@
     3.4    "predicate",
     3.5    "loop_limit_check",
     3.6    "speculate_class_check",
     3.7 -  "rtm_state_change"
     3.8 +  "rtm_state_change",
     3.9 +  "unstable_if"
    3.10  };
    3.11  const char* Deoptimization::_trap_action_name[Action_LIMIT] = {
    3.12    // Note:  Keep this in sync. with enum DeoptAction.
     4.1 --- a/src/share/vm/runtime/deoptimization.hpp	Tue Sep 02 12:48:45 2014 -0700
     4.2 +++ b/src/share/vm/runtime/deoptimization.hpp	Wed Sep 10 12:39:11 2014 +0200
     4.3 @@ -60,6 +60,7 @@
     4.4      Reason_predicate,             // compiler generated predicate failed
     4.5      Reason_loop_limit_check,      // compiler generated loop limits check failed
     4.6      Reason_speculate_class_check, // saw unexpected object class from type speculation
     4.7      Reason_rtm_state_change,      // rtm state change detected
     4.8 +    Reason_unstable_if,           // a branch predicted always false was taken
     4.9      Reason_LIMIT,
    4.10      // Note:  Keep this enum in sync. with _trap_reason_name.
    4.11 @@ -315,6 +316,8 @@
    4.12        return Reason_null_check;           // recorded per BCI as a null check
    4.13      else if (reason == Reason_speculate_class_check)
    4.14        return Reason_class_check;
    4.15 +    else if (reason == Reason_unstable_if)
    4.16 +      return Reason_intrinsic;
    4.17      else
    4.18        return Reason_none;
    4.19    }
     5.1 --- a/src/share/vm/runtime/vmStructs.cpp	Tue Sep 02 12:48:45 2014 -0700
     5.2 +++ b/src/share/vm/runtime/vmStructs.cpp	Wed Sep 10 12:39:11 2014 +0200
     5.3 @@ -2496,6 +2496,7 @@
     5.4    declare_constant(Deoptimization::Reason_age)                            \
     5.5    declare_constant(Deoptimization::Reason_predicate)                      \
     5.6    declare_constant(Deoptimization::Reason_loop_limit_check)               \
     5.7 +  declare_constant(Deoptimization::Reason_unstable_if)                    \
     5.8    declare_constant(Deoptimization::Reason_LIMIT)                          \
     5.9    declare_constant(Deoptimization::Reason_RECORDED_LIMIT)                 \
    5.10                                                                            \

mercurial