8029302: Performance regression in Math.pow intrinsic

Fri, 25 Apr 2014 12:48:34 +0200

author
adlertz
date
Fri, 25 Apr 2014 12:48:34 +0200
changeset 6665
400709e275c1
parent 6664
f1b83c6b8411
child 6666
a062c3691003

8029302: Performance regression in Math.pow intrinsic
Summary: Added special case for x^y where y == 2
Reviewed-by: kvn

src/cpu/x86/vm/macroAssembler_x86.cpp file | annotate | diff | comparison | revisions
src/share/vm/opto/library_call.cpp file | annotate | diff | comparison | revisions
     1.1 --- a/src/cpu/x86/vm/macroAssembler_x86.cpp	Thu May 08 23:10:23 2014 -0700
     1.2 +++ b/src/cpu/x86/vm/macroAssembler_x86.cpp	Fri Apr 25 12:48:34 2014 +0200
     1.3 @@ -3152,10 +3152,12 @@
     1.4    // if fast computation is not possible, result is NaN. Requires
     1.5    // fallback from user of this macro.
     1.6    // increase precision for intermediate steps of the computation
     1.7 +  BLOCK_COMMENT("fast_pow {");
     1.8    increase_precision();
     1.9    fyl2x();                 // Stack: (Y*log2(X)) ...
    1.10    pow_exp_core_encoding(); // Stack: exp(X) ...
    1.11    restore_precision();
    1.12 +  BLOCK_COMMENT("} fast_pow");
    1.13  }
    1.14  
    1.15  void MacroAssembler::fast_exp() {
     2.1 --- a/src/share/vm/opto/library_call.cpp	Thu May 08 23:10:23 2014 -0700
     2.2 +++ b/src/share/vm/opto/library_call.cpp	Fri Apr 25 12:48:34 2014 +0200
     2.3 @@ -216,7 +216,7 @@
     2.4    bool inline_math_subtractExactL(bool is_decrement);
     2.5    bool inline_exp();
     2.6    bool inline_pow();
     2.7 -  void finish_pow_exp(Node* result, Node* x, Node* y, const TypeFunc* call_type, address funcAddr, const char* funcName);
     2.8 +  Node* finish_pow_exp(Node* result, Node* x, Node* y, const TypeFunc* call_type, address funcAddr, const char* funcName);
     2.9    bool inline_min_max(vmIntrinsics::ID id);
    2.10    Node* generate_min_max(vmIntrinsics::ID id, Node* x, Node* y);
    2.11    // This returns Type::AnyPtr, RawPtr, or OopPtr.
    2.12 @@ -1678,7 +1678,7 @@
    2.13    return true;
    2.14  }
    2.15  
    2.16 -void LibraryCallKit::finish_pow_exp(Node* result, Node* x, Node* y, const TypeFunc* call_type, address funcAddr, const char* funcName) {
    2.17 +Node* LibraryCallKit::finish_pow_exp(Node* result, Node* x, Node* y, const TypeFunc* call_type, address funcAddr, const char* funcName) {
    2.18    //-------------------
    2.19    //result=(result.isNaN())? funcAddr():result;
    2.20    // Check: If isNaN() by checking result!=result? then either trap
    2.21 @@ -1694,7 +1694,7 @@
    2.22        uncommon_trap(Deoptimization::Reason_intrinsic,
    2.23                      Deoptimization::Action_make_not_entrant);
    2.24      }
    2.25 -    set_result(result);
    2.26 +    return result;
    2.27    } else {
    2.28      // If this inlining ever returned NaN in the past, we compile a call
    2.29      // to the runtime to properly handle corner cases
    2.30 @@ -1724,9 +1724,10 @@
    2.31  
    2.32        result_region->init_req(2, control());
    2.33        result_val->init_req(2, value);
    2.34 -      set_result(result_region, result_val);
    2.35 +      set_control(_gvn.transform(result_region));
    2.36 +      return result_val;
    2.37      } else {
    2.38 -      set_result(result);
    2.39 +      return result;
    2.40      }
    2.41    }
    2.42  }
    2.43 @@ -1738,7 +1739,8 @@
    2.44    Node* arg = round_double_node(argument(0));
    2.45    Node* n   = _gvn.transform(new (C) ExpDNode(C, control(), arg));
    2.46  
    2.47 -  finish_pow_exp(n, arg, NULL, OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dexp), "EXP");
    2.48 +  n = finish_pow_exp(n, arg, NULL, OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dexp), "EXP");
    2.49 +  set_result(n);
    2.50  
    2.51    C->set_has_split_ifs(true); // Has chance for split-if optimization
    2.52    return true;
    2.53 @@ -1748,27 +1750,48 @@
    2.54  // Inline power instructions, if possible.
    2.55  bool LibraryCallKit::inline_pow() {
    2.56    // Pseudocode for pow
    2.57 -  // if (x <= 0.0) {
    2.58 -  //   long longy = (long)y;
    2.59 -  //   if ((double)longy == y) { // if y is long
    2.60 -  //     if (y + 1 == y) longy = 0; // huge number: even
    2.61 -  //     result = ((1&longy) == 0)?-DPow(abs(x), y):DPow(abs(x), y);
    2.62 +  // if (y == 2) {
    2.63 +  //   return x * x;
    2.64 +  // } else {
    2.65 +  //   if (x <= 0.0) {
    2.66 +  //     long longy = (long)y;
    2.67 +  //     if ((double)longy == y) { // if y is long
    2.68 +  //       if (y + 1 == y) longy = 0; // huge number: even
    2.69 +  //       result = ((1&longy) == 0)?-DPow(abs(x), y):DPow(abs(x), y);
    2.70 +  //     } else {
    2.71 +  //       result = NaN;
    2.72 +  //     }
    2.73    //   } else {
    2.74 -  //     result = NaN;
    2.75 +  //     result = DPow(x,y);
    2.76    //   }
    2.77 -  // } else {
    2.78 -  //   result = DPow(x,y);
    2.79 +  //   if (result != result) {  // i.e. result is NaN
    2.80 +  //     result = uncommon_trap() or runtime_call();
    2.81 +  //   }
    2.82 +  //   return result;
    2.83    // }
    2.84 -  // if (result != result)?  {
    2.85 -  //   result = uncommon_trap() or runtime_call();
    2.86 -  // }
    2.87 -  // return result;
    2.88  
    2.89    Node* x = round_double_node(argument(0));
    2.90    Node* y = round_double_node(argument(2));
    2.91  
    2.92    Node* result = NULL;
    2.93  
    2.94 +  Node*   const_two_node = makecon(TypeD::make(2.0));
    2.95 +  Node*   cmp_node       = _gvn.transform(new (C) CmpDNode(y, const_two_node));
    2.96 +  Node*   bool_node      = _gvn.transform(new (C) BoolNode(cmp_node, BoolTest::eq));
    2.97 +  IfNode* if_node        = create_and_xform_if(control(), bool_node, PROB_STATIC_INFREQUENT, COUNT_UNKNOWN);
    2.98 +  Node*   if_true        = _gvn.transform(new (C) IfTrueNode(if_node));
    2.99 +  Node*   if_false       = _gvn.transform(new (C) IfFalseNode(if_node));
   2.100 +
   2.101 +  RegionNode* region_node = new (C) RegionNode(3);
   2.102 +  region_node->init_req(1, if_true);
   2.103 +
   2.104 +  Node* phi_node = new (C) PhiNode(region_node, Type::DOUBLE);
   2.105 +  // Special case for x^y where y == 2: this can be computed as x * x.
   2.106 +  phi_node->init_req(1, _gvn.transform(new (C) MulDNode(x, x)));
   2.107 +
   2.108 +  // set control to if_false since we will now process the false branch
   2.109 +  set_control(if_false);
   2.110 +
   2.111    if (!too_many_traps(Deoptimization::Reason_intrinsic)) {
   2.112      // Short form: skip the fancy tests and just check for NaN result.
   2.113      result = _gvn.transform(new (C) PowDNode(C, control(), x, y));
   2.114 @@ -1892,7 +1915,15 @@
   2.115      result = _gvn.transform(phi);
   2.116    }
   2.117  
   2.118 -  finish_pow_exp(result, x, y, OptoRuntime::Math_DD_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dpow), "POW");
   2.119 +  result = finish_pow_exp(result, x, y, OptoRuntime::Math_DD_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dpow), "POW");
   2.120 +
   2.121 +  // control from finish_pow_exp is now input to the region node
   2.122 +  region_node->set_req(2, control());
   2.123 +  // the result from finish_pow_exp is now input to the phi node
   2.124 +  phi_node->init_req(2, _gvn.transform(result));
   2.125 +  set_control(_gvn.transform(region_node));
   2.126 +  record_for_igvn(region_node);
   2.127 +  set_result(_gvn.transform(phi_node));
   2.128  
   2.129    C->set_has_split_ifs(true); // Has chance for split-if optimization
   2.130    return true;

mercurial