Fri, 25 Apr 2014 12:48:34 +0200
8029302: Performance regression in Math.pow intrinsic
Summary: Added special case for x^y where y == 2
Reviewed-by: kvn
src/cpu/x86/vm/macroAssembler_x86.cpp
src/share/vm/opto/library_call.cpp
1.1 --- a/src/cpu/x86/vm/macroAssembler_x86.cpp Thu May 08 23:10:23 2014 -0700 1.2 +++ b/src/cpu/x86/vm/macroAssembler_x86.cpp Fri Apr 25 12:48:34 2014 +0200 1.3 @@ -3152,10 +3152,12 @@ 1.4 // if fast computation is not possible, result is NaN. Requires 1.5 // fallback from user of this macro. 1.6 // increase precision for intermediate steps of the computation 1.7 + BLOCK_COMMENT("fast_pow {"); 1.8 increase_precision(); 1.9 fyl2x(); // Stack: (Y*log2(X)) ... 1.10 pow_exp_core_encoding(); // Stack: exp(X) ... 1.11 restore_precision(); 1.12 + BLOCK_COMMENT("} fast_pow"); 1.13 } 1.14 1.15 void MacroAssembler::fast_exp() {
2.1 --- a/src/share/vm/opto/library_call.cpp Thu May 08 23:10:23 2014 -0700 2.2 +++ b/src/share/vm/opto/library_call.cpp Fri Apr 25 12:48:34 2014 +0200 2.3 @@ -216,7 +216,7 @@ 2.4 bool inline_math_subtractExactL(bool is_decrement); 2.5 bool inline_exp(); 2.6 bool inline_pow(); 2.7 - void finish_pow_exp(Node* result, Node* x, Node* y, const TypeFunc* call_type, address funcAddr, const char* funcName); 2.8 + Node* finish_pow_exp(Node* result, Node* x, Node* y, const TypeFunc* call_type, address funcAddr, const char* funcName); 2.9 bool inline_min_max(vmIntrinsics::ID id); 2.10 Node* generate_min_max(vmIntrinsics::ID id, Node* x, Node* y); 2.11 // This returns Type::AnyPtr, RawPtr, or OopPtr. 2.12 @@ -1678,7 +1678,7 @@ 2.13 return true; 2.14 } 2.15 2.16 -void LibraryCallKit::finish_pow_exp(Node* result, Node* x, Node* y, const TypeFunc* call_type, address funcAddr, const char* funcName) { 2.17 +Node* LibraryCallKit::finish_pow_exp(Node* result, Node* x, Node* y, const TypeFunc* call_type, address funcAddr, const char* funcName) { 2.18 //------------------- 2.19 //result=(result.isNaN())? funcAddr():result; 2.20 // Check: If isNaN() by checking result!=result? 
then either trap 2.21 @@ -1694,7 +1694,7 @@ 2.22 uncommon_trap(Deoptimization::Reason_intrinsic, 2.23 Deoptimization::Action_make_not_entrant); 2.24 } 2.25 - set_result(result); 2.26 + return result; 2.27 } else { 2.28 // If this inlining ever returned NaN in the past, we compile a call 2.29 // to the runtime to properly handle corner cases 2.30 @@ -1724,9 +1724,10 @@ 2.31 2.32 result_region->init_req(2, control()); 2.33 result_val->init_req(2, value); 2.34 - set_result(result_region, result_val); 2.35 + set_control(_gvn.transform(result_region)); 2.36 + return result_val; 2.37 } else { 2.38 - set_result(result); 2.39 + return result; 2.40 } 2.41 } 2.42 } 2.43 @@ -1738,7 +1739,8 @@ 2.44 Node* arg = round_double_node(argument(0)); 2.45 Node* n = _gvn.transform(new (C) ExpDNode(C, control(), arg)); 2.46 2.47 - finish_pow_exp(n, arg, NULL, OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dexp), "EXP"); 2.48 + n = finish_pow_exp(n, arg, NULL, OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dexp), "EXP"); 2.49 + set_result(n); 2.50 2.51 C->set_has_split_ifs(true); // Has chance for split-if optimization 2.52 return true; 2.53 @@ -1748,27 +1750,48 @@ 2.54 // Inline power instructions, if possible. 
2.55 bool LibraryCallKit::inline_pow() { 2.56 // Pseudocode for pow 2.57 - // if (x <= 0.0) { 2.58 - // long longy = (long)y; 2.59 - // if ((double)longy == y) { // if y is long 2.60 - // if (y + 1 == y) longy = 0; // huge number: even 2.61 - // result = ((1&longy) == 0)?-DPow(abs(x), y):DPow(abs(x), y); 2.62 + // if (y == 2) { 2.63 + // return x * x; 2.64 + // } else { 2.65 + // if (x <= 0.0) { 2.66 + // long longy = (long)y; 2.67 + // if ((double)longy == y) { // if y is long 2.68 + // if (y + 1 == y) longy = 0; // huge number: even 2.69 + // result = ((1&longy) == 0)?-DPow(abs(x), y):DPow(abs(x), y); 2.70 + // } else { 2.71 + // result = NaN; 2.72 + // } 2.73 // } else { 2.74 - // result = NaN; 2.75 + // result = DPow(x,y); 2.76 // } 2.77 - // } else { 2.78 - // result = DPow(x,y); 2.79 + // if (result != result)? { 2.80 + // result = uncommon_trap() or runtime_call(); 2.81 + // } 2.82 + // return result; 2.83 // } 2.84 - // if (result != result)? { 2.85 - // result = uncommon_trap() or runtime_call(); 2.86 - // } 2.87 - // return result; 2.88 2.89 Node* x = round_double_node(argument(0)); 2.90 Node* y = round_double_node(argument(2)); 2.91 2.92 Node* result = NULL; 2.93 2.94 + Node* const_two_node = makecon(TypeD::make(2.0)); 2.95 + Node* cmp_node = _gvn.transform(new (C) CmpDNode(y, const_two_node)); 2.96 + Node* bool_node = _gvn.transform(new (C) BoolNode(cmp_node, BoolTest::eq)); 2.97 + IfNode* if_node = create_and_xform_if(control(), bool_node, PROB_STATIC_INFREQUENT, COUNT_UNKNOWN); 2.98 + Node* if_true = _gvn.transform(new (C) IfTrueNode(if_node)); 2.99 + Node* if_false = _gvn.transform(new (C) IfFalseNode(if_node)); 2.100 + 2.101 + RegionNode* region_node = new (C) RegionNode(3); 2.102 + region_node->init_req(1, if_true); 2.103 + 2.104 + Node* phi_node = new (C) PhiNode(region_node, Type::DOUBLE); 2.105 + // special case for x^y where y == 2, we can convert it to x * x 2.106 + phi_node->init_req(1, _gvn.transform(new (C) MulDNode(x, x))); 2.107 + 2.108 + 
// set control to if_false since we will now process the false branch 2.109 + set_control(if_false); 2.110 + 2.111 if (!too_many_traps(Deoptimization::Reason_intrinsic)) { 2.112 // Short form: skip the fancy tests and just check for NaN result. 2.113 result = _gvn.transform(new (C) PowDNode(C, control(), x, y)); 2.114 @@ -1892,7 +1915,15 @@ 2.115 result = _gvn.transform(phi); 2.116 } 2.117 2.118 - finish_pow_exp(result, x, y, OptoRuntime::Math_DD_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dpow), "POW"); 2.119 + result = finish_pow_exp(result, x, y, OptoRuntime::Math_DD_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dpow), "POW"); 2.120 + 2.121 + // control from finish_pow_exp is now input to the region node 2.122 + region_node->set_req(2, control()); 2.123 + // the result from finish_pow_exp is now input to the phi node 2.124 + phi_node->init_req(2, _gvn.transform(result)); 2.125 + set_control(_gvn.transform(region_node)); 2.126 + record_for_igvn(region_node); 2.127 + set_result(_gvn.transform(phi_node)); 2.128 2.129 C->set_has_split_ifs(true); // Has chance for split-if optimization 2.130 return true;