Tue, 12 Feb 2013 14:33:19 -0800
Merge
1.1 --- a/src/share/vm/opto/c2_globals.hpp Tue Feb 12 07:39:42 2013 -0800 1.2 +++ b/src/share/vm/opto/c2_globals.hpp Tue Feb 12 14:33:19 2013 -0800 1.3 @@ -618,6 +618,9 @@ 1.4 \ 1.5 product(intx, LiveNodeCountInliningCutoff, 20000, \ 1.6 "max number of live nodes in a method") \ 1.7 + \ 1.8 + diagnostic(bool, OptimizeExpensiveOps, true, \ 1.9 + "Find best control for expensive operations") \ 1.10 1.11 1.12 C2_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_EXPERIMENTAL_FLAG, DECLARE_NOTPRODUCT_FLAG)
2.1 --- a/src/share/vm/opto/compile.cpp Tue Feb 12 07:39:42 2013 -0800 2.2 +++ b/src/share/vm/opto/compile.cpp Tue Feb 12 14:33:19 2013 -0800 2.3 @@ -409,6 +409,13 @@ 2.4 remove_macro_node(n); 2.5 } 2.6 } 2.7 + // Remove useless expensive node 2.8 + for (int i = C->expensive_count()-1; i >= 0; i--) { 2.9 + Node* n = C->expensive_node(i); 2.10 + if (!useful.member(n)) { 2.11 + remove_expensive_node(n); 2.12 + } 2.13 + } 2.14 // clean up the late inline lists 2.15 remove_useless_late_inlines(&_string_late_inlines, useful); 2.16 remove_useless_late_inlines(&_late_inlines, useful); 2.17 @@ -1061,6 +1068,7 @@ 2.18 _intrinsics = NULL; 2.19 _macro_nodes = new(comp_arena()) GrowableArray<Node*>(comp_arena(), 8, 0, NULL); 2.20 _predicate_opaqs = new(comp_arena()) GrowableArray<Node*>(comp_arena(), 8, 0, NULL); 2.21 + _expensive_nodes = new(comp_arena()) GrowableArray<Node*>(comp_arena(), 8, 0, NULL); 2.22 register_library_intrinsics(); 2.23 } 2.24 2.25 @@ -1927,6 +1935,10 @@ 2.26 2.27 if (failing()) return; 2.28 2.29 + // No more new expensive nodes will be added to the list from here 2.30 + // so keep only the actual candidates for optimizations. 2.31 + cleanup_expensive_nodes(igvn); 2.32 + 2.33 // Perform escape analysis 2.34 if (_do_escape_analysis && ConnectionGraph::has_candidates(this)) { 2.35 if (has_loops()) { 2.36 @@ -3010,6 +3022,15 @@ 2.37 return true; 2.38 } 2.39 2.40 + // Expensive nodes have their control input set to prevent the GVN 2.41 + // from freely commoning them. There's no GVN beyond this point so 2.42 + // no need to keep the control input. We want the expensive nodes to 2.43 + // be freely moved to the least frequent code path by gcm. 2.44 + assert(OptimizeExpensiveOps || expensive_count() == 0, "optimization off but list non empty?"); 2.45 + for (int i = 0; i < expensive_count(); i++) { 2.46 + _expensive_nodes->at(i)->set_req(0, NULL); 2.47 + } 2.48 + 2.49 Final_Reshape_Counts frc; 2.50 2.51 // Visit everybody reachable! 2.52 @@ -3525,3 +3546,126 @@ 2.53 } 2.54 } 2.55 } 2.56 + 2.57 +int Compile::cmp_expensive_nodes(Node* n1, Node* n2) { 2.58 + if (n1->Opcode() < n2->Opcode()) return -1; 2.59 + else if (n1->Opcode() > n2->Opcode()) return 1; 2.60 + 2.61 + assert(n1->req() == n2->req(), err_msg_res("can't compare %s nodes: n1->req() = %d, n2->req() = %d", NodeClassNames[n1->Opcode()], n1->req(), n2->req())); 2.62 + for (uint i = 1; i < n1->req(); i++) { 2.63 + if (n1->in(i) < n2->in(i)) return -1; 2.64 + else if (n1->in(i) > n2->in(i)) return 1; 2.65 + } 2.66 + 2.67 + return 0; 2.68 +} 2.69 + 2.70 +int Compile::cmp_expensive_nodes(Node** n1p, Node** n2p) { 2.71 + Node* n1 = *n1p; 2.72 + Node* n2 = *n2p; 2.73 + 2.74 + return cmp_expensive_nodes(n1, n2); 2.75 +} 2.76 + 2.77 +void Compile::sort_expensive_nodes() { 2.78 + if (!expensive_nodes_sorted()) { 2.79 + _expensive_nodes->sort(cmp_expensive_nodes); 2.80 + } 2.81 +} 2.82 + 2.83 +bool Compile::expensive_nodes_sorted() const { 2.84 + for (int i = 1; i < _expensive_nodes->length(); i++) { 2.85 + if (cmp_expensive_nodes(_expensive_nodes->adr_at(i), _expensive_nodes->adr_at(i-1)) < 0) { 2.86 + return false; 2.87 + } 2.88 + } 2.89 + return true; 2.90 +} 2.91 + 2.92 +bool Compile::should_optimize_expensive_nodes(PhaseIterGVN &igvn) { 2.93 + if (_expensive_nodes->length() == 0) { 2.94 + return false; 2.95 + } 2.96 + 2.97 + assert(OptimizeExpensiveOps, "optimization off?"); 2.98 + 2.99 + // Take this opportunity to remove dead nodes from the list 2.100 + int j = 0; 2.101 + for (int i = 0; i < _expensive_nodes->length(); i++) { 2.102 + Node* n = _expensive_nodes->at(i); 2.103 + if (!n->is_unreachable(igvn)) { 2.104 + assert(n->is_expensive(), "should be expensive"); 2.105 + _expensive_nodes->at_put(j, n); 2.106 + j++; 2.107 + } 2.108 + } 2.109 + _expensive_nodes->trunc_to(j); 2.110 + 2.111 + // Then sort the list so that similar nodes are next to each other 2.112 + // and check for at least two nodes of identical kind with same data 2.113 + // inputs. 2.114 + sort_expensive_nodes(); 2.115 + 2.116 + for (int i = 0; i < _expensive_nodes->length()-1; i++) { 2.117 + if (cmp_expensive_nodes(_expensive_nodes->adr_at(i), _expensive_nodes->adr_at(i+1)) == 0) { 2.118 + return true; 2.119 + } 2.120 + } 2.121 + 2.122 + return false; 2.123 +} 2.124 + 2.125 +void Compile::cleanup_expensive_nodes(PhaseIterGVN &igvn) { 2.126 + if (_expensive_nodes->length() == 0) { 2.127 + return; 2.128 + } 2.129 + 2.130 + assert(OptimizeExpensiveOps, "optimization off?"); 2.131 + 2.132 + // Sort to bring similar nodes next to each other and clear the 2.133 + // control input of nodes for which there's only a single copy. 2.134 + sort_expensive_nodes(); 2.135 + 2.136 + int j = 0; 2.137 + int identical = 0; 2.138 + int i = 0; 2.139 + for (; i < _expensive_nodes->length()-1; i++) { 2.140 + assert(j <= i, "can't write beyond current index"); 2.141 + if (_expensive_nodes->at(i)->Opcode() == _expensive_nodes->at(i+1)->Opcode()) { 2.142 + identical++; 2.143 + _expensive_nodes->at_put(j++, _expensive_nodes->at(i)); 2.144 + continue; 2.145 + } 2.146 + if (identical > 0) { 2.147 + _expensive_nodes->at_put(j++, _expensive_nodes->at(i)); 2.148 + identical = 0; 2.149 + } else { 2.150 + Node* n = _expensive_nodes->at(i); 2.151 + igvn.hash_delete(n); 2.152 + n->set_req(0, NULL); 2.153 + igvn.hash_insert(n); 2.154 + } 2.155 + } 2.156 + if (identical > 0) { 2.157 + _expensive_nodes->at_put(j++, _expensive_nodes->at(i)); 2.158 + } else if (_expensive_nodes->length() >= 1) { 2.159 + Node* n = _expensive_nodes->at(i); 2.160 + igvn.hash_delete(n); 2.161 + n->set_req(0, NULL); 2.162 + igvn.hash_insert(n); 2.163 + } 2.164 + _expensive_nodes->trunc_to(j); 2.165 +} 2.166 + 2.167 +void Compile::add_expensive_node(Node * n) { 2.168 + assert(!_expensive_nodes->contains(n), "duplicate entry in expensive list"); 2.169 + assert(n->is_expensive(), "expensive nodes with non-null control here only"); 2.170 + assert(!n->is_CFG() && !n->is_Mem(), "no cfg or memory nodes here"); 2.171 + if (OptimizeExpensiveOps) { 2.172 + _expensive_nodes->append(n); 2.173 + } else { 2.174 + // Clear control input and let IGVN optimize expensive nodes if 2.175 + // OptimizeExpensiveOps is off. 2.176 + n->set_req(0, NULL); 2.177 + } 2.178 +}
3.1 --- a/src/share/vm/opto/compile.hpp Tue Feb 12 07:39:42 2013 -0800 3.2 +++ b/src/share/vm/opto/compile.hpp Tue Feb 12 14:33:19 2013 -0800 3.3 @@ -314,6 +314,7 @@ 3.4 GrowableArray<CallGenerator*>* _intrinsics; // List of intrinsics. 3.5 GrowableArray<Node*>* _macro_nodes; // List of nodes which need to be expanded before matching. 3.6 GrowableArray<Node*>* _predicate_opaqs; // List of Opaque1 nodes for the loop predicates. 3.7 + GrowableArray<Node*>* _expensive_nodes; // List of nodes that are expensive to compute and that we'd better not let the GVN freely common 3.8 ConnectionGraph* _congraph; 3.9 #ifndef PRODUCT 3.10 IdealGraphPrinter* _printer; 3.11 @@ -398,6 +399,13 @@ 3.12 GrowableArray<PrintInliningBuffer>* _print_inlining_list; 3.13 int _print_inlining; 3.14 3.15 + // Only keep nodes in the expensive node list that need to be optimized 3.16 + void cleanup_expensive_nodes(PhaseIterGVN &igvn); 3.17 + // Use for sorting expensive nodes to bring similar nodes together 3.18 + static int cmp_expensive_nodes(Node** n1, Node** n2); 3.19 + // Expensive nodes list already sorted? 3.20 + bool expensive_nodes_sorted() const; 3.21 + 3.22 public: 3.23 3.24 outputStream* print_inlining_stream() const { 3.25 @@ -573,8 +581,10 @@ 3.26 3.27 int macro_count() { return _macro_nodes->length(); } 3.28 int predicate_count() { return _predicate_opaqs->length();} 3.29 + int expensive_count() { return _expensive_nodes->length(); } 3.30 Node* macro_node(int idx) { return _macro_nodes->at(idx); } 3.31 Node* predicate_opaque1_node(int idx) { return _predicate_opaqs->at(idx);} 3.32 + Node* expensive_node(int idx) { return _expensive_nodes->at(idx); } 3.33 ConnectionGraph* congraph() { return _congraph;} 3.34 void set_congraph(ConnectionGraph* congraph) { _congraph = congraph;} 3.35 void add_macro_node(Node * n) { 3.36 @@ -592,6 +602,12 @@ 3.37 _predicate_opaqs->remove(n); 3.38 } 3.39 } 3.40 + void add_expensive_node(Node * n); 3.41 + void remove_expensive_node(Node * n) { 3.42 + if (_expensive_nodes->contains(n)) { 3.43 + _expensive_nodes->remove(n); 3.44 + } 3.45 + } 3.46 void add_predicate_opaq(Node * n) { 3.47 assert(!_predicate_opaqs->contains(n), " duplicate entry in predicate opaque1"); 3.48 assert(_macro_nodes->contains(n), "should have already been in macro list"); 3.49 @@ -604,6 +620,13 @@ 3.50 return _predicate_opaqs->contains(n); 3.51 } 3.52 3.53 + // Are there candidate expensive nodes for optimization? 3.54 + bool should_optimize_expensive_nodes(PhaseIterGVN &igvn); 3.55 + // Check whether n1 and n2 are similar 3.56 + static int cmp_expensive_nodes(Node* n1, Node* n2); 3.57 + // Sort expensive nodes to locate similar expensive nodes 3.58 + void sort_expensive_nodes(); 3.59 + 3.60 // Compilation environment. 3.61 Arena* comp_arena() { return &_comp_arena; } 3.62 ciEnv* env() const { return _env; }
4.1 --- a/src/share/vm/opto/library_call.cpp Tue Feb 12 07:39:42 2013 -0800 4.2 +++ b/src/share/vm/opto/library_call.cpp Tue Feb 12 14:33:19 2013 -0800 4.3 @@ -1653,7 +1653,7 @@ 4.4 // really odd corner cases (+/- Infinity). Just uncommon-trap them. 4.5 bool LibraryCallKit::inline_exp() { 4.6 Node* arg = round_double_node(argument(0)); 4.7 - Node* n = _gvn.transform(new (C) ExpDNode(0, arg)); 4.8 + Node* n = _gvn.transform(new (C) ExpDNode(C, control(), arg)); 4.9 4.10 finish_pow_exp(n, arg, NULL, OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dexp), "EXP"); 4.11 4.12 @@ -1688,7 +1688,7 @@ 4.13 4.14 if (!too_many_traps(Deoptimization::Reason_intrinsic)) { 4.15 // Short form: skip the fancy tests and just check for NaN result. 4.16 - result = _gvn.transform(new (C) PowDNode(0, x, y)); 4.17 + result = _gvn.transform(new (C) PowDNode(C, control(), x, y)); 4.18 } else { 4.19 // If this inlining ever returned NaN in the past, include all 4.20 // checks + call to the runtime. 4.21 @@ -1715,7 +1715,7 @@ 4.22 Node *complex_path = _gvn.transform( new (C) IfTrueNode(if1) ); 4.23 4.24 // Set fast path result 4.25 - Node *fast_result = _gvn.transform( new (C) PowDNode(0, x, y) ); 4.26 + Node *fast_result = _gvn.transform( new (C) PowDNode(C, control(), x, y) ); 4.27 phi->init_req(3, fast_result); 4.28 4.29 // Complex path 4.30 @@ -1775,7 +1775,7 @@ 4.31 // abs(x) 4.32 Node *absx=_gvn.transform( new (C) AbsDNode(x)); 4.33 // abs(x)^y 4.34 - Node *absxpowy = _gvn.transform( new (C) PowDNode(0, absx, y) ); 4.35 + Node *absxpowy = _gvn.transform( new (C) PowDNode(C, control(), absx, y) ); 4.36 // -abs(x)^y 4.37 Node *negabsxpowy = _gvn.transform(new (C) NegDNode (absxpowy)); 4.38 // (1&(long)y)==1?-DPow(abs(x), y):DPow(abs(x), y)
5.1 --- a/src/share/vm/opto/loopnode.cpp Tue Feb 12 07:39:42 2013 -0800 5.2 +++ b/src/share/vm/opto/loopnode.cpp Tue Feb 12 14:33:19 2013 -0800 5.3 @@ -88,9 +88,9 @@ 5.4 assert( !n->is_Phi() && !n->is_CFG(), "this code only handles data nodes" ); 5.5 uint i; 5.6 Node *early; 5.7 - if( n->in(0) ) { 5.8 + if (n->in(0) && !n->is_expensive()) { 5.9 early = n->in(0); 5.10 - if( !early->is_CFG() ) // Might be a non-CFG multi-def 5.11 + if (!early->is_CFG()) // Might be a non-CFG multi-def 5.12 early = get_ctrl(early); // So treat input as a straight data input 5.13 i = 1; 5.14 } else { 5.15 @@ -99,28 +99,28 @@ 5.16 } 5.17 uint e_d = dom_depth(early); 5.18 assert( early, "" ); 5.19 - for( ; i < n->req(); i++ ) { 5.20 + for (; i < n->req(); i++) { 5.21 Node *cin = get_ctrl(n->in(i)); 5.22 assert( cin, "" ); 5.23 // Keep deepest dominator depth 5.24 uint c_d = dom_depth(cin); 5.25 - if( c_d > e_d ) { // Deeper guy? 5.26 + if (c_d > e_d) { // Deeper guy? 5.27 early = cin; // Keep deepest found so far 5.28 e_d = c_d; 5.29 - } else if( c_d == e_d && // Same depth? 5.30 - early != cin ) { // If not equal, must use slower algorithm 5.31 + } else if (c_d == e_d && // Same depth? 5.32 + early != cin) { // If not equal, must use slower algorithm 5.33 // If same depth but not equal, one _must_ dominate the other 5.34 // and we want the deeper (i.e., dominated) guy. 5.35 Node *n1 = early; 5.36 Node *n2 = cin; 5.37 - while( 1 ) { 5.38 + while (1) { 5.39 n1 = idom(n1); // Walk up until break cycle 5.40 n2 = idom(n2); 5.41 - if( n1 == cin || // Walked early up to cin 5.42 - dom_depth(n2) < c_d ) 5.43 + if (n1 == cin || // Walked early up to cin 5.44 + dom_depth(n2) < c_d) 5.45 break; // early is deeper; keep him 5.46 - if( n2 == early || // Walked cin up to early 5.47 - dom_depth(n1) < c_d ) { 5.48 + if (n2 == early || // Walked cin up to early 5.49 + dom_depth(n1) < c_d) { 5.50 early = cin; // cin is deeper; keep him 5.51 break; 5.52 } 5.53 @@ -132,9 +132,108 @@ 5.54 // Return earliest legal location 5.55 assert(early == find_non_split_ctrl(early), "unexpected early control"); 5.56 5.57 + if (n->is_expensive()) { 5.58 + assert(n->in(0), "should have control input"); 5.59 + early = get_early_ctrl_for_expensive(n, early); 5.60 + } 5.61 + 5.62 return early; 5.63 } 5.64 5.65 +//------------------------------get_early_ctrl_for_expensive--------------------------------- 5.66 +// Move node up the dominator tree as high as legal while still beneficial 5.67 +Node *PhaseIdealLoop::get_early_ctrl_for_expensive(Node *n, Node* earliest) { 5.68 + assert(n->in(0) && n->is_expensive(), "expensive node with control input here"); 5.69 + assert(OptimizeExpensiveOps, "optimization off?"); 5.70 + 5.71 + Node* ctl = n->in(0); 5.72 + assert(ctl->is_CFG(), "expensive input 0 must be cfg"); 5.73 + uint min_dom_depth = dom_depth(earliest); 5.74 +#ifdef ASSERT 5.75 + if (!is_dominator(ctl, earliest) && !is_dominator(earliest, ctl)) { 5.76 + dump_bad_graph("Bad graph detected in get_early_ctrl_for_expensive", n, earliest, ctl); 5.77 + assert(false, "Bad graph detected in get_early_ctrl_for_expensive"); 5.78 + } 5.79 +#endif 5.80 + if (dom_depth(ctl) < min_dom_depth) { 5.81 + return earliest; 5.82 + } 5.83 + 5.84 + while (1) { 5.85 + Node *next = ctl; 5.86 + // Moving the node out of a loop on the projection of a If 5.87 + // confuses loop predication. So once we hit a Loop in a If branch 5.88 + // that doesn't branch to an UNC, we stop. The code that process 5.89 + // expensive nodes will notice the loop and skip over it to try to 5.90 + // move the node further up. 5.91 + if (ctl->is_CountedLoop() && ctl->in(1) != NULL && ctl->in(1)->in(0) != NULL && ctl->in(1)->in(0)->is_If()) { 5.92 + if (!is_uncommon_trap_if_pattern(ctl->in(1)->as_Proj(), Deoptimization::Reason_none)) { 5.93 + break; 5.94 + } 5.95 + next = idom(ctl->in(1)->in(0)); 5.96 + } else if (ctl->is_Proj()) { 5.97 + // We only move it up along a projection if the projection is 5.98 + // the single control projection for its parent: same code path, 5.99 + // if it's a If with UNC or fallthrough of a call. 5.100 + Node* parent_ctl = ctl->in(0); 5.101 + if (parent_ctl == NULL) { 5.102 + break; 5.103 + } else if (parent_ctl->is_CountedLoopEnd() && parent_ctl->as_CountedLoopEnd()->loopnode() != NULL) { 5.104 + next = parent_ctl->as_CountedLoopEnd()->loopnode()->init_control(); 5.105 + } else if (parent_ctl->is_If()) { 5.106 + if (!is_uncommon_trap_if_pattern(ctl->as_Proj(), Deoptimization::Reason_none)) { 5.107 + break; 5.108 + } 5.109 + assert(idom(ctl) == parent_ctl, "strange"); 5.110 + next = idom(parent_ctl); 5.111 + } else if (ctl->is_CatchProj()) { 5.112 + if (ctl->as_Proj()->_con != CatchProjNode::fall_through_index) { 5.113 + break; 5.114 + } 5.115 + assert(parent_ctl->in(0)->in(0)->is_Call(), "strange graph"); 5.116 + next = parent_ctl->in(0)->in(0)->in(0); 5.117 + } else { 5.118 + // Check if parent control has a single projection (this 5.119 + // control is the only possible successor of the parent 5.120 + // control). If so, we can try to move the node above the 5.121 + // parent control. 5.122 + int nb_ctl_proj = 0; 5.123 + for (DUIterator_Fast imax, i = parent_ctl->fast_outs(imax); i < imax; i++) { 5.124 + Node *p = parent_ctl->fast_out(i); 5.125 + if (p->is_Proj() && p->is_CFG()) { 5.126 + nb_ctl_proj++; 5.127 + if (nb_ctl_proj > 1) { 5.128 + break; 5.129 + } 5.130 + } 5.131 + } 5.132 + 5.133 + if (nb_ctl_proj > 1) { 5.134 + break; 5.135 + } 5.136 + assert(parent_ctl->is_Start() || parent_ctl->is_MemBar() || parent_ctl->is_Call(), "unexpected node"); 5.137 + assert(idom(ctl) == parent_ctl, "strange"); 5.138 + next = idom(parent_ctl); 5.139 + } 5.140 + } else { 5.141 + next = idom(ctl); 5.142 + } 5.143 + if (next->is_Root() || next->is_Start() || dom_depth(next) < min_dom_depth) { 5.144 + break; 5.145 + } 5.146 + ctl = next; 5.147 + } 5.148 + 5.149 + if (ctl != n->in(0)) { 5.150 + _igvn.hash_delete(n); 5.151 + n->set_req(0, ctl); 5.152 + _igvn.hash_insert(n); 5.153 + } 5.154 + 5.155 + return ctl; 5.156 +} 5.157 + 5.158 + 5.159 //------------------------------set_early_ctrl--------------------------------- 5.160 // Set earliest legal control 5.161 void PhaseIdealLoop::set_early_ctrl( Node *n ) { 5.162 @@ -1892,6 +1991,98 @@ 5.163 } 5.164 } 5.165 5.166 +//------------------------process_expensive_nodes----------------------------- 5.167 +// Expensive nodes have their control input set to prevent the GVN 5.168 +// from commoning them and as a result forcing the resulting node to 5.169 +// be in a more frequent path. Use CFG information here, to change the 5.170 +// control inputs so that some expensive nodes can be commoned while 5.171 +// not executed more frequently. 5.172 +bool PhaseIdealLoop::process_expensive_nodes() { 5.173 + assert(OptimizeExpensiveOps, "optimization off?"); 5.174 + 5.175 + // Sort nodes to bring similar nodes together 5.176 + C->sort_expensive_nodes(); 5.177 + 5.178 + bool progress = false; 5.179 + 5.180 + for (int i = 0; i < C->expensive_count(); ) { 5.181 + Node* n = C->expensive_node(i); 5.182 + int start = i; 5.183 + // Find nodes similar to n 5.184 + i++; 5.185 + for (; i < C->expensive_count() && Compile::cmp_expensive_nodes(n, C->expensive_node(i)) == 0; i++); 5.186 + int end = i; 5.187 + // And compare them two by two 5.188 + for (int j = start; j < end; j++) { 5.189 + Node* n1 = C->expensive_node(j); 5.190 + if (is_node_unreachable(n1)) { 5.191 + continue; 5.192 + } 5.193 + for (int k = j+1; k < end; k++) { 5.194 + Node* n2 = C->expensive_node(k); 5.195 + if (is_node_unreachable(n2)) { 5.196 + continue; 5.197 + } 5.198 + 5.199 + assert(n1 != n2, "should be pair of nodes"); 5.200 + 5.201 + Node* c1 = n1->in(0); 5.202 + Node* c2 = n2->in(0); 5.203 + 5.204 + Node* parent_c1 = c1; 5.205 + Node* parent_c2 = c2; 5.206 + 5.207 + // The call to get_early_ctrl_for_expensive() moves the 5.208 + // expensive nodes up but stops at loops that are in a if 5.209 + // branch. See whether we can exit the loop and move above the 5.210 + // If. 5.211 + if (c1->is_Loop()) { 5.212 + parent_c1 = c1->in(1); 5.213 + } 5.214 + if (c2->is_Loop()) { 5.215 + parent_c2 = c2->in(1); 5.216 + } 5.217 + 5.218 + if (parent_c1 == parent_c2) { 5.219 + _igvn._worklist.push(n1); 5.220 + _igvn._worklist.push(n2); 5.221 + continue; 5.222 + } 5.223 + 5.224 + // Look for identical expensive node up the dominator chain. 5.225 + if (is_dominator(c1, c2)) { 5.226 + c2 = c1; 5.227 + } else if (is_dominator(c2, c1)) { 5.228 + c1 = c2; 5.229 + } else if (parent_c1->is_Proj() && parent_c1->in(0)->is_If() && 5.230 + parent_c2->is_Proj() && parent_c1->in(0) == parent_c2->in(0)) { 5.231 + // Both branches have the same expensive node so move it up 5.232 + // before the if. 5.233 + c1 = c2 = idom(parent_c1->in(0)); 5.234 + } 5.235 + // Do the actual moves 5.236 + if (n1->in(0) != c1) { 5.237 + _igvn.hash_delete(n1); 5.238 + n1->set_req(0, c1); 5.239 + _igvn.hash_insert(n1); 5.240 + _igvn._worklist.push(n1); 5.241 + progress = true; 5.242 + } 5.243 + if (n2->in(0) != c2) { 5.244 + _igvn.hash_delete(n2); 5.245 + n2->set_req(0, c2); 5.246 + _igvn.hash_insert(n2); 5.247 + _igvn._worklist.push(n2); 5.248 + progress = true; 5.249 + } 5.250 + } 5.251 + } 5.252 + } 5.253 + 5.254 + return progress; 5.255 +} 5.256 + 5.257 + 5.258 //============================================================================= 5.259 //----------------------------build_and_optimize------------------------------- 5.260 // Create a PhaseLoop. Build the ideal Loop tree. Map each Ideal Node to 5.261 @@ -1960,7 +2151,9 @@ 5.262 } 5.263 5.264 // Nothing to do, so get out 5.265 - if( !C->has_loops() && !skip_loop_opts && !do_split_ifs && !_verify_me && !_verify_only ) { 5.266 + bool stop_early = !C->has_loops() && !skip_loop_opts && !do_split_ifs && !_verify_me && !_verify_only; 5.267 + bool do_expensive_nodes = C->should_optimize_expensive_nodes(_igvn); 5.268 + if (stop_early && !do_expensive_nodes) { 5.269 _igvn.optimize(); // Cleanup NeverBranches 5.270 return; 5.271 } 5.272 @@ -2058,6 +2251,21 @@ 5.273 return; 5.274 } 5.275 5.276 + if (stop_early) { 5.277 + assert(do_expensive_nodes, "why are we here?"); 5.278 + if (process_expensive_nodes()) { 5.279 + // If we made some progress when processing expensive nodes then 5.280 + // the IGVN may modify the graph in a way that will allow us to 5.281 + // make some more progress: we need to try processing expensive 5.282 + // nodes again. 5.283 + C->set_major_progress(); 5.284 + } 5.285 + 5.286 + _igvn.optimize(); 5.287 + 5.288 + return; 5.289 + } 5.290 + 5.291 // Some parser-inserted loop predicates could never be used by loop 5.292 // predication or they were moved away from loop during some optimizations. 5.293 // For example, peeling. Eliminate them before next loop optimizations. 5.294 @@ -2120,6 +2328,10 @@ 5.295 NOT_PRODUCT( if( VerifyLoopOptimizations ) verify(); ); 5.296 } 5.297 5.298 + if (!C->major_progress() && do_expensive_nodes && process_expensive_nodes()) { 5.299 + C->set_major_progress(); 5.300 + } 5.301 + 5.302 // Perform loop predication before iteration splitting 5.303 if (C->has_loops() && !C->major_progress() && (C->predicate_count() > 0)) { 5.304 _ltree_root->_child->loop_predication(this); 5.305 @@ -3299,7 +3511,7 @@ 5.306 #ifdef ASSERT 5.307 if (legal->is_Start() && !early->is_Root()) { 5.308 // Bad graph. Print idom path and fail. 5.309 - dump_bad_graph(n, early, LCA); 5.310 + dump_bad_graph("Bad graph detected in build_loop_late", n, early, LCA); 5.311 assert(false, "Bad graph detected in build_loop_late"); 5.312 } 5.313 #endif 5.314 @@ -3350,8 +3562,8 @@ 5.315 } 5.316 5.317 #ifdef ASSERT 5.318 -void PhaseIdealLoop::dump_bad_graph(Node* n, Node* early, Node* LCA) { 5.319 - tty->print_cr( "Bad graph detected in build_loop_late"); 5.320 +void PhaseIdealLoop::dump_bad_graph(const char* msg, Node* n, Node* early, Node* LCA) { 5.321 + tty->print_cr(msg); 5.322 tty->print("n: "); n->dump(); 5.323 tty->print("early(n): "); early->dump(); 5.324 if (n->in(0) != NULL && !n->in(0)->is_top() &&
6.1 --- a/src/share/vm/opto/loopnode.hpp Tue Feb 12 07:39:42 2013 -0800 6.2 +++ b/src/share/vm/opto/loopnode.hpp Tue Feb 12 14:33:19 2013 -0800 6.3 @@ -263,9 +263,18 @@ 6.4 bool stride_is_con() const { Node *tmp = stride (); return (tmp != NULL && tmp->is_Con()); } 6.5 BoolTest::mask test_trip() const { return in(TestValue)->as_Bool()->_test._test; } 6.6 CountedLoopNode *loopnode() const { 6.7 + // The CountedLoopNode that goes with this CountedLoopEndNode may 6.8 + // have been optimized out by the IGVN so be cautious with the 6.9 + // pattern matching on the graph 6.10 + if (phi() == NULL) { 6.11 + return NULL; 6.12 + } 6.13 Node *ln = phi()->in(0); 6.14 - assert( ln->Opcode() == Op_CountedLoop, "malformed loop" ); 6.15 - return (CountedLoopNode*)ln; } 6.16 + if (ln->is_CountedLoop() && ln->as_CountedLoop()->loopexit() == this) { 6.17 + return (CountedLoopNode*)ln; 6.18 + } 6.19 + return NULL; 6.20 + } 6.21 6.22 #ifndef PRODUCT 6.23 virtual void dump_spec(outputStream *st) const; 6.24 @@ -598,6 +607,7 @@ 6.25 // check if transform created new nodes that need _ctrl recorded 6.26 Node *get_late_ctrl( Node *n, Node *early ); 6.27 Node *get_early_ctrl( Node *n ); 6.28 + Node *get_early_ctrl_for_expensive(Node *n, Node* earliest); 6.29 void set_early_ctrl( Node *n ); 6.30 void set_subtree_ctrl( Node *root ); 6.31 void set_ctrl( Node *n, Node *ctrl ) { 6.32 @@ -905,6 +915,16 @@ 6.33 void collect_potentially_useful_predicates(IdealLoopTree *loop, Unique_Node_List &predicate_opaque1); 6.34 void eliminate_useless_predicates(); 6.35 6.36 + // Change the control input of expensive nodes to allow commoning by 6.37 + // IGVN when it is guaranteed to not result in a more frequent 6.38 + // execution of the expensive node. Return true if progress. 6.39 + bool process_expensive_nodes(); 6.40 + 6.41 + // Check whether node has become unreachable 6.42 + bool is_node_unreachable(Node *n) const { 6.43 + return !has_node(n) || n->is_unreachable(_igvn); 6.44 + } 6.45 + 6.46 // Eliminate range-checks and other trip-counter vs loop-invariant tests. 6.47 void do_range_check( IdealLoopTree *loop, Node_List &old_new ); 6.48 6.49 @@ -1043,7 +1063,7 @@ 6.50 void register_new_node( Node *n, Node *blk ); 6.51 6.52 #ifdef ASSERT 6.53 -void dump_bad_graph(Node* n, Node* early, Node* LCA); 6.54 + void dump_bad_graph(const char* msg, Node* n, Node* early, Node* LCA); 6.55 #endif 6.56 6.57 #ifndef PRODUCT
7.1 --- a/src/share/vm/opto/node.cpp Tue Feb 12 07:39:42 2013 -0800 7.2 +++ b/src/share/vm/opto/node.cpp Tue Feb 12 14:33:19 2013 -0800 7.3 @@ -493,6 +493,8 @@ 7.4 } 7.5 if (is_macro()) 7.6 compile->add_macro_node(n); 7.7 + if (is_expensive()) 7.8 + compile->add_expensive_node(n); 7.9 7.10 n->set_idx(compile->next_unique()); // Get new unique index as well 7.11 debug_only( n->verify_construction() ); 7.12 @@ -616,6 +618,9 @@ 7.13 if (is_macro()) { 7.14 compile->remove_macro_node(this); 7.15 } 7.16 + if (is_expensive()) { 7.17 + compile->remove_expensive_node(this); 7.18 + } 7.19 #ifdef ASSERT 7.20 // We will not actually delete the storage, but we'll make the node unusable. 7.21 *(address*)this = badAddress; // smash the C++ vtbl, probably 7.22 @@ -689,6 +694,13 @@ 7.23 } 7.24 #endif 7.25 7.26 + 7.27 +//------------------------------is_unreachable--------------------------------- 7.28 +bool Node::is_unreachable(PhaseIterGVN &igvn) const { 7.29 + assert(!is_Mach(), "doesn't work with MachNodes"); 7.30 + return outcnt() == 0 || igvn.type(this) == Type::TOP || in(0)->is_top(); 7.31 +} 7.32 + 7.33 //------------------------------add_req---------------------------------------- 7.34 // Add a new required input at the end 7.35 void Node::add_req( Node *n ) { 7.36 @@ -1246,6 +1258,9 @@ 7.37 if (dead->is_macro()) { 7.38 igvn->C->remove_macro_node(dead); 7.39 } 7.40 + if (dead->is_expensive()) { 7.41 + igvn->C->remove_expensive_node(dead); 7.42 + } 7.43 // Kill all inputs to the dead guy 7.44 for (uint i=0; i < dead->req(); i++) { 7.45 Node *n = dead->in(i); // Get input to dead guy
8.1 --- a/src/share/vm/opto/node.hpp Tue Feb 12 07:39:42 2013 -0800 8.2 +++ b/src/share/vm/opto/node.hpp Tue Feb 12 14:33:19 2013 -0800 8.3 @@ -378,6 +378,8 @@ 8.4 bool is_dead() const; 8.5 #define is_not_dead(n) ((n) == NULL || !VerifyIterativeGVN || !((n)->is_dead())) 8.6 #endif 8.7 + // Check whether node has become unreachable 8.8 + bool is_unreachable(PhaseIterGVN &igvn) const; 8.9 8.10 // Set a required input edge, also updates corresponding output edge 8.11 void add_req( Node *n ); // Append a NEW required input 8.12 @@ -646,7 +648,8 @@ 8.13 Flag_may_be_short_branch = Flag_is_dead_loop_safe << 1, 8.14 Flag_avoid_back_to_back = Flag_may_be_short_branch << 1, 8.15 Flag_has_call = Flag_avoid_back_to_back << 1, 8.16 - _max_flags = (Flag_has_call << 1) - 1 // allow flags combination 8.17 + Flag_is_expensive = Flag_has_call << 1, 8.18 + _max_flags = (Flag_is_expensive << 1) - 1 // allow flags combination 8.19 }; 8.20 8.21 private: 8.22 @@ -819,6 +822,8 @@ 8.23 8.24 // The node is a "macro" node which needs to be expanded before matching 8.25 bool is_macro() const { return (_flags & Flag_is_macro) != 0; } 8.26 + // The node is expensive: the best control is set during loop opts 8.27 + bool is_expensive() const { return (_flags & Flag_is_expensive) != 0 && in(0) != NULL; } 8.28 8.29 //----------------- Optimization 8.30
9.1 --- a/src/share/vm/opto/phaseX.cpp Tue Feb 12 07:39:42 2013 -0800 9.2 +++ b/src/share/vm/opto/phaseX.cpp Tue Feb 12 14:33:19 2013 -0800 9.3 @@ -1203,6 +1203,9 @@ 9.4 if (dead->is_macro()) { 9.5 C->remove_macro_node(dead); 9.6 } 9.7 + if (dead->is_expensive()) { 9.8 + C->remove_expensive_node(dead); 9.9 + } 9.10 9.11 if (recurse) { 9.12 continue;
10.1 --- a/src/share/vm/opto/subnode.hpp Tue Feb 12 07:39:42 2013 -0800 10.2 +++ b/src/share/vm/opto/subnode.hpp Tue Feb 12 14:33:19 2013 -0800 10.3 @@ -456,7 +456,10 @@ 10.4 // Exponentiate a double 10.5 class ExpDNode : public Node { 10.6 public: 10.7 - ExpDNode( Node *c, Node *in1 ) : Node(c, in1) {} 10.8 + ExpDNode(Compile* C, Node *c, Node *in1) : Node(c, in1) { 10.9 + init_flags(Flag_is_expensive); 10.10 + C->add_expensive_node(this); 10.11 + } 10.12 virtual int Opcode() const; 10.13 const Type *bottom_type() const { return Type::DOUBLE; } 10.14 virtual uint ideal_reg() const { return Op_RegD; } 10.15 @@ -489,7 +492,10 @@ 10.16 // Raise a double to a double power 10.17 class PowDNode : public Node { 10.18 public: 10.19 - PowDNode(Node *c, Node *in1, Node *in2 ) : Node(c, in1, in2) {} 10.20 + PowDNode(Compile* C, Node *c, Node *in1, Node *in2 ) : Node(c, in1, in2) { 10.21 + init_flags(Flag_is_expensive); 10.22 + C->add_expensive_node(this); 10.23 + } 10.24 virtual int Opcode() const; 10.25 const Type *bottom_type() const { return Type::DOUBLE; } 10.26 virtual uint ideal_reg() const { return Op_RegD; }