Merge

Tue, 12 Feb 2013 14:33:19 -0800

author
kmo
date
Tue, 12 Feb 2013 14:33:19 -0800
changeset 4591
aaad39923cdb
parent 4590
c703f9c4b025
parent 4589
8b3da8d14c93
child 4592
12e01444ca2d

Merge

     1.1 --- a/src/share/vm/opto/c2_globals.hpp	Tue Feb 12 07:39:42 2013 -0800
     1.2 +++ b/src/share/vm/opto/c2_globals.hpp	Tue Feb 12 14:33:19 2013 -0800
     1.3 @@ -618,6 +618,9 @@
     1.4                                                                              \
     1.5    product(intx, LiveNodeCountInliningCutoff, 20000,                         \
     1.6            "max number of live nodes in a method")                           \
     1.7 +                                                                            \
     1.8 +  diagnostic(bool, OptimizeExpensiveOps, true,                              \
     1.9 +          "Find best control for expensive operations")                     \
    1.10  
    1.11  
    1.12  C2_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_EXPERIMENTAL_FLAG, DECLARE_NOTPRODUCT_FLAG)
     2.1 --- a/src/share/vm/opto/compile.cpp	Tue Feb 12 07:39:42 2013 -0800
     2.2 +++ b/src/share/vm/opto/compile.cpp	Tue Feb 12 14:33:19 2013 -0800
     2.3 @@ -409,6 +409,13 @@
     2.4        remove_macro_node(n);
     2.5      }
     2.6    }
     2.7 +  // Remove useless expensive node
     2.8 +  for (int i = C->expensive_count()-1; i >= 0; i--) {
     2.9 +    Node* n = C->expensive_node(i);
    2.10 +    if (!useful.member(n)) {
    2.11 +      remove_expensive_node(n);
    2.12 +    }
    2.13 +  }
    2.14    // clean up the late inline lists
    2.15    remove_useless_late_inlines(&_string_late_inlines, useful);
    2.16    remove_useless_late_inlines(&_late_inlines, useful);
    2.17 @@ -1061,6 +1068,7 @@
    2.18    _intrinsics = NULL;
    2.19    _macro_nodes = new(comp_arena()) GrowableArray<Node*>(comp_arena(), 8,  0, NULL);
    2.20    _predicate_opaqs = new(comp_arena()) GrowableArray<Node*>(comp_arena(), 8,  0, NULL);
    2.21 +  _expensive_nodes = new(comp_arena()) GrowableArray<Node*>(comp_arena(), 8,  0, NULL);
    2.22    register_library_intrinsics();
    2.23  }
    2.24  
    2.25 @@ -1927,6 +1935,10 @@
    2.26  
    2.27    if (failing())  return;
    2.28  
    2.29 +  // No more new expensive nodes will be added to the list from here
    2.30 +  // so keep only the actual candidates for optimizations.
    2.31 +  cleanup_expensive_nodes(igvn);
    2.32 +
    2.33    // Perform escape analysis
    2.34    if (_do_escape_analysis && ConnectionGraph::has_candidates(this)) {
    2.35      if (has_loops()) {
    2.36 @@ -3010,6 +3022,15 @@
    2.37      return true;
    2.38    }
    2.39  
    2.40 +  // Expensive nodes have their control input set to prevent the GVN
    2.41 +  // from freely commoning them. There's no GVN beyond this point so
    2.42 +  // no need to keep the control input. We want the expensive nodes to
    2.43 +  // be freely moved to the least frequent code path by gcm.
    2.44 +  assert(OptimizeExpensiveOps || expensive_count() == 0, "optimization off but list non empty?");
    2.45 +  for (int i = 0; i < expensive_count(); i++) {
    2.46 +    _expensive_nodes->at(i)->set_req(0, NULL);
    2.47 +  }
    2.48 +
    2.49    Final_Reshape_Counts frc;
    2.50  
    2.51    // Visit everybody reachable!
    2.52 @@ -3525,3 +3546,126 @@
    2.53      }
    2.54    }
    2.55  }
    2.56 +
    2.57 +int Compile::cmp_expensive_nodes(Node* n1, Node* n2) {
    2.58 +  if (n1->Opcode() < n2->Opcode())      return -1;
    2.59 +  else if (n1->Opcode() > n2->Opcode()) return 1;
    2.60 +
    2.61 +  assert(n1->req() == n2->req(), err_msg_res("can't compare %s nodes: n1->req() = %d, n2->req() = %d", NodeClassNames[n1->Opcode()], n1->req(), n2->req()));
    2.62 +  for (uint i = 1; i < n1->req(); i++) {
    2.63 +    if (n1->in(i) < n2->in(i))      return -1;
    2.64 +    else if (n1->in(i) > n2->in(i)) return 1;
    2.65 +  }
    2.66 +
    2.67 +  return 0;
    2.68 +}
    2.69 +
    2.70 +int Compile::cmp_expensive_nodes(Node** n1p, Node** n2p) {
    2.71 +  Node* n1 = *n1p;
    2.72 +  Node* n2 = *n2p;
    2.73 +
    2.74 +  return cmp_expensive_nodes(n1, n2);
    2.75 +}
    2.76 +
    2.77 +void Compile::sort_expensive_nodes() {
    2.78 +  if (!expensive_nodes_sorted()) {
    2.79 +    _expensive_nodes->sort(cmp_expensive_nodes);
    2.80 +  }
    2.81 +}
    2.82 +
    2.83 +bool Compile::expensive_nodes_sorted() const {
    2.84 +  for (int i = 1; i < _expensive_nodes->length(); i++) {
    2.85 +    if (cmp_expensive_nodes(_expensive_nodes->adr_at(i), _expensive_nodes->adr_at(i-1)) < 0) {
    2.86 +      return false;
    2.87 +    }
    2.88 +  }
    2.89 +  return true;
    2.90 +}
    2.91 +
    2.92 +bool Compile::should_optimize_expensive_nodes(PhaseIterGVN &igvn) {
    2.93 +  if (_expensive_nodes->length() == 0) {
    2.94 +    return false;
    2.95 +  }
    2.96 +
    2.97 +  assert(OptimizeExpensiveOps, "optimization off?");
    2.98 +
    2.99 +  // Take this opportunity to remove dead nodes from the list
   2.100 +  int j = 0;
   2.101 +  for (int i = 0; i < _expensive_nodes->length(); i++) {
   2.102 +    Node* n = _expensive_nodes->at(i);
   2.103 +    if (!n->is_unreachable(igvn)) {
   2.104 +      assert(n->is_expensive(), "should be expensive");
   2.105 +      _expensive_nodes->at_put(j, n);
   2.106 +      j++;
   2.107 +    }
   2.108 +  }
   2.109 +  _expensive_nodes->trunc_to(j);
   2.110 +
   2.111 +  // Then sort the list so that similar nodes are next to each other
   2.112 +  // and check for at least two nodes of identical kind with same data
   2.113 +  // inputs.
   2.114 +  sort_expensive_nodes();
   2.115 +
   2.116 +  for (int i = 0; i < _expensive_nodes->length()-1; i++) {
   2.117 +    if (cmp_expensive_nodes(_expensive_nodes->adr_at(i), _expensive_nodes->adr_at(i+1)) == 0) {
   2.118 +      return true;
   2.119 +    }
   2.120 +  }
   2.121 +
   2.122 +  return false;
   2.123 +}
   2.124 +
   2.125 +void Compile::cleanup_expensive_nodes(PhaseIterGVN &igvn) {
   2.126 +  if (_expensive_nodes->length() == 0) {
   2.127 +    return;
   2.128 +  }
   2.129 +
   2.130 +  assert(OptimizeExpensiveOps, "optimization off?");
   2.131 +
   2.132 +  // Sort to bring similar nodes next to each other and clear the
   2.133 +  // control input of nodes for which there's only a single copy.
   2.134 +  sort_expensive_nodes();
   2.135 +
   2.136 +  int j = 0;
   2.137 +  int identical = 0;
   2.138 +  int i = 0;
   2.139 +  for (; i < _expensive_nodes->length()-1; i++) {
   2.140 +    assert(j <= i, "can't write beyond current index");
   2.141 +    if (_expensive_nodes->at(i)->Opcode() == _expensive_nodes->at(i+1)->Opcode()) {
   2.142 +      identical++;
   2.143 +      _expensive_nodes->at_put(j++, _expensive_nodes->at(i));
   2.144 +      continue;
   2.145 +    }
   2.146 +    if (identical > 0) {
   2.147 +      _expensive_nodes->at_put(j++, _expensive_nodes->at(i));
   2.148 +      identical = 0;
   2.149 +    } else {
   2.150 +      Node* n = _expensive_nodes->at(i);
   2.151 +      igvn.hash_delete(n);
   2.152 +      n->set_req(0, NULL);
   2.153 +      igvn.hash_insert(n);
   2.154 +    }
   2.155 +  }
   2.156 +  if (identical > 0) {
   2.157 +    _expensive_nodes->at_put(j++, _expensive_nodes->at(i));
   2.158 +  } else if (_expensive_nodes->length() >= 1) {
   2.159 +    Node* n = _expensive_nodes->at(i);
   2.160 +    igvn.hash_delete(n);
   2.161 +    n->set_req(0, NULL);
   2.162 +    igvn.hash_insert(n);
   2.163 +  }
   2.164 +  _expensive_nodes->trunc_to(j);
   2.165 +}
   2.166 +
   2.167 +void Compile::add_expensive_node(Node * n) {
   2.168 +  assert(!_expensive_nodes->contains(n), "duplicate entry in expensive list");
   2.169 +  assert(n->is_expensive(), "expensive nodes with non-null control here only");
   2.170 +  assert(!n->is_CFG() && !n->is_Mem(), "no cfg or memory nodes here");
   2.171 +  if (OptimizeExpensiveOps) {
   2.172 +    _expensive_nodes->append(n);
   2.173 +  } else {
   2.174 +    // Clear control input and let IGVN optimize expensive nodes if
   2.175 +    // OptimizeExpensiveOps is off.
   2.176 +    n->set_req(0, NULL);
   2.177 +  }
   2.178 +}
     3.1 --- a/src/share/vm/opto/compile.hpp	Tue Feb 12 07:39:42 2013 -0800
     3.2 +++ b/src/share/vm/opto/compile.hpp	Tue Feb 12 14:33:19 2013 -0800
     3.3 @@ -314,6 +314,7 @@
     3.4    GrowableArray<CallGenerator*>* _intrinsics;   // List of intrinsics.
     3.5    GrowableArray<Node*>* _macro_nodes;           // List of nodes which need to be expanded before matching.
     3.6    GrowableArray<Node*>* _predicate_opaqs;       // List of Opaque1 nodes for the loop predicates.
     3.7 +  GrowableArray<Node*>* _expensive_nodes;       // List of nodes that are expensive to compute and that we'd better not let the GVN freely common
     3.8    ConnectionGraph*      _congraph;
     3.9  #ifndef PRODUCT
    3.10    IdealGraphPrinter*    _printer;
    3.11 @@ -398,6 +399,13 @@
    3.12    GrowableArray<PrintInliningBuffer>* _print_inlining_list;
    3.13    int _print_inlining;
    3.14  
    3.15 +  // Only keep nodes in the expensive node list that need to be optimized
    3.16 +  void cleanup_expensive_nodes(PhaseIterGVN &igvn);
    3.17 +  // Use for sorting expensive nodes to bring similar nodes together
    3.18 +  static int cmp_expensive_nodes(Node** n1, Node** n2);
    3.19 +  // Expensive nodes list already sorted?
    3.20 +  bool expensive_nodes_sorted() const;
    3.21 +
    3.22   public:
    3.23  
    3.24    outputStream* print_inlining_stream() const {
    3.25 @@ -573,8 +581,10 @@
    3.26  
    3.27    int           macro_count()                   { return _macro_nodes->length(); }
    3.28    int           predicate_count()               { return _predicate_opaqs->length();}
    3.29 +  int           expensive_count()               { return _expensive_nodes->length(); }
    3.30    Node*         macro_node(int idx)             { return _macro_nodes->at(idx); }
    3.31    Node*         predicate_opaque1_node(int idx) { return _predicate_opaqs->at(idx);}
    3.32 +  Node*         expensive_node(int idx)         { return _expensive_nodes->at(idx); }
    3.33    ConnectionGraph* congraph()                   { return _congraph;}
    3.34    void set_congraph(ConnectionGraph* congraph)  { _congraph = congraph;}
    3.35    void add_macro_node(Node * n) {
    3.36 @@ -592,6 +602,12 @@
    3.37        _predicate_opaqs->remove(n);
    3.38      }
    3.39    }
    3.40 +  void add_expensive_node(Node * n);
    3.41 +  void remove_expensive_node(Node * n) {
    3.42 +    if (_expensive_nodes->contains(n)) {
    3.43 +      _expensive_nodes->remove(n);
    3.44 +    }
    3.45 +  }
    3.46    void add_predicate_opaq(Node * n) {
    3.47      assert(!_predicate_opaqs->contains(n), " duplicate entry in predicate opaque1");
    3.48      assert(_macro_nodes->contains(n), "should have already been in macro list");
    3.49 @@ -604,6 +620,13 @@
    3.50      return _predicate_opaqs->contains(n);
    3.51    }
    3.52  
    3.53 +  // Are there candidate expensive nodes for optimization?
    3.54 +  bool should_optimize_expensive_nodes(PhaseIterGVN &igvn);
    3.55 +  // Check whether n1 and n2 are similar
    3.56 +  static int cmp_expensive_nodes(Node* n1, Node* n2);
    3.57 +  // Sort expensive nodes to locate similar expensive nodes
    3.58 +  void sort_expensive_nodes();
    3.59 +
    3.60    // Compilation environment.
    3.61    Arena*            comp_arena()                { return &_comp_arena; }
    3.62    ciEnv*            env() const                 { return _env; }
     4.1 --- a/src/share/vm/opto/library_call.cpp	Tue Feb 12 07:39:42 2013 -0800
     4.2 +++ b/src/share/vm/opto/library_call.cpp	Tue Feb 12 14:33:19 2013 -0800
     4.3 @@ -1653,7 +1653,7 @@
     4.4  // really odd corner cases (+/- Infinity).  Just uncommon-trap them.
     4.5  bool LibraryCallKit::inline_exp() {
     4.6    Node* arg = round_double_node(argument(0));
     4.7 -  Node* n   = _gvn.transform(new (C) ExpDNode(0, arg));
     4.8 +  Node* n   = _gvn.transform(new (C) ExpDNode(C, control(), arg));
     4.9  
    4.10    finish_pow_exp(n, arg, NULL, OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dexp), "EXP");
    4.11  
    4.12 @@ -1688,7 +1688,7 @@
    4.13  
    4.14    if (!too_many_traps(Deoptimization::Reason_intrinsic)) {
    4.15      // Short form: skip the fancy tests and just check for NaN result.
    4.16 -    result = _gvn.transform(new (C) PowDNode(0, x, y));
    4.17 +    result = _gvn.transform(new (C) PowDNode(C, control(), x, y));
    4.18    } else {
    4.19      // If this inlining ever returned NaN in the past, include all
    4.20      // checks + call to the runtime.
    4.21 @@ -1715,7 +1715,7 @@
    4.22      Node *complex_path = _gvn.transform( new (C) IfTrueNode(if1) );
    4.23  
    4.24      // Set fast path result
    4.25 -    Node *fast_result = _gvn.transform( new (C) PowDNode(0, x, y) );
    4.26 +    Node *fast_result = _gvn.transform( new (C) PowDNode(C, control(), x, y) );
    4.27      phi->init_req(3, fast_result);
    4.28  
    4.29      // Complex path
    4.30 @@ -1775,7 +1775,7 @@
    4.31      // abs(x)
    4.32      Node *absx=_gvn.transform( new (C) AbsDNode(x));
    4.33      // abs(x)^y
    4.34 -    Node *absxpowy = _gvn.transform( new (C) PowDNode(0, absx, y) );
    4.35 +    Node *absxpowy = _gvn.transform( new (C) PowDNode(C, control(), absx, y) );
    4.36      // -abs(x)^y
    4.37      Node *negabsxpowy = _gvn.transform(new (C) NegDNode (absxpowy));
    4.38      // (1&(long)y)==1?-DPow(abs(x), y):DPow(abs(x), y)
     5.1 --- a/src/share/vm/opto/loopnode.cpp	Tue Feb 12 07:39:42 2013 -0800
     5.2 +++ b/src/share/vm/opto/loopnode.cpp	Tue Feb 12 14:33:19 2013 -0800
     5.3 @@ -88,9 +88,9 @@
     5.4    assert( !n->is_Phi() && !n->is_CFG(), "this code only handles data nodes" );
     5.5    uint i;
     5.6    Node *early;
     5.7 -  if( n->in(0) ) {
     5.8 +  if (n->in(0) && !n->is_expensive()) {
     5.9      early = n->in(0);
    5.10 -    if( !early->is_CFG() ) // Might be a non-CFG multi-def
    5.11 +    if (!early->is_CFG()) // Might be a non-CFG multi-def
    5.12        early = get_ctrl(early);        // So treat input as a straight data input
    5.13      i = 1;
    5.14    } else {
    5.15 @@ -99,28 +99,28 @@
    5.16    }
    5.17    uint e_d = dom_depth(early);
    5.18    assert( early, "" );
    5.19 -  for( ; i < n->req(); i++ ) {
    5.20 +  for (; i < n->req(); i++) {
    5.21      Node *cin = get_ctrl(n->in(i));
    5.22      assert( cin, "" );
    5.23      // Keep deepest dominator depth
    5.24      uint c_d = dom_depth(cin);
    5.25 -    if( c_d > e_d ) {           // Deeper guy?
    5.26 +    if (c_d > e_d) {           // Deeper guy?
    5.27        early = cin;              // Keep deepest found so far
    5.28        e_d = c_d;
    5.29 -    } else if( c_d == e_d &&    // Same depth?
    5.30 -               early != cin ) { // If not equal, must use slower algorithm
    5.31 +    } else if (c_d == e_d &&    // Same depth?
    5.32 +               early != cin) { // If not equal, must use slower algorithm
    5.33        // If same depth but not equal, one _must_ dominate the other
    5.34        // and we want the deeper (i.e., dominated) guy.
    5.35        Node *n1 = early;
    5.36        Node *n2 = cin;
    5.37 -      while( 1 ) {
    5.38 +      while (1) {
    5.39          n1 = idom(n1);          // Walk up until break cycle
    5.40          n2 = idom(n2);
    5.41 -        if( n1 == cin ||        // Walked early up to cin
    5.42 -            dom_depth(n2) < c_d )
    5.43 +        if (n1 == cin ||        // Walked early up to cin
    5.44 +            dom_depth(n2) < c_d)
    5.45            break;                // early is deeper; keep him
    5.46 -        if( n2 == early ||      // Walked cin up to early
    5.47 -            dom_depth(n1) < c_d ) {
    5.48 +        if (n2 == early ||      // Walked cin up to early
    5.49 +            dom_depth(n1) < c_d) {
    5.50            early = cin;          // cin is deeper; keep him
    5.51            break;
    5.52          }
    5.53 @@ -132,9 +132,108 @@
    5.54    // Return earliest legal location
    5.55    assert(early == find_non_split_ctrl(early), "unexpected early control");
    5.56  
    5.57 +  if (n->is_expensive()) {
    5.58 +    assert(n->in(0), "should have control input");
    5.59 +    early = get_early_ctrl_for_expensive(n, early);
    5.60 +  }
    5.61 +
    5.62    return early;
    5.63  }
    5.64  
    5.65 +//------------------------------get_early_ctrl_for_expensive---------------------------------
    5.66 +// Move node up the dominator tree as high as legal while still beneficial
    5.67 +Node *PhaseIdealLoop::get_early_ctrl_for_expensive(Node *n, Node* earliest) {
    5.68 +  assert(n->in(0) && n->is_expensive(), "expensive node with control input here");
    5.69 +  assert(OptimizeExpensiveOps, "optimization off?");
    5.70 +
    5.71 +  Node* ctl = n->in(0);
    5.72 +  assert(ctl->is_CFG(), "expensive input 0 must be cfg");
    5.73 +  uint min_dom_depth = dom_depth(earliest);
    5.74 +#ifdef ASSERT
    5.75 +  if (!is_dominator(ctl, earliest) && !is_dominator(earliest, ctl)) {
    5.76 +    dump_bad_graph("Bad graph detected in get_early_ctrl_for_expensive", n, earliest, ctl);
    5.77 +    assert(false, "Bad graph detected in get_early_ctrl_for_expensive");
    5.78 +  }
    5.79 +#endif
    5.80 +  if (dom_depth(ctl) < min_dom_depth) {
    5.81 +    return earliest;
    5.82 +  }
    5.83 +
    5.84 +  while (1) {
    5.85 +    Node *next = ctl;
    5.86 +    // Moving the node out of a loop on the projection of a If
    5.87 +    // confuses loop predication. So once we hit a Loop in a If branch
    5.88 +    // that doesn't branch to an UNC, we stop. The code that process
    5.89 +    // expensive nodes will notice the loop and skip over it to try to
    5.90 +    // move the node further up.
    5.91 +    if (ctl->is_CountedLoop() && ctl->in(1) != NULL && ctl->in(1)->in(0) != NULL && ctl->in(1)->in(0)->is_If()) {
    5.92 +      if (!is_uncommon_trap_if_pattern(ctl->in(1)->as_Proj(), Deoptimization::Reason_none)) {
    5.93 +        break;
    5.94 +      }
    5.95 +      next = idom(ctl->in(1)->in(0));
    5.96 +    } else if (ctl->is_Proj()) {
    5.97 +      // We only move it up along a projection if the projection is
    5.98 +      // the single control projection for its parent: same code path,
    5.99 +      // if it's a If with UNC or fallthrough of a call.
   5.100 +      Node* parent_ctl = ctl->in(0);
   5.101 +      if (parent_ctl == NULL) {
   5.102 +        break;
   5.103 +      } else if (parent_ctl->is_CountedLoopEnd() && parent_ctl->as_CountedLoopEnd()->loopnode() != NULL) {
   5.104 +        next = parent_ctl->as_CountedLoopEnd()->loopnode()->init_control();
   5.105 +      } else if (parent_ctl->is_If()) {
   5.106 +        if (!is_uncommon_trap_if_pattern(ctl->as_Proj(), Deoptimization::Reason_none)) {
   5.107 +          break;
   5.108 +        }
   5.109 +        assert(idom(ctl) == parent_ctl, "strange");
   5.110 +        next = idom(parent_ctl);
   5.111 +      } else if (ctl->is_CatchProj()) {
   5.112 +        if (ctl->as_Proj()->_con != CatchProjNode::fall_through_index) {
   5.113 +          break;
   5.114 +        }
   5.115 +        assert(parent_ctl->in(0)->in(0)->is_Call(), "strange graph");
   5.116 +        next = parent_ctl->in(0)->in(0)->in(0);
   5.117 +      } else {
   5.118 +        // Check if parent control has a single projection (this
   5.119 +        // control is the only possible successor of the parent
   5.120 +        // control). If so, we can try to move the node above the
   5.121 +        // parent control.
   5.122 +        int nb_ctl_proj = 0;
   5.123 +        for (DUIterator_Fast imax, i = parent_ctl->fast_outs(imax); i < imax; i++) {
   5.124 +          Node *p = parent_ctl->fast_out(i);
   5.125 +          if (p->is_Proj() && p->is_CFG()) {
   5.126 +            nb_ctl_proj++;
   5.127 +            if (nb_ctl_proj > 1) {
   5.128 +              break;
   5.129 +            }
   5.130 +          }
   5.131 +        }
   5.132 +
   5.133 +        if (nb_ctl_proj > 1) {
   5.134 +          break;
   5.135 +        }
   5.136 +        assert(parent_ctl->is_Start() || parent_ctl->is_MemBar() || parent_ctl->is_Call(), "unexpected node");
   5.137 +        assert(idom(ctl) == parent_ctl, "strange");
   5.138 +        next = idom(parent_ctl);
   5.139 +      }
   5.140 +    } else {
   5.141 +      next = idom(ctl);
   5.142 +    }
   5.143 +    if (next->is_Root() || next->is_Start() || dom_depth(next) < min_dom_depth) {
   5.144 +      break;
   5.145 +    }
   5.146 +    ctl = next;
   5.147 +  }
   5.148 +
   5.149 +  if (ctl != n->in(0)) {
   5.150 +    _igvn.hash_delete(n);
   5.151 +    n->set_req(0, ctl);
   5.152 +    _igvn.hash_insert(n);
   5.153 +  }
   5.154 +
   5.155 +  return ctl;
   5.156 +}
   5.157 +
   5.158 +
   5.159  //------------------------------set_early_ctrl---------------------------------
   5.160  // Set earliest legal control
   5.161  void PhaseIdealLoop::set_early_ctrl( Node *n ) {
   5.162 @@ -1892,6 +1991,98 @@
   5.163    }
   5.164  }
   5.165  
   5.166 +//------------------------process_expensive_nodes-----------------------------
   5.167 +// Expensive nodes have their control input set to prevent the GVN
   5.168 +// from commoning them and as a result forcing the resulting node to
   5.169 +// be in a more frequent path. Use CFG information here, to change the
   5.170 +// control inputs so that some expensive nodes can be commoned while
   5.171 +// not executed more frequently.
   5.172 +bool PhaseIdealLoop::process_expensive_nodes() {
   5.173 +  assert(OptimizeExpensiveOps, "optimization off?");
   5.174 +
   5.175 +  // Sort nodes to bring similar nodes together
   5.176 +  C->sort_expensive_nodes();
   5.177 +
   5.178 +  bool progress = false;
   5.179 +
   5.180 +  for (int i = 0; i < C->expensive_count(); ) {
   5.181 +    Node* n = C->expensive_node(i);
   5.182 +    int start = i;
   5.183 +    // Find nodes similar to n
   5.184 +    i++;
   5.185 +    for (; i < C->expensive_count() && Compile::cmp_expensive_nodes(n, C->expensive_node(i)) == 0; i++);
   5.186 +    int end = i;
   5.187 +    // And compare them two by two
   5.188 +    for (int j = start; j < end; j++) {
   5.189 +      Node* n1 = C->expensive_node(j);
   5.190 +      if (is_node_unreachable(n1)) {
   5.191 +        continue;
   5.192 +      }
   5.193 +      for (int k = j+1; k < end; k++) {
   5.194 +        Node* n2 = C->expensive_node(k);
   5.195 +        if (is_node_unreachable(n2)) {
   5.196 +          continue;
   5.197 +        }
   5.198 +
   5.199 +        assert(n1 != n2, "should be pair of nodes");
   5.200 +
   5.201 +        Node* c1 = n1->in(0);
   5.202 +        Node* c2 = n2->in(0);
   5.203 +
   5.204 +        Node* parent_c1 = c1;
   5.205 +        Node* parent_c2 = c2;
   5.206 +
   5.207 +        // The call to get_early_ctrl_for_expensive() moves the
   5.208 +        // expensive nodes up but stops at loops that are in a if
   5.209 +        // branch. See whether we can exit the loop and move above the
   5.210 +        // If.
   5.211 +        if (c1->is_Loop()) {
   5.212 +          parent_c1 = c1->in(1);
   5.213 +        }
   5.214 +        if (c2->is_Loop()) {
   5.215 +          parent_c2 = c2->in(1);
   5.216 +        }
   5.217 +
   5.218 +        if (parent_c1 == parent_c2) {
   5.219 +          _igvn._worklist.push(n1);
   5.220 +          _igvn._worklist.push(n2);
   5.221 +          continue;
   5.222 +        }
   5.223 +
   5.224 +        // Look for identical expensive node up the dominator chain.
   5.225 +        if (is_dominator(c1, c2)) {
   5.226 +          c2 = c1;
   5.227 +        } else if (is_dominator(c2, c1)) {
   5.228 +          c1 = c2;
   5.229 +        } else if (parent_c1->is_Proj() && parent_c1->in(0)->is_If() &&
   5.230 +                   parent_c2->is_Proj() && parent_c1->in(0) == parent_c2->in(0)) {
   5.231 +          // Both branches have the same expensive node so move it up
   5.232 +          // before the if.
   5.233 +          c1 = c2 = idom(parent_c1->in(0));
   5.234 +        }
   5.235 +        // Do the actual moves
   5.236 +        if (n1->in(0) != c1) {
   5.237 +          _igvn.hash_delete(n1);
   5.238 +          n1->set_req(0, c1);
   5.239 +          _igvn.hash_insert(n1);
   5.240 +          _igvn._worklist.push(n1);
   5.241 +          progress = true;
   5.242 +        }
   5.243 +        if (n2->in(0) != c2) {
   5.244 +          _igvn.hash_delete(n2);
   5.245 +          n2->set_req(0, c2);
   5.246 +          _igvn.hash_insert(n2);
   5.247 +          _igvn._worklist.push(n2);
   5.248 +          progress = true;
   5.249 +        }
   5.250 +      }
   5.251 +    }
   5.252 +  }
   5.253 +
   5.254 +  return progress;
   5.255 +}
   5.256 +
   5.257 +
   5.258  //=============================================================================
   5.259  //----------------------------build_and_optimize-------------------------------
   5.260  // Create a PhaseLoop.  Build the ideal Loop tree.  Map each Ideal Node to
   5.261 @@ -1960,7 +2151,9 @@
   5.262    }
   5.263  
   5.264    // Nothing to do, so get out
   5.265 -  if( !C->has_loops() && !skip_loop_opts && !do_split_ifs && !_verify_me && !_verify_only ) {
   5.266 +  bool stop_early = !C->has_loops() && !skip_loop_opts && !do_split_ifs && !_verify_me && !_verify_only;
   5.267 +  bool do_expensive_nodes = C->should_optimize_expensive_nodes(_igvn);
   5.268 +  if (stop_early && !do_expensive_nodes) {
   5.269      _igvn.optimize();           // Cleanup NeverBranches
   5.270      return;
   5.271    }
   5.272 @@ -2058,6 +2251,21 @@
   5.273      return;
   5.274    }
   5.275  
   5.276 +  if (stop_early) {
   5.277 +    assert(do_expensive_nodes, "why are we here?");
   5.278 +    if (process_expensive_nodes()) {
   5.279 +      // If we made some progress when processing expensive nodes then
   5.280 +      // the IGVN may modify the graph in a way that will allow us to
   5.281 +      // make some more progress: we need to try processing expensive
   5.282 +      // nodes again.
   5.283 +      C->set_major_progress();
   5.284 +    }
   5.285 +
   5.286 +    _igvn.optimize();
   5.287 +
   5.288 +    return;
   5.289 +  }
   5.290 +
   5.291    // Some parser-inserted loop predicates could never be used by loop
   5.292    // predication or they were moved away from loop during some optimizations.
   5.293    // For example, peeling. Eliminate them before next loop optimizations.
   5.294 @@ -2120,6 +2328,10 @@
   5.295      NOT_PRODUCT( if( VerifyLoopOptimizations ) verify(); );
   5.296    }
   5.297  
   5.298 +  if (!C->major_progress() && do_expensive_nodes && process_expensive_nodes()) {
   5.299 +    C->set_major_progress();
   5.300 +  }
   5.301 +
   5.302    // Perform loop predication before iteration splitting
   5.303    if (C->has_loops() && !C->major_progress() && (C->predicate_count() > 0)) {
   5.304      _ltree_root->_child->loop_predication(this);
   5.305 @@ -3299,7 +3511,7 @@
   5.306  #ifdef ASSERT
   5.307      if (legal->is_Start() && !early->is_Root()) {
   5.308        // Bad graph. Print idom path and fail.
   5.309 -      dump_bad_graph(n, early, LCA);
   5.310 +      dump_bad_graph("Bad graph detected in build_loop_late", n, early, LCA);
   5.311        assert(false, "Bad graph detected in build_loop_late");
   5.312      }
   5.313  #endif
   5.314 @@ -3350,8 +3562,8 @@
   5.315  }
   5.316  
   5.317  #ifdef ASSERT
   5.318 -void PhaseIdealLoop::dump_bad_graph(Node* n, Node* early, Node* LCA) {
   5.319 -  tty->print_cr( "Bad graph detected in build_loop_late");
   5.320 +void PhaseIdealLoop::dump_bad_graph(const char* msg, Node* n, Node* early, Node* LCA) {
   5.321 +  tty->print_cr(msg);
   5.322    tty->print("n: "); n->dump();
   5.323    tty->print("early(n): "); early->dump();
   5.324    if (n->in(0) != NULL  && !n->in(0)->is_top() &&
     6.1 --- a/src/share/vm/opto/loopnode.hpp	Tue Feb 12 07:39:42 2013 -0800
     6.2 +++ b/src/share/vm/opto/loopnode.hpp	Tue Feb 12 14:33:19 2013 -0800
     6.3 @@ -263,9 +263,18 @@
     6.4    bool stride_is_con() const        { Node *tmp = stride  (); return (tmp != NULL && tmp->is_Con()); }
     6.5    BoolTest::mask test_trip() const  { return in(TestValue)->as_Bool()->_test._test; }
     6.6    CountedLoopNode *loopnode() const {
     6.7 +    // The CountedLoopNode that goes with this CountedLoopEndNode may
     6.8 +    // have been optimized out by the IGVN so be cautious with the
     6.9 +    // pattern matching on the graph
    6.10 +    if (phi() == NULL) {
    6.11 +      return NULL;
    6.12 +    }
    6.13      Node *ln = phi()->in(0);
    6.14 -    assert( ln->Opcode() == Op_CountedLoop, "malformed loop" );
    6.15 -    return (CountedLoopNode*)ln; }
    6.16 +    if (ln->is_CountedLoop() && ln->as_CountedLoop()->loopexit() == this) {
    6.17 +      return (CountedLoopNode*)ln;
    6.18 +    }
    6.19 +    return NULL;
    6.20 +  }
    6.21  
    6.22  #ifndef PRODUCT
    6.23    virtual void dump_spec(outputStream *st) const;
    6.24 @@ -598,6 +607,7 @@
    6.25    // check if transform created new nodes that need _ctrl recorded
    6.26    Node *get_late_ctrl( Node *n, Node *early );
    6.27    Node *get_early_ctrl( Node *n );
    6.28 +  Node *get_early_ctrl_for_expensive(Node *n, Node* earliest);
    6.29    void set_early_ctrl( Node *n );
    6.30    void set_subtree_ctrl( Node *root );
    6.31    void set_ctrl( Node *n, Node *ctrl ) {
    6.32 @@ -905,6 +915,16 @@
    6.33    void collect_potentially_useful_predicates(IdealLoopTree *loop, Unique_Node_List &predicate_opaque1);
    6.34    void eliminate_useless_predicates();
    6.35  
    6.36 +  // Change the control input of expensive nodes to allow commoning by
    6.37 +  // IGVN when it is guaranteed to not result in a more frequent
    6.38 +  // execution of the expensive node. Return true if progress.
    6.39 +  bool process_expensive_nodes();
    6.40 +
    6.41 +  // Check whether node has become unreachable
    6.42 +  bool is_node_unreachable(Node *n) const {
    6.43 +    return !has_node(n) || n->is_unreachable(_igvn);
    6.44 +  }
    6.45 +
    6.46    // Eliminate range-checks and other trip-counter vs loop-invariant tests.
    6.47    void do_range_check( IdealLoopTree *loop, Node_List &old_new );
    6.48  
    6.49 @@ -1043,7 +1063,7 @@
    6.50    void register_new_node( Node *n, Node *blk );
    6.51  
    6.52  #ifdef ASSERT
    6.53 -void dump_bad_graph(Node* n, Node* early, Node* LCA);
    6.54 +  void dump_bad_graph(const char* msg, Node* n, Node* early, Node* LCA);
    6.55  #endif
    6.56  
    6.57  #ifndef PRODUCT
     7.1 --- a/src/share/vm/opto/node.cpp	Tue Feb 12 07:39:42 2013 -0800
     7.2 +++ b/src/share/vm/opto/node.cpp	Tue Feb 12 14:33:19 2013 -0800
     7.3 @@ -493,6 +493,8 @@
     7.4    }
     7.5    if (is_macro())
     7.6      compile->add_macro_node(n);
     7.7 +  if (is_expensive())
     7.8 +    compile->add_expensive_node(n);
     7.9  
    7.10    n->set_idx(compile->next_unique()); // Get new unique index as well
    7.11    debug_only( n->verify_construction() );
    7.12 @@ -616,6 +618,9 @@
    7.13    if (is_macro()) {
    7.14      compile->remove_macro_node(this);
    7.15    }
    7.16 +  if (is_expensive()) {
    7.17 +    compile->remove_expensive_node(this);
    7.18 +  }
    7.19  #ifdef ASSERT
    7.20    // We will not actually delete the storage, but we'll make the node unusable.
    7.21    *(address*)this = badAddress;  // smash the C++ vtbl, probably
    7.22 @@ -689,6 +694,13 @@
    7.23  }
    7.24  #endif
    7.25  
    7.26 +
    7.27 +//------------------------------is_unreachable---------------------------------
    7.28 +bool Node::is_unreachable(PhaseIterGVN &igvn) const {
    7.29 +  assert(!is_Mach(), "doesn't work with MachNodes");
    7.30 +  return outcnt() == 0 || igvn.type(this) == Type::TOP || in(0)->is_top();
    7.31 +}
    7.32 +
    7.33  //------------------------------add_req----------------------------------------
    7.34  // Add a new required input at the end
    7.35  void Node::add_req( Node *n ) {
    7.36 @@ -1246,6 +1258,9 @@
    7.37        if (dead->is_macro()) {
    7.38          igvn->C->remove_macro_node(dead);
    7.39        }
    7.40 +      if (dead->is_expensive()) {
    7.41 +        igvn->C->remove_expensive_node(dead);
    7.42 +      }
    7.43        // Kill all inputs to the dead guy
    7.44        for (uint i=0; i < dead->req(); i++) {
    7.45          Node *n = dead->in(i);      // Get input to dead guy
     8.1 --- a/src/share/vm/opto/node.hpp	Tue Feb 12 07:39:42 2013 -0800
     8.2 +++ b/src/share/vm/opto/node.hpp	Tue Feb 12 14:33:19 2013 -0800
     8.3 @@ -378,6 +378,8 @@
     8.4    bool is_dead() const;
     8.5  #define is_not_dead(n) ((n) == NULL || !VerifyIterativeGVN || !((n)->is_dead()))
     8.6  #endif
     8.7 +  // Check whether node has become unreachable
     8.8 +  bool is_unreachable(PhaseIterGVN &igvn) const;
     8.9  
    8.10    // Set a required input edge, also updates corresponding output edge
    8.11    void add_req( Node *n ); // Append a NEW required input
    8.12 @@ -646,7 +648,8 @@
    8.13      Flag_may_be_short_branch = Flag_is_dead_loop_safe << 1,
    8.14      Flag_avoid_back_to_back  = Flag_may_be_short_branch << 1,
    8.15      Flag_has_call            = Flag_avoid_back_to_back << 1,
    8.16 -    _max_flags = (Flag_has_call << 1) - 1 // allow flags combination
    8.17 +    Flag_is_expensive        = Flag_has_call << 1,
    8.18 +    _max_flags = (Flag_is_expensive << 1) - 1 // allow flags combination
    8.19    };
    8.20  
    8.21  private:
    8.22 @@ -819,6 +822,8 @@
    8.23  
    8.24    // The node is a "macro" node which needs to be expanded before matching
    8.25    bool is_macro() const { return (_flags & Flag_is_macro) != 0; }
    8.26 +  // The node is expensive: the best control is set during loop opts
    8.27 +  bool is_expensive() const { return (_flags & Flag_is_expensive) != 0 && in(0) != NULL; }
    8.28  
    8.29  //----------------- Optimization
    8.30  
     9.1 --- a/src/share/vm/opto/phaseX.cpp	Tue Feb 12 07:39:42 2013 -0800
     9.2 +++ b/src/share/vm/opto/phaseX.cpp	Tue Feb 12 14:33:19 2013 -0800
     9.3 @@ -1203,6 +1203,9 @@
     9.4          if (dead->is_macro()) {
     9.5            C->remove_macro_node(dead);
     9.6          }
     9.7 +        if (dead->is_expensive()) {
     9.8 +          C->remove_expensive_node(dead);
     9.9 +        }
    9.10  
    9.11          if (recurse) {
    9.12            continue;
    10.1 --- a/src/share/vm/opto/subnode.hpp	Tue Feb 12 07:39:42 2013 -0800
    10.2 +++ b/src/share/vm/opto/subnode.hpp	Tue Feb 12 14:33:19 2013 -0800
    10.3 @@ -456,7 +456,10 @@
    10.4  //  Exponentiate a double
    10.5  class ExpDNode : public Node {
    10.6  public:
    10.7 -  ExpDNode( Node *c, Node *in1 ) : Node(c, in1) {}
    10.8 +  ExpDNode(Compile* C, Node *c, Node *in1) : Node(c, in1) {
    10.9 +    init_flags(Flag_is_expensive);
   10.10 +    C->add_expensive_node(this);
   10.11 +  }
   10.12    virtual int Opcode() const;
   10.13    const Type *bottom_type() const { return Type::DOUBLE; }
   10.14    virtual uint ideal_reg() const { return Op_RegD; }
   10.15 @@ -489,7 +492,10 @@
   10.16  // Raise a double to a double power
   10.17  class PowDNode : public Node {
   10.18  public:
   10.19 -  PowDNode(Node *c, Node *in1, Node *in2  ) : Node(c, in1, in2) {}
   10.20 +  PowDNode(Compile* C, Node *c, Node *in1, Node *in2 ) : Node(c, in1, in2) {
   10.21 +    init_flags(Flag_is_expensive);
   10.22 +    C->add_expensive_node(this);
   10.23 +  }
   10.24    virtual int Opcode() const;
   10.25    const Type *bottom_type() const { return Type::DOUBLE; }
   10.26    virtual uint ideal_reg() const { return Op_RegD; }

mercurial