src/share/vm/opto/block.cpp

changeset 6478
044b28168e20
parent 5639
4b078f877b56
child 6490
41b780b43b74
     1.1 --- a/src/share/vm/opto/block.cpp	Thu Nov 07 11:47:11 2013 +0100
     1.2 +++ b/src/share/vm/opto/block.cpp	Thu Nov 14 19:24:59 2013 -0800
     1.3 @@ -144,6 +144,10 @@
     1.4    remove_node(find_node(n));
     1.5  }
     1.6  
     1.7 +bool Block::contains(const Node *n) const {
     1.8 +  return _nodes.contains(n);
     1.9 +}
    1.10 +
    1.11  // Return empty status of a block.  Empty blocks contain only the head, other
    1.12  // ideal nodes, and an optional trailing goto.
    1.13  int Block::is_Empty() const {
    1.14 @@ -699,7 +703,7 @@
    1.15  // Fix up the final control flow for basic blocks.
    1.16  void PhaseCFG::fixup_flow() {
    1.17    // Fixup final control flow for the blocks.  Remove jump-to-next
    1.18 -  // block.  If neither arm of a IF follows the conditional branch, we
    1.19 +  // block. If neither arm of an IF follows the conditional branch, we
    1.20    // have to add a second jump after the conditional.  We place the
    1.21    // TRUE branch target in succs[0] for both GOTOs and IFs.
    1.22    for (uint i = 0; i < number_of_blocks(); i++) {
    1.23 @@ -844,6 +848,228 @@
    1.24  }
    1.25  
    1.26  
    1.27 +// postalloc_expand: Expand nodes after register allocation.
    1.28 +//
    1.29 +// postalloc_expand has to be called after register allocation, just
    1.30 +// before output (i.e. scheduling). It only gets called if
    1.31 +// Matcher::require_postalloc_expand is true.
    1.32 +//
    1.33 +// Background:
    1.34 +//
    1.35 +// Nodes that are expandend (one compound node requiring several
    1.36 +// assembler instructions to be implemented split into two or more
    1.37 +// non-compound nodes) after register allocation are not as nice as
    1.38 +// the ones expanded before register allocation - they don't
    1.39 +// participate in optimizations as global code motion. But after
    1.40 +// register allocation we can expand nodes that use registers which
    1.41 +// are not spillable or registers that are not allocated, because the
    1.42 +// old compound node is simply replaced (in its location in the basic
    1.43 +// block) by a new subgraph which does not contain compound nodes any
    1.44 +// more. The scheduler called during output can later on process these
    1.45 +// non-compound nodes.
    1.46 +//
    1.47 +// Implementation:
    1.48 +//
    1.49 +// Nodes requiring postalloc expand are specified in the ad file by using
    1.50 +// a postalloc_expand statement instead of ins_encode. A postalloc_expand
    1.51 +// contains a single call to an encoding, as does an ins_encode
    1.52 +// statement. Instead of an emit() function a postalloc_expand() function
    1.53 +// is generated that doesn't emit assembler but creates a new
    1.54 +// subgraph. The code below calls this postalloc_expand function for each
    1.55 +// node with the appropriate attribute. This function returns the new
    1.56 +// nodes generated in an array passed in the call. The old node,
    1.57 +// potential MachTemps before and potential Projs after it then get
    1.58 +// disconnected and replaced by the new nodes. The instruction
    1.59 +// generating the result has to be the last one in the array. In
    1.60 +// general it is assumed that Projs after the node expanded are
    1.61 +// kills. These kills are not required any more after expanding as
    1.62 +// there are now explicitly visible def-use chains and the Projs are
    1.63 +// removed. This does not hold for calls: They do not only have
    1.64 +// kill-Projs but also Projs defining values. Therefore Projs after
    1.65 +// the node expanded are removed for all but for calls. If a node is
    1.66 +// to be reused, it must be added to the nodes list returned, and it
    1.67 +// will be added again.
    1.68 +//
    1.69 +// Implementing the postalloc_expand function for a node in an enc_class
    1.70 +// is rather tedious. It requires knowledge about many node details, as
    1.71 +// the nodes and the subgraph must be hand crafted. To simplify this,
    1.72 +// adlc generates some utility variables into the postalloc_expand function,
    1.73 +// e.g., holding the operands as specified by the postalloc_expand encoding
    1.74 +// specification, e.g.:
    1.75 +//  * unsigned idx_<par_name>  holding the index of the node in the ins
    1.76 +//  * Node *n_<par_name>       holding the node loaded from the ins
    1.77 +//  * MachOpnd *op_<par_name>  holding the corresponding operand
    1.78 +//
    1.79 +// The ordering of operands can not be determined by looking at a
    1.80 +// rule. Especially if a match rule matches several different trees,
    1.81 +// several nodes are generated from one instruct specification with
    1.82 +// different operand orderings. In this case the adlc generated
    1.83 +// variables are the only way to access the ins and operands
    1.84 +// deterministically.
    1.85 +//
    1.86 +// If assigning a register to a node that contains an oop, don't
    1.87 +// forget to call ra_->set_oop() for the node.
    1.88 +void PhaseCFG::postalloc_expand(PhaseRegAlloc* _ra) {
    1.89 +  GrowableArray <Node *> new_nodes(32); // Array with new nodes filled by postalloc_expand function of node.
    1.90 +  GrowableArray <Node *> remove(32);
    1.91 +  GrowableArray <Node *> succs(32);
    1.92 +  unsigned int max_idx = C->unique();   // Remember to distinguish new from old nodes.
    1.93 +  DEBUG_ONLY(bool foundNode = false);
    1.94 +
    1.95 +  // for all blocks
    1.96 +  for (uint i = 0; i < number_of_blocks(); i++) {
    1.97 +    Block *b = _blocks[i];
    1.98 +    // For all instructions in the current block.
    1.99 +    for (uint j = 0; j < b->number_of_nodes(); j++) {
   1.100 +      Node *n = b->get_node(j);
   1.101 +      if (n->is_Mach() && n->as_Mach()->requires_postalloc_expand()) {
   1.102 +#ifdef ASSERT
   1.103 +        if (TracePostallocExpand) {
   1.104 +          if (!foundNode) {
   1.105 +            foundNode = true;
   1.106 +            tty->print("POSTALLOC EXPANDING %d %s\n", C->compile_id(),
   1.107 +                       C->method() ? C->method()->name()->as_utf8() : C->stub_name());
   1.108 +          }
   1.109 +          tty->print("  postalloc expanding "); n->dump();
   1.110 +          if (Verbose) {
   1.111 +            tty->print("    with ins:\n");
   1.112 +            for (uint k = 0; k < n->len(); ++k) {
   1.113 +              if (n->in(k)) { tty->print("        "); n->in(k)->dump(); }
   1.114 +            }
   1.115 +          }
   1.116 +        }
   1.117 +#endif
   1.118 +        new_nodes.clear();
   1.119 +        // Collect nodes that have to be removed from the block later on.
   1.120 +        uint req = n->req();
   1.121 +        remove.clear();
   1.122 +        for (uint k = 0; k < req; ++k) {
   1.123 +          if (n->in(k) && n->in(k)->is_MachTemp()) {
   1.124 +            remove.push(n->in(k)); // MachTemps which are inputs to the old node have to be removed.
   1.125 +            n->in(k)->del_req(0);
   1.126 +            j--;
   1.127 +          }
   1.128 +        }
   1.129 +
   1.130 +        // Check whether we can allocate enough nodes. We set a fix limit for
   1.131 +        // the size of postalloc expands with this.
   1.132 +        uint unique_limit = C->unique() + 40;
   1.133 +        if (unique_limit >= _ra->node_regs_max_index()) {
   1.134 +          Compile::current()->record_failure("out of nodes in postalloc expand");
   1.135 +          return;
   1.136 +        }
   1.137 +
   1.138 +        // Emit (i.e. generate new nodes).
   1.139 +        n->as_Mach()->postalloc_expand(&new_nodes, _ra);
   1.140 +
   1.141 +        assert(C->unique() < unique_limit, "You allocated too many nodes in your postalloc expand.");
   1.142 +
   1.143 +        // Disconnect the inputs of the old node.
   1.144 +        //
   1.145 +        // We reuse MachSpillCopy nodes. If we need to expand them, there
   1.146 +        // are many, so reusing pays off. If reused, the node already
   1.147 +        // has the new ins. n must be the last node on new_nodes list.
   1.148 +        if (!n->is_MachSpillCopy()) {
   1.149 +          for (int k = req - 1; k >= 0; --k) {
   1.150 +            n->del_req(k);
   1.151 +          }
   1.152 +        }
   1.153 +
   1.154 +#ifdef ASSERT
   1.155 +        // Check that all nodes have proper operands.
   1.156 +        for (int k = 0; k < new_nodes.length(); ++k) {
   1.157 +          if (new_nodes.at(k)->_idx < max_idx || !new_nodes.at(k)->is_Mach()) continue; // old node, Proj ...
   1.158 +          MachNode *m = new_nodes.at(k)->as_Mach();
   1.159 +          for (unsigned int l = 0; l < m->num_opnds(); ++l) {
   1.160 +            if (MachOper::notAnOper(m->_opnds[l])) {
   1.161 +              outputStream *os = tty;
   1.162 +              os->print("Node %s ", m->Name());
   1.163 +              os->print("has invalid opnd %d: %p\n", l, m->_opnds[l]);
   1.164 +              assert(0, "Invalid operands, see inline trace in hs_err_pid file.");
   1.165 +            }
   1.166 +          }
   1.167 +        }
   1.168 +#endif
   1.169 +
   1.170 +        // Collect succs of old node in remove (for projections) and in succs (for
   1.171 +        // all other nodes) do _not_ collect projections in remove (but in succs)
   1.172 +        // in case the node is a call. We need the projections for calls as they are
   1.173 +        // associated with registes (i.e. they are defs).
   1.174 +        succs.clear();
   1.175 +        for (DUIterator k = n->outs(); n->has_out(k); k++) {
   1.176 +          if (n->out(k)->is_Proj() && !n->is_MachCall() && !n->is_MachBranch()) {
   1.177 +            remove.push(n->out(k));
   1.178 +          } else {
   1.179 +            succs.push(n->out(k));
   1.180 +          }
   1.181 +        }
   1.182 +        // Replace old node n as input of its succs by last of the new nodes.
   1.183 +        for (int k = 0; k < succs.length(); ++k) {
   1.184 +          Node *succ = succs.at(k);
   1.185 +          for (uint l = 0; l < succ->req(); ++l) {
   1.186 +            if (succ->in(l) == n) {
   1.187 +              succ->set_req(l, new_nodes.at(new_nodes.length() - 1));
   1.188 +            }
   1.189 +          }
   1.190 +          for (uint l = succ->req(); l < succ->len(); ++l) {
   1.191 +            if (succ->in(l) == n) {
   1.192 +              succ->set_prec(l, new_nodes.at(new_nodes.length() - 1));
   1.193 +            }
   1.194 +          }
   1.195 +        }
   1.196 +
   1.197 +        // Index of old node in block.
   1.198 +        uint index = b->find_node(n);
   1.199 +        // Insert new nodes into block and map them in nodes->blocks array
   1.200 +        // and remember last node in n2.
   1.201 +        Node *n2 = NULL;
   1.202 +        for (int k = 0; k < new_nodes.length(); ++k) {
   1.203 +          n2 = new_nodes.at(k);
   1.204 +          b->insert_node(n2, ++index);
   1.205 +          map_node_to_block(n2, b);
   1.206 +        }
   1.207 +
   1.208 +        // Add old node n to remove and remove them all from block.
   1.209 +        remove.push(n);
   1.210 +        j--;
   1.211 +#ifdef ASSERT
   1.212 +        if (TracePostallocExpand && Verbose) {
   1.213 +          tty->print("    removing:\n");
   1.214 +          for (int k = 0; k < remove.length(); ++k) {
   1.215 +            tty->print("        "); remove.at(k)->dump();
   1.216 +          }
   1.217 +          tty->print("    inserting:\n");
   1.218 +          for (int k = 0; k < new_nodes.length(); ++k) {
   1.219 +            tty->print("        "); new_nodes.at(k)->dump();
   1.220 +          }
   1.221 +        }
   1.222 +#endif
   1.223 +        for (int k = 0; k < remove.length(); ++k) {
   1.224 +          if (b->contains(remove.at(k))) {
   1.225 +            b->find_remove(remove.at(k));
   1.226 +          } else {
   1.227 +            assert(remove.at(k)->is_Proj() && (remove.at(k)->in(0)->is_MachBranch()), "");
   1.228 +          }
   1.229 +        }
   1.230 +        // If anything has been inserted (n2 != NULL), continue after last node inserted.
   1.231 +        // This does not always work. Some postalloc expands don't insert any nodes, if they
   1.232 +        // do optimizations (e.g., max(x,x)). In this case we decrement j accordingly.
   1.233 +        j = n2 ? b->find_node(n2) : j;
   1.234 +      }
   1.235 +    }
   1.236 +  }
   1.237 +
   1.238 +#ifdef ASSERT
   1.239 +  if (foundNode) {
   1.240 +    tty->print("FINISHED %d %s\n", C->compile_id(),
   1.241 +               C->method() ? C->method()->name()->as_utf8() : C->stub_name());
   1.242 +    tty->flush();
   1.243 +  }
   1.244 +#endif
   1.245 +}
   1.246 +
   1.247 +
   1.248 +//------------------------------dump-------------------------------------------
   1.249  #ifndef PRODUCT
   1.250  void PhaseCFG::_dump_cfg( const Node *end, VectorSet &visited  ) const {
   1.251    const Node *x = end->is_block_proj();

mercurial