1.1 --- a/src/share/vm/opto/block.cpp Thu Nov 07 11:47:11 2013 +0100 1.2 +++ b/src/share/vm/opto/block.cpp Thu Nov 14 19:24:59 2013 -0800 1.3 @@ -144,6 +144,10 @@ 1.4 remove_node(find_node(n)); 1.5 } 1.6 1.7 +bool Block::contains(const Node *n) const { 1.8 + return _nodes.contains(n); 1.9 +} 1.10 + 1.11 // Return empty status of a block. Empty blocks contain only the head, other 1.12 // ideal nodes, and an optional trailing goto. 1.13 int Block::is_Empty() const { 1.14 @@ -699,7 +703,7 @@ 1.15 // Fix up the final control flow for basic blocks. 1.16 void PhaseCFG::fixup_flow() { 1.17 // Fixup final control flow for the blocks. Remove jump-to-next 1.18 - // block. If neither arm of a IF follows the conditional branch, we 1.19 + // block. If neither arm of an IF follows the conditional branch, we 1.20 // have to add a second jump after the conditional. We place the 1.21 // TRUE branch target in succs[0] for both GOTOs and IFs. 1.22 for (uint i = 0; i < number_of_blocks(); i++) { 1.23 @@ -844,6 +848,228 @@ 1.24 } 1.25 1.26 1.27 +// postalloc_expand: Expand nodes after register allocation. 1.28 +// 1.29 +// postalloc_expand has to be called after register allocation, just 1.30 +// before output (i.e. scheduling). It only gets called if 1.31 +// Matcher::require_postalloc_expand is true. 1.32 +// 1.33 +// Background: 1.34 +// 1.35 +// Nodes that are expandend (one compound node requiring several 1.36 +// assembler instructions to be implemented split into two or more 1.37 +// non-compound nodes) after register allocation are not as nice as 1.38 +// the ones expanded before register allocation - they don't 1.39 +// participate in optimizations as global code motion. But after 1.40 +// register allocation we can expand nodes that use registers which 1.41 +// are not spillable or registers that are not allocated, because the 1.42 +// old compound node is simply replaced (in its location in the basic 1.43 +// block) by a new subgraph which does not contain compound nodes any 1.44 +// more. The scheduler called during output can later on process these 1.45 +// non-compound nodes. 1.46 +// 1.47 +// Implementation: 1.48 +// 1.49 +// Nodes requiring postalloc expand are specified in the ad file by using 1.50 +// a postalloc_expand statement instead of ins_encode. A postalloc_expand 1.51 +// contains a single call to an encoding, as does an ins_encode 1.52 +// statement. Instead of an emit() function a postalloc_expand() function 1.53 +// is generated that doesn't emit assembler but creates a new 1.54 +// subgraph. The code below calls this postalloc_expand function for each 1.55 +// node with the appropriate attribute. This function returns the new 1.56 +// nodes generated in an array passed in the call. The old node, 1.57 +// potential MachTemps before and potential Projs after it then get 1.58 +// disconnected and replaced by the new nodes. The instruction 1.59 +// generating the result has to be the last one in the array. In 1.60 +// general it is assumed that Projs after the node expanded are 1.61 +// kills. These kills are not required any more after expanding as 1.62 +// there are now explicitly visible def-use chains and the Projs are 1.63 +// removed. This does not hold for calls: They do not only have 1.64 +// kill-Projs but also Projs defining values. Therefore Projs after 1.65 +// the node expanded are removed for all but for calls. If a node is 1.66 +// to be reused, it must be added to the nodes list returned, and it 1.67 +// will be added again. 1.68 +// 1.69 +// Implementing the postalloc_expand function for a node in an enc_class 1.70 +// is rather tedious. It requires knowledge about many node details, as 1.71 +// the nodes and the subgraph must be hand crafted. To simplify this, 1.72 +// adlc generates some utility variables into the postalloc_expand function, 1.73 +// e.g., holding the operands as specified by the postalloc_expand encoding 1.74 +// specification, e.g.: 1.75 +// * unsigned idx_<par_name> holding the index of the node in the ins 1.76 +// * Node *n_<par_name> holding the node loaded from the ins 1.77 +// * MachOpnd *op_<par_name> holding the corresponding operand 1.78 +// 1.79 +// The ordering of operands can not be determined by looking at a 1.80 +// rule. Especially if a match rule matches several different trees, 1.81 +// several nodes are generated from one instruct specification with 1.82 +// different operand orderings. In this case the adlc generated 1.83 +// variables are the only way to access the ins and operands 1.84 +// deterministically. 1.85 +// 1.86 +// If assigning a register to a node that contains an oop, don't 1.87 +// forget to call ra_->set_oop() for the node. 1.88 +void PhaseCFG::postalloc_expand(PhaseRegAlloc* _ra) { 1.89 + GrowableArray <Node *> new_nodes(32); // Array with new nodes filled by postalloc_expand function of node. 1.90 + GrowableArray <Node *> remove(32); 1.91 + GrowableArray <Node *> succs(32); 1.92 + unsigned int max_idx = C->unique(); // Remember to distinguish new from old nodes. 1.93 + DEBUG_ONLY(bool foundNode = false); 1.94 + 1.95 + // for all blocks 1.96 + for (uint i = 0; i < number_of_blocks(); i++) { 1.97 + Block *b = _blocks[i]; 1.98 + // For all instructions in the current block. 1.99 + for (uint j = 0; j < b->number_of_nodes(); j++) { 1.100 + Node *n = b->get_node(j); 1.101 + if (n->is_Mach() && n->as_Mach()->requires_postalloc_expand()) { 1.102 +#ifdef ASSERT 1.103 + if (TracePostallocExpand) { 1.104 + if (!foundNode) { 1.105 + foundNode = true; 1.106 + tty->print("POSTALLOC EXPANDING %d %s\n", C->compile_id(), 1.107 + C->method() ? C->method()->name()->as_utf8() : C->stub_name()); 1.108 + } 1.109 + tty->print(" postalloc expanding "); n->dump(); 1.110 + if (Verbose) { 1.111 + tty->print(" with ins:\n"); 1.112 + for (uint k = 0; k < n->len(); ++k) { 1.113 + if (n->in(k)) { tty->print(" "); n->in(k)->dump(); } 1.114 + } 1.115 + } 1.116 + } 1.117 +#endif 1.118 + new_nodes.clear(); 1.119 + // Collect nodes that have to be removed from the block later on. 1.120 + uint req = n->req(); 1.121 + remove.clear(); 1.122 + for (uint k = 0; k < req; ++k) { 1.123 + if (n->in(k) && n->in(k)->is_MachTemp()) { 1.124 + remove.push(n->in(k)); // MachTemps which are inputs to the old node have to be removed. 1.125 + n->in(k)->del_req(0); 1.126 + j--; 1.127 + } 1.128 + } 1.129 + 1.130 + // Check whether we can allocate enough nodes. We set a fix limit for 1.131 + // the size of postalloc expands with this. 1.132 + uint unique_limit = C->unique() + 40; 1.133 + if (unique_limit >= _ra->node_regs_max_index()) { 1.134 + Compile::current()->record_failure("out of nodes in postalloc expand"); 1.135 + return; 1.136 + } 1.137 + 1.138 + // Emit (i.e. generate new nodes). 1.139 + n->as_Mach()->postalloc_expand(&new_nodes, _ra); 1.140 + 1.141 + assert(C->unique() < unique_limit, "You allocated too many nodes in your postalloc expand."); 1.142 + 1.143 + // Disconnect the inputs of the old node. 1.144 + // 1.145 + // We reuse MachSpillCopy nodes. If we need to expand them, there 1.146 + // are many, so reusing pays off. If reused, the node already 1.147 + // has the new ins. n must be the last node on new_nodes list. 1.148 + if (!n->is_MachSpillCopy()) { 1.149 + for (int k = req - 1; k >= 0; --k) { 1.150 + n->del_req(k); 1.151 + } 1.152 + } 1.153 + 1.154 +#ifdef ASSERT 1.155 + // Check that all nodes have proper operands. 1.156 + for (int k = 0; k < new_nodes.length(); ++k) { 1.157 + if (new_nodes.at(k)->_idx < max_idx || !new_nodes.at(k)->is_Mach()) continue; // old node, Proj ... 1.158 + MachNode *m = new_nodes.at(k)->as_Mach(); 1.159 + for (unsigned int l = 0; l < m->num_opnds(); ++l) { 1.160 + if (MachOper::notAnOper(m->_opnds[l])) { 1.161 + outputStream *os = tty; 1.162 + os->print("Node %s ", m->Name()); 1.163 + os->print("has invalid opnd %d: %p\n", l, m->_opnds[l]); 1.164 + assert(0, "Invalid operands, see inline trace in hs_err_pid file."); 1.165 + } 1.166 + } 1.167 + } 1.168 +#endif 1.169 + 1.170 + // Collect succs of old node in remove (for projections) and in succs (for 1.171 + // all other nodes) do _not_ collect projections in remove (but in succs) 1.172 + // in case the node is a call. We need the projections for calls as they are 1.173 + // associated with registes (i.e. they are defs). 1.174 + succs.clear(); 1.175 + for (DUIterator k = n->outs(); n->has_out(k); k++) { 1.176 + if (n->out(k)->is_Proj() && !n->is_MachCall() && !n->is_MachBranch()) { 1.177 + remove.push(n->out(k)); 1.178 + } else { 1.179 + succs.push(n->out(k)); 1.180 + } 1.181 + } 1.182 + // Replace old node n as input of its succs by last of the new nodes. 1.183 + for (int k = 0; k < succs.length(); ++k) { 1.184 + Node *succ = succs.at(k); 1.185 + for (uint l = 0; l < succ->req(); ++l) { 1.186 + if (succ->in(l) == n) { 1.187 + succ->set_req(l, new_nodes.at(new_nodes.length() - 1)); 1.188 + } 1.189 + } 1.190 + for (uint l = succ->req(); l < succ->len(); ++l) { 1.191 + if (succ->in(l) == n) { 1.192 + succ->set_prec(l, new_nodes.at(new_nodes.length() - 1)); 1.193 + } 1.194 + } 1.195 + } 1.196 + 1.197 + // Index of old node in block. 1.198 + uint index = b->find_node(n); 1.199 + // Insert new nodes into block and map them in nodes->blocks array 1.200 + // and remember last node in n2. 1.201 + Node *n2 = NULL; 1.202 + for (int k = 0; k < new_nodes.length(); ++k) { 1.203 + n2 = new_nodes.at(k); 1.204 + b->insert_node(n2, ++index); 1.205 + map_node_to_block(n2, b); 1.206 + } 1.207 + 1.208 + // Add old node n to remove and remove them all from block. 1.209 + remove.push(n); 1.210 + j--; 1.211 +#ifdef ASSERT 1.212 + if (TracePostallocExpand && Verbose) { 1.213 + tty->print(" removing:\n"); 1.214 + for (int k = 0; k < remove.length(); ++k) { 1.215 + tty->print(" "); remove.at(k)->dump(); 1.216 + } 1.217 + tty->print(" inserting:\n"); 1.218 + for (int k = 0; k < new_nodes.length(); ++k) { 1.219 + tty->print(" "); new_nodes.at(k)->dump(); 1.220 + } 1.221 + } 1.222 +#endif 1.223 + for (int k = 0; k < remove.length(); ++k) { 1.224 + if (b->contains(remove.at(k))) { 1.225 + b->find_remove(remove.at(k)); 1.226 + } else { 1.227 + assert(remove.at(k)->is_Proj() && (remove.at(k)->in(0)->is_MachBranch()), ""); 1.228 + } 1.229 + } 1.230 + // If anything has been inserted (n2 != NULL), continue after last node inserted. 1.231 + // This does not always work. Some postalloc expands don't insert any nodes, if they 1.232 + // do optimizations (e.g., max(x,x)). In this case we decrement j accordingly. 1.233 + j = n2 ? b->find_node(n2) : j; 1.234 + } 1.235 + } 1.236 + } 1.237 + 1.238 +#ifdef ASSERT 1.239 + if (foundNode) { 1.240 + tty->print("FINISHED %d %s\n", C->compile_id(), 1.241 + C->method() ? C->method()->name()->as_utf8() : C->stub_name()); 1.242 + tty->flush(); 1.243 + } 1.244 +#endif 1.245 +} 1.246 + 1.247 + 1.248 +//------------------------------dump------------------------------------------- 1.249 #ifndef PRODUCT 1.250 void PhaseCFG::_dump_cfg( const Node *end, VectorSet &visited ) const { 1.251 const Node *x = end->is_block_proj();