--- a/src/share/vm/opto/gcm.cpp Thu Aug 22 09:39:54 2013 -0700
+++ b/src/share/vm/opto/gcm.cpp Thu Sep 05 11:04:39 2013 -0700
@@ -70,7 +70,7 @@
 // are in b also.
 void PhaseCFG::schedule_node_into_block( Node *n, Block *b ) {
   // Set basic block of n, Add n to b,
-  _bbs.map(n->_idx, b);
+  map_node_to_block(n, b);
   b->add_inst(n);
 
   // After Matching, nearly any old Node may have projections trailing it.
@@ -79,11 +79,12 @@
   for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
     Node* use = n->fast_out(i);
     if (use->is_Proj()) {
-      Block* buse = _bbs[use->_idx];
+      Block* buse = get_block_for_node(use);
       if (buse != b) {              // In wrong block?
-        if (buse != NULL)
+        if (buse != NULL) {
           buse->find_remove(use);   // Remove from wrong block
-        _bbs.map(use->_idx, b);     // Re-insert in this block
+        }
+        map_node_to_block(use, b);
         b->add_inst(use);
       }
     }
@@ -101,7 +102,7 @@
   if (p != NULL && p != n) {    // Control from a block projection?
     assert(!n->pinned() || n->is_MachConstantBase(), "only pinned MachConstantBase node is expected here");
     // Find trailing Region
-    Block *pb = _bbs[in0->_idx]; // Block-projection already has basic block
+    Block *pb = get_block_for_node(in0); // Block-projection already has basic block
     uint j = 0;
     if (pb->_num_succs != 1) {  // More then 1 successor?
       // Search for successor
@@ -124,26 +125,30 @@
 
 //------------------------------schedule_pinned_nodes--------------------------
 // Set the basic block for Nodes pinned into blocks
-void PhaseCFG::schedule_pinned_nodes( VectorSet &visited ) {
+void PhaseCFG::schedule_pinned_nodes(VectorSet &visited) {
   // Allocate node stack of size C->unique()+8 to avoid frequent realloc
-  GrowableArray <Node *> spstack(C->unique()+8);
+  GrowableArray <Node *> spstack(C->unique() + 8);
   spstack.push(_root);
-  while ( spstack.is_nonempty() ) {
-    Node *n = spstack.pop();
-    if( !visited.test_set(n->_idx) ) { // Test node and flag it as visited
-      if( n->pinned() && !_bbs.lookup(n->_idx) ) {  // Pinned?  Nail it down!
-        assert( n->in(0), "pinned Node must have Control" );
+  while (spstack.is_nonempty()) {
+    Node* node = spstack.pop();
+    if (!visited.test_set(node->_idx)) { // Test node and flag it as visited
+      if (node->pinned() && !has_block(node)) { // Pinned? Nail it down!
+        assert(node->in(0), "pinned Node must have Control");
         // Before setting block replace block_proj control edge
-        replace_block_proj_ctrl(n);
-        Node *input = n->in(0);
-        while( !input->is_block_start() )
+        replace_block_proj_ctrl(node);
+        Node* input = node->in(0);
+        while (!input->is_block_start()) {
           input = input->in(0);
-        Block *b = _bbs[input->_idx]; // Basic block of controlling input
-        schedule_node_into_block(n, b);
+        }
+        Block* block = get_block_for_node(input); // Basic block of controlling input
+        schedule_node_into_block(node, block);
       }
-      for( int i = n->req() - 1; i >= 0; --i ) {  // For all inputs
-        if( n->in(i) != NULL )
-          spstack.push(n->in(i));
+
+      // process all inputs that are non NULL
+      for (int i = node->req() - 1; i >= 0; --i) {
+        if (node->in(i) != NULL) {
+          spstack.push(node->in(i));
+        }
       }
     }
   }
@@ -153,7 +158,7 @@
 // Assert that new input b2 is dominated by all previous inputs.
 // Check this by by seeing that it is dominated by b1, the deepest
 // input observed until b2.
-static void assert_dom(Block* b1, Block* b2, Node* n, Block_Array &bbs) {
+static void assert_dom(Block* b1, Block* b2, Node* n, const PhaseCFG* cfg) {
   if (b1 == NULL) return;
   assert(b1->_dom_depth < b2->_dom_depth, "sanity");
   Block* tmp = b2;
@@ -166,7 +171,7 @@
   for (uint j=0; j<n->len(); j++) { // For all inputs
     Node* inn = n->in(j); // Get input
     if (inn == NULL) continue; // Ignore NULL, missing inputs
-    Block* inb = bbs[inn->_idx];
+    Block* inb = cfg->get_block_for_node(inn);
     tty->print("B%d idom=B%d depth=%2d ",inb->_pre_order,
       inb->_idom ? inb->_idom->_pre_order : 0, inb->_dom_depth);
     inn->dump();
@@ -178,20 +183,20 @@
 }
 #endif
 
-static Block* find_deepest_input(Node* n, Block_Array &bbs) {
+static Block* find_deepest_input(Node* n, const PhaseCFG* cfg) {
   // Find the last input dominated by all other inputs.
   Block* deepb           = NULL;        // Deepest block so far
   int    deepb_dom_depth = 0;
   for (uint k = 0; k < n->len(); k++) { // For all inputs
     Node* inn = n->in(k);               // Get input
     if (inn == NULL) continue;          // Ignore NULL, missing inputs
-    Block* inb = bbs[inn->_idx];
+    Block* inb = cfg->get_block_for_node(inn);
     assert(inb != NULL, "must already have scheduled this input");
     if (deepb_dom_depth < (int) inb->_dom_depth) {
       // The new inb must be dominated by the previous deepb.
       // The various inputs must be linearly ordered in the dom
       // tree, or else there will not be a unique deepest block.
-      DEBUG_ONLY(assert_dom(deepb, inb, n, bbs));
+      DEBUG_ONLY(assert_dom(deepb, inb, n, cfg));
      deepb = inb;                      // Save deepest block
       deepb_dom_depth = deepb->_dom_depth;
     }
@@ -207,32 +212,29 @@
 // which all their inputs occur.
 bool PhaseCFG::schedule_early(VectorSet &visited, Node_List &roots) {
   // Allocate stack with enough space to avoid frequent realloc
-  Node_Stack nstack(roots.Size() + 8); // (unique >> 1) + 24 from Java2D stats
-  // roots.push(_root); _root will be processed among C->top() inputs
+  Node_Stack nstack(roots.Size() + 8);
+  // _root will be processed among C->top() inputs
   roots.push(C->top());
   visited.set(C->top()->_idx);
 
   while (roots.size() != 0) {
     // Use local variables nstack_top_n & nstack_top_i to cache values
     // on stack's top.
-    Node *nstack_top_n = roots.pop();
-    uint  nstack_top_i = 0;
-//while_nstack_nonempty:
+    Node* parent_node = roots.pop();
+    uint input_index = 0;
+
     while (true) {
-      // Get parent node and next input's index from stack's top.
-      Node *n = nstack_top_n;
-      uint  i = nstack_top_i;
-
-      if (i == 0) {
+      if (input_index == 0) {
         // Fixup some control.  Constants without control get attached
         // to root and nodes that use is_block_proj() nodes should be attached
         // to the region that starts their block.
-        const Node *in0 = n->in(0);
-        if (in0 != NULL) {              // Control-dependent?
-          replace_block_proj_ctrl(n);
-        } else {               // n->in(0) == NULL
-          if (n->req() == 1) { // This guy is a constant with NO inputs?
-            n->set_req(0, _root);
+        const Node* control_input = parent_node->in(0);
+        if (control_input != NULL) {
+          replace_block_proj_ctrl(parent_node);
+        } else {
+          // Is a constant with NO inputs?
+          if (parent_node->req() == 1) {
+            parent_node->set_req(0, _root);
          }
         }
       }
@@ -241,37 +243,47 @@
       // input is already in a block we quit following inputs (to avoid
       // cycles). Instead we put that Node on a worklist to be handled
       // later (since IT'S inputs may not have a block yet).
-      bool done = true;       // Assume all n's inputs will be processed
-      while (i < n->len()) {  // For all inputs
-        Node *in = n->in(i);  // Get input
-        ++i;
-        if (in == NULL) continue;  // Ignore NULL, missing inputs
+
+      // Assume all n's inputs will be processed
+      bool done = true;
+
+      while (input_index < parent_node->len()) {
+        Node* in = parent_node->in(input_index++);
+        if (in == NULL) {
+          continue;
+        }
+
         int is_visited = visited.test_set(in->_idx);
-        if (!_bbs.lookup(in->_idx)) { // Missing block selection?
+        if (!has_block(in)) {
          if (is_visited) {
-            // assert( !visited.test(in->_idx), "did not schedule early" );
             return false;
           }
-          nstack.push(n, i);  // Save parent node and next input's index.
-          nstack_top_n = in;  // Process current input now.
-          nstack_top_i = 0;
-          done = false;       // Not all n's inputs processed.
-          break; // continue while_nstack_nonempty;
-        } else if (!is_visited) {   // Input not yet visited?
-          roots.push(in);           // Visit this guy later, using worklist
+          // Save parent node and next input's index.
+          nstack.push(parent_node, input_index);
+          // Process current input now.
+          parent_node = in;
+          input_index = 0;
+          // Not all n's inputs processed.
+          done = false;
+          break;
+        } else if (!is_visited) {
+          // Visit this guy later, using worklist
+          roots.push(in);
         }
       }
+
       if (done) {
         // All of n's inputs have been processed, complete post-processing.
 
         // Some instructions are pinned into a block.  These include Region,
         // Phi, Start, Return, and other control-dependent instructions and
         // any projections which depend on them.
-        if (!n->pinned()) {
+        if (!parent_node->pinned()) {
           // Set earliest legal block.
-          _bbs.map(n->_idx, find_deepest_input(n, _bbs));
+          Block* earliest_block = find_deepest_input(parent_node, this);
+          map_node_to_block(parent_node, earliest_block);
        } else {
-          assert(_bbs[n->_idx] == _bbs[n->in(0)->_idx], "Pinned Node should be at the same block as its control edge");
+          assert(get_block_for_node(parent_node) == get_block_for_node(parent_node->in(0)), "Pinned Node should be at the same block as its control edge");
         }
 
         if (nstack.is_empty()) {
@@ -280,12 +292,12 @@
           break;
         }
         // Get saved parent node and next input's index.
-        nstack_top_n = nstack.node();
-        nstack_top_i = nstack.index();
+        parent_node = nstack.node();
+        input_index = nstack.index();
         nstack.pop();
-      } // if (done)
-    } // while (true)
-  } // while (roots.size() != 0)
+      }
+    }
+  }
   return true;
 }
 
@@ -317,8 +329,8 @@
 // The definition must dominate the use, so move the LCA upward in the
 // dominator tree to dominate the use.  If the use is a phi, adjust
 // the LCA only with the phi input paths which actually use this def.
-static Block* raise_LCA_above_use(Block* LCA, Node* use, Node* def, Block_Array &bbs) {
-  Block* buse = bbs[use->_idx];
+static Block* raise_LCA_above_use(Block* LCA, Node* use, Node* def, const PhaseCFG* cfg) {
+  Block* buse = cfg->get_block_for_node(use);
   if (buse == NULL) return LCA;   // Unused killing Projs have no use block
   if (!use->is_Phi()) return buse->dom_lca(LCA);
   uint pmax = use->req();       // Number of Phi inputs
@@ -333,7 +345,7 @@
   // more than once.
   for (uint j=1; j<pmax; j++) { // For all inputs
     if (use->in(j) == def) {    // Found matching input?
-      Block* pred = bbs[buse->pred(j)->_idx];
+      Block* pred = cfg->get_block_for_node(buse->pred(j));
       LCA = pred->dom_lca(LCA);
     }
   }
@@ -346,8 +358,7 @@
 // which are marked with the given index.  Return the LCA (in the dom tree)
 // of all marked blocks.  If there are none marked, return the original
 // LCA.
-static Block* raise_LCA_above_marks(Block* LCA, node_idx_t mark,
-                                    Block* early, Block_Array &bbs) {
+static Block* raise_LCA_above_marks(Block* LCA, node_idx_t mark, Block* early, const PhaseCFG* cfg) {
   Block_List worklist;
   worklist.push(LCA);
   while (worklist.size() > 0) {
@@ -370,7 +381,7 @@
     } else {
       // Keep searching through this block's predecessors.
       for (uint j = 1, jmax = mid->num_preds(); j < jmax; j++) {
-        Block* mid_parent = bbs[ mid->pred(j)->_idx ];
+        Block* mid_parent = cfg->get_block_for_node(mid->pred(j));
         worklist.push(mid_parent);
       }
     }
@@ -388,7 +399,7 @@
 // be earlier (at a shallower dom_depth) than the true schedule_early
 // point of the node.  We compute this earlier block as a more permissive
 // site for anti-dependency insertion, but only if subsume_loads is enabled.
-static Block* memory_early_block(Node* load, Block* early, Block_Array &bbs) {
+static Block* memory_early_block(Node* load, Block* early, const PhaseCFG* cfg) {
   Node* base;
   Node* index;
   Node* store = load->in(MemNode::Memory);
@@ -416,12 +427,12 @@
     Block* deepb           = NULL;        // Deepest block so far
     int    deepb_dom_depth = 0;
     for (int i = 0; i < mem_inputs_length; i++) {
-      Block* inb = bbs[mem_inputs[i]->_idx];
+      Block* inb = cfg->get_block_for_node(mem_inputs[i]);
       if (deepb_dom_depth < (int) inb->_dom_depth) {
         // The new inb must be dominated by the previous deepb.
         // The various inputs must be linearly ordered in the dom
         // tree, or else there will not be a unique deepest block.
-        DEBUG_ONLY(assert_dom(deepb, inb, load, bbs));
+        DEBUG_ONLY(assert_dom(deepb, inb, load, cfg));
         deepb = inb;                      // Save deepest block
         deepb_dom_depth = deepb->_dom_depth;
       }
@@ -492,14 +503,14 @@
   // and other inputs are first available.  (Computed by schedule_early.)
   // For normal loads, 'early' is the shallowest place (dom graph wise)
   // to look for anti-deps between this load and any store.
-  Block* early = _bbs[load_index];
+  Block* early = get_block_for_node(load);
 
   // If we are subsuming loads, compute an "early" block that only considers
   // memory or address inputs. This block may be different than the
   // schedule_early block in that it could be at an even shallower depth in the
   // dominator tree, and allow for a broader discovery of anti-dependences.
   if (C->subsume_loads()) {
-    early = memory_early_block(load, early, _bbs);
+    early = memory_early_block(load, early, this);
   }
 
   ResourceArea *area = Thread::current()->resource_area();
@@ -623,7 +634,7 @@
     // or else observe that 'store' is all the way up in the
     // earliest legal block for 'load'.  In the latter case,
     // immediately insert an anti-dependence edge.
-    Block* store_block = _bbs[store->_idx];
+    Block* store_block = get_block_for_node(store);
     assert(store_block != NULL, "unused killing projections skipped above");
 
     if (store->is_Phi()) {
@@ -641,7 +652,7 @@
       for (uint j = PhiNode::Input, jmax = store->req(); j < jmax; j++) {
         if (store->in(j) == mem) {   // Found matching input?
           DEBUG_ONLY(found_match = true);
-          Block* pred_block = _bbs[store_block->pred(j)->_idx];
+          Block* pred_block = get_block_for_node(store_block->pred(j));
          if (pred_block != early) {
             // If any predecessor of the Phi matches the load's "early block",
             // we do not need a precedence edge between the Phi and 'load'
@@ -715,7 +726,7 @@
   // preventing the load from sinking past any block containing
   // a store that may invalidate the memory state required by 'load'.
   if (must_raise_LCA)
-    LCA = raise_LCA_above_marks(LCA, load->_idx, early, _bbs);
+    LCA = raise_LCA_above_marks(LCA, load->_idx, early, this);
   if (LCA == early) return LCA;
 
   // Insert anti-dependence edges from 'load' to each store
@@ -724,7 +735,7 @@
   if (LCA->raise_LCA_mark() == load_index) {
     while (non_early_stores.size() > 0) {
       Node* store = non_early_stores.pop();
-      Block* store_block = _bbs[store->_idx];
+      Block* store_block = get_block_for_node(store);
       if (store_block == LCA) {
         // add anti_dependence from store to load in its own block
         assert(store != load->in(0), "dependence cycle found");
@@ -758,7 +769,7 @@
 
 public:
   // Constructor for the iterator
-  Node_Backward_Iterator(Node *root, VectorSet &visited, Node_List &stack, Block_Array &bbs);
+  Node_Backward_Iterator(Node *root, VectorSet &visited, Node_List &stack, PhaseCFG &cfg);
 
   // Postincrement operator to iterate over the nodes
   Node *next();
 
 private:
   VectorSet &_visited;
   Node_List &_stack;
-  Block_Array &_bbs;
+  PhaseCFG &_cfg;
 };
 
 // Constructor for the Node_Backward_Iterator
-Node_Backward_Iterator::Node_Backward_Iterator( Node *root, VectorSet &visited, Node_List &stack, Block_Array &bbs )
-  : _visited(visited), _stack(stack), _bbs(bbs) {
+Node_Backward_Iterator::Node_Backward_Iterator( Node *root, VectorSet &visited, Node_List &stack, PhaseCFG &cfg)
+  : _visited(visited), _stack(stack), _cfg(cfg) {
   // The stack should contain exactly the root
   stack.clear();
   stack.push(root);
@@ -801,8 +812,8 @@
   _visited.set(self->_idx);
 
   // Now schedule all uses as late as possible.
-  uint src = self->is_Proj() ? self->in(0)->_idx : self->_idx;
-  uint src_rpo = _bbs[src]->_rpo;
+  const Node* src = self->is_Proj() ? self->in(0) : self;
+  uint src_rpo = _cfg.get_block_for_node(src)->_rpo;
 
   // Schedule all nodes in a post-order visit
   Node *unvisited = NULL;  // Unvisited anti-dependent Node, if any
@@ -818,7 +829,7 @@
 
     // do not traverse backward control edges
     Node *use = n->is_Proj() ? n->in(0) : n;
-    uint use_rpo = _bbs[use->_idx]->_rpo;
+    uint use_rpo = _cfg.get_block_for_node(use)->_rpo;
 
     if ( use_rpo < src_rpo )
      continue;
@@ -850,13 +861,13 @@
 
 //------------------------------ComputeLatenciesBackwards----------------------
 // Compute the latency of all the instructions.
-void PhaseCFG::ComputeLatenciesBackwards(VectorSet &visited, Node_List &stack) {
+void PhaseCFG::compute_latencies_backwards(VectorSet &visited, Node_List &stack) {
 #ifndef PRODUCT
   if (trace_opto_pipelining())
     tty->print("\n#---- ComputeLatenciesBackwards ----\n");
 #endif
 
-  Node_Backward_Iterator iter((Node *)_root, visited, stack, _bbs);
+  Node_Backward_Iterator iter((Node *)_root, visited, stack, *this);
   Node *n;
 
   // Walk over all the nodes from last to first
@@ -873,31 +884,34 @@
   // Set the latency for this instruction
 #ifndef PRODUCT
   if (trace_opto_pipelining()) {
-    tty->print("# latency_to_inputs: node_latency[%d] = %d for node",
-               n->_idx, _node_latency->at_grow(n->_idx));
+    tty->print("# latency_to_inputs: node_latency[%d] = %d for node", n->_idx, get_latency_for_node(n));
     dump();
   }
 #endif
 
-  if (n->is_Proj())
+  if (n->is_Proj()) {
     n = n->in(0);
+  }
 
-  if (n->is_Root())
+  if (n->is_Root()) {
     return;
+  }
 
   uint nlen = n->len();
-  uint use_latency = _node_latency->at_grow(n->_idx);
-  uint use_pre_order = _bbs[n->_idx]->_pre_order;
+  uint use_latency = get_latency_for_node(n);
+  uint use_pre_order = get_block_for_node(n)->_pre_order;
 
-  for ( uint j=0; j<nlen; j++ ) {
+  for (uint j = 0; j < nlen; j++) {
     Node *def = n->in(j);
 
-    if (!def || def == n)
+    if (!def || def == n) {
       continue;
+    }
 
     // Walk backwards thru projections
-    if (def->is_Proj())
+    if (def->is_Proj()) {
      def = def->in(0);
+    }
 
 #ifndef PRODUCT
     if (trace_opto_pipelining()) {
@@ -907,25 +921,23 @@
 #endif
 
     // If the defining block is not known, assume it is ok
-    Block *def_block = _bbs[def->_idx];
+    Block *def_block = get_block_for_node(def);
     uint def_pre_order = def_block ? def_block->_pre_order : 0;
 
-    if ( (use_pre_order < def_pre_order) ||
-         (use_pre_order == def_pre_order && n->is_Phi()) )
+    if ((use_pre_order < def_pre_order) || (use_pre_order == def_pre_order && n->is_Phi())) {
       continue;
+    }
 
     uint delta_latency = n->latency(j);
     uint current_latency = delta_latency + use_latency;
 
-    if (_node_latency->at_grow(def->_idx) < current_latency) {
-      _node_latency->at_put_grow(def->_idx, current_latency);
+    if (get_latency_for_node(def) < current_latency) {
+      set_latency_for_node(def, current_latency);
     }
 
 #ifndef PRODUCT
     if (trace_opto_pipelining()) {
-      tty->print_cr("# %d + edge_latency(%d) == %d -> %d, node_latency[%d] = %d",
-                    use_latency, j, delta_latency, current_latency, def->_idx,
-                    _node_latency->at_grow(def->_idx));
+      tty->print_cr("# %d + edge_latency(%d) == %d -> %d, node_latency[%d] = %d", use_latency, j, delta_latency, current_latency, def->_idx, get_latency_for_node(def));
     }
 #endif
   }
@@ -935,10 +947,11 @@
 // Compute the latency of a specific use
 int PhaseCFG::latency_from_use(Node *n, const Node *def, Node *use) {
   // If self-reference, return no latency
-  if (use == n || use->is_Root())
+  if (use == n || use->is_Root()) {
     return 0;
+  }
 
-  uint def_pre_order = _bbs[def->_idx]->_pre_order;
+  uint def_pre_order = get_block_for_node(def)->_pre_order;
   uint latency = 0;
 
   // If the use is not a projection, then it is simple...
@@ -950,7 +963,7 @@
   }
 #endif
 
-  uint use_pre_order = _bbs[use->_idx]->_pre_order;
+  uint use_pre_order = get_block_for_node(use)->_pre_order;
 
   if (use_pre_order < def_pre_order)
     return 0;
@@ -959,7 +972,7 @@
     return 0;
 
   uint nlen = use->len();
-  uint nl = _node_latency->at_grow(use->_idx);
+  uint nl = get_latency_for_node(use);
 
   for ( uint j=0; j<nlen; j++ ) {
     if (use->in(j) == n) {
@@ -994,8 +1007,7 @@
   // Set the latency for this instruction
 #ifndef PRODUCT
   if (trace_opto_pipelining()) {
-    tty->print("# latency_from_outputs: node_latency[%d] = %d for node",
-               n->_idx, _node_latency->at_grow(n->_idx));
+    tty->print("# latency_from_outputs: node_latency[%d] = %d for node", n->_idx, get_latency_for_node(n));
     dump();
   }
 #endif
@@ -1008,7 +1020,7 @@
     if (latency < l) latency = l;
   }
 
-  _node_latency->at_put_grow(n->_idx, latency);
+  set_latency_for_node(n, latency);
 }
 
 //------------------------------hoist_to_cheaper_block-------------------------
@@ -1018,11 +1030,11 @@
   const double delta = 1+PROB_UNLIKELY_MAG(4);
   Block* least       = LCA;
   double least_freq  = least->_freq;
-  uint target        = _node_latency->at_grow(self->_idx);
-  uint start_latency = _node_latency->at_grow(LCA->_nodes[0]->_idx);
-  uint end_latency   = _node_latency->at_grow(LCA->_nodes[LCA->end_idx()]->_idx);
+  uint target        = get_latency_for_node(self);
+  uint start_latency = get_latency_for_node(LCA->_nodes[0]);
+  uint end_latency   = get_latency_for_node(LCA->_nodes[LCA->end_idx()]);
   bool in_latency    = (target <= start_latency);
-  const Block* root_block = _bbs[_root->_idx];
+  const Block* root_block = get_block_for_node(_root);
 
   // Turn off latency scheduling if scheduling is just plain off
   if (!C->do_scheduling())
@@ -1037,8 +1049,7 @@
 
 #ifndef PRODUCT
   if (trace_opto_pipelining()) {
-    tty->print("# Find cheaper block for latency %d: ",
-               _node_latency->at_grow(self->_idx));
+    tty->print("# Find cheaper block for latency %d: ", get_latency_for_node(self));
     self->dump();
     tty->print_cr("# B%d: start latency for [%4d]=%d, end latency for [%4d]=%d, freq=%g",
       LCA->_pre_order,
@@ -1067,9 +1078,9 @@
     if (mach && LCA == root_block)
       break;
 
-    uint start_lat = _node_latency->at_grow(LCA->_nodes[0]->_idx);
+    uint start_lat = get_latency_for_node(LCA->_nodes[0]);
     uint end_idx   = LCA->end_idx();
-    uint end_lat   = _node_latency->at_grow(LCA->_nodes[end_idx]->_idx);
+    uint end_lat   = get_latency_for_node(LCA->_nodes[end_idx]);
     double LCA_freq = LCA->_freq;
 #ifndef PRODUCT
     if (trace_opto_pipelining()) {
@@ -1111,7 +1122,7 @@
       tty->print_cr("# Change latency for [%4d] from %d to %d", self->_idx, target, end_latency);
     }
 #endif
-    _node_latency->at_put_grow(self->_idx, end_latency);
+    set_latency_for_node(self, end_latency);
     partial_latency_of_defs(self);
   }
 
@@ -1130,12 +1141,12 @@
     tty->print("\n#---- schedule_late ----\n");
 #endif
 
-  Node_Backward_Iterator iter((Node *)_root, visited, stack, _bbs);
+  Node_Backward_Iterator iter((Node *)_root, visited, stack, *this);
   Node *self;
 
   // Walk over all the nodes from last to first
   while (self = iter.next()) {
-    Block* early = _bbs[self->_idx];   // Earliest legal placement
+    Block* early = get_block_for_node(self); // Earliest legal placement
 
     if (self->is_top()) {
       // Top node goes in bb #2 with other constants.
@@ -1183,7 +1194,7 @@
      for (DUIterator_Fast imax, i = self->fast_outs(imax); i < imax; i++) {
        // For all uses, find LCA
        Node* use = self->fast_out(i);
-        LCA = raise_LCA_above_use(LCA, use, self, _bbs);
+        LCA = raise_LCA_above_use(LCA, use, self, this);
      }
    }  // (Hide defs of imax, i from rest of block.)
 
@@ -1191,7 +1202,7 @@
    // requirement for correctness but it reduces useless
    // interference between temps and other nodes.
    if (mach != NULL && mach->is_MachTemp()) {
-      _bbs.map(self->_idx, LCA);
+      map_node_to_block(self, LCA);
      LCA->add_inst(self);
      continue;
    }
@@ -1257,7 +1268,7 @@
 } // end ScheduleLate
 
 //------------------------------GlobalCodeMotion-------------------------------
-void PhaseCFG::GlobalCodeMotion( Matcher &matcher, uint unique, Node_List &proj_list ) {
+void PhaseCFG::global_code_motion() {
   ResourceMark rm;
 
 #ifndef PRODUCT
  if (trace_opto_pipelining()) {
  }
 #endif
 
-  // Initialize the bbs.map for things on the proj_list
-  uint i;
-  for( i=0; i < proj_list.size(); i++ )
-    _bbs.map(proj_list[i]->_idx, NULL);
+  // Initialize the node to block mapping for things on the proj_list
+  for (uint i = 0; i < _matcher.number_of_projections(); i++) {
+    unmap_node_from_block(_matcher.get_projection(i));
+  }
 
   // Set the basic block for Nodes pinned into blocks
-  Arena *a = Thread::current()->resource_area();
-  VectorSet visited(a);
-  schedule_pinned_nodes( visited );
+  Arena* arena = Thread::current()->resource_area();
+  VectorSet visited(arena);
+  schedule_pinned_nodes(visited);
 
   // Find the earliest Block any instruction can be placed in.  Some
   // instructions are pinned into Blocks.  Unpinned instructions can
   // appear in last block in which all their inputs occur.
   visited.Clear();
-  Node_List stack(a);
-  stack.map( (unique >> 1) + 16, NULL); // Pre-grow the list
+  Node_List stack(arena);
+  // Pre-grow the list
+  stack.map((C->unique() >> 1) + 16, NULL);
   if (!schedule_early(visited, stack)) {
     // Bailout without retry
     C->record_method_not_compilable("early schedule failed");
@@ -1289,29 +1301,25 @@
   }
 
   // Build Def-Use edges.
-  proj_list.push(_root); // Add real root as another root
-  proj_list.pop();
-
   // Compute the latency information (via backwards walk) for all the
   // instructions in the graph
   _node_latency = new GrowableArray<uint>(); // resource_area allocation
 
-  if( C->do_scheduling() )
-    ComputeLatenciesBackwards(visited, stack);
+  if (C->do_scheduling()) {
+    compute_latencies_backwards(visited, stack);
+  }
 
   // Now schedule all codes as LATE as possible.  This is the LCA in the
   // dominator tree of all USES of a value.  Pick the block with the least
   // loop nesting depth that is lowest in the dominator tree.
   // ( visited.Clear() called in schedule_late()->Node_Backward_Iterator() )
   schedule_late(visited, stack);
-  if( C->failing() ) {
+  if (C->failing()) {
     // schedule_late fails only when graph is incorrect.
     assert(!VerifyGraphEdges, "verification should have failed");
     return;
   }
 
-  unique = C->unique();
-
 #ifndef PRODUCT
   if (trace_opto_pipelining()) {
     tty->print("\n---- Detect implicit null checks ----\n");
@@ -1334,10 +1342,11 @@
   // By reversing the loop direction we get a very minor gain on mpegaudio.
   // Feel free to revert to a forward loop for clarity.
 //  for( int i=0; i < (int)matcher._null_check_tests.size(); i+=2 ) {
-  for( int i= matcher._null_check_tests.size()-2; i>=0; i-=2 ) {
-    Node *proj = matcher._null_check_tests[i  ];
-    Node *val  = matcher._null_check_tests[i+1];
-    _bbs[proj->_idx]->implicit_null_check(this, proj, val, allowed_reasons);
+  for (int i = _matcher._null_check_tests.size() - 2; i >= 0; i -= 2) {
+    Node* proj = _matcher._null_check_tests[i];
+    Node* val = _matcher._null_check_tests[i + 1];
+    Block* block = get_block_for_node(proj);
+    block->implicit_null_check(this, proj, val, allowed_reasons);
     // The implicit_null_check will only perform the transformation
     // if the null branch is truly uncommon, *and* it leads to an
     // uncommon trap.  Combined with the too_many_traps guards
@@ -1354,11 +1363,11 @@
 
   // Schedule locally.  Right now a simple topological sort.
   // Later, do a real latency aware scheduler.
-  uint max_idx = C->unique();
-  GrowableArray<int> ready_cnt(max_idx, max_idx, -1);
+  GrowableArray<int> ready_cnt(C->unique(), C->unique(), -1);
   visited.Clear();
-  for (i = 0; i < _num_blocks; i++) {
-    if (!_blocks[i]->schedule_local(this, matcher, ready_cnt, visited)) {
+  for (uint i = 0; i < number_of_blocks(); i++) {
+    Block* block = get_block(i);
+    if (!block->schedule_local(this, _matcher, ready_cnt, visited)) {
      if (!C->failure_reason_is(C2Compiler::retry_no_subsuming_loads())) {
        C->record_method_not_compilable("local schedule failed");
      }
@@ -1368,14 +1377,17 @@
 
   // If we inserted any instructions between a Call and his CatchNode,
   // clone the instructions on all paths below the Catch.
-  for( i=0; i < _num_blocks; i++ )
-    _blocks[i]->call_catch_cleanup(_bbs, C);
+  for (uint i = 0; i < number_of_blocks(); i++) {
+    Block* block = get_block(i);
+    block->call_catch_cleanup(this, C);
+  }
 
 #ifndef PRODUCT
   if (trace_opto_pipelining()) {
    tty->print("\n---- After GlobalCodeMotion ----\n");
-    for (uint i = 0; i < _num_blocks; i++) {
-      _blocks[i]->dump();
+    for (uint i = 0; i < number_of_blocks(); i++) {
+      Block* block = get_block(i);
+      block->dump();
    }
  }
 #endif
@@ -1383,10 +1395,29 @@
   _node_latency = (GrowableArray<uint> *)0xdeadbeef;
 }
 
+bool PhaseCFG::do_global_code_motion() {
+
+  build_dominator_tree();
+  if (C->failing()) {
+    return false;
+  }
+
+  NOT_PRODUCT( C->verify_graph_edges(); )
+
+  estimate_block_frequency();
+
+  global_code_motion();
+
+  if (C->failing()) {
+    return false;
+  }
+
+  return true;
+}
 
 //------------------------------Estimate_Block_Frequency-----------------------
 // Estimate block frequencies based on IfNode probabilities.
-void PhaseCFG::Estimate_Block_Frequency() {
+void PhaseCFG::estimate_block_frequency() {
 
   // Force conditional branches leading to uncommon traps to be unlikely,
   // not because we get to the uncommon_trap with less relative frequency,
@@ -1394,18 +1425,20 @@
   // there once.
   if (C->do_freq_based_layout()) {
     Block_List worklist;
-    Block* root_blk = _blocks[0];
+    Block* root_blk = get_block(0);
     for (uint i = 1; i < root_blk->num_preds(); i++) {
-      Block *pb = _bbs[root_blk->pred(i)->_idx];
+      Block *pb = get_block_for_node(root_blk->pred(i));
      if (pb->has_uncommon_code()) {
        worklist.push(pb);
      }
    }
    while (worklist.size() > 0) {
      Block* uct = worklist.pop();
-      if (uct == _broot) continue;
+      if (uct == get_root_block()) {
+        continue;
+      }
      for (uint i = 1; i < uct->num_preds(); i++) {
-        Block *pb = _bbs[uct->pred(i)->_idx];
+        Block *pb = get_block_for_node(uct->pred(i));
        if (pb->_num_succs == 1) {
          worklist.push(pb);
        } else if (pb->num_fall_throughs() == 2) {
@@ -1427,14 +1460,14 @@
   _root_loop->scale_freq();
 
   // Save outmost loop frequency for LRG frequency threshold
-  _outer_loop_freq = _root_loop->outer_loop_freq();
+  _outer_loop_frequency = _root_loop->outer_loop_freq();
 
   // force paths ending at uncommon traps to be infrequent
   if (!C->do_freq_based_layout()) {
     Block_List worklist;
-    Block* root_blk = _blocks[0];
+    Block* root_blk = get_block(0);
     for (uint i = 1; i < root_blk->num_preds(); i++) {
-      Block *pb = _bbs[root_blk->pred(i)->_idx];
+      Block *pb = get_block_for_node(root_blk->pred(i));
      if (pb->has_uncommon_code()) {
        worklist.push(pb);
      }
@@ -1443,7 +1476,7 @@
      Block* uct = worklist.pop();
      uct->_freq = PROB_MIN;
      for (uint i = 1; i < uct->num_preds(); i++) {
-        Block *pb = _bbs[uct->pred(i)->_idx];
+        Block *pb = get_block_for_node(uct->pred(i));
        if (pb->_num_succs == 1 && pb->_freq > PROB_MIN) {
          worklist.push(pb);
        }
@@ -1452,8 +1485,8 @@
   }
 
 #ifdef ASSERT
-  for (uint i = 0; i < _num_blocks; i++ ) {
-    Block *b = _blocks[i];
+  for (uint i = 0; i < number_of_blocks(); i++) {
+    Block* b = get_block(i);
    assert(b->_freq >= MIN_BLOCK_FREQUENCY, "Register Allocator requires meaningful block frequency");
  }
 #endif
@@ -1477,16 +1510,16 @@
 CFGLoop* PhaseCFG::create_loop_tree() {
 
 #ifdef ASSERT
-  assert( _blocks[0] == _broot, "" );
-  for (uint i = 0; i < _num_blocks; i++ ) {
-    Block *b = _blocks[i];
+  assert(get_block(0) == get_root_block(), "first block should be root block");
+  for (uint i = 0; i < number_of_blocks(); i++) {
+    Block* block = get_block(i);
    // Check that _loop field are clear...we could clear them if not.
-    assert(b->_loop == NULL, "clear _loop expected");
+    assert(block->_loop == NULL, "clear _loop expected");
    // Sanity check that the RPO numbering is reflected in the _blocks array.
    // It doesn't have to be for the loop tree to be built, but if it is not,
    // then the blocks have been reordered since dom graph building...which
    // may question the RPO numbering
-    assert(b->_rpo == i, "unexpected reverse post order number");
+    assert(block->_rpo == i, "unexpected reverse post order number");
  }
 #endif
 
@@ -1496,14 +1529,14 @@
   Block_List worklist;
 
   // Assign blocks to loops
-  for(uint i = _num_blocks - 1; i > 0; i-- ) { // skip Root block
-    Block *b = _blocks[i];
+  for(uint i = number_of_blocks() - 1; i > 0; i-- ) { // skip Root block
+    Block* block = get_block(i);
 
-    if (b->head()->is_Loop()) {
-      Block* loop_head = b;
+    if (block->head()->is_Loop()) {
+      Block* loop_head = block;
      assert(loop_head->num_preds() - 1 == 2, "loop must have 2 predecessors");
      Node* tail_n = loop_head->pred(LoopNode::LoopBackControl);
-      Block* tail = _bbs[tail_n->_idx];
+      Block* tail = get_block_for_node(tail_n);
 
      // Defensively filter out Loop nodes for non-single-entry loops.
      // For all reasonable loops, the head occurs before the tail in RPO.
@@ -1518,13 +1551,13 @@
        loop_head->_loop = nloop;
        // Add to nloop so push_pred() will skip over inner loops
        nloop->add_member(loop_head);
-        nloop->push_pred(loop_head, LoopNode::LoopBackControl, worklist, _bbs);
+        nloop->push_pred(loop_head, LoopNode::LoopBackControl, worklist, this);
 
        while (worklist.size() > 0) {
          Block* member = worklist.pop();
          if (member != loop_head) {
            for (uint j = 1; j < member->num_preds(); j++) {
-              nloop->push_pred(member, j, worklist, _bbs);
+              nloop->push_pred(member, j, worklist, this);
            }
          }
        }
@@ -1534,23 +1567,23 @@
 
   // Create a member list for each loop consisting
   // of both blocks and (immediate child) loops.
-  for (uint i = 0; i < _num_blocks; i++) {
-    Block *b = _blocks[i];
-    CFGLoop* lp = b->_loop;
+  for (uint i = 0; i < number_of_blocks(); i++) {
+    Block* block = get_block(i);
+    CFGLoop* lp = block->_loop;
    if (lp == NULL) {
      // Not assigned to a loop. Add it to the method's pseudo loop.
-      b->_loop = root_loop;
+      block->_loop = root_loop;
      lp = root_loop;
    }
-    if (lp == root_loop || b != lp->head()) { // loop heads are already members
-      lp->add_member(b);
+    if (lp == root_loop || block != lp->head()) { // loop heads are already members
+      lp->add_member(block);
    }
    if (lp != root_loop) {
      if (lp->parent() == NULL) {
        // Not a nested loop. Make it a child of the method's pseudo loop.
        root_loop->add_nested_loop(lp);
      }
-      if (b == lp->head()) {
+      if (block == lp->head()) {
        // Add nested loop to member list of parent loop.
        lp->parent()->add_member(lp);
      }
@@ -1561,9 +1594,9 @@
 }
 
 //------------------------------push_pred--------------------------------------
-void CFGLoop::push_pred(Block* blk, int i, Block_List& worklist, Block_Array& node_to_blk) {
+void CFGLoop::push_pred(Block* blk, int i, Block_List& worklist, PhaseCFG* cfg) {
   Node* pred_n = blk->pred(i);
-  Block* pred = node_to_blk[pred_n->_idx];
+  Block* pred = cfg->get_block_for_node(pred_n);
   CFGLoop *pred_loop = pred->_loop;
   if (pred_loop == NULL) {
     // Filter out blocks for non-single-entry loops.
@@ -1584,7 +1617,7 @@
     Block* pred_head = pred_loop->head();
     assert(pred_head->num_preds() - 1 == 2, "loop must have 2 predecessors");
     assert(pred_head != head(), "loop head in only one loop");
-    push_pred(pred_head, LoopNode::EntryControl, worklist, node_to_blk);
+    push_pred(pred_head, LoopNode::EntryControl, worklist, cfg);
   } else {
     assert(pred_loop->_parent == this && _parent == NULL, "just checking");
   }
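
Reviewer note (not part of the change): the diff consistently replaces raw accesses to PhaseCFG
state (_bbs.map(...), _bbs[idx], _bbs.lookup(idx), _node_latency->at_grow/at_put_grow) with named
accessors. The sketch below shows what those accessors plausibly look like, inferred only from the
call sites visible above; the field names and exact signatures are assumptions, since the real
declarations live in the PhaseCFG header, which is not part of this diff.

    class PhaseCFG : public Phase {
     private:
      Block_Array _node_to_block_mapping;  // assumed name; this is the old "_bbs"
      GrowableArray<uint>* _node_latency;  // per-node-index latency, as before

     public:
      // Map a node to the basic block that contains it.
      void map_node_to_block(const Node* node, Block* block) {
        _node_to_block_mapping.map(node->_idx, block);
      }

      // Remove the node-to-block mapping (used above for matcher projections).
      void unmap_node_from_block(const Node* node) {
        _node_to_block_mapping.map(node->_idx, NULL);
      }

      // Return the block containing the node, or NULL if it is unscheduled.
      Block* get_block_for_node(const Node* node) const {
        return _node_to_block_mapping[node->_idx];
      }

      // True once the node has been assigned to a block.
      bool has_block(const Node* node) const {
        return (_node_to_block_mapping.lookup(node->_idx) != NULL);
      }

      // Latency bookkeeping used by the latency-scheduling passes.
      uint get_latency_for_node(Node* node) {
        return _node_latency->at_grow(node->_idx);
      }
      void set_latency_for_node(Node* node, uint latency) {
        _node_latency->at_put_grow(node->_idx, latency);
      }
    };

The payoff is that helpers such as find_deepest_input(), raise_LCA_above_use() and push_pred() now
take a PhaseCFG pointer instead of a bare Block_Array, so the representation of the node-to-block
mapping can change later without touching every call site in gcm.cpp.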