68 //----------------------------schedule_node_into_block------------------------- |
68 //----------------------------schedule_node_into_block------------------------- |
69 // Insert node n into block b. Look for projections of n and make sure they |
69 // Insert node n into block b. Look for projections of n and make sure they |
70 // are in b also. |
70 // are in b also. |
71 void PhaseCFG::schedule_node_into_block( Node *n, Block *b ) { |
71 void PhaseCFG::schedule_node_into_block( Node *n, Block *b ) { |
72 // Set basic block of n, Add n to b, |
72 // Set basic block of n, Add n to b, |
73 _bbs.map(n->_idx, b); |
73 map_node_to_block(n, b); |
74 b->add_inst(n); |
74 b->add_inst(n); |
75 |
75 |
76 // After Matching, nearly any old Node may have projections trailing it. |
76 // After Matching, nearly any old Node may have projections trailing it. |
77 // These are usually machine-dependent flags. In any case, they might |
77 // These are usually machine-dependent flags. In any case, they might |
78 // float to another block below this one. Move them up. |
78 // float to another block below this one. Move them up. |
79 for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { |
79 for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { |
80 Node* use = n->fast_out(i); |
80 Node* use = n->fast_out(i); |
81 if (use->is_Proj()) { |
81 if (use->is_Proj()) { |
82 Block* buse = _bbs[use->_idx]; |
82 Block* buse = get_block_for_node(use); |
83 if (buse != b) { // In wrong block? |
83 if (buse != b) { // In wrong block? |
84 if (buse != NULL) |
84 if (buse != NULL) { |
85 buse->find_remove(use); // Remove from wrong block |
85 buse->find_remove(use); // Remove from wrong block |
86 _bbs.map(use->_idx, b); // Re-insert in this block |
86 } |
|
87 map_node_to_block(use, b); |
87 b->add_inst(use); |
88 b->add_inst(use); |
88 } |
89 } |
89 } |
90 } |
90 } |
91 } |
91 } |
92 } |
122 } |
123 } |
123 |
124 |
124 |
125 |
125 //------------------------------schedule_pinned_nodes-------------------------- |
126 //------------------------------schedule_pinned_nodes-------------------------- |
126 // Set the basic block for Nodes pinned into blocks |
127 // Set the basic block for Nodes pinned into blocks |
127 void PhaseCFG::schedule_pinned_nodes( VectorSet &visited ) { |
128 void PhaseCFG::schedule_pinned_nodes(VectorSet &visited) { |
128 // Allocate node stack of size C->unique()+8 to avoid frequent realloc |
129 // Allocate node stack of size C->unique()+8 to avoid frequent realloc |
129 GrowableArray <Node *> spstack(C->unique()+8); |
130 GrowableArray <Node *> spstack(C->unique() + 8); |
130 spstack.push(_root); |
131 spstack.push(_root); |
131 while ( spstack.is_nonempty() ) { |
132 while (spstack.is_nonempty()) { |
132 Node *n = spstack.pop(); |
133 Node* node = spstack.pop(); |
133 if( !visited.test_set(n->_idx) ) { // Test node and flag it as visited |
134 if (!visited.test_set(node->_idx)) { // Test node and flag it as visited |
134 if( n->pinned() && !_bbs.lookup(n->_idx) ) { // Pinned? Nail it down! |
135 if (node->pinned() && !has_block(node)) { // Pinned? Nail it down! |
135 assert( n->in(0), "pinned Node must have Control" ); |
136 assert(node->in(0), "pinned Node must have Control"); |
136 // Before setting block replace block_proj control edge |
137 // Before setting block replace block_proj control edge |
137 replace_block_proj_ctrl(n); |
138 replace_block_proj_ctrl(node); |
138 Node *input = n->in(0); |
139 Node* input = node->in(0); |
139 while( !input->is_block_start() ) |
140 while (!input->is_block_start()) { |
140 input = input->in(0); |
141 input = input->in(0); |
141 Block *b = _bbs[input->_idx]; // Basic block of controlling input |
142 } |
142 schedule_node_into_block(n, b); |
143 Block* block = get_block_for_node(input); // Basic block of controlling input |
143 } |
144 schedule_node_into_block(node, block); |
144 for( int i = n->req() - 1; i >= 0; --i ) { // For all inputs |
145 } |
145 if( n->in(i) != NULL ) |
146 |
146 spstack.push(n->in(i)); |
147 // process all inputs that are non NULL |
|
148 for (int i = node->req() - 1; i >= 0; --i) { |
|
149 if (node->in(i) != NULL) { |
|
150 spstack.push(node->in(i)); |
|
151 } |
147 } |
152 } |
148 } |
153 } |
149 } |
154 } |
150 } |
155 } |
151 |
156 |
152 #ifdef ASSERT |
157 #ifdef ASSERT |
153 // Assert that new input b2 is dominated by all previous inputs. |
158 // Assert that new input b2 is dominated by all previous inputs. |
154 // Check this by by seeing that it is dominated by b1, the deepest |
159 // Check this by by seeing that it is dominated by b1, the deepest |
155 // input observed until b2. |
160 // input observed until b2. |
156 static void assert_dom(Block* b1, Block* b2, Node* n, Block_Array &bbs) { |
161 static void assert_dom(Block* b1, Block* b2, Node* n, const PhaseCFG* cfg) { |
157 if (b1 == NULL) return; |
162 if (b1 == NULL) return; |
158 assert(b1->_dom_depth < b2->_dom_depth, "sanity"); |
163 assert(b1->_dom_depth < b2->_dom_depth, "sanity"); |
159 Block* tmp = b2; |
164 Block* tmp = b2; |
160 while (tmp != b1 && tmp != NULL) { |
165 while (tmp != b1 && tmp != NULL) { |
161 tmp = tmp->_idom; |
166 tmp = tmp->_idom; |
176 assert(false, "unscheduable graph"); |
181 assert(false, "unscheduable graph"); |
177 } |
182 } |
178 } |
183 } |
179 #endif |
184 #endif |
180 |
185 |
181 static Block* find_deepest_input(Node* n, Block_Array &bbs) { |
186 static Block* find_deepest_input(Node* n, const PhaseCFG* cfg) { |
182 // Find the last input dominated by all other inputs. |
187 // Find the last input dominated by all other inputs. |
183 Block* deepb = NULL; // Deepest block so far |
188 Block* deepb = NULL; // Deepest block so far |
184 int deepb_dom_depth = 0; |
189 int deepb_dom_depth = 0; |
185 for (uint k = 0; k < n->len(); k++) { // For all inputs |
190 for (uint k = 0; k < n->len(); k++) { // For all inputs |
186 Node* inn = n->in(k); // Get input |
191 Node* inn = n->in(k); // Get input |
187 if (inn == NULL) continue; // Ignore NULL, missing inputs |
192 if (inn == NULL) continue; // Ignore NULL, missing inputs |
188 Block* inb = bbs[inn->_idx]; |
193 Block* inb = cfg->get_block_for_node(inn); |
189 assert(inb != NULL, "must already have scheduled this input"); |
194 assert(inb != NULL, "must already have scheduled this input"); |
190 if (deepb_dom_depth < (int) inb->_dom_depth) { |
195 if (deepb_dom_depth < (int) inb->_dom_depth) { |
191 // The new inb must be dominated by the previous deepb. |
196 // The new inb must be dominated by the previous deepb. |
192 // The various inputs must be linearly ordered in the dom |
197 // The various inputs must be linearly ordered in the dom |
193 // tree, or else there will not be a unique deepest block. |
198 // tree, or else there will not be a unique deepest block. |
194 DEBUG_ONLY(assert_dom(deepb, inb, n, bbs)); |
199 DEBUG_ONLY(assert_dom(deepb, inb, n, cfg)); |
195 deepb = inb; // Save deepest block |
200 deepb = inb; // Save deepest block |
196 deepb_dom_depth = deepb->_dom_depth; |
201 deepb_dom_depth = deepb->_dom_depth; |
197 } |
202 } |
198 } |
203 } |
199 assert(deepb != NULL, "must be at least one input to n"); |
204 assert(deepb != NULL, "must be at least one input to n"); |
205 // Find the earliest Block any instruction can be placed in. Some instructions |
210 // Find the earliest Block any instruction can be placed in. Some instructions |
206 // are pinned into Blocks. Unpinned instructions can appear in last block in |
211 // are pinned into Blocks. Unpinned instructions can appear in last block in |
207 // which all their inputs occur. |
212 // which all their inputs occur. |
208 bool PhaseCFG::schedule_early(VectorSet &visited, Node_List &roots) { |
213 bool PhaseCFG::schedule_early(VectorSet &visited, Node_List &roots) { |
209 // Allocate stack with enough space to avoid frequent realloc |
214 // Allocate stack with enough space to avoid frequent realloc |
210 Node_Stack nstack(roots.Size() + 8); // (unique >> 1) + 24 from Java2D stats |
215 Node_Stack nstack(roots.Size() + 8); |
211 // roots.push(_root); _root will be processed among C->top() inputs |
216 // _root will be processed among C->top() inputs |
212 roots.push(C->top()); |
217 roots.push(C->top()); |
213 visited.set(C->top()->_idx); |
218 visited.set(C->top()->_idx); |
214 |
219 |
215 while (roots.size() != 0) { |
220 while (roots.size() != 0) { |
216 // Use local variables nstack_top_n & nstack_top_i to cache values |
221 // Use local variables nstack_top_n & nstack_top_i to cache values |
217 // on stack's top. |
222 // on stack's top. |
218 Node *nstack_top_n = roots.pop(); |
223 Node* parent_node = roots.pop(); |
219 uint nstack_top_i = 0; |
224 uint input_index = 0; |
220 //while_nstack_nonempty: |
225 |
221 while (true) { |
226 while (true) { |
222 // Get parent node and next input's index from stack's top. |
227 if (input_index == 0) { |
223 Node *n = nstack_top_n; |
|
224 uint i = nstack_top_i; |
|
225 |
|
226 if (i == 0) { |
|
227 // Fixup some control. Constants without control get attached |
228 // Fixup some control. Constants without control get attached |
228 // to root and nodes that use is_block_proj() nodes should be attached |
229 // to root and nodes that use is_block_proj() nodes should be attached |
229 // to the region that starts their block. |
230 // to the region that starts their block. |
230 const Node *in0 = n->in(0); |
231 const Node* control_input = parent_node->in(0); |
231 if (in0 != NULL) { // Control-dependent? |
232 if (control_input != NULL) { |
232 replace_block_proj_ctrl(n); |
233 replace_block_proj_ctrl(parent_node); |
233 } else { // n->in(0) == NULL |
234 } else { |
234 if (n->req() == 1) { // This guy is a constant with NO inputs? |
235 // Is a constant with NO inputs? |
235 n->set_req(0, _root); |
236 if (parent_node->req() == 1) { |
|
237 parent_node->set_req(0, _root); |
236 } |
238 } |
237 } |
239 } |
238 } |
240 } |
239 |
241 |
240 // First, visit all inputs and force them to get a block. If an |
242 // First, visit all inputs and force them to get a block. If an |
241 // input is already in a block we quit following inputs (to avoid |
243 // input is already in a block we quit following inputs (to avoid |
242 // cycles). Instead we put that Node on a worklist to be handled |
244 // cycles). Instead we put that Node on a worklist to be handled |
243 // later (since IT'S inputs may not have a block yet). |
245 // later (since IT'S inputs may not have a block yet). |
244 bool done = true; // Assume all n's inputs will be processed |
246 |
245 while (i < n->len()) { // For all inputs |
247 // Assume all n's inputs will be processed |
246 Node *in = n->in(i); // Get input |
248 bool done = true; |
247 ++i; |
249 |
248 if (in == NULL) continue; // Ignore NULL, missing inputs |
250 while (input_index < parent_node->len()) { |
|
251 Node* in = parent_node->in(input_index++); |
|
252 if (in == NULL) { |
|
253 continue; |
|
254 } |
|
255 |
249 int is_visited = visited.test_set(in->_idx); |
256 int is_visited = visited.test_set(in->_idx); |
250 if (!_bbs.lookup(in->_idx)) { // Missing block selection? |
257 if (!has_block(in)) { |
251 if (is_visited) { |
258 if (is_visited) { |
252 // assert( !visited.test(in->_idx), "did not schedule early" ); |
|
253 return false; |
259 return false; |
254 } |
260 } |
255 nstack.push(n, i); // Save parent node and next input's index. |
261 // Save parent node and next input's index. |
256 nstack_top_n = in; // Process current input now. |
262 nstack.push(parent_node, input_index); |
257 nstack_top_i = 0; |
263 // Process current input now. |
258 done = false; // Not all n's inputs processed. |
264 parent_node = in; |
259 break; // continue while_nstack_nonempty; |
265 input_index = 0; |
260 } else if (!is_visited) { // Input not yet visited? |
266 // Not all n's inputs processed. |
261 roots.push(in); // Visit this guy later, using worklist |
267 done = false; |
|
268 break; |
|
269 } else if (!is_visited) { |
|
270 // Visit this guy later, using worklist |
|
271 roots.push(in); |
262 } |
272 } |
263 } |
273 } |
|
274 |
264 if (done) { |
275 if (done) { |
265 // All of n's inputs have been processed, complete post-processing. |
276 // All of n's inputs have been processed, complete post-processing. |
266 |
277 |
267 // Some instructions are pinned into a block. These include Region, |
278 // Some instructions are pinned into a block. These include Region, |
268 // Phi, Start, Return, and other control-dependent instructions and |
279 // Phi, Start, Return, and other control-dependent instructions and |
269 // any projections which depend on them. |
280 // any projections which depend on them. |
270 if (!n->pinned()) { |
281 if (!parent_node->pinned()) { |
271 // Set earliest legal block. |
282 // Set earliest legal block. |
272 _bbs.map(n->_idx, find_deepest_input(n, _bbs)); |
283 Block* earliest_block = find_deepest_input(parent_node, this); |
|
284 map_node_to_block(parent_node, earliest_block); |
273 } else { |
285 } else { |
274 assert(_bbs[n->_idx] == _bbs[n->in(0)->_idx], "Pinned Node should be at the same block as its control edge"); |
286 assert(get_block_for_node(parent_node) == get_block_for_node(parent_node->in(0)), "Pinned Node should be at the same block as its control edge"); |
275 } |
287 } |
276 |
288 |
277 if (nstack.is_empty()) { |
289 if (nstack.is_empty()) { |
278 // Finished all nodes on stack. |
290 // Finished all nodes on stack. |
279 // Process next node on the worklist 'roots'. |
291 // Process next node on the worklist 'roots'. |
280 break; |
292 break; |
281 } |
293 } |
282 // Get saved parent node and next input's index. |
294 // Get saved parent node and next input's index. |
283 nstack_top_n = nstack.node(); |
295 parent_node = nstack.node(); |
284 nstack_top_i = nstack.index(); |
296 input_index = nstack.index(); |
285 nstack.pop(); |
297 nstack.pop(); |
286 } // if (done) |
298 } |
287 } // while (true) |
299 } |
288 } // while (roots.size() != 0) |
300 } |
289 return true; |
301 return true; |
290 } |
302 } |
291 |
303 |
292 //------------------------------dom_lca---------------------------------------- |
304 //------------------------------dom_lca---------------------------------------- |
293 // Find least common ancestor in dominator tree |
305 // Find least common ancestor in dominator tree |
315 //--------------------------raise_LCA_above_use-------------------------------- |
327 //--------------------------raise_LCA_above_use-------------------------------- |
316 // We are placing a definition, and have been given a def->use edge. |
328 // We are placing a definition, and have been given a def->use edge. |
317 // The definition must dominate the use, so move the LCA upward in the |
329 // The definition must dominate the use, so move the LCA upward in the |
318 // dominator tree to dominate the use. If the use is a phi, adjust |
330 // dominator tree to dominate the use. If the use is a phi, adjust |
319 // the LCA only with the phi input paths which actually use this def. |
331 // the LCA only with the phi input paths which actually use this def. |
320 static Block* raise_LCA_above_use(Block* LCA, Node* use, Node* def, Block_Array &bbs) { |
332 static Block* raise_LCA_above_use(Block* LCA, Node* use, Node* def, const PhaseCFG* cfg) { |
321 Block* buse = bbs[use->_idx]; |
333 Block* buse = cfg->get_block_for_node(use); |
322 if (buse == NULL) return LCA; // Unused killing Projs have no use block |
334 if (buse == NULL) return LCA; // Unused killing Projs have no use block |
323 if (!use->is_Phi()) return buse->dom_lca(LCA); |
335 if (!use->is_Phi()) return buse->dom_lca(LCA); |
324 uint pmax = use->req(); // Number of Phi inputs |
336 uint pmax = use->req(); // Number of Phi inputs |
325 // Why does not this loop just break after finding the matching input to |
337 // Why does not this loop just break after finding the matching input to |
326 // the Phi? Well...it's like this. I do not have true def-use/use-def |
338 // the Phi? Well...it's like this. I do not have true def-use/use-def |
414 if (load->in(0) != NULL) mem_inputs[mem_inputs_length++] = load->in(0); |
425 if (load->in(0) != NULL) mem_inputs[mem_inputs_length++] = load->in(0); |
415 |
426 |
416 Block* deepb = NULL; // Deepest block so far |
427 Block* deepb = NULL; // Deepest block so far |
417 int deepb_dom_depth = 0; |
428 int deepb_dom_depth = 0; |
418 for (int i = 0; i < mem_inputs_length; i++) { |
429 for (int i = 0; i < mem_inputs_length; i++) { |
419 Block* inb = bbs[mem_inputs[i]->_idx]; |
430 Block* inb = cfg->get_block_for_node(mem_inputs[i]); |
420 if (deepb_dom_depth < (int) inb->_dom_depth) { |
431 if (deepb_dom_depth < (int) inb->_dom_depth) { |
421 // The new inb must be dominated by the previous deepb. |
432 // The new inb must be dominated by the previous deepb. |
422 // The various inputs must be linearly ordered in the dom |
433 // The various inputs must be linearly ordered in the dom |
423 // tree, or else there will not be a unique deepest block. |
434 // tree, or else there will not be a unique deepest block. |
424 DEBUG_ONLY(assert_dom(deepb, inb, load, bbs)); |
435 DEBUG_ONLY(assert_dom(deepb, inb, load, cfg)); |
425 deepb = inb; // Save deepest block |
436 deepb = inb; // Save deepest block |
426 deepb_dom_depth = deepb->_dom_depth; |
437 deepb_dom_depth = deepb->_dom_depth; |
427 } |
438 } |
428 } |
439 } |
429 early = deepb; |
440 early = deepb; |
490 // Note the earliest legal placement of 'load', as determined by |
501 // Note the earliest legal placement of 'load', as determined by |
491 // by the unique point in the dom tree where all memory effects |
502 // by the unique point in the dom tree where all memory effects |
492 // and other inputs are first available. (Computed by schedule_early.) |
503 // and other inputs are first available. (Computed by schedule_early.) |
493 // For normal loads, 'early' is the shallowest place (dom graph wise) |
504 // For normal loads, 'early' is the shallowest place (dom graph wise) |
494 // to look for anti-deps between this load and any store. |
505 // to look for anti-deps between this load and any store. |
495 Block* early = _bbs[load_index]; |
506 Block* early = get_block_for_node(load); |
496 |
507 |
497 // If we are subsuming loads, compute an "early" block that only considers |
508 // If we are subsuming loads, compute an "early" block that only considers |
498 // memory or address inputs. This block may be different than the |
509 // memory or address inputs. This block may be different than the |
499 // schedule_early block in that it could be at an even shallower depth in the |
510 // schedule_early block in that it could be at an even shallower depth in the |
500 // dominator tree, and allow for a broader discovery of anti-dependences. |
511 // dominator tree, and allow for a broader discovery of anti-dependences. |
501 if (C->subsume_loads()) { |
512 if (C->subsume_loads()) { |
502 early = memory_early_block(load, early, _bbs); |
513 early = memory_early_block(load, early, this); |
503 } |
514 } |
504 |
515 |
505 ResourceArea *area = Thread::current()->resource_area(); |
516 ResourceArea *area = Thread::current()->resource_area(); |
506 Node_List worklist_mem(area); // prior memory state to store |
517 Node_List worklist_mem(area); // prior memory state to store |
507 Node_List worklist_store(area); // possible-def to explore |
518 Node_List worklist_store(area); // possible-def to explore |
713 // |
724 // |
714 // The raised LCA will be a lower bound for placing the load, |
725 // The raised LCA will be a lower bound for placing the load, |
715 // preventing the load from sinking past any block containing |
726 // preventing the load from sinking past any block containing |
716 // a store that may invalidate the memory state required by 'load'. |
727 // a store that may invalidate the memory state required by 'load'. |
717 if (must_raise_LCA) |
728 if (must_raise_LCA) |
718 LCA = raise_LCA_above_marks(LCA, load->_idx, early, _bbs); |
729 LCA = raise_LCA_above_marks(LCA, load->_idx, early, this); |
719 if (LCA == early) return LCA; |
730 if (LCA == early) return LCA; |
720 |
731 |
721 // Insert anti-dependence edges from 'load' to each store |
732 // Insert anti-dependence edges from 'load' to each store |
722 // in the non-early LCA block. |
733 // in the non-early LCA block. |
723 // Mine the non_early_stores list for such stores. |
734 // Mine the non_early_stores list for such stores. |
724 if (LCA->raise_LCA_mark() == load_index) { |
735 if (LCA->raise_LCA_mark() == load_index) { |
725 while (non_early_stores.size() > 0) { |
736 while (non_early_stores.size() > 0) { |
726 Node* store = non_early_stores.pop(); |
737 Node* store = non_early_stores.pop(); |
727 Block* store_block = _bbs[store->_idx]; |
738 Block* store_block = get_block_for_node(store); |
728 if (store_block == LCA) { |
739 if (store_block == LCA) { |
729 // add anti_dependence from store to load in its own block |
740 // add anti_dependence from store to load in its own block |
730 assert(store != load->in(0), "dependence cycle found"); |
741 assert(store != load->in(0), "dependence cycle found"); |
731 if (verify) { |
742 if (verify) { |
732 assert(store->find_edge(load) != -1, "missing precedence edge"); |
743 assert(store->find_edge(load) != -1, "missing precedence edge"); |
871 // a number that increases as we approach the beginning of the routine. |
882 // a number that increases as we approach the beginning of the routine. |
872 void PhaseCFG::partial_latency_of_defs(Node *n) { |
883 void PhaseCFG::partial_latency_of_defs(Node *n) { |
873 // Set the latency for this instruction |
884 // Set the latency for this instruction |
874 #ifndef PRODUCT |
885 #ifndef PRODUCT |
875 if (trace_opto_pipelining()) { |
886 if (trace_opto_pipelining()) { |
876 tty->print("# latency_to_inputs: node_latency[%d] = %d for node", |
887 tty->print("# latency_to_inputs: node_latency[%d] = %d for node", n->_idx, get_latency_for_node(n)); |
877 n->_idx, _node_latency->at_grow(n->_idx)); |
|
878 dump(); |
888 dump(); |
879 } |
889 } |
880 #endif |
890 #endif |
881 |
891 |
882 if (n->is_Proj()) |
892 if (n->is_Proj()) { |
883 n = n->in(0); |
893 n = n->in(0); |
884 |
894 } |
885 if (n->is_Root()) |
895 |
|
896 if (n->is_Root()) { |
886 return; |
897 return; |
|
898 } |
887 |
899 |
888 uint nlen = n->len(); |
900 uint nlen = n->len(); |
889 uint use_latency = _node_latency->at_grow(n->_idx); |
901 uint use_latency = get_latency_for_node(n); |
890 uint use_pre_order = _bbs[n->_idx]->_pre_order; |
902 uint use_pre_order = get_block_for_node(n)->_pre_order; |
891 |
903 |
892 for ( uint j=0; j<nlen; j++ ) { |
904 for (uint j = 0; j < nlen; j++) { |
893 Node *def = n->in(j); |
905 Node *def = n->in(j); |
894 |
906 |
895 if (!def || def == n) |
907 if (!def || def == n) { |
896 continue; |
908 continue; |
|
909 } |
897 |
910 |
898 // Walk backwards thru projections |
911 // Walk backwards thru projections |
899 if (def->is_Proj()) |
912 if (def->is_Proj()) { |
900 def = def->in(0); |
913 def = def->in(0); |
|
914 } |
901 |
915 |
902 #ifndef PRODUCT |
916 #ifndef PRODUCT |
903 if (trace_opto_pipelining()) { |
917 if (trace_opto_pipelining()) { |
904 tty->print("# in(%2d): ", j); |
918 tty->print("# in(%2d): ", j); |
905 def->dump(); |
919 def->dump(); |
906 } |
920 } |
907 #endif |
921 #endif |
908 |
922 |
909 // If the defining block is not known, assume it is ok |
923 // If the defining block is not known, assume it is ok |
910 Block *def_block = _bbs[def->_idx]; |
924 Block *def_block = get_block_for_node(def); |
911 uint def_pre_order = def_block ? def_block->_pre_order : 0; |
925 uint def_pre_order = def_block ? def_block->_pre_order : 0; |
912 |
926 |
913 if ( (use_pre_order < def_pre_order) || |
927 if ((use_pre_order < def_pre_order) || (use_pre_order == def_pre_order && n->is_Phi())) { |
914 (use_pre_order == def_pre_order && n->is_Phi()) ) |
|
915 continue; |
928 continue; |
|
929 } |
916 |
930 |
917 uint delta_latency = n->latency(j); |
931 uint delta_latency = n->latency(j); |
918 uint current_latency = delta_latency + use_latency; |
932 uint current_latency = delta_latency + use_latency; |
919 |
933 |
920 if (_node_latency->at_grow(def->_idx) < current_latency) { |
934 if (get_latency_for_node(def) < current_latency) { |
921 _node_latency->at_put_grow(def->_idx, current_latency); |
935 set_latency_for_node(def, current_latency); |
922 } |
936 } |
923 |
937 |
924 #ifndef PRODUCT |
938 #ifndef PRODUCT |
925 if (trace_opto_pipelining()) { |
939 if (trace_opto_pipelining()) { |
926 tty->print_cr("# %d + edge_latency(%d) == %d -> %d, node_latency[%d] = %d", |
940 tty->print_cr("# %d + edge_latency(%d) == %d -> %d, node_latency[%d] = %d", use_latency, j, delta_latency, current_latency, def->_idx, get_latency_for_node(def)); |
927 use_latency, j, delta_latency, current_latency, def->_idx, |
|
928 _node_latency->at_grow(def->_idx)); |
|
929 } |
941 } |
930 #endif |
942 #endif |
931 } |
943 } |
932 } |
944 } |
933 |
945 |
934 //------------------------------latency_from_use------------------------------- |
946 //------------------------------latency_from_use------------------------------- |
935 // Compute the latency of a specific use |
947 // Compute the latency of a specific use |
936 int PhaseCFG::latency_from_use(Node *n, const Node *def, Node *use) { |
948 int PhaseCFG::latency_from_use(Node *n, const Node *def, Node *use) { |
937 // If self-reference, return no latency |
949 // If self-reference, return no latency |
938 if (use == n || use->is_Root()) |
950 if (use == n || use->is_Root()) { |
939 return 0; |
951 return 0; |
940 |
952 } |
941 uint def_pre_order = _bbs[def->_idx]->_pre_order; |
953 |
|
954 uint def_pre_order = get_block_for_node(def)->_pre_order; |
942 uint latency = 0; |
955 uint latency = 0; |
943 |
956 |
944 // If the use is not a projection, then it is simple... |
957 // If the use is not a projection, then it is simple... |
945 if (!use->is_Proj()) { |
958 if (!use->is_Proj()) { |
946 #ifndef PRODUCT |
959 #ifndef PRODUCT |
1006 uint l = latency_from_use(n, def, n->fast_out(i)); |
1018 uint l = latency_from_use(n, def, n->fast_out(i)); |
1007 |
1019 |
1008 if (latency < l) latency = l; |
1020 if (latency < l) latency = l; |
1009 } |
1021 } |
1010 |
1022 |
1011 _node_latency->at_put_grow(n->_idx, latency); |
1023 set_latency_for_node(n, latency); |
1012 } |
1024 } |
1013 |
1025 |
1014 //------------------------------hoist_to_cheaper_block------------------------- |
1026 //------------------------------hoist_to_cheaper_block------------------------- |
1015 // Pick a block for node self, between early and LCA, that is a cheaper |
1027 // Pick a block for node self, between early and LCA, that is a cheaper |
1016 // alternative to LCA. |
1028 // alternative to LCA. |
1017 Block* PhaseCFG::hoist_to_cheaper_block(Block* LCA, Block* early, Node* self) { |
1029 Block* PhaseCFG::hoist_to_cheaper_block(Block* LCA, Block* early, Node* self) { |
1018 const double delta = 1+PROB_UNLIKELY_MAG(4); |
1030 const double delta = 1+PROB_UNLIKELY_MAG(4); |
1019 Block* least = LCA; |
1031 Block* least = LCA; |
1020 double least_freq = least->_freq; |
1032 double least_freq = least->_freq; |
1021 uint target = _node_latency->at_grow(self->_idx); |
1033 uint target = get_latency_for_node(self); |
1022 uint start_latency = _node_latency->at_grow(LCA->_nodes[0]->_idx); |
1034 uint start_latency = get_latency_for_node(LCA->_nodes[0]); |
1023 uint end_latency = _node_latency->at_grow(LCA->_nodes[LCA->end_idx()]->_idx); |
1035 uint end_latency = get_latency_for_node(LCA->_nodes[LCA->end_idx()]); |
1024 bool in_latency = (target <= start_latency); |
1036 bool in_latency = (target <= start_latency); |
1025 const Block* root_block = _bbs[_root->_idx]; |
1037 const Block* root_block = get_block_for_node(_root); |
1026 |
1038 |
1027 // Turn off latency scheduling if scheduling is just plain off |
1039 // Turn off latency scheduling if scheduling is just plain off |
1028 if (!C->do_scheduling()) |
1040 if (!C->do_scheduling()) |
1029 in_latency = true; |
1041 in_latency = true; |
1030 |
1042 |
1181 Block *LCA = NULL; |
1192 Block *LCA = NULL; |
1182 { |
1193 { |
1183 for (DUIterator_Fast imax, i = self->fast_outs(imax); i < imax; i++) { |
1194 for (DUIterator_Fast imax, i = self->fast_outs(imax); i < imax; i++) { |
1184 // For all uses, find LCA |
1195 // For all uses, find LCA |
1185 Node* use = self->fast_out(i); |
1196 Node* use = self->fast_out(i); |
1186 LCA = raise_LCA_above_use(LCA, use, self, _bbs); |
1197 LCA = raise_LCA_above_use(LCA, use, self, this); |
1187 } |
1198 } |
1188 } // (Hide defs of imax, i from rest of block.) |
1199 } // (Hide defs of imax, i from rest of block.) |
1189 |
1200 |
1190 // Place temps in the block of their use. This isn't a |
1201 // Place temps in the block of their use. This isn't a |
1191 // requirement for correctness but it reduces useless |
1202 // requirement for correctness but it reduces useless |
1192 // interference between temps and other nodes. |
1203 // interference between temps and other nodes. |
1193 if (mach != NULL && mach->is_MachTemp()) { |
1204 if (mach != NULL && mach->is_MachTemp()) { |
1194 _bbs.map(self->_idx, LCA); |
1205 map_node_to_block(self, LCA); |
1195 LCA->add_inst(self); |
1206 LCA->add_inst(self); |
1196 continue; |
1207 continue; |
1197 } |
1208 } |
1198 |
1209 |
1199 // Check if 'self' could be anti-dependent on memory |
1210 // Check if 'self' could be anti-dependent on memory |
1255 } // Loop until all nodes have been visited |
1266 } // Loop until all nodes have been visited |
1256 |
1267 |
1257 } // end ScheduleLate |
1268 } // end ScheduleLate |
1258 |
1269 |
1259 //------------------------------GlobalCodeMotion------------------------------- |
1270 //------------------------------GlobalCodeMotion------------------------------- |
1260 void PhaseCFG::GlobalCodeMotion( Matcher &matcher, uint unique, Node_List &proj_list ) { |
1271 void PhaseCFG::global_code_motion() { |
1261 ResourceMark rm; |
1272 ResourceMark rm; |
1262 |
1273 |
1263 #ifndef PRODUCT |
1274 #ifndef PRODUCT |
1264 if (trace_opto_pipelining()) { |
1275 if (trace_opto_pipelining()) { |
1265 tty->print("\n---- Start GlobalCodeMotion ----\n"); |
1276 tty->print("\n---- Start GlobalCodeMotion ----\n"); |
1266 } |
1277 } |
1267 #endif |
1278 #endif |
1268 |
1279 |
1269 // Initialize the bbs.map for things on the proj_list |
1280 // Initialize the node to block mapping for things on the proj_list |
1270 uint i; |
1281 for (uint i = 0; i < _matcher.number_of_projections(); i++) { |
1271 for( i=0; i < proj_list.size(); i++ ) |
1282 unmap_node_from_block(_matcher.get_projection(i)); |
1272 _bbs.map(proj_list[i]->_idx, NULL); |
1283 } |
1273 |
1284 |
1274 // Set the basic block for Nodes pinned into blocks |
1285 // Set the basic block for Nodes pinned into blocks |
1275 Arena *a = Thread::current()->resource_area(); |
1286 Arena* arena = Thread::current()->resource_area(); |
1276 VectorSet visited(a); |
1287 VectorSet visited(arena); |
1277 schedule_pinned_nodes( visited ); |
1288 schedule_pinned_nodes(visited); |
1278 |
1289 |
1279 // Find the earliest Block any instruction can be placed in. Some |
1290 // Find the earliest Block any instruction can be placed in. Some |
1280 // instructions are pinned into Blocks. Unpinned instructions can |
1291 // instructions are pinned into Blocks. Unpinned instructions can |
1281 // appear in last block in which all their inputs occur. |
1292 // appear in last block in which all their inputs occur. |
1282 visited.Clear(); |
1293 visited.Clear(); |
1283 Node_List stack(a); |
1294 Node_List stack(arena); |
1284 stack.map( (unique >> 1) + 16, NULL); // Pre-grow the list |
1295 // Pre-grow the list |
|
1296 stack.map((C->unique() >> 1) + 16, NULL); |
1285 if (!schedule_early(visited, stack)) { |
1297 if (!schedule_early(visited, stack)) { |
1286 // Bailout without retry |
1298 // Bailout without retry |
1287 C->record_method_not_compilable("early schedule failed"); |
1299 C->record_method_not_compilable("early schedule failed"); |
1288 return; |
1300 return; |
1289 } |
1301 } |
1290 |
1302 |
1291 // Build Def-Use edges. |
1303 // Build Def-Use edges. |
1292 proj_list.push(_root); // Add real root as another root |
|
1293 proj_list.pop(); |
|
1294 |
|
1295 // Compute the latency information (via backwards walk) for all the |
1304 // Compute the latency information (via backwards walk) for all the |
1296 // instructions in the graph |
1305 // instructions in the graph |
1297 _node_latency = new GrowableArray<uint>(); // resource_area allocation |
1306 _node_latency = new GrowableArray<uint>(); // resource_area allocation |
1298 |
1307 |
1299 if( C->do_scheduling() ) |
1308 if (C->do_scheduling()) { |
1300 ComputeLatenciesBackwards(visited, stack); |
1309 compute_latencies_backwards(visited, stack); |
|
1310 } |
1301 |
1311 |
1302 // Now schedule all codes as LATE as possible. This is the LCA in the |
1312 // Now schedule all codes as LATE as possible. This is the LCA in the |
1303 // dominator tree of all USES of a value. Pick the block with the least |
1313 // dominator tree of all USES of a value. Pick the block with the least |
1304 // loop nesting depth that is lowest in the dominator tree. |
1314 // loop nesting depth that is lowest in the dominator tree. |
1305 // ( visited.Clear() called in schedule_late()->Node_Backward_Iterator() ) |
1315 // ( visited.Clear() called in schedule_late()->Node_Backward_Iterator() ) |
1306 schedule_late(visited, stack); |
1316 schedule_late(visited, stack); |
1307 if( C->failing() ) { |
1317 if (C->failing()) { |
1308 // schedule_late fails only when graph is incorrect. |
1318 // schedule_late fails only when graph is incorrect. |
1309 assert(!VerifyGraphEdges, "verification should have failed"); |
1319 assert(!VerifyGraphEdges, "verification should have failed"); |
1310 return; |
1320 return; |
1311 } |
1321 } |
1312 |
|
1313 unique = C->unique(); |
|
1314 |
1322 |
1315 #ifndef PRODUCT |
1323 #ifndef PRODUCT |
1316 if (trace_opto_pipelining()) { |
1324 if (trace_opto_pipelining()) { |
1317 tty->print("\n---- Detect implicit null checks ----\n"); |
1325 tty->print("\n---- Detect implicit null checks ----\n"); |
1318 } |
1326 } |
1332 allowed_reasons |= nth_bit(reason); |
1340 allowed_reasons |= nth_bit(reason); |
1333 } |
1341 } |
1334 // By reversing the loop direction we get a very minor gain on mpegaudio. |
1342 // By reversing the loop direction we get a very minor gain on mpegaudio. |
1335 // Feel free to revert to a forward loop for clarity. |
1343 // Feel free to revert to a forward loop for clarity. |
1336 // for( int i=0; i < (int)matcher._null_check_tests.size(); i+=2 ) { |
1344 // for( int i=0; i < (int)matcher._null_check_tests.size(); i+=2 ) { |
1337 for( int i= matcher._null_check_tests.size()-2; i>=0; i-=2 ) { |
1345 for (int i = _matcher._null_check_tests.size() - 2; i >= 0; i -= 2) { |
1338 Node *proj = matcher._null_check_tests[i ]; |
1346 Node* proj = _matcher._null_check_tests[i]; |
1339 Node *val = matcher._null_check_tests[i+1]; |
1347 Node* val = _matcher._null_check_tests[i + 1]; |
1340 _bbs[proj->_idx]->implicit_null_check(this, proj, val, allowed_reasons); |
1348 Block* block = get_block_for_node(proj); |
|
1349 block->implicit_null_check(this, proj, val, allowed_reasons); |
1341 // The implicit_null_check will only perform the transformation |
1350 // The implicit_null_check will only perform the transformation |
1342 // if the null branch is truly uncommon, *and* it leads to an |
1351 // if the null branch is truly uncommon, *and* it leads to an |
1343 // uncommon trap. Combined with the too_many_traps guards |
1352 // uncommon trap. Combined with the too_many_traps guards |
1344 // above, this prevents SEGV storms reported in 6366351, |
1353 // above, this prevents SEGV storms reported in 6366351, |
1345 // by recompiling offending methods without this optimization. |
1354 // by recompiling offending methods without this optimization. |
1352 } |
1361 } |
1353 #endif |
1362 #endif |
1354 |
1363 |
1355 // Schedule locally. Right now a simple topological sort. |
1364 // Schedule locally. Right now a simple topological sort. |
1356 // Later, do a real latency aware scheduler. |
1365 // Later, do a real latency aware scheduler. |
1357 uint max_idx = C->unique(); |
1366 GrowableArray<int> ready_cnt(C->unique(), C->unique(), -1); |
1358 GrowableArray<int> ready_cnt(max_idx, max_idx, -1); |
|
1359 visited.Clear(); |
1367 visited.Clear(); |
1360 for (i = 0; i < _num_blocks; i++) { |
1368 for (uint i = 0; i < number_of_blocks(); i++) { |
1361 if (!_blocks[i]->schedule_local(this, matcher, ready_cnt, visited)) { |
1369 Block* block = get_block(i); |
|
1370 if (!block->schedule_local(this, _matcher, ready_cnt, visited)) { |
1362 if (!C->failure_reason_is(C2Compiler::retry_no_subsuming_loads())) { |
1371 if (!C->failure_reason_is(C2Compiler::retry_no_subsuming_loads())) { |
1363 C->record_method_not_compilable("local schedule failed"); |
1372 C->record_method_not_compilable("local schedule failed"); |
1364 } |
1373 } |
1365 return; |
1374 return; |
1366 } |
1375 } |
1367 } |
1376 } |
1368 |
1377 |
1369 // If we inserted any instructions between a Call and his CatchNode, |
1378 // If we inserted any instructions between a Call and his CatchNode, |
1370 // clone the instructions on all paths below the Catch. |
1379 // clone the instructions on all paths below the Catch. |
1371 for( i=0; i < _num_blocks; i++ ) |
1380 for (uint i = 0; i < number_of_blocks(); i++) { |
1372 _blocks[i]->call_catch_cleanup(_bbs, C); |
1381 Block* block = get_block(i); |
|
1382 block->call_catch_cleanup(this, C); |
|
1383 } |
1373 |
1384 |
1374 #ifndef PRODUCT |
1385 #ifndef PRODUCT |
1375 if (trace_opto_pipelining()) { |
1386 if (trace_opto_pipelining()) { |
1376 tty->print("\n---- After GlobalCodeMotion ----\n"); |
1387 tty->print("\n---- After GlobalCodeMotion ----\n"); |
1377 for (uint i = 0; i < _num_blocks; i++) { |
1388 for (uint i = 0; i < number_of_blocks(); i++) { |
1378 _blocks[i]->dump(); |
1389 Block* block = get_block(i); |
|
1390 block->dump(); |
1379 } |
1391 } |
1380 } |
1392 } |
1381 #endif |
1393 #endif |
1382 // Dead. |
1394 // Dead. |
1383 _node_latency = (GrowableArray<uint> *)0xdeadbeef; |
1395 _node_latency = (GrowableArray<uint> *)0xdeadbeef; |
1384 } |
1396 } |
1385 |
1397 |
|
1398 bool PhaseCFG::do_global_code_motion() { |
|
1399 |
|
1400 build_dominator_tree(); |
|
1401 if (C->failing()) { |
|
1402 return false; |
|
1403 } |
|
1404 |
|
1405 NOT_PRODUCT( C->verify_graph_edges(); ) |
|
1406 |
|
1407 estimate_block_frequency(); |
|
1408 |
|
1409 global_code_motion(); |
|
1410 |
|
1411 if (C->failing()) { |
|
1412 return false; |
|
1413 } |
|
1414 |
|
1415 return true; |
|
1416 } |
1386 |
1417 |
1387 //------------------------------Estimate_Block_Frequency----------------------- |
1418 //------------------------------Estimate_Block_Frequency----------------------- |
1388 // Estimate block frequencies based on IfNode probabilities. |
1419 // Estimate block frequencies based on IfNode probabilities. |
1389 void PhaseCFG::Estimate_Block_Frequency() { |
1420 void PhaseCFG::estimate_block_frequency() { |
1390 |
1421 |
1391 // Force conditional branches leading to uncommon traps to be unlikely, |
1422 // Force conditional branches leading to uncommon traps to be unlikely, |
1392 // not because we get to the uncommon_trap with less relative frequency, |
1423 // not because we get to the uncommon_trap with less relative frequency, |
1393 // but because an uncommon_trap typically causes a deopt, so we only get |
1424 // but because an uncommon_trap typically causes a deopt, so we only get |
1394 // there once. |
1425 // there once. |
1395 if (C->do_freq_based_layout()) { |
1426 if (C->do_freq_based_layout()) { |
1396 Block_List worklist; |
1427 Block_List worklist; |
1397 Block* root_blk = _blocks[0]; |
1428 Block* root_blk = get_block(0); |
1398 for (uint i = 1; i < root_blk->num_preds(); i++) { |
1429 for (uint i = 1; i < root_blk->num_preds(); i++) { |
1399 Block *pb = _bbs[root_blk->pred(i)->_idx]; |
1430 Block *pb = get_block_for_node(root_blk->pred(i)); |
1400 if (pb->has_uncommon_code()) { |
1431 if (pb->has_uncommon_code()) { |
1401 worklist.push(pb); |
1432 worklist.push(pb); |
1402 } |
1433 } |
1403 } |
1434 } |
1404 while (worklist.size() > 0) { |
1435 while (worklist.size() > 0) { |
1405 Block* uct = worklist.pop(); |
1436 Block* uct = worklist.pop(); |
1406 if (uct == _broot) continue; |
1437 if (uct == get_root_block()) { |
|
1438 continue; |
|
1439 } |
1407 for (uint i = 1; i < uct->num_preds(); i++) { |
1440 for (uint i = 1; i < uct->num_preds(); i++) { |
1408 Block *pb = _bbs[uct->pred(i)->_idx]; |
1441 Block *pb = get_block_for_node(uct->pred(i)); |
1409 if (pb->_num_succs == 1) { |
1442 if (pb->_num_succs == 1) { |
1410 worklist.push(pb); |
1443 worklist.push(pb); |
1411 } else if (pb->num_fall_throughs() == 2) { |
1444 } else if (pb->num_fall_throughs() == 2) { |
1412 pb->update_uncommon_branch(uct); |
1445 pb->update_uncommon_branch(uct); |
1413 } |
1446 } |
1425 // Adjust all frequencies to be relative to a single method entry |
1458 // Adjust all frequencies to be relative to a single method entry |
1426 _root_loop->_freq = 1.0; |
1459 _root_loop->_freq = 1.0; |
1427 _root_loop->scale_freq(); |
1460 _root_loop->scale_freq(); |
1428 |
1461 |
1429 // Save outmost loop frequency for LRG frequency threshold |
1462 // Save outmost loop frequency for LRG frequency threshold |
1430 _outer_loop_freq = _root_loop->outer_loop_freq(); |
1463 _outer_loop_frequency = _root_loop->outer_loop_freq(); |
1431 |
1464 |
1432 // force paths ending at uncommon traps to be infrequent |
1465 // force paths ending at uncommon traps to be infrequent |
1433 if (!C->do_freq_based_layout()) { |
1466 if (!C->do_freq_based_layout()) { |
1434 Block_List worklist; |
1467 Block_List worklist; |
1435 Block* root_blk = _blocks[0]; |
1468 Block* root_blk = get_block(0); |
1436 for (uint i = 1; i < root_blk->num_preds(); i++) { |
1469 for (uint i = 1; i < root_blk->num_preds(); i++) { |
1437 Block *pb = _bbs[root_blk->pred(i)->_idx]; |
1470 Block *pb = get_block_for_node(root_blk->pred(i)); |
1438 if (pb->has_uncommon_code()) { |
1471 if (pb->has_uncommon_code()) { |
1439 worklist.push(pb); |
1472 worklist.push(pb); |
1440 } |
1473 } |
1441 } |
1474 } |
1442 while (worklist.size() > 0) { |
1475 while (worklist.size() > 0) { |
1443 Block* uct = worklist.pop(); |
1476 Block* uct = worklist.pop(); |
1444 uct->_freq = PROB_MIN; |
1477 uct->_freq = PROB_MIN; |
1445 for (uint i = 1; i < uct->num_preds(); i++) { |
1478 for (uint i = 1; i < uct->num_preds(); i++) { |
1446 Block *pb = _bbs[uct->pred(i)->_idx]; |
1479 Block *pb = get_block_for_node(uct->pred(i)); |
1447 if (pb->_num_succs == 1 && pb->_freq > PROB_MIN) { |
1480 if (pb->_num_succs == 1 && pb->_freq > PROB_MIN) { |
1448 worklist.push(pb); |
1481 worklist.push(pb); |
1449 } |
1482 } |
1450 } |
1483 } |
1451 } |
1484 } |
1452 } |
1485 } |
1453 |
1486 |
1454 #ifdef ASSERT |
1487 #ifdef ASSERT |
1455 for (uint i = 0; i < _num_blocks; i++ ) { |
1488 for (uint i = 0; i < number_of_blocks(); i++) { |
1456 Block *b = _blocks[i]; |
1489 Block* b = get_block(i); |
1457 assert(b->_freq >= MIN_BLOCK_FREQUENCY, "Register Allocator requires meaningful block frequency"); |
1490 assert(b->_freq >= MIN_BLOCK_FREQUENCY, "Register Allocator requires meaningful block frequency"); |
1458 } |
1491 } |
1459 #endif |
1492 #endif |
1460 |
1493 |
1461 #ifndef PRODUCT |
1494 #ifndef PRODUCT |
1475 //----------------------------create_loop_tree-------------------------------- |
1508 //----------------------------create_loop_tree-------------------------------- |
1476 // Create a loop tree from the CFG |
1509 // Create a loop tree from the CFG |
1477 CFGLoop* PhaseCFG::create_loop_tree() { |
1510 CFGLoop* PhaseCFG::create_loop_tree() { |
1478 |
1511 |
1479 #ifdef ASSERT |
1512 #ifdef ASSERT |
1480 assert( _blocks[0] == _broot, "" ); |
1513 assert(get_block(0) == get_root_block(), "first block should be root block"); |
1481 for (uint i = 0; i < _num_blocks; i++ ) { |
1514 for (uint i = 0; i < number_of_blocks(); i++) { |
1482 Block *b = _blocks[i]; |
1515 Block* block = get_block(i); |
1483 // Check that _loop field are clear...we could clear them if not. |
1516 // Check that _loop field are clear...we could clear them if not. |
1484 assert(b->_loop == NULL, "clear _loop expected"); |
1517 assert(block->_loop == NULL, "clear _loop expected"); |
1485 // Sanity check that the RPO numbering is reflected in the _blocks array. |
1518 // Sanity check that the RPO numbering is reflected in the _blocks array. |
1486 // It doesn't have to be for the loop tree to be built, but if it is not, |
1519 // It doesn't have to be for the loop tree to be built, but if it is not, |
1487 // then the blocks have been reordered since dom graph building...which |
1520 // then the blocks have been reordered since dom graph building...which |
1488 // may question the RPO numbering |
1521 // may question the RPO numbering |
1489 assert(b->_rpo == i, "unexpected reverse post order number"); |
1522 assert(block->_rpo == i, "unexpected reverse post order number"); |
1490 } |
1523 } |
1491 #endif |
1524 #endif |
1492 |
1525 |
1493 int idct = 0; |
1526 int idct = 0; |
1494 CFGLoop* root_loop = new CFGLoop(idct++); |
1527 CFGLoop* root_loop = new CFGLoop(idct++); |
1495 |
1528 |
1496 Block_List worklist; |
1529 Block_List worklist; |
1497 |
1530 |
1498 // Assign blocks to loops |
1531 // Assign blocks to loops |
1499 for(uint i = _num_blocks - 1; i > 0; i-- ) { // skip Root block |
1532 for(uint i = number_of_blocks() - 1; i > 0; i-- ) { // skip Root block |
1500 Block *b = _blocks[i]; |
1533 Block* block = get_block(i); |
1501 |
1534 |
1502 if (b->head()->is_Loop()) { |
1535 if (block->head()->is_Loop()) { |
1503 Block* loop_head = b; |
1536 Block* loop_head = block; |
1504 assert(loop_head->num_preds() - 1 == 2, "loop must have 2 predecessors"); |
1537 assert(loop_head->num_preds() - 1 == 2, "loop must have 2 predecessors"); |
1505 Node* tail_n = loop_head->pred(LoopNode::LoopBackControl); |
1538 Node* tail_n = loop_head->pred(LoopNode::LoopBackControl); |
1506 Block* tail = _bbs[tail_n->_idx]; |
1539 Block* tail = get_block_for_node(tail_n); |
1507 |
1540 |
1508 // Defensively filter out Loop nodes for non-single-entry loops. |
1541 // Defensively filter out Loop nodes for non-single-entry loops. |
1509 // For all reasonable loops, the head occurs before the tail in RPO. |
1542 // For all reasonable loops, the head occurs before the tail in RPO. |
1510 if (i <= tail->_rpo) { |
1543 if (i <= tail->_rpo) { |
1511 |
1544 |
1516 CFGLoop* nloop = new CFGLoop(idct++); |
1549 CFGLoop* nloop = new CFGLoop(idct++); |
1517 assert(loop_head->_loop == NULL, "just checking"); |
1550 assert(loop_head->_loop == NULL, "just checking"); |
1518 loop_head->_loop = nloop; |
1551 loop_head->_loop = nloop; |
1519 // Add to nloop so push_pred() will skip over inner loops |
1552 // Add to nloop so push_pred() will skip over inner loops |
1520 nloop->add_member(loop_head); |
1553 nloop->add_member(loop_head); |
1521 nloop->push_pred(loop_head, LoopNode::LoopBackControl, worklist, _bbs); |
1554 nloop->push_pred(loop_head, LoopNode::LoopBackControl, worklist, this); |
1522 |
1555 |
1523 while (worklist.size() > 0) { |
1556 while (worklist.size() > 0) { |
1524 Block* member = worklist.pop(); |
1557 Block* member = worklist.pop(); |
1525 if (member != loop_head) { |
1558 if (member != loop_head) { |
1526 for (uint j = 1; j < member->num_preds(); j++) { |
1559 for (uint j = 1; j < member->num_preds(); j++) { |
1527 nloop->push_pred(member, j, worklist, _bbs); |
1560 nloop->push_pred(member, j, worklist, this); |
1528 } |
1561 } |
1529 } |
1562 } |
1530 } |
1563 } |
1531 } |
1564 } |
1532 } |
1565 } |
1533 } |
1566 } |
1534 |
1567 |
1535 // Create a member list for each loop consisting |
1568 // Create a member list for each loop consisting |
1536 // of both blocks and (immediate child) loops. |
1569 // of both blocks and (immediate child) loops. |
1537 for (uint i = 0; i < _num_blocks; i++) { |
1570 for (uint i = 0; i < number_of_blocks(); i++) { |
1538 Block *b = _blocks[i]; |
1571 Block* block = get_block(i); |
1539 CFGLoop* lp = b->_loop; |
1572 CFGLoop* lp = block->_loop; |
1540 if (lp == NULL) { |
1573 if (lp == NULL) { |
1541 // Not assigned to a loop. Add it to the method's pseudo loop. |
1574 // Not assigned to a loop. Add it to the method's pseudo loop. |
1542 b->_loop = root_loop; |
1575 block->_loop = root_loop; |
1543 lp = root_loop; |
1576 lp = root_loop; |
1544 } |
1577 } |
1545 if (lp == root_loop || b != lp->head()) { // loop heads are already members |
1578 if (lp == root_loop || block != lp->head()) { // loop heads are already members |
1546 lp->add_member(b); |
1579 lp->add_member(block); |
1547 } |
1580 } |
1548 if (lp != root_loop) { |
1581 if (lp != root_loop) { |
1549 if (lp->parent() == NULL) { |
1582 if (lp->parent() == NULL) { |
1550 // Not a nested loop. Make it a child of the method's pseudo loop. |
1583 // Not a nested loop. Make it a child of the method's pseudo loop. |
1551 root_loop->add_nested_loop(lp); |
1584 root_loop->add_nested_loop(lp); |
1552 } |
1585 } |
1553 if (b == lp->head()) { |
1586 if (block == lp->head()) { |
1554 // Add nested loop to member list of parent loop. |
1587 // Add nested loop to member list of parent loop. |
1555 lp->parent()->add_member(lp); |
1588 lp->parent()->add_member(lp); |
1556 } |
1589 } |
1557 } |
1590 } |
1558 } |
1591 } |
1559 |
1592 |
1560 return root_loop; |
1593 return root_loop; |
1561 } |
1594 } |
1562 |
1595 |
1563 //------------------------------push_pred-------------------------------------- |
1596 //------------------------------push_pred-------------------------------------- |
1564 void CFGLoop::push_pred(Block* blk, int i, Block_List& worklist, Block_Array& node_to_blk) { |
1597 void CFGLoop::push_pred(Block* blk, int i, Block_List& worklist, PhaseCFG* cfg) { |
1565 Node* pred_n = blk->pred(i); |
1598 Node* pred_n = blk->pred(i); |
1566 Block* pred = node_to_blk[pred_n->_idx]; |
1599 Block* pred = cfg->get_block_for_node(pred_n); |
1567 CFGLoop *pred_loop = pred->_loop; |
1600 CFGLoop *pred_loop = pred->_loop; |
1568 if (pred_loop == NULL) { |
1601 if (pred_loop == NULL) { |
1569 // Filter out blocks for non-single-entry loops. |
1602 // Filter out blocks for non-single-entry loops. |
1570 // For all reasonable loops, the head occurs before the tail in RPO. |
1603 // For all reasonable loops, the head occurs before the tail in RPO. |
1571 if (pred->_rpo > head()->_rpo) { |
1604 if (pred->_rpo > head()->_rpo) { |