Thu, 23 Jul 2009 14:53:56 -0700
6860599: nodes limit could be reached during Output phase
Summary: Bail out of compilation if the node limit could be reached during the Output phase.
Reviewed-by: never, twisti
1.1 --- a/src/share/vm/opto/compile.cpp Wed Jul 22 15:48:51 2009 -0700 1.2 +++ b/src/share/vm/opto/compile.cpp Thu Jul 23 14:53:56 2009 -0700 1.3 @@ -441,6 +441,8 @@ 1.4 _orig_pc_slot_offset_in_bytes(0), 1.5 _node_bundling_limit(0), 1.6 _node_bundling_base(NULL), 1.7 + _java_calls(0), 1.8 + _inner_loops(0), 1.9 #ifndef PRODUCT 1.10 _trace_opto_output(TraceOptoOutput || method()->has_option("TraceOptoOutput")), 1.11 _printer(IdealGraphPrinter::printer()), 1.12 @@ -711,6 +713,8 @@ 1.13 _code_buffer("Compile::Fill_buffer"), 1.14 _node_bundling_limit(0), 1.15 _node_bundling_base(NULL), 1.16 + _java_calls(0), 1.17 + _inner_loops(0), 1.18 #ifndef PRODUCT 1.19 _trace_opto_output(TraceOptoOutput), 1.20 _printer(NULL), 1.21 @@ -1851,22 +1855,26 @@ 1.22 int _float_count; // count float ops requiring 24-bit precision 1.23 int _double_count; // count double ops requiring more precision 1.24 int _java_call_count; // count non-inlined 'java' calls 1.25 + int _inner_loop_count; // count loops which need alignment 1.26 VectorSet _visited; // Visitation flags 1.27 Node_List _tests; // Set of IfNodes & PCTableNodes 1.28 1.29 Final_Reshape_Counts() : 1.30 - _call_count(0), _float_count(0), _double_count(0), _java_call_count(0), 1.31 + _call_count(0), _float_count(0), _double_count(0), 1.32 + _java_call_count(0), _inner_loop_count(0), 1.33 _visited( Thread::current()->resource_area() ) { } 1.34 1.35 void inc_call_count () { _call_count ++; } 1.36 void inc_float_count () { _float_count ++; } 1.37 void inc_double_count() { _double_count++; } 1.38 void inc_java_call_count() { _java_call_count++; } 1.39 + void inc_inner_loop_count() { _inner_loop_count++; } 1.40 1.41 int get_call_count () const { return _call_count ; } 1.42 int get_float_count () const { return _float_count ; } 1.43 int get_double_count() const { return _double_count; } 1.44 int get_java_call_count() const { return _java_call_count; } 1.45 + int get_inner_loop_count() const { return _inner_loop_count; } 1.46 }; 1.47 1.48 
static bool oop_offset_is_sane(const TypeInstPtr* tp) { 1.49 @@ -1878,7 +1886,7 @@ 1.50 1.51 //------------------------------final_graph_reshaping_impl---------------------- 1.52 // Implement items 1-5 from final_graph_reshaping below. 1.53 -static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &fpu ) { 1.54 +static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc ) { 1.55 1.56 if ( n->outcnt() == 0 ) return; // dead node 1.57 uint nop = n->Opcode(); 1.58 @@ -1920,13 +1928,13 @@ 1.59 case Op_CmpF: 1.60 case Op_CmpF3: 1.61 // case Op_ConvL2F: // longs are split into 32-bit halves 1.62 - fpu.inc_float_count(); 1.63 + frc.inc_float_count(); 1.64 break; 1.65 1.66 case Op_ConvF2D: 1.67 case Op_ConvD2F: 1.68 - fpu.inc_float_count(); 1.69 - fpu.inc_double_count(); 1.70 + frc.inc_float_count(); 1.71 + frc.inc_double_count(); 1.72 break; 1.73 1.74 // Count all double operations that may use FPU 1.75 @@ -1943,7 +1951,7 @@ 1.76 case Op_ConD: 1.77 case Op_CmpD: 1.78 case Op_CmpD3: 1.79 - fpu.inc_double_count(); 1.80 + frc.inc_double_count(); 1.81 break; 1.82 case Op_Opaque1: // Remove Opaque Nodes before matching 1.83 case Op_Opaque2: // Remove Opaque Nodes before matching 1.84 @@ -1952,7 +1960,7 @@ 1.85 case Op_CallStaticJava: 1.86 case Op_CallJava: 1.87 case Op_CallDynamicJava: 1.88 - fpu.inc_java_call_count(); // Count java call site; 1.89 + frc.inc_java_call_count(); // Count java call site; 1.90 case Op_CallRuntime: 1.91 case Op_CallLeaf: 1.92 case Op_CallLeafNoFP: { 1.93 @@ -1963,7 +1971,7 @@ 1.94 // uncommon_trap, _complete_monitor_locking, _complete_monitor_unlocking, 1.95 // _new_Java, _new_typeArray, _new_objArray, _rethrow_Java, ... 
1.96 if( !call->is_CallStaticJava() || !call->as_CallStaticJava()->_name ) { 1.97 - fpu.inc_call_count(); // Count the call site 1.98 + frc.inc_call_count(); // Count the call site 1.99 } else { // See if uncommon argument is shared 1.100 Node *n = call->in(TypeFunc::Parms); 1.101 int nop = n->Opcode(); 1.102 @@ -1984,11 +1992,11 @@ 1.103 case Op_StoreD: 1.104 case Op_LoadD: 1.105 case Op_LoadD_unaligned: 1.106 - fpu.inc_double_count(); 1.107 + frc.inc_double_count(); 1.108 goto handle_mem; 1.109 case Op_StoreF: 1.110 case Op_LoadF: 1.111 - fpu.inc_float_count(); 1.112 + frc.inc_float_count(); 1.113 goto handle_mem; 1.114 1.115 case Op_StoreB: 1.116 @@ -2325,6 +2333,12 @@ 1.117 n->subsume_by(btp); 1.118 } 1.119 break; 1.120 + case Op_Loop: 1.121 + case Op_CountedLoop: 1.122 + if (n->as_Loop()->is_inner_loop()) { 1.123 + frc.inc_inner_loop_count(); 1.124 + } 1.125 + break; 1.126 default: 1.127 assert( !n->is_Call(), "" ); 1.128 assert( !n->is_Mem(), "" ); 1.129 @@ -2333,17 +2347,17 @@ 1.130 1.131 // Collect CFG split points 1.132 if (n->is_MultiBranch()) 1.133 - fpu._tests.push(n); 1.134 + frc._tests.push(n); 1.135 } 1.136 1.137 //------------------------------final_graph_reshaping_walk--------------------- 1.138 // Replacing Opaque nodes with their input in final_graph_reshaping_impl(), 1.139 // requires that the walk visits a node's inputs before visiting the node. 
1.140 -static void final_graph_reshaping_walk( Node_Stack &nstack, Node *root, Final_Reshape_Counts &fpu ) { 1.141 +static void final_graph_reshaping_walk( Node_Stack &nstack, Node *root, Final_Reshape_Counts &frc ) { 1.142 ResourceArea *area = Thread::current()->resource_area(); 1.143 Unique_Node_List sfpt(area); 1.144 1.145 - fpu._visited.set(root->_idx); // first, mark node as visited 1.146 + frc._visited.set(root->_idx); // first, mark node as visited 1.147 uint cnt = root->req(); 1.148 Node *n = root; 1.149 uint i = 0; 1.150 @@ -2352,7 +2366,7 @@ 1.151 // Place all non-visited non-null inputs onto stack 1.152 Node* m = n->in(i); 1.153 ++i; 1.154 - if (m != NULL && !fpu._visited.test_set(m->_idx)) { 1.155 + if (m != NULL && !frc._visited.test_set(m->_idx)) { 1.156 if (m->is_SafePoint() && m->as_SafePoint()->jvms() != NULL) 1.157 sfpt.push(m); 1.158 cnt = m->req(); 1.159 @@ -2362,7 +2376,7 @@ 1.160 } 1.161 } else { 1.162 // Now do post-visit work 1.163 - final_graph_reshaping_impl( n, fpu ); 1.164 + final_graph_reshaping_impl( n, frc ); 1.165 if (nstack.is_empty()) 1.166 break; // finished 1.167 n = nstack.node(); // Get node from stack 1.168 @@ -2443,16 +2457,16 @@ 1.169 return true; 1.170 } 1.171 1.172 - Final_Reshape_Counts fpu; 1.173 + Final_Reshape_Counts frc; 1.174 1.175 // Visit everybody reachable! 1.176 // Allocate stack of size C->unique()/2 to avoid frequent realloc 1.177 Node_Stack nstack(unique() >> 1); 1.178 - final_graph_reshaping_walk(nstack, root(), fpu); 1.179 + final_graph_reshaping_walk(nstack, root(), frc); 1.180 1.181 // Check for unreachable (from below) code (i.e., infinite loops). 1.182 - for( uint i = 0; i < fpu._tests.size(); i++ ) { 1.183 - MultiBranchNode *n = fpu._tests[i]->as_MultiBranch(); 1.184 + for( uint i = 0; i < frc._tests.size(); i++ ) { 1.185 + MultiBranchNode *n = frc._tests[i]->as_MultiBranch(); 1.186 // Get number of CFG targets. 1.187 // Note that PCTables include exception targets after calls. 
1.188 uint required_outcnt = n->required_outcnt(); 1.189 @@ -2498,7 +2512,7 @@ 1.190 // Check that I actually visited all kids. Unreached kids 1.191 // must be infinite loops. 1.192 for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) 1.193 - if (!fpu._visited.test(n->fast_out(j)->_idx)) { 1.194 + if (!frc._visited.test(n->fast_out(j)->_idx)) { 1.195 record_method_not_compilable("infinite loop"); 1.196 return true; // Found unvisited kid; must be unreach 1.197 } 1.198 @@ -2507,13 +2521,14 @@ 1.199 // If original bytecodes contained a mixture of floats and doubles 1.200 // check if the optimizer has made it homogenous, item (3). 1.201 if( Use24BitFPMode && Use24BitFP && 1.202 - fpu.get_float_count() > 32 && 1.203 - fpu.get_double_count() == 0 && 1.204 - (10 * fpu.get_call_count() < fpu.get_float_count()) ) { 1.205 + frc.get_float_count() > 32 && 1.206 + frc.get_double_count() == 0 && 1.207 + (10 * frc.get_call_count() < frc.get_float_count()) ) { 1.208 set_24_bit_selection_and_mode( false, true ); 1.209 } 1.210 1.211 - set_has_java_calls(fpu.get_java_call_count() > 0); 1.212 + set_java_calls(frc.get_java_call_count()); 1.213 + set_inner_loops(frc.get_inner_loop_count()); 1.214 1.215 // No infinite loops, no reason to bail out. 1.216 return false;
2.1 --- a/src/share/vm/opto/compile.hpp Wed Jul 22 15:48:51 2009 -0700 2.2 +++ b/src/share/vm/opto/compile.hpp Thu Jul 23 14:53:56 2009 -0700 2.3 @@ -223,7 +223,8 @@ 2.4 PhaseCFG* _cfg; // Results of CFG finding 2.5 bool _select_24_bit_instr; // We selected an instruction with a 24-bit result 2.6 bool _in_24_bit_fp_mode; // We are emitting instructions with 24-bit results 2.7 - bool _has_java_calls; // True if the method has java calls 2.8 + int _java_calls; // Number of java calls in the method 2.9 + int _inner_loops; // Number of inner loops in the method 2.10 Matcher* _matcher; // Engine to map ideal to machine instructions 2.11 PhaseRegAlloc* _regalloc; // Results of register allocation. 2.12 int _frame_slots; // Size of total frame in stack slots 2.13 @@ -505,7 +506,9 @@ 2.14 PhaseCFG* cfg() { return _cfg; } 2.15 bool select_24_bit_instr() const { return _select_24_bit_instr; } 2.16 bool in_24_bit_fp_mode() const { return _in_24_bit_fp_mode; } 2.17 - bool has_java_calls() const { return _has_java_calls; } 2.18 + bool has_java_calls() const { return _java_calls > 0; } 2.19 + int java_calls() const { return _java_calls; } 2.20 + int inner_loops() const { return _inner_loops; } 2.21 Matcher* matcher() { return _matcher; } 2.22 PhaseRegAlloc* regalloc() { return _regalloc; } 2.23 int frame_slots() const { return _frame_slots; } 2.24 @@ -532,7 +535,8 @@ 2.25 _in_24_bit_fp_mode = mode; 2.26 } 2.27 2.28 - void set_has_java_calls(bool z) { _has_java_calls = z; } 2.29 + void set_java_calls(int z) { _java_calls = z; } 2.30 + void set_inner_loops(int z) { _inner_loops = z; } 2.31 2.32 // Instruction bits passed off to the VM 2.33 int code_size() { return _method_size; }
3.1 --- a/src/share/vm/opto/output.cpp Wed Jul 22 15:48:51 2009 -0700 3.2 +++ b/src/share/vm/opto/output.cpp Thu Jul 23 14:53:56 2009 -0700 3.3 @@ -50,6 +50,13 @@ 3.4 init_scratch_buffer_blob(); 3.5 if (failing()) return; // Out of memory 3.6 3.7 + // The number of new nodes (mostly MachNop) is proportional to 3.8 + // the number of java calls and inner loops which are aligned. 3.9 + if ( C->check_node_count((NodeLimitFudgeFactor + C->java_calls()*3 + 3.10 + C->inner_loops()*(OptoLoopAlignment-1)), 3.11 + "out of nodes before code generation" ) ) { 3.12 + return; 3.13 + } 3.14 // Make sure I can find the Start Node 3.15 Block_Array& bbs = _cfg->_bbs; 3.16 Block *entry = _cfg->_blocks[1]; 3.17 @@ -1105,7 +1112,7 @@ 3.18 uint *call_returns = NEW_RESOURCE_ARRAY(uint, _cfg->_num_blocks+1); 3.19 3.20 uint return_offset = 0; 3.21 - MachNode *nop = new (this) MachNopNode(); 3.22 + int nop_size = (new (this) MachNopNode())->size(_regalloc); 3.23 3.24 int previous_offset = 0; 3.25 int current_offset = 0; 3.26 @@ -1188,7 +1195,6 @@ 3.27 } 3.28 3.29 // align the instruction if necessary 3.30 - int nop_size = nop->size(_regalloc); 3.31 int padding = mach->compute_padding(current_offset); 3.32 // Make sure safepoint node for polling is distinct from a call's 3.33 // return by adding a nop if needed. 3.34 @@ -1372,7 +1378,6 @@ 3.35 3.36 // If the next block is the top of a loop, pad this block out to align 3.37 // the loop top a little. Helps prevent pipe stalls at loop back branches. 3.38 - int nop_size = (new (this) MachNopNode())->size(_regalloc); 3.39 if( i<_cfg->_num_blocks-1 ) { 3.40 Block *nb = _cfg->_blocks[i+1]; 3.41 uint padding = nb->alignment_padding(current_offset);