duke@435 | 1 | /* |
twisti@1639 | 2 | * Copyright 1998-2010 Sun Microsystems, Inc. All Rights Reserved. |
duke@435 | 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
duke@435 | 4 | * |
duke@435 | 5 | * This code is free software; you can redistribute it and/or modify it |
duke@435 | 6 | * under the terms of the GNU General Public License version 2 only, as |
duke@435 | 7 | * published by the Free Software Foundation. |
duke@435 | 8 | * |
duke@435 | 9 | * This code is distributed in the hope that it will be useful, but WITHOUT |
duke@435 | 10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
duke@435 | 11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
duke@435 | 12 | * version 2 for more details (a copy is included in the LICENSE file that |
duke@435 | 13 | * accompanied this code). |
duke@435 | 14 | * |
duke@435 | 15 | * You should have received a copy of the GNU General Public License version |
duke@435 | 16 | * 2 along with this work; if not, write to the Free Software Foundation, |
duke@435 | 17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
duke@435 | 18 | * |
duke@435 | 19 | * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, |
duke@435 | 20 | * CA 95054 USA or visit www.sun.com if you need additional information or |
duke@435 | 21 | * have any questions. |
duke@435 | 22 | * |
duke@435 | 23 | */ |
duke@435 | 24 | |
duke@435 | 25 | #include "incls/_precompiled.incl" |
duke@435 | 26 | #include "incls/_output.cpp.incl" |
duke@435 | 27 | |
duke@435 | 28 | extern uint size_java_to_interp(); |
duke@435 | 29 | extern uint reloc_java_to_interp(); |
duke@435 | 30 | extern uint size_exception_handler(); |
duke@435 | 31 | extern uint size_deopt_handler(); |
duke@435 | 32 | |
duke@435 | 33 | #ifndef PRODUCT |
duke@435 | 34 | #define DEBUG_ARG(x) , x |
duke@435 | 35 | #else |
duke@435 | 36 | #define DEBUG_ARG(x) |
duke@435 | 37 | #endif |
duke@435 | 38 | |
duke@435 | 39 | extern int emit_exception_handler(CodeBuffer &cbuf); |
duke@435 | 40 | extern int emit_deopt_handler(CodeBuffer &cbuf); |
duke@435 | 41 | |
duke@435 | 42 | //------------------------------Output----------------------------------------- |
duke@435 | 43 | // Convert Nodes to instruction bits and pass off to the VM |
duke@435 | 44 | void Compile::Output() { |
duke@435 | 45 |   // The RootNode's block (broot) should still be empty at this point |
duke@435 | 46 | assert( _cfg->_broot->_nodes.size() == 0, "" ); |
duke@435 | 47 | |
duke@435 | 48 | // Initialize the space for the BufferBlob used to find and verify |
duke@435 | 49 | // instruction size in MachNode::emit_size() |
duke@435 | 50 | init_scratch_buffer_blob(); |
kvn@598 | 51 | if (failing()) return; // Out of memory |
duke@435 | 52 | |
kvn@1294 | 53 | // The number of new nodes (mostly MachNop) is proportional to |
kvn@1294 | 54 | // the number of java calls and inner loops which are aligned. |
kvn@1294 | 55 | if ( C->check_node_count((NodeLimitFudgeFactor + C->java_calls()*3 + |
kvn@1294 | 56 | C->inner_loops()*(OptoLoopAlignment-1)), |
kvn@1294 | 57 | "out of nodes before code generation" ) ) { |
kvn@1294 | 58 | return; |
kvn@1294 | 59 | } |
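// [Editorial worked example, hypothetical numbers] If NodeLimitFudgeFactor
// were 2000, a method with 100 Java calls and 5 aligned inner loops at
// OptoLoopAlignment == 16 would be allowed 2000 + 100*3 + 5*(16-1) = 2375
// extra nodes by the check above before bailing out.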
duke@435 | 60 | // Make sure I can find the Start Node |
duke@435 | 61 | Block_Array& bbs = _cfg->_bbs; |
duke@435 | 62 | Block *entry = _cfg->_blocks[1]; |
duke@435 | 63 | Block *broot = _cfg->_broot; |
duke@435 | 64 | |
duke@435 | 65 | const StartNode *start = entry->_nodes[0]->as_Start(); |
duke@435 | 66 | |
duke@435 | 67 | // Replace StartNode with prolog |
duke@435 | 68 | MachPrologNode *prolog = new (this) MachPrologNode(); |
duke@435 | 69 | entry->_nodes.map( 0, prolog ); |
duke@435 | 70 | bbs.map( prolog->_idx, entry ); |
duke@435 | 71 | bbs.map( start->_idx, NULL ); // start is no longer in any block |
duke@435 | 72 | |
duke@435 | 73 | // Virtual methods need an unverified entry point |
duke@435 | 74 | |
duke@435 | 75 | if( is_osr_compilation() ) { |
duke@435 | 76 | if( PoisonOSREntry ) { |
duke@435 | 77 | // TODO: Should use a ShouldNotReachHereNode... |
duke@435 | 78 | _cfg->insert( broot, 0, new (this) MachBreakpointNode() ); |
duke@435 | 79 | } |
duke@435 | 80 | } else { |
duke@435 | 81 | if( _method && !_method->flags().is_static() ) { |
duke@435 | 82 | // Insert unvalidated entry point |
duke@435 | 83 | _cfg->insert( broot, 0, new (this) MachUEPNode() ); |
duke@435 | 84 | } |
duke@435 | 85 | |
duke@435 | 86 | } |
duke@435 | 87 | |
duke@435 | 88 | |
duke@435 | 89 | // Break before main entry point |
duke@435 | 90 | if( (_method && _method->break_at_execute()) |
duke@435 | 91 | #ifndef PRODUCT |
duke@435 | 92 | ||(OptoBreakpoint && is_method_compilation()) |
duke@435 | 93 | ||(OptoBreakpointOSR && is_osr_compilation()) |
duke@435 | 94 | ||(OptoBreakpointC2R && !_method) |
duke@435 | 95 | #endif |
duke@435 | 96 | ) { |
duke@435 | 97 | // checking for _method means that OptoBreakpoint does not apply to |
duke@435 | 98 | // runtime stubs or frame converters |
duke@435 | 99 | _cfg->insert( entry, 1, new (this) MachBreakpointNode() ); |
duke@435 | 100 | } |
duke@435 | 101 | |
duke@435 | 102 | // Insert epilogs before every return |
duke@435 | 103 | for( uint i=0; i<_cfg->_num_blocks; i++ ) { |
duke@435 | 104 | Block *b = _cfg->_blocks[i]; |
duke@435 | 105 | if( !b->is_connector() && b->non_connector_successor(0) == _cfg->_broot ) { // Found a program exit point? |
duke@435 | 106 | Node *m = b->end(); |
duke@435 | 107 | if( m->is_Mach() && m->as_Mach()->ideal_Opcode() != Op_Halt ) { |
duke@435 | 108 | MachEpilogNode *epilog = new (this) MachEpilogNode(m->as_Mach()->ideal_Opcode() == Op_Return); |
duke@435 | 109 | b->add_inst( epilog ); |
duke@435 | 110 | bbs.map(epilog->_idx, b); |
duke@435 | 111 | //_regalloc->set_bad(epilog->_idx); // Already initialized this way. |
duke@435 | 112 | } |
duke@435 | 113 | } |
duke@435 | 114 | } |
duke@435 | 115 | |
duke@435 | 116 | # ifdef ENABLE_ZAP_DEAD_LOCALS |
duke@435 | 117 | if ( ZapDeadCompiledLocals ) Insert_zap_nodes(); |
duke@435 | 118 | # endif |
duke@435 | 119 | |
duke@435 | 120 | ScheduleAndBundle(); |
duke@435 | 121 | |
duke@435 | 122 | #ifndef PRODUCT |
duke@435 | 123 | if (trace_opto_output()) { |
duke@435 | 124 | tty->print("\n---- After ScheduleAndBundle ----\n"); |
duke@435 | 125 | for (uint i = 0; i < _cfg->_num_blocks; i++) { |
duke@435 | 126 | tty->print("\nBB#%03d:\n", i); |
duke@435 | 127 | Block *bb = _cfg->_blocks[i]; |
duke@435 | 128 | for (uint j = 0; j < bb->_nodes.size(); j++) { |
duke@435 | 129 | Node *n = bb->_nodes[j]; |
duke@435 | 130 | OptoReg::Name reg = _regalloc->get_reg_first(n); |
duke@435 | 131 | tty->print(" %-6s ", reg >= 0 && reg < REG_COUNT ? Matcher::regName[reg] : ""); |
duke@435 | 132 | n->dump(); |
duke@435 | 133 | } |
duke@435 | 134 | } |
duke@435 | 135 | } |
duke@435 | 136 | #endif |
duke@435 | 137 | |
duke@435 | 138 | if (failing()) return; |
duke@435 | 139 | |
duke@435 | 140 | BuildOopMaps(); |
duke@435 | 141 | |
duke@435 | 142 | if (failing()) return; |
duke@435 | 143 | |
duke@435 | 144 | Fill_buffer(); |
duke@435 | 145 | } |
duke@435 | 146 | |
duke@435 | 147 | bool Compile::need_stack_bang(int frame_size_in_bytes) const { |
duke@435 | 148 | // Determine if we need to generate a stack overflow check. |
duke@435 | 149 | // Do it if the method is not a stub function and |
duke@435 | 150 | // has java calls or has frame size > vm_page_size/8. |
duke@435 | 151 | return (stub_function() == NULL && |
duke@435 | 152 | (has_java_calls() || frame_size_in_bytes > os::vm_page_size()>>3)); |
duke@435 | 153 | } |
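// [Editorial worked example] On a typical 4 KiB page system,
// os::vm_page_size() >> 3 == 512, so a compiled method with no Java calls
// gets a stack bang only once its frame exceeds 512 bytes.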
duke@435 | 154 | |
duke@435 | 155 | bool Compile::need_register_stack_bang() const { |
duke@435 | 156 | // Determine if we need to generate a register stack overflow check. |
duke@435 | 157 | // This is only used on architectures which have split register |
duke@435 | 158 |   // and memory stacks (i.e., IA64). |
duke@435 | 159 | // Bang if the method is not a stub function and has java calls |
duke@435 | 160 | return (stub_function() == NULL && has_java_calls()); |
duke@435 | 161 | } |
duke@435 | 162 | |
duke@435 | 163 | # ifdef ENABLE_ZAP_DEAD_LOCALS |
duke@435 | 164 | |
duke@435 | 165 | |
duke@435 | 166 | // In order to catch compiler oop-map bugs, we have implemented |
duke@435 | 167 | // a debugging mode called ZapDeadCompilerLocals. |
duke@435 | 168 | // This mode causes the compiler to insert a call to a runtime routine, |
duke@435 | 169 | // "zap_dead_locals", right before each place in compiled code |
duke@435 | 170 | // that could potentially be a gc-point (i.e., a safepoint or oop map point). |
duke@435 | 171 | // The runtime routine checks that locations mapped as oops are really |
duke@435 | 172 | // oops, that locations mapped as values do not look like oops, |
duke@435 | 173 | // and that locations mapped as dead are not used later |
duke@435 | 174 | // (by zapping them to an invalid address). |
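// [Editorial sketch, debug builds only; flag values hypothetical] The range
// of instrumented compilations can be narrowed with the CompileZapFirst and
// CompileZapLast flags consumed below, e.g.
//   -XX:+ZapDeadCompiledLocals -XX:CompileZapFirst=5 -XX:CompileZapLast=7
// skips the first five zap-eligible compilations and instruments only
// compilations 5 through 7 of the zero-based _CompiledZap_count.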
duke@435 | 175 | |
duke@435 | 176 | int Compile::_CompiledZap_count = 0; |
duke@435 | 177 | |
duke@435 | 178 | void Compile::Insert_zap_nodes() { |
duke@435 | 179 | bool skip = false; |
duke@435 | 180 | |
duke@435 | 181 | |
duke@435 | 182 |   // Dink with static counts because code without the extra |
duke@435 | 183 |   // runtime calls is MUCH faster for debugging purposes |
duke@435 | 184 | |
duke@435 | 185 | if ( CompileZapFirst == 0 ) ; // nothing special |
duke@435 | 186 | else if ( CompileZapFirst > CompiledZap_count() ) skip = true; |
duke@435 | 187 | else if ( CompileZapFirst == CompiledZap_count() ) |
duke@435 | 188 | warning("starting zap compilation after skipping"); |
duke@435 | 189 | |
duke@435 | 190 | if ( CompileZapLast == -1 ) ; // nothing special |
duke@435 | 191 | else if ( CompileZapLast < CompiledZap_count() ) skip = true; |
duke@435 | 192 | else if ( CompileZapLast == CompiledZap_count() ) |
duke@435 | 193 | warning("about to compile last zap"); |
duke@435 | 194 | |
duke@435 | 195 | ++_CompiledZap_count; // counts skipped zaps, too |
duke@435 | 196 | |
duke@435 | 197 | if ( skip ) return; |
duke@435 | 198 | |
duke@435 | 199 | |
duke@435 | 200 | if ( _method == NULL ) |
duke@435 | 201 |     return; // no safepoints/oopmaps emitted for calls in stubs, so we don't care |
duke@435 | 202 | |
duke@435 | 203 | // Insert call to zap runtime stub before every node with an oop map |
duke@435 | 204 | for( uint i=0; i<_cfg->_num_blocks; i++ ) { |
duke@435 | 205 | Block *b = _cfg->_blocks[i]; |
duke@435 | 206 | for ( uint j = 0; j < b->_nodes.size(); ++j ) { |
duke@435 | 207 | Node *n = b->_nodes[j]; |
duke@435 | 208 | |
duke@435 | 209 |       // Determine whether we should insert a zap-a-lot node in output. |
duke@435 | 210 |       // We do that for all nodes that have oopmap info, except for calls |
duke@435 | 211 |       // to allocation.  Calls to allocation pass in the old top-of-eden pointer |
duke@435 | 212 |       // and expect the C code to reset it.  Hence, there can be no safepoints between |
duke@435 | 213 |       // the inlined-allocation and the call to new_Java, etc. |
duke@435 | 214 | // We also cannot zap monitor calls, as they must hold the microlock |
duke@435 | 215 | // during the call to Zap, which also wants to grab the microlock. |
duke@435 | 216 | bool insert = n->is_MachSafePoint() && (n->as_MachSafePoint()->oop_map() != NULL); |
duke@435 | 217 | if ( insert ) { // it is MachSafePoint |
duke@435 | 218 | if ( !n->is_MachCall() ) { |
duke@435 | 219 | insert = false; |
duke@435 | 220 | } else if ( n->is_MachCall() ) { |
duke@435 | 221 | MachCallNode* call = n->as_MachCall(); |
duke@435 | 222 | if (call->entry_point() == OptoRuntime::new_instance_Java() || |
duke@435 | 223 | call->entry_point() == OptoRuntime::new_array_Java() || |
duke@435 | 224 | call->entry_point() == OptoRuntime::multianewarray2_Java() || |
duke@435 | 225 | call->entry_point() == OptoRuntime::multianewarray3_Java() || |
duke@435 | 226 | call->entry_point() == OptoRuntime::multianewarray4_Java() || |
duke@435 | 227 | call->entry_point() == OptoRuntime::multianewarray5_Java() || |
duke@435 | 228 | call->entry_point() == OptoRuntime::slow_arraycopy_Java() || |
duke@435 | 229 | call->entry_point() == OptoRuntime::complete_monitor_locking_Java() |
duke@435 | 230 | ) { |
duke@435 | 231 | insert = false; |
duke@435 | 232 | } |
duke@435 | 233 | } |
duke@435 | 234 | if (insert) { |
duke@435 | 235 | Node *zap = call_zap_node(n->as_MachSafePoint(), i); |
duke@435 | 236 | b->_nodes.insert( j, zap ); |
duke@435 | 237 | _cfg->_bbs.map( zap->_idx, b ); |
duke@435 | 238 | ++j; |
duke@435 | 239 | } |
duke@435 | 240 | } |
duke@435 | 241 | } |
duke@435 | 242 | } |
duke@435 | 243 | } |
duke@435 | 244 | |
duke@435 | 245 | |
duke@435 | 246 | Node* Compile::call_zap_node(MachSafePointNode* node_to_check, int block_no) { |
duke@435 | 247 | const TypeFunc *tf = OptoRuntime::zap_dead_locals_Type(); |
duke@435 | 248 | CallStaticJavaNode* ideal_node = |
duke@435 | 249 | new (this, tf->domain()->cnt()) CallStaticJavaNode( tf, |
duke@435 | 250 | OptoRuntime::zap_dead_locals_stub(_method->flags().is_native()), |
duke@435 | 251 | "call zap dead locals stub", 0, TypePtr::BOTTOM); |
duke@435 | 252 | // We need to copy the OopMap from the site we're zapping at. |
duke@435 | 253 | // We have to make a copy, because the zap site might not be |
duke@435 | 254 | // a call site, and zap_dead is a call site. |
duke@435 | 255 | OopMap* clone = node_to_check->oop_map()->deep_copy(); |
duke@435 | 256 | |
duke@435 | 257 | // Add the cloned OopMap to the zap node |
duke@435 | 258 | ideal_node->set_oop_map(clone); |
duke@435 | 259 | return _matcher->match_sfpt(ideal_node); |
duke@435 | 260 | } |
duke@435 | 261 | |
duke@435 | 262 | //------------------------------is_node_getting_a_safepoint-------------------- |
duke@435 | 263 | bool Compile::is_node_getting_a_safepoint( Node* n) { |
duke@435 | 264 | // This code duplicates the logic prior to the call of add_safepoint |
duke@435 | 265 | // below in this file. |
duke@435 | 266 | if( n->is_MachSafePoint() ) return true; |
duke@435 | 267 | return false; |
duke@435 | 268 | } |
duke@435 | 269 | |
duke@435 | 270 | # endif // ENABLE_ZAP_DEAD_LOCALS |
duke@435 | 271 | |
duke@435 | 272 | //------------------------------compute_loop_first_inst_sizes------------------ |
rasbold@853 | 273 | // Compute the size of the first NumberOfLoopInstrToAlign instructions at the top |
duke@435 | 274 | // of a loop. When aligning a loop we need to provide enough instructions |
duke@435 | 275 | // in the cpu's fetch buffer to feed the decoders. The loop alignment could be |
duke@435 | 276 | // avoided if we have enough instructions in the fetch buffer at the head of a loop. |
duke@435 | 277 | // By default, the size is set to 999999 by Block's constructor so that |
duke@435 | 278 | // a loop will be aligned if the size is not reset here. |
duke@435 | 279 | // |
duke@435 | 280 | // Note: Mach instructions could contain several HW instructions |
duke@435 | 281 | // so the size is estimated only. |
duke@435 | 282 | // |
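// [Editorial example] Suppose NumberOfLoopInstrToAlign were 4 and a loop's
// header block held only two instructions: the scan below would continue
// sizing instructions in the fall-through blocks until four had been
// counted, the next aligned loop head was reached, or the chain branched
// back into the loop, then record the summed byte size via
// set_first_inst_size().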
duke@435 | 283 | void Compile::compute_loop_first_inst_sizes() { |
duke@435 | 284 | // The next condition is used to gate the loop alignment optimization. |
duke@435 | 285 |   // Don't align a loop if there are enough instructions at the head of the loop |
duke@435 | 286 |   // or the alignment padding is larger than MaxLoopPad. By default, MaxLoopPad |
duke@435 | 287 |   // is equal to OptoLoopAlignment-1 except on new Intel cpus, where it is |
duke@435 | 288 |   // equal to 11 bytes, the size of the largest NOP instruction. |
duke@435 | 289 | if( MaxLoopPad < OptoLoopAlignment-1 ) { |
duke@435 | 290 | uint last_block = _cfg->_num_blocks-1; |
duke@435 | 291 | for( uint i=1; i <= last_block; i++ ) { |
duke@435 | 292 | Block *b = _cfg->_blocks[i]; |
duke@435 | 293 | // Check the first loop's block which requires an alignment. |
rasbold@853 | 294 | if( b->loop_alignment() > (uint)relocInfo::addr_unit() ) { |
duke@435 | 295 | uint sum_size = 0; |
duke@435 | 296 | uint inst_cnt = NumberOfLoopInstrToAlign; |
rasbold@853 | 297 | inst_cnt = b->compute_first_inst_size(sum_size, inst_cnt, _regalloc); |
rasbold@853 | 298 | |
rasbold@853 | 299 | // Check subsequent fallthrough blocks if the loop's first |
rasbold@853 | 300 |         // block(s) do not have enough instructions. |
rasbold@853 | 301 | Block *nb = b; |
rasbold@853 | 302 | while( inst_cnt > 0 && |
rasbold@853 | 303 | i < last_block && |
rasbold@853 | 304 | !_cfg->_blocks[i+1]->has_loop_alignment() && |
rasbold@853 | 305 | !nb->has_successor(b) ) { |
rasbold@853 | 306 | i++; |
rasbold@853 | 307 | nb = _cfg->_blocks[i]; |
rasbold@853 | 308 | inst_cnt = nb->compute_first_inst_size(sum_size, inst_cnt, _regalloc); |
rasbold@853 | 309 | } // while( inst_cnt > 0 && i < last_block ) |
rasbold@853 | 310 | |
duke@435 | 311 | b->set_first_inst_size(sum_size); |
duke@435 | 312 |       } // if( b->loop_alignment() > (uint)relocInfo::addr_unit() ) |
duke@435 | 313 | } // for( i <= last_block ) |
duke@435 | 314 | } // if( MaxLoopPad < OptoLoopAlignment-1 ) |
duke@435 | 315 | } |
duke@435 | 316 | |
duke@435 | 317 | //----------------------Shorten_branches--------------------------------------- |
duke@435 | 318 | // The architecture description provides short branch variants for some long |
duke@435 | 319 | // branch instructions. Replace eligible long branches with short branches. |
duke@435 | 320 | void Compile::Shorten_branches(Label *labels, int& code_size, int& reloc_size, int& stub_size, int& const_size) { |
duke@435 | 321 | |
duke@435 | 322 | // fill in the nop array for bundling computations |
duke@435 | 323 | MachNode *_nop_list[Bundle::_nop_count]; |
duke@435 | 324 | Bundle::initialize_nops(_nop_list, this); |
duke@435 | 325 | |
duke@435 | 326 | // ------------------ |
duke@435 | 327 | // Compute size of each block, method size, and relocation information size |
duke@435 | 328 | uint *jmp_end = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks); |
duke@435 | 329 | uint *blk_starts = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks+1); |
duke@435 | 330 | DEBUG_ONLY( uint *jmp_target = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks); ) |
never@850 | 331 | DEBUG_ONLY( uint *jmp_rule = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks); ) |
duke@435 | 332 | blk_starts[0] = 0; |
duke@435 | 333 | |
duke@435 | 334 | // Initialize the sizes to 0 |
duke@435 | 335 | code_size = 0; // Size in bytes of generated code |
duke@435 | 336 | stub_size = 0; // Size in bytes of all stub entries |
duke@435 | 337 | // Size in bytes of all relocation entries, including those in local stubs. |
duke@435 | 338 |   // Start with 2 bytes of reloc info for the unvalidated entry point |
duke@435 | 339 | reloc_size = 1; // Number of relocation entries |
duke@435 | 340 | const_size = 0; // size of fp constants in words |
duke@435 | 341 | |
duke@435 | 342 | // Make three passes. The first computes pessimistic blk_starts, |
duke@435 | 343 | // relative jmp_end, reloc_size and const_size information. |
duke@435 | 344 | // The second performs short branch substitution using the pessimistic |
duke@435 | 345 | // sizing. The third inserts nops where needed. |
duke@435 | 346 | |
duke@435 | 347 | Node *nj; // tmp |
duke@435 | 348 | |
duke@435 | 349 | // Step one, perform a pessimistic sizing pass. |
duke@435 | 350 | uint i; |
duke@435 | 351 | uint min_offset_from_last_call = 1; // init to a positive value |
duke@435 | 352 | uint nop_size = (new (this) MachNopNode())->size(_regalloc); |
duke@435 | 353 | for( i=0; i<_cfg->_num_blocks; i++ ) { // For all blocks |
duke@435 | 354 | Block *b = _cfg->_blocks[i]; |
duke@435 | 355 | |
duke@435 | 356 | // Sum all instruction sizes to compute block size |
duke@435 | 357 | uint last_inst = b->_nodes.size(); |
duke@435 | 358 | uint blk_size = 0; |
duke@435 | 359 | for( uint j = 0; j<last_inst; j++ ) { |
duke@435 | 360 | nj = b->_nodes[j]; |
duke@435 | 361 | uint inst_size = nj->size(_regalloc); |
duke@435 | 362 | blk_size += inst_size; |
duke@435 | 363 | // Handle machine instruction nodes |
duke@435 | 364 | if( nj->is_Mach() ) { |
duke@435 | 365 | MachNode *mach = nj->as_Mach(); |
duke@435 | 366 | blk_size += (mach->alignment_required() - 1) * relocInfo::addr_unit(); // assume worst case padding |
duke@435 | 367 | reloc_size += mach->reloc(); |
duke@435 | 368 | const_size += mach->const_size(); |
duke@435 | 369 | if( mach->is_MachCall() ) { |
duke@435 | 370 | MachCallNode *mcall = mach->as_MachCall(); |
duke@435 | 371 | // This destination address is NOT PC-relative |
duke@435 | 372 | |
duke@435 | 373 | mcall->method_set((intptr_t)mcall->entry_point()); |
duke@435 | 374 | |
duke@435 | 375 | if( mcall->is_MachCallJava() && mcall->as_MachCallJava()->_method ) { |
duke@435 | 376 | stub_size += size_java_to_interp(); |
duke@435 | 377 | reloc_size += reloc_java_to_interp(); |
duke@435 | 378 | } |
duke@435 | 379 | } else if (mach->is_MachSafePoint()) { |
duke@435 | 380 | // If call/safepoint are adjacent, account for possible |
duke@435 | 381 | // nop to disambiguate the two safepoints. |
duke@435 | 382 | if (min_offset_from_last_call == 0) { |
duke@435 | 383 | blk_size += nop_size; |
duke@435 | 384 | } |
duke@435 | 385 | } |
duke@435 | 386 | } |
duke@435 | 387 | min_offset_from_last_call += inst_size; |
duke@435 | 388 | // Remember end of call offset |
duke@435 | 389 | if (nj->is_MachCall() && nj->as_MachCall()->is_safepoint_node()) { |
duke@435 | 390 | min_offset_from_last_call = 0; |
duke@435 | 391 | } |
duke@435 | 392 | } |
duke@435 | 393 | |
duke@435 | 394 | // During short branch replacement, we store the relative (to blk_starts) |
duke@435 | 395 | // end of jump in jmp_end, rather than the absolute end of jump. This |
duke@435 | 396 | // is so that we do not need to recompute sizes of all nodes when we compute |
duke@435 | 397 | // correct blk_starts in our next sizing pass. |
duke@435 | 398 | jmp_end[i] = blk_size; |
duke@435 | 399 | DEBUG_ONLY( jmp_target[i] = 0; ) |
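// [Editorial worked example] If blk_starts[i] == 0x80, the block's branch
// ends 0x20 bytes in (jmp_end[i] == 0x20), and the target block starts at
// 0x98, the displacement is 0x98 - (0x80 + 0x20) = -8 bytes; step two below
// hands exactly this value to _matcher->is_short_branch_offset() to decide
// whether a short encoding reaches.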
duke@435 | 400 | |
duke@435 | 401 | // When the next block starts a loop, we may insert pad NOP |
duke@435 | 402 | // instructions. Since we cannot know our future alignment, |
duke@435 | 403 | // assume the worst. |
duke@435 | 404 | if( i<_cfg->_num_blocks-1 ) { |
duke@435 | 405 | Block *nb = _cfg->_blocks[i+1]; |
duke@435 | 406 | int max_loop_pad = nb->code_alignment()-relocInfo::addr_unit(); |
duke@435 | 407 | if( max_loop_pad > 0 ) { |
duke@435 | 408 | assert(is_power_of_2(max_loop_pad+relocInfo::addr_unit()), ""); |
duke@435 | 409 | blk_size += max_loop_pad; |
duke@435 | 410 | } |
duke@435 | 411 | } |
duke@435 | 412 | |
duke@435 | 413 | // Save block size; update total method size |
duke@435 | 414 | blk_starts[i+1] = blk_starts[i]+blk_size; |
duke@435 | 415 | } |
duke@435 | 416 | |
duke@435 | 417 | // Step two, replace eligible long jumps. |
duke@435 | 418 | |
duke@435 | 419 | // Note: this will only get the long branches within short branch |
duke@435 | 420 | // range. Another pass might detect more branches that became |
duke@435 | 421 | // candidates because the shortening in the first pass exposed |
duke@435 | 422 | // more opportunities. Unfortunately, this would require |
duke@435 | 423 | // recomputing the starting and ending positions for the blocks |
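// [Editorial example of the limitation above] Shortening a branch in an
// early block can pull a later target block to a lower address, so that
// some other long branch would now also fit a short encoding; because
// blk_starts is not recomputed, that branch keeps its long form.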
duke@435 | 424 | for( i=0; i<_cfg->_num_blocks; i++ ) { |
duke@435 | 425 | Block *b = _cfg->_blocks[i]; |
duke@435 | 426 | |
duke@435 | 427 | int j; |
duke@435 | 428 | // Find the branch; ignore trailing NOPs. |
duke@435 | 429 | for( j = b->_nodes.size()-1; j>=0; j-- ) { |
duke@435 | 430 | nj = b->_nodes[j]; |
duke@435 | 431 | if( !nj->is_Mach() || nj->as_Mach()->ideal_Opcode() != Op_Con ) |
duke@435 | 432 | break; |
duke@435 | 433 | } |
duke@435 | 434 | |
duke@435 | 435 | if (j >= 0) { |
duke@435 | 436 | if( nj->is_Mach() && nj->as_Mach()->may_be_short_branch() ) { |
duke@435 | 437 | MachNode *mach = nj->as_Mach(); |
duke@435 | 438 | // This requires the TRUE branch target be in succs[0] |
duke@435 | 439 | uint bnum = b->non_connector_successor(0)->_pre_order; |
duke@435 | 440 | uintptr_t target = blk_starts[bnum]; |
duke@435 | 441 | if( mach->is_pc_relative() ) { |
duke@435 | 442 | int offset = target-(blk_starts[i] + jmp_end[i]); |
never@850 | 443 | if (_matcher->is_short_branch_offset(mach->rule(), offset)) { |
duke@435 | 444 | // We've got a winner. Replace this branch. |
never@850 | 445 | MachNode* replacement = mach->short_branch_version(this); |
duke@435 | 446 | b->_nodes.map(j, replacement); |
never@657 | 447 | mach->subsume_by(replacement); |
duke@435 | 448 | |
duke@435 | 449 | // Update the jmp_end size to save time in our |
duke@435 | 450 | // next pass. |
duke@435 | 451 | jmp_end[i] -= (mach->size(_regalloc) - replacement->size(_regalloc)); |
duke@435 | 452 | DEBUG_ONLY( jmp_target[i] = bnum; ); |
never@850 | 453 | DEBUG_ONLY( jmp_rule[i] = mach->rule(); ); |
duke@435 | 454 | } |
duke@435 | 455 | } else { |
duke@435 | 456 | #ifndef PRODUCT |
duke@435 | 457 | mach->dump(3); |
duke@435 | 458 | #endif |
duke@435 | 459 | Unimplemented(); |
duke@435 | 460 | } |
duke@435 | 461 | } |
duke@435 | 462 | } |
duke@435 | 463 | } |
duke@435 | 464 | |
duke@435 | 465 |   // Compute the size of the first NumberOfLoopInstrToAlign instructions at the head |
duke@435 | 466 |   // of a loop. It is used to determine the padding for loop alignment. |
duke@435 | 467 | compute_loop_first_inst_sizes(); |
duke@435 | 468 | |
duke@435 | 469 |   // Step three, compute the offsets of all the labels |
duke@435 | 470 | uint last_call_adr = max_uint; |
duke@435 | 471 | for( i=0; i<_cfg->_num_blocks; i++ ) { // For all blocks |
duke@435 | 472 | // copy the offset of the beginning to the corresponding label |
duke@435 | 473 | assert(labels[i].is_unused(), "cannot patch at this point"); |
duke@435 | 474 | labels[i].bind_loc(blk_starts[i], CodeBuffer::SECT_INSTS); |
duke@435 | 475 | |
duke@435 | 476 | // insert padding for any instructions that need it |
duke@435 | 477 | Block *b = _cfg->_blocks[i]; |
duke@435 | 478 | uint last_inst = b->_nodes.size(); |
duke@435 | 479 | uint adr = blk_starts[i]; |
duke@435 | 480 | for( uint j = 0; j<last_inst; j++ ) { |
duke@435 | 481 | nj = b->_nodes[j]; |
duke@435 | 482 | if( nj->is_Mach() ) { |
duke@435 | 483 | int padding = nj->as_Mach()->compute_padding(adr); |
duke@435 | 484 |         // If call/safepoint are adjacent, insert a nop (5010568) |
duke@435 | 485 | if (padding == 0 && nj->is_MachSafePoint() && !nj->is_MachCall() && |
duke@435 | 486 | adr == last_call_adr ) { |
duke@435 | 487 | padding = nop_size; |
duke@435 | 488 | } |
duke@435 | 489 | if(padding > 0) { |
duke@435 | 490 | assert((padding % nop_size) == 0, "padding is not a multiple of NOP size"); |
duke@435 | 491 | int nops_cnt = padding / nop_size; |
duke@435 | 492 | MachNode *nop = new (this) MachNopNode(nops_cnt); |
duke@435 | 493 | b->_nodes.insert(j++, nop); |
duke@435 | 494 | _cfg->_bbs.map( nop->_idx, b ); |
duke@435 | 495 | adr += padding; |
duke@435 | 496 | last_inst++; |
duke@435 | 497 | } |
duke@435 | 498 | } |
duke@435 | 499 | adr += nj->size(_regalloc); |
duke@435 | 500 | |
duke@435 | 501 | // Remember end of call offset |
duke@435 | 502 | if (nj->is_MachCall() && nj->as_MachCall()->is_safepoint_node()) { |
duke@435 | 503 | last_call_adr = adr; |
duke@435 | 504 | } |
duke@435 | 505 | } |
duke@435 | 506 | |
duke@435 | 507 | if ( i != _cfg->_num_blocks-1) { |
duke@435 | 508 | // Get the size of the block |
duke@435 | 509 | uint blk_size = adr - blk_starts[i]; |
duke@435 | 510 | |
rasbold@853 | 511 | // When the next block is the top of a loop, we may insert pad NOP |
duke@435 | 512 | // instructions. |
duke@435 | 513 | Block *nb = _cfg->_blocks[i+1]; |
duke@435 | 514 | int current_offset = blk_starts[i] + blk_size; |
duke@435 | 515 | current_offset += nb->alignment_padding(current_offset); |
duke@435 | 516 | // Save block size; update total method size |
duke@435 | 517 | blk_starts[i+1] = current_offset; |
duke@435 | 518 | } |
duke@435 | 519 | } |
duke@435 | 520 | |
duke@435 | 521 | #ifdef ASSERT |
duke@435 | 522 | for( i=0; i<_cfg->_num_blocks; i++ ) { // For all blocks |
duke@435 | 523 | if( jmp_target[i] != 0 ) { |
duke@435 | 524 | int offset = blk_starts[jmp_target[i]]-(blk_starts[i] + jmp_end[i]); |
never@850 | 525 | if (!_matcher->is_short_branch_offset(jmp_rule[i], offset)) { |
duke@435 | 526 | tty->print_cr("target (%d) - jmp_end(%d) = offset (%d), jmp_block B%d, target_block B%d", blk_starts[jmp_target[i]], blk_starts[i] + jmp_end[i], offset, i, jmp_target[i]); |
duke@435 | 527 | } |
never@850 | 528 | assert(_matcher->is_short_branch_offset(jmp_rule[i], offset), "Displacement too large for short jmp"); |
duke@435 | 529 | } |
duke@435 | 530 | } |
duke@435 | 531 | #endif |
duke@435 | 532 | |
duke@435 | 533 | // ------------------ |
duke@435 | 534 | // Compute size for code buffer |
duke@435 | 535 | code_size = blk_starts[i-1] + jmp_end[i-1]; |
duke@435 | 536 | |
duke@435 | 537 | // Relocation records |
duke@435 | 538 |   reloc_size += 1;              // Reloc entry for the exception handler |
duke@435 | 539 | |
duke@435 | 540 |   // Adjust reloc_size from a count of relocation records to a size estimate |
duke@435 | 541 | // Min is 2 bytes, max is probably 6 or 8, with a tax up to 25% for |
duke@435 | 542 | // a relocation index. |
duke@435 | 543 | // The CodeBuffer will expand the locs array if this estimate is too low. |
duke@435 | 544 | reloc_size *= 10 / sizeof(relocInfo); |
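// [Editorial note] relocInfo is a single 16-bit unit in this code base, so
// the line above budgets 10 / sizeof(relocInfo) == 5 units (about 10 bytes,
// the worst-case record plus index tax estimated above) per counted entry;
// e.g. 100 counted entries become 500 relocInfo units.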
duke@435 | 545 | |
duke@435 | 546 | // Adjust const_size to number of bytes |
duke@435 | 547 | const_size *= 2*jintSize; // both float and double take two words per entry |
duke@435 | 548 | |
duke@435 | 549 | } |
duke@435 | 550 | |
duke@435 | 551 | //------------------------------FillLocArray----------------------------------- |
duke@435 | 552 | // Create a bit of debug info and append it to the array. The mapping is from |
duke@435 | 553 | // Java local or expression stack to constant, register or stack-slot. For |
duke@435 | 554 | // doubles, insert two mappings; the JVMS passes top() for the second-half |
duke@435 | 555 | // slot, and the array-length check at the top of this function absorbs it. |
duke@435 | 556 | static LocationValue *new_loc_value( PhaseRegAlloc *ra, OptoReg::Name regnum, Location::Type l_type ) { |
duke@435 | 557 | // This should never have accepted Bad before |
duke@435 | 558 | assert(OptoReg::is_valid(regnum), "location must be valid"); |
duke@435 | 559 | return (OptoReg::is_reg(regnum)) |
duke@435 | 560 | ? new LocationValue(Location::new_reg_loc(l_type, OptoReg::as_VMReg(regnum)) ) |
duke@435 | 561 | : new LocationValue(Location::new_stk_loc(l_type, ra->reg2offset(regnum))); |
duke@435 | 562 | } |
duke@435 | 563 | |
kvn@498 | 564 | |
kvn@498 | 565 | ObjectValue* |
kvn@498 | 566 | Compile::sv_for_node_id(GrowableArray<ScopeValue*> *objs, int id) { |
kvn@498 | 567 | for (int i = 0; i < objs->length(); i++) { |
kvn@498 | 568 | assert(objs->at(i)->is_object(), "corrupt object cache"); |
kvn@498 | 569 | ObjectValue* sv = (ObjectValue*) objs->at(i); |
kvn@498 | 570 | if (sv->id() == id) { |
kvn@498 | 571 | return sv; |
kvn@498 | 572 | } |
kvn@498 | 573 | } |
kvn@498 | 574 | // Otherwise.. |
kvn@498 | 575 | return NULL; |
kvn@498 | 576 | } |
kvn@498 | 577 | |
kvn@498 | 578 | void Compile::set_sv_for_object_node(GrowableArray<ScopeValue*> *objs, |
kvn@498 | 579 | ObjectValue* sv ) { |
kvn@498 | 580 | assert(sv_for_node_id(objs, sv->id()) == NULL, "Precondition"); |
kvn@498 | 581 | objs->append(sv); |
kvn@498 | 582 | } |
kvn@498 | 583 | |
kvn@498 | 584 | |
kvn@498 | 585 | void Compile::FillLocArray( int idx, MachSafePointNode* sfpt, Node *local, |
kvn@498 | 586 | GrowableArray<ScopeValue*> *array, |
kvn@498 | 587 | GrowableArray<ScopeValue*> *objs ) { |
duke@435 | 588 | assert( local, "use _top instead of null" ); |
duke@435 | 589 | if (array->length() != idx) { |
duke@435 | 590 | assert(array->length() == idx + 1, "Unexpected array count"); |
duke@435 | 591 | // Old functionality: |
duke@435 | 592 | // return |
duke@435 | 593 | // New functionality: |
duke@435 | 594 | // Assert if the local is not top. In product mode let the new node |
duke@435 | 595 | // override the old entry. |
duke@435 | 596 | assert(local == top(), "LocArray collision"); |
duke@435 | 597 | if (local == top()) { |
duke@435 | 598 | return; |
duke@435 | 599 | } |
duke@435 | 600 | array->pop(); |
duke@435 | 601 | } |
duke@435 | 602 | const Type *t = local->bottom_type(); |
duke@435 | 603 | |
kvn@498 | 604 | // Is it a safepoint scalar object node? |
kvn@498 | 605 | if (local->is_SafePointScalarObject()) { |
kvn@498 | 606 | SafePointScalarObjectNode* spobj = local->as_SafePointScalarObject(); |
kvn@498 | 607 | |
kvn@498 | 608 | ObjectValue* sv = Compile::sv_for_node_id(objs, spobj->_idx); |
kvn@498 | 609 | if (sv == NULL) { |
kvn@498 | 610 | ciKlass* cik = t->is_oopptr()->klass(); |
kvn@498 | 611 | assert(cik->is_instance_klass() || |
kvn@498 | 612 | cik->is_array_klass(), "Not supported allocation."); |
kvn@498 | 613 | sv = new ObjectValue(spobj->_idx, |
jrose@1424 | 614 | new ConstantOopWriteValue(cik->constant_encoding())); |
kvn@498 | 615 | Compile::set_sv_for_object_node(objs, sv); |
kvn@498 | 616 | |
kvn@498 | 617 | uint first_ind = spobj->first_index(); |
kvn@498 | 618 | for (uint i = 0; i < spobj->n_fields(); i++) { |
kvn@498 | 619 | Node* fld_node = sfpt->in(first_ind+i); |
kvn@498 | 620 | (void)FillLocArray(sv->field_values()->length(), sfpt, fld_node, sv->field_values(), objs); |
kvn@498 | 621 | } |
kvn@498 | 622 | } |
kvn@498 | 623 | array->append(sv); |
kvn@498 | 624 | return; |
kvn@498 | 625 | } |
kvn@498 | 626 | |
duke@435 | 627 | // Grab the register number for the local |
duke@435 | 628 | OptoReg::Name regnum = _regalloc->get_reg_first(local); |
duke@435 | 629 | if( OptoReg::is_valid(regnum) ) {// Got a register/stack? |
duke@435 | 630 | // Record the double as two float registers. |
duke@435 | 631 | // The register mask for such a value always specifies two adjacent |
duke@435 | 632 | // float registers, with the lower register number even. |
duke@435 | 633 | // Normally, the allocation of high and low words to these registers |
duke@435 | 634 | // is irrelevant, because nearly all operations on register pairs |
duke@435 | 635 | // (e.g., StoreD) treat them as a single unit. |
duke@435 | 636 |     // Here, we assume in addition that the words in these two registers are |
duke@435 | 637 | // stored "naturally" (by operations like StoreD and double stores |
duke@435 | 638 | // within the interpreter) such that the lower-numbered register |
duke@435 | 639 | // is written to the lower memory address. This may seem like |
duke@435 | 640 | // a machine dependency, but it is not--it is a requirement on |
duke@435 | 641 | // the author of the <arch>.ad file to ensure that, for every |
duke@435 | 642 | // even/odd double-register pair to which a double may be allocated, |
duke@435 | 643 | // the word in the even single-register is stored to the first |
duke@435 | 644 | // memory word. (Note that register numbers are completely |
duke@435 | 645 | // arbitrary, and are not tied to any machine-level encodings.) |
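// [Editorial example] If a double were allocated to a (hypothetical) pair
// F4/F5, the requirement above says a StoreD of that pair must place the
// word held in F4, the even lower-numbered register, at the lower of the
// two memory addresses, so the mapping below can be emitted uniformly.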
duke@435 | 646 | #ifdef _LP64 |
duke@435 | 647 | if( t->base() == Type::DoubleBot || t->base() == Type::DoubleCon ) { |
duke@435 | 648 | array->append(new ConstantIntValue(0)); |
duke@435 | 649 | array->append(new_loc_value( _regalloc, regnum, Location::dbl )); |
duke@435 | 650 | } else if ( t->base() == Type::Long ) { |
duke@435 | 651 | array->append(new ConstantIntValue(0)); |
duke@435 | 652 | array->append(new_loc_value( _regalloc, regnum, Location::lng )); |
duke@435 | 653 | } else if ( t->base() == Type::RawPtr ) { |
duke@435 | 654 |       // jsr/ret return address which must be restored into the full |
duke@435 | 655 | // width 64-bit stack slot. |
duke@435 | 656 | array->append(new_loc_value( _regalloc, regnum, Location::lng )); |
duke@435 | 657 | } |
duke@435 | 658 | #else //_LP64 |
duke@435 | 659 | #ifdef SPARC |
duke@435 | 660 | if (t->base() == Type::Long && OptoReg::is_reg(regnum)) { |
duke@435 | 661 | // For SPARC we have to swap high and low words for |
duke@435 | 662 | // long values stored in a single-register (g0-g7). |
duke@435 | 663 | array->append(new_loc_value( _regalloc, regnum , Location::normal )); |
duke@435 | 664 | array->append(new_loc_value( _regalloc, OptoReg::add(regnum,1), Location::normal )); |
duke@435 | 665 | } else |
duke@435 | 666 | #endif //SPARC |
duke@435 | 667 | if( t->base() == Type::DoubleBot || t->base() == Type::DoubleCon || t->base() == Type::Long ) { |
duke@435 | 668 | // Repack the double/long as two jints. |
duke@435 | 669 | // The convention the interpreter uses is that the second local |
duke@435 | 670 | // holds the first raw word of the native double representation. |
duke@435 | 671 | // This is actually reasonable, since locals and stack arrays |
duke@435 | 672 | // grow downwards in all implementations. |
duke@435 | 673 | // (If, on some machine, the interpreter's Java locals or stack |
duke@435 | 674 | // were to grow upwards, the embedded doubles would be word-swapped.) |
duke@435 | 675 | array->append(new_loc_value( _regalloc, OptoReg::add(regnum,1), Location::normal )); |
duke@435 | 676 | array->append(new_loc_value( _regalloc, regnum , Location::normal )); |
duke@435 | 677 | } |
duke@435 | 678 | #endif //_LP64 |
duke@435 | 679 | else if( (t->base() == Type::FloatBot || t->base() == Type::FloatCon) && |
duke@435 | 680 | OptoReg::is_reg(regnum) ) { |
duke@435 | 681 | array->append(new_loc_value( _regalloc, regnum, Matcher::float_in_double |
duke@435 | 682 | ? Location::float_in_dbl : Location::normal )); |
duke@435 | 683 | } else if( t->base() == Type::Int && OptoReg::is_reg(regnum) ) { |
duke@435 | 684 | array->append(new_loc_value( _regalloc, regnum, Matcher::int_in_long |
duke@435 | 685 | ? Location::int_in_long : Location::normal )); |
kvn@766 | 686 | } else if( t->base() == Type::NarrowOop ) { |
kvn@766 | 687 | array->append(new_loc_value( _regalloc, regnum, Location::narrowoop )); |
duke@435 | 688 | } else { |
duke@435 | 689 | array->append(new_loc_value( _regalloc, regnum, _regalloc->is_oop(local) ? Location::oop : Location::normal )); |
duke@435 | 690 | } |
duke@435 | 691 | return; |
duke@435 | 692 | } |
duke@435 | 693 | |
duke@435 | 694 | // No register. It must be constant data. |
duke@435 | 695 | switch (t->base()) { |
duke@435 | 696 | case Type::Half: // Second half of a double |
duke@435 | 697 | ShouldNotReachHere(); // Caller should skip 2nd halves |
duke@435 | 698 | break; |
duke@435 | 699 | case Type::AnyPtr: |
duke@435 | 700 | array->append(new ConstantOopWriteValue(NULL)); |
duke@435 | 701 | break; |
duke@435 | 702 | case Type::AryPtr: |
duke@435 | 703 | case Type::InstPtr: |
duke@435 | 704 | case Type::KlassPtr: // fall through |
jrose@1424 | 705 | array->append(new ConstantOopWriteValue(t->isa_oopptr()->const_oop()->constant_encoding())); |
duke@435 | 706 | break; |
kvn@766 | 707 | case Type::NarrowOop: |
kvn@766 | 708 | if (t == TypeNarrowOop::NULL_PTR) { |
kvn@766 | 709 | array->append(new ConstantOopWriteValue(NULL)); |
kvn@766 | 710 | } else { |
jrose@1424 | 711 | array->append(new ConstantOopWriteValue(t->make_ptr()->isa_oopptr()->const_oop()->constant_encoding())); |
kvn@766 | 712 | } |
kvn@766 | 713 | break; |
duke@435 | 714 | case Type::Int: |
duke@435 | 715 | array->append(new ConstantIntValue(t->is_int()->get_con())); |
duke@435 | 716 | break; |
duke@435 | 717 | case Type::RawPtr: |
duke@435 | 718 | // A return address (T_ADDRESS). |
duke@435 | 719 | assert((intptr_t)t->is_ptr()->get_con() < (intptr_t)0x10000, "must be a valid BCI"); |
duke@435 | 720 | #ifdef _LP64 |
duke@435 | 721 | // Must be restored to the full-width 64-bit stack slot. |
duke@435 | 722 | array->append(new ConstantLongValue(t->is_ptr()->get_con())); |
duke@435 | 723 | #else |
duke@435 | 724 | array->append(new ConstantIntValue(t->is_ptr()->get_con())); |
duke@435 | 725 | #endif |
duke@435 | 726 | break; |
duke@435 | 727 | case Type::FloatCon: { |
duke@435 | 728 | float f = t->is_float_constant()->getf(); |
duke@435 | 729 | array->append(new ConstantIntValue(jint_cast(f))); |
duke@435 | 730 | break; |
duke@435 | 731 | } |
duke@435 | 732 | case Type::DoubleCon: { |
duke@435 | 733 | jdouble d = t->is_double_constant()->getd(); |
duke@435 | 734 | #ifdef _LP64 |
duke@435 | 735 | array->append(new ConstantIntValue(0)); |
duke@435 | 736 | array->append(new ConstantDoubleValue(d)); |
duke@435 | 737 | #else |
duke@435 | 738 | // Repack the double as two jints. |
duke@435 | 739 | // The convention the interpreter uses is that the second local |
duke@435 | 740 | // holds the first raw word of the native double representation. |
duke@435 | 741 | // This is actually reasonable, since locals and stack arrays |
duke@435 | 742 | // grow downwards in all implementations. |
duke@435 | 743 | // (If, on some machine, the interpreter's Java locals or stack |
duke@435 | 744 | // were to grow upwards, the embedded doubles would be word-swapped.) |
duke@435 | 745 | jint *dp = (jint*)&d; |
duke@435 | 746 | array->append(new ConstantIntValue(dp[1])); |
duke@435 | 747 | array->append(new ConstantIntValue(dp[0])); |
duke@435 | 748 | #endif |
duke@435 | 749 | break; |
duke@435 | 750 | } |
duke@435 | 751 | case Type::Long: { |
duke@435 | 752 | jlong d = t->is_long()->get_con(); |
duke@435 | 753 | #ifdef _LP64 |
duke@435 | 754 | array->append(new ConstantIntValue(0)); |
duke@435 | 755 | array->append(new ConstantLongValue(d)); |
duke@435 | 756 | #else |
duke@435 | 757 | // Repack the long as two jints. |
duke@435 | 758 | // The convention the interpreter uses is that the second local |
duke@435 | 759 | // holds the first raw word of the native double representation. |
duke@435 | 760 | // This is actually reasonable, since locals and stack arrays |
duke@435 | 761 | // grow downwards in all implementations. |
duke@435 | 762 | // (If, on some machine, the interpreter's Java locals or stack |
duke@435 | 763 | // were to grow upwards, the embedded doubles would be word-swapped.) |
duke@435 | 764 | jint *dp = (jint*)&d; |
duke@435 | 765 | array->append(new ConstantIntValue(dp[1])); |
duke@435 | 766 | array->append(new ConstantIntValue(dp[0])); |
duke@435 | 767 | #endif |
duke@435 | 768 | break; |
duke@435 | 769 | } |
duke@435 | 770 | case Type::Top: // Add an illegal value here |
duke@435 | 771 | array->append(new LocationValue(Location())); |
duke@435 | 772 | break; |
duke@435 | 773 | default: |
duke@435 | 774 | ShouldNotReachHere(); |
duke@435 | 775 | break; |
duke@435 | 776 | } |
duke@435 | 777 | } |
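// [Editorial sketch, not VM code; kept out of the build] The 32-bit
// repacking convention used in the cases above, shown as a standalone
// helper with illustrative names: the interpreter convention is that the
// SECOND slot holds the first raw word of the native 64-bit representation.
#if 0
static void repack_long_for_scope(jlong v, jint out[2]) {
  jint* dp = (jint*)&v;  // view the 64-bit value as two raw 32-bit words
  out[0] = dp[1];        // appended first, like ConstantIntValue(dp[1]) above
  out[1] = dp[0];        // appended second, like ConstantIntValue(dp[0]) above
}
#endif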
duke@435 | 778 | |
duke@435 | 779 | // Determine if this node starts a bundle |
duke@435 | 780 | bool Compile::starts_bundle(const Node *n) const { |
duke@435 | 781 | return (_node_bundling_limit > n->_idx && |
duke@435 | 782 | _node_bundling_base[n->_idx].starts_bundle()); |
duke@435 | 783 | } |
duke@435 | 784 | |
duke@435 | 785 | //--------------------------Process_OopMap_Node-------------------------------- |
duke@435 | 786 | void Compile::Process_OopMap_Node(MachNode *mach, int current_offset) { |
duke@435 | 787 | |
duke@435 | 788 | // Handle special safepoint nodes for synchronization |
duke@435 | 789 | MachSafePointNode *sfn = mach->as_MachSafePoint(); |
duke@435 | 790 | MachCallNode *mcall; |
duke@435 | 791 | |
duke@435 | 792 | #ifdef ENABLE_ZAP_DEAD_LOCALS |
duke@435 | 793 | assert( is_node_getting_a_safepoint(mach), "logic does not match; false negative"); |
duke@435 | 794 | #endif |
duke@435 | 795 | |
duke@435 | 796 | int safepoint_pc_offset = current_offset; |
twisti@1572 | 797 | bool is_method_handle_invoke = false; |
kvn@1688 | 798 | bool return_oop = false; |
duke@435 | 799 | |
duke@435 | 800 | // Add the safepoint in the DebugInfoRecorder |
duke@435 | 801 | if( !mach->is_MachCall() ) { |
duke@435 | 802 | mcall = NULL; |
duke@435 | 803 | debug_info()->add_safepoint(safepoint_pc_offset, sfn->_oop_map); |
duke@435 | 804 | } else { |
duke@435 | 805 | mcall = mach->as_MachCall(); |
twisti@1572 | 806 | |
twisti@1572 | 807 | // Is the call a MethodHandle call? |
twisti@1572 | 808 | if (mcall->is_MachCallJava()) |
twisti@1572 | 809 | is_method_handle_invoke = mcall->as_MachCallJava()->_method_handle_invoke; |
twisti@1572 | 810 | |
kvn@1688 | 811 | // Check if a call returns an object. |
kvn@1688 | 812 | if (mcall->return_value_is_used() && |
kvn@1688 | 813 | mcall->tf()->range()->field_at(TypeFunc::Parms)->isa_ptr()) { |
kvn@1688 | 814 | return_oop = true; |
kvn@1688 | 815 | } |
duke@435 | 816 | safepoint_pc_offset += mcall->ret_addr_offset(); |
duke@435 | 817 | debug_info()->add_safepoint(safepoint_pc_offset, mcall->_oop_map); |
duke@435 | 818 | } |
duke@435 | 819 | |
duke@435 | 820 | // Loop over the JVMState list to add scope information |
duke@435 | 821 |   // Do not skip safepoints with a NULL method; they need monitor info |
duke@435 | 822 | JVMState* youngest_jvms = sfn->jvms(); |
duke@435 | 823 | int max_depth = youngest_jvms->depth(); |
duke@435 | 824 | |
kvn@498 | 825 | // Allocate the object pool for scalar-replaced objects -- the map from |
kvn@498 | 826 | // small-integer keys (which can be recorded in the local and ostack |
kvn@498 | 827 | // arrays) to descriptions of the object state. |
kvn@498 | 828 | GrowableArray<ScopeValue*> *objs = new GrowableArray<ScopeValue*>(); |
kvn@498 | 829 | |
duke@435 | 830 | // Visit scopes from oldest to youngest. |
duke@435 | 831 | for (int depth = 1; depth <= max_depth; depth++) { |
duke@435 | 832 | JVMState* jvms = youngest_jvms->of_depth(depth); |
duke@435 | 833 | int idx; |
duke@435 | 834 | ciMethod* method = jvms->has_method() ? jvms->method() : NULL; |
duke@435 | 835 | // Safepoints that do not have method() set only provide oop-map and monitor info |
duke@435 | 836 | // to support GC; these do not support deoptimization. |
duke@435 | 837 | int num_locs = (method == NULL) ? 0 : jvms->loc_size(); |
duke@435 | 838 | int num_exps = (method == NULL) ? 0 : jvms->stk_size(); |
duke@435 | 839 | int num_mon = jvms->nof_monitors(); |
duke@435 | 840 | assert(method == NULL || jvms->bci() < 0 || num_locs == method->max_locals(), |
duke@435 | 841 | "JVMS local count must match that of the method"); |
duke@435 | 842 | |
duke@435 | 843 | // Add Local and Expression Stack Information |
duke@435 | 844 | |
duke@435 | 845 | // Insert locals into the locarray |
duke@435 | 846 | GrowableArray<ScopeValue*> *locarray = new GrowableArray<ScopeValue*>(num_locs); |
duke@435 | 847 | for( idx = 0; idx < num_locs; idx++ ) { |
kvn@498 | 848 | FillLocArray( idx, sfn, sfn->local(jvms, idx), locarray, objs ); |
duke@435 | 849 | } |
duke@435 | 850 | |
duke@435 | 851 | // Insert expression stack entries into the exparray |
duke@435 | 852 | GrowableArray<ScopeValue*> *exparray = new GrowableArray<ScopeValue*>(num_exps); |
duke@435 | 853 | for( idx = 0; idx < num_exps; idx++ ) { |
kvn@498 | 854 | FillLocArray( idx, sfn, sfn->stack(jvms, idx), exparray, objs ); |
duke@435 | 855 | } |
duke@435 | 856 | |
duke@435 | 857 | // Add in mappings of the monitors |
duke@435 | 858 | assert( !method || |
duke@435 | 859 | !method->is_synchronized() || |
duke@435 | 860 | method->is_native() || |
duke@435 | 861 | num_mon > 0 || |
duke@435 | 862 | !GenerateSynchronizationCode, |
duke@435 | 863 | "monitors must always exist for synchronized methods"); |
duke@435 | 864 | |
duke@435 | 865 |     // Build the growable array of MonitorValues for the monitors |
duke@435 | 866 | GrowableArray<MonitorValue*> *monarray = new GrowableArray<MonitorValue*>(num_mon); |
duke@435 | 867 | |
duke@435 | 868 | // Loop over monitors and insert into array |
duke@435 | 869 | for(idx = 0; idx < num_mon; idx++) { |
duke@435 | 870 | // Grab the node that defines this monitor |
kvn@895 | 871 | Node* box_node = sfn->monitor_box(jvms, idx); |
kvn@895 | 872 | Node* obj_node = sfn->monitor_obj(jvms, idx); |
duke@435 | 873 | |
duke@435 | 874 | // Create ScopeValue for object |
duke@435 | 875 | ScopeValue *scval = NULL; |
kvn@498 | 876 | |
kvn@498 | 877 | if( obj_node->is_SafePointScalarObject() ) { |
kvn@498 | 878 | SafePointScalarObjectNode* spobj = obj_node->as_SafePointScalarObject(); |
kvn@498 | 879 | scval = Compile::sv_for_node_id(objs, spobj->_idx); |
kvn@498 | 880 | if (scval == NULL) { |
kvn@498 | 881 | const Type *t = obj_node->bottom_type(); |
kvn@498 | 882 | ciKlass* cik = t->is_oopptr()->klass(); |
kvn@498 | 883 | assert(cik->is_instance_klass() || |
kvn@498 | 884 | cik->is_array_klass(), "Not supported allocation."); |
kvn@498 | 885 | ObjectValue* sv = new ObjectValue(spobj->_idx, |
jrose@1424 | 886 | new ConstantOopWriteValue(cik->constant_encoding())); |
kvn@498 | 887 | Compile::set_sv_for_object_node(objs, sv); |
kvn@498 | 888 | |
kvn@498 | 889 | uint first_ind = spobj->first_index(); |
kvn@498 | 890 | for (uint i = 0; i < spobj->n_fields(); i++) { |
kvn@498 | 891 | Node* fld_node = sfn->in(first_ind+i); |
kvn@498 | 892 | (void)FillLocArray(sv->field_values()->length(), sfn, fld_node, sv->field_values(), objs); |
kvn@498 | 893 | } |
kvn@498 | 894 | scval = sv; |
kvn@498 | 895 | } |
kvn@498 | 896 | } else if( !obj_node->is_Con() ) { |
duke@435 | 897 | OptoReg::Name obj_reg = _regalloc->get_reg_first(obj_node); |
kvn@766 | 898 | if( obj_node->bottom_type()->base() == Type::NarrowOop ) { |
kvn@766 | 899 | scval = new_loc_value( _regalloc, obj_reg, Location::narrowoop ); |
kvn@766 | 900 | } else { |
kvn@766 | 901 | scval = new_loc_value( _regalloc, obj_reg, Location::oop ); |
kvn@766 | 902 | } |
duke@435 | 903 | } else { |
kvn@766 | 904 | const TypePtr *tp = obj_node->bottom_type()->make_ptr(); |
jrose@1424 | 905 | scval = new ConstantOopWriteValue(tp->is_instptr()->const_oop()->constant_encoding()); |
duke@435 | 906 | } |
duke@435 | 907 | |
duke@435 | 908 | OptoReg::Name box_reg = BoxLockNode::stack_slot(box_node); |
kvn@501 | 909 | Location basic_lock = Location::new_stk_loc(Location::normal,_regalloc->reg2offset(box_reg)); |
kvn@895 | 910 | while( !box_node->is_BoxLock() ) box_node = box_node->in(1); |
kvn@501 | 911 | monarray->append(new MonitorValue(scval, basic_lock, box_node->as_BoxLock()->is_eliminated())); |
duke@435 | 912 | } |
duke@435 | 913 | |
kvn@498 | 914 | // We dump the object pool first, since deoptimization reads it in first. |
kvn@498 | 915 | debug_info()->dump_object_pool(objs); |
kvn@498 | 916 | |
duke@435 | 917 | // Build first class objects to pass to scope |
duke@435 | 918 | DebugToken *locvals = debug_info()->create_scope_values(locarray); |
duke@435 | 919 | DebugToken *expvals = debug_info()->create_scope_values(exparray); |
duke@435 | 920 | DebugToken *monvals = debug_info()->create_monitor_values(monarray); |
duke@435 | 921 | |
duke@435 | 922 | // Make method available for all Safepoints |
duke@435 | 923 | ciMethod* scope_method = method ? method : _method; |
duke@435 | 924 | // Describe the scope here |
duke@435 | 925 | assert(jvms->bci() >= InvocationEntryBci && jvms->bci() <= 0x10000, "must be a valid or entry BCI"); |
twisti@1570 | 926 | assert(!jvms->should_reexecute() || depth == max_depth, "reexecute allowed only for the youngest"); |
kvn@498 | 927 | // Now we can describe the scope. |
kvn@1688 | 928 | debug_info()->describe_scope(safepoint_pc_offset, scope_method, jvms->bci(), jvms->should_reexecute(), is_method_handle_invoke, return_oop, locvals, expvals, monvals); |
duke@435 | 929 | } // End jvms loop |
duke@435 | 930 | |
duke@435 | 931 | // Mark the end of the scope set. |
duke@435 | 932 | debug_info()->end_safepoint(safepoint_pc_offset); |
duke@435 | 933 | } |
duke@435 | 934 | |
duke@435 | 935 | |
duke@435 | 936 | |
duke@435 | 937 | // A simplified version of Process_OopMap_Node, to handle non-safepoints. |
duke@435 | 938 | class NonSafepointEmitter { |
duke@435 | 939 | Compile* C; |
duke@435 | 940 | JVMState* _pending_jvms; |
duke@435 | 941 | int _pending_offset; |
duke@435 | 942 | |
duke@435 | 943 | void emit_non_safepoint(); |
duke@435 | 944 | |
duke@435 | 945 | public: |
duke@435 | 946 | NonSafepointEmitter(Compile* compile) { |
duke@435 | 947 | this->C = compile; |
duke@435 | 948 | _pending_jvms = NULL; |
duke@435 | 949 | _pending_offset = 0; |
duke@435 | 950 | } |
duke@435 | 951 | |
duke@435 | 952 | void observe_instruction(Node* n, int pc_offset) { |
duke@435 | 953 | if (!C->debug_info()->recording_non_safepoints()) return; |
duke@435 | 954 | |
duke@435 | 955 | Node_Notes* nn = C->node_notes_at(n->_idx); |
duke@435 | 956 | if (nn == NULL || nn->jvms() == NULL) return; |
duke@435 | 957 | if (_pending_jvms != NULL && |
duke@435 | 958 | _pending_jvms->same_calls_as(nn->jvms())) { |
duke@435 | 959 | // Repeated JVMS? Stretch it up here. |
duke@435 | 960 | _pending_offset = pc_offset; |
duke@435 | 961 | } else { |
duke@435 | 962 | if (_pending_jvms != NULL && |
duke@435 | 963 | _pending_offset < pc_offset) { |
duke@435 | 964 | emit_non_safepoint(); |
duke@435 | 965 | } |
duke@435 | 966 | _pending_jvms = NULL; |
duke@435 | 967 | if (pc_offset > C->debug_info()->last_pc_offset()) { |
duke@435 | 968 | // This is the only way _pending_jvms can become non-NULL: |
duke@435 | 969 | _pending_jvms = nn->jvms(); |
duke@435 | 970 | _pending_offset = pc_offset; |
duke@435 | 971 | } |
duke@435 | 972 | } |
duke@435 | 973 | } |
duke@435 | 974 | |
duke@435 | 975 | // Stay out of the way of real safepoints: |
duke@435 | 976 | void observe_safepoint(JVMState* jvms, int pc_offset) { |
duke@435 | 977 | if (_pending_jvms != NULL && |
duke@435 | 978 | !_pending_jvms->same_calls_as(jvms) && |
duke@435 | 979 | _pending_offset < pc_offset) { |
duke@435 | 980 | emit_non_safepoint(); |
duke@435 | 981 | } |
duke@435 | 982 | _pending_jvms = NULL; |
duke@435 | 983 | } |
duke@435 | 984 | |
duke@435 | 985 | void flush_at_end() { |
duke@435 | 986 | if (_pending_jvms != NULL) { |
duke@435 | 987 | emit_non_safepoint(); |
duke@435 | 988 | } |
duke@435 | 989 | _pending_jvms = NULL; |
duke@435 | 990 | } |
duke@435 | 991 | }; |
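// [Editorial sketch, not VM code; kept out of the build] How a caller is
// expected to drive the emitter while walking a block's instructions; the
// loop and offset bookkeeping here are illustrative only.
#if 0
static void example_walk(Compile* C, Block* b, int& current_offset) {
  NonSafepointEmitter non_safepoints(C);
  for (uint j = 0; j < b->_nodes.size(); j++) {
    Node* n = b->_nodes[j];
    non_safepoints.observe_instruction(n, current_offset);  // may flush pending info
    if (n->is_MachSafePoint()) {
      // A real safepoint displaces any pending non-safepoint record.
      non_safepoints.observe_safepoint(n->as_MachSafePoint()->jvms(), current_offset);
    }
    // ... emit n into the code buffer and advance current_offset ...
  }
  non_safepoints.flush_at_end();  // emit any record still pending at the end
}
#endif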
duke@435 | 992 | |
duke@435 | 993 | void NonSafepointEmitter::emit_non_safepoint() { |
duke@435 | 994 | JVMState* youngest_jvms = _pending_jvms; |
duke@435 | 995 | int pc_offset = _pending_offset; |
duke@435 | 996 | |
duke@435 | 997 | // Clear it now: |
duke@435 | 998 | _pending_jvms = NULL; |
duke@435 | 999 | |
duke@435 | 1000 | DebugInformationRecorder* debug_info = C->debug_info(); |
duke@435 | 1001 | assert(debug_info->recording_non_safepoints(), "sanity"); |
duke@435 | 1002 | |
duke@435 | 1003 | debug_info->add_non_safepoint(pc_offset); |
duke@435 | 1004 | int max_depth = youngest_jvms->depth(); |
duke@435 | 1005 | |
duke@435 | 1006 | // Visit scopes from oldest to youngest. |
duke@435 | 1007 | for (int depth = 1; depth <= max_depth; depth++) { |
duke@435 | 1008 | JVMState* jvms = youngest_jvms->of_depth(depth); |
duke@435 | 1009 | ciMethod* method = jvms->has_method() ? jvms->method() : NULL; |
cfang@1335 | 1010 | assert(!jvms->should_reexecute() || depth==max_depth, "reexecute allowed only for the youngest"); |
cfang@1335 | 1011 | debug_info->describe_scope(pc_offset, method, jvms->bci(), jvms->should_reexecute()); |
duke@435 | 1012 | } |
duke@435 | 1013 | |
duke@435 | 1014 | // Mark the end of the scope set. |
duke@435 | 1015 | debug_info->end_non_safepoint(pc_offset); |
duke@435 | 1016 | } |
duke@435 | 1017 | |
duke@435 | 1018 | |
duke@435 | 1019 | |
duke@435 | 1020 | // helper for Fill_buffer bailout logic |
duke@435 | 1021 | static void turn_off_compiler(Compile* C) { |
duke@435 | 1022 | if (CodeCache::unallocated_capacity() >= CodeCacheMinimumFreeSpace*10) { |
duke@435 | 1023 | // Do not turn off compilation if a single giant method has |
duke@435 | 1024 | // blown the code cache size. |
duke@435 | 1025 | C->record_failure("excessive request to CodeCache"); |
duke@435 | 1026 | } else { |
kvn@463 | 1027 |     // Let CompileBroker disable further compilations. |
duke@435 | 1028 | C->record_failure("CodeCache is full"); |
duke@435 | 1029 | } |
duke@435 | 1030 | } |
duke@435 | 1031 | |
duke@435 | 1032 | |
duke@435 | 1033 | //------------------------------Fill_buffer------------------------------------ |
duke@435 | 1034 | void Compile::Fill_buffer() { |
duke@435 | 1035 | |
duke@435 | 1036 | // Set the initially allocated size |
duke@435 | 1037 | int code_req = initial_code_capacity; |
duke@435 | 1038 | int locs_req = initial_locs_capacity; |
duke@435 | 1039 | int stub_req = TraceJumps ? initial_stub_capacity * 10 : initial_stub_capacity; |
duke@435 | 1040 | int const_req = initial_const_capacity; |
duke@435 | 1041 | bool labels_not_set = true; |
duke@435 | 1042 | |
duke@435 | 1043 | int pad_req = NativeCall::instruction_size; |
duke@435 | 1044 | // The extra spacing after the code is necessary on some platforms. |
duke@435 | 1045 | // Sometimes we need to patch in a jump after the last instruction, |
duke@435 | 1046 | // if the nmethod has been deoptimized. (See 4932387, 4894843.) |
duke@435 | 1047 | |
duke@435 | 1048 | uint i; |
duke@435 | 1049 | // Compute the byte offset where we can store the deopt pc. |
duke@435 | 1050 | if (fixed_slots() != 0) { |
duke@435 | 1051 | _orig_pc_slot_offset_in_bytes = _regalloc->reg2offset(OptoReg::stack2reg(_orig_pc_slot)); |
duke@435 | 1052 | } |
duke@435 | 1053 | |
duke@435 | 1054 | // Compute prolog code size |
duke@435 | 1055 | _method_size = 0; |
duke@435 | 1056 | _frame_slots = OptoReg::reg2stack(_matcher->_old_SP)+_regalloc->_framesize; |
duke@435 | 1057 | #ifdef IA64 |
duke@435 | 1058 | if (save_argument_registers()) { |
duke@435 | 1059 | // 4815101: this is a stub with implicit and unknown precision fp args. |
duke@435 | 1060 | // The usual spill mechanism can only generate stfd's in this case, which |
duke@435 | 1061 | // doesn't work if the fp reg to spill contains a single-precision denorm. |
duke@435 | 1062 | // Instead, we hack around the normal spill mechanism using stfspill's and |
duke@435 | 1063 | // ldffill's in the MachProlog and MachEpilog emit methods. We allocate |
duke@435 | 1064 | // space here for the fp arg regs (f8-f15) we're going to thusly spill. |
duke@435 | 1065 | // |
duke@435 | 1066 | // If we ever implement 16-byte 'registers' == stack slots, we can |
duke@435 | 1067 | // get rid of this hack and have SpillCopy generate stfspill/ldffill |
duke@435 | 1068 | // instead of stfd/stfs/ldfd/ldfs. |
duke@435 | 1069 | _frame_slots += 8*(16/BytesPerInt); |
duke@435 | 1070 | } |
duke@435 | 1071 | #endif |
duke@435 | 1072 | assert( _frame_slots >= 0 && _frame_slots < 1000000, "sanity check" ); |
duke@435 | 1073 | |
duke@435 | 1074 | // Create an array of unused labels, one for each basic block |
duke@435 | 1075 | Label *blk_labels = NEW_RESOURCE_ARRAY(Label, _cfg->_num_blocks+1); |
duke@435 | 1076 | |
duke@435 | 1077 | for( i=0; i <= _cfg->_num_blocks; i++ ) { |
duke@435 | 1078 | blk_labels[i].init(); |
duke@435 | 1079 | } |
duke@435 | 1080 | |
duke@435 | 1081 | // If this machine supports different size branch offsets, then pre-compute |
duke@435 | 1082 | // the length of the blocks |
never@850 | 1083 | if( _matcher->is_short_branch_offset(-1, 0) ) { |
duke@435 | 1084 | Shorten_branches(blk_labels, code_req, locs_req, stub_req, const_req); |
duke@435 | 1085 | labels_not_set = false; |
duke@435 | 1086 | } |
duke@435 | 1087 | |
duke@435 | 1088 | // nmethod and CodeBuffer count stubs & constants as part of method's code. |
duke@435 | 1089 | int exception_handler_req = size_exception_handler(); |
duke@435 | 1090 | int deopt_handler_req = size_deopt_handler(); |
duke@435 | 1091 | exception_handler_req += MAX_stubs_size; // add marginal slop for handler |
duke@435 | 1092 | deopt_handler_req += MAX_stubs_size; // add marginal slop for handler |
duke@435 | 1093 | stub_req += MAX_stubs_size; // ensure per-stub margin |
duke@435 | 1094 | code_req += MAX_inst_size; // ensure per-instruction margin |
duke@435 | 1095 | if (StressCodeBuffers) |
duke@435 | 1096 | code_req = const_req = stub_req = exception_handler_req = deopt_handler_req = 0x10; // force expansion |
duke@435 | 1097 | int total_req = code_req + pad_req + stub_req + exception_handler_req + deopt_handler_req + const_req; |
duke@435 | 1098 | CodeBuffer* cb = code_buffer(); |
duke@435 | 1099 | cb->initialize(total_req, locs_req); |
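// Rough shape of the request just made, with hypothetical sizes for a
// mid-sized method (none of these numbers are measured):
//
//   total_req = code_req              // e.g. ~16K insts + MAX_inst_size slop
//             + pad_req               // NativeCall::instruction_size bytes
//             + stub_req              // e.g. ~2K stubs + MAX_stubs_size slop
//             + exception_handler_req // handler size + slop
//             + deopt_handler_req     // handler size + slop
//             + const_req;            // constants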
duke@435 | 1100 | |
duke@435 | 1101 | // Have we run out of code space? |
kvn@1637 | 1102 | if ((cb->blob() == NULL) || (!CompileBroker::should_compile_new_jobs())) { |
duke@435 | 1103 | turn_off_compiler(this); |
duke@435 | 1104 | return; |
duke@435 | 1105 | } |
duke@435 | 1106 | // Configure the code buffer. |
duke@435 | 1107 | cb->initialize_consts_size(const_req); |
duke@435 | 1108 | cb->initialize_stubs_size(stub_req); |
duke@435 | 1109 | cb->initialize_oop_recorder(env()->oop_recorder()); |
duke@435 | 1110 | |
duke@435 | 1111 | // fill in the nop array for bundling computations |
duke@435 | 1112 | MachNode *_nop_list[Bundle::_nop_count]; |
duke@435 | 1113 | Bundle::initialize_nops(_nop_list, this); |
duke@435 | 1114 | |
duke@435 | 1115 | // Create oopmap set. |
duke@435 | 1116 | _oop_map_set = new OopMapSet(); |
duke@435 | 1117 | |
duke@435 | 1118 | // !!!!! This preserves old handling of oopmaps for now |
duke@435 | 1119 | debug_info()->set_oopmaps(_oop_map_set); |
duke@435 | 1120 | |
duke@435 | 1121 | // Count and start of implicit null check instructions |
duke@435 | 1122 | uint inct_cnt = 0; |
duke@435 | 1123 | uint *inct_starts = NEW_RESOURCE_ARRAY(uint, _cfg->_num_blocks+1); |
duke@435 | 1124 | |
duke@435 | 1125 | // Count and start of calls |
duke@435 | 1126 | uint *call_returns = NEW_RESOURCE_ARRAY(uint, _cfg->_num_blocks+1); |
duke@435 | 1127 | |
duke@435 | 1128 | uint return_offset = 0; |
kvn@1294 | 1129 | int nop_size = (new (this) MachNopNode())->size(_regalloc); |
duke@435 | 1130 | |
duke@435 | 1131 | int previous_offset = 0; |
duke@435 | 1132 | int current_offset = 0; |
duke@435 | 1133 | int last_call_offset = -1; |
duke@435 | 1134 | |
duke@435 | 1135 | // Create an array of node offsets, one per node, if printing is enabled |
duke@435 | 1136 | #ifndef PRODUCT |
duke@435 | 1137 | int *node_offsets = NULL; |
duke@435 | 1138 | uint node_offset_limit = unique(); |
duke@435 | 1139 | |
duke@435 | 1140 | |
duke@435 | 1141 | if ( print_assembly() ) |
duke@435 | 1142 | node_offsets = NEW_RESOURCE_ARRAY(int, node_offset_limit); |
duke@435 | 1143 | #endif |
duke@435 | 1144 | |
duke@435 | 1145 | NonSafepointEmitter non_safepoints(this); // emit non-safepoints lazily |
duke@435 | 1146 | |
duke@435 | 1147 | // ------------------ |
duke@435 | 1148 | // Now fill in the code buffer |
duke@435 | 1149 | Node *delay_slot = NULL; |
duke@435 | 1150 | |
duke@435 | 1151 | for( i=0; i < _cfg->_num_blocks; i++ ) { |
duke@435 | 1152 | Block *b = _cfg->_blocks[i]; |
duke@435 | 1153 | |
duke@435 | 1154 | Node *head = b->head(); |
duke@435 | 1155 | |
duke@435 | 1156 | // If this block needs to start aligned (i.e., can be reached other |
duke@435 | 1157 | // than by falling-thru from the previous block), then force the |
duke@435 | 1158 | // start of a new bundle. |
duke@435 | 1159 | if( Pipeline::requires_bundling() && starts_bundle(head) ) |
duke@435 | 1160 | cb->flush_bundle(true); |
duke@435 | 1161 | |
duke@435 | 1162 | // Define the label at the beginning of the basic block |
duke@435 | 1163 | if( labels_not_set ) |
duke@435 | 1164 | MacroAssembler(cb).bind( blk_labels[b->_pre_order] ); |
duke@435 | 1165 | |
duke@435 | 1166 | else |
duke@435 | 1167 | assert( blk_labels[b->_pre_order].loc_pos() == cb->code_size(), |
duke@435 | 1168 | "label position does not match code offset" ); |
duke@435 | 1169 | |
duke@435 | 1170 | uint last_inst = b->_nodes.size(); |
duke@435 | 1171 | |
duke@435 | 1172 | // Emit block normally, except for last instruction. |
duke@435 | 1173 | // Emit means "dump code bits into code buffer". |
duke@435 | 1174 | for( uint j = 0; j<last_inst; j++ ) { |
duke@435 | 1175 | |
duke@435 | 1176 | // Get the node |
duke@435 | 1177 | Node* n = b->_nodes[j]; |
duke@435 | 1178 | |
duke@435 | 1179 | // See if delay slots are supported |
duke@435 | 1180 | if (valid_bundle_info(n) && |
duke@435 | 1181 | node_bundling(n)->used_in_unconditional_delay()) { |
duke@435 | 1182 | assert(delay_slot == NULL, "no use of delay slot node"); |
duke@435 | 1183 | assert(n->size(_regalloc) == Pipeline::instr_unit_size(), "delay slot instruction wrong size"); |
duke@435 | 1184 | |
duke@435 | 1185 | delay_slot = n; |
duke@435 | 1186 | continue; |
duke@435 | 1187 | } |
duke@435 | 1188 | |
duke@435 | 1189 | // If this starts a new instruction group, then flush the current one |
duke@435 | 1190 | // (but allow split bundles) |
duke@435 | 1191 | if( Pipeline::requires_bundling() && starts_bundle(n) ) |
duke@435 | 1192 | cb->flush_bundle(false); |
duke@435 | 1193 | |
duke@435 | 1194 | // The following logic is duplicated in the code ifdeffed for |
twisti@1040 | 1195 | // ENABLE_ZAP_DEAD_LOCALS which appears above in this file. It |
duke@435 | 1196 | // should be factored out. Or maybe dispersed to the nodes? |
duke@435 | 1197 | |
duke@435 | 1198 | // Special handling for SafePoint/Call Nodes |
duke@435 | 1199 | bool is_mcall = false; |
duke@435 | 1200 | if( n->is_Mach() ) { |
duke@435 | 1201 | MachNode *mach = n->as_Mach(); |
duke@435 | 1202 | is_mcall = n->is_MachCall(); |
duke@435 | 1203 | bool is_sfn = n->is_MachSafePoint(); |
duke@435 | 1204 | |
duke@435 | 1205 | // If this requires all previous instructions be flushed, then do so |
duke@435 | 1206 | if( is_sfn || is_mcall || mach->alignment_required() != 1) { |
duke@435 | 1207 | cb->flush_bundle(true); |
duke@435 | 1208 | current_offset = cb->code_size(); |
duke@435 | 1209 | } |
duke@435 | 1210 | |
duke@435 | 1211 | // align the instruction if necessary |
duke@435 | 1212 | int padding = mach->compute_padding(current_offset); |
duke@435 | 1213 | // Make sure safepoint node for polling is distinct from a call's |
duke@435 | 1214 | // return by adding a nop if needed. |
duke@435 | 1215 | if (is_sfn && !is_mcall && padding == 0 && current_offset == last_call_offset ) { |
duke@435 | 1216 | padding = nop_size; |
duke@435 | 1217 | } |
duke@435 | 1218 | assert( labels_not_set || padding == 0, "instruction should already be aligned"); |
duke@435 | 1219 | |
duke@435 | 1220 | if(padding > 0) { |
duke@435 | 1221 | assert((padding % nop_size) == 0, "padding is not a multiple of NOP size"); |
duke@435 | 1222 | int nops_cnt = padding / nop_size; |
duke@435 | 1223 | MachNode *nop = new (this) MachNopNode(nops_cnt); |
duke@435 | 1224 | b->_nodes.insert(j++, nop); |
duke@435 | 1225 | last_inst++; |
duke@435 | 1226 | _cfg->_bbs.map( nop->_idx, b ); |
duke@435 | 1227 | nop->emit(*cb, _regalloc); |
duke@435 | 1228 | cb->flush_bundle(true); |
duke@435 | 1229 | current_offset = cb->code_size(); |
duke@435 | 1230 | } |
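// Worked example for the padding above (sizes hypothetical): if
// compute_padding() returns 8 and nop_size is 4, one MachNopNode
// carrying 2 nops is inserted ahead of the current node, j and
// last_inst are bumped so this loop does not revisit it, and
// current_offset advances by 8 once the nops are emitted.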
duke@435 | 1231 | |
duke@435 | 1232 | // Remember the start of the last call in a basic block |
duke@435 | 1233 | if (is_mcall) { |
duke@435 | 1234 | MachCallNode *mcall = mach->as_MachCall(); |
duke@435 | 1235 | |
duke@435 | 1236 | // This destination address is NOT PC-relative |
duke@435 | 1237 | mcall->method_set((intptr_t)mcall->entry_point()); |
duke@435 | 1238 | |
duke@435 | 1239 | // Save the return address |
duke@435 | 1240 | call_returns[b->_pre_order] = current_offset + mcall->ret_addr_offset(); |
duke@435 | 1241 | |
duke@435 | 1242 | if (!mcall->is_safepoint_node()) { |
duke@435 | 1243 | is_mcall = false; |
duke@435 | 1244 | is_sfn = false; |
duke@435 | 1245 | } |
duke@435 | 1246 | } |
duke@435 | 1247 | |
duke@435 | 1248 | // is_sfn is set whenever is_mcall is, since MachCall inherits from MachSafePoint |
duke@435 | 1249 | if( is_sfn || is_mcall ) { |
duke@435 | 1250 | |
duke@435 | 1251 | // Handle special safepoint nodes for synchronization |
duke@435 | 1252 | if( !is_mcall ) { |
duke@435 | 1253 | MachSafePointNode *sfn = mach->as_MachSafePoint(); |
duke@435 | 1254 | // !!!!! Stubs only need an oopmap right now, so bail out |
duke@435 | 1255 | if( sfn->jvms()->method() == NULL) { |
duke@435 | 1256 | // Write the oopmap directly to the code blob??!! |
duke@435 | 1257 | # ifdef ENABLE_ZAP_DEAD_LOCALS |
duke@435 | 1258 | assert( !is_node_getting_a_safepoint(sfn), "logic does not match; false positive"); |
duke@435 | 1259 | # endif |
duke@435 | 1260 | continue; |
duke@435 | 1261 | } |
duke@435 | 1262 | } // End synchronization |
duke@435 | 1263 | |
duke@435 | 1264 | non_safepoints.observe_safepoint(mach->as_MachSafePoint()->jvms(), |
duke@435 | 1265 | current_offset); |
duke@435 | 1266 | Process_OopMap_Node(mach, current_offset); |
duke@435 | 1267 | } // End if safepoint |
duke@435 | 1268 | |
duke@435 | 1269 | // If this is a null check, then add the start of the previous instruction to the list |
duke@435 | 1270 | else if( mach->is_MachNullCheck() ) { |
duke@435 | 1271 | inct_starts[inct_cnt++] = previous_offset; |
duke@435 | 1272 | } |
duke@435 | 1273 | |
duke@435 | 1274 | // If this is a branch, then fill in the label with the target BB's label |
duke@435 | 1275 | else if ( mach->is_Branch() ) { |
duke@435 | 1276 | |
duke@435 | 1277 | if ( mach->ideal_Opcode() == Op_Jump ) { |
duke@435 | 1278 | for (uint h = 0; h < b->_num_succs; h++ ) { |
duke@435 | 1279 | Block* succs_block = b->_succs[h]; |
duke@435 | 1280 | for (uint j = 1; j < succs_block->num_preds(); j++) { |
duke@435 | 1281 | Node* jpn = succs_block->pred(j); |
duke@435 | 1282 | if ( jpn->is_JumpProj() && jpn->in(0) == mach ) { |
duke@435 | 1283 | uint block_num = succs_block->non_connector()->_pre_order; |
duke@435 | 1284 | Label *blkLabel = &blk_labels[block_num]; |
duke@435 | 1285 | mach->add_case_label(jpn->as_JumpProj()->proj_no(), blkLabel); |
duke@435 | 1286 | } |
duke@435 | 1287 | } |
duke@435 | 1288 | } |
duke@435 | 1289 | } else { |
duke@435 | 1290 | // For Branches |
duke@435 | 1291 | // This requires that the TRUE branch target be in succs[0] |
duke@435 | 1292 | uint block_num = b->non_connector_successor(0)->_pre_order; |
duke@435 | 1293 | mach->label_set( blk_labels[block_num], block_num ); |
duke@435 | 1294 | } |
duke@435 | 1295 | } |
duke@435 | 1296 | |
duke@435 | 1297 | #ifdef ASSERT |
twisti@1040 | 1298 | // Check that oop-store precedes the card-mark |
duke@435 | 1299 | else if( mach->ideal_Opcode() == Op_StoreCM ) { |
duke@435 | 1300 | uint storeCM_idx = j; |
duke@435 | 1301 | Node *oop_store = mach->in(mach->_cnt); // First precedence edge |
duke@435 | 1302 | assert( oop_store != NULL, "storeCM expects a precedence edge"); |
duke@435 | 1303 | uint i4; |
duke@435 | 1304 | for( i4 = 0; i4 < last_inst; ++i4 ) { |
duke@435 | 1305 | if( b->_nodes[i4] == oop_store ) break; |
duke@435 | 1306 | } |
duke@435 | 1307 | // Note: This test can give a false failure if other precedence |
duke@435 | 1308 | // edges have been added to the StoreCM node. |
duke@435 | 1309 | assert( i4 == last_inst || i4 < storeCM_idx, "oop-store must execute before the card-mark"); |
duke@435 | 1310 | } |
duke@435 | 1311 | #endif |
duke@435 | 1312 | |
duke@435 | 1313 | else if( !n->is_Proj() ) { |
twisti@1040 | 1314 | // Remember the beginning of the previous instruction, in case |
duke@435 | 1315 | // it's followed by a flag-kill and a null-check. Happens on |
duke@435 | 1316 | // Intel all the time, with add-to-memory kind of opcodes. |
duke@435 | 1317 | previous_offset = current_offset; |
duke@435 | 1318 | } |
duke@435 | 1319 | } |
duke@435 | 1320 | |
duke@435 | 1321 | // Verify that there is sufficient space remaining |
duke@435 | 1322 | cb->insts()->maybe_expand_to_ensure_remaining(MAX_inst_size); |
kvn@1637 | 1323 | if ((cb->blob() == NULL) || (!CompileBroker::should_compile_new_jobs())) { |
duke@435 | 1324 | turn_off_compiler(this); |
duke@435 | 1325 | return; |
duke@435 | 1326 | } |
duke@435 | 1327 | |
duke@435 | 1328 | // Save the offset for the listing |
duke@435 | 1329 | #ifndef PRODUCT |
duke@435 | 1330 | if( node_offsets && n->_idx < node_offset_limit ) |
duke@435 | 1331 | node_offsets[n->_idx] = cb->code_size(); |
duke@435 | 1332 | #endif |
duke@435 | 1333 | |
duke@435 | 1334 | // "Normal" instruction case |
duke@435 | 1335 | n->emit(*cb, _regalloc); |
duke@435 | 1336 | current_offset = cb->code_size(); |
duke@435 | 1337 | non_safepoints.observe_instruction(n, current_offset); |
duke@435 | 1338 | |
duke@435 | 1339 | // mcall is the last "call" that can be a safepoint. Record it so |
duke@435 | 1340 | // we can tell whether a poll will directly follow it, in which |
duke@435 | 1341 | // case we will need a pad to keep the PcDesc sites unique; see |
duke@435 | 1342 | // 5010568. This can be slightly inaccurate, but conservatively so, |
duke@435 | 1343 | // when the return address is not actually at current_offset. |
duke@435 | 1344 | // That is a small price to pay. |
duke@435 | 1345 | |
duke@435 | 1346 | if (is_mcall) { |
duke@435 | 1347 | last_call_offset = current_offset; |
duke@435 | 1348 | } |
duke@435 | 1349 | |
duke@435 | 1350 | // See if this instruction has a delay slot |
duke@435 | 1351 | if ( valid_bundle_info(n) && node_bundling(n)->use_unconditional_delay()) { |
duke@435 | 1352 | assert(delay_slot != NULL, "expecting delay slot node"); |
duke@435 | 1353 | |
duke@435 | 1354 | // Back up 1 instruction |
duke@435 | 1355 | cb->set_code_end( |
duke@435 | 1356 | cb->code_end()-Pipeline::instr_unit_size()); |
duke@435 | 1357 | |
duke@435 | 1358 | // Save the offset for the listing |
duke@435 | 1359 | #ifndef PRODUCT |
duke@435 | 1360 | if( node_offsets && delay_slot->_idx < node_offset_limit ) |
duke@435 | 1361 | node_offsets[delay_slot->_idx] = cb->code_size(); |
duke@435 | 1362 | #endif |
duke@435 | 1363 | |
duke@435 | 1364 | // Support a SafePoint in the delay slot |
duke@435 | 1365 | if( delay_slot->is_MachSafePoint() ) { |
duke@435 | 1366 | MachNode *mach = delay_slot->as_Mach(); |
duke@435 | 1367 | // !!!!! Stubs only need an oopmap right now, so bail out |
duke@435 | 1368 | if( !mach->is_MachCall() && mach->as_MachSafePoint()->jvms()->method() == NULL ) { |
duke@435 | 1369 | // Write the oopmap directly to the code blob??!! |
duke@435 | 1370 | # ifdef ENABLE_ZAP_DEAD_LOCALS |
duke@435 | 1371 | assert( !is_node_getting_a_safepoint(mach), "logic does not match; false positive"); |
duke@435 | 1372 | # endif |
duke@435 | 1373 | delay_slot = NULL; |
duke@435 | 1374 | continue; |
duke@435 | 1375 | } |
duke@435 | 1376 | |
duke@435 | 1377 | int adjusted_offset = current_offset - Pipeline::instr_unit_size(); |
duke@435 | 1378 | non_safepoints.observe_safepoint(mach->as_MachSafePoint()->jvms(), |
duke@435 | 1379 | adjusted_offset); |
duke@435 | 1380 | // Generate an OopMap entry |
duke@435 | 1381 | Process_OopMap_Node(mach, adjusted_offset); |
duke@435 | 1382 | } |
duke@435 | 1383 | |
duke@435 | 1384 | // Insert the delay slot instruction |
duke@435 | 1385 | delay_slot->emit(*cb, _regalloc); |
duke@435 | 1386 | |
duke@435 | 1387 | // Don't reuse it |
duke@435 | 1388 | delay_slot = NULL; |
duke@435 | 1389 | } |
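// Sketch of the delay-slot fill above (SPARC-style; addresses
// hypothetical). The branch was emitted with filler occupying its
// delay slot, so the buffer end is rewound by one instruction unit
// and the real delay-slot instruction overwrites the filler:
//
//   0x100: bne L                    ; branch emitted, filler at 0x104
//   set_code_end(code_end() - 4)    ; code_end: 0x108 -> 0x104
//   delay_slot->emit(...)           ; real instruction lands at 0x104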
duke@435 | 1390 | |
duke@435 | 1391 | } // End for all instructions in block |
duke@435 | 1392 | |
rasbold@853 | 1393 | // If the next block is the top of a loop, pad this block out to align |
rasbold@853 | 1394 | // the loop top a little. Helps prevent pipe stalls at loop back branches. |
duke@435 | 1395 | if( i<_cfg->_num_blocks-1 ) { |
duke@435 | 1396 | Block *nb = _cfg->_blocks[i+1]; |
duke@435 | 1397 | uint padding = nb->alignment_padding(current_offset); |
duke@435 | 1398 | if( padding > 0 ) { |
duke@435 | 1399 | MachNode *nop = new (this) MachNopNode(padding / nop_size); |
duke@435 | 1400 | b->_nodes.insert( b->_nodes.size(), nop ); |
duke@435 | 1401 | _cfg->_bbs.map( nop->_idx, b ); |
duke@435 | 1402 | nop->emit(*cb, _regalloc); |
duke@435 | 1403 | current_offset = cb->code_size(); |
duke@435 | 1404 | } |
duke@435 | 1405 | } |
duke@435 | 1406 | |
duke@435 | 1407 | } // End of for all blocks |
duke@435 | 1408 | |
duke@435 | 1409 | non_safepoints.flush_at_end(); |
duke@435 | 1410 | |
duke@435 | 1411 | // Offset too large? |
duke@435 | 1412 | if (failing()) return; |
duke@435 | 1413 | |
duke@435 | 1414 | // Define a pseudo-label at the end of the code |
duke@435 | 1415 | MacroAssembler(cb).bind( blk_labels[_cfg->_num_blocks] ); |
duke@435 | 1416 | |
duke@435 | 1417 | // Compute the size of the first block |
duke@435 | 1418 | _first_block_size = blk_labels[1].loc_pos() - blk_labels[0].loc_pos(); |
duke@435 | 1419 | |
duke@435 | 1420 | assert(cb->code_size() < 500000, "method is unreasonably large"); |
duke@435 | 1421 | |
duke@435 | 1422 | // ------------------ |
duke@435 | 1423 | |
duke@435 | 1424 | #ifndef PRODUCT |
duke@435 | 1425 | // Information on the size of the method, without the extraneous code |
duke@435 | 1426 | Scheduling::increment_method_size(cb->code_size()); |
duke@435 | 1427 | #endif |
duke@435 | 1428 | |
duke@435 | 1429 | // ------------------ |
duke@435 | 1430 | // Fill in exception table entries. |
duke@435 | 1431 | FillExceptionTables(inct_cnt, call_returns, inct_starts, blk_labels); |
duke@435 | 1432 | |
duke@435 | 1433 | // Only java methods have exception handlers and deopt handlers |
duke@435 | 1434 | if (_method) { |
duke@435 | 1435 | // Emit the exception handler code. |
duke@435 | 1436 | _code_offsets.set_value(CodeOffsets::Exceptions, emit_exception_handler(*cb)); |
duke@435 | 1437 | // Emit the deopt handler code. |
duke@435 | 1438 | _code_offsets.set_value(CodeOffsets::Deopt, emit_deopt_handler(*cb)); |
twisti@1639 | 1439 | // Emit the MethodHandle deopt handler code. We can use the same |
twisti@1639 | 1440 | // code as for the normal deopt handler, we just need a different |
twisti@1639 | 1441 | // entry point address. |
twisti@1639 | 1442 | _code_offsets.set_value(CodeOffsets::DeoptMH, emit_deopt_handler(*cb)); |
duke@435 | 1443 | } |
duke@435 | 1444 | |
duke@435 | 1445 | // One last check for failed CodeBuffer::expand: |
kvn@1637 | 1446 | if ((cb->blob() == NULL) || (!CompileBroker::should_compile_new_jobs())) { |
duke@435 | 1447 | turn_off_compiler(this); |
duke@435 | 1448 | return; |
duke@435 | 1449 | } |
duke@435 | 1450 | |
duke@435 | 1451 | #ifndef PRODUCT |
duke@435 | 1452 | // Dump the assembly code, including basic-block numbers |
duke@435 | 1453 | if (print_assembly()) { |
duke@435 | 1454 | ttyLocker ttyl; // keep the following output all in one block |
duke@435 | 1455 | if (!VMThread::should_terminate()) { // test this under the tty lock |
duke@435 | 1456 | // This output goes directly to the tty, not the compiler log. |
duke@435 | 1457 | // To enable tools to match it up with the compilation activity, |
duke@435 | 1458 | // be sure to tag this tty output with the compile ID. |
duke@435 | 1459 | if (xtty != NULL) { |
duke@435 | 1460 | xtty->head("opto_assembly compile_id='%d'%s", compile_id(), |
duke@435 | 1461 | is_osr_compilation() ? " compile_kind='osr'" : |
duke@435 | 1462 | ""); |
duke@435 | 1463 | } |
duke@435 | 1464 | if (method() != NULL) { |
duke@435 | 1465 | method()->print_oop(); |
duke@435 | 1466 | print_codes(); |
duke@435 | 1467 | } |
duke@435 | 1468 | dump_asm(node_offsets, node_offset_limit); |
duke@435 | 1469 | if (xtty != NULL) { |
duke@435 | 1470 | xtty->tail("opto_assembly"); |
duke@435 | 1471 | } |
duke@435 | 1472 | } |
duke@435 | 1473 | } |
duke@435 | 1474 | #endif |
duke@435 | 1475 | |
duke@435 | 1476 | } |
duke@435 | 1477 | |
duke@435 | 1478 | void Compile::FillExceptionTables(uint cnt, uint *call_returns, uint *inct_starts, Label *blk_labels) { |
duke@435 | 1479 | _inc_table.set_size(cnt); |
duke@435 | 1480 | |
duke@435 | 1481 | uint inct_cnt = 0; |
duke@435 | 1482 | for( uint i=0; i<_cfg->_num_blocks; i++ ) { |
duke@435 | 1483 | Block *b = _cfg->_blocks[i]; |
duke@435 | 1484 | Node *n = NULL; |
duke@435 | 1485 | int j; |
duke@435 | 1486 | |
duke@435 | 1487 | // Find the branch; ignore trailing NOPs. |
duke@435 | 1488 | for( j = b->_nodes.size()-1; j>=0; j-- ) { |
duke@435 | 1489 | n = b->_nodes[j]; |
duke@435 | 1490 | if( !n->is_Mach() || n->as_Mach()->ideal_Opcode() != Op_Con ) |
duke@435 | 1491 | break; |
duke@435 | 1492 | } |
duke@435 | 1493 | |
duke@435 | 1494 | // If we didn't find anything, continue |
duke@435 | 1495 | if( j < 0 ) continue; |
duke@435 | 1496 | |
duke@435 | 1497 | // Compute ExceptionHandlerTable subtable entry and add it |
duke@435 | 1498 | // (skip empty blocks) |
duke@435 | 1499 | if( n->is_Catch() ) { |
duke@435 | 1500 | |
duke@435 | 1501 | // Get the offset of the return from the call |
duke@435 | 1502 | uint call_return = call_returns[b->_pre_order]; |
duke@435 | 1503 | #ifdef ASSERT |
duke@435 | 1504 | assert( call_return > 0, "no call seen for this basic block" ); |
duke@435 | 1505 | while( b->_nodes[--j]->Opcode() == Op_MachProj ) ; |
duke@435 | 1506 | assert( b->_nodes[j]->is_Call(), "CatchProj must follow call" ); |
duke@435 | 1507 | #endif |
duke@435 | 1508 | // The last instruction is a CatchNode; find its CatchProjNodes |
duke@435 | 1509 | int nof_succs = b->_num_succs; |
duke@435 | 1510 | // allocate space |
duke@435 | 1511 | GrowableArray<intptr_t> handler_bcis(nof_succs); |
duke@435 | 1512 | GrowableArray<intptr_t> handler_pcos(nof_succs); |
duke@435 | 1513 | // iterate through all successors |
duke@435 | 1514 | for (int j = 0; j < nof_succs; j++) { |
duke@435 | 1515 | Block* s = b->_succs[j]; |
duke@435 | 1516 | bool found_p = false; |
duke@435 | 1517 | for( uint k = 1; k < s->num_preds(); k++ ) { |
duke@435 | 1518 | Node *pk = s->pred(k); |
duke@435 | 1519 | if( pk->is_CatchProj() && pk->in(0) == n ) { |
duke@435 | 1520 | const CatchProjNode* p = pk->as_CatchProj(); |
duke@435 | 1521 | found_p = true; |
duke@435 | 1522 | // add the corresponding handler bci & pco information |
duke@435 | 1523 | if( p->_con != CatchProjNode::fall_through_index ) { |
duke@435 | 1524 | // p leads to an exception handler (and is not fall through) |
duke@435 | 1525 | assert(s == _cfg->_blocks[s->_pre_order],"bad numbering"); |
duke@435 | 1526 | // no duplicates, please |
duke@435 | 1527 | if( !handler_bcis.contains(p->handler_bci()) ) { |
duke@435 | 1528 | uint block_num = s->non_connector()->_pre_order; |
duke@435 | 1529 | handler_bcis.append(p->handler_bci()); |
duke@435 | 1530 | handler_pcos.append(blk_labels[block_num].loc_pos()); |
duke@435 | 1531 | } |
duke@435 | 1532 | } |
duke@435 | 1533 | } |
duke@435 | 1534 | } |
duke@435 | 1535 | assert(found_p, "no matching predecessor found"); |
duke@435 | 1536 | // Note: Due to empty block removal, one block may have |
duke@435 | 1537 | // several CatchProj inputs, from the same Catch. |
duke@435 | 1538 | } |
duke@435 | 1539 | |
duke@435 | 1540 | // Set the offset of the return from the call |
duke@435 | 1541 | _handler_table.add_subtable(call_return, &handler_bcis, NULL, &handler_pcos); |
duke@435 | 1542 | continue; |
duke@435 | 1543 | } |
duke@435 | 1544 | |
duke@435 | 1545 | // Handle implicit null exception table updates |
duke@435 | 1546 | if( n->is_MachNullCheck() ) { |
duke@435 | 1547 | uint block_num = b->non_connector_successor(0)->_pre_order; |
duke@435 | 1548 | _inc_table.append( inct_starts[inct_cnt++], blk_labels[block_num].loc_pos() ); |
duke@435 | 1549 | continue; |
duke@435 | 1550 | } |
duke@435 | 1551 | } // End of for all blocks fill in exception table entries |
duke@435 | 1552 | } |
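// A hedged sketch of one subtable built above (all numbers invented):
// for a call whose return lands at offset 0x84 and whose Catch has two
// non-fall-through CatchProjs, the arrays pair each handler's bytecode
// index with the code offset of its handler block:
//
//   call_return  = 0x84
//   handler_bcis = { 23, 57 }        // handler_bci() of each CatchProj
//   handler_pcos = { 0x1a0, 0x1d4 }  // loc_pos() of each handler's label
//   _handler_table.add_subtable(0x84, &handler_bcis, NULL, &handler_pcos);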
duke@435 | 1553 | |
duke@435 | 1554 | // Static Variables |
duke@435 | 1555 | #ifndef PRODUCT |
duke@435 | 1556 | uint Scheduling::_total_nop_size = 0; |
duke@435 | 1557 | uint Scheduling::_total_method_size = 0; |
duke@435 | 1558 | uint Scheduling::_total_branches = 0; |
duke@435 | 1559 | uint Scheduling::_total_unconditional_delays = 0; |
duke@435 | 1560 | uint Scheduling::_total_instructions_per_bundle[Pipeline::_max_instrs_per_cycle+1]; |
duke@435 | 1561 | #endif |
duke@435 | 1562 | |
duke@435 | 1563 | // Initializer for class Scheduling |
duke@435 | 1564 | |
duke@435 | 1565 | Scheduling::Scheduling(Arena *arena, Compile &compile) |
duke@435 | 1566 | : _arena(arena), |
duke@435 | 1567 | _cfg(compile.cfg()), |
duke@435 | 1568 | _bbs(compile.cfg()->_bbs), |
duke@435 | 1569 | _regalloc(compile.regalloc()), |
duke@435 | 1570 | _reg_node(arena), |
duke@435 | 1571 | _bundle_instr_count(0), |
duke@435 | 1572 | _bundle_cycle_number(0), |
duke@435 | 1573 | _scheduled(arena), |
duke@435 | 1574 | _available(arena), |
duke@435 | 1575 | _next_node(NULL), |
duke@435 | 1576 | _bundle_use(0, 0, resource_count, &_bundle_use_elements[0]), |
duke@435 | 1577 | _pinch_free_list(arena) |
duke@435 | 1578 | #ifndef PRODUCT |
duke@435 | 1579 | , _branches(0) |
duke@435 | 1580 | , _unconditional_delays(0) |
duke@435 | 1581 | #endif |
duke@435 | 1582 | { |
duke@435 | 1583 | // Create a MachNopNode |
duke@435 | 1584 | _nop = new (&compile) MachNopNode(); |
duke@435 | 1585 | |
duke@435 | 1586 | // Now that the nops are in the array, save the count |
duke@435 | 1587 | // (but allow entries for the nops) |
duke@435 | 1588 | _node_bundling_limit = compile.unique(); |
duke@435 | 1589 | uint node_max = _regalloc->node_regs_max_index(); |
duke@435 | 1590 | |
duke@435 | 1591 | compile.set_node_bundling_limit(_node_bundling_limit); |
duke@435 | 1592 | |
twisti@1040 | 1593 | // This one is persistent within the Compile class |
duke@435 | 1594 | _node_bundling_base = NEW_ARENA_ARRAY(compile.comp_arena(), Bundle, node_max); |
duke@435 | 1595 | |
duke@435 | 1596 | // Allocate space for fixed-size arrays |
duke@435 | 1597 | _node_latency = NEW_ARENA_ARRAY(arena, unsigned short, node_max); |
duke@435 | 1598 | _uses = NEW_ARENA_ARRAY(arena, short, node_max); |
duke@435 | 1599 | _current_latency = NEW_ARENA_ARRAY(arena, unsigned short, node_max); |
duke@435 | 1600 | |
duke@435 | 1601 | // Clear the arrays |
duke@435 | 1602 | memset(_node_bundling_base, 0, node_max * sizeof(Bundle)); |
duke@435 | 1603 | memset(_node_latency, 0, node_max * sizeof(unsigned short)); |
duke@435 | 1604 | memset(_uses, 0, node_max * sizeof(short)); |
duke@435 | 1605 | memset(_current_latency, 0, node_max * sizeof(unsigned short)); |
duke@435 | 1606 | |
duke@435 | 1607 | // Clear the bundling information |
duke@435 | 1608 | memcpy(_bundle_use_elements, |
duke@435 | 1609 | Pipeline_Use::elaborated_elements, |
duke@435 | 1610 | sizeof(Pipeline_Use::elaborated_elements)); |
duke@435 | 1611 | |
duke@435 | 1612 | // Get the last node |
duke@435 | 1613 | Block *bb = _cfg->_blocks[_cfg->_blocks.size()-1]; |
duke@435 | 1614 | |
duke@435 | 1615 | _next_node = bb->_nodes[bb->_nodes.size()-1]; |
duke@435 | 1616 | } |
duke@435 | 1617 | |
duke@435 | 1618 | #ifndef PRODUCT |
duke@435 | 1619 | // Scheduling destructor |
duke@435 | 1620 | Scheduling::~Scheduling() { |
duke@435 | 1621 | _total_branches += _branches; |
duke@435 | 1622 | _total_unconditional_delays += _unconditional_delays; |
duke@435 | 1623 | } |
duke@435 | 1624 | #endif |
duke@435 | 1625 | |
duke@435 | 1626 | // Step ahead "i" cycles |
duke@435 | 1627 | void Scheduling::step(uint i) { |
duke@435 | 1628 | |
duke@435 | 1629 | Bundle *bundle = node_bundling(_next_node); |
duke@435 | 1630 | bundle->set_starts_bundle(); |
duke@435 | 1631 | |
duke@435 | 1632 | // Update the bundle record, but leave the flags information alone |
duke@435 | 1633 | if (_bundle_instr_count > 0) { |
duke@435 | 1634 | bundle->set_instr_count(_bundle_instr_count); |
duke@435 | 1635 | bundle->set_resources_used(_bundle_use.resourcesUsed()); |
duke@435 | 1636 | } |
duke@435 | 1637 | |
duke@435 | 1638 | // Update the state information |
duke@435 | 1639 | _bundle_instr_count = 0; |
duke@435 | 1640 | _bundle_cycle_number += i; |
duke@435 | 1641 | _bundle_use.step(i); |
duke@435 | 1642 | } |
duke@435 | 1643 | |
duke@435 | 1644 | void Scheduling::step_and_clear() { |
duke@435 | 1645 | Bundle *bundle = node_bundling(_next_node); |
duke@435 | 1646 | bundle->set_starts_bundle(); |
duke@435 | 1647 | |
duke@435 | 1648 | // Update the bundle record |
duke@435 | 1649 | if (_bundle_instr_count > 0) { |
duke@435 | 1650 | bundle->set_instr_count(_bundle_instr_count); |
duke@435 | 1651 | bundle->set_resources_used(_bundle_use.resourcesUsed()); |
duke@435 | 1652 | |
duke@435 | 1653 | _bundle_cycle_number += 1; |
duke@435 | 1654 | } |
duke@435 | 1655 | |
duke@435 | 1656 | // Clear the bundling information |
duke@435 | 1657 | _bundle_instr_count = 0; |
duke@435 | 1658 | _bundle_use.reset(); |
duke@435 | 1659 | |
duke@435 | 1660 | memcpy(_bundle_use_elements, |
duke@435 | 1661 | Pipeline_Use::elaborated_elements, |
duke@435 | 1662 | sizeof(Pipeline_Use::elaborated_elements)); |
duke@435 | 1663 | } |
duke@435 | 1664 | |
duke@435 | 1665 | //------------------------------ScheduleAndBundle------------------------------ |
duke@435 | 1666 | // Perform instruction scheduling and bundling over the sequence of |
duke@435 | 1667 | // instructions in backwards order. |
duke@435 | 1668 | void Compile::ScheduleAndBundle() { |
duke@435 | 1669 | |
duke@435 | 1670 | // Don't optimize this if it isn't a method |
duke@435 | 1671 | if (!_method) |
duke@435 | 1672 | return; |
duke@435 | 1673 | |
duke@435 | 1674 | // Don't optimize this if scheduling is disabled |
duke@435 | 1675 | if (!do_scheduling()) |
duke@435 | 1676 | return; |
duke@435 | 1677 | |
duke@435 | 1678 | NOT_PRODUCT( TracePhase t2("isched", &_t_instrSched, TimeCompiler); ) |
duke@435 | 1679 | |
duke@435 | 1680 | // Create a data structure for all the scheduling information |
duke@435 | 1681 | Scheduling scheduling(Thread::current()->resource_area(), *this); |
duke@435 | 1682 | |
duke@435 | 1683 | // Walk backwards over each basic block, computing the needed |
duke@435 | 1684 | // alignment and scheduling the instructions in each block |
duke@435 | 1685 | scheduling.DoScheduling(); |
duke@435 | 1686 | } |
duke@435 | 1687 | |
duke@435 | 1688 | //------------------------------ComputeLocalLatenciesForward------------------- |
duke@435 | 1689 | // Compute the latency of all the instructions. This is fairly simple, |
duke@435 | 1690 | // because we already have a legal ordering. Walk over the instructions |
duke@435 | 1691 | // from first to last, and compute the latency of the instruction based |
twisti@1040 | 1692 | // on the latency of the preceding instruction(s). |
duke@435 | 1693 | void Scheduling::ComputeLocalLatenciesForward(const Block *bb) { |
duke@435 | 1694 | #ifndef PRODUCT |
duke@435 | 1695 | if (_cfg->C->trace_opto_output()) |
duke@435 | 1696 | tty->print("# -> ComputeLocalLatenciesForward\n"); |
duke@435 | 1697 | #endif |
duke@435 | 1698 | |
duke@435 | 1699 | // Walk over all the schedulable instructions |
duke@435 | 1700 | for( uint j=_bb_start; j < _bb_end; j++ ) { |
duke@435 | 1701 | |
duke@435 | 1702 | // This is a kludge, forcing all latency calculations to start at 1. |
duke@435 | 1703 | // Used to allow latency 0 to force an instruction to the beginning |
duke@435 | 1704 | // of the bb |
duke@435 | 1705 | uint latency = 1; |
duke@435 | 1706 | Node *use = bb->_nodes[j]; |
duke@435 | 1707 | uint nlen = use->len(); |
duke@435 | 1708 | |
duke@435 | 1709 | // Walk over all the inputs |
duke@435 | 1710 | for ( uint k=0; k < nlen; k++ ) { |
duke@435 | 1711 | Node *def = use->in(k); |
duke@435 | 1712 | if (!def) |
duke@435 | 1713 | continue; |
duke@435 | 1714 | |
duke@435 | 1715 | uint l = _node_latency[def->_idx] + use->latency(k); |
duke@435 | 1716 | if (latency < l) |
duke@435 | 1717 | latency = l; |
duke@435 | 1718 | } |
duke@435 | 1719 | |
duke@435 | 1720 | _node_latency[use->_idx] = latency; |
duke@435 | 1721 | |
duke@435 | 1722 | #ifndef PRODUCT |
duke@435 | 1723 | if (_cfg->C->trace_opto_output()) { |
duke@435 | 1724 | tty->print("# latency %4d: ", latency); |
duke@435 | 1725 | use->dump(); |
duke@435 | 1726 | } |
duke@435 | 1727 | #endif |
duke@435 | 1728 | } |
duke@435 | 1729 | |
duke@435 | 1730 | #ifndef PRODUCT |
duke@435 | 1731 | if (_cfg->C->trace_opto_output()) |
duke@435 | 1732 | tty->print("# <- ComputeLocalLatenciesForward\n"); |
duke@435 | 1733 | #endif |
duke@435 | 1734 | |
duke@435 | 1735 | } // end ComputeLocalLatenciesForward |
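// Worked example of the forward pass above (latencies invented): for a
// use node with two inputs,
//
//   def1: _node_latency = 3, use->latency(k) = 1  ->  3 + 1 = 4
//   def2: _node_latency = 2, use->latency(k) = 3  ->  2 + 3 = 5
//
// the recorded latency is max(1, 4, 5) = 5. The floor of 1 is the
// kludge noted above, which keeps latency 0 free as a "force to the
// start of the block" marker.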
duke@435 | 1736 | |
duke@435 | 1737 | // See if this node fits into the present instruction bundle |
duke@435 | 1738 | bool Scheduling::NodeFitsInBundle(Node *n) { |
duke@435 | 1739 | uint n_idx = n->_idx; |
duke@435 | 1740 | |
duke@435 | 1741 | // If this is the unconditional delay instruction, then it fits |
duke@435 | 1742 | if (n == _unconditional_delay_slot) { |
duke@435 | 1743 | #ifndef PRODUCT |
duke@435 | 1744 | if (_cfg->C->trace_opto_output()) |
duke@435 | 1745 | tty->print("# NodeFitsInBundle [%4d]: TRUE; is in unconditional delay slot\n", n->_idx); |
duke@435 | 1746 | #endif |
duke@435 | 1747 | return (true); |
duke@435 | 1748 | } |
duke@435 | 1749 | |
duke@435 | 1750 | // If the node cannot be scheduled this cycle, skip it |
duke@435 | 1751 | if (_current_latency[n_idx] > _bundle_cycle_number) { |
duke@435 | 1752 | #ifndef PRODUCT |
duke@435 | 1753 | if (_cfg->C->trace_opto_output()) |
duke@435 | 1754 | tty->print("# NodeFitsInBundle [%4d]: FALSE; latency %4d > %d\n", |
duke@435 | 1755 | n->_idx, _current_latency[n_idx], _bundle_cycle_number); |
duke@435 | 1756 | #endif |
duke@435 | 1757 | return (false); |
duke@435 | 1758 | } |
duke@435 | 1759 | |
duke@435 | 1760 | const Pipeline *node_pipeline = n->pipeline(); |
duke@435 | 1761 | |
duke@435 | 1762 | uint instruction_count = node_pipeline->instructionCount(); |
duke@435 | 1763 | if (node_pipeline->mayHaveNoCode() && n->size(_regalloc) == 0) |
duke@435 | 1764 | instruction_count = 0; |
duke@435 | 1765 | else if (node_pipeline->hasBranchDelay() && !_unconditional_delay_slot) |
duke@435 | 1766 | instruction_count++; |
duke@435 | 1767 | |
duke@435 | 1768 | if (_bundle_instr_count + instruction_count > Pipeline::_max_instrs_per_cycle) { |
duke@435 | 1769 | #ifndef PRODUCT |
duke@435 | 1770 | if (_cfg->C->trace_opto_output()) |
duke@435 | 1771 | tty->print("# NodeFitsInBundle [%4d]: FALSE; too many instructions: %d > %d\n", |
duke@435 | 1772 | n->_idx, _bundle_instr_count + instruction_count, Pipeline::_max_instrs_per_cycle); |
duke@435 | 1773 | #endif |
duke@435 | 1774 | return (false); |
duke@435 | 1775 | } |
duke@435 | 1776 | |
duke@435 | 1777 | // Don't allow non-machine nodes to be handled this way |
duke@435 | 1778 | if (!n->is_Mach() && instruction_count == 0) |
duke@435 | 1779 | return (false); |
duke@435 | 1780 | |
duke@435 | 1781 | // See if there is any overlap |
duke@435 | 1782 | uint delay = _bundle_use.full_latency(0, node_pipeline->resourceUse()); |
duke@435 | 1783 | |
duke@435 | 1784 | if (delay > 0) { |
duke@435 | 1785 | #ifndef PRODUCT |
duke@435 | 1786 | if (_cfg->C->trace_opto_output()) |
duke@435 | 1787 | tty->print("# NodeFitsInBundle [%4d]: FALSE; functional units overlap\n", n_idx); |
duke@435 | 1788 | #endif |
duke@435 | 1789 | return false; |
duke@435 | 1790 | } |
duke@435 | 1791 | |
duke@435 | 1792 | #ifndef PRODUCT |
duke@435 | 1793 | if (_cfg->C->trace_opto_output()) |
duke@435 | 1794 | tty->print("# NodeFitsInBundle [%4d]: TRUE\n", n_idx); |
duke@435 | 1795 | #endif |
duke@435 | 1796 | |
duke@435 | 1797 | return true; |
duke@435 | 1798 | } |
duke@435 | 1799 | |
duke@435 | 1800 | Node * Scheduling::ChooseNodeToBundle() { |
duke@435 | 1801 | uint siz = _available.size(); |
duke@435 | 1802 | |
duke@435 | 1803 | if (siz == 0) { |
duke@435 | 1804 | |
duke@435 | 1805 | #ifndef PRODUCT |
duke@435 | 1806 | if (_cfg->C->trace_opto_output()) |
duke@435 | 1807 | tty->print("# ChooseNodeToBundle: NULL\n"); |
duke@435 | 1808 | #endif |
duke@435 | 1809 | return (NULL); |
duke@435 | 1810 | } |
duke@435 | 1811 | |
duke@435 | 1812 | // Fast path, if only 1 instruction in the bundle |
duke@435 | 1813 | if (siz == 1) { |
duke@435 | 1814 | #ifndef PRODUCT |
duke@435 | 1815 | if (_cfg->C->trace_opto_output()) { |
duke@435 | 1816 | tty->print("# ChooseNodeToBundle (only 1): "); |
duke@435 | 1817 | _available[0]->dump(); |
duke@435 | 1818 | } |
duke@435 | 1819 | #endif |
duke@435 | 1820 | return (_available[0]); |
duke@435 | 1821 | } |
duke@435 | 1822 | |
duke@435 | 1823 | // Don't bother, if the bundle is already full |
duke@435 | 1824 | if (_bundle_instr_count < Pipeline::_max_instrs_per_cycle) { |
duke@435 | 1825 | for ( uint i = 0; i < siz; i++ ) { |
duke@435 | 1826 | Node *n = _available[i]; |
duke@435 | 1827 | |
duke@435 | 1828 | // Skip projections, we'll handle them another way |
duke@435 | 1829 | if (n->is_Proj()) |
duke@435 | 1830 | continue; |
duke@435 | 1831 | |
duke@435 | 1832 | // This presupposes that instructions are inserted into the |
duke@435 | 1833 | // available list in legality order; i.e., instructions that |
duke@435 | 1834 | // must be scheduled first are at the head of the list |
duke@435 | 1835 | if (NodeFitsInBundle(n)) { |
duke@435 | 1836 | #ifndef PRODUCT |
duke@435 | 1837 | if (_cfg->C->trace_opto_output()) { |
duke@435 | 1838 | tty->print("# ChooseNodeToBundle: "); |
duke@435 | 1839 | n->dump(); |
duke@435 | 1840 | } |
duke@435 | 1841 | #endif |
duke@435 | 1842 | return (n); |
duke@435 | 1843 | } |
duke@435 | 1844 | } |
duke@435 | 1845 | } |
duke@435 | 1846 | |
duke@435 | 1847 | // Nothing fits in this bundle, choose the highest priority |
duke@435 | 1848 | #ifndef PRODUCT |
duke@435 | 1849 | if (_cfg->C->trace_opto_output()) { |
duke@435 | 1850 | tty->print("# ChooseNodeToBundle: "); |
duke@435 | 1851 | _available[0]->dump(); |
duke@435 | 1852 | } |
duke@435 | 1853 | #endif |
duke@435 | 1854 | |
duke@435 | 1855 | return _available[0]; |
duke@435 | 1856 | } |
duke@435 | 1857 | |
duke@435 | 1858 | //------------------------------AddNodeToAvailableList------------------------- |
duke@435 | 1859 | void Scheduling::AddNodeToAvailableList(Node *n) { |
duke@435 | 1860 | assert( !n->is_Proj(), "projections never directly made available" ); |
duke@435 | 1861 | #ifndef PRODUCT |
duke@435 | 1862 | if (_cfg->C->trace_opto_output()) { |
duke@435 | 1863 | tty->print("# AddNodeToAvailableList: "); |
duke@435 | 1864 | n->dump(); |
duke@435 | 1865 | } |
duke@435 | 1866 | #endif |
duke@435 | 1867 | |
duke@435 | 1868 | int latency = _current_latency[n->_idx]; |
duke@435 | 1869 | |
duke@435 | 1870 | // Insert in latency order (insertion sort) |
duke@435 | 1871 | uint i; |
duke@435 | 1872 | for ( i=0; i < _available.size(); i++ ) |
duke@435 | 1873 | if (_current_latency[_available[i]->_idx] > latency) |
duke@435 | 1874 | break; |
duke@435 | 1875 | |
duke@435 | 1876 | // Special Check for compares following branches |
duke@435 | 1877 | if( n->is_Mach() && _scheduled.size() > 0 ) { |
duke@435 | 1878 | int op = n->as_Mach()->ideal_Opcode(); |
duke@435 | 1879 | Node *last = _scheduled[0]; |
duke@435 | 1880 | if( last->is_MachIf() && last->in(1) == n && |
duke@435 | 1881 | ( op == Op_CmpI || |
duke@435 | 1882 | op == Op_CmpU || |
duke@435 | 1883 | op == Op_CmpP || |
duke@435 | 1884 | op == Op_CmpF || |
duke@435 | 1885 | op == Op_CmpD || |
duke@435 | 1886 | op == Op_CmpL ) ) { |
duke@435 | 1887 | |
duke@435 | 1888 | // Recalculate position, moving to front of same latency |
duke@435 | 1889 | for ( i=0 ; i < _available.size(); i++ ) |
duke@435 | 1890 | if (_current_latency[_available[i]->_idx] >= latency) |
duke@435 | 1891 | break; |
duke@435 | 1892 | } |
duke@435 | 1893 | } |
duke@435 | 1894 | |
duke@435 | 1895 | // Insert the node in the available list |
duke@435 | 1896 | _available.insert(i, n); |
duke@435 | 1897 | |
duke@435 | 1898 | #ifndef PRODUCT |
duke@435 | 1899 | if (_cfg->C->trace_opto_output()) |
duke@435 | 1900 | dump_available(); |
duke@435 | 1901 | #endif |
duke@435 | 1902 | } |
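// Illustrative pass of the insertion above (latencies invented): with
// _available ordered by latency as {A:1, B:3, C:3, D:5}, a new node of
// latency 3 stops the first loop at D (first latency > 3) and would
// land after C. If it is a compare feeding the most recently scheduled
// MachIf, the second loop stops at B (first latency >= 3) instead, so
// the compare moves to the front of its latency group and the
// backwards scheduler can place it immediately before the branch in
// the final code.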
duke@435 | 1903 | |
duke@435 | 1904 | //------------------------------DecrementUseCounts----------------------------- |
duke@435 | 1905 | void Scheduling::DecrementUseCounts(Node *n, const Block *bb) { |
duke@435 | 1906 | for ( uint i=0; i < n->len(); i++ ) { |
duke@435 | 1907 | Node *def = n->in(i); |
duke@435 | 1908 | if (!def) continue; |
duke@435 | 1909 | if( def->is_Proj() ) // If this is a machine projection, then |
duke@435 | 1910 | def = def->in(0); // propagate usage thru to the base instruction |
duke@435 | 1911 | |
duke@435 | 1912 | if( _bbs[def->_idx] != bb ) // Ignore if not block-local |
duke@435 | 1913 | continue; |
duke@435 | 1914 | |
duke@435 | 1915 | // Compute the latency |
duke@435 | 1916 | uint l = _bundle_cycle_number + n->latency(i); |
duke@435 | 1917 | if (_current_latency[def->_idx] < l) |
duke@435 | 1918 | _current_latency[def->_idx] = l; |
duke@435 | 1919 | |
duke@435 | 1920 | // If this does not have uses then schedule it |
duke@435 | 1921 | if ((--_uses[def->_idx]) == 0) |
duke@435 | 1922 | AddNodeToAvailableList(def); |
duke@435 | 1923 | } |
duke@435 | 1924 | } |
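// Minimal sketch of the bookkeeping above (counts invented): when n is
// scheduled and has inputs {a, b}, where a is block-local with
// _uses[a] == 2 and b lives in another block:
//   - b is ignored entirely (not block-local);
//   - a's ready cycle is raised to _bundle_cycle_number + n->latency(i);
//   - _uses[a] drops from 2 to 1, so a is not yet available; a later
//     scheduled user must drop it to 0 before it enters _available.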
duke@435 | 1925 | |
duke@435 | 1926 | //------------------------------AddNodeToBundle-------------------------------- |
duke@435 | 1927 | void Scheduling::AddNodeToBundle(Node *n, const Block *bb) { |
duke@435 | 1928 | #ifndef PRODUCT |
duke@435 | 1929 | if (_cfg->C->trace_opto_output()) { |
duke@435 | 1930 | tty->print("# AddNodeToBundle: "); |
duke@435 | 1931 | n->dump(); |
duke@435 | 1932 | } |
duke@435 | 1933 | #endif |
duke@435 | 1934 | |
duke@435 | 1935 | // Remove this from the available list |
duke@435 | 1936 | uint i; |
duke@435 | 1937 | for (i = 0; i < _available.size(); i++) |
duke@435 | 1938 | if (_available[i] == n) |
duke@435 | 1939 | break; |
duke@435 | 1940 | assert(i < _available.size(), "entry in _available list not found"); |
duke@435 | 1941 | _available.remove(i); |
duke@435 | 1942 | |
duke@435 | 1943 | // See if this fits in the current bundle |
duke@435 | 1944 | const Pipeline *node_pipeline = n->pipeline(); |
duke@435 | 1945 | const Pipeline_Use& node_usage = node_pipeline->resourceUse(); |
duke@435 | 1946 | |
duke@435 | 1947 | // Check for instructions to be placed in the delay slot. We |
duke@435 | 1948 | // do this before we actually schedule the current instruction, |
duke@435 | 1949 | // because the delay slot follows the current instruction. |
duke@435 | 1950 | if (Pipeline::_branch_has_delay_slot && |
duke@435 | 1951 | node_pipeline->hasBranchDelay() && |
duke@435 | 1952 | !_unconditional_delay_slot) { |
duke@435 | 1953 | |
duke@435 | 1954 | uint siz = _available.size(); |
duke@435 | 1955 | |
duke@435 | 1956 | // Conditional branches can support an instruction that |
twisti@1040 | 1957 | // is unconditionally executed and not dependent on the |
duke@435 | 1958 | // branch, OR a conditionally executed instruction if |
duke@435 | 1959 | // the branch is taken. In practice, this means that |
duke@435 | 1960 | // the first instruction at the branch target is |
duke@435 | 1961 | // copied to the delay slot, and the branch goes to |
duke@435 | 1962 | // the instruction after that at the branch target |
duke@435 | 1963 | if ( n->is_Mach() && n->is_Branch() ) { |
duke@435 | 1964 | |
duke@435 | 1965 | assert( !n->is_MachNullCheck(), "should not look for delay slot for Null Check" ); |
duke@435 | 1966 | assert( !n->is_Catch(), "should not look for delay slot for Catch" ); |
duke@435 | 1967 | |
duke@435 | 1968 | #ifndef PRODUCT |
duke@435 | 1969 | _branches++; |
duke@435 | 1970 | #endif |
duke@435 | 1971 | |
duke@435 | 1972 | // At least 1 instruction is on the available list |
twisti@1040 | 1973 | // that is not dependent on the branch |
duke@435 | 1974 | for (uint i = 0; i < siz; i++) { |
duke@435 | 1975 | Node *d = _available[i]; |
duke@435 | 1976 | const Pipeline *avail_pipeline = d->pipeline(); |
duke@435 | 1977 | |
duke@435 | 1978 | // Don't allow safepoints in the branch shadow, that will |
duke@435 | 1979 | // cause a number of difficulties |
duke@435 | 1980 | if ( avail_pipeline->instructionCount() == 1 && |
duke@435 | 1981 | !avail_pipeline->hasMultipleBundles() && |
duke@435 | 1982 | !avail_pipeline->hasBranchDelay() && |
duke@435 | 1983 | Pipeline::instr_has_unit_size() && |
duke@435 | 1984 | d->size(_regalloc) == Pipeline::instr_unit_size() && |
duke@435 | 1985 | NodeFitsInBundle(d) && |
duke@435 | 1986 | !node_bundling(d)->used_in_delay()) { |
duke@435 | 1987 | |
duke@435 | 1988 | if (d->is_Mach() && !d->is_MachSafePoint()) { |
duke@435 | 1989 | // A node that fits in the delay slot was found, so we need to |
duke@435 | 1990 | // set the appropriate bits in the bundle pipeline information so |
duke@435 | 1991 | // that it correctly indicates resource usage. Later, when we |
duke@435 | 1992 | // attempt to add this instruction to the bundle, we will skip |
duke@435 | 1993 | // setting the resource usage. |
duke@435 | 1994 | _unconditional_delay_slot = d; |
duke@435 | 1995 | node_bundling(n)->set_use_unconditional_delay(); |
duke@435 | 1996 | node_bundling(d)->set_used_in_unconditional_delay(); |
duke@435 | 1997 | _bundle_use.add_usage(avail_pipeline->resourceUse()); |
duke@435 | 1998 | _current_latency[d->_idx] = _bundle_cycle_number; |
duke@435 | 1999 | _next_node = d; |
duke@435 | 2000 | ++_bundle_instr_count; |
duke@435 | 2001 | #ifndef PRODUCT |
duke@435 | 2002 | _unconditional_delays++; |
duke@435 | 2003 | #endif |
duke@435 | 2004 | break; |
duke@435 | 2005 | } |
duke@435 | 2006 | } |
duke@435 | 2007 | } |
duke@435 | 2008 | } |
duke@435 | 2009 | |
duke@435 | 2010 | // No delay slot, add a nop to the usage |
duke@435 | 2011 | if (!_unconditional_delay_slot) { |
duke@435 | 2012 | // See if adding an instruction in the delay slot will overflow |
duke@435 | 2013 | // the bundle. |
duke@435 | 2014 | if (!NodeFitsInBundle(_nop)) { |
duke@435 | 2015 | #ifndef PRODUCT |
duke@435 | 2016 | if (_cfg->C->trace_opto_output()) |
duke@435 | 2017 | tty->print("# *** STEP(1 instruction for delay slot) ***\n"); |
duke@435 | 2018 | #endif |
duke@435 | 2019 | step(1); |
duke@435 | 2020 | } |
duke@435 | 2021 | |
duke@435 | 2022 | _bundle_use.add_usage(_nop->pipeline()->resourceUse()); |
duke@435 | 2023 | _next_node = _nop; |
duke@435 | 2024 | ++_bundle_instr_count; |
duke@435 | 2025 | } |
duke@435 | 2026 | |
duke@435 | 2027 | // See if the instruction in the delay slot requires a |
duke@435 | 2028 | // step of the bundles |
duke@435 | 2029 | if (!NodeFitsInBundle(n)) { |
duke@435 | 2030 | #ifndef PRODUCT |
duke@435 | 2031 | if (_cfg->C->trace_opto_output()) |
duke@435 | 2032 | tty->print("# *** STEP(branch won't fit) ***\n"); |
duke@435 | 2033 | #endif |
duke@435 | 2034 | // Update the state information |
duke@435 | 2035 | _bundle_instr_count = 0; |
duke@435 | 2036 | _bundle_cycle_number += 1; |
duke@435 | 2037 | _bundle_use.step(1); |
duke@435 | 2038 | } |
duke@435 | 2039 | } |
duke@435 | 2040 | |
duke@435 | 2041 | // Get the number of instructions |
duke@435 | 2042 | uint instruction_count = node_pipeline->instructionCount(); |
duke@435 | 2043 | if (node_pipeline->mayHaveNoCode() && n->size(_regalloc) == 0) |
duke@435 | 2044 | instruction_count = 0; |
duke@435 | 2045 | |
duke@435 | 2046 | // Compute the latency information |
duke@435 | 2047 | uint delay = 0; |
duke@435 | 2048 | |
duke@435 | 2049 | if (instruction_count > 0 || !node_pipeline->mayHaveNoCode()) { |
duke@435 | 2050 | int relative_latency = _current_latency[n->_idx] - _bundle_cycle_number; |
duke@435 | 2051 | if (relative_latency < 0) |
duke@435 | 2052 | relative_latency = 0; |
duke@435 | 2053 | |
duke@435 | 2054 | delay = _bundle_use.full_latency(relative_latency, node_usage); |
duke@435 | 2055 | |
duke@435 | 2056 | // Does not fit in this bundle, start a new one |
duke@435 | 2057 | if (delay > 0) { |
duke@435 | 2058 | step(delay); |
duke@435 | 2059 | |
duke@435 | 2060 | #ifndef PRODUCT |
duke@435 | 2061 | if (_cfg->C->trace_opto_output()) |
duke@435 | 2062 | tty->print("# *** STEP(%d) ***\n", delay); |
duke@435 | 2063 | #endif |
duke@435 | 2064 | } |
duke@435 | 2065 | } |
duke@435 | 2066 | |
duke@435 | 2067 | // If this was placed in the delay slot, ignore it |
duke@435 | 2068 | if (n != _unconditional_delay_slot) { |
duke@435 | 2069 | |
duke@435 | 2070 | if (delay == 0) { |
duke@435 | 2071 | if (node_pipeline->hasMultipleBundles()) { |
duke@435 | 2072 | #ifndef PRODUCT |
duke@435 | 2073 | if (_cfg->C->trace_opto_output()) |
duke@435 | 2074 | tty->print("# *** STEP(multiple instructions) ***\n"); |
duke@435 | 2075 | #endif |
duke@435 | 2076 | step(1); |
duke@435 | 2077 | } |
duke@435 | 2078 | |
duke@435 | 2079 | else if (instruction_count + _bundle_instr_count > Pipeline::_max_instrs_per_cycle) { |
duke@435 | 2080 | #ifndef PRODUCT |
duke@435 | 2081 | if (_cfg->C->trace_opto_output()) |
duke@435 | 2082 | tty->print("# *** STEP(%d >= %d instructions) ***\n", |
duke@435 | 2083 | instruction_count + _bundle_instr_count, |
duke@435 | 2084 | Pipeline::_max_instrs_per_cycle); |
duke@435 | 2085 | #endif |
duke@435 | 2086 | step(1); |
duke@435 | 2087 | } |
duke@435 | 2088 | } |
duke@435 | 2089 | |
duke@435 | 2090 | if (node_pipeline->hasBranchDelay() && !_unconditional_delay_slot) |
duke@435 | 2091 | _bundle_instr_count++; |
duke@435 | 2092 | |
duke@435 | 2093 | // Set the node's latency |
duke@435 | 2094 | _current_latency[n->_idx] = _bundle_cycle_number; |
duke@435 | 2095 | |
duke@435 | 2096 | // Now merge the functional unit information |
duke@435 | 2097 | if (instruction_count > 0 || !node_pipeline->mayHaveNoCode()) |
duke@435 | 2098 | _bundle_use.add_usage(node_usage); |
duke@435 | 2099 | |
duke@435 | 2100 | // Increment the number of instructions in this bundle |
duke@435 | 2101 | _bundle_instr_count += instruction_count; |
duke@435 | 2102 | |
duke@435 | 2103 | // Remember this node for later |
duke@435 | 2104 | if (n->is_Mach()) |
duke@435 | 2105 | _next_node = n; |
duke@435 | 2106 | } |
duke@435 | 2107 | |
duke@435 | 2108 | // It's possible to have a BoxLock in the graph and in the _bbs mapping but |
duke@435 | 2109 | // not in the bb->_nodes array. This happens for debug-info-only BoxLocks. |
duke@435 | 2110 | // 'Schedule' them (basically ignore in the schedule) but do not insert them |
duke@435 | 2111 | // into the block. All other scheduled nodes get put in the schedule here. |
duke@435 | 2112 | int op = n->Opcode(); |
duke@435 | 2113 | if( (op == Op_Node && n->req() == 0) || // anti-dependence node OR |
duke@435 | 2114 | (op != Op_Node && // Not an unused anti-dependence node and |
duke@435 | 2115 | // not an unallocated boxlock |
duke@435 | 2116 | (OptoReg::is_valid(_regalloc->get_reg_first(n)) || op != Op_BoxLock)) ) { |
duke@435 | 2117 | |
duke@435 | 2118 | // Push any trailing projections |
duke@435 | 2119 | if( bb->_nodes[bb->_nodes.size()-1] != n ) { |
duke@435 | 2120 | for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { |
duke@435 | 2121 | Node *foi = n->fast_out(i); |
duke@435 | 2122 | if( foi->is_Proj() ) |
duke@435 | 2123 | _scheduled.push(foi); |
duke@435 | 2124 | } |
duke@435 | 2125 | } |
duke@435 | 2126 | |
duke@435 | 2127 | // Put the instruction in the schedule list |
duke@435 | 2128 | _scheduled.push(n); |
duke@435 | 2129 | } |
duke@435 | 2130 | |
duke@435 | 2131 | #ifndef PRODUCT |
duke@435 | 2132 | if (_cfg->C->trace_opto_output()) |
duke@435 | 2133 | dump_available(); |
duke@435 | 2134 | #endif |
duke@435 | 2135 | |
duke@435 | 2136 | // Walk all the definitions, decrementing use counts, and |
duke@435 | 2137 | // if a definition has a 0 use count, place it in the available list. |
duke@435 | 2138 | DecrementUseCounts(n,bb); |
duke@435 | 2139 | } |
duke@435 | 2140 | |
duke@435 | 2141 | //------------------------------ComputeUseCount-------------------------------- |
duke@435 | 2142 | // This method sets the use count within a basic block. We will ignore all |
duke@435 | 2143 | // uses outside the current basic block. As we are doing a backwards walk, |
duke@435 | 2144 | // any node we reach that has a use count of 0 may be scheduled. This also |
duke@435 | 2145 | // avoids the problem of cyclic references from phi nodes, as long as phi |
duke@435 | 2146 | // nodes are at the front of the basic block. This method also initializes |
duke@435 | 2147 | // the available list to the set of instructions that have no uses within this |
duke@435 | 2148 | // basic block. |
duke@435 | 2149 | void Scheduling::ComputeUseCount(const Block *bb) { |
duke@435 | 2150 | #ifndef PRODUCT |
duke@435 | 2151 | if (_cfg->C->trace_opto_output()) |
duke@435 | 2152 | tty->print("# -> ComputeUseCount\n"); |
duke@435 | 2153 | #endif |
duke@435 | 2154 | |
duke@435 | 2155 | // Clear the list of available and scheduled instructions, just in case |
duke@435 | 2156 | _available.clear(); |
duke@435 | 2157 | _scheduled.clear(); |
duke@435 | 2158 | |
duke@435 | 2159 | // No delay slot specified |
duke@435 | 2160 | _unconditional_delay_slot = NULL; |
duke@435 | 2161 | |
duke@435 | 2162 | #ifdef ASSERT |
duke@435 | 2163 | for( uint i=0; i < bb->_nodes.size(); i++ ) |
duke@435 | 2164 | assert( _uses[bb->_nodes[i]->_idx] == 0, "_use array not clean" ); |
duke@435 | 2165 | #endif |
duke@435 | 2166 | |
duke@435 | 2167 | // Force the _uses count to never go to zero for unschedulable pieces |
duke@435 | 2168 | // of the block |
duke@435 | 2169 | for( uint k = 0; k < _bb_start; k++ ) |
duke@435 | 2170 | _uses[bb->_nodes[k]->_idx] = 1; |
duke@435 | 2171 | for( uint l = _bb_end; l < bb->_nodes.size(); l++ ) |
duke@435 | 2172 | _uses[bb->_nodes[l]->_idx] = 1; |
duke@435 | 2173 | |
duke@435 | 2174 | // Iterate backwards over the instructions in the block. Don't count the |
duke@435 | 2175 | // branch projections at end or the block header instructions. |
duke@435 | 2176 | for( uint j = _bb_end-1; j >= _bb_start; j-- ) { |
duke@435 | 2177 | Node *n = bb->_nodes[j]; |
duke@435 | 2178 | if( n->is_Proj() ) continue; // Projections handled another way |
duke@435 | 2179 | |
duke@435 | 2180 | // Account for all uses |
duke@435 | 2181 | for ( uint k = 0; k < n->len(); k++ ) { |
duke@435 | 2182 | Node *inp = n->in(k); |
duke@435 | 2183 | if (!inp) continue; |
duke@435 | 2184 | assert(inp != n, "no cycles allowed" ); |
duke@435 | 2185 | if( _bbs[inp->_idx] == bb ) { // Block-local use? |
duke@435 | 2186 | if( inp->is_Proj() ) // Skip through Proj's |
duke@435 | 2187 | inp = inp->in(0); |
duke@435 | 2188 | ++_uses[inp->_idx]; // Count 1 block-local use |
duke@435 | 2189 | } |
duke@435 | 2190 | } |
duke@435 | 2191 | |
duke@435 | 2192 | // If this instruction has a 0 use count, then it is available |
duke@435 | 2193 | if (!_uses[n->_idx]) { |
duke@435 | 2194 | _current_latency[n->_idx] = _bundle_cycle_number; |
duke@435 | 2195 | AddNodeToAvailableList(n); |
duke@435 | 2196 | } |
duke@435 | 2197 | |
duke@435 | 2198 | #ifndef PRODUCT |
duke@435 | 2199 | if (_cfg->C->trace_opto_output()) { |
duke@435 | 2200 | tty->print("# uses: %3d: ", _uses[n->_idx]); |
duke@435 | 2201 | n->dump(); |
duke@435 | 2202 | } |
duke@435 | 2203 | #endif |
duke@435 | 2204 | } |
duke@435 | 2205 | |
duke@435 | 2206 | #ifndef PRODUCT |
duke@435 | 2207 | if (_cfg->C->trace_opto_output()) |
duke@435 | 2208 | tty->print("# <- ComputeUseCount\n"); |
duke@435 | 2209 | #endif |
duke@435 | 2210 | } |
duke@435 | 2211 | |
duke@435 | 2212 | // This routine performs scheduling on each basic block in reverse order, |
duke@435 | 2213 | // using instruction latencies and taking into account functional unit
duke@435 | 2214 | // availability. |
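//
// In outline, for each block (visited last to first):
//   1. reset the pipeline state, unless the block's sole successor is the
//      block scheduled on the previous iteration (a fall-through pair);
//   2. trim [_bb_start.._bb_end) down to the schedulable instructions;
//   3. run ComputeRegisterAntidependencies(), ComputeLocalLatenciesForward()
//      and ComputeUseCount();
//   4. repeatedly ChooseNodeToBundle()/AddNodeToBundle() until the available
//      list drains, then copy the schedule back into the block.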
duke@435 | 2215 | void Scheduling::DoScheduling() { |
duke@435 | 2216 | #ifndef PRODUCT |
duke@435 | 2217 | if (_cfg->C->trace_opto_output()) |
duke@435 | 2218 | tty->print("# -> DoScheduling\n"); |
duke@435 | 2219 | #endif |
duke@435 | 2220 | |
duke@435 | 2221 | Block *succ_bb = NULL; |
duke@435 | 2222 | Block *bb; |
duke@435 | 2223 | |
duke@435 | 2224 | // Walk over all the basic blocks in reverse order |
duke@435 | 2225 | for( int i=_cfg->_num_blocks-1; i >= 0; succ_bb = bb, i-- ) { |
duke@435 | 2226 | bb = _cfg->_blocks[i]; |
duke@435 | 2227 | |
duke@435 | 2228 | #ifndef PRODUCT |
duke@435 | 2229 | if (_cfg->C->trace_opto_output()) { |
duke@435 | 2230 | tty->print("# Schedule BB#%03d (initial)\n", i); |
duke@435 | 2231 | for (uint j = 0; j < bb->_nodes.size(); j++) |
duke@435 | 2232 | bb->_nodes[j]->dump(); |
duke@435 | 2233 | } |
duke@435 | 2234 | #endif |
duke@435 | 2235 | |
duke@435 | 2236 |     // Skip the root block; it contains nothing to schedule
duke@435 | 2237 | if( bb == _cfg->_broot ) |
duke@435 | 2238 | continue; |
duke@435 | 2239 | |
duke@435 | 2240 | // Skip empty, connector blocks |
duke@435 | 2241 | if (bb->is_connector()) |
duke@435 | 2242 | continue; |
duke@435 | 2243 | |
duke@435 | 2244 | // If the following block is not the sole successor of |
duke@435 | 2245 | // this one, then reset the pipeline information |
duke@435 | 2246 | if (bb->_num_succs != 1 || bb->non_connector_successor(0) != succ_bb) { |
duke@435 | 2247 | #ifndef PRODUCT |
duke@435 | 2248 | if (_cfg->C->trace_opto_output()) { |
duke@435 | 2249 | tty->print("*** bundle start of next BB, node %d, for %d instructions\n", |
duke@435 | 2250 | _next_node->_idx, _bundle_instr_count); |
duke@435 | 2251 | } |
duke@435 | 2252 | #endif |
duke@435 | 2253 | step_and_clear(); |
duke@435 | 2254 | } |
duke@435 | 2255 | |
duke@435 | 2256 | // Leave untouched the starting instruction, any Phis, a CreateEx node |
duke@435 | 2257 | // or Top. bb->_nodes[_bb_start] is the first schedulable instruction. |
duke@435 | 2258 | _bb_end = bb->_nodes.size()-1; |
duke@435 | 2259 | for( _bb_start=1; _bb_start <= _bb_end; _bb_start++ ) { |
duke@435 | 2260 | Node *n = bb->_nodes[_bb_start]; |
duke@435 | 2261 |       // Things not matched, like PhiNodes and ProjNodes, don't get scheduled.
duke@435 | 2262 | // Also, MachIdealNodes do not get scheduled |
duke@435 | 2263 | if( !n->is_Mach() ) continue; // Skip non-machine nodes |
duke@435 | 2264 | MachNode *mach = n->as_Mach(); |
duke@435 | 2265 | int iop = mach->ideal_Opcode(); |
duke@435 | 2266 | if( iop == Op_CreateEx ) continue; // CreateEx is pinned |
duke@435 | 2267 | if( iop == Op_Con ) continue; // Do not schedule Top |
duke@435 | 2268 | if( iop == Op_Node && // Do not schedule PhiNodes, ProjNodes |
duke@435 | 2269 | mach->pipeline() == MachNode::pipeline_class() && |
duke@435 | 2270 | !n->is_SpillCopy() ) // Breakpoints, Prolog, etc |
duke@435 | 2271 | continue; |
duke@435 | 2272 |       break;                    // Found the first schedulable instruction
duke@435 | 2273 | } |
duke@435 | 2274 | // Compute last "interesting" instruction in block - last instruction we |
duke@435 | 2275 | // might schedule. _bb_end points just after last schedulable inst. We |
duke@435 | 2276 | // normally schedule conditional branches (despite them being forced last |
duke@435 | 2277 | // in the block), because they have delay slots we can fill. Calls all |
duke@435 | 2278 | // have their delay slots filled in the template expansions, so we don't |
duke@435 | 2279 | // bother scheduling them. |
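    //
    // For example, if the block ends in a call (schematic only; the exact
    // mix of projections varies):
    //     ...          <- last instruction we may reschedule
    //     Call
    //     Proj         <- result/memory/exception projections
    //     Catch
    // then _bb_end is walked back to the Call, leaving the Call, its
    // projections and the Catch outside the schedulable range.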
duke@435 | 2280 | Node *last = bb->_nodes[_bb_end]; |
duke@435 | 2281 | if( last->is_Catch() || |
kvn@1142 | 2282 | // Exclude unreachable path case when Halt node is in a separate block. |
kvn@1142 | 2283 | (_bb_end > 1 && last->is_Mach() && last->as_Mach()->ideal_Opcode() == Op_Halt) ) { |
duke@435 | 2284 | // There must be a prior call. Skip it. |
duke@435 | 2285 | while( !bb->_nodes[--_bb_end]->is_Call() ) { |
duke@435 | 2286 | assert( bb->_nodes[_bb_end]->is_Proj(), "skipping projections after expected call" ); |
duke@435 | 2287 | } |
duke@435 | 2288 | } else if( last->is_MachNullCheck() ) { |
duke@435 | 2289 | // Backup so the last null-checked memory instruction is |
duke@435 | 2290 | // outside the schedulable range. Skip over the nullcheck, |
duke@435 | 2291 | // projection, and the memory nodes. |
duke@435 | 2292 | Node *mem = last->in(1); |
duke@435 | 2293 | do { |
duke@435 | 2294 | _bb_end--; |
duke@435 | 2295 | } while (mem != bb->_nodes[_bb_end]); |
duke@435 | 2296 | } else { |
duke@435 | 2297 | // Set _bb_end to point after last schedulable inst. |
duke@435 | 2298 | _bb_end++; |
duke@435 | 2299 | } |
duke@435 | 2300 | |
duke@435 | 2301 | assert( _bb_start <= _bb_end, "inverted block ends" ); |
duke@435 | 2302 | |
duke@435 | 2303 | // Compute the register antidependencies for the basic block |
duke@435 | 2304 | ComputeRegisterAntidependencies(bb); |
duke@435 | 2305 | if (_cfg->C->failing()) return; // too many D-U pinch points |
duke@435 | 2306 | |
duke@435 | 2307 | // Compute intra-bb latencies for the nodes |
duke@435 | 2308 | ComputeLocalLatenciesForward(bb); |
duke@435 | 2309 | |
duke@435 | 2310 | // Compute the usage within the block, and set the list of all nodes |
duke@435 | 2311 | // in the block that have no uses within the block. |
duke@435 | 2312 | ComputeUseCount(bb); |
duke@435 | 2313 | |
duke@435 | 2314 | // Schedule the remaining instructions in the block |
duke@435 | 2315 | while ( _available.size() > 0 ) { |
duke@435 | 2316 | Node *n = ChooseNodeToBundle(); |
duke@435 | 2317 | AddNodeToBundle(n,bb); |
duke@435 | 2318 | } |
duke@435 | 2319 | |
duke@435 | 2320 | assert( _scheduled.size() == _bb_end - _bb_start, "wrong number of instructions" ); |
duke@435 | 2321 | #ifdef ASSERT |
duke@435 | 2322 | for( uint l = _bb_start; l < _bb_end; l++ ) { |
duke@435 | 2323 | Node *n = bb->_nodes[l]; |
duke@435 | 2324 | uint m; |
duke@435 | 2325 | for( m = 0; m < _bb_end-_bb_start; m++ ) |
duke@435 | 2326 | if( _scheduled[m] == n ) |
duke@435 | 2327 | break; |
duke@435 | 2328 | assert( m < _bb_end-_bb_start, "instruction missing in schedule" ); |
duke@435 | 2329 | } |
duke@435 | 2330 | #endif |
duke@435 | 2331 | |
duke@435 | 2332 | // Now copy the instructions (in reverse order) back to the block |
duke@435 | 2333 | for ( uint k = _bb_start; k < _bb_end; k++ ) |
duke@435 | 2334 | bb->_nodes.map(k, _scheduled[_bb_end-k-1]); |
duke@435 | 2335 | |
duke@435 | 2336 | #ifndef PRODUCT |
duke@435 | 2337 | if (_cfg->C->trace_opto_output()) { |
duke@435 | 2338 | tty->print("# Schedule BB#%03d (final)\n", i); |
duke@435 | 2339 | uint current = 0; |
duke@435 | 2340 | for (uint j = 0; j < bb->_nodes.size(); j++) { |
duke@435 | 2341 | Node *n = bb->_nodes[j]; |
duke@435 | 2342 | if( valid_bundle_info(n) ) { |
duke@435 | 2343 | Bundle *bundle = node_bundling(n); |
duke@435 | 2344 | if (bundle->instr_count() > 0 || bundle->flags() > 0) { |
duke@435 | 2345 | tty->print("*** Bundle: "); |
duke@435 | 2346 | bundle->dump(); |
duke@435 | 2347 | } |
duke@435 | 2348 | n->dump(); |
duke@435 | 2349 | } |
duke@435 | 2350 | } |
duke@435 | 2351 | } |
duke@435 | 2352 | #endif |
duke@435 | 2353 | #ifdef ASSERT |
duke@435 | 2354 | verify_good_schedule(bb,"after block local scheduling"); |
duke@435 | 2355 | #endif |
duke@435 | 2356 | } |
duke@435 | 2357 | |
duke@435 | 2358 | #ifndef PRODUCT |
duke@435 | 2359 | if (_cfg->C->trace_opto_output()) |
duke@435 | 2360 | tty->print("# <- DoScheduling\n"); |
duke@435 | 2361 | #endif |
duke@435 | 2362 | |
duke@435 | 2363 | // Record final node-bundling array location |
duke@435 | 2364 | _regalloc->C->set_node_bundling_base(_node_bundling_base); |
duke@435 | 2365 | |
duke@435 | 2366 | } // end DoScheduling |
duke@435 | 2367 | |
duke@435 | 2368 | //------------------------------verify_good_schedule--------------------------- |
duke@435 | 2369 | // Verify that no live-range used in the block is killed in the block by a |
duke@435 | 2370 | // wrong DEF. This doesn't verify live-ranges that span blocks. |
duke@435 | 2371 | |
duke@435 | 2372 | // Check for edge existence over data and precedence inputs.  Used to avoid adding redundant precedence edges.
duke@435 | 2373 | static bool edge_from_to( Node *from, Node *to ) { |
duke@435 | 2374 | for( uint i=0; i<from->len(); i++ ) |
duke@435 | 2375 | if( from->in(i) == to ) |
duke@435 | 2376 | return true; |
duke@435 | 2377 | return false; |
duke@435 | 2378 | } |
duke@435 | 2379 | |
duke@435 | 2380 | #ifdef ASSERT |
duke@435 | 2381 | //------------------------------verify_do_def---------------------------------- |
duke@435 | 2382 | void Scheduling::verify_do_def( Node *n, OptoReg::Name def, const char *msg ) { |
duke@435 | 2383 | // Check for bad kills |
duke@435 | 2384 | if( OptoReg::is_valid(def) ) { // Ignore stores & control flow |
duke@435 | 2385 | Node *prior_use = _reg_node[def]; |
duke@435 | 2386 | if( prior_use && !edge_from_to(prior_use,n) ) { |
duke@435 | 2387 | tty->print("%s = ",OptoReg::as_VMReg(def)->name()); |
duke@435 | 2388 | n->dump(); |
duke@435 | 2389 | tty->print_cr("..."); |
duke@435 | 2390 | prior_use->dump(); |
duke@435 | 2391 | assert_msg(edge_from_to(prior_use,n),msg); |
duke@435 | 2392 | } |
duke@435 | 2393 | _reg_node.map(def,NULL); // Kill live USEs |
duke@435 | 2394 | } |
duke@435 | 2395 | } |
duke@435 | 2396 | |
duke@435 | 2397 | //------------------------------verify_good_schedule--------------------------- |
duke@435 | 2398 | void Scheduling::verify_good_schedule( Block *b, const char *msg ) { |
duke@435 | 2399 | |
duke@435 | 2400 | // Zap to something reasonable for the verify code |
duke@435 | 2401 | _reg_node.clear(); |
duke@435 | 2402 | |
duke@435 | 2403 | // Walk over the block backwards. Check to make sure each DEF doesn't |
duke@435 | 2404 | // kill a live value (other than the one it's supposed to). Add each |
duke@435 | 2405 | // USE to the live set. |
duke@435 | 2406 | for( uint i = b->_nodes.size()-1; i >= _bb_start; i-- ) { |
duke@435 | 2407 | Node *n = b->_nodes[i]; |
duke@435 | 2408 | int n_op = n->Opcode(); |
duke@435 | 2409 | if( n_op == Op_MachProj && n->ideal_reg() == MachProjNode::fat_proj ) { |
duke@435 | 2410 | // Fat-proj kills a slew of registers |
duke@435 | 2411 | RegMask rm = n->out_RegMask();// Make local copy |
duke@435 | 2412 | while( rm.is_NotEmpty() ) { |
duke@435 | 2413 | OptoReg::Name kill = rm.find_first_elem(); |
duke@435 | 2414 | rm.Remove(kill); |
duke@435 | 2415 | verify_do_def( n, kill, msg ); |
duke@435 | 2416 | } |
duke@435 | 2417 | } else if( n_op != Op_Node ) { // Avoid brand new antidependence nodes |
duke@435 | 2418 | // Get DEF'd registers the normal way |
duke@435 | 2419 | verify_do_def( n, _regalloc->get_reg_first(n), msg ); |
duke@435 | 2420 | verify_do_def( n, _regalloc->get_reg_second(n), msg ); |
duke@435 | 2421 | } |
duke@435 | 2422 | |
duke@435 | 2423 | // Now make all USEs live |
duke@435 | 2424 | for( uint i=1; i<n->req(); i++ ) { |
duke@435 | 2425 | Node *def = n->in(i); |
duke@435 | 2426 | assert(def != 0, "input edge required"); |
duke@435 | 2427 | OptoReg::Name reg_lo = _regalloc->get_reg_first(def); |
duke@435 | 2428 | OptoReg::Name reg_hi = _regalloc->get_reg_second(def); |
duke@435 | 2429 | if( OptoReg::is_valid(reg_lo) ) { |
duke@435 | 2430 | assert_msg(!_reg_node[reg_lo] || edge_from_to(_reg_node[reg_lo],def), msg ); |
duke@435 | 2431 | _reg_node.map(reg_lo,n); |
duke@435 | 2432 | } |
duke@435 | 2433 | if( OptoReg::is_valid(reg_hi) ) { |
duke@435 | 2434 | assert_msg(!_reg_node[reg_hi] || edge_from_to(_reg_node[reg_hi],def), msg ); |
duke@435 | 2435 | _reg_node.map(reg_hi,n); |
duke@435 | 2436 | } |
duke@435 | 2437 | } |
duke@435 | 2438 | |
duke@435 | 2439 | } |
duke@435 | 2440 | |
duke@435 | 2441 | // Zap to something reasonable for the Antidependence code |
duke@435 | 2442 | _reg_node.clear(); |
duke@435 | 2443 | } |
duke@435 | 2444 | #endif |
duke@435 | 2445 | |
duke@435 | 2446 | // Conditionally add precedence edges. Avoid putting edges on Projs. |
duke@435 | 2447 | static void add_prec_edge_from_to( Node *from, Node *to ) { |
duke@435 | 2448 | if( from->is_Proj() ) { // Put precedence edge on Proj's input |
duke@435 | 2449 | assert( from->req() == 1 && (from->len() == 1 || from->in(1)==0), "no precedence edges on projections" ); |
duke@435 | 2450 | from = from->in(0); |
duke@435 | 2451 | } |
duke@435 | 2452 | if( from != to && // No cycles (for things like LD L0,[L0+4] ) |
duke@435 | 2453 | !edge_from_to( from, to ) ) // Avoid duplicate edge |
duke@435 | 2454 | from->add_prec(to); |
duke@435 | 2455 | } |
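// Note that a precedence edge never lands on a Proj itself: it is redirected
// to the Proj's parent, since projections are scheduled as a unit with the
// node that produces them.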
duke@435 | 2456 | |
duke@435 | 2457 | //------------------------------anti_do_def------------------------------------ |
duke@435 | 2458 | void Scheduling::anti_do_def( Block *b, Node *def, OptoReg::Name def_reg, int is_def ) { |
duke@435 | 2459 | if( !OptoReg::is_valid(def_reg) ) // Ignore stores & control flow |
duke@435 | 2460 | return; |
duke@435 | 2461 | |
duke@435 | 2462 | Node *pinch = _reg_node[def_reg]; // Get pinch point |
duke@435 | 2463 | if( !pinch || _bbs[pinch->_idx] != b || // No pinch-point yet? |
duke@435 | 2464 | is_def ) { // Check for a true def (not a kill) |
duke@435 | 2465 | _reg_node.map(def_reg,def); // Record def/kill as the optimistic pinch-point |
duke@435 | 2466 | return; |
duke@435 | 2467 | } |
duke@435 | 2468 | |
duke@435 | 2469 | Node *kill = def; // Rename 'def' to more descriptive 'kill' |
duke@435 | 2470 | debug_only( def = (Node*)0xdeadbeef; ) |
duke@435 | 2471 | |
duke@435 | 2472 | // After some number of kills there _may_ be a later def |
duke@435 | 2473 | Node *later_def = NULL; |
duke@435 | 2474 | |
duke@435 | 2475 | // Finding a kill requires a real pinch-point. |
duke@435 | 2476 | // Check for not already having a pinch-point. |
duke@435 | 2477 | // Pinch points are Op_Node's. |
duke@435 | 2478 | if( pinch->Opcode() != Op_Node ) { // Or later-def/kill as pinch-point? |
duke@435 | 2479 | later_def = pinch; // Must be def/kill as optimistic pinch-point |
duke@435 | 2480 | if ( _pinch_free_list.size() > 0) { |
duke@435 | 2481 | pinch = _pinch_free_list.pop(); |
duke@435 | 2482 | } else { |
duke@435 | 2483 | pinch = new (_cfg->C, 1) Node(1); // Pinch point to-be |
duke@435 | 2484 | } |
duke@435 | 2485 | if (pinch->_idx >= _regalloc->node_regs_max_index()) { |
duke@435 | 2486 | _cfg->C->record_method_not_compilable("too many D-U pinch points"); |
duke@435 | 2487 | return; |
duke@435 | 2488 | } |
duke@435 | 2489 | _bbs.map(pinch->_idx,b); // Pretend it's valid in this block (lazy init) |
duke@435 | 2490 | _reg_node.map(def_reg,pinch); // Record pinch-point |
duke@435 | 2491 | //_regalloc->set_bad(pinch->_idx); // Already initialized this way. |
duke@435 | 2492 | if( later_def->outcnt() == 0 || later_def->ideal_reg() == MachProjNode::fat_proj ) { // Distinguish def from kill |
duke@435 | 2493 | pinch->init_req(0, _cfg->C->top()); // set not NULL for the next call |
duke@435 | 2494 | add_prec_edge_from_to(later_def,pinch); // Add edge from kill to pinch |
duke@435 | 2495 | later_def = NULL; // and no later def |
duke@435 | 2496 | } |
duke@435 | 2497 | pinch->set_req(0,later_def); // Hook later def so we can find it |
duke@435 | 2498 | } else { // Else have valid pinch point |
duke@435 | 2499 | if( pinch->in(0) ) // If there is a later-def |
duke@435 | 2500 | later_def = pinch->in(0); // Get it |
duke@435 | 2501 | } |
duke@435 | 2502 | |
duke@435 | 2503 | // Add output-dependence edge from later def to kill |
duke@435 | 2504 | if( later_def ) // If there is some original def |
duke@435 | 2505 | add_prec_edge_from_to(later_def,kill); // Add edge from def to kill |
duke@435 | 2506 | |
duke@435 | 2507 | // See if current kill is also a use, and so is forced to be the pinch-point. |
duke@435 | 2508 | if( pinch->Opcode() == Op_Node ) { |
duke@435 | 2509 | Node *uses = kill->is_Proj() ? kill->in(0) : kill; |
duke@435 | 2510 | for( uint i=1; i<uses->req(); i++ ) { |
duke@435 | 2511 | if( _regalloc->get_reg_first(uses->in(i)) == def_reg || |
duke@435 | 2512 | _regalloc->get_reg_second(uses->in(i)) == def_reg ) { |
duke@435 | 2513 | // Yes, found a use/kill pinch-point |
duke@435 | 2514 |         pinch->set_req(0,NULL);  // clear the later-def hook; this pinch is abandoned
duke@435 | 2515 | pinch->replace_by(kill); // Move anti-dep edges up |
duke@435 | 2516 | pinch = kill; |
duke@435 | 2517 | _reg_node.map(def_reg,pinch); |
duke@435 | 2518 | return; |
duke@435 | 2519 | } |
duke@435 | 2520 | } |
duke@435 | 2521 | } |
duke@435 | 2522 | |
duke@435 | 2523 | // Add edge from kill to pinch-point |
duke@435 | 2524 | add_prec_edge_from_to(kill,pinch); |
duke@435 | 2525 | } |
duke@435 | 2526 | |
duke@435 | 2527 | //------------------------------anti_do_use------------------------------------ |
duke@435 | 2528 | void Scheduling::anti_do_use( Block *b, Node *use, OptoReg::Name use_reg ) { |
duke@435 | 2529 | if( !OptoReg::is_valid(use_reg) ) // Ignore stores & control flow |
duke@435 | 2530 | return; |
duke@435 | 2531 | Node *pinch = _reg_node[use_reg]; // Get pinch point |
duke@435 | 2532 | // Check for no later def_reg/kill in block |
duke@435 | 2533 | if( pinch && _bbs[pinch->_idx] == b && |
duke@435 | 2534 | // Use has to be block-local as well |
duke@435 | 2535 | _bbs[use->_idx] == b ) { |
duke@435 | 2536 | if( pinch->Opcode() == Op_Node && // Real pinch-point (not optimistic?) |
duke@435 | 2537 | pinch->req() == 1 ) { // pinch not yet in block? |
duke@435 | 2538 | pinch->del_req(0); // yank pointer to later-def, also set flag |
duke@435 | 2539 | // Insert the pinch-point in the block just after the last use |
duke@435 | 2540 | b->_nodes.insert(b->find_node(use)+1,pinch); |
duke@435 | 2541 | _bb_end++; // Increase size scheduled region in block |
duke@435 | 2542 | } |
duke@435 | 2543 | |
duke@435 | 2544 | add_prec_edge_from_to(pinch,use); |
duke@435 | 2545 | } |
duke@435 | 2546 | } |
duke@435 | 2547 | |
duke@435 | 2548 | //------------------------------ComputeRegisterAntidependences----------------- |
duke@435 | 2549 | // We insert antidependences between the reads and the following write of
duke@435 | 2550 | // allocated registers to prevent illegal code motion.  The number of added
duke@435 | 2551 | // edges should be fairly small, since we only add edges within the current
duke@435 | 2552 | // basic block.
duke@435 | 2553 | void Scheduling::ComputeRegisterAntidependencies(Block *b) { |
duke@435 | 2554 | |
duke@435 | 2555 | #ifdef ASSERT |
duke@435 | 2556 | verify_good_schedule(b,"before block local scheduling"); |
duke@435 | 2557 | #endif |
duke@435 | 2558 | |
duke@435 | 2559 | // A valid schedule, for each register independently, is an endless cycle |
duke@435 | 2560 | // of: a def, then some uses (connected to the def by true dependencies), |
duke@435 | 2561 | // then some kills (defs with no uses), finally the cycle repeats with a new |
duke@435 | 2562 | // def. The uses are allowed to float relative to each other, as are the |
duke@435 | 2563 | // kills. No use is allowed to slide past a kill (or def). This requires |
duke@435 | 2564 | // antidependencies between all uses of a single def and all kills that |
duke@435 | 2565 | // follow, up to the next def. More edges are redundant, because later defs |
duke@435 | 2566 | // & kills are already serialized with true or antidependencies. To keep |
duke@435 | 2567 | // the edge count down, we add a 'pinch point' node if there's more than |
duke@435 | 2568 | // one use or more than one kill/def. |
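  //
  // For example, for one register R (instruction names invented for
  // illustration):
  //     def1: R <- ...
  //     use1: ... <- R        uses may float relative to each other,
  //     use2: ... <- R        but neither may slide below the kill
  //     kill: R <- ...        (a call's fat-proj, say)
  //     def2: R <- ...
  // We need anti-dependence edges keeping use1 and use2 above the kill;
  // an edge from def1 to def2 would be redundant, since the chain
  // def1 -> uses -> kill -> def2 is already serialized.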
duke@435 | 2569 | |
duke@435 | 2570 | // We add dependencies in one bottom-up pass. |
duke@435 | 2571 | |
duke@435 | 2572 |   // For each instruction we handle its DEFs/KILLs, then its USEs.
duke@435 | 2573 | |
duke@435 | 2574 | // For each DEF/KILL, we check to see if there's a prior DEF/KILL for this |
duke@435 | 2575 | // register. If not, we record the DEF/KILL in _reg_node, the |
duke@435 | 2576 | // register-to-def mapping. If there is a prior DEF/KILL, we insert a |
duke@435 | 2577 | // "pinch point", a new Node that's in the graph but not in the block. |
duke@435 | 2578 | // We put edges from the prior and current DEF/KILLs to the pinch point. |
duke@435 | 2579 | // We put the pinch point in _reg_node. If there's already a pinch point |
duke@435 | 2580 | // we merely add an edge from the current DEF/KILL to the pinch point. |
duke@435 | 2581 | |
duke@435 | 2582 | // After doing the DEF/KILLs, we handle USEs. For each used register, we |
duke@435 | 2583 | // put an edge from the pinch point to the USE. |
duke@435 | 2584 | |
duke@435 | 2585 | // To be expedient, the _reg_node array is pre-allocated for the whole |
duke@435 | 2586 | // compilation. _reg_node is lazily initialized; it either contains a NULL, |
duke@435 | 2587 | // or a valid def/kill/pinch-point, or a leftover node from some prior |
duke@435 | 2588 |   // block.  A leftover node from some prior block is treated like a NULL (no
duke@435 | 2589 |   // prior def, so no anti-dependence needed).  A valid def is distinguished
duke@435 | 2590 |   // by being in the current block.
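  //
  // The per-register transitions in anti_do_def()/anti_do_use() are then:
  //   NULL or stale entry  -- DEF or KILL -->  record the def/kill itself
  //   existing def/kill    -- true DEF ----->  simply overwrite the entry
  //   existing def/kill    -- KILL --------->  allocate a pinch point and
  //                                            hook both nodes into it
  //   existing pinch point -- KILL --------->  add one more edge to it
  //   existing pinch point -- USE ---------->  add an edge from the pinch
  //                                            point to the use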
duke@435 | 2591 | bool fat_proj_seen = false; |
duke@435 | 2592 | uint last_safept = _bb_end-1; |
duke@435 | 2593 | Node* end_node = (_bb_end-1 >= _bb_start) ? b->_nodes[last_safept] : NULL; |
duke@435 | 2594 | Node* last_safept_node = end_node; |
duke@435 | 2595 | for( uint i = _bb_end-1; i >= _bb_start; i-- ) { |
duke@435 | 2596 | Node *n = b->_nodes[i]; |
duke@435 | 2597 |     int is_def = n->outcnt();   // record def-ness now; precedence edges added below can obscure it
duke@435 | 2598 | if( n->Opcode() == Op_MachProj && n->ideal_reg() == MachProjNode::fat_proj ) { |
duke@435 | 2599 | // Fat-proj kills a slew of registers |
duke@435 | 2600 | // This can add edges to 'n' and obscure whether or not it was a def, |
duke@435 | 2601 | // hence the is_def flag. |
duke@435 | 2602 | fat_proj_seen = true; |
duke@435 | 2603 | RegMask rm = n->out_RegMask();// Make local copy |
duke@435 | 2604 | while( rm.is_NotEmpty() ) { |
duke@435 | 2605 | OptoReg::Name kill = rm.find_first_elem(); |
duke@435 | 2606 | rm.Remove(kill); |
duke@435 | 2607 | anti_do_def( b, n, kill, is_def ); |
duke@435 | 2608 | } |
duke@435 | 2609 | } else { |
duke@435 | 2610 | // Get DEF'd registers the normal way |
duke@435 | 2611 | anti_do_def( b, n, _regalloc->get_reg_first(n), is_def ); |
duke@435 | 2612 | anti_do_def( b, n, _regalloc->get_reg_second(n), is_def ); |
duke@435 | 2613 | } |
duke@435 | 2614 | |
duke@435 | 2615 | // Check each register used by this instruction for a following DEF/KILL |
duke@435 | 2616 | // that must occur afterward and requires an anti-dependence edge. |
duke@435 | 2617 | for( uint j=0; j<n->req(); j++ ) { |
duke@435 | 2618 | Node *def = n->in(j); |
duke@435 | 2619 | if( def ) { |
duke@435 | 2620 | assert( def->Opcode() != Op_MachProj || def->ideal_reg() != MachProjNode::fat_proj, "" ); |
duke@435 | 2621 | anti_do_use( b, n, _regalloc->get_reg_first(def) ); |
duke@435 | 2622 | anti_do_use( b, n, _regalloc->get_reg_second(def) ); |
duke@435 | 2623 | } |
duke@435 | 2624 | } |
duke@435 | 2625 | // Do not allow defs of new derived values to float above GC |
duke@435 | 2626 | // points unless the base is definitely available at the GC point. |
duke@435 | 2627 | |
duke@435 | 2628 |     Node *m = b->_nodes[i];   // re-fetch the node; anti_do_use() may have grown the block
duke@435 | 2629 | |
duke@435 | 2630 | // Add precedence edge from following safepoint to use of derived pointer |
duke@435 | 2631 | if( last_safept_node != end_node && |
duke@435 | 2632 | m != last_safept_node) { |
duke@435 | 2633 | for (uint k = 1; k < m->req(); k++) { |
duke@435 | 2634 | const Type *t = m->in(k)->bottom_type(); |
duke@435 | 2635 | if( t->isa_oop_ptr() && |
duke@435 | 2636 | t->is_ptr()->offset() != 0 ) { |
duke@435 | 2637 | last_safept_node->add_prec( m ); |
duke@435 | 2638 | break; |
duke@435 | 2639 | } |
duke@435 | 2640 | } |
duke@435 | 2641 | } |
duke@435 | 2642 | |
duke@435 | 2643 | if( n->jvms() ) { // Precedence edge from derived to safept |
duke@435 | 2644 | // Check if last_safept_node was moved by pinch-point insertion in anti_do_use() |
duke@435 | 2645 | if( b->_nodes[last_safept] != last_safept_node ) { |
duke@435 | 2646 | last_safept = b->find_node(last_safept_node); |
duke@435 | 2647 | } |
duke@435 | 2648 | for( uint j=last_safept; j > i; j-- ) { |
duke@435 | 2649 | Node *mach = b->_nodes[j]; |
duke@435 | 2650 | if( mach->is_Mach() && mach->as_Mach()->ideal_Opcode() == Op_AddP ) |
duke@435 | 2651 | mach->add_prec( n ); |
duke@435 | 2652 | } |
duke@435 | 2653 | last_safept = i; |
duke@435 | 2654 | last_safept_node = m; |
duke@435 | 2655 | } |
duke@435 | 2656 | } |
duke@435 | 2657 | |
duke@435 | 2658 | if (fat_proj_seen) { |
duke@435 | 2659 | // Garbage collect pinch nodes that were not consumed. |
duke@435 | 2660 | // They are usually created by a fat kill MachProj for a call. |
duke@435 | 2661 | garbage_collect_pinch_nodes(); |
duke@435 | 2662 | } |
duke@435 | 2663 | } |
duke@435 | 2664 | |
duke@435 | 2665 | //------------------------------garbage_collect_pinch_nodes------------------------------- |
duke@435 | 2666 | |
duke@435 | 2667 | // Garbage collect pinch nodes for reuse by other blocks. |
duke@435 | 2668 | // |
duke@435 | 2669 | // The block scheduler's insertion of anti-dependence |
duke@435 | 2670 | // edges creates many pinch nodes when the block contains |
duke@435 | 2671 | // 2 or more Calls. A pinch node is used to prevent a |
duke@435 | 2672 | // combinatorial explosion of edges. If a set of kills for a |
duke@435 | 2673 | // register is anti-dependent on a set of uses (or defs), rather |
duke@435 | 2674 | // than adding an edge in the graph between each pair of kill |
duke@435 | 2675 | // and use (or def), a pinch is inserted between them: |
duke@435 | 2676 | // |
duke@435 | 2677 | // use1 use2 use3 |
duke@435 | 2678 | // \ | / |
duke@435 | 2679 | // \ | / |
duke@435 | 2680 | // pinch |
duke@435 | 2681 | // / | \ |
duke@435 | 2682 | // / | \ |
duke@435 | 2683 | // kill1 kill2 kill3 |
duke@435 | 2684 | // |
duke@435 | 2685 | // One pinch node is created per register killed when |
duke@435 | 2686 | // the second call is encountered during a backwards pass |
duke@435 | 2687 | // over the block. Most of these pinch nodes are never |
duke@435 | 2688 | // wired into the graph because the register is never |
duke@435 | 2689 | // used or def'ed in the block. |
duke@435 | 2690 | // |
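// Reclaimed pinch nodes are parked on _pinch_free_list, and anti_do_def()
// pops from that list before allocating a fresh Node, so the same few
// pinch nodes get recycled across the blocks of a compilation.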
duke@435 | 2691 | void Scheduling::garbage_collect_pinch_nodes() { |
duke@435 | 2692 | #ifndef PRODUCT |
duke@435 | 2693 | if (_cfg->C->trace_opto_output()) tty->print("Reclaimed pinch nodes:"); |
duke@435 | 2694 | #endif |
duke@435 | 2695 | int trace_cnt = 0; |
duke@435 | 2696 | for (uint k = 0; k < _reg_node.Size(); k++) { |
duke@435 | 2697 | Node* pinch = _reg_node[k]; |
duke@435 | 2698 | if (pinch != NULL && pinch->Opcode() == Op_Node && |
duke@435 | 2699 |         // no precedence input edges
duke@435 | 2700 | (pinch->req() == pinch->len() || pinch->in(pinch->req()) == NULL) ) { |
duke@435 | 2701 | cleanup_pinch(pinch); |
duke@435 | 2702 | _pinch_free_list.push(pinch); |
duke@435 | 2703 | _reg_node.map(k, NULL); |
duke@435 | 2704 | #ifndef PRODUCT |
duke@435 | 2705 | if (_cfg->C->trace_opto_output()) { |
duke@435 | 2706 | trace_cnt++; |
duke@435 | 2707 | if (trace_cnt > 40) { |
duke@435 | 2708 | tty->print("\n"); |
duke@435 | 2709 | trace_cnt = 0; |
duke@435 | 2710 | } |
duke@435 | 2711 | tty->print(" %d", pinch->_idx); |
duke@435 | 2712 | } |
duke@435 | 2713 | #endif |
duke@435 | 2714 | } |
duke@435 | 2715 | } |
duke@435 | 2716 | #ifndef PRODUCT |
duke@435 | 2717 | if (_cfg->C->trace_opto_output()) tty->print("\n"); |
duke@435 | 2718 | #endif |
duke@435 | 2719 | } |
duke@435 | 2720 | |
duke@435 | 2721 | // Clean up a pinch node for reuse. |
duke@435 | 2722 | void Scheduling::cleanup_pinch( Node *pinch ) { |
duke@435 | 2723 |   assert(pinch && pinch->Opcode() == Op_Node && pinch->req() == 1, "just checking");
duke@435 | 2724 | |
duke@435 | 2725 | for (DUIterator_Last imin, i = pinch->last_outs(imin); i >= imin; ) { |
duke@435 | 2726 | Node* use = pinch->last_out(i); |
duke@435 | 2727 | uint uses_found = 0; |
duke@435 | 2728 | for (uint j = use->req(); j < use->len(); j++) { |
duke@435 | 2729 | if (use->in(j) == pinch) { |
duke@435 | 2730 | use->rm_prec(j); |
duke@435 | 2731 | uses_found++; |
duke@435 | 2732 | } |
duke@435 | 2733 | } |
duke@435 | 2734 | assert(uses_found > 0, "must be a precedence edge"); |
duke@435 | 2735 | i -= uses_found; // we deleted 1 or more copies of this edge |
duke@435 | 2736 | } |
duke@435 | 2737 | // May have a later_def entry |
duke@435 | 2738 | pinch->set_req(0, NULL); |
duke@435 | 2739 | } |
duke@435 | 2740 | |
duke@435 | 2741 | //------------------------------print_statistics------------------------------- |
duke@435 | 2742 | #ifndef PRODUCT |
duke@435 | 2743 | |
duke@435 | 2744 | void Scheduling::dump_available() const { |
duke@435 | 2745 | tty->print("#Availist "); |
duke@435 | 2746 | for (uint i = 0; i < _available.size(); i++) |
duke@435 | 2747 | tty->print(" N%d/l%d", _available[i]->_idx,_current_latency[_available[i]->_idx]); |
duke@435 | 2748 | tty->cr(); |
duke@435 | 2749 | } |
duke@435 | 2750 | |
duke@435 | 2751 | // Print Scheduling Statistics |
duke@435 | 2752 | void Scheduling::print_statistics() { |
duke@435 | 2753 | // Print the size added by nops for bundling |
duke@435 | 2754 | tty->print("Nops added %d bytes to total of %d bytes", |
duke@435 | 2755 | _total_nop_size, _total_method_size); |
duke@435 | 2756 | if (_total_method_size > 0) |
duke@435 | 2757 | tty->print(", for %.2f%%", |
duke@435 | 2758 | ((double)_total_nop_size) / ((double) _total_method_size) * 100.0); |
duke@435 | 2759 | tty->print("\n"); |
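  // For example (numbers invented): 100 bytes of nops in a 4000-byte method
  // would print "Nops added 100 bytes to total of 4000 bytes, for 2.50%".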
duke@435 | 2760 | |
duke@435 | 2761 | // Print the number of branch shadows filled |
duke@435 | 2762 | if (Pipeline::_branch_has_delay_slot) { |
duke@435 | 2763 | tty->print("Of %d branches, %d had unconditional delay slots filled", |
duke@435 | 2764 | _total_branches, _total_unconditional_delays); |
duke@435 | 2765 | if (_total_branches > 0) |
duke@435 | 2766 | tty->print(", for %.2f%%", |
duke@435 | 2767 | ((double)_total_unconditional_delays) / ((double)_total_branches) * 100.0); |
duke@435 | 2768 | tty->print("\n"); |
duke@435 | 2769 | } |
duke@435 | 2770 | |
duke@435 | 2771 | uint total_instructions = 0, total_bundles = 0; |
duke@435 | 2772 | |
duke@435 | 2773 | for (uint i = 1; i <= Pipeline::_max_instrs_per_cycle; i++) { |
duke@435 | 2774 | uint bundle_count = _total_instructions_per_bundle[i]; |
duke@435 | 2775 | total_instructions += bundle_count * i; |
duke@435 | 2776 | total_bundles += bundle_count; |
duke@435 | 2777 | } |
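  // For example (counts invented): ten 1-instruction bundles plus five
  // 3-instruction bundles give (10*1 + 5*3) / (10 + 5) ~= 1.67 average ILP.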
duke@435 | 2778 | |
duke@435 | 2779 | if (total_bundles > 0) |
duke@435 | 2780 | tty->print("Average ILP (excluding nops) is %.2f\n", |
duke@435 | 2781 | ((double)total_instructions) / ((double)total_bundles)); |
duke@435 | 2782 | } |
duke@435 | 2783 | #endif |