Wed, 28 Aug 2013 11:22:43 +0200
8023597: Optimize G1 barriers code for unsafe load_store
Summary: Avoid loading old values in G1 pre-barriers for inlined unsafe load_store nodes.
Reviewed-by: kvn, tonyp
Contributed-by: Martin Doerr <martin.doerr@sap.com>
duke@435 | 1 | /* |
coleenp@4037 | 2 | * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved. |
duke@435 | 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
duke@435 | 4 | * |
duke@435 | 5 | * This code is free software; you can redistribute it and/or modify it |
duke@435 | 6 | * under the terms of the GNU General Public License version 2 only, as |
duke@435 | 7 | * published by the Free Software Foundation. |
duke@435 | 8 | * |
duke@435 | 9 | * This code is distributed in the hope that it will be useful, but WITHOUT |
duke@435 | 10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
duke@435 | 11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
duke@435 | 12 | * version 2 for more details (a copy is included in the LICENSE file that |
duke@435 | 13 | * accompanied this code). |
duke@435 | 14 | * |
duke@435 | 15 | * You should have received a copy of the GNU General Public License version |
duke@435 | 16 | * 2 along with this work; if not, write to the Free Software Foundation, |
duke@435 | 17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
duke@435 | 18 | * |
trims@1907 | 19 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
trims@1907 | 20 | * or visit www.oracle.com if you need additional information or have any |
trims@1907 | 21 | * questions. |
duke@435 | 22 | * |
duke@435 | 23 | */ |
duke@435 | 24 | |
stefank@2314 | 25 | #include "precompiled.hpp" |
stefank@2314 | 26 | #include "asm/assembler.inline.hpp" |
dlong@5000 | 27 | #include "code/compiledIC.hpp" |
stefank@2314 | 28 | #include "code/debugInfo.hpp" |
stefank@2314 | 29 | #include "code/debugInfoRec.hpp" |
stefank@2314 | 30 | #include "compiler/compileBroker.hpp" |
stefank@2314 | 31 | #include "compiler/oopMap.hpp" |
stefank@2314 | 32 | #include "memory/allocation.inline.hpp" |
stefank@2314 | 33 | #include "opto/callnode.hpp" |
stefank@2314 | 34 | #include "opto/cfgnode.hpp" |
stefank@2314 | 35 | #include "opto/locknode.hpp" |
stefank@2314 | 36 | #include "opto/machnode.hpp" |
stefank@2314 | 37 | #include "opto/output.hpp" |
stefank@2314 | 38 | #include "opto/regalloc.hpp" |
stefank@2314 | 39 | #include "opto/runtime.hpp" |
stefank@2314 | 40 | #include "opto/subnode.hpp" |
stefank@2314 | 41 | #include "opto/type.hpp" |
stefank@2314 | 42 | #include "runtime/handles.inline.hpp" |
stefank@2314 | 43 | #include "utilities/xmlstream.hpp" |
duke@435 | 44 | |
duke@435 | 45 | extern uint size_exception_handler(); |
duke@435 | 46 | extern uint size_deopt_handler(); |
duke@435 | 47 | |
duke@435 | 48 | #ifndef PRODUCT |
duke@435 | 49 | #define DEBUG_ARG(x) , x |
duke@435 | 50 | #else |
duke@435 | 51 | #define DEBUG_ARG(x) |
duke@435 | 52 | #endif |
duke@435 | 53 | |
duke@435 | 54 | extern int emit_exception_handler(CodeBuffer &cbuf); |
duke@435 | 55 | extern int emit_deopt_handler(CodeBuffer &cbuf); |
duke@435 | 56 | |
duke@435 | 57 | // Convert Nodes to instruction bits and pass off to the VM |
duke@435 | 58 | void Compile::Output() { |
duke@435 | 59 | // RootNode goes |
adlertz@5635 | 60 | assert( _cfg->get_root_block()->number_of_nodes() == 0, "" ); |
duke@435 | 61 | |
kvn@1294 | 62 | // The number of new nodes (mostly MachNop) is proportional to |
kvn@1294 | 63 | // the number of java calls and inner loops which are aligned. |
kvn@1294 | 64 | if ( C->check_node_count((NodeLimitFudgeFactor + C->java_calls()*3 + |
kvn@1294 | 65 | C->inner_loops()*(OptoLoopAlignment-1)), |
kvn@1294 | 66 | "out of nodes before code generation" ) ) { |
kvn@1294 | 67 | return; |
kvn@1294 | 68 | } |
duke@435 | 69 | // Make sure I can find the Start Node |
adlertz@5539 | 70 | Block *entry = _cfg->get_block(1); |
adlertz@5539 | 71 | Block *broot = _cfg->get_root_block(); |
duke@435 | 72 | |
adlertz@5635 | 73 | const StartNode *start = entry->head()->as_Start(); |
duke@435 | 74 | |
duke@435 | 75 | // Replace StartNode with prolog |
duke@435 | 76 | MachPrologNode *prolog = new (this) MachPrologNode(); |
adlertz@5635 | 77 | entry->map_node(prolog, 0); |
adlertz@5509 | 78 | _cfg->map_node_to_block(prolog, entry); |
adlertz@5509 | 79 | _cfg->unmap_node_from_block(start); // start is no longer in any block |
duke@435 | 80 | |
duke@435 | 81 | // Virtual methods need an unverified entry point |
duke@435 | 82 | |
duke@435 | 83 | if( is_osr_compilation() ) { |
duke@435 | 84 | if( PoisonOSREntry ) { |
duke@435 | 85 | // TODO: Should use a ShouldNotReachHereNode... |
duke@435 | 86 | _cfg->insert( broot, 0, new (this) MachBreakpointNode() ); |
duke@435 | 87 | } |
duke@435 | 88 | } else { |
duke@435 | 89 | if( _method && !_method->flags().is_static() ) { |
duke@435 | 90 | // Insert unvalidated entry point |
duke@435 | 91 | _cfg->insert( broot, 0, new (this) MachUEPNode() ); |
duke@435 | 92 | } |
duke@435 | 93 | |
duke@435 | 94 | } |
duke@435 | 95 | |
duke@435 | 96 | |
duke@435 | 97 | // Break before main entry point |
duke@435 | 98 | if( (_method && _method->break_at_execute()) |
duke@435 | 99 | #ifndef PRODUCT |
duke@435 | 100 | ||(OptoBreakpoint && is_method_compilation()) |
duke@435 | 101 | ||(OptoBreakpointOSR && is_osr_compilation()) |
duke@435 | 102 | ||(OptoBreakpointC2R && !_method) |
duke@435 | 103 | #endif |
duke@435 | 104 | ) { |
duke@435 | 105 | // checking for _method means that OptoBreakpoint does not apply to |
duke@435 | 106 | // runtime stubs or frame converters |
duke@435 | 107 | _cfg->insert( entry, 1, new (this) MachBreakpointNode() ); |
duke@435 | 108 | } |
duke@435 | 109 | |
duke@435 | 110 | // Insert epilogs before every return |
adlertz@5539 | 111 | for (uint i = 0; i < _cfg->number_of_blocks(); i++) { |
adlertz@5539 | 112 | Block* block = _cfg->get_block(i); |
adlertz@5539 | 113 | if (!block->is_connector() && block->non_connector_successor(0) == _cfg->get_root_block()) { // Found a program exit point? |
adlertz@5539 | 114 | Node* m = block->end(); |
adlertz@5539 | 115 | if (m->is_Mach() && m->as_Mach()->ideal_Opcode() != Op_Halt) { |
adlertz@5539 | 116 | MachEpilogNode* epilog = new (this) MachEpilogNode(m->as_Mach()->ideal_Opcode() == Op_Return); |
adlertz@5539 | 117 | block->add_inst(epilog); |
adlertz@5539 | 118 | _cfg->map_node_to_block(epilog, block); |
duke@435 | 119 | } |
duke@435 | 120 | } |
duke@435 | 121 | } |
duke@435 | 122 | |
duke@435 | 123 | # ifdef ENABLE_ZAP_DEAD_LOCALS |
adlertz@5539 | 124 | if (ZapDeadCompiledLocals) { |
adlertz@5539 | 125 | Insert_zap_nodes(); |
adlertz@5539 | 126 | } |
duke@435 | 127 | # endif |
duke@435 | 128 | |
adlertz@5539 | 129 | uint* blk_starts = NEW_RESOURCE_ARRAY(uint, _cfg->number_of_blocks() + 1); |
adlertz@5539 | 130 | blk_starts[0] = 0; |
kvn@3049 | 131 | |
kvn@3049 | 132 | // Initialize code buffer and process short branches. |
kvn@3049 | 133 | CodeBuffer* cb = init_buffer(blk_starts); |
kvn@3049 | 134 | |
adlertz@5539 | 135 | if (cb == NULL || failing()) { |
adlertz@5539 | 136 | return; |
adlertz@5539 | 137 | } |
kvn@3049 | 138 | |
duke@435 | 139 | ScheduleAndBundle(); |
duke@435 | 140 | |
duke@435 | 141 | #ifndef PRODUCT |
duke@435 | 142 | if (trace_opto_output()) { |
duke@435 | 143 | tty->print("\n---- After ScheduleAndBundle ----\n"); |
adlertz@5539 | 144 | for (uint i = 0; i < _cfg->number_of_blocks(); i++) { |
duke@435 | 145 | tty->print("\nBB#%03d:\n", i); |
adlertz@5539 | 146 | Block* block = _cfg->get_block(i); |
adlertz@5635 | 147 | for (uint j = 0; j < block->number_of_nodes(); j++) { |
adlertz@5635 | 148 | Node* n = block->get_node(j); |
duke@435 | 149 | OptoReg::Name reg = _regalloc->get_reg_first(n); |
duke@435 | 150 | tty->print(" %-6s ", reg >= 0 && reg < REG_COUNT ? Matcher::regName[reg] : ""); |
duke@435 | 151 | n->dump(); |
duke@435 | 152 | } |
duke@435 | 153 | } |
duke@435 | 154 | } |
duke@435 | 155 | #endif |
duke@435 | 156 | |
adlertz@5539 | 157 | if (failing()) { |
adlertz@5539 | 158 | return; |
adlertz@5539 | 159 | } |
duke@435 | 160 | |
duke@435 | 161 | BuildOopMaps(); |
duke@435 | 162 | |
adlertz@5539 | 163 | if (failing()) { |
adlertz@5539 | 164 | return; |
adlertz@5539 | 165 | } |
duke@435 | 166 | |
kvn@3049 | 167 | fill_buffer(cb, blk_starts); |
duke@435 | 168 | } |
duke@435 | 169 | |
duke@435 | 170 | bool Compile::need_stack_bang(int frame_size_in_bytes) const { |
duke@435 | 171 | // Determine if we need to generate a stack overflow check. |
duke@435 | 172 | // Do it if the method is not a stub function and |
duke@435 | 173 | // has java calls or has frame size > vm_page_size/8. |
kvn@3574 | 174 | return (UseStackBanging && stub_function() == NULL && |
duke@435 | 175 | (has_java_calls() || frame_size_in_bytes > os::vm_page_size()>>3)); |
duke@435 | 176 | } |
duke@435 | 177 | |
duke@435 | 178 | bool Compile::need_register_stack_bang() const { |
duke@435 | 179 | // Determine if we need to generate a register stack overflow check. |
duke@435 | 180 | // This is only used on architectures which have split register |
duke@435 | 181 | // and memory stacks (ie. IA64). |
duke@435 | 182 | // Bang if the method is not a stub function and has java calls |
duke@435 | 183 | return (stub_function() == NULL && has_java_calls()); |
duke@435 | 184 | } |
duke@435 | 185 | |
duke@435 | 186 | # ifdef ENABLE_ZAP_DEAD_LOCALS |
duke@435 | 187 | |
duke@435 | 188 | |
duke@435 | 189 | // In order to catch compiler oop-map bugs, we have implemented |
duke@435 | 190 | // a debugging mode called ZapDeadCompilerLocals. |
duke@435 | 191 | // This mode causes the compiler to insert a call to a runtime routine, |
duke@435 | 192 | // "zap_dead_locals", right before each place in compiled code |
duke@435 | 193 | // that could potentially be a gc-point (i.e., a safepoint or oop map point). |
duke@435 | 194 | // The runtime routine checks that locations mapped as oops are really |
duke@435 | 195 | // oops, that locations mapped as values do not look like oops, |
duke@435 | 196 | // and that locations mapped as dead are not used later |
duke@435 | 197 | // (by zapping them to an invalid address). |
duke@435 | 198 | |
duke@435 | 199 | int Compile::_CompiledZap_count = 0; |
duke@435 | 200 | |
duke@435 | 201 | void Compile::Insert_zap_nodes() { |
duke@435 | 202 | bool skip = false; |
duke@435 | 203 | |
duke@435 | 204 | |
duke@435 | 205 | // Dink with static counts because code code without the extra |
duke@435 | 206 | // runtime calls is MUCH faster for debugging purposes |
duke@435 | 207 | |
duke@435 | 208 | if ( CompileZapFirst == 0 ) ; // nothing special |
duke@435 | 209 | else if ( CompileZapFirst > CompiledZap_count() ) skip = true; |
duke@435 | 210 | else if ( CompileZapFirst == CompiledZap_count() ) |
duke@435 | 211 | warning("starting zap compilation after skipping"); |
duke@435 | 212 | |
duke@435 | 213 | if ( CompileZapLast == -1 ) ; // nothing special |
duke@435 | 214 | else if ( CompileZapLast < CompiledZap_count() ) skip = true; |
duke@435 | 215 | else if ( CompileZapLast == CompiledZap_count() ) |
duke@435 | 216 | warning("about to compile last zap"); |
duke@435 | 217 | |
duke@435 | 218 | ++_CompiledZap_count; // counts skipped zaps, too |
duke@435 | 219 | |
duke@435 | 220 | if ( skip ) return; |
duke@435 | 221 | |
duke@435 | 222 | |
duke@435 | 223 | if ( _method == NULL ) |
duke@435 | 224 | return; // no safepoints/oopmaps emitted for calls in stubs, so we don't care |
duke@435 | 225 | |
duke@435 | 226 | // Insert call to zap runtime stub before every node with an oop map |
adlertz@5539 | 227 | for( uint i=0; i<_cfg->number_of_blocks(); i++ ) { |
adlertz@5539 | 228 | Block *b = _cfg->get_block(i); |
adlertz@5635 | 229 | for ( uint j = 0; j < b->number_of_nodes(); ++j ) { |
adlertz@5635 | 230 | Node *n = b->get_node(j); |
duke@435 | 231 | |
duke@435 | 232 | // Determining if we should insert a zap-a-lot node in output. |
duke@435 | 233 | // We do that for all nodes that has oopmap info, except for calls |
duke@435 | 234 | // to allocation. Calls to allocation passes in the old top-of-eden pointer |
duke@435 | 235 | // and expect the C code to reset it. Hence, there can be no safepoints between |
duke@435 | 236 | // the inlined-allocation and the call to new_Java, etc. |
duke@435 | 237 | // We also cannot zap monitor calls, as they must hold the microlock |
duke@435 | 238 | // during the call to Zap, which also wants to grab the microlock. |
duke@435 | 239 | bool insert = n->is_MachSafePoint() && (n->as_MachSafePoint()->oop_map() != NULL); |
duke@435 | 240 | if ( insert ) { // it is MachSafePoint |
duke@435 | 241 | if ( !n->is_MachCall() ) { |
duke@435 | 242 | insert = false; |
duke@435 | 243 | } else if ( n->is_MachCall() ) { |
duke@435 | 244 | MachCallNode* call = n->as_MachCall(); |
duke@435 | 245 | if (call->entry_point() == OptoRuntime::new_instance_Java() || |
duke@435 | 246 | call->entry_point() == OptoRuntime::new_array_Java() || |
duke@435 | 247 | call->entry_point() == OptoRuntime::multianewarray2_Java() || |
duke@435 | 248 | call->entry_point() == OptoRuntime::multianewarray3_Java() || |
duke@435 | 249 | call->entry_point() == OptoRuntime::multianewarray4_Java() || |
duke@435 | 250 | call->entry_point() == OptoRuntime::multianewarray5_Java() || |
duke@435 | 251 | call->entry_point() == OptoRuntime::slow_arraycopy_Java() || |
duke@435 | 252 | call->entry_point() == OptoRuntime::complete_monitor_locking_Java() |
duke@435 | 253 | ) { |
duke@435 | 254 | insert = false; |
duke@435 | 255 | } |
duke@435 | 256 | } |
duke@435 | 257 | if (insert) { |
duke@435 | 258 | Node *zap = call_zap_node(n->as_MachSafePoint(), i); |
adlertz@5635 | 259 | b->insert_node(zap, j); |
adlertz@5509 | 260 | _cfg->map_node_to_block(zap, b); |
duke@435 | 261 | ++j; |
duke@435 | 262 | } |
duke@435 | 263 | } |
duke@435 | 264 | } |
duke@435 | 265 | } |
duke@435 | 266 | } |
duke@435 | 267 | |
duke@435 | 268 | |
duke@435 | 269 | Node* Compile::call_zap_node(MachSafePointNode* node_to_check, int block_no) { |
duke@435 | 270 | const TypeFunc *tf = OptoRuntime::zap_dead_locals_Type(); |
duke@435 | 271 | CallStaticJavaNode* ideal_node = |
kvn@4115 | 272 | new (this) CallStaticJavaNode( tf, |
duke@435 | 273 | OptoRuntime::zap_dead_locals_stub(_method->flags().is_native()), |
kvn@4115 | 274 | "call zap dead locals stub", 0, TypePtr::BOTTOM); |
duke@435 | 275 | // We need to copy the OopMap from the site we're zapping at. |
duke@435 | 276 | // We have to make a copy, because the zap site might not be |
duke@435 | 277 | // a call site, and zap_dead is a call site. |
duke@435 | 278 | OopMap* clone = node_to_check->oop_map()->deep_copy(); |
duke@435 | 279 | |
duke@435 | 280 | // Add the cloned OopMap to the zap node |
duke@435 | 281 | ideal_node->set_oop_map(clone); |
duke@435 | 282 | return _matcher->match_sfpt(ideal_node); |
duke@435 | 283 | } |
duke@435 | 284 | |
duke@435 | 285 | bool Compile::is_node_getting_a_safepoint( Node* n) { |
duke@435 | 286 | // This code duplicates the logic prior to the call of add_safepoint |
duke@435 | 287 | // below in this file. |
duke@435 | 288 | if( n->is_MachSafePoint() ) return true; |
duke@435 | 289 | return false; |
duke@435 | 290 | } |
duke@435 | 291 | |
duke@435 | 292 | # endif // ENABLE_ZAP_DEAD_LOCALS |
duke@435 | 293 | |
rasbold@853 | 294 | // Compute the size of first NumberOfLoopInstrToAlign instructions at the top |
duke@435 | 295 | // of a loop. When aligning a loop we need to provide enough instructions |
duke@435 | 296 | // in cpu's fetch buffer to feed decoders. The loop alignment could be |
duke@435 | 297 | // avoided if we have enough instructions in fetch buffer at the head of a loop. |
duke@435 | 298 | // By default, the size is set to 999999 by Block's constructor so that |
duke@435 | 299 | // a loop will be aligned if the size is not reset here. |
duke@435 | 300 | // |
duke@435 | 301 | // Note: Mach instructions could contain several HW instructions |
duke@435 | 302 | // so the size is estimated only. |
duke@435 | 303 | // |
duke@435 | 304 | void Compile::compute_loop_first_inst_sizes() { |
duke@435 | 305 | // The next condition is used to gate the loop alignment optimization. |
duke@435 | 306 | // Don't align a loop if there are enough instructions at the head of a loop |
duke@435 | 307 | // or alignment padding is larger than MaxLoopPad. By default, MaxLoopPad |
duke@435 | 308 | // is equal to OptoLoopAlignment-1 except on new Intel cpus, where it is |
duke@435 | 309 | // equal to 11 bytes which is the largest address NOP instruction. |
adlertz@5539 | 310 | if (MaxLoopPad < OptoLoopAlignment - 1) { |
adlertz@5539 | 311 | uint last_block = _cfg->number_of_blocks() - 1; |
adlertz@5539 | 312 | for (uint i = 1; i <= last_block; i++) { |
adlertz@5539 | 313 | Block* block = _cfg->get_block(i); |
duke@435 | 314 | // Check the first loop's block which requires an alignment. |
adlertz@5539 | 315 | if (block->loop_alignment() > (uint)relocInfo::addr_unit()) { |
duke@435 | 316 | uint sum_size = 0; |
duke@435 | 317 | uint inst_cnt = NumberOfLoopInstrToAlign; |
adlertz@5539 | 318 | inst_cnt = block->compute_first_inst_size(sum_size, inst_cnt, _regalloc); |
rasbold@853 | 319 | |
rasbold@853 | 320 | // Check subsequent fallthrough blocks if the loop's first |
rasbold@853 | 321 | // block(s) does not have enough instructions. |
adlertz@5539 | 322 | Block *nb = block; |
adlertz@5539 | 323 | while(inst_cnt > 0 && |
adlertz@5539 | 324 | i < last_block && |
adlertz@5539 | 325 | !_cfg->get_block(i + 1)->has_loop_alignment() && |
adlertz@5539 | 326 | !nb->has_successor(block)) { |
rasbold@853 | 327 | i++; |
adlertz@5539 | 328 | nb = _cfg->get_block(i); |
rasbold@853 | 329 | inst_cnt = nb->compute_first_inst_size(sum_size, inst_cnt, _regalloc); |
rasbold@853 | 330 | } // while( inst_cnt > 0 && i < last_block ) |
rasbold@853 | 331 | |
adlertz@5539 | 332 | block->set_first_inst_size(sum_size); |
duke@435 | 333 | } // if( b->head()->is_Loop() ) |
duke@435 | 334 | } // for( i <= last_block ) |
duke@435 | 335 | } // if( MaxLoopPad < OptoLoopAlignment-1 ) |
duke@435 | 336 | } |
duke@435 | 337 | |
duke@435 | 338 | // The architecture description provides short branch variants for some long |
duke@435 | 339 | // branch instructions. Replace eligible long branches with short branches. |
kvn@3049 | 340 | void Compile::shorten_branches(uint* blk_starts, int& code_size, int& reloc_size, int& stub_size) { |
duke@435 | 341 | // Compute size of each block, method size, and relocation information size |
adlertz@5539 | 342 | uint nblocks = _cfg->number_of_blocks(); |
kvn@3049 | 343 | |
kvn@3049 | 344 | uint* jmp_offset = NEW_RESOURCE_ARRAY(uint,nblocks); |
kvn@3049 | 345 | uint* jmp_size = NEW_RESOURCE_ARRAY(uint,nblocks); |
kvn@3049 | 346 | int* jmp_nidx = NEW_RESOURCE_ARRAY(int ,nblocks); |
kvn@3049 | 347 | DEBUG_ONLY( uint *jmp_target = NEW_RESOURCE_ARRAY(uint,nblocks); ) |
kvn@3049 | 348 | DEBUG_ONLY( uint *jmp_rule = NEW_RESOURCE_ARRAY(uint,nblocks); ) |
kvn@3049 | 349 | |
kvn@3049 | 350 | bool has_short_branch_candidate = false; |
duke@435 | 351 | |
duke@435 | 352 | // Initialize the sizes to 0 |
duke@435 | 353 | code_size = 0; // Size in bytes of generated code |
duke@435 | 354 | stub_size = 0; // Size in bytes of all stub entries |
duke@435 | 355 | // Size in bytes of all relocation entries, including those in local stubs. |
duke@435 | 356 | // Start with 2-bytes of reloc info for the unvalidated entry point |
duke@435 | 357 | reloc_size = 1; // Number of relocation entries |
duke@435 | 358 | |
duke@435 | 359 | // Make three passes. The first computes pessimistic blk_starts, |
kvn@3049 | 360 | // relative jmp_offset and reloc_size information. The second performs |
twisti@2350 | 361 | // short branch substitution using the pessimistic sizing. The |
twisti@2350 | 362 | // third inserts nops where needed. |
duke@435 | 363 | |
duke@435 | 364 | // Step one, perform a pessimistic sizing pass. |
kvn@3049 | 365 | uint last_call_adr = max_uint; |
kvn@3049 | 366 | uint last_avoid_back_to_back_adr = max_uint; |
duke@435 | 367 | uint nop_size = (new (this) MachNopNode())->size(_regalloc); |
kvn@3049 | 368 | for (uint i = 0; i < nblocks; i++) { // For all blocks |
adlertz@5539 | 369 | Block* block = _cfg->get_block(i); |
duke@435 | 370 | |
kvn@3049 | 371 | // During short branch replacement, we store the relative (to blk_starts) |
kvn@3049 | 372 | // offset of jump in jmp_offset, rather than the absolute offset of jump. |
kvn@3049 | 373 | // This is so that we do not need to recompute sizes of all nodes when |
kvn@3049 | 374 | // we compute correct blk_starts in our next sizing pass. |
kvn@3049 | 375 | jmp_offset[i] = 0; |
kvn@3049 | 376 | jmp_size[i] = 0; |
kvn@3049 | 377 | jmp_nidx[i] = -1; |
kvn@3049 | 378 | DEBUG_ONLY( jmp_target[i] = 0; ) |
kvn@3049 | 379 | DEBUG_ONLY( jmp_rule[i] = 0; ) |
kvn@3049 | 380 | |
duke@435 | 381 | // Sum all instruction sizes to compute block size |
adlertz@5635 | 382 | uint last_inst = block->number_of_nodes(); |
duke@435 | 383 | uint blk_size = 0; |
kvn@3049 | 384 | for (uint j = 0; j < last_inst; j++) { |
adlertz@5635 | 385 | Node* nj = block->get_node(j); |
duke@435 | 386 | // Handle machine instruction nodes |
kvn@3049 | 387 | if (nj->is_Mach()) { |
duke@435 | 388 | MachNode *mach = nj->as_Mach(); |
duke@435 | 389 | blk_size += (mach->alignment_required() - 1) * relocInfo::addr_unit(); // assume worst case padding |
duke@435 | 390 | reloc_size += mach->reloc(); |
dlong@5000 | 391 | if (mach->is_MachCall()) { |
duke@435 | 392 | MachCallNode *mcall = mach->as_MachCall(); |
duke@435 | 393 | // This destination address is NOT PC-relative |
duke@435 | 394 | |
duke@435 | 395 | mcall->method_set((intptr_t)mcall->entry_point()); |
duke@435 | 396 | |
dlong@5000 | 397 | if (mcall->is_MachCallJava() && mcall->as_MachCallJava()->_method) { |
dlong@5000 | 398 | stub_size += CompiledStaticCall::to_interp_stub_size(); |
dlong@5000 | 399 | reloc_size += CompiledStaticCall::reloc_to_interp_stub(); |
duke@435 | 400 | } |
duke@435 | 401 | } else if (mach->is_MachSafePoint()) { |
duke@435 | 402 | // If call/safepoint are adjacent, account for possible |
duke@435 | 403 | // nop to disambiguate the two safepoints. |
kvn@3049 | 404 | // ScheduleAndBundle() can rearrange nodes in a block, |
kvn@3049 | 405 | // check for all offsets inside this block. |
kvn@3049 | 406 | if (last_call_adr >= blk_starts[i]) { |
duke@435 | 407 | blk_size += nop_size; |
duke@435 | 408 | } |
duke@435 | 409 | } |
kvn@3049 | 410 | if (mach->avoid_back_to_back()) { |
kvn@3049 | 411 | // Nop is inserted between "avoid back to back" instructions. |
kvn@3049 | 412 | // ScheduleAndBundle() can rearrange nodes in a block, |
kvn@3049 | 413 | // check for all offsets inside this block. |
kvn@3049 | 414 | if (last_avoid_back_to_back_adr >= blk_starts[i]) { |
kvn@3049 | 415 | blk_size += nop_size; |
kvn@3049 | 416 | } |
kvn@3049 | 417 | } |
kvn@3049 | 418 | if (mach->may_be_short_branch()) { |
kvn@3051 | 419 | if (!nj->is_MachBranch()) { |
kvn@3049 | 420 | #ifndef PRODUCT |
kvn@3049 | 421 | nj->dump(3); |
kvn@3049 | 422 | #endif |
kvn@3049 | 423 | Unimplemented(); |
kvn@3049 | 424 | } |
kvn@3049 | 425 | assert(jmp_nidx[i] == -1, "block should have only one branch"); |
kvn@3049 | 426 | jmp_offset[i] = blk_size; |
kvn@3055 | 427 | jmp_size[i] = nj->size(_regalloc); |
kvn@3049 | 428 | jmp_nidx[i] = j; |
kvn@3049 | 429 | has_short_branch_candidate = true; |
kvn@3049 | 430 | } |
duke@435 | 431 | } |
kvn@3055 | 432 | blk_size += nj->size(_regalloc); |
duke@435 | 433 | // Remember end of call offset |
kvn@3040 | 434 | if (nj->is_MachCall() && !nj->is_MachCallLeaf()) { |
kvn@3049 | 435 | last_call_adr = blk_starts[i]+blk_size; |
kvn@3049 | 436 | } |
kvn@3049 | 437 | // Remember end of avoid_back_to_back offset |
kvn@3049 | 438 | if (nj->is_Mach() && nj->as_Mach()->avoid_back_to_back()) { |
kvn@3049 | 439 | last_avoid_back_to_back_adr = blk_starts[i]+blk_size; |
duke@435 | 440 | } |
duke@435 | 441 | } |
duke@435 | 442 | |
duke@435 | 443 | // When the next block starts a loop, we may insert pad NOP |
duke@435 | 444 | // instructions. Since we cannot know our future alignment, |
duke@435 | 445 | // assume the worst. |
adlertz@5539 | 446 | if (i < nblocks - 1) { |
adlertz@5539 | 447 | Block* nb = _cfg->get_block(i + 1); |
duke@435 | 448 | int max_loop_pad = nb->code_alignment()-relocInfo::addr_unit(); |
kvn@3049 | 449 | if (max_loop_pad > 0) { |
duke@435 | 450 | assert(is_power_of_2(max_loop_pad+relocInfo::addr_unit()), ""); |
rasbold@4874 | 451 | // Adjust last_call_adr and/or last_avoid_back_to_back_adr. |
rasbold@4874 | 452 | // If either is the last instruction in this block, bump by |
rasbold@4874 | 453 | // max_loop_pad in lock-step with blk_size, so sizing |
rasbold@4874 | 454 | // calculations in subsequent blocks still can conservatively |
rasbold@4874 | 455 | // detect that it may be the last instruction in this block. |
rasbold@4874 | 456 | if (last_call_adr == blk_starts[i]+blk_size) { |
rasbold@4874 | 457 | last_call_adr += max_loop_pad; |
rasbold@4874 | 458 | } |
rasbold@4874 | 459 | if (last_avoid_back_to_back_adr == blk_starts[i]+blk_size) { |
rasbold@4874 | 460 | last_avoid_back_to_back_adr += max_loop_pad; |
rasbold@4874 | 461 | } |
duke@435 | 462 | blk_size += max_loop_pad; |
duke@435 | 463 | } |
duke@435 | 464 | } |
duke@435 | 465 | |
duke@435 | 466 | // Save block size; update total method size |
duke@435 | 467 | blk_starts[i+1] = blk_starts[i]+blk_size; |
duke@435 | 468 | } |
duke@435 | 469 | |
duke@435 | 470 | // Step two, replace eligible long jumps. |
kvn@3049 | 471 | bool progress = true; |
kvn@3049 | 472 | uint last_may_be_short_branch_adr = max_uint; |
kvn@3049 | 473 | while (has_short_branch_candidate && progress) { |
kvn@3049 | 474 | progress = false; |
kvn@3049 | 475 | has_short_branch_candidate = false; |
kvn@3049 | 476 | int adjust_block_start = 0; |
kvn@3049 | 477 | for (uint i = 0; i < nblocks; i++) { |
adlertz@5539 | 478 | Block* block = _cfg->get_block(i); |
kvn@3049 | 479 | int idx = jmp_nidx[i]; |
adlertz@5635 | 480 | MachNode* mach = (idx == -1) ? NULL: block->get_node(idx)->as_Mach(); |
kvn@3049 | 481 | if (mach != NULL && mach->may_be_short_branch()) { |
kvn@3049 | 482 | #ifdef ASSERT |
kvn@3051 | 483 | assert(jmp_size[i] > 0 && mach->is_MachBranch(), "sanity"); |
kvn@3049 | 484 | int j; |
kvn@3049 | 485 | // Find the branch; ignore trailing NOPs. |
adlertz@5635 | 486 | for (j = block->number_of_nodes()-1; j>=0; j--) { |
adlertz@5635 | 487 | Node* n = block->get_node(j); |
kvn@3049 | 488 | if (!n->is_Mach() || n->as_Mach()->ideal_Opcode() != Op_Con) |
kvn@3049 | 489 | break; |
kvn@3049 | 490 | } |
adlertz@5635 | 491 | assert(j >= 0 && j == idx && block->get_node(j) == (Node*)mach, "sanity"); |
kvn@3049 | 492 | #endif |
kvn@3049 | 493 | int br_size = jmp_size[i]; |
kvn@3049 | 494 | int br_offs = blk_starts[i] + jmp_offset[i]; |
kvn@3049 | 495 | |
duke@435 | 496 | // This requires the TRUE branch target be in succs[0] |
adlertz@5539 | 497 | uint bnum = block->non_connector_successor(0)->_pre_order; |
kvn@3049 | 498 | int offset = blk_starts[bnum] - br_offs; |
kvn@3049 | 499 | if (bnum > i) { // adjust following block's offset |
kvn@3049 | 500 | offset -= adjust_block_start; |
kvn@3049 | 501 | } |
kvn@3049 | 502 | // In the following code a nop could be inserted before |
kvn@3049 | 503 | // the branch which will increase the backward distance. |
kvn@3049 | 504 | bool needs_padding = ((uint)br_offs == last_may_be_short_branch_adr); |
kvn@3049 | 505 | if (needs_padding && offset <= 0) |
kvn@3049 | 506 | offset -= nop_size; |
kvn@3049 | 507 | |
kvn@3049 | 508 | if (_matcher->is_short_branch_offset(mach->rule(), br_size, offset)) { |
kvn@3049 | 509 | // We've got a winner. Replace this branch. |
kvn@3051 | 510 | MachNode* replacement = mach->as_MachBranch()->short_branch_version(this); |
kvn@3049 | 511 | |
kvn@3049 | 512 | // Update the jmp_size. |
kvn@3049 | 513 | int new_size = replacement->size(_regalloc); |
kvn@3049 | 514 | int diff = br_size - new_size; |
kvn@3049 | 515 | assert(diff >= (int)nop_size, "short_branch size should be smaller"); |
kvn@3049 | 516 | // Conservatively take into account padding between |
kvn@3049 | 517 | // avoid_back_to_back branches. Previous branch could be |
kvn@3049 | 518 | // converted into avoid_back_to_back branch during next |
kvn@3049 | 519 | // rounds. |
kvn@3049 | 520 | if (needs_padding && replacement->avoid_back_to_back()) { |
kvn@3049 | 521 | jmp_offset[i] += nop_size; |
kvn@3049 | 522 | diff -= nop_size; |
duke@435 | 523 | } |
kvn@3049 | 524 | adjust_block_start += diff; |
adlertz@5635 | 525 | block->map_node(replacement, idx); |
bharadwaj@4315 | 526 | mach->subsume_by(replacement, C); |
kvn@3049 | 527 | mach = replacement; |
kvn@3049 | 528 | progress = true; |
kvn@3049 | 529 | |
kvn@3049 | 530 | jmp_size[i] = new_size; |
kvn@3049 | 531 | DEBUG_ONLY( jmp_target[i] = bnum; ); |
kvn@3049 | 532 | DEBUG_ONLY( jmp_rule[i] = mach->rule(); ); |
duke@435 | 533 | } else { |
kvn@3049 | 534 | // The jump distance is not short, try again during next iteration. |
kvn@3049 | 535 | has_short_branch_candidate = true; |
duke@435 | 536 | } |
kvn@3049 | 537 | } // (mach->may_be_short_branch()) |
kvn@3049 | 538 | if (mach != NULL && (mach->may_be_short_branch() || |
kvn@3049 | 539 | mach->avoid_back_to_back())) { |
kvn@3049 | 540 | last_may_be_short_branch_adr = blk_starts[i] + jmp_offset[i] + jmp_size[i]; |
duke@435 | 541 | } |
kvn@3049 | 542 | blk_starts[i+1] -= adjust_block_start; |
duke@435 | 543 | } |
duke@435 | 544 | } |
duke@435 | 545 | |
kvn@3049 | 546 | #ifdef ASSERT |
kvn@3049 | 547 | for (uint i = 0; i < nblocks; i++) { // For all blocks |
kvn@3049 | 548 | if (jmp_target[i] != 0) { |
kvn@3049 | 549 | int br_size = jmp_size[i]; |
kvn@3049 | 550 | int offset = blk_starts[jmp_target[i]]-(blk_starts[i] + jmp_offset[i]); |
kvn@3049 | 551 | if (!_matcher->is_short_branch_offset(jmp_rule[i], br_size, offset)) { |
kvn@3049 | 552 | tty->print_cr("target (%d) - jmp_offset(%d) = offset (%d), jump_size(%d), jmp_block B%d, target_block B%d", blk_starts[jmp_target[i]], blk_starts[i] + jmp_offset[i], offset, br_size, i, jmp_target[i]); |
kvn@3049 | 553 | } |
kvn@3049 | 554 | assert(_matcher->is_short_branch_offset(jmp_rule[i], br_size, offset), "Displacement too large for short jmp"); |
kvn@3049 | 555 | } |
kvn@3049 | 556 | } |
kvn@3049 | 557 | #endif |
kvn@3049 | 558 | |
kvn@3055 | 559 | // Step 3, compute the offsets of all blocks, will be done in fill_buffer() |
kvn@3049 | 560 | // after ScheduleAndBundle(). |
kvn@3049 | 561 | |
kvn@3049 | 562 | // ------------------ |
kvn@3049 | 563 | // Compute size for code buffer |
kvn@3049 | 564 | code_size = blk_starts[nblocks]; |
kvn@3049 | 565 | |
kvn@3049 | 566 | // Relocation records |
kvn@3049 | 567 | reloc_size += 1; // Relo entry for exception handler |
kvn@3049 | 568 | |
kvn@3049 | 569 | // Adjust reloc_size to number of record of relocation info |
kvn@3049 | 570 | // Min is 2 bytes, max is probably 6 or 8, with a tax up to 25% for |
kvn@3049 | 571 | // a relocation index. |
kvn@3049 | 572 | // The CodeBuffer will expand the locs array if this estimate is too low. |
kvn@3049 | 573 | reloc_size *= 10 / sizeof(relocInfo); |
kvn@3049 | 574 | } |
kvn@3049 | 575 | |
duke@435 | 576 | //------------------------------FillLocArray----------------------------------- |
duke@435 | 577 | // Create a bit of debug info and append it to the array. The mapping is from |
duke@435 | 578 | // Java local or expression stack to constant, register or stack-slot. For |
duke@435 | 579 | // doubles, insert 2 mappings and return 1 (to tell the caller that the next |
duke@435 | 580 | // entry has been taken care of and caller should skip it). |
duke@435 | 581 | static LocationValue *new_loc_value( PhaseRegAlloc *ra, OptoReg::Name regnum, Location::Type l_type ) { |
duke@435 | 582 | // This should never have accepted Bad before |
duke@435 | 583 | assert(OptoReg::is_valid(regnum), "location must be valid"); |
duke@435 | 584 | return (OptoReg::is_reg(regnum)) |
duke@435 | 585 | ? new LocationValue(Location::new_reg_loc(l_type, OptoReg::as_VMReg(regnum)) ) |
duke@435 | 586 | : new LocationValue(Location::new_stk_loc(l_type, ra->reg2offset(regnum))); |
duke@435 | 587 | } |
duke@435 | 588 | |
kvn@498 | 589 | |
kvn@498 | 590 | ObjectValue* |
kvn@498 | 591 | Compile::sv_for_node_id(GrowableArray<ScopeValue*> *objs, int id) { |
kvn@498 | 592 | for (int i = 0; i < objs->length(); i++) { |
kvn@498 | 593 | assert(objs->at(i)->is_object(), "corrupt object cache"); |
kvn@498 | 594 | ObjectValue* sv = (ObjectValue*) objs->at(i); |
kvn@498 | 595 | if (sv->id() == id) { |
kvn@498 | 596 | return sv; |
kvn@498 | 597 | } |
kvn@498 | 598 | } |
kvn@498 | 599 | // Otherwise.. |
kvn@498 | 600 | return NULL; |
kvn@498 | 601 | } |
kvn@498 | 602 | |
// Register the ObjectValue for a scalar-replaced node in the object pool.
// Each node id may be recorded at most once; lookup is done later with
// sv_for_node_id().
void Compile::set_sv_for_object_node(GrowableArray<ScopeValue*> *objs,
                                     ObjectValue* sv ) {
  // Duplicate registration would corrupt the pool's id->value mapping.
  assert(sv_for_node_id(objs, sv->id()) == NULL, "Precondition");
  objs->append(sv);
}
kvn@498 | 608 | |
kvn@498 | 609 | |
// Append a ScopeValue describing one Java local / expression-stack entry to
// 'array'. 'local' is the node holding the value at safepoint 'sfpt'; 'idx'
// is the slot being filled; 'objs' is the pool of scalar-replaced object
// descriptions shared across the whole safepoint. Values may be encoded as
// registers, stack slots, constants, or (recursively) scalar-replaced
// objects. Doubles/longs occupy two slots and are emitted as two entries.
void Compile::FillLocArray( int idx, MachSafePointNode* sfpt, Node *local,
                       GrowableArray<ScopeValue*> *array,
                       GrowableArray<ScopeValue*> *objs ) {
  assert( local, "use _top instead of null" );
  if (array->length() != idx) {
    assert(array->length() == idx + 1, "Unexpected array count");
    // Old functionality:
    //   return
    // New functionality:
    //   Assert if the local is not top. In product mode let the new node
    //   override the old entry.
    assert(local == top(), "LocArray collision");
    if (local == top()) {
      return;
    }
    array->pop();
  }
  const Type *t = local->bottom_type();

  // Is it a safepoint scalar object node?
  if (local->is_SafePointScalarObject()) {
    SafePointScalarObjectNode* spobj = local->as_SafePointScalarObject();

    // Reuse the pooled description if this object was already encoded for
    // this safepoint; otherwise build it (and its field values) now.
    ObjectValue* sv = Compile::sv_for_node_id(objs, spobj->_idx);
    if (sv == NULL) {
      ciKlass* cik = t->is_oopptr()->klass();
      assert(cik->is_instance_klass() ||
             cik->is_array_klass(), "Not supported allocation.");
      sv = new ObjectValue(spobj->_idx,
                           new ConstantOopWriteValue(cik->java_mirror()->constant_encoding()));
      Compile::set_sv_for_object_node(objs, sv);

      // The field values are extra inputs hanging off the safepoint,
      // starting at first_index; describe each one recursively.
      uint first_ind = spobj->first_index(sfpt->jvms());
      for (uint i = 0; i < spobj->n_fields(); i++) {
        Node* fld_node = sfpt->in(first_ind+i);
        (void)FillLocArray(sv->field_values()->length(), sfpt, fld_node, sv->field_values(), objs);
      }
    }
    array->append(sv);
    return;
  }

  // Grab the register number for the local
  OptoReg::Name regnum = _regalloc->get_reg_first(local);
  if( OptoReg::is_valid(regnum) ) {// Got a register/stack?
    // Record the double as two float registers.
    // The register mask for such a value always specifies two adjacent
    // float registers, with the lower register number even.
    // Normally, the allocation of high and low words to these registers
    // is irrelevant, because nearly all operations on register pairs
    // (e.g., StoreD) treat them as a single unit.
    // Here, we assume in addition that the words in these two registers
    // stored "naturally" (by operations like StoreD and double stores
    // within the interpreter) such that the lower-numbered register
    // is written to the lower memory address.  This may seem like
    // a machine dependency, but it is not--it is a requirement on
    // the author of the <arch>.ad file to ensure that, for every
    // even/odd double-register pair to which a double may be allocated,
    // the word in the even single-register is stored to the first
    // memory word.  (Note that register numbers are completely
    // arbitrary, and are not tied to any machine-level encodings.)
#ifdef _LP64
    if( t->base() == Type::DoubleBot || t->base() == Type::DoubleCon ) {
      array->append(new ConstantIntValue(0));
      array->append(new_loc_value( _regalloc, regnum, Location::dbl ));
    } else if ( t->base() == Type::Long ) {
      array->append(new ConstantIntValue(0));
      array->append(new_loc_value( _regalloc, regnum, Location::lng ));
    } else if ( t->base() == Type::RawPtr ) {
      // jsr/ret return address which must be restored into a the full
      // width 64-bit stack slot.
      array->append(new_loc_value( _regalloc, regnum, Location::lng ));
    }
#else //_LP64
#ifdef SPARC
    if (t->base() == Type::Long && OptoReg::is_reg(regnum)) {
      // For SPARC we have to swap high and low words for
      // long values stored in a single-register (g0-g7).
      array->append(new_loc_value( _regalloc,              regnum   , Location::normal ));
      array->append(new_loc_value( _regalloc, OptoReg::add(regnum,1), Location::normal ));
    } else
#endif //SPARC
    if( t->base() == Type::DoubleBot || t->base() == Type::DoubleCon || t->base() == Type::Long ) {
      // Repack the double/long as two jints.
      // The convention the interpreter uses is that the second local
      // holds the first raw word of the native double representation.
      // This is actually reasonable, since locals and stack arrays
      // grow downwards in all implementations.
      // (If, on some machine, the interpreter's Java locals or stack
      // were to grow upwards, the embedded doubles would be word-swapped.)
      array->append(new_loc_value( _regalloc, OptoReg::add(regnum,1), Location::normal ));
      array->append(new_loc_value( _regalloc,              regnum   , Location::normal ));
    }
#endif //_LP64
    else if( (t->base() == Type::FloatBot || t->base() == Type::FloatCon) &&
               OptoReg::is_reg(regnum) ) {
      array->append(new_loc_value( _regalloc, regnum, Matcher::float_in_double()
                                   ? Location::float_in_dbl : Location::normal ));
    } else if( t->base() == Type::Int && OptoReg::is_reg(regnum) ) {
      array->append(new_loc_value( _regalloc, regnum, Matcher::int_in_long
                                   ? Location::int_in_long : Location::normal ));
    } else if( t->base() == Type::NarrowOop ) {
      array->append(new_loc_value( _regalloc, regnum, Location::narrowoop ));
    } else {
      array->append(new_loc_value( _regalloc, regnum, _regalloc->is_oop(local) ? Location::oop : Location::normal ));
    }
    return;
  }

  // No register.  It must be constant data.
  switch (t->base()) {
  case Type::Half:              // Second half of a double
    ShouldNotReachHere();       // Caller should skip 2nd halves
    break;
  case Type::AnyPtr:
    // Only the NULL pointer reaches here without a register; encode it
    // as a constant NULL oop.
    array->append(new ConstantOopWriteValue(NULL));
    break;
  case Type::AryPtr:
  case Type::InstPtr:          // fall through
    array->append(new ConstantOopWriteValue(t->isa_oopptr()->const_oop()->constant_encoding()));
    break;
  case Type::NarrowOop:
    if (t == TypeNarrowOop::NULL_PTR) {
      array->append(new ConstantOopWriteValue(NULL));
    } else {
      // Encode the uncompressed form of the constant narrow oop.
      array->append(new ConstantOopWriteValue(t->make_ptr()->isa_oopptr()->const_oop()->constant_encoding()));
    }
    break;
  case Type::Int:
    array->append(new ConstantIntValue(t->is_int()->get_con()));
    break;
  case Type::RawPtr:
    // A return address (T_ADDRESS).
    assert((intptr_t)t->is_ptr()->get_con() < (intptr_t)0x10000, "must be a valid BCI");
#ifdef _LP64
    // Must be restored to the full-width 64-bit stack slot.
    array->append(new ConstantLongValue(t->is_ptr()->get_con()));
#else
    array->append(new ConstantIntValue(t->is_ptr()->get_con()));
#endif
    break;
  case Type::FloatCon: {
    // Floats are recorded by their raw bit pattern.
    float f = t->is_float_constant()->getf();
    array->append(new ConstantIntValue(jint_cast(f)));
    break;
  }
  case Type::DoubleCon: {
    jdouble d = t->is_double_constant()->getd();
#ifdef _LP64
    array->append(new ConstantIntValue(0));
    array->append(new ConstantDoubleValue(d));
#else
    // Repack the double as two jints.
    // The convention the interpreter uses is that the second local
    // holds the first raw word of the native double representation.
    // This is actually reasonable, since locals and stack arrays
    // grow downwards in all implementations.
    // (If, on some machine, the interpreter's Java locals or stack
    // were to grow upwards, the embedded doubles would be word-swapped.)
    jint *dp = (jint*)&d;
    array->append(new ConstantIntValue(dp[1]));
    array->append(new ConstantIntValue(dp[0]));
#endif
    break;
  }
  case Type::Long: {
    jlong d = t->is_long()->get_con();
#ifdef _LP64
    array->append(new ConstantIntValue(0));
    array->append(new ConstantLongValue(d));
#else
    // Repack the long as two jints.
    // The convention the interpreter uses is that the second local
    // holds the first raw word of the native double representation.
    // This is actually reasonable, since locals and stack arrays
    // grow downwards in all implementations.
    // (If, on some machine, the interpreter's Java locals or stack
    // were to grow upwards, the embedded doubles would be word-swapped.)
    jint *dp = (jint*)&d;
    array->append(new ConstantIntValue(dp[1]));
    array->append(new ConstantIntValue(dp[0]));
#endif
    break;
  }
  case Type::Top:               // Add an illegal value here
    array->append(new LocationValue(Location()));
    break;
  default:
    ShouldNotReachHere();
    break;
  }
}
duke@435 | 802 | |
duke@435 | 803 | // Determine if this node starts a bundle |
duke@435 | 804 | bool Compile::starts_bundle(const Node *n) const { |
duke@435 | 805 | return (_node_bundling_limit > n->_idx && |
duke@435 | 806 | _node_bundling_base[n->_idx].starts_bundle()); |
duke@435 | 807 | } |
duke@435 | 808 | |
//--------------------------Process_OopMap_Node--------------------------------
// Record debug information (oop map plus scope descriptions for every JVMS
// depth, including locals, expression stack, and monitors) for one safepoint
// node 'mach' whose first instruction byte is at 'current_offset'.
void Compile::Process_OopMap_Node(MachNode *mach, int current_offset) {

  // Handle special safepoint nodes for synchronization
  MachSafePointNode *sfn = mach->as_MachSafePoint();
  MachCallNode *mcall;

#ifdef ENABLE_ZAP_DEAD_LOCALS
  assert( is_node_getting_a_safepoint(mach),  "logic does not match; false negative");
#endif

  int safepoint_pc_offset = current_offset;
  bool is_method_handle_invoke = false;
  bool return_oop = false;

  // Add the safepoint in the DebugInfoRecorder
  if( !mach->is_MachCall() ) {
    mcall = NULL;
    debug_info()->add_safepoint(safepoint_pc_offset, sfn->_oop_map);
  } else {
    mcall = mach->as_MachCall();

    // Is the call a MethodHandle call?
    if (mcall->is_MachCallJava()) {
      if (mcall->as_MachCallJava()->_method_handle_invoke) {
        assert(has_method_handle_invokes(), "must have been set during call generation");
        is_method_handle_invoke = true;
      }
    }

    // Check if a call returns an object.
    if (mcall->return_value_is_used() &&
        mcall->tf()->range()->field_at(TypeFunc::Parms)->isa_ptr()) {
      return_oop = true;
    }
    // For calls, the debug info is associated with the return address,
    // not the call instruction itself.
    safepoint_pc_offset += mcall->ret_addr_offset();
    debug_info()->add_safepoint(safepoint_pc_offset, mcall->_oop_map);
  }

  // Loop over the JVMState list to add scope information
  // Do not skip safepoints with a NULL method, they need monitor info
  JVMState* youngest_jvms = sfn->jvms();
  int max_depth = youngest_jvms->depth();

  // Allocate the object pool for scalar-replaced objects -- the map from
  // small-integer keys (which can be recorded in the local and ostack
  // arrays) to descriptions of the object state.
  GrowableArray<ScopeValue*> *objs = new GrowableArray<ScopeValue*>();

  // Visit scopes from oldest to youngest.
  for (int depth = 1; depth <= max_depth; depth++) {
    JVMState* jvms = youngest_jvms->of_depth(depth);
    int idx;
    ciMethod* method = jvms->has_method() ? jvms->method() : NULL;
    // Safepoints that do not have method() set only provide oop-map and monitor info
    // to support GC; these do not support deoptimization.
    int num_locs = (method == NULL) ? 0 : jvms->loc_size();
    int num_exps = (method == NULL) ? 0 : jvms->stk_size();
    int num_mon  = jvms->nof_monitors();
    assert(method == NULL || jvms->bci() < 0 || num_locs == method->max_locals(),
           "JVMS local count must match that of the method");

    // Add Local and Expression Stack Information

    // Insert locals into the locarray
    GrowableArray<ScopeValue*> *locarray = new GrowableArray<ScopeValue*>(num_locs);
    for( idx = 0; idx < num_locs; idx++ ) {
      FillLocArray( idx, sfn, sfn->local(jvms, idx), locarray, objs );
    }

    // Insert expression stack entries into the exparray
    GrowableArray<ScopeValue*> *exparray = new GrowableArray<ScopeValue*>(num_exps);
    for( idx = 0; idx < num_exps; idx++ ) {
      FillLocArray( idx,  sfn, sfn->stack(jvms, idx), exparray, objs );
    }

    // Add in mappings of the monitors
    assert( !method ||
            !method->is_synchronized() ||
            method->is_native() ||
            num_mon > 0 ||
            !GenerateSynchronizationCode,
            "monitors must always exist for synchronized methods");

    // Build the growable array of ScopeValues for exp stack
    GrowableArray<MonitorValue*> *monarray = new GrowableArray<MonitorValue*>(num_mon);

    // Loop over monitors and insert into array
    for (idx = 0; idx < num_mon; idx++) {
      // Grab the node that defines this monitor
      Node* box_node = sfn->monitor_box(jvms, idx);
      Node* obj_node = sfn->monitor_obj(jvms, idx);

      // Create ScopeValue for object
      ScopeValue *scval = NULL;

      if (obj_node->is_SafePointScalarObject()) {
        // The locked object was scalar-replaced: describe it via the
        // shared object pool, building the entry on first encounter.
        SafePointScalarObjectNode* spobj = obj_node->as_SafePointScalarObject();
        scval = Compile::sv_for_node_id(objs, spobj->_idx);
        if (scval == NULL) {
          const Type *t = spobj->bottom_type();
          ciKlass* cik = t->is_oopptr()->klass();
          assert(cik->is_instance_klass() ||
                 cik->is_array_klass(), "Not supported allocation.");
          ObjectValue* sv = new ObjectValue(spobj->_idx,
                                            new ConstantOopWriteValue(cik->java_mirror()->constant_encoding()));
          Compile::set_sv_for_object_node(objs, sv);

          uint first_ind = spobj->first_index(youngest_jvms);
          for (uint i = 0; i < spobj->n_fields(); i++) {
            Node* fld_node = sfn->in(first_ind+i);
            (void)FillLocArray(sv->field_values()->length(), sfn, fld_node, sv->field_values(), objs);
          }
          scval = sv;
        }
      } else if (!obj_node->is_Con()) {
        // Non-constant object: record its register or stack location,
        // distinguishing compressed (narrow) oops from full-width oops.
        OptoReg::Name obj_reg = _regalloc->get_reg_first(obj_node);
        if( obj_node->bottom_type()->base() == Type::NarrowOop ) {
          scval = new_loc_value( _regalloc, obj_reg, Location::narrowoop );
        } else {
          scval = new_loc_value( _regalloc, obj_reg, Location::oop );
        }
      } else {
        // Constant object: embed its encoding directly.
        const TypePtr *tp = obj_node->get_ptr_type();
        scval = new ConstantOopWriteValue(tp->is_oopptr()->const_oop()->constant_encoding());
      }

      // The BasicLock for this monitor always lives in a stack slot.
      OptoReg::Name box_reg = BoxLockNode::reg(box_node);
      Location basic_lock = Location::new_stk_loc(Location::normal,_regalloc->reg2offset(box_reg));
      bool eliminated = (box_node->is_BoxLock() && box_node->as_BoxLock()->is_eliminated());
      monarray->append(new MonitorValue(scval, basic_lock, eliminated));
    }

    // We dump the object pool first, since deoptimization reads it in first.
    debug_info()->dump_object_pool(objs);

    // Build first class objects to pass to scope
    DebugToken *locvals = debug_info()->create_scope_values(locarray);
    DebugToken *expvals = debug_info()->create_scope_values(exparray);
    DebugToken *monvals = debug_info()->create_monitor_values(monarray);

    // Make method available for all Safepoints
    ciMethod* scope_method = method ? method : _method;
    // Describe the scope here
    assert(jvms->bci() >= InvocationEntryBci && jvms->bci() <= 0x10000, "must be a valid or entry BCI");
    assert(!jvms->should_reexecute() || depth == max_depth, "reexecute allowed only for the youngest");
    // Now we can describe the scope.
    debug_info()->describe_scope(safepoint_pc_offset, scope_method, jvms->bci(), jvms->should_reexecute(), is_method_handle_invoke, return_oop, locvals, expvals, monvals);
  } // End jvms loop

  // Mark the end of the scope set.
  debug_info()->end_safepoint(safepoint_pc_offset);
}
duke@435 | 962 | |
duke@435 | 963 | |
duke@435 | 964 | |
// A simplified version of Process_OopMap_Node, to handle non-safepoints.
// Buffers the most recent JVMS seen on emitted instructions and lazily
// records it as a non-safepoint in the debug info; a pending record is
// flushed when a different JVMS appears, when a real safepoint is emitted,
// or at the end of code emission.
class NonSafepointEmitter {
  Compile*  C;
  JVMState* _pending_jvms;   // JVMS awaiting emission, or NULL if none pending
  int       _pending_offset; // pc offset up to which _pending_jvms applies

  // Record _pending_jvms at _pending_offset (defined out of line below).
  void emit_non_safepoint();

 public:
  NonSafepointEmitter(Compile* compile) {
    this->C = compile;
    _pending_jvms = NULL;
    _pending_offset = 0;
  }

  // Called for each emitted instruction; tracks its node-note JVMS.
  void observe_instruction(Node* n, int pc_offset) {
    if (!C->debug_info()->recording_non_safepoints())  return;

    Node_Notes* nn = C->node_notes_at(n->_idx);
    if (nn == NULL || nn->jvms() == NULL)  return;
    if (_pending_jvms != NULL &&
        _pending_jvms->same_calls_as(nn->jvms())) {
      // Repeated JVMS?  Stretch it up here.
      _pending_offset = pc_offset;
    } else {
      // Different JVMS: flush whatever is pending before this offset.
      if (_pending_jvms != NULL &&
          _pending_offset < pc_offset) {
        emit_non_safepoint();
      }
      _pending_jvms = NULL;
      if (pc_offset > C->debug_info()->last_pc_offset()) {
        // This is the only way _pending_jvms can become non-NULL:
        _pending_jvms = nn->jvms();
        _pending_offset = pc_offset;
      }
    }
  }

  // Stay out of the way of real safepoints:
  void observe_safepoint(JVMState* jvms, int pc_offset) {
    if (_pending_jvms != NULL &&
        !_pending_jvms->same_calls_as(jvms) &&
        _pending_offset < pc_offset) {
      emit_non_safepoint();
    }
    _pending_jvms = NULL;
  }

  // Flush any still-pending record once all instructions are emitted.
  void flush_at_end() {
    if (_pending_jvms != NULL) {
      emit_non_safepoint();
    }
    _pending_jvms = NULL;
  }
};
duke@435 | 1020 | |
duke@435 | 1021 | void NonSafepointEmitter::emit_non_safepoint() { |
duke@435 | 1022 | JVMState* youngest_jvms = _pending_jvms; |
duke@435 | 1023 | int pc_offset = _pending_offset; |
duke@435 | 1024 | |
duke@435 | 1025 | // Clear it now: |
duke@435 | 1026 | _pending_jvms = NULL; |
duke@435 | 1027 | |
duke@435 | 1028 | DebugInformationRecorder* debug_info = C->debug_info(); |
duke@435 | 1029 | assert(debug_info->recording_non_safepoints(), "sanity"); |
duke@435 | 1030 | |
duke@435 | 1031 | debug_info->add_non_safepoint(pc_offset); |
duke@435 | 1032 | int max_depth = youngest_jvms->depth(); |
duke@435 | 1033 | |
duke@435 | 1034 | // Visit scopes from oldest to youngest. |
duke@435 | 1035 | for (int depth = 1; depth <= max_depth; depth++) { |
duke@435 | 1036 | JVMState* jvms = youngest_jvms->of_depth(depth); |
duke@435 | 1037 | ciMethod* method = jvms->has_method() ? jvms->method() : NULL; |
cfang@1335 | 1038 | assert(!jvms->should_reexecute() || depth==max_depth, "reexecute allowed only for the youngest"); |
cfang@1335 | 1039 | debug_info->describe_scope(pc_offset, method, jvms->bci(), jvms->should_reexecute()); |
duke@435 | 1040 | } |
duke@435 | 1041 | |
duke@435 | 1042 | // Mark the end of the scope set. |
duke@435 | 1043 | debug_info->end_non_safepoint(pc_offset); |
duke@435 | 1044 | } |
duke@435 | 1045 | |
//------------------------------init_buffer------------------------------------
// Size and initialize the CodeBuffer for final code emission: fills the
// constant table, runs branch shortening to get block sizes, then allocates
// the buffer sections (consts, insts, stubs, handlers).  'blk_starts' is
// filled with per-block offsets by shorten_branches.  Returns NULL on
// failure (out of memory or code cache full); the compile is then marked
// as failing.
CodeBuffer* Compile::init_buffer(uint* blk_starts) {

  // Set the initially allocated size
  int  code_req   = initial_code_capacity;
  int  locs_req   = initial_locs_capacity;
  int  stub_req   = TraceJumps ? initial_stub_capacity * 10 : initial_stub_capacity;
  int  const_req  = initial_const_capacity;

  int  pad_req    = NativeCall::instruction_size;
  // The extra spacing after the code is necessary on some platforms.
  // Sometimes we need to patch in a jump after the last instruction,
  // if the nmethod has been deoptimized.  (See 4932387, 4894843.)

  // Compute the byte offset where we can store the deopt pc.
  if (fixed_slots() != 0) {
    _orig_pc_slot_offset_in_bytes = _regalloc->reg2offset(OptoReg::stack2reg(_orig_pc_slot));
  }

  // Compute prolog code size
  _method_size = 0;
  _frame_slots = OptoReg::reg2stack(_matcher->_old_SP)+_regalloc->_framesize;
#ifdef IA64
  if (save_argument_registers()) {
    // 4815101: this is a stub with implicit and unknown precision fp args.
    // The usual spill mechanism can only generate stfd's in this case, which
    // doesn't work if the fp reg to spill contains a single-precision denorm.
    // Instead, we hack around the normal spill mechanism using stfspill's and
    // ldffill's in the MachProlog and MachEpilog emit methods.  We allocate
    // space here for the fp arg regs (f8-f15) we're going to thusly spill.
    //
    // If we ever implement 16-byte 'registers' == stack slots, we can
    // get rid of this hack and have SpillCopy generate stfspill/ldffill
    // instead of stfd/stfs/ldfd/ldfs.
    _frame_slots += 8*(16/BytesPerInt);
  }
#endif
  assert(_frame_slots >= 0 && _frame_slots < 1000000, "sanity check");

  if (has_mach_constant_base_node()) {
    // Fill the constant table.
    // Note:  This must happen before shorten_branches.
    for (uint i = 0; i < _cfg->number_of_blocks(); i++) {
      Block* b = _cfg->get_block(i);

      for (uint j = 0; j < b->number_of_nodes(); j++) {
        Node* n = b->get_node(j);

        // If the node is a MachConstantNode evaluate the constant
        // value section.
        if (n->is_MachConstant()) {
          MachConstantNode* machcon = n->as_MachConstant();
          machcon->eval_constant(C);
        }
      }
    }

    // Calculate the offsets of the constants and the size of the
    // constant table (including the padding to the next section).
    constant_table().calculate_offsets_and_size();
    const_req = constant_table().size();
  }

  // Initialize the space for the BufferBlob used to find and verify
  // instruction size in MachNode::emit_size()
  init_scratch_buffer_blob(const_req);
  if (failing())  return NULL; // Out of memory

  // Pre-compute the length of blocks and replace
  // long branches with short if machine supports it.
  // Also updates code_req/locs_req/stub_req with tighter estimates.
  shorten_branches(blk_starts, code_req, locs_req, stub_req);

  // nmethod and CodeBuffer count stubs & constants as part of method's code.
  int exception_handler_req = size_exception_handler();
  int deopt_handler_req = size_deopt_handler();
  exception_handler_req += MAX_stubs_size; // add marginal slop for handler
  deopt_handler_req += MAX_stubs_size; // add marginal slop for handler
  stub_req += MAX_stubs_size;   // ensure per-stub margin
  code_req += MAX_inst_size;    // ensure per-instruction margin

  if (StressCodeBuffers)
    code_req = const_req = stub_req = exception_handler_req = deopt_handler_req = 0x10;  // force expansion

  int total_req =
    const_req +
    code_req +
    pad_req +
    stub_req +
    exception_handler_req +
    deopt_handler_req;   // deopt handler

  // Method-handle invokes need a second copy of the deopt handler.
  if (has_method_handle_invokes())
    total_req += deopt_handler_req;  // deopt MH handler

  CodeBuffer* cb = code_buffer();
  cb->initialize(total_req, locs_req);

  // Have we run out of code space?
  if ((cb->blob() == NULL) || (!CompileBroker::should_compile_new_jobs())) {
    C->record_failure("CodeCache is full");
    return NULL;
  }
  // Configure the code buffer.
  cb->initialize_consts_size(const_req);
  cb->initialize_stubs_size(stub_req);
  cb->initialize_oop_recorder(env()->oop_recorder());

  // fill in the nop array for bundling computations
  MachNode *_nop_list[Bundle::_nop_count];
  Bundle::initialize_nops(_nop_list, this);

  return cb;
}
kvn@3049 | 1159 | |
kvn@3049 | 1160 | //------------------------------fill_buffer------------------------------------ |
kvn@3049 | 1161 | void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) { |
kvn@3055 | 1162 | // blk_starts[] contains offsets calculated during short branches processing, |
kvn@3055 | 1163 | // offsets should not be increased during following steps. |
kvn@3055 | 1164 | |
kvn@3055 | 1165 | // Compute the size of first NumberOfLoopInstrToAlign instructions at head |
kvn@3055 | 1166 | // of a loop. It is used to determine the padding for loop alignment. |
kvn@3055 | 1167 | compute_loop_first_inst_sizes(); |
kvn@3049 | 1168 | |
duke@435 | 1169 | // Create oopmap set. |
duke@435 | 1170 | _oop_map_set = new OopMapSet(); |
duke@435 | 1171 | |
duke@435 | 1172 | // !!!!! This preserves old handling of oopmaps for now |
duke@435 | 1173 | debug_info()->set_oopmaps(_oop_map_set); |
duke@435 | 1174 | |
adlertz@5539 | 1175 | uint nblocks = _cfg->number_of_blocks(); |
duke@435 | 1176 | // Count and start of implicit null check instructions |
duke@435 | 1177 | uint inct_cnt = 0; |
kvn@3055 | 1178 | uint *inct_starts = NEW_RESOURCE_ARRAY(uint, nblocks+1); |
duke@435 | 1179 | |
duke@435 | 1180 | // Count and start of calls |
kvn@3055 | 1181 | uint *call_returns = NEW_RESOURCE_ARRAY(uint, nblocks+1); |
duke@435 | 1182 | |
duke@435 | 1183 | uint return_offset = 0; |
kvn@1294 | 1184 | int nop_size = (new (this) MachNopNode())->size(_regalloc); |
duke@435 | 1185 | |
duke@435 | 1186 | int previous_offset = 0; |
duke@435 | 1187 | int current_offset = 0; |
duke@435 | 1188 | int last_call_offset = -1; |
kvn@3049 | 1189 | int last_avoid_back_to_back_offset = -1; |
kvn@3055 | 1190 | #ifdef ASSERT |
kvn@3055 | 1191 | uint* jmp_target = NEW_RESOURCE_ARRAY(uint,nblocks); |
kvn@3055 | 1192 | uint* jmp_offset = NEW_RESOURCE_ARRAY(uint,nblocks); |
kvn@3055 | 1193 | uint* jmp_size = NEW_RESOURCE_ARRAY(uint,nblocks); |
kvn@3055 | 1194 | uint* jmp_rule = NEW_RESOURCE_ARRAY(uint,nblocks); |
kvn@3049 | 1195 | #endif |
kvn@3055 | 1196 | |
duke@435 | 1197 | // Create an array of unused labels, one for each basic block, if printing is enabled |
duke@435 | 1198 | #ifndef PRODUCT |
duke@435 | 1199 | int *node_offsets = NULL; |
kvn@3049 | 1200 | uint node_offset_limit = unique(); |
kvn@3049 | 1201 | |
kvn@3049 | 1202 | if (print_assembly()) |
duke@435 | 1203 | node_offsets = NEW_RESOURCE_ARRAY(int, node_offset_limit); |
duke@435 | 1204 | #endif |
duke@435 | 1205 | |
duke@435 | 1206 | NonSafepointEmitter non_safepoints(this); // emit non-safepoints lazily |
duke@435 | 1207 | |
twisti@2350 | 1208 | // Emit the constant table. |
twisti@2350 | 1209 | if (has_mach_constant_base_node()) { |
twisti@2350 | 1210 | constant_table().emit(*cb); |
twisti@2350 | 1211 | } |
twisti@2350 | 1212 | |
kvn@3049 | 1213 | // Create an array of labels, one for each basic block |
kvn@3055 | 1214 | Label *blk_labels = NEW_RESOURCE_ARRAY(Label, nblocks+1); |
kvn@3055 | 1215 | for (uint i=0; i <= nblocks; i++) { |
kvn@3049 | 1216 | blk_labels[i].init(); |
kvn@3049 | 1217 | } |
kvn@3049 | 1218 | |
duke@435 | 1219 | // ------------------ |
duke@435 | 1220 | // Now fill in the code buffer |
duke@435 | 1221 | Node *delay_slot = NULL; |
duke@435 | 1222 | |
adlertz@5539 | 1223 | for (uint i = 0; i < nblocks; i++) { |
adlertz@5539 | 1224 | Block* block = _cfg->get_block(i); |
adlertz@5539 | 1225 | Node* head = block->head(); |
duke@435 | 1226 | |
duke@435 | 1227 | // If this block needs to start aligned (i.e, can be reached other |
duke@435 | 1228 | // than by falling-thru from the previous block), then force the |
duke@435 | 1229 | // start of a new bundle. |
adlertz@5539 | 1230 | if (Pipeline::requires_bundling() && starts_bundle(head)) { |
duke@435 | 1231 | cb->flush_bundle(true); |
adlertz@5539 | 1232 | } |
duke@435 | 1233 | |
kvn@3049 | 1234 | #ifdef ASSERT |
adlertz@5539 | 1235 | if (!block->is_connector()) { |
kvn@3049 | 1236 | stringStream st; |
adlertz@5539 | 1237 | block->dump_head(_cfg, &st); |
kvn@3049 | 1238 | MacroAssembler(cb).block_comment(st.as_string()); |
kvn@3049 | 1239 | } |
kvn@3055 | 1240 | jmp_target[i] = 0; |
kvn@3055 | 1241 | jmp_offset[i] = 0; |
kvn@3055 | 1242 | jmp_size[i] = 0; |
kvn@3055 | 1243 | jmp_rule[i] = 0; |
kvn@3049 | 1244 | #endif |
kvn@3055 | 1245 | int blk_offset = current_offset; |
kvn@3049 | 1246 | |
duke@435 | 1247 | // Define the label at the beginning of the basic block |
adlertz@5539 | 1248 | MacroAssembler(cb).bind(blk_labels[block->_pre_order]); |
adlertz@5539 | 1249 | |
adlertz@5635 | 1250 | uint last_inst = block->number_of_nodes(); |
duke@435 | 1251 | |
duke@435 | 1252 | // Emit block normally, except for last instruction. |
duke@435 | 1253 | // Emit means "dump code bits into code buffer". |
kvn@3049 | 1254 | for (uint j = 0; j<last_inst; j++) { |
duke@435 | 1255 | |
duke@435 | 1256 | // Get the node |
adlertz@5635 | 1257 | Node* n = block->get_node(j); |
duke@435 | 1258 | |
duke@435 | 1259 | // See if delay slots are supported |
duke@435 | 1260 | if (valid_bundle_info(n) && |
duke@435 | 1261 | node_bundling(n)->used_in_unconditional_delay()) { |
duke@435 | 1262 | assert(delay_slot == NULL, "no use of delay slot node"); |
duke@435 | 1263 | assert(n->size(_regalloc) == Pipeline::instr_unit_size(), "delay slot instruction wrong size"); |
duke@435 | 1264 | |
duke@435 | 1265 | delay_slot = n; |
duke@435 | 1266 | continue; |
duke@435 | 1267 | } |
duke@435 | 1268 | |
duke@435 | 1269 | // If this starts a new instruction group, then flush the current one |
duke@435 | 1270 | // (but allow split bundles) |
kvn@3049 | 1271 | if (Pipeline::requires_bundling() && starts_bundle(n)) |
duke@435 | 1272 | cb->flush_bundle(false); |
duke@435 | 1273 | |
duke@435 | 1274 | // The following logic is duplicated in the code ifdeffed for |
twisti@1040 | 1275 | // ENABLE_ZAP_DEAD_LOCALS which appears above in this file. It |
duke@435 | 1276 | // should be factored out. Or maybe dispersed to the nodes? |
duke@435 | 1277 | |
duke@435 | 1278 | // Special handling for SafePoint/Call Nodes |
duke@435 | 1279 | bool is_mcall = false; |
kvn@3049 | 1280 | if (n->is_Mach()) { |
duke@435 | 1281 | MachNode *mach = n->as_Mach(); |
duke@435 | 1282 | is_mcall = n->is_MachCall(); |
duke@435 | 1283 | bool is_sfn = n->is_MachSafePoint(); |
duke@435 | 1284 | |
duke@435 | 1285 | // If this requires all previous instructions be flushed, then do so |
kvn@3049 | 1286 | if (is_sfn || is_mcall || mach->alignment_required() != 1) { |
duke@435 | 1287 | cb->flush_bundle(true); |
twisti@2103 | 1288 | current_offset = cb->insts_size(); |
duke@435 | 1289 | } |
duke@435 | 1290 | |
kvn@3049 | 1291 | // A padding may be needed again since a previous instruction |
kvn@3049 | 1292 | // could be moved to delay slot. |
kvn@3049 | 1293 | |
duke@435 | 1294 | // align the instruction if necessary |
duke@435 | 1295 | int padding = mach->compute_padding(current_offset); |
duke@435 | 1296 | // Make sure safepoint node for polling is distinct from a call's |
duke@435 | 1297 | // return by adding a nop if needed. |
kvn@3049 | 1298 | if (is_sfn && !is_mcall && padding == 0 && current_offset == last_call_offset) { |
duke@435 | 1299 | padding = nop_size; |
duke@435 | 1300 | } |
kvn@3049 | 1301 | if (padding == 0 && mach->avoid_back_to_back() && |
kvn@3049 | 1302 | current_offset == last_avoid_back_to_back_offset) { |
kvn@3049 | 1303 | // Avoid back to back some instructions. |
kvn@3049 | 1304 | padding = nop_size; |
duke@435 | 1305 | } |
kvn@3055 | 1306 | |
kvn@3055 | 1307 | if(padding > 0) { |
kvn@3055 | 1308 | assert((padding % nop_size) == 0, "padding is not a multiple of NOP size"); |
kvn@3055 | 1309 | int nops_cnt = padding / nop_size; |
kvn@3055 | 1310 | MachNode *nop = new (this) MachNopNode(nops_cnt); |
adlertz@5635 | 1311 | block->insert_node(nop, j++); |
kvn@3055 | 1312 | last_inst++; |
adlertz@5539 | 1313 | _cfg->map_node_to_block(nop, block); |
kvn@3055 | 1314 | nop->emit(*cb, _regalloc); |
kvn@3055 | 1315 | cb->flush_bundle(true); |
kvn@3055 | 1316 | current_offset = cb->insts_size(); |
kvn@3055 | 1317 | } |
kvn@3055 | 1318 | |
duke@435 | 1319 | // Remember the start of the last call in a basic block |
duke@435 | 1320 | if (is_mcall) { |
duke@435 | 1321 | MachCallNode *mcall = mach->as_MachCall(); |
duke@435 | 1322 | |
duke@435 | 1323 | // This destination address is NOT PC-relative |
duke@435 | 1324 | mcall->method_set((intptr_t)mcall->entry_point()); |
duke@435 | 1325 | |
duke@435 | 1326 | // Save the return address |
adlertz@5539 | 1327 | call_returns[block->_pre_order] = current_offset + mcall->ret_addr_offset(); |
duke@435 | 1328 | |
kvn@3040 | 1329 | if (mcall->is_MachCallLeaf()) { |
duke@435 | 1330 | is_mcall = false; |
duke@435 | 1331 | is_sfn = false; |
duke@435 | 1332 | } |
duke@435 | 1333 | } |
duke@435 | 1334 | |
duke@435 | 1335 | // sfn will be valid whenever mcall is valid now because of inheritance |
kvn@3049 | 1336 | if (is_sfn || is_mcall) { |
duke@435 | 1337 | |
duke@435 | 1338 | // Handle special safepoint nodes for synchronization |
kvn@3049 | 1339 | if (!is_mcall) { |
duke@435 | 1340 | MachSafePointNode *sfn = mach->as_MachSafePoint(); |
duke@435 | 1341 | // !!!!! Stubs only need an oopmap right now, so bail out |
kvn@3049 | 1342 | if (sfn->jvms()->method() == NULL) { |
duke@435 | 1343 | // Write the oopmap directly to the code blob??!! |
duke@435 | 1344 | # ifdef ENABLE_ZAP_DEAD_LOCALS |
duke@435 | 1345 | assert( !is_node_getting_a_safepoint(sfn), "logic does not match; false positive"); |
duke@435 | 1346 | # endif |
duke@435 | 1347 | continue; |
duke@435 | 1348 | } |
duke@435 | 1349 | } // End synchronization |
duke@435 | 1350 | |
duke@435 | 1351 | non_safepoints.observe_safepoint(mach->as_MachSafePoint()->jvms(), |
duke@435 | 1352 | current_offset); |
duke@435 | 1353 | Process_OopMap_Node(mach, current_offset); |
duke@435 | 1354 | } // End if safepoint |
duke@435 | 1355 | |
duke@435 | 1356 | // If this is a null check, then add the start of the previous instruction to the list |
duke@435 | 1357 | else if( mach->is_MachNullCheck() ) { |
duke@435 | 1358 | inct_starts[inct_cnt++] = previous_offset; |
duke@435 | 1359 | } |
duke@435 | 1360 | |
duke@435 | 1361 | // If this is a branch, then fill in the label with the target BB's label |
kvn@3051 | 1362 | else if (mach->is_MachBranch()) { |
kvn@3051 | 1363 | // This requires the TRUE branch target be in succs[0] |
adlertz@5539 | 1364 | uint block_num = block->non_connector_successor(0)->_pre_order; |
kvn@3055 | 1365 | |
kvn@3055 | 1366 | // Try to replace long branch if delay slot is not used, |
kvn@3055 | 1367 | // it is mostly for back branches since forward branch's |
kvn@3055 | 1368 | // distance is not updated yet. |
kvn@3055 | 1369 | bool delay_slot_is_used = valid_bundle_info(n) && |
kvn@3055 | 1370 | node_bundling(n)->use_unconditional_delay(); |
kvn@3055 | 1371 | if (!delay_slot_is_used && mach->may_be_short_branch()) { |
kvn@3055 | 1372 | assert(delay_slot == NULL, "not expecting delay slot node"); |
kvn@3055 | 1373 | int br_size = n->size(_regalloc); |
kvn@3055 | 1374 | int offset = blk_starts[block_num] - current_offset; |
kvn@3055 | 1375 | if (block_num >= i) { |
kvn@3055 | 1376 | // Current and following block's offset are not |
kvn@3055 | 1377 | // finilized yet, adjust distance by the difference |
kvn@3055 | 1378 | // between calculated and final offsets of current block. |
kvn@3055 | 1379 | offset -= (blk_starts[i] - blk_offset); |
kvn@3055 | 1380 | } |
kvn@3055 | 1381 | // In the following code a nop could be inserted before |
kvn@3055 | 1382 | // the branch which will increase the backward distance. |
kvn@3055 | 1383 | bool needs_padding = (current_offset == last_avoid_back_to_back_offset); |
kvn@3055 | 1384 | if (needs_padding && offset <= 0) |
kvn@3055 | 1385 | offset -= nop_size; |
kvn@3055 | 1386 | |
kvn@3055 | 1387 | if (_matcher->is_short_branch_offset(mach->rule(), br_size, offset)) { |
kvn@3055 | 1388 | // We've got a winner. Replace this branch. |
kvn@3055 | 1389 | MachNode* replacement = mach->as_MachBranch()->short_branch_version(this); |
kvn@3055 | 1390 | |
kvn@3055 | 1391 | // Update the jmp_size. |
kvn@3055 | 1392 | int new_size = replacement->size(_regalloc); |
kvn@3055 | 1393 | assert((br_size - new_size) >= (int)nop_size, "short_branch size should be smaller"); |
kvn@3055 | 1394 | // Insert padding between avoid_back_to_back branches. |
kvn@3055 | 1395 | if (needs_padding && replacement->avoid_back_to_back()) { |
kvn@3055 | 1396 | MachNode *nop = new (this) MachNopNode(); |
adlertz@5635 | 1397 | block->insert_node(nop, j++); |
adlertz@5539 | 1398 | _cfg->map_node_to_block(nop, block); |
kvn@3055 | 1399 | last_inst++; |
kvn@3055 | 1400 | nop->emit(*cb, _regalloc); |
kvn@3055 | 1401 | cb->flush_bundle(true); |
kvn@3055 | 1402 | current_offset = cb->insts_size(); |
kvn@3055 | 1403 | } |
kvn@3055 | 1404 | #ifdef ASSERT |
kvn@3055 | 1405 | jmp_target[i] = block_num; |
kvn@3055 | 1406 | jmp_offset[i] = current_offset - blk_offset; |
kvn@3055 | 1407 | jmp_size[i] = new_size; |
kvn@3055 | 1408 | jmp_rule[i] = mach->rule(); |
kvn@3055 | 1409 | #endif |
adlertz@5635 | 1410 | block->map_node(replacement, j); |
bharadwaj@4315 | 1411 | mach->subsume_by(replacement, C); |
kvn@3055 | 1412 | n = replacement; |
kvn@3055 | 1413 | mach = replacement; |
kvn@3055 | 1414 | } |
kvn@3055 | 1415 | } |
kvn@3051 | 1416 | mach->as_MachBranch()->label_set( &blk_labels[block_num], block_num ); |
kvn@3051 | 1417 | } else if (mach->ideal_Opcode() == Op_Jump) { |
adlertz@5539 | 1418 | for (uint h = 0; h < block->_num_succs; h++) { |
adlertz@5539 | 1419 | Block* succs_block = block->_succs[h]; |
kvn@3051 | 1420 | for (uint j = 1; j < succs_block->num_preds(); j++) { |
kvn@3051 | 1421 | Node* jpn = succs_block->pred(j); |
kvn@3051 | 1422 | if (jpn->is_JumpProj() && jpn->in(0) == mach) { |
kvn@3051 | 1423 | uint block_num = succs_block->non_connector()->_pre_order; |
kvn@3051 | 1424 | Label *blkLabel = &blk_labels[block_num]; |
kvn@3051 | 1425 | mach->add_case_label(jpn->as_JumpProj()->proj_no(), blkLabel); |
duke@435 | 1426 | } |
duke@435 | 1427 | } |
duke@435 | 1428 | } |
duke@435 | 1429 | } |
duke@435 | 1430 | #ifdef ASSERT |
twisti@1040 | 1431 | // Check that oop-store precedes the card-mark |
kvn@3049 | 1432 | else if (mach->ideal_Opcode() == Op_StoreCM) { |
duke@435 | 1433 | uint storeCM_idx = j; |
never@2780 | 1434 | int count = 0; |
never@2780 | 1435 | for (uint prec = mach->req(); prec < mach->len(); prec++) { |
never@2780 | 1436 | Node *oop_store = mach->in(prec); // Precedence edge |
never@2780 | 1437 | if (oop_store == NULL) continue; |
never@2780 | 1438 | count++; |
never@2780 | 1439 | uint i4; |
adlertz@5539 | 1440 | for (i4 = 0; i4 < last_inst; ++i4) { |
adlertz@5635 | 1441 | if (block->get_node(i4) == oop_store) { |
adlertz@5539 | 1442 | break; |
adlertz@5539 | 1443 | } |
never@2780 | 1444 | } |
never@2780 | 1445 | // Note: This test can provide a false failure if other precedence |
never@2780 | 1446 | // edges have been added to the storeCMNode. |
adlertz@5539 | 1447 | assert(i4 == last_inst || i4 < storeCM_idx, "CM card-mark executes before oop-store"); |
duke@435 | 1448 | } |
never@2780 | 1449 | assert(count > 0, "storeCM expects at least one precedence edge"); |
duke@435 | 1450 | } |
duke@435 | 1451 | #endif |
kvn@3049 | 1452 | else if (!n->is_Proj()) { |
twisti@1040 | 1453 | // Remember the beginning of the previous instruction, in case |
duke@435 | 1454 | // it's followed by a flag-kill and a null-check. Happens on |
duke@435 | 1455 | // Intel all the time, with add-to-memory kind of opcodes. |
duke@435 | 1456 | previous_offset = current_offset; |
duke@435 | 1457 | } |
duke@435 | 1458 | } |
duke@435 | 1459 | |
duke@435 | 1460 | // Verify that there is sufficient space remaining |
duke@435 | 1461 | cb->insts()->maybe_expand_to_ensure_remaining(MAX_inst_size); |
kvn@1637 | 1462 | if ((cb->blob() == NULL) || (!CompileBroker::should_compile_new_jobs())) { |
neliasso@4952 | 1463 | C->record_failure("CodeCache is full"); |
duke@435 | 1464 | return; |
duke@435 | 1465 | } |
duke@435 | 1466 | |
duke@435 | 1467 | // Save the offset for the listing |
duke@435 | 1468 | #ifndef PRODUCT |
kvn@3049 | 1469 | if (node_offsets && n->_idx < node_offset_limit) |
twisti@2103 | 1470 | node_offsets[n->_idx] = cb->insts_size(); |
duke@435 | 1471 | #endif |
duke@435 | 1472 | |
duke@435 | 1473 | // "Normal" instruction case |
kvn@3049 | 1474 | DEBUG_ONLY( uint instr_offset = cb->insts_size(); ) |
duke@435 | 1475 | n->emit(*cb, _regalloc); |
twisti@2103 | 1476 | current_offset = cb->insts_size(); |
kvn@3049 | 1477 | |
kvn@3049 | 1478 | #ifdef ASSERT |
kvn@3055 | 1479 | if (n->size(_regalloc) < (current_offset-instr_offset)) { |
kvn@3049 | 1480 | n->dump(); |
kvn@3055 | 1481 | assert(false, "wrong size of mach node"); |
kvn@3049 | 1482 | } |
kvn@3049 | 1483 | #endif |
duke@435 | 1484 | non_safepoints.observe_instruction(n, current_offset); |
duke@435 | 1485 | |
duke@435 | 1486 | // mcall is last "call" that can be a safepoint |
duke@435 | 1487 | // record it so we can see if a poll will directly follow it |
duke@435 | 1488 | // in which case we'll need a pad to make the PcDesc sites unique |
duke@435 | 1489 | // see 5010568. This can be slightly inaccurate but conservative |
duke@435 | 1490 | // in the case that return address is not actually at current_offset. |
duke@435 | 1491 | // This is a small price to pay. |
duke@435 | 1492 | |
duke@435 | 1493 | if (is_mcall) { |
duke@435 | 1494 | last_call_offset = current_offset; |
duke@435 | 1495 | } |
duke@435 | 1496 | |
kvn@3049 | 1497 | if (n->is_Mach() && n->as_Mach()->avoid_back_to_back()) { |
kvn@3049 | 1498 | // Avoid back to back some instructions. |
kvn@3049 | 1499 | last_avoid_back_to_back_offset = current_offset; |
kvn@3049 | 1500 | } |
kvn@3049 | 1501 | |
duke@435 | 1502 | // See if this instruction has a delay slot |
kvn@3049 | 1503 | if (valid_bundle_info(n) && node_bundling(n)->use_unconditional_delay()) { |
duke@435 | 1504 | assert(delay_slot != NULL, "expecting delay slot node"); |
duke@435 | 1505 | |
duke@435 | 1506 | // Back up 1 instruction |
twisti@2103 | 1507 | cb->set_insts_end(cb->insts_end() - Pipeline::instr_unit_size()); |
duke@435 | 1508 | |
duke@435 | 1509 | // Save the offset for the listing |
duke@435 | 1510 | #ifndef PRODUCT |
kvn@3049 | 1511 | if (node_offsets && delay_slot->_idx < node_offset_limit) |
twisti@2103 | 1512 | node_offsets[delay_slot->_idx] = cb->insts_size(); |
duke@435 | 1513 | #endif |
duke@435 | 1514 | |
duke@435 | 1515 | // Support a SafePoint in the delay slot |
kvn@3049 | 1516 | if (delay_slot->is_MachSafePoint()) { |
duke@435 | 1517 | MachNode *mach = delay_slot->as_Mach(); |
duke@435 | 1518 | // !!!!! Stubs only need an oopmap right now, so bail out |
kvn@3049 | 1519 | if (!mach->is_MachCall() && mach->as_MachSafePoint()->jvms()->method() == NULL) { |
duke@435 | 1520 | // Write the oopmap directly to the code blob??!! |
duke@435 | 1521 | # ifdef ENABLE_ZAP_DEAD_LOCALS |
duke@435 | 1522 | assert( !is_node_getting_a_safepoint(mach), "logic does not match; false positive"); |
duke@435 | 1523 | # endif |
duke@435 | 1524 | delay_slot = NULL; |
duke@435 | 1525 | continue; |
duke@435 | 1526 | } |
duke@435 | 1527 | |
duke@435 | 1528 | int adjusted_offset = current_offset - Pipeline::instr_unit_size(); |
duke@435 | 1529 | non_safepoints.observe_safepoint(mach->as_MachSafePoint()->jvms(), |
duke@435 | 1530 | adjusted_offset); |
duke@435 | 1531 | // Generate an OopMap entry |
duke@435 | 1532 | Process_OopMap_Node(mach, adjusted_offset); |
duke@435 | 1533 | } |
duke@435 | 1534 | |
duke@435 | 1535 | // Insert the delay slot instruction |
duke@435 | 1536 | delay_slot->emit(*cb, _regalloc); |
duke@435 | 1537 | |
duke@435 | 1538 | // Don't reuse it |
duke@435 | 1539 | delay_slot = NULL; |
duke@435 | 1540 | } |
duke@435 | 1541 | |
duke@435 | 1542 | } // End for all instructions in block |
kvn@3055 | 1543 | |
rasbold@853 | 1544 | // If the next block is the top of a loop, pad this block out to align |
rasbold@853 | 1545 | // the loop top a little. Helps prevent pipe stalls at loop back branches. |
kvn@3055 | 1546 | if (i < nblocks-1) { |
adlertz@5539 | 1547 | Block *nb = _cfg->get_block(i + 1); |
kvn@3055 | 1548 | int padding = nb->alignment_padding(current_offset); |
kvn@3055 | 1549 | if( padding > 0 ) { |
kvn@3055 | 1550 | MachNode *nop = new (this) MachNopNode(padding / nop_size); |
adlertz@5635 | 1551 | block->insert_node(nop, block->number_of_nodes()); |
adlertz@5539 | 1552 | _cfg->map_node_to_block(nop, block); |
kvn@3055 | 1553 | nop->emit(*cb, _regalloc); |
kvn@3055 | 1554 | current_offset = cb->insts_size(); |
kvn@3055 | 1555 | } |
duke@435 | 1556 | } |
kvn@3055 | 1557 | // Verify that the distance for generated before forward |
kvn@3055 | 1558 | // short branches is still valid. |
rasbold@4874 | 1559 | guarantee((int)(blk_starts[i+1] - blk_starts[i]) >= (current_offset - blk_offset), "shouldn't increase block size"); |
rasbold@4874 | 1560 | |
rasbold@4874 | 1561 | // Save new block start offset |
rasbold@4874 | 1562 | blk_starts[i] = blk_offset; |
duke@435 | 1563 | } // End of for all blocks |
kvn@3055 | 1564 | blk_starts[nblocks] = current_offset; |
duke@435 | 1565 | |
duke@435 | 1566 | non_safepoints.flush_at_end(); |
duke@435 | 1567 | |
duke@435 | 1568 | // Offset too large? |
duke@435 | 1569 | if (failing()) return; |
duke@435 | 1570 | |
duke@435 | 1571 | // Define a pseudo-label at the end of the code |
kvn@3055 | 1572 | MacroAssembler(cb).bind( blk_labels[nblocks] ); |
duke@435 | 1573 | |
duke@435 | 1574 | // Compute the size of the first block |
duke@435 | 1575 | _first_block_size = blk_labels[1].loc_pos() - blk_labels[0].loc_pos(); |
duke@435 | 1576 | |
twisti@2103 | 1577 | assert(cb->insts_size() < 500000, "method is unreasonably large"); |
duke@435 | 1578 | |
kvn@3055 | 1579 | #ifdef ASSERT |
kvn@3055 | 1580 | for (uint i = 0; i < nblocks; i++) { // For all blocks |
kvn@3055 | 1581 | if (jmp_target[i] != 0) { |
kvn@3055 | 1582 | int br_size = jmp_size[i]; |
kvn@3055 | 1583 | int offset = blk_starts[jmp_target[i]]-(blk_starts[i] + jmp_offset[i]); |
kvn@3055 | 1584 | if (!_matcher->is_short_branch_offset(jmp_rule[i], br_size, offset)) { |
kvn@3055 | 1585 | tty->print_cr("target (%d) - jmp_offset(%d) = offset (%d), jump_size(%d), jmp_block B%d, target_block B%d", blk_starts[jmp_target[i]], blk_starts[i] + jmp_offset[i], offset, br_size, i, jmp_target[i]); |
kvn@3055 | 1586 | assert(false, "Displacement too large for short jmp"); |
kvn@3055 | 1587 | } |
kvn@3055 | 1588 | } |
kvn@3055 | 1589 | } |
kvn@3055 | 1590 | #endif |
kvn@3055 | 1591 | |
duke@435 | 1592 | #ifndef PRODUCT |
duke@435 | 1593 | // Information on the size of the method, without the extraneous code |
twisti@2103 | 1594 | Scheduling::increment_method_size(cb->insts_size()); |
duke@435 | 1595 | #endif |
duke@435 | 1596 | |
duke@435 | 1597 | // ------------------ |
duke@435 | 1598 | // Fill in exception table entries. |
duke@435 | 1599 | FillExceptionTables(inct_cnt, call_returns, inct_starts, blk_labels); |
duke@435 | 1600 | |
duke@435 | 1601 | // Only java methods have exception handlers and deopt handlers |
duke@435 | 1602 | if (_method) { |
duke@435 | 1603 | // Emit the exception handler code. |
duke@435 | 1604 | _code_offsets.set_value(CodeOffsets::Exceptions, emit_exception_handler(*cb)); |
duke@435 | 1605 | // Emit the deopt handler code. |
duke@435 | 1606 | _code_offsets.set_value(CodeOffsets::Deopt, emit_deopt_handler(*cb)); |
twisti@1700 | 1607 | |
twisti@1700 | 1608 | // Emit the MethodHandle deopt handler code (if required). |
twisti@1700 | 1609 | if (has_method_handle_invokes()) { |
twisti@1700 | 1610 | // We can use the same code as for the normal deopt handler, we |
twisti@1700 | 1611 | // just need a different entry point address. |
twisti@1700 | 1612 | _code_offsets.set_value(CodeOffsets::DeoptMH, emit_deopt_handler(*cb)); |
twisti@1700 | 1613 | } |
duke@435 | 1614 | } |
duke@435 | 1615 | |
duke@435 | 1616 | // One last check for failed CodeBuffer::expand: |
kvn@1637 | 1617 | if ((cb->blob() == NULL) || (!CompileBroker::should_compile_new_jobs())) { |
neliasso@4952 | 1618 | C->record_failure("CodeCache is full"); |
duke@435 | 1619 | return; |
duke@435 | 1620 | } |
duke@435 | 1621 | |
duke@435 | 1622 | #ifndef PRODUCT |
duke@435 | 1623 | // Dump the assembly code, including basic-block numbers |
duke@435 | 1624 | if (print_assembly()) { |
duke@435 | 1625 | ttyLocker ttyl; // keep the following output all in one block |
duke@435 | 1626 | if (!VMThread::should_terminate()) { // test this under the tty lock |
duke@435 | 1627 | // This output goes directly to the tty, not the compiler log. |
duke@435 | 1628 | // To enable tools to match it up with the compilation activity, |
duke@435 | 1629 | // be sure to tag this tty output with the compile ID. |
duke@435 | 1630 | if (xtty != NULL) { |
duke@435 | 1631 | xtty->head("opto_assembly compile_id='%d'%s", compile_id(), |
duke@435 | 1632 | is_osr_compilation() ? " compile_kind='osr'" : |
duke@435 | 1633 | ""); |
duke@435 | 1634 | } |
duke@435 | 1635 | if (method() != NULL) { |
coleenp@4037 | 1636 | method()->print_metadata(); |
duke@435 | 1637 | } |
duke@435 | 1638 | dump_asm(node_offsets, node_offset_limit); |
duke@435 | 1639 | if (xtty != NULL) { |
duke@435 | 1640 | xtty->tail("opto_assembly"); |
duke@435 | 1641 | } |
duke@435 | 1642 | } |
duke@435 | 1643 | } |
duke@435 | 1644 | #endif |
duke@435 | 1645 | |
duke@435 | 1646 | } |
duke@435 | 1647 | |
duke@435 | 1648 | void Compile::FillExceptionTables(uint cnt, uint *call_returns, uint *inct_starts, Label *blk_labels) { |
duke@435 | 1649 | _inc_table.set_size(cnt); |
duke@435 | 1650 | |
duke@435 | 1651 | uint inct_cnt = 0; |
adlertz@5539 | 1652 | for (uint i = 0; i < _cfg->number_of_blocks(); i++) { |
adlertz@5539 | 1653 | Block* block = _cfg->get_block(i); |
duke@435 | 1654 | Node *n = NULL; |
duke@435 | 1655 | int j; |
duke@435 | 1656 | |
duke@435 | 1657 | // Find the branch; ignore trailing NOPs. |
adlertz@5635 | 1658 | for (j = block->number_of_nodes() - 1; j >= 0; j--) { |
adlertz@5635 | 1659 | n = block->get_node(j); |
adlertz@5539 | 1660 | if (!n->is_Mach() || n->as_Mach()->ideal_Opcode() != Op_Con) { |
duke@435 | 1661 | break; |
adlertz@5539 | 1662 | } |
duke@435 | 1663 | } |
duke@435 | 1664 | |
duke@435 | 1665 | // If we didn't find anything, continue |
adlertz@5539 | 1666 | if (j < 0) { |
adlertz@5539 | 1667 | continue; |
adlertz@5539 | 1668 | } |
duke@435 | 1669 | |
duke@435 | 1670 | // Compute ExceptionHandlerTable subtable entry and add it |
duke@435 | 1671 | // (skip empty blocks) |
adlertz@5539 | 1672 | if (n->is_Catch()) { |
duke@435 | 1673 | |
duke@435 | 1674 | // Get the offset of the return from the call |
adlertz@5539 | 1675 | uint call_return = call_returns[block->_pre_order]; |
duke@435 | 1676 | #ifdef ASSERT |
duke@435 | 1677 | assert( call_return > 0, "no call seen for this basic block" ); |
adlertz@5635 | 1678 | while (block->get_node(--j)->is_MachProj()) ; |
adlertz@5635 | 1679 | assert(block->get_node(j)->is_MachCall(), "CatchProj must follow call"); |
duke@435 | 1680 | #endif |
duke@435 | 1681 | // last instruction is a CatchNode, find it's CatchProjNodes |
adlertz@5539 | 1682 | int nof_succs = block->_num_succs; |
duke@435 | 1683 | // allocate space |
duke@435 | 1684 | GrowableArray<intptr_t> handler_bcis(nof_succs); |
duke@435 | 1685 | GrowableArray<intptr_t> handler_pcos(nof_succs); |
duke@435 | 1686 | // iterate through all successors |
duke@435 | 1687 | for (int j = 0; j < nof_succs; j++) { |
adlertz@5539 | 1688 | Block* s = block->_succs[j]; |
duke@435 | 1689 | bool found_p = false; |
adlertz@5539 | 1690 | for (uint k = 1; k < s->num_preds(); k++) { |
adlertz@5539 | 1691 | Node* pk = s->pred(k); |
adlertz@5539 | 1692 | if (pk->is_CatchProj() && pk->in(0) == n) { |
duke@435 | 1693 | const CatchProjNode* p = pk->as_CatchProj(); |
duke@435 | 1694 | found_p = true; |
duke@435 | 1695 | // add the corresponding handler bci & pco information |
adlertz@5539 | 1696 | if (p->_con != CatchProjNode::fall_through_index) { |
duke@435 | 1697 | // p leads to an exception handler (and is not fall through) |
adlertz@5539 | 1698 | assert(s == _cfg->get_block(s->_pre_order), "bad numbering"); |
duke@435 | 1699 | // no duplicates, please |
adlertz@5539 | 1700 | if (!handler_bcis.contains(p->handler_bci())) { |
duke@435 | 1701 | uint block_num = s->non_connector()->_pre_order; |
duke@435 | 1702 | handler_bcis.append(p->handler_bci()); |
duke@435 | 1703 | handler_pcos.append(blk_labels[block_num].loc_pos()); |
duke@435 | 1704 | } |
duke@435 | 1705 | } |
duke@435 | 1706 | } |
duke@435 | 1707 | } |
duke@435 | 1708 | assert(found_p, "no matching predecessor found"); |
duke@435 | 1709 | // Note: Due to empty block removal, one block may have |
duke@435 | 1710 | // several CatchProj inputs, from the same Catch. |
duke@435 | 1711 | } |
duke@435 | 1712 | |
duke@435 | 1713 | // Set the offset of the return from the call |
duke@435 | 1714 | _handler_table.add_subtable(call_return, &handler_bcis, NULL, &handler_pcos); |
duke@435 | 1715 | continue; |
duke@435 | 1716 | } |
duke@435 | 1717 | |
duke@435 | 1718 | // Handle implicit null exception table updates |
adlertz@5539 | 1719 | if (n->is_MachNullCheck()) { |
adlertz@5539 | 1720 | uint block_num = block->non_connector_successor(0)->_pre_order; |
adlertz@5539 | 1721 | _inc_table.append(inct_starts[inct_cnt++], blk_labels[block_num].loc_pos()); |
duke@435 | 1722 | continue; |
duke@435 | 1723 | } |
duke@435 | 1724 | } // End of for all blocks fill in exception table entries |
duke@435 | 1725 | } |
duke@435 | 1726 | |
duke@435 | 1727 | // Static Variables |
duke@435 | 1728 | #ifndef PRODUCT |
duke@435 | 1729 | uint Scheduling::_total_nop_size = 0; |
duke@435 | 1730 | uint Scheduling::_total_method_size = 0; |
duke@435 | 1731 | uint Scheduling::_total_branches = 0; |
duke@435 | 1732 | uint Scheduling::_total_unconditional_delays = 0; |
duke@435 | 1733 | uint Scheduling::_total_instructions_per_bundle[Pipeline::_max_instrs_per_cycle+1]; |
duke@435 | 1734 | #endif |
duke@435 | 1735 | |
duke@435 | 1736 | // Initializer for class Scheduling |
duke@435 | 1737 | |
duke@435 | 1738 | Scheduling::Scheduling(Arena *arena, Compile &compile) |
duke@435 | 1739 | : _arena(arena), |
duke@435 | 1740 | _cfg(compile.cfg()), |
duke@435 | 1741 | _regalloc(compile.regalloc()), |
duke@435 | 1742 | _reg_node(arena), |
duke@435 | 1743 | _bundle_instr_count(0), |
duke@435 | 1744 | _bundle_cycle_number(0), |
duke@435 | 1745 | _scheduled(arena), |
duke@435 | 1746 | _available(arena), |
duke@435 | 1747 | _next_node(NULL), |
duke@435 | 1748 | _bundle_use(0, 0, resource_count, &_bundle_use_elements[0]), |
duke@435 | 1749 | _pinch_free_list(arena) |
duke@435 | 1750 | #ifndef PRODUCT |
duke@435 | 1751 | , _branches(0) |
duke@435 | 1752 | , _unconditional_delays(0) |
duke@435 | 1753 | #endif |
duke@435 | 1754 | { |
duke@435 | 1755 | // Create a MachNopNode |
duke@435 | 1756 | _nop = new (&compile) MachNopNode(); |
duke@435 | 1757 | |
duke@435 | 1758 | // Now that the nops are in the array, save the count |
duke@435 | 1759 | // (but allow entries for the nops) |
duke@435 | 1760 | _node_bundling_limit = compile.unique(); |
duke@435 | 1761 | uint node_max = _regalloc->node_regs_max_index(); |
duke@435 | 1762 | |
duke@435 | 1763 | compile.set_node_bundling_limit(_node_bundling_limit); |
duke@435 | 1764 | |
twisti@1040 | 1765 | // This one is persistent within the Compile class |
duke@435 | 1766 | _node_bundling_base = NEW_ARENA_ARRAY(compile.comp_arena(), Bundle, node_max); |
duke@435 | 1767 | |
duke@435 | 1768 | // Allocate space for fixed-size arrays |
duke@435 | 1769 | _node_latency = NEW_ARENA_ARRAY(arena, unsigned short, node_max); |
duke@435 | 1770 | _uses = NEW_ARENA_ARRAY(arena, short, node_max); |
duke@435 | 1771 | _current_latency = NEW_ARENA_ARRAY(arena, unsigned short, node_max); |
duke@435 | 1772 | |
duke@435 | 1773 | // Clear the arrays |
duke@435 | 1774 | memset(_node_bundling_base, 0, node_max * sizeof(Bundle)); |
duke@435 | 1775 | memset(_node_latency, 0, node_max * sizeof(unsigned short)); |
duke@435 | 1776 | memset(_uses, 0, node_max * sizeof(short)); |
duke@435 | 1777 | memset(_current_latency, 0, node_max * sizeof(unsigned short)); |
duke@435 | 1778 | |
duke@435 | 1779 | // Clear the bundling information |
adlertz@5539 | 1780 | memcpy(_bundle_use_elements, Pipeline_Use::elaborated_elements, sizeof(Pipeline_Use::elaborated_elements)); |
duke@435 | 1781 | |
duke@435 | 1782 | // Get the last node |
adlertz@5539 | 1783 | Block* block = _cfg->get_block(_cfg->number_of_blocks() - 1); |
adlertz@5539 | 1784 | |
adlertz@5635 | 1785 | _next_node = block->get_node(block->number_of_nodes() - 1); |
duke@435 | 1786 | } |
duke@435 | 1787 | |
duke@435 | 1788 | #ifndef PRODUCT |
duke@435 | 1789 | // Scheduling destructor |
duke@435 | 1790 | Scheduling::~Scheduling() { |
duke@435 | 1791 | _total_branches += _branches; |
duke@435 | 1792 | _total_unconditional_delays += _unconditional_delays; |
duke@435 | 1793 | } |
duke@435 | 1794 | #endif |
duke@435 | 1795 | |
duke@435 | 1796 | // Step ahead "i" cycles |
duke@435 | 1797 | void Scheduling::step(uint i) { |
duke@435 | 1798 | |
duke@435 | 1799 | Bundle *bundle = node_bundling(_next_node); |
duke@435 | 1800 | bundle->set_starts_bundle(); |
duke@435 | 1801 | |
duke@435 | 1802 | // Update the bundle record, but leave the flags information alone |
duke@435 | 1803 | if (_bundle_instr_count > 0) { |
duke@435 | 1804 | bundle->set_instr_count(_bundle_instr_count); |
duke@435 | 1805 | bundle->set_resources_used(_bundle_use.resourcesUsed()); |
duke@435 | 1806 | } |
duke@435 | 1807 | |
duke@435 | 1808 | // Update the state information |
duke@435 | 1809 | _bundle_instr_count = 0; |
duke@435 | 1810 | _bundle_cycle_number += i; |
duke@435 | 1811 | _bundle_use.step(i); |
duke@435 | 1812 | } |
duke@435 | 1813 | |
duke@435 | 1814 | void Scheduling::step_and_clear() { |
duke@435 | 1815 | Bundle *bundle = node_bundling(_next_node); |
duke@435 | 1816 | bundle->set_starts_bundle(); |
duke@435 | 1817 | |
duke@435 | 1818 | // Update the bundle record |
duke@435 | 1819 | if (_bundle_instr_count > 0) { |
duke@435 | 1820 | bundle->set_instr_count(_bundle_instr_count); |
duke@435 | 1821 | bundle->set_resources_used(_bundle_use.resourcesUsed()); |
duke@435 | 1822 | |
duke@435 | 1823 | _bundle_cycle_number += 1; |
duke@435 | 1824 | } |
duke@435 | 1825 | |
duke@435 | 1826 | // Clear the bundling information |
duke@435 | 1827 | _bundle_instr_count = 0; |
duke@435 | 1828 | _bundle_use.reset(); |
duke@435 | 1829 | |
duke@435 | 1830 | memcpy(_bundle_use_elements, |
duke@435 | 1831 | Pipeline_Use::elaborated_elements, |
duke@435 | 1832 | sizeof(Pipeline_Use::elaborated_elements)); |
duke@435 | 1833 | } |
duke@435 | 1834 | |
duke@435 | 1835 | // Perform instruction scheduling and bundling over the sequence of |
duke@435 | 1836 | // instructions in backwards order. |
duke@435 | 1837 | void Compile::ScheduleAndBundle() { |
duke@435 | 1838 | |
duke@435 | 1839 | // Don't optimize this if it isn't a method |
duke@435 | 1840 | if (!_method) |
duke@435 | 1841 | return; |
duke@435 | 1842 | |
duke@435 | 1843 | // Don't optimize this if scheduling is disabled |
duke@435 | 1844 | if (!do_scheduling()) |
duke@435 | 1845 | return; |
duke@435 | 1846 | |
kvn@4103 | 1847 | // Scheduling code works only with pairs (8 bytes) maximum. |
kvn@4103 | 1848 | if (max_vector_size() > 8) |
kvn@4103 | 1849 | return; |
kvn@4007 | 1850 | |
duke@435 | 1851 | NOT_PRODUCT( TracePhase t2("isched", &_t_instrSched, TimeCompiler); ) |
duke@435 | 1852 | |
duke@435 | 1853 | // Create a data structure for all the scheduling information |
duke@435 | 1854 | Scheduling scheduling(Thread::current()->resource_area(), *this); |
duke@435 | 1855 | |
duke@435 | 1856 | // Walk backwards over each basic block, computing the needed alignment |
duke@435 | 1857 | // Walk over all the basic blocks |
duke@435 | 1858 | scheduling.DoScheduling(); |
duke@435 | 1859 | } |
duke@435 | 1860 | |
duke@435 | 1861 | // Compute the latency of all the instructions. This is fairly simple, |
duke@435 | 1862 | // because we already have a legal ordering. Walk over the instructions |
duke@435 | 1863 | // from first to last, and compute the latency of the instruction based |
twisti@1040 | 1864 | // on the latency of the preceding instruction(s). |
duke@435 | 1865 | void Scheduling::ComputeLocalLatenciesForward(const Block *bb) { |
duke@435 | 1866 | #ifndef PRODUCT |
duke@435 | 1867 | if (_cfg->C->trace_opto_output()) |
duke@435 | 1868 | tty->print("# -> ComputeLocalLatenciesForward\n"); |
duke@435 | 1869 | #endif |
duke@435 | 1870 | |
duke@435 | 1871 | // Walk over all the schedulable instructions |
duke@435 | 1872 | for( uint j=_bb_start; j < _bb_end; j++ ) { |
duke@435 | 1873 | |
duke@435 | 1874 | // This is a kludge, forcing all latency calculations to start at 1. |
duke@435 | 1875 | // Used to allow latency 0 to force an instruction to the beginning |
duke@435 | 1876 | // of the bb |
duke@435 | 1877 | uint latency = 1; |
adlertz@5635 | 1878 | Node *use = bb->get_node(j); |
duke@435 | 1879 | uint nlen = use->len(); |
duke@435 | 1880 | |
duke@435 | 1881 | // Walk over all the inputs |
duke@435 | 1882 | for ( uint k=0; k < nlen; k++ ) { |
duke@435 | 1883 | Node *def = use->in(k); |
duke@435 | 1884 | if (!def) |
duke@435 | 1885 | continue; |
duke@435 | 1886 | |
duke@435 | 1887 | uint l = _node_latency[def->_idx] + use->latency(k); |
duke@435 | 1888 | if (latency < l) |
duke@435 | 1889 | latency = l; |
duke@435 | 1890 | } |
duke@435 | 1891 | |
duke@435 | 1892 | _node_latency[use->_idx] = latency; |
duke@435 | 1893 | |
duke@435 | 1894 | #ifndef PRODUCT |
duke@435 | 1895 | if (_cfg->C->trace_opto_output()) { |
duke@435 | 1896 | tty->print("# latency %4d: ", latency); |
duke@435 | 1897 | use->dump(); |
duke@435 | 1898 | } |
duke@435 | 1899 | #endif |
duke@435 | 1900 | } |
duke@435 | 1901 | |
duke@435 | 1902 | #ifndef PRODUCT |
duke@435 | 1903 | if (_cfg->C->trace_opto_output()) |
duke@435 | 1904 | tty->print("# <- ComputeLocalLatenciesForward\n"); |
duke@435 | 1905 | #endif |
duke@435 | 1906 | |
duke@435 | 1907 | } // end ComputeLocalLatenciesForward |
duke@435 | 1908 | |
duke@435 | 1909 | // See if this node fits into the present instruction bundle |
duke@435 | 1910 | bool Scheduling::NodeFitsInBundle(Node *n) { |
duke@435 | 1911 | uint n_idx = n->_idx; |
duke@435 | 1912 | |
duke@435 | 1913 | // If this is the unconditional delay instruction, then it fits |
duke@435 | 1914 | if (n == _unconditional_delay_slot) { |
duke@435 | 1915 | #ifndef PRODUCT |
duke@435 | 1916 | if (_cfg->C->trace_opto_output()) |
duke@435 | 1917 | tty->print("# NodeFitsInBundle [%4d]: TRUE; is in unconditional delay slot\n", n->_idx); |
duke@435 | 1918 | #endif |
duke@435 | 1919 | return (true); |
duke@435 | 1920 | } |
duke@435 | 1921 | |
duke@435 | 1922 | // If the node cannot be scheduled this cycle, skip it |
duke@435 | 1923 | if (_current_latency[n_idx] > _bundle_cycle_number) { |
duke@435 | 1924 | #ifndef PRODUCT |
duke@435 | 1925 | if (_cfg->C->trace_opto_output()) |
duke@435 | 1926 | tty->print("# NodeFitsInBundle [%4d]: FALSE; latency %4d > %d\n", |
duke@435 | 1927 | n->_idx, _current_latency[n_idx], _bundle_cycle_number); |
duke@435 | 1928 | #endif |
duke@435 | 1929 | return (false); |
duke@435 | 1930 | } |
duke@435 | 1931 | |
duke@435 | 1932 | const Pipeline *node_pipeline = n->pipeline(); |
duke@435 | 1933 | |
duke@435 | 1934 | uint instruction_count = node_pipeline->instructionCount(); |
duke@435 | 1935 | if (node_pipeline->mayHaveNoCode() && n->size(_regalloc) == 0) |
duke@435 | 1936 | instruction_count = 0; |
duke@435 | 1937 | else if (node_pipeline->hasBranchDelay() && !_unconditional_delay_slot) |
duke@435 | 1938 | instruction_count++; |
duke@435 | 1939 | |
duke@435 | 1940 | if (_bundle_instr_count + instruction_count > Pipeline::_max_instrs_per_cycle) { |
duke@435 | 1941 | #ifndef PRODUCT |
duke@435 | 1942 | if (_cfg->C->trace_opto_output()) |
duke@435 | 1943 | tty->print("# NodeFitsInBundle [%4d]: FALSE; too many instructions: %d > %d\n", |
duke@435 | 1944 | n->_idx, _bundle_instr_count + instruction_count, Pipeline::_max_instrs_per_cycle); |
duke@435 | 1945 | #endif |
duke@435 | 1946 | return (false); |
duke@435 | 1947 | } |
duke@435 | 1948 | |
duke@435 | 1949 | // Don't allow non-machine nodes to be handled this way |
duke@435 | 1950 | if (!n->is_Mach() && instruction_count == 0) |
duke@435 | 1951 | return (false); |
duke@435 | 1952 | |
duke@435 | 1953 | // See if there is any overlap |
duke@435 | 1954 | uint delay = _bundle_use.full_latency(0, node_pipeline->resourceUse()); |
duke@435 | 1955 | |
duke@435 | 1956 | if (delay > 0) { |
duke@435 | 1957 | #ifndef PRODUCT |
duke@435 | 1958 | if (_cfg->C->trace_opto_output()) |
duke@435 | 1959 | tty->print("# NodeFitsInBundle [%4d]: FALSE; functional units overlap\n", n_idx); |
duke@435 | 1960 | #endif |
duke@435 | 1961 | return false; |
duke@435 | 1962 | } |
duke@435 | 1963 | |
duke@435 | 1964 | #ifndef PRODUCT |
duke@435 | 1965 | if (_cfg->C->trace_opto_output()) |
duke@435 | 1966 | tty->print("# NodeFitsInBundle [%4d]: TRUE\n", n_idx); |
duke@435 | 1967 | #endif |
duke@435 | 1968 | |
duke@435 | 1969 | return true; |
duke@435 | 1970 | } |
duke@435 | 1971 | |
duke@435 | 1972 | Node * Scheduling::ChooseNodeToBundle() { |
duke@435 | 1973 | uint siz = _available.size(); |
duke@435 | 1974 | |
duke@435 | 1975 | if (siz == 0) { |
duke@435 | 1976 | |
duke@435 | 1977 | #ifndef PRODUCT |
duke@435 | 1978 | if (_cfg->C->trace_opto_output()) |
duke@435 | 1979 | tty->print("# ChooseNodeToBundle: NULL\n"); |
duke@435 | 1980 | #endif |
duke@435 | 1981 | return (NULL); |
duke@435 | 1982 | } |
duke@435 | 1983 | |
duke@435 | 1984 | // Fast path, if only 1 instruction in the bundle |
duke@435 | 1985 | if (siz == 1) { |
duke@435 | 1986 | #ifndef PRODUCT |
duke@435 | 1987 | if (_cfg->C->trace_opto_output()) { |
duke@435 | 1988 | tty->print("# ChooseNodeToBundle (only 1): "); |
duke@435 | 1989 | _available[0]->dump(); |
duke@435 | 1990 | } |
duke@435 | 1991 | #endif |
duke@435 | 1992 | return (_available[0]); |
duke@435 | 1993 | } |
duke@435 | 1994 | |
duke@435 | 1995 | // Don't bother, if the bundle is already full |
duke@435 | 1996 | if (_bundle_instr_count < Pipeline::_max_instrs_per_cycle) { |
duke@435 | 1997 | for ( uint i = 0; i < siz; i++ ) { |
duke@435 | 1998 | Node *n = _available[i]; |
duke@435 | 1999 | |
duke@435 | 2000 | // Skip projections, we'll handle them another way |
duke@435 | 2001 | if (n->is_Proj()) |
duke@435 | 2002 | continue; |
duke@435 | 2003 | |
duke@435 | 2004 | // This presupposed that instructions are inserted into the |
duke@435 | 2005 | // available list in a legality order; i.e. instructions that |
duke@435 | 2006 | // must be inserted first are at the head of the list |
duke@435 | 2007 | if (NodeFitsInBundle(n)) { |
duke@435 | 2008 | #ifndef PRODUCT |
duke@435 | 2009 | if (_cfg->C->trace_opto_output()) { |
duke@435 | 2010 | tty->print("# ChooseNodeToBundle: "); |
duke@435 | 2011 | n->dump(); |
duke@435 | 2012 | } |
duke@435 | 2013 | #endif |
duke@435 | 2014 | return (n); |
duke@435 | 2015 | } |
duke@435 | 2016 | } |
duke@435 | 2017 | } |
duke@435 | 2018 | |
duke@435 | 2019 | // Nothing fits in this bundle, choose the highest priority |
duke@435 | 2020 | #ifndef PRODUCT |
duke@435 | 2021 | if (_cfg->C->trace_opto_output()) { |
duke@435 | 2022 | tty->print("# ChooseNodeToBundle: "); |
duke@435 | 2023 | _available[0]->dump(); |
duke@435 | 2024 | } |
duke@435 | 2025 | #endif |
duke@435 | 2026 | |
duke@435 | 2027 | return _available[0]; |
duke@435 | 2028 | } |
duke@435 | 2029 | |
duke@435 | 2030 | void Scheduling::AddNodeToAvailableList(Node *n) { |
duke@435 | 2031 | assert( !n->is_Proj(), "projections never directly made available" ); |
duke@435 | 2032 | #ifndef PRODUCT |
duke@435 | 2033 | if (_cfg->C->trace_opto_output()) { |
duke@435 | 2034 | tty->print("# AddNodeToAvailableList: "); |
duke@435 | 2035 | n->dump(); |
duke@435 | 2036 | } |
duke@435 | 2037 | #endif |
duke@435 | 2038 | |
duke@435 | 2039 | int latency = _current_latency[n->_idx]; |
duke@435 | 2040 | |
duke@435 | 2041 | // Insert in latency order (insertion sort) |
duke@435 | 2042 | uint i; |
duke@435 | 2043 | for ( i=0; i < _available.size(); i++ ) |
duke@435 | 2044 | if (_current_latency[_available[i]->_idx] > latency) |
duke@435 | 2045 | break; |
duke@435 | 2046 | |
duke@435 | 2047 | // Special Check for compares following branches |
duke@435 | 2048 | if( n->is_Mach() && _scheduled.size() > 0 ) { |
duke@435 | 2049 | int op = n->as_Mach()->ideal_Opcode(); |
duke@435 | 2050 | Node *last = _scheduled[0]; |
duke@435 | 2051 | if( last->is_MachIf() && last->in(1) == n && |
duke@435 | 2052 | ( op == Op_CmpI || |
duke@435 | 2053 | op == Op_CmpU || |
duke@435 | 2054 | op == Op_CmpP || |
duke@435 | 2055 | op == Op_CmpF || |
duke@435 | 2056 | op == Op_CmpD || |
duke@435 | 2057 | op == Op_CmpL ) ) { |
duke@435 | 2058 | |
duke@435 | 2059 | // Recalculate position, moving to front of same latency |
duke@435 | 2060 | for ( i=0 ; i < _available.size(); i++ ) |
duke@435 | 2061 | if (_current_latency[_available[i]->_idx] >= latency) |
duke@435 | 2062 | break; |
duke@435 | 2063 | } |
duke@435 | 2064 | } |
duke@435 | 2065 | |
duke@435 | 2066 | // Insert the node in the available list |
duke@435 | 2067 | _available.insert(i, n); |
duke@435 | 2068 | |
duke@435 | 2069 | #ifndef PRODUCT |
duke@435 | 2070 | if (_cfg->C->trace_opto_output()) |
duke@435 | 2071 | dump_available(); |
duke@435 | 2072 | #endif |
duke@435 | 2073 | } |
duke@435 | 2074 | |
duke@435 | 2075 | void Scheduling::DecrementUseCounts(Node *n, const Block *bb) { |
duke@435 | 2076 | for ( uint i=0; i < n->len(); i++ ) { |
duke@435 | 2077 | Node *def = n->in(i); |
duke@435 | 2078 | if (!def) continue; |
duke@435 | 2079 | if( def->is_Proj() ) // If this is a machine projection, then |
duke@435 | 2080 | def = def->in(0); // propagate usage thru to the base instruction |
duke@435 | 2081 | |
adlertz@5509 | 2082 | if(_cfg->get_block_for_node(def) != bb) { // Ignore if not block-local |
duke@435 | 2083 | continue; |
adlertz@5509 | 2084 | } |
duke@435 | 2085 | |
duke@435 | 2086 | // Compute the latency |
duke@435 | 2087 | uint l = _bundle_cycle_number + n->latency(i); |
duke@435 | 2088 | if (_current_latency[def->_idx] < l) |
duke@435 | 2089 | _current_latency[def->_idx] = l; |
duke@435 | 2090 | |
duke@435 | 2091 | // If this does not have uses then schedule it |
duke@435 | 2092 | if ((--_uses[def->_idx]) == 0) |
duke@435 | 2093 | AddNodeToAvailableList(def); |
duke@435 | 2094 | } |
duke@435 | 2095 | } |
duke@435 | 2096 | |
duke@435 | 2097 | void Scheduling::AddNodeToBundle(Node *n, const Block *bb) { |
duke@435 | 2098 | #ifndef PRODUCT |
duke@435 | 2099 | if (_cfg->C->trace_opto_output()) { |
duke@435 | 2100 | tty->print("# AddNodeToBundle: "); |
duke@435 | 2101 | n->dump(); |
duke@435 | 2102 | } |
duke@435 | 2103 | #endif |
duke@435 | 2104 | |
duke@435 | 2105 | // Remove this from the available list |
duke@435 | 2106 | uint i; |
duke@435 | 2107 | for (i = 0; i < _available.size(); i++) |
duke@435 | 2108 | if (_available[i] == n) |
duke@435 | 2109 | break; |
duke@435 | 2110 | assert(i < _available.size(), "entry in _available list not found"); |
duke@435 | 2111 | _available.remove(i); |
duke@435 | 2112 | |
duke@435 | 2113 | // See if this fits in the current bundle |
duke@435 | 2114 | const Pipeline *node_pipeline = n->pipeline(); |
duke@435 | 2115 | const Pipeline_Use& node_usage = node_pipeline->resourceUse(); |
duke@435 | 2116 | |
duke@435 | 2117 | // Check for instructions to be placed in the delay slot. We |
duke@435 | 2118 | // do this before we actually schedule the current instruction, |
duke@435 | 2119 | // because the delay slot follows the current instruction. |
duke@435 | 2120 | if (Pipeline::_branch_has_delay_slot && |
duke@435 | 2121 | node_pipeline->hasBranchDelay() && |
duke@435 | 2122 | !_unconditional_delay_slot) { |
duke@435 | 2123 | |
duke@435 | 2124 | uint siz = _available.size(); |
duke@435 | 2125 | |
duke@435 | 2126 | // Conditional branches can support an instruction that |
twisti@1040 | 2127 | // is unconditionally executed and not dependent by the |
duke@435 | 2128 | // branch, OR a conditionally executed instruction if |
duke@435 | 2129 | // the branch is taken. In practice, this means that |
duke@435 | 2130 | // the first instruction at the branch target is |
duke@435 | 2131 | // copied to the delay slot, and the branch goes to |
duke@435 | 2132 | // the instruction after that at the branch target |
kvn@3051 | 2133 | if ( n->is_MachBranch() ) { |
duke@435 | 2134 | |
duke@435 | 2135 | assert( !n->is_MachNullCheck(), "should not look for delay slot for Null Check" ); |
duke@435 | 2136 | assert( !n->is_Catch(), "should not look for delay slot for Catch" ); |
duke@435 | 2137 | |
duke@435 | 2138 | #ifndef PRODUCT |
duke@435 | 2139 | _branches++; |
duke@435 | 2140 | #endif |
duke@435 | 2141 | |
duke@435 | 2142 | // At least 1 instruction is on the available list |
twisti@1040 | 2143 | // that is not dependent on the branch |
duke@435 | 2144 | for (uint i = 0; i < siz; i++) { |
duke@435 | 2145 | Node *d = _available[i]; |
duke@435 | 2146 | const Pipeline *avail_pipeline = d->pipeline(); |
duke@435 | 2147 | |
duke@435 | 2148 | // Don't allow safepoints in the branch shadow, that will |
duke@435 | 2149 | // cause a number of difficulties |
duke@435 | 2150 | if ( avail_pipeline->instructionCount() == 1 && |
duke@435 | 2151 | !avail_pipeline->hasMultipleBundles() && |
duke@435 | 2152 | !avail_pipeline->hasBranchDelay() && |
duke@435 | 2153 | Pipeline::instr_has_unit_size() && |
duke@435 | 2154 | d->size(_regalloc) == Pipeline::instr_unit_size() && |
duke@435 | 2155 | NodeFitsInBundle(d) && |
duke@435 | 2156 | !node_bundling(d)->used_in_delay()) { |
duke@435 | 2157 | |
duke@435 | 2158 | if (d->is_Mach() && !d->is_MachSafePoint()) { |
duke@435 | 2159 | // A node that fits in the delay slot was found, so we need to |
duke@435 | 2160 | // set the appropriate bits in the bundle pipeline information so |
duke@435 | 2161 | // that it correctly indicates resource usage. Later, when we |
duke@435 | 2162 | // attempt to add this instruction to the bundle, we will skip |
duke@435 | 2163 | // setting the resource usage. |
duke@435 | 2164 | _unconditional_delay_slot = d; |
duke@435 | 2165 | node_bundling(n)->set_use_unconditional_delay(); |
duke@435 | 2166 | node_bundling(d)->set_used_in_unconditional_delay(); |
duke@435 | 2167 | _bundle_use.add_usage(avail_pipeline->resourceUse()); |
duke@435 | 2168 | _current_latency[d->_idx] = _bundle_cycle_number; |
duke@435 | 2169 | _next_node = d; |
duke@435 | 2170 | ++_bundle_instr_count; |
duke@435 | 2171 | #ifndef PRODUCT |
duke@435 | 2172 | _unconditional_delays++; |
duke@435 | 2173 | #endif |
duke@435 | 2174 | break; |
duke@435 | 2175 | } |
duke@435 | 2176 | } |
duke@435 | 2177 | } |
duke@435 | 2178 | } |
duke@435 | 2179 | |
duke@435 | 2180 | // No delay slot, add a nop to the usage |
duke@435 | 2181 | if (!_unconditional_delay_slot) { |
duke@435 | 2182 | // See if adding an instruction in the delay slot will overflow |
duke@435 | 2183 | // the bundle. |
duke@435 | 2184 | if (!NodeFitsInBundle(_nop)) { |
duke@435 | 2185 | #ifndef PRODUCT |
duke@435 | 2186 | if (_cfg->C->trace_opto_output()) |
duke@435 | 2187 | tty->print("# *** STEP(1 instruction for delay slot) ***\n"); |
duke@435 | 2188 | #endif |
duke@435 | 2189 | step(1); |
duke@435 | 2190 | } |
duke@435 | 2191 | |
duke@435 | 2192 | _bundle_use.add_usage(_nop->pipeline()->resourceUse()); |
duke@435 | 2193 | _next_node = _nop; |
duke@435 | 2194 | ++_bundle_instr_count; |
duke@435 | 2195 | } |
duke@435 | 2196 | |
duke@435 | 2197 | // See if the instruction in the delay slot requires a |
duke@435 | 2198 | // step of the bundles |
duke@435 | 2199 | if (!NodeFitsInBundle(n)) { |
duke@435 | 2200 | #ifndef PRODUCT |
duke@435 | 2201 | if (_cfg->C->trace_opto_output()) |
duke@435 | 2202 | tty->print("# *** STEP(branch won't fit) ***\n"); |
duke@435 | 2203 | #endif |
duke@435 | 2204 | // Update the state information |
duke@435 | 2205 | _bundle_instr_count = 0; |
duke@435 | 2206 | _bundle_cycle_number += 1; |
duke@435 | 2207 | _bundle_use.step(1); |
duke@435 | 2208 | } |
duke@435 | 2209 | } |
duke@435 | 2210 | |
duke@435 | 2211 | // Get the number of instructions |
duke@435 | 2212 | uint instruction_count = node_pipeline->instructionCount(); |
duke@435 | 2213 | if (node_pipeline->mayHaveNoCode() && n->size(_regalloc) == 0) |
duke@435 | 2214 | instruction_count = 0; |
duke@435 | 2215 | |
duke@435 | 2216 | // Compute the latency information |
duke@435 | 2217 | uint delay = 0; |
duke@435 | 2218 | |
duke@435 | 2219 | if (instruction_count > 0 || !node_pipeline->mayHaveNoCode()) { |
duke@435 | 2220 | int relative_latency = _current_latency[n->_idx] - _bundle_cycle_number; |
duke@435 | 2221 | if (relative_latency < 0) |
duke@435 | 2222 | relative_latency = 0; |
duke@435 | 2223 | |
duke@435 | 2224 | delay = _bundle_use.full_latency(relative_latency, node_usage); |
duke@435 | 2225 | |
duke@435 | 2226 | // Does not fit in this bundle, start a new one |
duke@435 | 2227 | if (delay > 0) { |
duke@435 | 2228 | step(delay); |
duke@435 | 2229 | |
duke@435 | 2230 | #ifndef PRODUCT |
duke@435 | 2231 | if (_cfg->C->trace_opto_output()) |
duke@435 | 2232 | tty->print("# *** STEP(%d) ***\n", delay); |
duke@435 | 2233 | #endif |
duke@435 | 2234 | } |
duke@435 | 2235 | } |
duke@435 | 2236 | |
duke@435 | 2237 | // If this was placed in the delay slot, ignore it |
duke@435 | 2238 | if (n != _unconditional_delay_slot) { |
duke@435 | 2239 | |
duke@435 | 2240 | if (delay == 0) { |
duke@435 | 2241 | if (node_pipeline->hasMultipleBundles()) { |
duke@435 | 2242 | #ifndef PRODUCT |
duke@435 | 2243 | if (_cfg->C->trace_opto_output()) |
duke@435 | 2244 | tty->print("# *** STEP(multiple instructions) ***\n"); |
duke@435 | 2245 | #endif |
duke@435 | 2246 | step(1); |
duke@435 | 2247 | } |
duke@435 | 2248 | |
duke@435 | 2249 | else if (instruction_count + _bundle_instr_count > Pipeline::_max_instrs_per_cycle) { |
duke@435 | 2250 | #ifndef PRODUCT |
duke@435 | 2251 | if (_cfg->C->trace_opto_output()) |
duke@435 | 2252 | tty->print("# *** STEP(%d >= %d instructions) ***\n", |
duke@435 | 2253 | instruction_count + _bundle_instr_count, |
duke@435 | 2254 | Pipeline::_max_instrs_per_cycle); |
duke@435 | 2255 | #endif |
duke@435 | 2256 | step(1); |
duke@435 | 2257 | } |
duke@435 | 2258 | } |
duke@435 | 2259 | |
duke@435 | 2260 | if (node_pipeline->hasBranchDelay() && !_unconditional_delay_slot) |
duke@435 | 2261 | _bundle_instr_count++; |
duke@435 | 2262 | |
duke@435 | 2263 | // Set the node's latency |
duke@435 | 2264 | _current_latency[n->_idx] = _bundle_cycle_number; |
duke@435 | 2265 | |
duke@435 | 2266 | // Now merge the functional unit information |
duke@435 | 2267 | if (instruction_count > 0 || !node_pipeline->mayHaveNoCode()) |
duke@435 | 2268 | _bundle_use.add_usage(node_usage); |
duke@435 | 2269 | |
duke@435 | 2270 | // Increment the number of instructions in this bundle |
duke@435 | 2271 | _bundle_instr_count += instruction_count; |
duke@435 | 2272 | |
duke@435 | 2273 | // Remember this node for later |
duke@435 | 2274 | if (n->is_Mach()) |
duke@435 | 2275 | _next_node = n; |
duke@435 | 2276 | } |
duke@435 | 2277 | |
duke@435 | 2278 | // It's possible to have a BoxLock in the graph and in the _bbs mapping but |
duke@435 | 2279 | // not in the bb->_nodes array. This happens for debug-info-only BoxLocks. |
duke@435 | 2280 | // 'Schedule' them (basically ignore in the schedule) but do not insert them |
duke@435 | 2281 | // into the block. All other scheduled nodes get put in the schedule here. |
duke@435 | 2282 | int op = n->Opcode(); |
duke@435 | 2283 | if( (op == Op_Node && n->req() == 0) || // anti-dependence node OR |
duke@435 | 2284 | (op != Op_Node && // Not an unused antidepedence node and |
duke@435 | 2285 | // not an unallocated boxlock |
duke@435 | 2286 | (OptoReg::is_valid(_regalloc->get_reg_first(n)) || op != Op_BoxLock)) ) { |
duke@435 | 2287 | |
duke@435 | 2288 | // Push any trailing projections |
adlertz@5635 | 2289 | if( bb->get_node(bb->number_of_nodes()-1) != n ) { |
duke@435 | 2290 | for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { |
duke@435 | 2291 | Node *foi = n->fast_out(i); |
duke@435 | 2292 | if( foi->is_Proj() ) |
duke@435 | 2293 | _scheduled.push(foi); |
duke@435 | 2294 | } |
duke@435 | 2295 | } |
duke@435 | 2296 | |
duke@435 | 2297 | // Put the instruction in the schedule list |
duke@435 | 2298 | _scheduled.push(n); |
duke@435 | 2299 | } |
duke@435 | 2300 | |
duke@435 | 2301 | #ifndef PRODUCT |
duke@435 | 2302 | if (_cfg->C->trace_opto_output()) |
duke@435 | 2303 | dump_available(); |
duke@435 | 2304 | #endif |
duke@435 | 2305 | |
duke@435 | 2306 | // Walk all the definitions, decrementing use counts, and |
duke@435 | 2307 | // if a definition has a 0 use count, place it in the available list. |
duke@435 | 2308 | DecrementUseCounts(n,bb); |
duke@435 | 2309 | } |
duke@435 | 2310 | |
duke@435 | 2311 | // This method sets the use count within a basic block. We will ignore all |
duke@435 | 2312 | // uses outside the current basic block. As we are doing a backwards walk, |
duke@435 | 2313 | // any node we reach that has a use count of 0 may be scheduled. This also |
duke@435 | 2314 | // avoids the problem of cyclic references from phi nodes, as long as phi |
duke@435 | 2315 | // nodes are at the front of the basic block. This method also initializes |
duke@435 | 2316 | // the available list to the set of instructions that have no uses within this |
duke@435 | 2317 | // basic block. |
duke@435 | 2318 | void Scheduling::ComputeUseCount(const Block *bb) { |
duke@435 | 2319 | #ifndef PRODUCT |
duke@435 | 2320 | if (_cfg->C->trace_opto_output()) |
duke@435 | 2321 | tty->print("# -> ComputeUseCount\n"); |
duke@435 | 2322 | #endif |
duke@435 | 2323 | |
duke@435 | 2324 | // Clear the list of available and scheduled instructions, just in case |
duke@435 | 2325 | _available.clear(); |
duke@435 | 2326 | _scheduled.clear(); |
duke@435 | 2327 | |
duke@435 | 2328 | // No delay slot specified |
duke@435 | 2329 | _unconditional_delay_slot = NULL; |
duke@435 | 2330 | |
duke@435 | 2331 | #ifdef ASSERT |
adlertz@5635 | 2332 | for( uint i=0; i < bb->number_of_nodes(); i++ ) |
adlertz@5635 | 2333 | assert( _uses[bb->get_node(i)->_idx] == 0, "_use array not clean" ); |
duke@435 | 2334 | #endif |
duke@435 | 2335 | |
duke@435 | 2336 | // Force the _uses count to never go to zero for unscheduable pieces |
duke@435 | 2337 | // of the block |
duke@435 | 2338 | for( uint k = 0; k < _bb_start; k++ ) |
adlertz@5635 | 2339 | _uses[bb->get_node(k)->_idx] = 1; |
adlertz@5635 | 2340 | for( uint l = _bb_end; l < bb->number_of_nodes(); l++ ) |
adlertz@5635 | 2341 | _uses[bb->get_node(l)->_idx] = 1; |
duke@435 | 2342 | |
duke@435 | 2343 | // Iterate backwards over the instructions in the block. Don't count the |
duke@435 | 2344 | // branch projections at end or the block header instructions. |
duke@435 | 2345 | for( uint j = _bb_end-1; j >= _bb_start; j-- ) { |
adlertz@5635 | 2346 | Node *n = bb->get_node(j); |
duke@435 | 2347 | if( n->is_Proj() ) continue; // Projections handled another way |
duke@435 | 2348 | |
duke@435 | 2349 | // Account for all uses |
duke@435 | 2350 | for ( uint k = 0; k < n->len(); k++ ) { |
duke@435 | 2351 | Node *inp = n->in(k); |
duke@435 | 2352 | if (!inp) continue; |
duke@435 | 2353 | assert(inp != n, "no cycles allowed" ); |
adlertz@5509 | 2354 | if (_cfg->get_block_for_node(inp) == bb) { // Block-local use? |
adlertz@5509 | 2355 | if (inp->is_Proj()) { // Skip through Proj's |
duke@435 | 2356 | inp = inp->in(0); |
adlertz@5509 | 2357 | } |
duke@435 | 2358 | ++_uses[inp->_idx]; // Count 1 block-local use |
duke@435 | 2359 | } |
duke@435 | 2360 | } |
duke@435 | 2361 | |
duke@435 | 2362 | // If this instruction has a 0 use count, then it is available |
duke@435 | 2363 | if (!_uses[n->_idx]) { |
duke@435 | 2364 | _current_latency[n->_idx] = _bundle_cycle_number; |
duke@435 | 2365 | AddNodeToAvailableList(n); |
duke@435 | 2366 | } |
duke@435 | 2367 | |
duke@435 | 2368 | #ifndef PRODUCT |
duke@435 | 2369 | if (_cfg->C->trace_opto_output()) { |
duke@435 | 2370 | tty->print("# uses: %3d: ", _uses[n->_idx]); |
duke@435 | 2371 | n->dump(); |
duke@435 | 2372 | } |
duke@435 | 2373 | #endif |
duke@435 | 2374 | } |
duke@435 | 2375 | |
duke@435 | 2376 | #ifndef PRODUCT |
duke@435 | 2377 | if (_cfg->C->trace_opto_output()) |
duke@435 | 2378 | tty->print("# <- ComputeUseCount\n"); |
duke@435 | 2379 | #endif |
duke@435 | 2380 | } |
duke@435 | 2381 | |
duke@435 | 2382 | // This routine performs scheduling on each basic block in reverse order, |
duke@435 | 2383 | // using instruction latencies and taking into account function unit |
duke@435 | 2384 | // availability. |
duke@435 | 2385 | void Scheduling::DoScheduling() { |
duke@435 | 2386 | #ifndef PRODUCT |
duke@435 | 2387 | if (_cfg->C->trace_opto_output()) |
duke@435 | 2388 | tty->print("# -> DoScheduling\n"); |
duke@435 | 2389 | #endif |
duke@435 | 2390 | |
duke@435 | 2391 | Block *succ_bb = NULL; |
duke@435 | 2392 | Block *bb; |
duke@435 | 2393 | |
duke@435 | 2394 | // Walk over all the basic blocks in reverse order |
adlertz@5539 | 2395 | for (int i = _cfg->number_of_blocks() - 1; i >= 0; succ_bb = bb, i--) { |
adlertz@5539 | 2396 | bb = _cfg->get_block(i); |
duke@435 | 2397 | |
duke@435 | 2398 | #ifndef PRODUCT |
duke@435 | 2399 | if (_cfg->C->trace_opto_output()) { |
duke@435 | 2400 | tty->print("# Schedule BB#%03d (initial)\n", i); |
adlertz@5635 | 2401 | for (uint j = 0; j < bb->number_of_nodes(); j++) { |
adlertz@5635 | 2402 | bb->get_node(j)->dump(); |
adlertz@5539 | 2403 | } |
duke@435 | 2404 | } |
duke@435 | 2405 | #endif |
duke@435 | 2406 | |
duke@435 | 2407 | // On the head node, skip processing |
adlertz@5539 | 2408 | if (bb == _cfg->get_root_block()) { |
duke@435 | 2409 | continue; |
adlertz@5539 | 2410 | } |
duke@435 | 2411 | |
duke@435 | 2412 | // Skip empty, connector blocks |
duke@435 | 2413 | if (bb->is_connector()) |
duke@435 | 2414 | continue; |
duke@435 | 2415 | |
duke@435 | 2416 | // If the following block is not the sole successor of |
duke@435 | 2417 | // this one, then reset the pipeline information |
duke@435 | 2418 | if (bb->_num_succs != 1 || bb->non_connector_successor(0) != succ_bb) { |
duke@435 | 2419 | #ifndef PRODUCT |
duke@435 | 2420 | if (_cfg->C->trace_opto_output()) { |
duke@435 | 2421 | tty->print("*** bundle start of next BB, node %d, for %d instructions\n", |
duke@435 | 2422 | _next_node->_idx, _bundle_instr_count); |
duke@435 | 2423 | } |
duke@435 | 2424 | #endif |
duke@435 | 2425 | step_and_clear(); |
duke@435 | 2426 | } |
duke@435 | 2427 | |
duke@435 | 2428 | // Leave untouched the starting instruction, any Phis, a CreateEx node |
adlertz@5635 | 2429 | // or Top. bb->get_node(_bb_start) is the first schedulable instruction. |
adlertz@5635 | 2430 | _bb_end = bb->number_of_nodes()-1; |
duke@435 | 2431 | for( _bb_start=1; _bb_start <= _bb_end; _bb_start++ ) { |
adlertz@5635 | 2432 | Node *n = bb->get_node(_bb_start); |
duke@435 | 2433 | // Things not matched, like Phinodes and ProjNodes don't get scheduled. |
duke@435 | 2434 | // Also, MachIdealNodes do not get scheduled |
duke@435 | 2435 | if( !n->is_Mach() ) continue; // Skip non-machine nodes |
duke@435 | 2436 | MachNode *mach = n->as_Mach(); |
duke@435 | 2437 | int iop = mach->ideal_Opcode(); |
duke@435 | 2438 | if( iop == Op_CreateEx ) continue; // CreateEx is pinned |
duke@435 | 2439 | if( iop == Op_Con ) continue; // Do not schedule Top |
duke@435 | 2440 | if( iop == Op_Node && // Do not schedule PhiNodes, ProjNodes |
duke@435 | 2441 | mach->pipeline() == MachNode::pipeline_class() && |
duke@435 | 2442 | !n->is_SpillCopy() ) // Breakpoints, Prolog, etc |
duke@435 | 2443 | continue; |
duke@435 | 2444 | break; // Funny loop structure to be sure... |
duke@435 | 2445 | } |
duke@435 | 2446 | // Compute last "interesting" instruction in block - last instruction we |
duke@435 | 2447 | // might schedule. _bb_end points just after last schedulable inst. We |
duke@435 | 2448 | // normally schedule conditional branches (despite them being forced last |
duke@435 | 2449 | // in the block), because they have delay slots we can fill. Calls all |
duke@435 | 2450 | // have their delay slots filled in the template expansions, so we don't |
duke@435 | 2451 | // bother scheduling them. |
adlertz@5635 | 2452 | Node *last = bb->get_node(_bb_end); |
kvn@3049 | 2453 | // Ignore trailing NOPs. |
kvn@3049 | 2454 | while (_bb_end > 0 && last->is_Mach() && |
kvn@3049 | 2455 | last->as_Mach()->ideal_Opcode() == Op_Con) { |
adlertz@5635 | 2456 | last = bb->get_node(--_bb_end); |
kvn@3049 | 2457 | } |
kvn@3049 | 2458 | assert(!last->is_Mach() || last->as_Mach()->ideal_Opcode() != Op_Con, ""); |
duke@435 | 2459 | if( last->is_Catch() || |
kvn@1142 | 2460 | // Exclude unreachable path case when Halt node is in a separate block. |
kvn@1142 | 2461 | (_bb_end > 1 && last->is_Mach() && last->as_Mach()->ideal_Opcode() == Op_Halt) ) { |
duke@435 | 2462 | // There must be a prior call. Skip it. |
adlertz@5635 | 2463 | while( !bb->get_node(--_bb_end)->is_MachCall() ) { |
adlertz@5635 | 2464 | assert( bb->get_node(_bb_end)->is_MachProj(), "skipping projections after expected call" ); |
duke@435 | 2465 | } |
duke@435 | 2466 | } else if( last->is_MachNullCheck() ) { |
duke@435 | 2467 | // Backup so the last null-checked memory instruction is |
duke@435 | 2468 | // outside the schedulable range. Skip over the nullcheck, |
duke@435 | 2469 | // projection, and the memory nodes. |
duke@435 | 2470 | Node *mem = last->in(1); |
duke@435 | 2471 | do { |
duke@435 | 2472 | _bb_end--; |
adlertz@5635 | 2473 | } while (mem != bb->get_node(_bb_end)); |
duke@435 | 2474 | } else { |
duke@435 | 2475 | // Set _bb_end to point after last schedulable inst. |
duke@435 | 2476 | _bb_end++; |
duke@435 | 2477 | } |
duke@435 | 2478 | |
duke@435 | 2479 | assert( _bb_start <= _bb_end, "inverted block ends" ); |
duke@435 | 2480 | |
duke@435 | 2481 | // Compute the register antidependencies for the basic block |
duke@435 | 2482 | ComputeRegisterAntidependencies(bb); |
duke@435 | 2483 | if (_cfg->C->failing()) return; // too many D-U pinch points |
duke@435 | 2484 | |
duke@435 | 2485 | // Compute intra-bb latencies for the nodes |
duke@435 | 2486 | ComputeLocalLatenciesForward(bb); |
duke@435 | 2487 | |
duke@435 | 2488 | // Compute the usage within the block, and set the list of all nodes |
duke@435 | 2489 | // in the block that have no uses within the block. |
duke@435 | 2490 | ComputeUseCount(bb); |
duke@435 | 2491 | |
duke@435 | 2492 | // Schedule the remaining instructions in the block |
duke@435 | 2493 | while ( _available.size() > 0 ) { |
duke@435 | 2494 | Node *n = ChooseNodeToBundle(); |
morris@4770 | 2495 | guarantee(n != NULL, "no nodes available"); |
duke@435 | 2496 | AddNodeToBundle(n,bb); |
duke@435 | 2497 | } |
duke@435 | 2498 | |
duke@435 | 2499 | assert( _scheduled.size() == _bb_end - _bb_start, "wrong number of instructions" ); |
duke@435 | 2500 | #ifdef ASSERT |
duke@435 | 2501 | for( uint l = _bb_start; l < _bb_end; l++ ) { |
adlertz@5635 | 2502 | Node *n = bb->get_node(l); |
duke@435 | 2503 | uint m; |
duke@435 | 2504 | for( m = 0; m < _bb_end-_bb_start; m++ ) |
duke@435 | 2505 | if( _scheduled[m] == n ) |
duke@435 | 2506 | break; |
duke@435 | 2507 | assert( m < _bb_end-_bb_start, "instruction missing in schedule" ); |
duke@435 | 2508 | } |
duke@435 | 2509 | #endif |
duke@435 | 2510 | |
duke@435 | 2511 | // Now copy the instructions (in reverse order) back to the block |
duke@435 | 2512 | for ( uint k = _bb_start; k < _bb_end; k++ ) |
adlertz@5635 | 2513 | bb->map_node(_scheduled[_bb_end-k-1], k); |
duke@435 | 2514 | |
duke@435 | 2515 | #ifndef PRODUCT |
duke@435 | 2516 | if (_cfg->C->trace_opto_output()) { |
duke@435 | 2517 | tty->print("# Schedule BB#%03d (final)\n", i); |
duke@435 | 2518 | uint current = 0; |
adlertz@5635 | 2519 | for (uint j = 0; j < bb->number_of_nodes(); j++) { |
adlertz@5635 | 2520 | Node *n = bb->get_node(j); |
duke@435 | 2521 | if( valid_bundle_info(n) ) { |
duke@435 | 2522 | Bundle *bundle = node_bundling(n); |
duke@435 | 2523 | if (bundle->instr_count() > 0 || bundle->flags() > 0) { |
duke@435 | 2524 | tty->print("*** Bundle: "); |
duke@435 | 2525 | bundle->dump(); |
duke@435 | 2526 | } |
duke@435 | 2527 | n->dump(); |
duke@435 | 2528 | } |
duke@435 | 2529 | } |
duke@435 | 2530 | } |
duke@435 | 2531 | #endif |
duke@435 | 2532 | #ifdef ASSERT |
duke@435 | 2533 | verify_good_schedule(bb,"after block local scheduling"); |
duke@435 | 2534 | #endif |
duke@435 | 2535 | } |
duke@435 | 2536 | |
duke@435 | 2537 | #ifndef PRODUCT |
duke@435 | 2538 | if (_cfg->C->trace_opto_output()) |
duke@435 | 2539 | tty->print("# <- DoScheduling\n"); |
duke@435 | 2540 | #endif |
duke@435 | 2541 | |
duke@435 | 2542 | // Record final node-bundling array location |
duke@435 | 2543 | _regalloc->C->set_node_bundling_base(_node_bundling_base); |
duke@435 | 2544 | |
duke@435 | 2545 | } // end DoScheduling |
duke@435 | 2546 | |
duke@435 | 2547 | // Verify that no live-range used in the block is killed in the block by a |
duke@435 | 2548 | // wrong DEF. This doesn't verify live-ranges that span blocks. |
duke@435 | 2549 | |
duke@435 | 2550 | // Check for edge existence. Used to avoid adding redundant precedence edges. |
duke@435 | 2551 | static bool edge_from_to( Node *from, Node *to ) { |
duke@435 | 2552 | for( uint i=0; i<from->len(); i++ ) |
duke@435 | 2553 | if( from->in(i) == to ) |
duke@435 | 2554 | return true; |
duke@435 | 2555 | return false; |
duke@435 | 2556 | } |
duke@435 | 2557 | |
duke@435 | 2558 | #ifdef ASSERT |
duke@435 | 2559 | void Scheduling::verify_do_def( Node *n, OptoReg::Name def, const char *msg ) { |
duke@435 | 2560 | // Check for bad kills |
duke@435 | 2561 | if( OptoReg::is_valid(def) ) { // Ignore stores & control flow |
duke@435 | 2562 | Node *prior_use = _reg_node[def]; |
duke@435 | 2563 | if( prior_use && !edge_from_to(prior_use,n) ) { |
duke@435 | 2564 | tty->print("%s = ",OptoReg::as_VMReg(def)->name()); |
duke@435 | 2565 | n->dump(); |
duke@435 | 2566 | tty->print_cr("..."); |
duke@435 | 2567 | prior_use->dump(); |
jcoomes@1845 | 2568 | assert(edge_from_to(prior_use,n),msg); |
duke@435 | 2569 | } |
duke@435 | 2570 | _reg_node.map(def,NULL); // Kill live USEs |
duke@435 | 2571 | } |
duke@435 | 2572 | } |
duke@435 | 2573 | |
duke@435 | 2574 | void Scheduling::verify_good_schedule( Block *b, const char *msg ) { |
duke@435 | 2575 | |
duke@435 | 2576 | // Zap to something reasonable for the verify code |
duke@435 | 2577 | _reg_node.clear(); |
duke@435 | 2578 | |
duke@435 | 2579 | // Walk over the block backwards. Check to make sure each DEF doesn't |
duke@435 | 2580 | // kill a live value (other than the one it's supposed to). Add each |
duke@435 | 2581 | // USE to the live set. |
adlertz@5635 | 2582 | for( uint i = b->number_of_nodes()-1; i >= _bb_start; i-- ) { |
adlertz@5635 | 2583 | Node *n = b->get_node(i); |
duke@435 | 2584 | int n_op = n->Opcode(); |
duke@435 | 2585 | if( n_op == Op_MachProj && n->ideal_reg() == MachProjNode::fat_proj ) { |
duke@435 | 2586 | // Fat-proj kills a slew of registers |
duke@435 | 2587 | RegMask rm = n->out_RegMask();// Make local copy |
duke@435 | 2588 | while( rm.is_NotEmpty() ) { |
duke@435 | 2589 | OptoReg::Name kill = rm.find_first_elem(); |
duke@435 | 2590 | rm.Remove(kill); |
duke@435 | 2591 | verify_do_def( n, kill, msg ); |
duke@435 | 2592 | } |
duke@435 | 2593 | } else if( n_op != Op_Node ) { // Avoid brand new antidependence nodes |
duke@435 | 2594 | // Get DEF'd registers the normal way |
duke@435 | 2595 | verify_do_def( n, _regalloc->get_reg_first(n), msg ); |
duke@435 | 2596 | verify_do_def( n, _regalloc->get_reg_second(n), msg ); |
duke@435 | 2597 | } |
duke@435 | 2598 | |
duke@435 | 2599 | // Now make all USEs live |
duke@435 | 2600 | for( uint i=1; i<n->req(); i++ ) { |
duke@435 | 2601 | Node *def = n->in(i); |
duke@435 | 2602 | assert(def != 0, "input edge required"); |
duke@435 | 2603 | OptoReg::Name reg_lo = _regalloc->get_reg_first(def); |
duke@435 | 2604 | OptoReg::Name reg_hi = _regalloc->get_reg_second(def); |
duke@435 | 2605 | if( OptoReg::is_valid(reg_lo) ) { |
jcoomes@1845 | 2606 | assert(!_reg_node[reg_lo] || edge_from_to(_reg_node[reg_lo],def), msg); |
duke@435 | 2607 | _reg_node.map(reg_lo,n); |
duke@435 | 2608 | } |
duke@435 | 2609 | if( OptoReg::is_valid(reg_hi) ) { |
jcoomes@1845 | 2610 | assert(!_reg_node[reg_hi] || edge_from_to(_reg_node[reg_hi],def), msg); |
duke@435 | 2611 | _reg_node.map(reg_hi,n); |
duke@435 | 2612 | } |
duke@435 | 2613 | } |
duke@435 | 2614 | |
duke@435 | 2615 | } |
duke@435 | 2616 | |
duke@435 | 2617 | // Zap to something reasonable for the Antidependence code |
duke@435 | 2618 | _reg_node.clear(); |
duke@435 | 2619 | } |
duke@435 | 2620 | #endif |
duke@435 | 2621 | |
duke@435 | 2622 | // Conditionally add precedence edges. Avoid putting edges on Projs. |
duke@435 | 2623 | static void add_prec_edge_from_to( Node *from, Node *to ) { |
duke@435 | 2624 | if( from->is_Proj() ) { // Put precedence edge on Proj's input |
duke@435 | 2625 | assert( from->req() == 1 && (from->len() == 1 || from->in(1)==0), "no precedence edges on projections" ); |
duke@435 | 2626 | from = from->in(0); |
duke@435 | 2627 | } |
duke@435 | 2628 | if( from != to && // No cycles (for things like LD L0,[L0+4] ) |
duke@435 | 2629 | !edge_from_to( from, to ) ) // Avoid duplicate edge |
duke@435 | 2630 | from->add_prec(to); |
duke@435 | 2631 | } |
duke@435 | 2632 | |
duke@435 | 2633 | void Scheduling::anti_do_def( Block *b, Node *def, OptoReg::Name def_reg, int is_def ) { |
duke@435 | 2634 | if( !OptoReg::is_valid(def_reg) ) // Ignore stores & control flow |
duke@435 | 2635 | return; |
duke@435 | 2636 | |
duke@435 | 2637 | Node *pinch = _reg_node[def_reg]; // Get pinch point |
adlertz@5509 | 2638 | if ((pinch == NULL) || _cfg->get_block_for_node(pinch) != b || // No pinch-point yet? |
duke@435 | 2639 | is_def ) { // Check for a true def (not a kill) |
duke@435 | 2640 | _reg_node.map(def_reg,def); // Record def/kill as the optimistic pinch-point |
duke@435 | 2641 | return; |
duke@435 | 2642 | } |
duke@435 | 2643 | |
duke@435 | 2644 | Node *kill = def; // Rename 'def' to more descriptive 'kill' |
duke@435 | 2645 | debug_only( def = (Node*)0xdeadbeef; ) |
duke@435 | 2646 | |
duke@435 | 2647 | // After some number of kills there _may_ be a later def |
duke@435 | 2648 | Node *later_def = NULL; |
duke@435 | 2649 | |
duke@435 | 2650 | // Finding a kill requires a real pinch-point. |
duke@435 | 2651 | // Check for not already having a pinch-point. |
duke@435 | 2652 | // Pinch points are Op_Node's. |
duke@435 | 2653 | if( pinch->Opcode() != Op_Node ) { // Or later-def/kill as pinch-point? |
duke@435 | 2654 | later_def = pinch; // Must be def/kill as optimistic pinch-point |
duke@435 | 2655 | if ( _pinch_free_list.size() > 0) { |
duke@435 | 2656 | pinch = _pinch_free_list.pop(); |
duke@435 | 2657 | } else { |
kvn@4115 | 2658 | pinch = new (_cfg->C) Node(1); // Pinch point to-be |
duke@435 | 2659 | } |
duke@435 | 2660 | if (pinch->_idx >= _regalloc->node_regs_max_index()) { |
duke@435 | 2661 | _cfg->C->record_method_not_compilable("too many D-U pinch points"); |
duke@435 | 2662 | return; |
duke@435 | 2663 | } |
adlertz@5509 | 2664 | _cfg->map_node_to_block(pinch, b); // Pretend it's valid in this block (lazy init) |
duke@435 | 2665 | _reg_node.map(def_reg,pinch); // Record pinch-point |
duke@435 | 2666 | //_regalloc->set_bad(pinch->_idx); // Already initialized this way. |
duke@435 | 2667 | if( later_def->outcnt() == 0 || later_def->ideal_reg() == MachProjNode::fat_proj ) { // Distinguish def from kill |
duke@435 | 2668 | pinch->init_req(0, _cfg->C->top()); // set not NULL for the next call |
duke@435 | 2669 | add_prec_edge_from_to(later_def,pinch); // Add edge from kill to pinch |
duke@435 | 2670 | later_def = NULL; // and no later def |
duke@435 | 2671 | } |
duke@435 | 2672 | pinch->set_req(0,later_def); // Hook later def so we can find it |
duke@435 | 2673 | } else { // Else have valid pinch point |
duke@435 | 2674 | if( pinch->in(0) ) // If there is a later-def |
duke@435 | 2675 | later_def = pinch->in(0); // Get it |
duke@435 | 2676 | } |
duke@435 | 2677 | |
duke@435 | 2678 | // Add output-dependence edge from later def to kill |
duke@435 | 2679 | if( later_def ) // If there is some original def |
duke@435 | 2680 | add_prec_edge_from_to(later_def,kill); // Add edge from def to kill |
duke@435 | 2681 | |
duke@435 | 2682 | // See if current kill is also a use, and so is forced to be the pinch-point. |
duke@435 | 2683 | if( pinch->Opcode() == Op_Node ) { |
duke@435 | 2684 | Node *uses = kill->is_Proj() ? kill->in(0) : kill; |
duke@435 | 2685 | for( uint i=1; i<uses->req(); i++ ) { |
duke@435 | 2686 | if( _regalloc->get_reg_first(uses->in(i)) == def_reg || |
duke@435 | 2687 | _regalloc->get_reg_second(uses->in(i)) == def_reg ) { |
duke@435 | 2688 | // Yes, found a use/kill pinch-point |
duke@435 | 2689 | pinch->set_req(0,NULL); // |
duke@435 | 2690 | pinch->replace_by(kill); // Move anti-dep edges up |
duke@435 | 2691 | pinch = kill; |
duke@435 | 2692 | _reg_node.map(def_reg,pinch); |
duke@435 | 2693 | return; |
duke@435 | 2694 | } |
duke@435 | 2695 | } |
duke@435 | 2696 | } |
duke@435 | 2697 | |
duke@435 | 2698 | // Add edge from kill to pinch-point |
duke@435 | 2699 | add_prec_edge_from_to(kill,pinch); |
duke@435 | 2700 | } |
duke@435 | 2701 | |
duke@435 | 2702 | void Scheduling::anti_do_use( Block *b, Node *use, OptoReg::Name use_reg ) { |
duke@435 | 2703 | if( !OptoReg::is_valid(use_reg) ) // Ignore stores & control flow |
duke@435 | 2704 | return; |
duke@435 | 2705 | Node *pinch = _reg_node[use_reg]; // Get pinch point |
duke@435 | 2706 | // Check for no later def_reg/kill in block |
adlertz@5509 | 2707 | if ((pinch != NULL) && _cfg->get_block_for_node(pinch) == b && |
duke@435 | 2708 | // Use has to be block-local as well |
adlertz@5509 | 2709 | _cfg->get_block_for_node(use) == b) { |
duke@435 | 2710 | if( pinch->Opcode() == Op_Node && // Real pinch-point (not optimistic?) |
duke@435 | 2711 | pinch->req() == 1 ) { // pinch not yet in block? |
duke@435 | 2712 | pinch->del_req(0); // yank pointer to later-def, also set flag |
duke@435 | 2713 | // Insert the pinch-point in the block just after the last use |
adlertz@5635 | 2714 | b->insert_node(pinch, b->find_node(use) + 1); |
duke@435 | 2715 | _bb_end++; // Increase size scheduled region in block |
duke@435 | 2716 | } |
duke@435 | 2717 | |
duke@435 | 2718 | add_prec_edge_from_to(pinch,use); |
duke@435 | 2719 | } |
duke@435 | 2720 | } |
duke@435 | 2721 | |
duke@435 | 2722 | // We insert antidependences between the reads and following write of |
duke@435 | 2723 | // allocated registers to prevent illegal code motion. Hopefully, the |
duke@435 | 2724 | // number of added references should be fairly small, especially as we |
duke@435 | 2725 | // are only adding references within the current basic block. |
duke@435 | 2726 | void Scheduling::ComputeRegisterAntidependencies(Block *b) { |
duke@435 | 2727 | |
duke@435 | 2728 | #ifdef ASSERT |
duke@435 | 2729 | verify_good_schedule(b,"before block local scheduling"); |
duke@435 | 2730 | #endif |
duke@435 | 2731 | |
duke@435 | 2732 | // A valid schedule, for each register independently, is an endless cycle |
duke@435 | 2733 | // of: a def, then some uses (connected to the def by true dependencies), |
duke@435 | 2734 | // then some kills (defs with no uses), finally the cycle repeats with a new |
duke@435 | 2735 | // def. The uses are allowed to float relative to each other, as are the |
duke@435 | 2736 | // kills. No use is allowed to slide past a kill (or def). This requires |
duke@435 | 2737 | // antidependencies between all uses of a single def and all kills that |
duke@435 | 2738 | // follow, up to the next def. More edges are redundant, because later defs |
duke@435 | 2739 | // & kills are already serialized with true or antidependencies. To keep |
duke@435 | 2740 | // the edge count down, we add a 'pinch point' node if there's more than |
duke@435 | 2741 | // one use or more than one kill/def. |
duke@435 | 2742 | |
duke@435 | 2743 | // We add dependencies in one bottom-up pass. |
duke@435 | 2744 | |
duke@435 | 2745 | // For each instruction we handle it's DEFs/KILLs, then it's USEs. |
duke@435 | 2746 | |
duke@435 | 2747 | // For each DEF/KILL, we check to see if there's a prior DEF/KILL for this |
duke@435 | 2748 | // register. If not, we record the DEF/KILL in _reg_node, the |
duke@435 | 2749 | // register-to-def mapping. If there is a prior DEF/KILL, we insert a |
duke@435 | 2750 | // "pinch point", a new Node that's in the graph but not in the block. |
duke@435 | 2751 | // We put edges from the prior and current DEF/KILLs to the pinch point. |
duke@435 | 2752 | // We put the pinch point in _reg_node. If there's already a pinch point |
duke@435 | 2753 | // we merely add an edge from the current DEF/KILL to the pinch point. |
duke@435 | 2754 | |
duke@435 | 2755 | // After doing the DEF/KILLs, we handle USEs. For each used register, we |
duke@435 | 2756 | // put an edge from the pinch point to the USE. |
duke@435 | 2757 | |
duke@435 | 2758 | // To be expedient, the _reg_node array is pre-allocated for the whole |
duke@435 | 2759 | // compilation. _reg_node is lazily initialized; it either contains a NULL, |
duke@435 | 2760 | // or a valid def/kill/pinch-point, or a leftover node from some prior |
duke@435 | 2761 | // block. Leftover node from some prior block is treated like a NULL (no |
duke@435 | 2762 | // prior def, so no anti-dependence needed). Valid def is distinguished by |
duke@435 | 2763 | // it being in the current block. |
duke@435 | 2764 | bool fat_proj_seen = false; |
duke@435 | 2765 | uint last_safept = _bb_end-1; |
adlertz@5635 | 2766 | Node* end_node = (_bb_end-1 >= _bb_start) ? b->get_node(last_safept) : NULL; |
duke@435 | 2767 | Node* last_safept_node = end_node; |
duke@435 | 2768 | for( uint i = _bb_end-1; i >= _bb_start; i-- ) { |
adlertz@5635 | 2769 | Node *n = b->get_node(i); |
duke@435 | 2770 | int is_def = n->outcnt(); // def if some uses prior to adding precedence edges |
kvn@3040 | 2771 | if( n->is_MachProj() && n->ideal_reg() == MachProjNode::fat_proj ) { |
duke@435 | 2772 | // Fat-proj kills a slew of registers |
duke@435 | 2773 | // This can add edges to 'n' and obscure whether or not it was a def, |
duke@435 | 2774 | // hence the is_def flag. |
duke@435 | 2775 | fat_proj_seen = true; |
duke@435 | 2776 | RegMask rm = n->out_RegMask();// Make local copy |
duke@435 | 2777 | while( rm.is_NotEmpty() ) { |
duke@435 | 2778 | OptoReg::Name kill = rm.find_first_elem(); |
duke@435 | 2779 | rm.Remove(kill); |
duke@435 | 2780 | anti_do_def( b, n, kill, is_def ); |
duke@435 | 2781 | } |
duke@435 | 2782 | } else { |
duke@435 | 2783 | // Get DEF'd registers the normal way |
duke@435 | 2784 | anti_do_def( b, n, _regalloc->get_reg_first(n), is_def ); |
duke@435 | 2785 | anti_do_def( b, n, _regalloc->get_reg_second(n), is_def ); |
duke@435 | 2786 | } |
duke@435 | 2787 | |
kvn@3049 | 2788 | // Kill projections on a branch should appear to occur on the |
kvn@3049 | 2789 | // branch, not afterwards, so grab the masks from the projections |
kvn@3049 | 2790 | // and process them. |
kvn@3051 | 2791 | if (n->is_MachBranch() || n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_Jump) { |
kvn@3049 | 2792 | for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { |
kvn@3049 | 2793 | Node* use = n->fast_out(i); |
kvn@3049 | 2794 | if (use->is_Proj()) { |
kvn@3049 | 2795 | RegMask rm = use->out_RegMask();// Make local copy |
kvn@3049 | 2796 | while( rm.is_NotEmpty() ) { |
kvn@3049 | 2797 | OptoReg::Name kill = rm.find_first_elem(); |
kvn@3049 | 2798 | rm.Remove(kill); |
kvn@3049 | 2799 | anti_do_def( b, n, kill, false ); |
kvn@3049 | 2800 | } |
kvn@3049 | 2801 | } |
kvn@3049 | 2802 | } |
kvn@3049 | 2803 | } |
kvn@3049 | 2804 | |
duke@435 | 2805 | // Check each register used by this instruction for a following DEF/KILL |
duke@435 | 2806 | // that must occur afterward and requires an anti-dependence edge. |
duke@435 | 2807 | for( uint j=0; j<n->req(); j++ ) { |
duke@435 | 2808 | Node *def = n->in(j); |
duke@435 | 2809 | if( def ) { |
kvn@3040 | 2810 | assert( !def->is_MachProj() || def->ideal_reg() != MachProjNode::fat_proj, "" ); |
duke@435 | 2811 | anti_do_use( b, n, _regalloc->get_reg_first(def) ); |
duke@435 | 2812 | anti_do_use( b, n, _regalloc->get_reg_second(def) ); |
duke@435 | 2813 | } |
duke@435 | 2814 | } |
duke@435 | 2815 | // Do not allow defs of new derived values to float above GC |
duke@435 | 2816 | // points unless the base is definitely available at the GC point. |
duke@435 | 2817 | |
adlertz@5635 | 2818 | Node *m = b->get_node(i); |
duke@435 | 2819 | |
duke@435 | 2820 | // Add precedence edge from following safepoint to use of derived pointer |
duke@435 | 2821 | if( last_safept_node != end_node && |
duke@435 | 2822 | m != last_safept_node) { |
duke@435 | 2823 | for (uint k = 1; k < m->req(); k++) { |
duke@435 | 2824 | const Type *t = m->in(k)->bottom_type(); |
duke@435 | 2825 | if( t->isa_oop_ptr() && |
duke@435 | 2826 | t->is_ptr()->offset() != 0 ) { |
duke@435 | 2827 | last_safept_node->add_prec( m ); |
duke@435 | 2828 | break; |
duke@435 | 2829 | } |
duke@435 | 2830 | } |
duke@435 | 2831 | } |
duke@435 | 2832 | |
duke@435 | 2833 | if( n->jvms() ) { // Precedence edge from derived to safept |
duke@435 | 2834 | // Check if last_safept_node was moved by pinch-point insertion in anti_do_use() |
adlertz@5635 | 2835 | if( b->get_node(last_safept) != last_safept_node ) { |
duke@435 | 2836 | last_safept = b->find_node(last_safept_node); |
duke@435 | 2837 | } |
duke@435 | 2838 | for( uint j=last_safept; j > i; j-- ) { |
adlertz@5635 | 2839 | Node *mach = b->get_node(j); |
duke@435 | 2840 | if( mach->is_Mach() && mach->as_Mach()->ideal_Opcode() == Op_AddP ) |
duke@435 | 2841 | mach->add_prec( n ); |
duke@435 | 2842 | } |
duke@435 | 2843 | last_safept = i; |
duke@435 | 2844 | last_safept_node = m; |
duke@435 | 2845 | } |
duke@435 | 2846 | } |
duke@435 | 2847 | |
duke@435 | 2848 | if (fat_proj_seen) { |
duke@435 | 2849 | // Garbage collect pinch nodes that were not consumed. |
duke@435 | 2850 | // They are usually created by a fat kill MachProj for a call. |
duke@435 | 2851 | garbage_collect_pinch_nodes(); |
duke@435 | 2852 | } |
duke@435 | 2853 | } |
duke@435 | 2854 | |
duke@435 | 2855 | // Garbage collect pinch nodes for reuse by other blocks. |
duke@435 | 2856 | // |
duke@435 | 2857 | // The block scheduler's insertion of anti-dependence |
duke@435 | 2858 | // edges creates many pinch nodes when the block contains |
duke@435 | 2859 | // 2 or more Calls. A pinch node is used to prevent a |
duke@435 | 2860 | // combinatorial explosion of edges. If a set of kills for a |
duke@435 | 2861 | // register is anti-dependent on a set of uses (or defs), rather |
duke@435 | 2862 | // than adding an edge in the graph between each pair of kill |
duke@435 | 2863 | // and use (or def), a pinch is inserted between them: |
duke@435 | 2864 | // |
duke@435 | 2865 | // use1 use2 use3 |
duke@435 | 2866 | // \ | / |
duke@435 | 2867 | // \ | / |
duke@435 | 2868 | // pinch |
duke@435 | 2869 | // / | \ |
duke@435 | 2870 | // / | \ |
duke@435 | 2871 | // kill1 kill2 kill3 |
duke@435 | 2872 | // |
duke@435 | 2873 | // One pinch node is created per register killed when |
duke@435 | 2874 | // the second call is encountered during a backwards pass |
duke@435 | 2875 | // over the block. Most of these pinch nodes are never |
duke@435 | 2876 | // wired into the graph because the register is never |
duke@435 | 2877 | // used or def'ed in the block. |
duke@435 | 2878 | // |
duke@435 | 2879 | void Scheduling::garbage_collect_pinch_nodes() { |
duke@435 | 2880 | #ifndef PRODUCT |
duke@435 | 2881 | if (_cfg->C->trace_opto_output()) tty->print("Reclaimed pinch nodes:"); |
duke@435 | 2882 | #endif |
duke@435 | 2883 | int trace_cnt = 0; |
duke@435 | 2884 | for (uint k = 0; k < _reg_node.Size(); k++) { |
duke@435 | 2885 | Node* pinch = _reg_node[k]; |
adlertz@5509 | 2886 | if ((pinch != NULL) && pinch->Opcode() == Op_Node && |
duke@435 | 2887 | // no predecence input edges |
duke@435 | 2888 | (pinch->req() == pinch->len() || pinch->in(pinch->req()) == NULL) ) { |
duke@435 | 2889 | cleanup_pinch(pinch); |
duke@435 | 2890 | _pinch_free_list.push(pinch); |
duke@435 | 2891 | _reg_node.map(k, NULL); |
duke@435 | 2892 | #ifndef PRODUCT |
duke@435 | 2893 | if (_cfg->C->trace_opto_output()) { |
duke@435 | 2894 | trace_cnt++; |
duke@435 | 2895 | if (trace_cnt > 40) { |
duke@435 | 2896 | tty->print("\n"); |
duke@435 | 2897 | trace_cnt = 0; |
duke@435 | 2898 | } |
duke@435 | 2899 | tty->print(" %d", pinch->_idx); |
duke@435 | 2900 | } |
duke@435 | 2901 | #endif |
duke@435 | 2902 | } |
duke@435 | 2903 | } |
duke@435 | 2904 | #ifndef PRODUCT |
duke@435 | 2905 | if (_cfg->C->trace_opto_output()) tty->print("\n"); |
duke@435 | 2906 | #endif |
duke@435 | 2907 | } |
duke@435 | 2908 | |
duke@435 | 2909 | // Clean up a pinch node for reuse. |
duke@435 | 2910 | void Scheduling::cleanup_pinch( Node *pinch ) { |
duke@435 | 2911 | assert (pinch && pinch->Opcode() == Op_Node && pinch->req() == 1, "just checking"); |
duke@435 | 2912 | |
duke@435 | 2913 | for (DUIterator_Last imin, i = pinch->last_outs(imin); i >= imin; ) { |
duke@435 | 2914 | Node* use = pinch->last_out(i); |
duke@435 | 2915 | uint uses_found = 0; |
duke@435 | 2916 | for (uint j = use->req(); j < use->len(); j++) { |
duke@435 | 2917 | if (use->in(j) == pinch) { |
duke@435 | 2918 | use->rm_prec(j); |
duke@435 | 2919 | uses_found++; |
duke@435 | 2920 | } |
duke@435 | 2921 | } |
duke@435 | 2922 | assert(uses_found > 0, "must be a precedence edge"); |
duke@435 | 2923 | i -= uses_found; // we deleted 1 or more copies of this edge |
duke@435 | 2924 | } |
duke@435 | 2925 | // May have a later_def entry |
duke@435 | 2926 | pinch->set_req(0, NULL); |
duke@435 | 2927 | } |
duke@435 | 2928 | |
duke@435 | 2929 | #ifndef PRODUCT |
duke@435 | 2930 | |
duke@435 | 2931 | void Scheduling::dump_available() const { |
duke@435 | 2932 | tty->print("#Availist "); |
duke@435 | 2933 | for (uint i = 0; i < _available.size(); i++) |
duke@435 | 2934 | tty->print(" N%d/l%d", _available[i]->_idx,_current_latency[_available[i]->_idx]); |
duke@435 | 2935 | tty->cr(); |
duke@435 | 2936 | } |
duke@435 | 2937 | |
duke@435 | 2938 | // Print Scheduling Statistics |
duke@435 | 2939 | void Scheduling::print_statistics() { |
duke@435 | 2940 | // Print the size added by nops for bundling |
duke@435 | 2941 | tty->print("Nops added %d bytes to total of %d bytes", |
duke@435 | 2942 | _total_nop_size, _total_method_size); |
duke@435 | 2943 | if (_total_method_size > 0) |
duke@435 | 2944 | tty->print(", for %.2f%%", |
duke@435 | 2945 | ((double)_total_nop_size) / ((double) _total_method_size) * 100.0); |
duke@435 | 2946 | tty->print("\n"); |
duke@435 | 2947 | |
duke@435 | 2948 | // Print the number of branch shadows filled |
duke@435 | 2949 | if (Pipeline::_branch_has_delay_slot) { |
duke@435 | 2950 | tty->print("Of %d branches, %d had unconditional delay slots filled", |
duke@435 | 2951 | _total_branches, _total_unconditional_delays); |
duke@435 | 2952 | if (_total_branches > 0) |
duke@435 | 2953 | tty->print(", for %.2f%%", |
duke@435 | 2954 | ((double)_total_unconditional_delays) / ((double)_total_branches) * 100.0); |
duke@435 | 2955 | tty->print("\n"); |
duke@435 | 2956 | } |
duke@435 | 2957 | |
duke@435 | 2958 | uint total_instructions = 0, total_bundles = 0; |
duke@435 | 2959 | |
duke@435 | 2960 | for (uint i = 1; i <= Pipeline::_max_instrs_per_cycle; i++) { |
duke@435 | 2961 | uint bundle_count = _total_instructions_per_bundle[i]; |
duke@435 | 2962 | total_instructions += bundle_count * i; |
duke@435 | 2963 | total_bundles += bundle_count; |
duke@435 | 2964 | } |
duke@435 | 2965 | |
duke@435 | 2966 | if (total_bundles > 0) |
duke@435 | 2967 | tty->print("Average ILP (excluding nops) is %.2f\n", |
duke@435 | 2968 | ((double)total_instructions) / ((double)total_bundles)); |
duke@435 | 2969 | } |
duke@435 | 2970 | #endif |