Tue, 22 Oct 2013 09:51:47 +0200
8026251: New type profiling points: parameters to methods
Summary: x86 interpreter and c1 type profiling for parameters on method entries
Reviewed-by: kvn, twisti
duke@435 | 1 | /* |
coleenp@4037 | 2 | * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. |
duke@435 | 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
duke@435 | 4 | * |
duke@435 | 5 | * This code is free software; you can redistribute it and/or modify it |
duke@435 | 6 | * under the terms of the GNU General Public License version 2 only, as |
duke@435 | 7 | * published by the Free Software Foundation. |
duke@435 | 8 | * |
duke@435 | 9 | * This code is distributed in the hope that it will be useful, but WITHOUT |
duke@435 | 10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
duke@435 | 11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
duke@435 | 12 | * version 2 for more details (a copy is included in the LICENSE file that |
duke@435 | 13 | * accompanied this code). |
duke@435 | 14 | * |
duke@435 | 15 | * You should have received a copy of the GNU General Public License version |
duke@435 | 16 | * 2 along with this work; if not, write to the Free Software Foundation, |
duke@435 | 17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
duke@435 | 18 | * |
trims@1907 | 19 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
trims@1907 | 20 | * or visit www.oracle.com if you need additional information or have any |
trims@1907 | 21 | * questions. |
duke@435 | 22 | * |
duke@435 | 23 | */ |
duke@435 | 24 | |
stefank@2314 | 25 | #include "precompiled.hpp" |
twisti@4318 | 26 | #include "asm/macroAssembler.hpp" |
stefank@2314 | 27 | #include "code/vtableStubs.hpp" |
roland@5987 | 28 | #include "interp_masm_x86.hpp" |
stefank@2314 | 29 | #include "memory/resourceArea.hpp" |
stefank@2314 | 30 | #include "oops/instanceKlass.hpp" |
stefank@2314 | 31 | #include "oops/klassVtable.hpp" |
stefank@2314 | 32 | #include "runtime/sharedRuntime.hpp" |
stefank@2314 | 33 | #include "vmreg_x86.inline.hpp" |
stefank@2314 | 34 | #ifdef COMPILER2 |
stefank@2314 | 35 | #include "opto/runtime.hpp" |
stefank@2314 | 36 | #endif |
duke@435 | 37 | |
duke@435 | 38 | // machine-dependent part of VtableStubs: create VtableStub of correct size and |
duke@435 | 39 | // initialize its code |
duke@435 | 40 | |
// Assembler shorthand: "__ foo(...)" expands to "masm->foo(...)", so any
// function that uses it needs a local variable named masm in scope.
duke@435 | 41 | #define __ masm-> |
duke@435 | 42 | |
// Non-PRODUCT builds only: declaration of the C-linkage routine that the
// DebugVtables check in create_vtable_stub invokes through call_VM when the
// vtable length comparison fails. Only the declaration appears here; the
// definition lives elsewhere.
duke@435 | 43 | #ifndef PRODUCT |
duke@435 | 44 | extern "C" void bad_compiled_vtable_index(JavaThread* thread, |
duke@435 | 45 | oop receiver, |
duke@435 | 46 | int index); |
duke@435 | 47 | #endif |
duke@435 | 48 | |
// Builds the amd64 vtable dispatch stub for vtable_index.
//
// Allocates a VtableStub whose code area is pd_code_size_limit(true) long and
// emits into it: a load of the receiver's klass from j_rarg0, a lookup of the
// Method* at vtable_index, and an indirect jump through
// Method::from_compiled_offset().
//
// Returns NULL if the stub could not be allocated. Otherwise returns the stub
// after recording two exception points: npe_addr (pc of the klass load) and
// ame_addr (pc of the final indirect jump).
duke@435 | 49 | VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { |
duke@435 | 50 | const int amd64_code_length = VtableStub::pd_code_size_limit(true); |
duke@435 | 51 | VtableStub* s = new(amd64_code_length) VtableStub(true, vtable_index); |
anoll@5762 | 52 | // Can be NULL if there is no free space in the code cache. |
anoll@5762 | 53 | if (s == NULL) { |
anoll@5762 | 54 | return NULL; |
anoll@5762 | 55 | } |
anoll@5762 | 56 | |
duke@435 | 57 | ResourceMark rm; |
duke@435 | 58 | CodeBuffer cb(s->entry_point(), amd64_code_length); |
duke@435 | 59 | MacroAssembler* masm = new MacroAssembler(&cb); |
duke@435 | 60 | |
duke@435 | 61 | #ifndef PRODUCT |
duke@435 | 62 | if (CountCompiledCalls) { |
duke@435 | 63 | __ incrementl(ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr())); |
duke@435 | 64 | } |
duke@435 | 65 | #endif |
duke@435 | 66 | |
duke@435 | 67 | // receiver is passed in a register (j_rarg0), as the assert below checks |
duke@435 | 68 | assert(VtableStub::receiver_location() == j_rarg0->as_VMReg(), "receiver expected in j_rarg0"); |
duke@435 | 69 | |
duke@435 | 70 | // Free registers (non-args) are rax, rbx |
duke@435 | 71 | |
duke@435 | 72 | // get receiver klass |
duke@435 | 73 | address npe_addr = __ pc(); |
coleenp@548 | 74 | __ load_klass(rax, j_rarg0); |
duke@435 | 75 | |
duke@435 | 76 | #ifndef PRODUCT |
duke@435 | 77 | if (DebugVtables) { |
duke@435 | 78 | Label L; |
duke@435 | 79 | // check offset vs vtable length |
coleenp@4037 | 80 | __ cmpl(Address(rax, InstanceKlass::vtable_length_offset() * wordSize), |
duke@435 | 81 | vtable_index * vtableEntry::size()); |
duke@435 | 82 | __ jcc(Assembler::greater, L); |
duke@435 | 83 | __ movl(rbx, vtable_index); |
duke@435 | 84 | __ call_VM(noreg, |
duke@435 | 85 | CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), j_rarg0, rbx); |
duke@435 | 86 | __ bind(L); |
duke@435 | 87 | } |
duke@435 | 88 | #endif // !PRODUCT |
duke@435 | 89 | |
coleenp@4037 | 90 | // load Method* and target address |
duke@435 | 91 | const Register method = rbx; |
duke@435 | 92 | |
twisti@3969 | 93 | __ lookup_virtual_method(rax, vtable_index, method); |
twisti@3969 | 94 | |
// DebugVtables only: emit a stop() that is reached when the Method* is
// non-NULL but its from_compiled entry is NULL; a NULL Method* skips the check.
duke@435 | 95 | if (DebugVtables) { |
duke@435 | 96 | Label L; |
never@739 | 97 | __ cmpptr(method, (int32_t)NULL_WORD); |
duke@435 | 98 | __ jcc(Assembler::equal, L); |
coleenp@4037 | 99 | __ cmpptr(Address(method, Method::from_compiled_offset()), (int32_t)NULL_WORD); |
duke@435 | 100 | __ jcc(Assembler::notZero, L); |
duke@435 | 101 | __ stop("Vtable entry is NULL"); |
duke@435 | 102 | __ bind(L); |
duke@435 | 103 | } |
duke@435 | 104 | // rax: receiver klass |
coleenp@4037 | 105 | // rbx: Method* |
duke@435 | 106 | // j_rarg0: receiver |
duke@435 | 107 | address ame_addr = __ pc(); |
coleenp@4037 | 108 | __ jmp( Address(rbx, Method::from_compiled_offset())); |
duke@435 | 109 | |
duke@435 | 110 | __ flush(); |
jrose@1058 | 111 | |
jrose@1058 | 112 | if (PrintMiscellaneous && (WizardMode || Verbose)) { |
jrose@1058 | 113 | tty->print_cr("vtable #%d at "PTR_FORMAT"[%d] left over: %d", |
jrose@1058 | 114 | vtable_index, s->entry_point(), |
jrose@1058 | 115 | (int)(s->code_end() - s->entry_point()), |
jrose@1058 | 116 | (int)(s->code_end() - __ pc())); |
jrose@1058 | 117 | } |
jrose@1058 | 118 | guarantee(__ pc() <= s->code_end(), "overflowed buffer"); |
jrose@1144 | 119 | // shut the door on sizing bugs |
jrose@1144 | 120 | int slop = 3; // 32-bit offset is this much larger than an 8-bit one |
jrose@1144 | 121 | assert(vtable_index > 10 || __ pc() + slop <= s->code_end(), "room for 32-bit offset"); |
jrose@1058 | 122 | |
duke@435 | 123 | s->set_exception_points(npe_addr, ame_addr); |
duke@435 | 124 | return s; |
duke@435 | 125 | } |
duke@435 | 126 | |
duke@435 | 127 | |
// Builds the amd64 itable dispatch stub for itable_index.
//
// Allocates a VtableStub whose code area is pd_code_size_limit(false) long and
// emits into it: a load of the receiver's klass from j_rarg0 into r10, a call
// to lookup_interface_method (interface in rax, result Method* in rbx, r11 as
// scan temp), and an indirect jump through Method::from_compiled_offset().
// The lookup's failure label (throw_icce) is bound to a jump to
// StubRoutines::throw_IncompatibleClassChangeError_entry().
//
// Returns NULL if the stub could not be allocated. Otherwise returns the stub
// after recording two exception points: npe_addr (pc of the klass load) and
// ame_addr (pc of the indirect jump to the method).
jrose@1058 | 128 | VtableStub* VtableStubs::create_itable_stub(int itable_index) { |
duke@435 | 129 | // Note well: pd_code_size_limit is the absolute minimum we can get |
duke@435 | 130 | // away with. If you add code here, bump the code stub size |
duke@435 | 131 | // returned by pd_code_size_limit! |
duke@435 | 132 | const int amd64_code_length = VtableStub::pd_code_size_limit(false); |
jrose@1058 | 133 | VtableStub* s = new(amd64_code_length) VtableStub(false, itable_index); |
anoll@5762 | 134 | // Can be NULL if there is no free space in the code cache. |
anoll@5762 | 135 | if (s == NULL) { |
anoll@5762 | 136 | return NULL; |
anoll@5762 | 137 | } |
anoll@5762 | 138 | |
duke@435 | 139 | ResourceMark rm; |
duke@435 | 140 | CodeBuffer cb(s->entry_point(), amd64_code_length); |
duke@435 | 141 | MacroAssembler* masm = new MacroAssembler(&cb); |
duke@435 | 142 | |
duke@435 | 143 | #ifndef PRODUCT |
duke@435 | 144 | if (CountCompiledCalls) { |
duke@435 | 145 | __ incrementl(ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr())); |
duke@435 | 146 | } |
duke@435 | 147 | #endif |
duke@435 | 148 | |
duke@435 | 149 | // Entry arguments: |
duke@435 | 150 | // rax: Interface |
duke@435 | 151 | // j_rarg0: Receiver |
duke@435 | 152 | |
duke@435 | 153 | // Free registers (non-args) are rax (interface), rbx |
duke@435 | 154 | |
duke@435 | 155 | // receiver is passed in a register (j_rarg0), as the assert below checks |
duke@435 | 156 | |
duke@435 | 157 | assert(VtableStub::receiver_location() == j_rarg0->as_VMReg(), "receiver expected in j_rarg0"); |
duke@435 | 158 | // get receiver klass (also an implicit null-check) |
duke@435 | 159 | address npe_addr = __ pc(); |
duke@435 | 160 | |
jrose@1058 | 161 | // Most registers are in use; we'll use rax, rbx, r10, r11 |
jrose@1058 | 162 | // (various calling sequences use r[cd]x, r[sd]i, r[89]; stay away from them) |
jrose@1058 | 163 | __ load_klass(r10, j_rarg0); |
duke@435 | 164 | |
duke@435 | 165 | // If we take a trap while this arg is on the stack we will not |
duke@435 | 166 | // be able to walk the stack properly. This is not an issue except |
duke@435 | 167 | // when there are mistakes in this assembly code that could generate |
duke@435 | 168 | // a spurious fault. Ask me how I know... |
duke@435 | 169 | |
jrose@1058 | 170 | const Register method = rbx; |
jrose@1058 | 171 | Label throw_icce; |
duke@435 | 172 | |
coleenp@4037 | 173 | // Get Method* and entrypoint for compiler |
jrose@1058 | 174 | __ lookup_interface_method(// inputs: rec. class, interface, itable index |
jrose@1058 | 175 | r10, rax, itable_index, |
jrose@1058 | 176 | // outputs: method, scan temp. reg |
jrose@1058 | 177 | method, r11, |
jrose@1058 | 178 | throw_icce); |
duke@435 | 179 | |
coleenp@4037 | 180 | // method (rbx): Method* |
duke@435 | 181 | // j_rarg0: receiver |
duke@435 | 182 | |
duke@435 | 183 | #ifdef ASSERT |
// DebugVtables only: emit a stop() that is reached when the Method* is
// non-NULL but its from_compiled entry is NULL; a NULL Method* skips the check.
dcubed@451 | 184 | if (DebugVtables) { |
dcubed@451 | 185 | Label L2; |
never@739 | 186 | __ cmpptr(method, (int32_t)NULL_WORD); |
dcubed@451 | 187 | __ jcc(Assembler::equal, L2); |
coleenp@4037 | 188 | __ cmpptr(Address(method, Method::from_compiled_offset()), (int32_t)NULL_WORD); |
dcubed@451 | 189 | __ jcc(Assembler::notZero, L2); |
dcubed@451 | 190 | __ stop("compiler entrypoint is null"); |
dcubed@451 | 191 | __ bind(L2); |
dcubed@451 | 192 | } |
duke@435 | 193 | #endif // ASSERT |
duke@435 | 194 | |
coleenp@4037 | 195 | // rbx: Method* |
dcubed@451 | 196 | // j_rarg0: receiver |
dcubed@451 | 197 | address ame_addr = __ pc(); |
coleenp@4037 | 198 | __ jmp(Address(method, Method::from_compiled_offset())); |
dcubed@451 | 199 | |
dcubed@451 | 200 | __ bind(throw_icce); |
dcubed@451 | 201 | __ jump(RuntimeAddress(StubRoutines::throw_IncompatibleClassChangeError_entry())); |
duke@435 | 202 | |
duke@435 | 203 | __ flush(); |
dcubed@451 | 204 | |
jrose@1058 | 205 | if (PrintMiscellaneous && (WizardMode || Verbose)) { |
jrose@1058 | 206 | tty->print_cr("itable #%d at "PTR_FORMAT"[%d] left over: %d", |
jrose@1058 | 207 | itable_index, s->entry_point(), |
jrose@1058 | 208 | (int)(s->code_end() - s->entry_point()), |
jrose@1058 | 209 | (int)(s->code_end() - __ pc())); |
jrose@1058 | 210 | } |
dcubed@451 | 211 | guarantee(__ pc() <= s->code_end(), "overflowed buffer"); |
jrose@1144 | 212 | // shut the door on sizing bugs |
jrose@1144 | 213 | int slop = 3; // 32-bit offset is this much larger than an 8-bit one |
jrose@1144 | 214 | assert(itable_index > 10 || __ pc() + slop <= s->code_end(), "room for 32-bit offset"); |
dcubed@451 | 215 | |
duke@435 | 216 | s->set_exception_points(npe_addr, ame_addr); |
duke@435 | 217 | return s; |
duke@435 | 218 | } |
duke@435 | 219 | |
// Returns the code size limit for a stub; create_vtable_stub and
// create_itable_stub use it both as the VtableStub allocation size and as the
// CodeBuffer length.
//
// is_vtable_stub selects the base size: 24 for a vtable stub, 74 for an itable
// stub, or 512 for either when DebugVtables is set. On top of the base, 13 is
// added when CountCompiledCalls is set, and
// MacroAssembler::instr_size_for_decode_klass_not_null() is added when
// UseCompressedClassPointers is set.
//
// The comment block after the if/else follows both return statements; it is
// tuning guidance only and guards no code.
duke@435 | 220 | int VtableStub::pd_code_size_limit(bool is_vtable_stub) { |
duke@435 | 221 | if (is_vtable_stub) { |
duke@435 | 222 | // Vtable stub size |
coleenp@548 | 223 | return (DebugVtables ? 512 : 24) + (CountCompiledCalls ? 13 : 0) + |
ehelin@5694 | 224 | (UseCompressedClassPointers ? MacroAssembler::instr_size_for_decode_klass_not_null() : 0); |
duke@435 | 225 | } else { |
duke@435 | 226 | // Itable stub size |
iveresov@2138 | 227 | return (DebugVtables ? 512 : 74) + (CountCompiledCalls ? 13 : 0) + |
ehelin@5694 | 228 | (UseCompressedClassPointers ? MacroAssembler::instr_size_for_decode_klass_not_null() : 0); |
duke@435 | 229 | } |
jrose@1144 | 230 | // In order to tune these parameters, run the JVM with VM options |
jrose@1144 | 231 | // +PrintMiscellaneous and +WizardMode to see information about |
jrose@1144 | 232 | // actual itable stubs. Look for lines like this: |
jrose@1144 | 233 | // itable #1 at 0x5551212[71] left over: 3 |
jrose@1144 | 234 | // Reduce the constants so that the "left over" number is >=3 |
jrose@1144 | 235 | // for the common cases. |
jrose@1144 | 236 | // Do not aim at a left-over number of zero, because a |
jrose@1144 | 237 | // large vtable or itable index (>= 32) will require a 32-bit |
jrose@1144 | 238 | // immediate displacement instead of an 8-bit one. |
jrose@1144 | 239 | // |
jrose@1144 | 240 | // The JVM98 app. _202_jess has a megamorphic interface call. |
jrose@1144 | 241 | // The itable code looks like this: |
jrose@1144 | 242 | // Decoding VtableStub itbl[1]@12 |
jrose@1144 | 243 | // mov 0x8(%rsi),%r10 |
jrose@1144 | 244 | // mov 0x198(%r10),%r11d |
jrose@1144 | 245 | // lea 0x218(%r10,%r11,8),%r11 |
jrose@1144 | 246 | // lea 0x8(%r10),%r10 |
jrose@1144 | 247 | // mov (%r11),%rbx |
jrose@1144 | 248 | // cmp %rbx,%rax |
jrose@1144 | 249 | // je success |
jrose@1144 | 250 | // loop: |
jrose@1144 | 251 | // test %rbx,%rbx |
jrose@1144 | 252 | // je throw_icce |
jrose@1144 | 253 | // add $0x10,%r11 |
jrose@1144 | 254 | // mov (%r11),%rbx |
jrose@1144 | 255 | // cmp %rbx,%rax |
jrose@1144 | 256 | // jne loop |
jrose@1144 | 257 | // success: |
jrose@1144 | 258 | // mov 0x8(%r11),%r11d |
jrose@1144 | 259 | // mov (%r10,%r11,1),%rbx |
jrose@1144 | 260 | // jmpq *0x60(%rbx) |
jrose@1144 | 261 | // throw_icce: |
jrose@1144 | 262 | // jmpq throw_ICCE_entry |
duke@435 | 263 | } |
duke@435 | 264 | |
// Returns wordSize as the platform-dependent code alignment value.
duke@435 | 265 | int VtableStub::pd_code_alignment() { |
duke@435 | 266 | return wordSize; |
duke@435 | 267 | } |