--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Wed Apr 27 01:25:04 2016 +0800
@@ -0,0 +1,4105 @@
+/*
+ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "code/debugInfoRec.hpp"
+#include "code/icBuffer.hpp"
+#include "code/vtableStubs.hpp"
+#include "interpreter/interpreter.hpp"
+#include "oops/compiledICHolder.hpp"
+#include "prims/jvmtiRedefineClassesTrace.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/vframeArray.hpp"
+#include "vmreg_x86.inline.hpp"
+#ifdef COMPILER1
+#include "c1/c1_Runtime1.hpp"
+#endif
+#ifdef COMPILER2
+#include "opto/runtime.hpp"
+#endif
+
+#define __ masm->
+
+const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size;
+
+class SimpleRuntimeFrame {
+
+  public:
+
+  // Most of the runtime stubs have this simple frame layout.
+  // This class exists to make the layout shared in one place.
+  // Offsets are for compiler stack slots, which are jints.
+  enum layout {
+    // The frame sender code expects that rbp will be in the "natural" place and
+    // will override any oopMap setting for it. We must therefore force the layout
+    // so that it agrees with the frame sender code.
+    rbp_off = frame::arg_reg_save_area_bytes/BytesPerInt,
+    rbp_off2,
+    return_off, return_off2,
+    framesize
+  };
+};
+
+class RegisterSaver {
+  // Capture info about frame layout.  Layout offsets are in jint
+  // units because compiler frame slots are jints.
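+  // Each 16-byte XMM register spans four jint slots of the fxsave area; the
+  // macro below names the low two so the OopMap can describe a 64-bit value.
+  // For example, DEF_XMM_OFFS(0) expands to
+  //   xmm0_off = xmm_off + 0*16/BytesPerInt, xmm0H_off
+  // with xmm0H_off taking the next enum value; the upper 8 bytes of each
+  // register remain unnamed.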
+#define DEF_XMM_OFFS(regnum) xmm ## regnum ## _off = xmm_off + (regnum)*16/BytesPerInt, xmm ## regnum ## H_off
+  enum layout {
+    fpu_state_off = frame::arg_reg_save_area_bytes/BytesPerInt, // fxsave save area
+    xmm_off       = fpu_state_off + 160/BytesPerInt,            // offset in fxsave save area
+    DEF_XMM_OFFS(0),
+    DEF_XMM_OFFS(1),
+    DEF_XMM_OFFS(2),
+    DEF_XMM_OFFS(3),
+    DEF_XMM_OFFS(4),
+    DEF_XMM_OFFS(5),
+    DEF_XMM_OFFS(6),
+    DEF_XMM_OFFS(7),
+    DEF_XMM_OFFS(8),
+    DEF_XMM_OFFS(9),
+    DEF_XMM_OFFS(10),
+    DEF_XMM_OFFS(11),
+    DEF_XMM_OFFS(12),
+    DEF_XMM_OFFS(13),
+    DEF_XMM_OFFS(14),
+    DEF_XMM_OFFS(15),
+    fpu_state_end = fpu_state_off + ((FPUStateSizeInWords-1)*wordSize / BytesPerInt),
+    fpu_stateH_end,
+    r15_off, r15H_off,
+    r14_off, r14H_off,
+    r13_off, r13H_off,
+    r12_off, r12H_off,
+    r11_off, r11H_off,
+    r10_off, r10H_off,
+    r9_off,  r9H_off,
+    r8_off,  r8H_off,
+    rdi_off, rdiH_off,
+    rsi_off, rsiH_off,
+    ignore_off, ignoreH_off,  // extra copy of rbp
+    rsp_off, rspH_off,
+    rbx_off, rbxH_off,
+    rdx_off, rdxH_off,
+    rcx_off, rcxH_off,
+    rax_off, raxH_off,
+    // 16-byte stack alignment fill word: see MacroAssembler::push/pop_IU_state
+    align_off, alignH_off,
+    flags_off, flagsH_off,
+    // The frame sender code expects that rbp will be in the "natural" place and
+    // will override any oopMap setting for it. We must therefore force the layout
+    // so that it agrees with the frame sender code.
+    rbp_off, rbpH_off,        // copy of rbp we will restore
+    return_off, returnH_off,  // slot for return address
+    reg_save_size             // size in compiler stack slots
+  };
+
+ public:
+  static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors = false);
+  static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false);
+
+  // Offsets into the register save area
+  // Used by deoptimization when it is managing result register
+  // values on its own
+
+  static int rax_offset_in_bytes(void)    { return BytesPerInt * rax_off; }
+  static int rdx_offset_in_bytes(void)    { return BytesPerInt * rdx_off; }
+  static int rbx_offset_in_bytes(void)    { return BytesPerInt * rbx_off; }
+  static int xmm0_offset_in_bytes(void)   { return BytesPerInt * xmm0_off; }
+  static int return_offset_in_bytes(void) { return BytesPerInt * return_off; }
+
+  // During deoptimization only the result registers need to be restored,
+  // all the other values have already been extracted.
+  static void restore_result_registers(MacroAssembler* masm);
+};
+
+OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
+  int vect_words = 0;
+#ifdef COMPILER2
+  if (save_vectors) {
+    assert(UseAVX > 0, "256bit vectors are supported only with AVX");
+    assert(MaxVectorSize == 32, "only 256bit vectors are supported now");
+    // Save upper half of YMM registers
+    vect_words = 16 * 16 / wordSize;
+    additional_frame_words += vect_words;
+  }
+#else
+  assert(!save_vectors, "vectors are generated only by C2");
+#endif
+
+  // Always make the frame size 16-byte aligned
+  int frame_size_in_bytes = round_to(additional_frame_words*wordSize +
+                                     reg_save_size*BytesPerInt, 16);
+  // OopMap frame size is in compiler stack slots (jint's) not bytes or words
+  int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
+  // The caller will allocate additional_frame_words
+  int additional_frame_slots = additional_frame_words*wordSize / BytesPerInt;
+  // CodeBlob frame size is in words.
+  int frame_size_in_words = frame_size_in_bytes / wordSize;
+  *total_frame_words = frame_size_in_words;
+
+  // Save registers, fpu state, and flags.
+  // We assume the caller has already pushed the return address onto the
+  // stack, so rsp is 8-byte aligned here.
+  // We push rbp twice in this sequence because we want the real rbp
+  // to be under the return address, like a normal enter.
+
+  __ enter();          // rsp becomes 16-byte aligned here
+  __ push_CPU_state(); // Push a multiple of 16 bytes
+
+  if (vect_words > 0) {
+    assert(vect_words*wordSize == 256, "");
+    __ subptr(rsp, 256); // Save upper half of YMM registers
+    __ vextractf128h(Address(rsp,  0), xmm0);
+    __ vextractf128h(Address(rsp, 16), xmm1);
+    __ vextractf128h(Address(rsp, 32), xmm2);
+    __ vextractf128h(Address(rsp, 48), xmm3);
+    __ vextractf128h(Address(rsp, 64), xmm4);
+    __ vextractf128h(Address(rsp, 80), xmm5);
+    __ vextractf128h(Address(rsp, 96), xmm6);
+    __ vextractf128h(Address(rsp,112), xmm7);
+    __ vextractf128h(Address(rsp,128), xmm8);
+    __ vextractf128h(Address(rsp,144), xmm9);
+    __ vextractf128h(Address(rsp,160), xmm10);
+    __ vextractf128h(Address(rsp,176), xmm11);
+    __ vextractf128h(Address(rsp,192), xmm12);
+    __ vextractf128h(Address(rsp,208), xmm13);
+    __ vextractf128h(Address(rsp,224), xmm14);
+    __ vextractf128h(Address(rsp,240), xmm15);
+  }
+  if (frame::arg_reg_save_area_bytes != 0) {
+    // Allocate argument register save area
+    __ subptr(rsp, frame::arg_reg_save_area_bytes);
+  }
+
+  // Set an oopmap for the call site.  This oopmap will map all
+  // oop-registers and debug-info registers as callee-saved.  This
+  // will allow deoptimization at this safepoint to find all possible
+  // debug-info recordings, as well as let GC find all oops.
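+  // Rough sketch of the frame just built, following the layout enum above
+  // (higher addresses first; sizes illustrative):
+  //
+  //   [ return address           ]  pushed by the call
+  //   [ saved rbp                ]  __ enter()
+  //   [ flags / alignment word   ]  \
+  //   [ rax .. r15 (16 words)    ]   } __ push_CPU_state()
+  //   [ fxsave area (512 bytes)  ]  /
+  //   [ YMM upper halves (256 B) ]  only if save_vectors
+  //   [ arg_reg_save_area        ]  <-- rsp (Win64 shadow space, if any)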
+
+  OopMapSet *oop_maps = new OopMapSet();
+  OopMap* map = new OopMap(frame_size_in_slots, 0);
+
+#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_slots)
+
+  map->set_callee_saved(STACK_OFFSET( rax_off ), rax->as_VMReg());
+  map->set_callee_saved(STACK_OFFSET( rcx_off ), rcx->as_VMReg());
+  map->set_callee_saved(STACK_OFFSET( rdx_off ), rdx->as_VMReg());
+  map->set_callee_saved(STACK_OFFSET( rbx_off ), rbx->as_VMReg());
+  // rbp location is known implicitly by the frame sender code, needs no oopmap,
+  // and the location where rbp was saved is ignored
+  map->set_callee_saved(STACK_OFFSET( rsi_off ), rsi->as_VMReg());
+  map->set_callee_saved(STACK_OFFSET( rdi_off ), rdi->as_VMReg());
+  map->set_callee_saved(STACK_OFFSET( r8_off  ), r8->as_VMReg());
+  map->set_callee_saved(STACK_OFFSET( r9_off  ), r9->as_VMReg());
+  map->set_callee_saved(STACK_OFFSET( r10_off ), r10->as_VMReg());
+  map->set_callee_saved(STACK_OFFSET( r11_off ), r11->as_VMReg());
+  map->set_callee_saved(STACK_OFFSET( r12_off ), r12->as_VMReg());
+  map->set_callee_saved(STACK_OFFSET( r13_off ), r13->as_VMReg());
+  map->set_callee_saved(STACK_OFFSET( r14_off ), r14->as_VMReg());
+  map->set_callee_saved(STACK_OFFSET( r15_off ), r15->as_VMReg());
+  map->set_callee_saved(STACK_OFFSET(xmm0_off ), xmm0->as_VMReg());
+  map->set_callee_saved(STACK_OFFSET(xmm1_off ), xmm1->as_VMReg());
+  map->set_callee_saved(STACK_OFFSET(xmm2_off ), xmm2->as_VMReg());
+  map->set_callee_saved(STACK_OFFSET(xmm3_off ), xmm3->as_VMReg());
+  map->set_callee_saved(STACK_OFFSET(xmm4_off ), xmm4->as_VMReg());
+  map->set_callee_saved(STACK_OFFSET(xmm5_off ), xmm5->as_VMReg());
+  map->set_callee_saved(STACK_OFFSET(xmm6_off ), xmm6->as_VMReg());
+  map->set_callee_saved(STACK_OFFSET(xmm7_off ), xmm7->as_VMReg());
+  map->set_callee_saved(STACK_OFFSET(xmm8_off ), xmm8->as_VMReg());
+  map->set_callee_saved(STACK_OFFSET(xmm9_off ), xmm9->as_VMReg());
+  map->set_callee_saved(STACK_OFFSET(xmm10_off), xmm10->as_VMReg());
+  map->set_callee_saved(STACK_OFFSET(xmm11_off), xmm11->as_VMReg());
+  map->set_callee_saved(STACK_OFFSET(xmm12_off), xmm12->as_VMReg());
+  map->set_callee_saved(STACK_OFFSET(xmm13_off), xmm13->as_VMReg());
+  map->set_callee_saved(STACK_OFFSET(xmm14_off), xmm14->as_VMReg());
+  map->set_callee_saved(STACK_OFFSET(xmm15_off), xmm15->as_VMReg());
+
+  // %%% These should all be a waste but we'll keep things as they were for now
+  if (true) {
+    map->set_callee_saved(STACK_OFFSET( raxH_off ), rax->as_VMReg()->next());
+    map->set_callee_saved(STACK_OFFSET( rcxH_off ), rcx->as_VMReg()->next());
+    map->set_callee_saved(STACK_OFFSET( rdxH_off ), rdx->as_VMReg()->next());
+    map->set_callee_saved(STACK_OFFSET( rbxH_off ), rbx->as_VMReg()->next());
+    // rbp location is known implicitly by the frame sender code, needs no oopmap
+    map->set_callee_saved(STACK_OFFSET( rsiH_off ), rsi->as_VMReg()->next());
+    map->set_callee_saved(STACK_OFFSET( rdiH_off ), rdi->as_VMReg()->next());
+    map->set_callee_saved(STACK_OFFSET( r8H_off  ), r8->as_VMReg()->next());
+    map->set_callee_saved(STACK_OFFSET( r9H_off  ), r9->as_VMReg()->next());
+    map->set_callee_saved(STACK_OFFSET( r10H_off ), r10->as_VMReg()->next());
+    map->set_callee_saved(STACK_OFFSET( r11H_off ), r11->as_VMReg()->next());
+    map->set_callee_saved(STACK_OFFSET( r12H_off ), r12->as_VMReg()->next());
+    map->set_callee_saved(STACK_OFFSET( r13H_off ), r13->as_VMReg()->next());
+    map->set_callee_saved(STACK_OFFSET( r14H_off ), r14->as_VMReg()->next());
+    map->set_callee_saved(STACK_OFFSET( r15H_off ), r15->as_VMReg()->next());
+    map->set_callee_saved(STACK_OFFSET(xmm0H_off ), xmm0->as_VMReg()->next());
+    map->set_callee_saved(STACK_OFFSET(xmm1H_off ), xmm1->as_VMReg()->next());
+    map->set_callee_saved(STACK_OFFSET(xmm2H_off ), xmm2->as_VMReg()->next());
+    map->set_callee_saved(STACK_OFFSET(xmm3H_off ), xmm3->as_VMReg()->next());
+    map->set_callee_saved(STACK_OFFSET(xmm4H_off ), xmm4->as_VMReg()->next());
+    map->set_callee_saved(STACK_OFFSET(xmm5H_off ), xmm5->as_VMReg()->next());
+    map->set_callee_saved(STACK_OFFSET(xmm6H_off ), xmm6->as_VMReg()->next());
+    map->set_callee_saved(STACK_OFFSET(xmm7H_off ), xmm7->as_VMReg()->next());
+    map->set_callee_saved(STACK_OFFSET(xmm8H_off ), xmm8->as_VMReg()->next());
+    map->set_callee_saved(STACK_OFFSET(xmm9H_off ), xmm9->as_VMReg()->next());
+    map->set_callee_saved(STACK_OFFSET(xmm10H_off), xmm10->as_VMReg()->next());
+    map->set_callee_saved(STACK_OFFSET(xmm11H_off), xmm11->as_VMReg()->next());
+    map->set_callee_saved(STACK_OFFSET(xmm12H_off), xmm12->as_VMReg()->next());
+    map->set_callee_saved(STACK_OFFSET(xmm13H_off), xmm13->as_VMReg()->next());
+    map->set_callee_saved(STACK_OFFSET(xmm14H_off), xmm14->as_VMReg()->next());
+    map->set_callee_saved(STACK_OFFSET(xmm15H_off), xmm15->as_VMReg()->next());
+  }
+
+  return map;
+}
+
+void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
+  if (frame::arg_reg_save_area_bytes != 0) {
+    // Pop arg register save area
+    __ addptr(rsp, frame::arg_reg_save_area_bytes);
+  }
+#ifdef COMPILER2
+  if (restore_vectors) {
+    // Restore upper half of YMM registers.
+    assert(UseAVX > 0, "256bit vectors are supported only with AVX");
+    assert(MaxVectorSize == 32, "only 256bit vectors are supported now");
+    __ vinsertf128h(xmm0,  Address(rsp,  0));
+    __ vinsertf128h(xmm1,  Address(rsp, 16));
+    __ vinsertf128h(xmm2,  Address(rsp, 32));
+    __ vinsertf128h(xmm3,  Address(rsp, 48));
+    __ vinsertf128h(xmm4,  Address(rsp, 64));
+    __ vinsertf128h(xmm5,  Address(rsp, 80));
+    __ vinsertf128h(xmm6,  Address(rsp, 96));
+    __ vinsertf128h(xmm7,  Address(rsp,112));
+    __ vinsertf128h(xmm8,  Address(rsp,128));
+    __ vinsertf128h(xmm9,  Address(rsp,144));
+    __ vinsertf128h(xmm10, Address(rsp,160));
+    __ vinsertf128h(xmm11, Address(rsp,176));
+    __ vinsertf128h(xmm12, Address(rsp,192));
+    __ vinsertf128h(xmm13, Address(rsp,208));
+    __ vinsertf128h(xmm14, Address(rsp,224));
+    __ vinsertf128h(xmm15, Address(rsp,240));
+    __ addptr(rsp, 256);
+  }
+#else
+  assert(!restore_vectors, "vectors are generated only by C2");
+#endif
+  // Recover CPU state
+  __ pop_CPU_state();
+  // Get the rbp described implicitly by the calling convention (no oopMap)
+  __ pop(rbp);
+}
+
+void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
+
+  // Just restore the result registers.  Only used by deoptimization.  By
+  // now any callee save register that needs to be restored to a c2
+  // caller of the deoptee has been extracted into the vframeArray
+  // and will be stuffed into the c2i adapter we create for later
+  // restoration, so only result registers need to be restored here.
+
+  // Restore fp result register
+  __ movdbl(xmm0, Address(rsp, xmm0_offset_in_bytes()));
+  // Restore integer result register
+  __ movptr(rax, Address(rsp, rax_offset_in_bytes()));
+  __ movptr(rdx, Address(rsp, rdx_offset_in_bytes()));
+
+  // Pop all of the register save area off the stack except the return address
+  __ addptr(rsp, return_offset_in_bytes());
+}
+
+// Is the vector's size (in bytes) bigger than the size saved by default?
+// 16-byte XMM registers are saved by default using fxsave/fxrstor instructions.
+bool SharedRuntime::is_wide_vector(int size) {
+  return size > 16;
+}
+
+// The java_calling_convention describes stack locations as ideal slots on
+// a frame with no abi restrictions. Since we must observe abi restrictions
+// (like the placement of the register window) the slots must be biased by
+// the following value.
+static int reg2offset_in(VMReg r) {
+  // Account for saved rbp and return address
+  // This should really be in_preserve_stack_slots
+  return (r->reg2stack() + 4) * VMRegImpl::stack_slot_size;
+}
+
+static int reg2offset_out(VMReg r) {
+  return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
+}
+
+// ---------------------------------------------------------------------------
+// Read the array of BasicTypes from a signature, and compute where the
+// arguments should go.  Values in the VMRegPair regs array refer to 4-byte
+// quantities.  Values less than VMRegImpl::stack0 are registers, those above
+// refer to 4-byte stack slots.  All stack slots are based off of the stack pointer
+// as framesizes are fixed.
+// VMRegImpl::stack0 refers to the first slot 0(sp), and VMRegImpl::stack0+1
+// refers to the memory word 4 bytes higher.  Register values up to
+// RegisterImpl::number_of_registers are the 64-bit integer registers.
+
+// Note: the INPUTS in sig_bt are in units of Java argument words, which are
+// either 32-bit or 64-bit depending on the build.  The OUTPUTS are in 32-bit
+// units regardless of build.  Of course for i486 there is no 64-bit build.
+
+// The Java calling convention is a "shifted" version of the C ABI.
+// By skipping the first C ABI register we can call non-static jni methods
+// with small numbers of arguments without having to shuffle the arguments
+// at all.  Since we control the java ABI we ought to at least get some
+// advantage out of it.
+
+int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
+                                           VMRegPair *regs,
+                                           int total_args_passed,
+                                           int is_outgoing) {
+
+  // Create the mapping between argument positions and
+  // registers.
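+  // For orientation (the authoritative assignments are the j_rarg/j_farg
+  // declarations, this is illustrative): on non-Windows x86_64,
+  // j_rarg0..j_rarg5 are expected to be rsi, rdx, rcx, r8, r9, rdi -- the
+  // C ABI sequence rotated so that c_rarg0 (rdi) is consumed last -- and
+  // j_farg0..j_farg7 are xmm0..xmm7.  A non-static JNI call can then slide
+  // the JNIEnv* into c_rarg0 without moving any Java register arguments.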
+  static const Register INT_ArgReg[Argument::n_int_register_parameters_j] = {
+    j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5
+  };
+  static const XMMRegister FP_ArgReg[Argument::n_float_register_parameters_j] = {
+    j_farg0, j_farg1, j_farg2, j_farg3,
+    j_farg4, j_farg5, j_farg6, j_farg7
+  };
+
+
+  uint int_args = 0;
+  uint fp_args = 0;
+  uint stk_args = 0; // inc by 2 each time
+
+  for (int i = 0; i < total_args_passed; i++) {
+    switch (sig_bt[i]) {
+    case T_BOOLEAN:
+    case T_CHAR:
+    case T_BYTE:
+    case T_SHORT:
+    case T_INT:
+      if (int_args < Argument::n_int_register_parameters_j) {
+        regs[i].set1(INT_ArgReg[int_args++]->as_VMReg());
+      } else {
+        regs[i].set1(VMRegImpl::stack2reg(stk_args));
+        stk_args += 2;
+      }
+      break;
+    case T_VOID:
+      // halves of T_LONG or T_DOUBLE
+      assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
+      regs[i].set_bad();
+      break;
+    case T_LONG:
+      assert(sig_bt[i + 1] == T_VOID, "expecting half");
+      // fall through
+    case T_OBJECT:
+    case T_ARRAY:
+    case T_ADDRESS:
+      if (int_args < Argument::n_int_register_parameters_j) {
+        regs[i].set2(INT_ArgReg[int_args++]->as_VMReg());
+      } else {
+        regs[i].set2(VMRegImpl::stack2reg(stk_args));
+        stk_args += 2;
+      }
+      break;
+    case T_FLOAT:
+      if (fp_args < Argument::n_float_register_parameters_j) {
+        regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg());
+      } else {
+        regs[i].set1(VMRegImpl::stack2reg(stk_args));
+        stk_args += 2;
+      }
+      break;
+    case T_DOUBLE:
+      assert(sig_bt[i + 1] == T_VOID, "expecting half");
+      if (fp_args < Argument::n_float_register_parameters_j) {
+        regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg());
+      } else {
+        regs[i].set2(VMRegImpl::stack2reg(stk_args));
+        stk_args += 2;
+      }
+      break;
+    default:
+      ShouldNotReachHere();
+      break;
+    }
+  }
+
+  return round_to(stk_args, 2);
+}
+
+// Patch the caller's callsite with the entry to compiled code if it exists.
+static void patch_callers_callsite(MacroAssembler *masm) {
+  Label L;
+  __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), (int32_t)NULL_WORD);
+  __ jcc(Assembler::equal, L);
+
+  // Save the current stack pointer
+  __ mov(r13, rsp);
+  // Schedule the branch target address early.
+  // Call into the VM to patch the caller, then jump to compiled callee
+  // rax isn't live so capture return address while we easily can
+  __ movptr(rax, Address(rsp, 0));
+
+  // align stack so push_CPU_state doesn't fault
+  __ andptr(rsp, -(StackAlignmentInBytes));
+  __ push_CPU_state();
+
+  // VM needs caller's callsite
+  // VM needs target method
+  // This needs to be a long call since we will relocate this adapter to
+  // the codeBuffer and it may not reach
+
+  // Allocate argument register save area
+  if (frame::arg_reg_save_area_bytes != 0) {
+    __ subptr(rsp, frame::arg_reg_save_area_bytes);
+  }
+  __ mov(c_rarg0, rbx);
+  __ mov(c_rarg1, rax);
+  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)));
+
+  // De-allocate argument register save area
+  if (frame::arg_reg_save_area_bytes != 0) {
+    __ addptr(rsp, frame::arg_reg_save_area_bytes);
+  }
+
+  __ pop_CPU_state();
+  // restore sp
+  __ mov(rsp, r13);
+  __ bind(L);
+}
+
+
+static void gen_c2i_adapter(MacroAssembler *masm,
+                            int total_args_passed,
+                            int comp_args_on_stack,
+                            const BasicType *sig_bt,
+                            const VMRegPair *regs,
+                            Label& skip_fixup) {
+  // Before we get into the guts of the C2I adapter, see if we should be here
+  // at all.  We've come from compiled code and are attempting to jump to the
+  // interpreter, which means the caller made a static call to get here
+  // (vcalls always get a compiled target if there is one).  Check for a
+  // compiled target.  If there is one, we need to patch the caller's call.
+  patch_callers_callsite(masm);
+
+  __ bind(skip_fixup);
+
+  // Since all args are passed on the stack, total_args_passed *
+  // Interpreter::stackElementSize is the space we need.  Plus 1 because
+  // we also account for the return address location, since we store it
+  // first rather than holding it in rax across all the shuffling.
+
+  int extraspace = (total_args_passed * Interpreter::stackElementSize) + wordSize;
+
+  // stack is aligned, keep it that way
+  extraspace = round_to(extraspace, 2*wordSize);
+
+  // Get return address
+  __ pop(rax);
+
+  // set senderSP value
+  __ mov(r13, rsp);
+
+  __ subptr(rsp, extraspace);
+
+  // Store the return address in the expected location
+  __ movptr(Address(rsp, 0), rax);
+
+  // Now write the args into the outgoing interpreter space
+  for (int i = 0; i < total_args_passed; i++) {
+    if (sig_bt[i] == T_VOID) {
+      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
+      continue;
+    }
+
+    // offset to start parameters
+    int st_off   = (total_args_passed - i) * Interpreter::stackElementSize;
+    int next_off = st_off - Interpreter::stackElementSize;
+
+    // Say 4 args:
+    // i   st_off
+    // 0   32 T_LONG
+    // 1   24 T_VOID
+    // 2   16 T_OBJECT
+    // 3    8 T_BOOL
+    // -    0 return address
+    //
+    // However, to make things extra confusing: because we can fit a
+    // long/double in a single slot on a 64-bit VM, and it would be silly to
+    // break them up, the interpreter leaves one slot empty and only stores to
+    // a single slot.  In this case the slot that is occupied is the T_VOID
+    // slot.  See, I said it was confusing.
+
+    VMReg r_1 = regs[i].first();
+    VMReg r_2 = regs[i].second();
+    if (!r_1->is_valid()) {
+      assert(!r_2->is_valid(), "");
+      continue;
+    }
+    if (r_1->is_stack()) {
+      // memory to memory use rax
+      int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
+      if (!r_2->is_valid()) {
+        // sign extend??
+        __ movl(rax, Address(rsp, ld_off));
+        __ movptr(Address(rsp, st_off), rax);
+
+      } else {
+
+        __ movq(rax, Address(rsp, ld_off));
+
+        // Two VMRegs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG
+        // T_DOUBLE and T_LONG use two slots in the interpreter
+        if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
+          // ld_off == LSW, ld_off+wordSize == MSW
+          // st_off == MSW, next_off == LSW
+          __ movq(Address(rsp, next_off), rax);
+#ifdef ASSERT
+          // Overwrite the unused slot with known junk
+          __ mov64(rax, CONST64(0xdeadffffdeadaaaa));
+          __ movptr(Address(rsp, st_off), rax);
+#endif /* ASSERT */
+        } else {
+          __ movq(Address(rsp, st_off), rax);
+        }
+      }
+    } else if (r_1->is_Register()) {
+      Register r = r_1->as_Register();
+      if (!r_2->is_valid()) {
+        // must be only an int (or smaller), so move only 32 bits to the slot
+        // why not sign extend??
+        __ movl(Address(rsp, st_off), r);
+      } else {
+        // Two VMRegs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG
+        // T_DOUBLE and T_LONG use two slots in the interpreter
+        if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
+          // long/double in gpr
+#ifdef ASSERT
+          // Overwrite the unused slot with known junk
+          __ mov64(rax, CONST64(0xdeadffffdeadaaab));
+          __ movptr(Address(rsp, st_off), rax);
+#endif /* ASSERT */
+          __ movq(Address(rsp, next_off), r);
+        } else {
+          __ movptr(Address(rsp, st_off), r);
+        }
+      }
+    } else {
+      assert(r_1->is_XMMRegister(), "");
+      if (!r_2->is_valid()) {
+        // only a float, use just part of the slot
+        __ movflt(Address(rsp, st_off), r_1->as_XMMRegister());
+      } else {
+#ifdef ASSERT
+        // Overwrite the unused slot with known junk
+        __ mov64(rax, CONST64(0xdeadffffdeadaaac));
+        __ movptr(Address(rsp, st_off), rax);
+#endif /* ASSERT */
+        __ movdbl(Address(rsp, next_off), r_1->as_XMMRegister());
+      }
+    }
+  }
+
+  // Schedule the branch target address early.
+  __ movptr(rcx, Address(rbx, in_bytes(Method::interpreter_entry_offset())));
+  __ jmp(rcx);
+}
+
+static void range_check(MacroAssembler* masm, Register pc_reg, Register temp_reg,
+                        address code_start, address code_end,
+                        Label& L_ok) {
+  Label L_fail;
+  __ lea(temp_reg, ExternalAddress(code_start));
+  __ cmpptr(pc_reg, temp_reg);
+  __ jcc(Assembler::belowEqual, L_fail);
+  __ lea(temp_reg, ExternalAddress(code_end));
+  __ cmpptr(pc_reg, temp_reg);
+  __ jcc(Assembler::below, L_ok);
+  __ bind(L_fail);
+}
+
+static void gen_i2c_adapter(MacroAssembler *masm,
+                            int total_args_passed,
+                            int comp_args_on_stack,
+                            const BasicType *sig_bt,
+                            const VMRegPair *regs) {
+
+  // Note: r13 contains the senderSP on entry.  We must preserve it since
+  // we may do an i2c -> c2i transition if we lose a race where compiled
+  // code goes non-entrant while we get the args ready.
+  // In addition we use r13 to locate all the interpreter args, as
+  // we must align the stack to 16 bytes on an i2c entry; otherwise we
+  // lose the alignment we expect in all compiled code, and the register
+  // save code can segv when fxsave instructions find an improperly
+  // aligned stack pointer.
+
+  // Adapters can be frameless because they do not require the caller
+  // to perform additional cleanup work, such as correcting the stack pointer.
+  // An i2c adapter is frameless because the *caller* frame, which is interpreted,
+  // routinely repairs its own stack pointer (from interpreter_frame_last_sp),
+  // even if a callee has modified the stack pointer.
+  // A c2i adapter is frameless because the *callee* frame, which is interpreted,
+  // routinely repairs its caller's stack pointer (from sender_sp, which is set
+  // up via the senderSP register).
+  // In other words, if *either* the caller or callee is interpreted, we can
+  // get the stack pointer repaired after a call.
+  // This is why c2i and i2c adapters cannot be indefinitely composed.
+  // In particular, if a c2i adapter were to somehow call an i2c adapter,
+  // both caller and callee would be compiled methods, and neither would
+  // clean up the stack pointer changes performed by the two adapters.
+  // If this happens, control eventually transfers back to the compiled
+  // caller, but with an uncorrected stack, causing delayed havoc.
+
+  // Pick up the return address
+  __ movptr(rax, Address(rsp, 0));
+
+  if (VerifyAdapterCalls &&
+      (Interpreter::code() != NULL || StubRoutines::code1() != NULL)) {
+    // So, let's test for cascading c2i/i2c adapters right now.
+    //  assert(Interpreter::contains($return_addr) ||
+    //         StubRoutines::contains($return_addr),
+    //         "i2c adapter must return to an interpreter frame");
+    __ block_comment("verify_i2c { ");
+    Label L_ok;
+    if (Interpreter::code() != NULL)
+      range_check(masm, rax, r11,
+                  Interpreter::code()->code_start(), Interpreter::code()->code_end(),
+                  L_ok);
+    if (StubRoutines::code1() != NULL)
+      range_check(masm, rax, r11,
+                  StubRoutines::code1()->code_begin(), StubRoutines::code1()->code_end(),
+                  L_ok);
+    if (StubRoutines::code2() != NULL)
+      range_check(masm, rax, r11,
+                  StubRoutines::code2()->code_begin(), StubRoutines::code2()->code_end(),
+                  L_ok);
+    const char* msg = "i2c adapter must return to an interpreter frame";
+    __ block_comment(msg);
+    __ stop(msg);
+    __ bind(L_ok);
+    __ block_comment("} verify_i2c ");
+  }
+
+  // Must preserve original SP for loading incoming arguments because
+  // we need to align the outgoing SP for compiled code.
+  __ movptr(r11, rsp);
+
+  // Cut-out for having no stack args.  Since up to 2 int/oop args are passed
+  // in registers, we will occasionally have no stack args.
+  int comp_words_on_stack = 0;
+  if (comp_args_on_stack) {
+    // Sig words on the stack are greater-than VMRegImpl::stack0.  Those in
+    // registers are below.  By subtracting stack0, we either get a negative
+    // number (all values in registers) or the maximum stack slot accessed.
+
+    // Convert 4-byte c2 stack slots to words.
+    comp_words_on_stack = round_to(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
+    // Round up to minimum stack alignment, in wordSize
+    comp_words_on_stack = round_to(comp_words_on_stack, 2);
+    __ subptr(rsp, comp_words_on_stack * wordSize);
+  }
+
+
+  // Ensure compiled code always sees stack at proper alignment
+  __ andptr(rsp, -16);
+
+  // push the return address and misalign the stack, so that the youngest
+  // frame sees it exactly as the placement of a call instruction would
+  // have left it
+  __ push(rax);
+
+  // Put saved SP in another register
+  const Register saved_sp = rax;
+  __ movptr(saved_sp, r11);
+
+  // Will jump to the compiled code just as if compiled code was doing it.
+  // Pre-load the register-jump target early, to schedule it better.
+  __ movptr(r11, Address(rbx, in_bytes(Method::from_compiled_offset())));
+
+  // Now generate the shuffle code.  Pick up all register args and move the
+  // rest through the floating point stack top.
+  for (int i = 0; i < total_args_passed; i++) {
+    if (sig_bt[i] == T_VOID) {
+      // Longs and doubles are passed in native word order, but misaligned
+      // in the 32-bit build.
+      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
+      continue;
+    }
+
+    // Pick up 0, 1 or 2 words from SP+offset.
+
+    assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
+           "scrambled load targets?");
+    // Load in argument order going down.
+    int ld_off = (total_args_passed - i)*Interpreter::stackElementSize;
+    // Point to interpreter value (vs. tag)
+    int next_off = ld_off - Interpreter::stackElementSize;
+
+    VMReg r_1 = regs[i].first();
+    VMReg r_2 = regs[i].second();
+    if (!r_1->is_valid()) {
+      assert(!r_2->is_valid(), "");
+      continue;
+    }
+    if (r_1->is_stack()) {
+      // Convert stack slot to an SP offset (+ wordSize to account for return address )
+      int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size + wordSize;
+
+      // We can use r13 as a temp here because compiled code doesn't need r13 as an input
+      // and if we end up going through a c2i because of a miss, a reasonable value of r13
+      // will be generated.
+      if (!r_2->is_valid()) {
+        // sign extend???
+        __ movl(r13, Address(saved_sp, ld_off));
+        __ movptr(Address(rsp, st_off), r13);
+      } else {
+        //
+        // We are using two optoregs.  This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE.
+        // The interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case,
+        // so we must adjust where to pick up the data to match the interpreter.
+        //
+        // Interpreter local[n] == MSW, local[n+1] == LSW; however, locals
+        // are accessed going down, so the LSW is at the LOW address.
+
+        // ld_off is MSW so get LSW
+        const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)?
+                           next_off : ld_off;
+        __ movq(r13, Address(saved_sp, offset));
+        // st_off is LSW (i.e. reg.first())
+        __ movq(Address(rsp, st_off), r13);
+      }
+    } else if (r_1->is_Register()) {  // Register argument
+      Register r = r_1->as_Register();
+      assert(r != rax, "must be different");
+      if (r_2->is_valid()) {
+        //
+        // We are using two VMRegs.  This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE.
+        // The interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case,
+        // so we must adjust where to pick up the data to match the interpreter.
+
+        const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)?
+                           next_off : ld_off;
+
+        // this can be a misaligned move
+        __ movq(r, Address(saved_sp, offset));
+      } else {
+        // sign extend and use a full word?
+        __ movl(r, Address(saved_sp, ld_off));
+      }
+    } else {
+      if (!r_2->is_valid()) {
+        __ movflt(r_1->as_XMMRegister(), Address(saved_sp, ld_off));
+      } else {
+        __ movdbl(r_1->as_XMMRegister(), Address(saved_sp, next_off));
+      }
+    }
+  }
+
+  // 6243940 We might end up in handle_wrong_method if
+  // the callee is deoptimized as we race thru here.  If that
+  // happens we don't want to take a safepoint because the
+  // caller frame will look interpreted and arguments are now
+  // "compiled" so it is much better to make this transition
+  // invisible to the stack walking code.  Unfortunately if
+  // we try and find the callee by normal means a safepoint
+  // is possible.  So we stash the desired callee in the thread,
+  // and the VM will find it there should this case occur.
+
+  __ movptr(Address(r15_thread, JavaThread::callee_target_offset()), rbx);
+
+  // put Method* where a c2i would expect it should we end up there;
+  // this is only needed because c2's resolve stubs return the Method*
+  // as a result in rax
+  __ mov(rax, rbx);
+  __ jmp(r11);
+}
+
+// ---------------------------------------------------------------
+AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
+                                                            int total_args_passed,
+                                                            int comp_args_on_stack,
+                                                            const BasicType *sig_bt,
+                                                            const VMRegPair *regs,
+                                                            AdapterFingerPrint* fingerprint) {
+  address i2c_entry = __ pc();
+
+  gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
+
+  // -------------------------------------------------------------------------
+  // Generate a C2I adapter.  On entry we know rbx holds the Method* during calls
+  // to the interpreter.  The args start out packed in the compiled layout.  They
+  // need to be unpacked into the interpreter layout.  This will almost always
+  // require some stack space.  We grow the current (compiled) stack, then repack
+  // the args.  We  finally end in a jump to the generic interpreter entry point.
+  // On exit from the interpreter, the interpreter will restore our SP (lest the
+  // compiled code, which relies solely on SP and not RBP, get sick).
+
+  address c2i_unverified_entry = __ pc();
+  Label skip_fixup;
+  Label ok;
+
+  Register holder = rax;
+  Register receiver = j_rarg0;
+  Register temp = rbx;
+
+  {
+    __ load_klass(temp, receiver);
+    __ cmpptr(temp, Address(holder, CompiledICHolder::holder_klass_offset()));
+    __ movptr(rbx, Address(holder, CompiledICHolder::holder_method_offset()));
+    __ jcc(Assembler::equal, ok);
+    __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
+
+    __ bind(ok);
+    // Method might have been compiled since the call site was patched to
+    // interpreted; if that is the case treat it as a miss so we can get
+    // the call site corrected.
+    __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), (int32_t)NULL_WORD);
+    __ jcc(Assembler::equal, skip_fixup);
+    __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
+  }
+
+  address c2i_entry = __ pc();
+
+  gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
+
+  __ flush();
+  return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
+}
+
+int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
+                                        VMRegPair *regs,
+                                        VMRegPair *regs2,
+                                        int total_args_passed) {
+  assert(regs2 == NULL, "not needed on x86");
+// We return the amount of VMRegImpl stack slots we need to reserve for all
+// the arguments NOT counting out_preserve_stack_slots.
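+//
+// An illustrative mapping (System V AMD64; Win64 differs, as the #ifdefs
+// below show): for a native signature (jint, jlong, jfloat, jdouble) the
+// loop assigns c_rarg0 (rdi) and c_rarg1 (rsi) to the two integer args and
+// xmm0/xmm1 to the two floating args, returning 0 stack slots.  On Win64
+// the int and fp sequences advance together, so the jfloat would land in
+// xmm2, and at least 8 slots (32 bytes of shadow space) are always reserved.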
+
+// NOTE: These arrays will have to change when c1 is ported
+#ifdef _WIN64
+    static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = {
+      c_rarg0, c_rarg1, c_rarg2, c_rarg3
+    };
+    static const XMMRegister FP_ArgReg[Argument::n_float_register_parameters_c] = {
+      c_farg0, c_farg1, c_farg2, c_farg3
+    };
+#else
+    static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = {
+      c_rarg0, c_rarg1, c_rarg2, c_rarg3, c_rarg4, c_rarg5
+    };
+    static const XMMRegister FP_ArgReg[Argument::n_float_register_parameters_c] = {
+      c_farg0, c_farg1, c_farg2, c_farg3,
+      c_farg4, c_farg5, c_farg6, c_farg7
+    };
+#endif // _WIN64
+
+
+    uint int_args = 0;
+    uint fp_args = 0;
+    uint stk_args = 0; // inc by 2 each time
+
+    for (int i = 0; i < total_args_passed; i++) {
+      switch (sig_bt[i]) {
+      case T_BOOLEAN:
+      case T_CHAR:
+      case T_BYTE:
+      case T_SHORT:
+      case T_INT:
+        if (int_args < Argument::n_int_register_parameters_c) {
+          regs[i].set1(INT_ArgReg[int_args++]->as_VMReg());
+#ifdef _WIN64
+          fp_args++;
+          // Allocate slots for the callee to stuff register args on the stack.
+          stk_args += 2;
+#endif
+        } else {
+          regs[i].set1(VMRegImpl::stack2reg(stk_args));
+          stk_args += 2;
+        }
+        break;
+      case T_LONG:
+        assert(sig_bt[i + 1] == T_VOID, "expecting half");
+        // fall through
+      case T_OBJECT:
+      case T_ARRAY:
+      case T_ADDRESS:
+      case T_METADATA:
+        if (int_args < Argument::n_int_register_parameters_c) {
+          regs[i].set2(INT_ArgReg[int_args++]->as_VMReg());
+#ifdef _WIN64
+          fp_args++;
+          stk_args += 2;
+#endif
+        } else {
+          regs[i].set2(VMRegImpl::stack2reg(stk_args));
+          stk_args += 2;
+        }
+        break;
+      case T_FLOAT:
+        if (fp_args < Argument::n_float_register_parameters_c) {
+          regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg());
+#ifdef _WIN64
+          int_args++;
+          // Allocate slots for the callee to stuff register args on the stack.
+          stk_args += 2;
+#endif
+        } else {
+          regs[i].set1(VMRegImpl::stack2reg(stk_args));
+          stk_args += 2;
+        }
+        break;
+      case T_DOUBLE:
+        assert(sig_bt[i + 1] == T_VOID, "expecting half");
+        if (fp_args < Argument::n_float_register_parameters_c) {
+          regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg());
+#ifdef _WIN64
+          int_args++;
+          // Allocate slots for the callee to stuff register args on the stack.
+          stk_args += 2;
+#endif
+        } else {
+          regs[i].set2(VMRegImpl::stack2reg(stk_args));
+          stk_args += 2;
+        }
+        break;
+      case T_VOID: // Halves of longs and doubles
+        assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
+        regs[i].set_bad();
+        break;
+      default:
+        ShouldNotReachHere();
+        break;
+      }
+    }
+#ifdef _WIN64
+  // The Windows ABI requires that we always allocate enough stack space
+  // for 4 64-bit registers to be stored down.
+  if (stk_args < 8) {
+    stk_args = 8;
+  }
+#endif // _WIN64
+
+  return stk_args;
+}
+
+// On 64-bit we will store integer-like items to the stack as
+// 64-bit items (sparc abi), even though Java would only store
+// 32 bits for a parameter.  On 32-bit it will simply be 32 bits.
+// So this routine will do 32->32 on 32-bit and 32->64 on 64-bit.
+static void move32_64(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
+  if (src.first()->is_stack()) {
+    if (dst.first()->is_stack()) {
+      // stack to stack
+      __ movslq(rax, Address(rbp, reg2offset_in(src.first())));
+      __ movq(Address(rsp, reg2offset_out(dst.first())), rax);
+    } else {
+      // stack to reg
+      __ movslq(dst.first()->as_Register(), Address(rbp, reg2offset_in(src.first())));
+    }
+  } else if (dst.first()->is_stack()) {
+    // reg to stack
+    // Do we really have to sign extend???
+    // __ movslq(src.first()->as_Register(), src.first()->as_Register());
+    __ movq(Address(rsp, reg2offset_out(dst.first())), src.first()->as_Register());
+  } else {
+    // Do we really have to sign extend???
+    // __ movslq(dst.first()->as_Register(), src.first()->as_Register());
+    if (dst.first() != src.first()) {
+      __ movq(dst.first()->as_Register(), src.first()->as_Register());
+    }
+  }
+}
+
+static void move_ptr(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
+  if (src.first()->is_stack()) {
+    if (dst.first()->is_stack()) {
+      // stack to stack
+      __ movq(rax, Address(rbp, reg2offset_in(src.first())));
+      __ movq(Address(rsp, reg2offset_out(dst.first())), rax);
+    } else {
+      // stack to reg
+      __ movq(dst.first()->as_Register(), Address(rbp, reg2offset_in(src.first())));
+    }
+  } else if (dst.first()->is_stack()) {
+    // reg to stack
+    __ movq(Address(rsp, reg2offset_out(dst.first())), src.first()->as_Register());
+  } else {
+    if (dst.first() != src.first()) {
+      __ movq(dst.first()->as_Register(), src.first()->as_Register());
+    }
+  }
+}
+
+// An oop arg.  Must pass a handle, not the oop itself.
+static void object_move(MacroAssembler* masm,
+                        OopMap* map,
+                        int oop_handle_offset,
+                        int framesize_in_slots,
+                        VMRegPair src,
+                        VMRegPair dst,
+                        bool is_receiver,
+                        int* receiver_offset) {
+
+  // must pass a handle. First figure out the location we use as a handle
+
+  Register rHandle = dst.first()->is_stack() ? rax : dst.first()->as_Register();
+
+  // See if the oop is NULL; if it is, we need no handle
+
+  if (src.first()->is_stack()) {
+
+    // Oop is already on the stack as an argument
+    int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
+    map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots));
+    if (is_receiver) {
+      *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size;
+    }
+
+    __ cmpptr(Address(rbp, reg2offset_in(src.first())), (int32_t)NULL_WORD);
+    __ lea(rHandle, Address(rbp, reg2offset_in(src.first())));
+    // conditionally move a NULL
+    __ cmovptr(Assembler::equal, rHandle, Address(rbp, reg2offset_in(src.first())));
+  } else {
+
+    // Oop is in a register; we must store it to the space we reserve
+    // on the stack for oop_handles and pass a handle if the oop is non-NULL
+
+    const Register rOop = src.first()->as_Register();
+    int oop_slot;
+    if (rOop == j_rarg0)
+      oop_slot = 0;
+    else if (rOop == j_rarg1)
+      oop_slot = 1;
+    else if (rOop == j_rarg2)
+      oop_slot = 2;
+    else if (rOop == j_rarg3)
+      oop_slot = 3;
+    else if (rOop == j_rarg4)
+      oop_slot = 4;
+    else {
+      assert(rOop == j_rarg5, "wrong register");
+      oop_slot = 5;
+    }
+
+    oop_slot = oop_slot * VMRegImpl::slots_per_word + oop_handle_offset;
+    int offset = oop_slot*VMRegImpl::stack_slot_size;
+
+    map->set_oop(VMRegImpl::stack2reg(oop_slot));
+    // Store oop in handle area, may be NULL
+    __ movptr(Address(rsp, offset), rOop);
+    if (is_receiver) {
+      *receiver_offset = offset;
+    }
+
+    __ cmpptr(rOop, (int32_t)NULL_WORD);
+    __ lea(rHandle, Address(rsp, offset));
+    // conditionally move a NULL from the handle area where it was just stored
+    __ cmovptr(Assembler::equal, rHandle, Address(rsp, offset));
+  }
+
+  // If arg is on the stack then place it, otherwise it is already in the correct reg.
+  if (dst.first()->is_stack()) {
+    __ movptr(Address(rsp, reg2offset_out(dst.first())), rHandle);
+  }
+}
+
+// A float arg may have to do float reg to int reg conversion
+static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
+  assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move");
+
+  // The calling convention assures us that each VMRegPair is either
+  // all really one physical register or adjacent stack slots.
+  // This greatly simplifies the cases here compared to sparc.
+
+  if (src.first()->is_stack()) {
+    if (dst.first()->is_stack()) {
+      __ movl(rax, Address(rbp, reg2offset_in(src.first())));
+      __ movptr(Address(rsp, reg2offset_out(dst.first())), rax);
+    } else {
+      // stack to reg
+      assert(dst.first()->is_XMMRegister(), "only expect xmm registers as parameters");
+      __ movflt(dst.first()->as_XMMRegister(), Address(rbp, reg2offset_in(src.first())));
+    }
+  } else if (dst.first()->is_stack()) {
+    // reg to stack
+    assert(src.first()->is_XMMRegister(), "only expect xmm registers as parameters");
+    __ movflt(Address(rsp, reg2offset_out(dst.first())), src.first()->as_XMMRegister());
+  } else {
+    // reg to reg
+    // In theory these overlap but the ordering is such that this is likely a nop
+    if ( src.first() != dst.first()) {
+      __ movdbl(dst.first()->as_XMMRegister(), src.first()->as_XMMRegister());
+    }
+  }
+}
+
+// A long move
+static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
+
+  // The calling convention assures us that each VMRegPair is either
+  // all really one physical register or adjacent stack slots.
+  // This greatly simplifies the cases here compared to sparc.
+
+  if (src.is_single_phys_reg() ) {
+    if (dst.is_single_phys_reg()) {
+      if (dst.first() != src.first()) {
+        __ mov(dst.first()->as_Register(), src.first()->as_Register());
+      }
+    } else {
+      assert(dst.is_single_reg(), "not a stack pair");
+      __ movq(Address(rsp, reg2offset_out(dst.first())), src.first()->as_Register());
+    }
+  } else if (dst.is_single_phys_reg()) {
+    assert(src.is_single_reg(), "not a stack pair");
+    __ movq(dst.first()->as_Register(), Address(rbp, reg2offset_out(src.first())));
+  } else {
+    assert(src.is_single_reg() && dst.is_single_reg(), "not stack pairs");
+    __ movq(rax, Address(rbp, reg2offset_in(src.first())));
+    __ movq(Address(rsp, reg2offset_out(dst.first())), rax);
+  }
+}
+
+// A double move
+static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
+
+  // The calling convention assures us that each VMRegPair is either
+  // all really one physical register or adjacent stack slots.
+  // This greatly simplifies the cases here compared to sparc.
+
+  if (src.is_single_phys_reg() ) {
+    if (dst.is_single_phys_reg()) {
+      // In theory these overlap but the ordering is such that this is likely a nop
+      if ( src.first() != dst.first()) {
+        __ movdbl(dst.first()->as_XMMRegister(), src.first()->as_XMMRegister());
+      }
+    } else {
+      assert(dst.is_single_reg(), "not a stack pair");
+      __ movdbl(Address(rsp, reg2offset_out(dst.first())), src.first()->as_XMMRegister());
+    }
+  } else if (dst.is_single_phys_reg()) {
+    assert(src.is_single_reg(), "not a stack pair");
+    __ movdbl(dst.first()->as_XMMRegister(), Address(rbp, reg2offset_out(src.first())));
+  } else {
+    assert(src.is_single_reg() && dst.is_single_reg(), "not stack pairs");
+    __ movq(rax, Address(rbp, reg2offset_in(src.first())));
+    __ movq(Address(rsp, reg2offset_out(dst.first())), rax);
+  }
+}
+
+
+void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
+  // We always ignore the frame_slots arg and just use the space just below the
+  // frame pointer, which by this time is free to use
+  switch (ret_type) {
+  case T_FLOAT:
+    __ movflt(Address(rbp, -wordSize), xmm0);
+    break;
+  case T_DOUBLE:
+    __ movdbl(Address(rbp, -wordSize), xmm0);
+    break;
+  case T_VOID:  break;
+  default: {
+    __ movptr(Address(rbp, -wordSize), rax);
+    }
+  }
+}
+
+void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
+  // We always ignore the frame_slots arg and just use the space just below the
+  // frame pointer, which by this time is free to use
+  switch (ret_type) {
+  case T_FLOAT:
+    __ movflt(xmm0, Address(rbp, -wordSize));
+    break;
+  case T_DOUBLE:
+    __ movdbl(xmm0, Address(rbp, -wordSize));
+    break;
+  case T_VOID:  break;
+  default: {
+    __ movptr(rax, Address(rbp, -wordSize));
+    }
+  }
+}
+
+static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) {
+  for ( int i = first_arg ; i < arg_count ; i++ ) {
+    if (args[i].first()->is_Register()) {
+      __ push(args[i].first()->as_Register());
+    } else if (args[i].first()->is_XMMRegister()) {
+      __ subptr(rsp, 2*wordSize);
+      __ movdbl(Address(rsp, 0), args[i].first()->as_XMMRegister());
+    }
+  }
+}
+
+static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) {
+  for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) {
+    if (args[i].first()->is_Register()) {
+      __ pop(args[i].first()->as_Register());
+    } else if (args[i].first()->is_XMMRegister()) {
+      __ movdbl(args[i].first()->as_XMMRegister(), Address(rsp, 0));
+      __ addptr(rsp, 2*wordSize);
+    }
+  }
+}
+
+
+static void save_or_restore_arguments(MacroAssembler* masm,
+                                      const int stack_slots,
+                                      const int total_in_args,
+                                      const int arg_save_area,
+                                      OopMap* map,
+                                      VMRegPair* in_regs,
+                                      BasicType* in_sig_bt) {
+  // if map is non-NULL then the code should store the values,
+  // otherwise it should load them.
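+  // (This helper is deliberately direction-agnostic:
+  // check_needs_gc_for_critical_native below calls it once with a real
+  // OopMap to spill the arguments and record any oops, and once more with
+  // map == NULL to reload them after the runtime call.)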
+  int slot = arg_save_area;
+  // Save down double words first
+  for ( int i = 0; i < total_in_args; i++) {
+    if (in_regs[i].first()->is_XMMRegister() && in_sig_bt[i] == T_DOUBLE) {
+      int offset = slot * VMRegImpl::stack_slot_size;
+      slot += VMRegImpl::slots_per_word;
+      assert(slot <= stack_slots, "overflow");
+      if (map != NULL) {
+        __ movdbl(Address(rsp, offset), in_regs[i].first()->as_XMMRegister());
+      } else {
+        __ movdbl(in_regs[i].first()->as_XMMRegister(), Address(rsp, offset));
+      }
+    }
+    if (in_regs[i].first()->is_Register() &&
+        (in_sig_bt[i] == T_LONG || in_sig_bt[i] == T_ARRAY)) {
+      int offset = slot * VMRegImpl::stack_slot_size;
+      if (map != NULL) {
+        __ movq(Address(rsp, offset), in_regs[i].first()->as_Register());
+        if (in_sig_bt[i] == T_ARRAY) {
+          map->set_oop(VMRegImpl::stack2reg(slot));
+        }
+      } else {
+        __ movq(in_regs[i].first()->as_Register(), Address(rsp, offset));
+      }
+      slot += VMRegImpl::slots_per_word;
+    }
+  }
+  // Save or restore single word registers
+  for ( int i = 0; i < total_in_args; i++) {
+    if (in_regs[i].first()->is_Register()) {
+      int offset = slot * VMRegImpl::stack_slot_size;
+      slot++;
+      assert(slot <= stack_slots, "overflow");
+
+      // Value is in an input register; we must flush it to the stack
+      const Register reg = in_regs[i].first()->as_Register();
+      switch (in_sig_bt[i]) {
+        case T_BOOLEAN:
+        case T_CHAR:
+        case T_BYTE:
+        case T_SHORT:
+        case T_INT:
+          if (map != NULL) {
+            __ movl(Address(rsp, offset), reg);
+          } else {
+            __ movl(reg, Address(rsp, offset));
+          }
+          break;
+        case T_ARRAY:
+        case T_LONG:
+          // handled above
+          break;
+        case T_OBJECT:
+        default: ShouldNotReachHere();
+      }
+    } else if (in_regs[i].first()->is_XMMRegister()) {
+      if (in_sig_bt[i] == T_FLOAT) {
+        int offset = slot * VMRegImpl::stack_slot_size;
+        slot++;
+        assert(slot <= stack_slots, "overflow");
+        if (map != NULL) {
+          __ movflt(Address(rsp, offset), in_regs[i].first()->as_XMMRegister());
+        } else {
+          __ movflt(in_regs[i].first()->as_XMMRegister(), Address(rsp, offset));
+        }
+      }
+    } else if (in_regs[i].first()->is_stack()) {
+      if (in_sig_bt[i] == T_ARRAY && map != NULL) {
+        int offset_in_older_frame = in_regs[i].first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
+        map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots));
+      }
+    }
+  }
+}
+
+
+// Check GC_locker::needs_gc and enter the runtime if it's true.  This
+// keeps a new JNI critical region from starting until a GC has been
+// forced.  Save down any oops in registers and describe them in an
+// OopMap.
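+//
+// The shape of the code below: test the GC_locker flag; if a GC is pending,
+// spill the still-raw argument registers with save_or_restore_arguments
+// (recording any oops in the OopMap), publish a last_Java_frame, and block
+// in SharedRuntime::block_for_jni_critical until the GC completes; then
+// reload the arguments and fall through into the critical region.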
1.1359 +static void check_needs_gc_for_critical_native(MacroAssembler* masm, 1.1360 + int stack_slots, 1.1361 + int total_c_args, 1.1362 + int total_in_args, 1.1363 + int arg_save_area, 1.1364 + OopMapSet* oop_maps, 1.1365 + VMRegPair* in_regs, 1.1366 + BasicType* in_sig_bt) { 1.1367 + __ block_comment("check GC_locker::needs_gc"); 1.1368 + Label cont; 1.1369 + __ cmp8(ExternalAddress((address)GC_locker::needs_gc_address()), false); 1.1370 + __ jcc(Assembler::equal, cont); 1.1371 + 1.1372 + // Save down any incoming oops and call into the runtime to halt for a GC 1.1373 + 1.1374 + OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); 1.1375 + save_or_restore_arguments(masm, stack_slots, total_in_args, 1.1376 + arg_save_area, map, in_regs, in_sig_bt); 1.1377 + 1.1378 + address the_pc = __ pc(); 1.1379 + oop_maps->add_gc_map( __ offset(), map); 1.1380 + __ set_last_Java_frame(rsp, noreg, the_pc); 1.1381 + 1.1382 + __ block_comment("block_for_jni_critical"); 1.1383 + __ movptr(c_rarg0, r15_thread); 1.1384 + __ mov(r12, rsp); // remember sp 1.1385 + __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows 1.1386 + __ andptr(rsp, -16); // align stack as required by ABI 1.1387 + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::block_for_jni_critical))); 1.1388 + __ mov(rsp, r12); // restore sp 1.1389 + __ reinit_heapbase(); 1.1390 + 1.1391 + __ reset_last_Java_frame(false, true); 1.1392 + 1.1393 + save_or_restore_arguments(masm, stack_slots, total_in_args, 1.1394 + arg_save_area, NULL, in_regs, in_sig_bt); 1.1395 + 1.1396 + __ bind(cont); 1.1397 +#ifdef ASSERT 1.1398 + if (StressCriticalJNINatives) { 1.1399 + // Stress register saving 1.1400 + OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); 1.1401 + save_or_restore_arguments(masm, stack_slots, total_in_args, 1.1402 + arg_save_area, map, in_regs, in_sig_bt); 1.1403 + // Destroy argument registers 1.1404 + for (int i = 0; i < total_in_args - 1; i++) { 1.1405 + if (in_regs[i].first()->is_Register()) { 1.1406 + const Register reg = in_regs[i].first()->as_Register(); 1.1407 + __ xorptr(reg, reg); 1.1408 + } else if (in_regs[i].first()->is_XMMRegister()) { 1.1409 + __ xorpd(in_regs[i].first()->as_XMMRegister(), in_regs[i].first()->as_XMMRegister()); 1.1410 + } else if (in_regs[i].first()->is_FloatRegister()) { 1.1411 + ShouldNotReachHere(); 1.1412 + } else if (in_regs[i].first()->is_stack()) { 1.1413 + // Nothing to do 1.1414 + } else { 1.1415 + ShouldNotReachHere(); 1.1416 + } 1.1417 + if (in_sig_bt[i] == T_LONG || in_sig_bt[i] == T_DOUBLE) { 1.1418 + i++; 1.1419 + } 1.1420 + } 1.1421 + 1.1422 + save_or_restore_arguments(masm, stack_slots, total_in_args, 1.1423 + arg_save_area, NULL, in_regs, in_sig_bt); 1.1424 + } 1.1425 +#endif 1.1426 +} 1.1427 + 1.1428 +// Unpack an array argument into a pointer to the body and the length 1.1429 +// if the array is non-null, otherwise pass 0 for both. 
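For orientation, this is roughly what the two entry points look like from the C side (a sketch: the JavaCritical_ prefix follows the CriticalJNINatives naming convention, and the Example class and sum method are hypothetical). The helper below produces the (length, body) pair, or (0, NULL) when the array reference is null.

#include <jni.h>

// Ordinary JNI form: env/class plus an opaque array handle.
extern "C" JNIEXPORT jint JNICALL
Java_Example_sum(JNIEnv* env, jclass, jintArray arr);

// Critical form: no env or class; the array arrives unpacked.
extern "C" JNIEXPORT jint JNICALL
JavaCritical_Example_sum(jint length, jint* body) {
  jint s = 0;
  for (jint i = 0; i < length; i++) s += body[i];  // body is NULL only when length is 0
  return s;
}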
1.1430 +static void unpack_array_argument(MacroAssembler* masm, VMRegPair reg, BasicType in_elem_type, VMRegPair body_arg, VMRegPair length_arg) {
1.1431 +  Register tmp_reg = rax;
1.1432 +  assert(!body_arg.first()->is_Register() || body_arg.first()->as_Register() != tmp_reg,
1.1433 +         "possible collision");
1.1434 +  assert(!length_arg.first()->is_Register() || length_arg.first()->as_Register() != tmp_reg,
1.1435 +         "possible collision");
1.1436 +
1.1437 +  __ block_comment("unpack_array_argument {");
1.1438 +
1.1439 +  // Pass the length, ptr pair
1.1440 +  Label is_null, done;
1.1441 +  VMRegPair tmp;
1.1442 +  tmp.set_ptr(tmp_reg->as_VMReg());
1.1443 +  if (reg.first()->is_stack()) {
1.1444 +    // Load the arg up from the stack
1.1445 +    move_ptr(masm, reg, tmp);
1.1446 +    reg = tmp;
1.1447 +  }
1.1448 +  __ testptr(reg.first()->as_Register(), reg.first()->as_Register());
1.1449 +  __ jccb(Assembler::equal, is_null);
1.1450 +  __ lea(tmp_reg, Address(reg.first()->as_Register(), arrayOopDesc::base_offset_in_bytes(in_elem_type)));
1.1451 +  move_ptr(masm, tmp, body_arg);
1.1452 +  // Load the length relative to the body.
1.1453 +  __ movl(tmp_reg, Address(tmp_reg, arrayOopDesc::length_offset_in_bytes() -
1.1454 +                           arrayOopDesc::base_offset_in_bytes(in_elem_type)));
1.1455 +  move32_64(masm, tmp, length_arg);
1.1456 +  __ jmpb(done);
1.1457 +  __ bind(is_null);
1.1458 +  // Pass zeros
1.1459 +  __ xorptr(tmp_reg, tmp_reg);
1.1460 +  move_ptr(masm, tmp, body_arg);
1.1461 +  move32_64(masm, tmp, length_arg);
1.1462 +  __ bind(done);
1.1463 +
1.1464 +  __ block_comment("} unpack_array_argument");
1.1465 +}
1.1466 +
1.1467 +
1.1468 +// Different signatures may require very different orders for the moves
1.1469 +// to avoid clobbering other arguments. There's no single static order
1.1470 +// that is always safe, so we compute a safe order for issuing the stores
1.1471 +// and break any cycles among them. This code is fairly general but
1.1472 +// it's not necessary on the other platforms, so we keep it in the
1.1473 +// platform dependent code instead of moving it into a shared file.
1.1474 +// (See bugs 7013347 & 7145024.)
1.1475 +// Note that this code is specific to LP64.
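A standalone sketch (illustrative only) of the cycle-breaking idea that the ComputeMoveOrder class below implements with explicit killer/chain bookkeeping: emit any move whose destination is not a pending source, and when only a cycle remains, route one value through a scratch location first.

#include <cassert>
#include <vector>

struct Move { int src, dst; };   // registers are plain ints here

// Emit moves so no pending source is clobbered; use 'tmp' to break cycles.
// This O(n^2) scan trades efficiency for clarity.
static std::vector<Move> safe_order(std::vector<Move> pending, int tmp) {
  std::vector<Move> out;
  while (!pending.empty()) {
    bool progressed = false;
    for (size_t i = 0; i < pending.size(); i++) {
      bool clobbers = false;   // would this store kill another move's source?
      for (size_t j = 0; j < pending.size(); j++)
        if (j != i && pending[j].src == pending[i].dst) clobbers = true;
      if (!clobbers) {
        out.push_back(pending[i]);
        pending.erase(pending.begin() + i);
        progressed = true;
        break;
      }
    }
    if (!progressed) {                          // pure cycle: break it
      out.push_back(Move{pending[0].src, tmp}); // save one value in tmp
      pending[0].src = tmp;                     // its store now reads tmp
    }
  }
  return out;
}

int main() {
  int regs[4] = {10, 20, 30, 0};                // reg 3 is the temp
  std::vector<Move> order = safe_order({{0, 1}, {1, 2}, {2, 0}}, 3);
  for (Move m : order) regs[m.dst] = regs[m.src];
  assert(regs[1] == 10 && regs[2] == 20 && regs[0] == 30);
  return 0;
}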
1.1476 +class ComputeMoveOrder: public StackObj {
1.1477 +  class MoveOperation: public ResourceObj {
1.1478 +    friend class ComputeMoveOrder;
1.1479 +   private:
1.1480 +    VMRegPair _src;
1.1481 +    VMRegPair _dst;
1.1482 +    int _src_index;
1.1483 +    int _dst_index;
1.1484 +    bool _processed;
1.1485 +    MoveOperation* _next;
1.1486 +    MoveOperation* _prev;
1.1487 +
1.1488 +    static int get_id(VMRegPair r) {
1.1489 +      return r.first()->value();
1.1490 +    }
1.1491 +
1.1492 +   public:
1.1493 +    MoveOperation(int src_index, VMRegPair src, int dst_index, VMRegPair dst):
1.1494 +      _src(src)
1.1495 +    , _src_index(src_index)
1.1496 +    , _dst(dst)
1.1497 +    , _dst_index(dst_index)
1.1498 +    , _next(NULL)
1.1499 +    , _prev(NULL)
1.1500 +    , _processed(false) {
1.1501 +    }
1.1502 +
1.1503 +    VMRegPair src() const              { return _src; }
1.1504 +    int src_id() const                 { return get_id(src()); }
1.1505 +    int src_index() const              { return _src_index; }
1.1506 +    VMRegPair dst() const              { return _dst; }
1.1507 +    void set_dst(int i, VMRegPair dst) { _dst_index = i, _dst = dst; }
1.1508 +    int dst_index() const              { return _dst_index; }
1.1509 +    int dst_id() const                 { return get_id(dst()); }
1.1510 +    MoveOperation* next() const        { return _next; }
1.1511 +    MoveOperation* prev() const        { return _prev; }
1.1512 +    void set_processed()               { _processed = true; }
1.1513 +    bool is_processed() const          { return _processed; }
1.1514 +
1.1515 +    // Insert a new store to break a cycle.
1.1516 +    void break_cycle(VMRegPair temp_register) {
1.1517 +      // create a new store following the last store
1.1518 +      // to move from the temp_register to the original
1.1519 +      MoveOperation* new_store = new MoveOperation(-1, temp_register, dst_index(), dst());
1.1520 +
1.1521 +      // break the cycle of links and insert new_store at the end
1.1522 +      // break the reverse link.
1.1523 +      MoveOperation* p = prev();
1.1524 +      assert(p->next() == this, "must be");
1.1525 +      _prev = NULL;
1.1526 +      p->_next = new_store;
1.1527 +      new_store->_prev = p;
1.1528 +
1.1529 +      // change the original store to save its value in the temp.
1.1530 +      set_dst(-1, temp_register);
1.1531 +    }
1.1532 +
1.1533 +    void link(GrowableArray<MoveOperation*>& killer) {
1.1534 +      // link this store in front of the store that it depends on
1.1535 +      MoveOperation* n = killer.at_grow(src_id(), NULL);
1.1536 +      if (n != NULL) {
1.1537 +        assert(_next == NULL && n->_prev == NULL, "shouldn't have been set yet");
1.1538 +        _next = n;
1.1539 +        n->_prev = this;
1.1540 +      }
1.1541 +    }
1.1542 +  };
1.1543 +
1.1544 + private:
1.1545 +  GrowableArray<MoveOperation*> edges;
1.1546 +
1.1547 + public:
1.1548 +  ComputeMoveOrder(int total_in_args, VMRegPair* in_regs, int total_c_args, VMRegPair* out_regs,
1.1549 +                   BasicType* in_sig_bt, GrowableArray<int>& arg_order, VMRegPair tmp_vmreg) {
1.1550 +    // Move operations where the dest is the stack can all be
1.1551 +    // scheduled first since they can't interfere with the other moves.
1.1552 +    for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) {
1.1553 +      if (in_sig_bt[i] == T_ARRAY) {
1.1554 +        c_arg--;
1.1555 +        if (out_regs[c_arg].first()->is_stack() &&
1.1556 +            out_regs[c_arg + 1].first()->is_stack()) {
1.1557 +          arg_order.push(i);
1.1558 +          arg_order.push(c_arg);
1.1559 +        } else {
1.1560 +          if (out_regs[c_arg].first()->is_stack() ||
1.1561 +              in_regs[i].first() == out_regs[c_arg].first()) {
1.1562 +            add_edge(i, in_regs[i].first(), c_arg, out_regs[c_arg + 1]);
1.1563 +          } else {
1.1564 +            add_edge(i, in_regs[i].first(), c_arg, out_regs[c_arg]);
1.1565 +          }
1.1566 +        }
1.1567 +      } else if (in_sig_bt[i] == T_VOID) {
1.1568 +        arg_order.push(i);
1.1569 +        arg_order.push(c_arg);
1.1570 +      } else {
1.1571 +        if (out_regs[c_arg].first()->is_stack() ||
1.1572 +            in_regs[i].first() == out_regs[c_arg].first()) {
1.1573 +          arg_order.push(i);
1.1574 +          arg_order.push(c_arg);
1.1575 +        } else {
1.1576 +          add_edge(i, in_regs[i].first(), c_arg, out_regs[c_arg]);
1.1577 +        }
1.1578 +      }
1.1579 +    }
1.1580 +    // Break any cycles in the register moves and emit them in the
1.1581 +    // proper order.
1.1582 +    GrowableArray<MoveOperation*>* stores = get_store_order(tmp_vmreg);
1.1583 +    for (int i = 0; i < stores->length(); i++) {
1.1584 +      arg_order.push(stores->at(i)->src_index());
1.1585 +      arg_order.push(stores->at(i)->dst_index());
1.1586 +    }
1.1587 +  }
1.1588 +
1.1589 +  // Collect all the move operations.
1.1590 +  void add_edge(int src_index, VMRegPair src, int dst_index, VMRegPair dst) {
1.1591 +    if (src.first() == dst.first()) return;
1.1592 +    edges.append(new MoveOperation(src_index, src, dst_index, dst));
1.1593 +  }
1.1594 +
1.1595 +  // Walk the edges breaking cycles between moves. The result list
1.1596 +  // can be walked in order to produce the proper set of stores.
1.1597 +  GrowableArray<MoveOperation*>* get_store_order(VMRegPair temp_register) {
1.1598 +    // Record which moves kill which values
1.1599 +    GrowableArray<MoveOperation*> killer;
1.1600 +    for (int i = 0; i < edges.length(); i++) {
1.1601 +      MoveOperation* s = edges.at(i);
1.1602 +      assert(killer.at_grow(s->dst_id(), NULL) == NULL, "only one killer");
1.1603 +      killer.at_put_grow(s->dst_id(), s, NULL);
1.1604 +    }
1.1605 +    assert(killer.at_grow(MoveOperation::get_id(temp_register), NULL) == NULL,
1.1606 +           "make sure temp isn't in the registers that are killed");
1.1607 +
1.1608 +    // create links between loads and stores
1.1609 +    for (int i = 0; i < edges.length(); i++) {
1.1610 +      edges.at(i)->link(killer);
1.1611 +    }
1.1612 +
1.1613 +    // At this point, all the move operations are chained together
1.1614 +    // in a doubly linked list. Processing them backwards finds
1.1615 +    // the beginning of the chain, forwards finds the end. If there's
1.1616 +    // a cycle it can be broken at any point, so pick an edge and walk
1.1617 +    // backward until the list ends or we end where we started.
1.1618 + GrowableArray<MoveOperation*>* stores = new GrowableArray<MoveOperation*>(); 1.1619 + for (int e = 0; e < edges.length(); e++) { 1.1620 + MoveOperation* s = edges.at(e); 1.1621 + if (!s->is_processed()) { 1.1622 + MoveOperation* start = s; 1.1623 + // search for the beginning of the chain or cycle 1.1624 + while (start->prev() != NULL && start->prev() != s) { 1.1625 + start = start->prev(); 1.1626 + } 1.1627 + if (start->prev() == s) { 1.1628 + start->break_cycle(temp_register); 1.1629 + } 1.1630 + // walk the chain forward inserting to store list 1.1631 + while (start != NULL) { 1.1632 + stores->append(start); 1.1633 + start->set_processed(); 1.1634 + start = start->next(); 1.1635 + } 1.1636 + } 1.1637 + } 1.1638 + return stores; 1.1639 + } 1.1640 +}; 1.1641 + 1.1642 +static void verify_oop_args(MacroAssembler* masm, 1.1643 + methodHandle method, 1.1644 + const BasicType* sig_bt, 1.1645 + const VMRegPair* regs) { 1.1646 + Register temp_reg = rbx; // not part of any compiled calling seq 1.1647 + if (VerifyOops) { 1.1648 + for (int i = 0; i < method->size_of_parameters(); i++) { 1.1649 + if (sig_bt[i] == T_OBJECT || 1.1650 + sig_bt[i] == T_ARRAY) { 1.1651 + VMReg r = regs[i].first(); 1.1652 + assert(r->is_valid(), "bad oop arg"); 1.1653 + if (r->is_stack()) { 1.1654 + __ movptr(temp_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); 1.1655 + __ verify_oop(temp_reg); 1.1656 + } else { 1.1657 + __ verify_oop(r->as_Register()); 1.1658 + } 1.1659 + } 1.1660 + } 1.1661 + } 1.1662 +} 1.1663 + 1.1664 +static void gen_special_dispatch(MacroAssembler* masm, 1.1665 + methodHandle method, 1.1666 + const BasicType* sig_bt, 1.1667 + const VMRegPair* regs) { 1.1668 + verify_oop_args(masm, method, sig_bt, regs); 1.1669 + vmIntrinsics::ID iid = method->intrinsic_id(); 1.1670 + 1.1671 + // Now write the args into the outgoing interpreter space 1.1672 + bool has_receiver = false; 1.1673 + Register receiver_reg = noreg; 1.1674 + int member_arg_pos = -1; 1.1675 + Register member_reg = noreg; 1.1676 + int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); 1.1677 + if (ref_kind != 0) { 1.1678 + member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument 1.1679 + member_reg = rbx; // known to be free at this point 1.1680 + has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); 1.1681 + } else if (iid == vmIntrinsics::_invokeBasic) { 1.1682 + has_receiver = true; 1.1683 + } else { 1.1684 + fatal(err_msg_res("unexpected intrinsic id %d", iid)); 1.1685 + } 1.1686 + 1.1687 + if (member_reg != noreg) { 1.1688 + // Load the member_arg into register, if necessary. 1.1689 + SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); 1.1690 + VMReg r = regs[member_arg_pos].first(); 1.1691 + if (r->is_stack()) { 1.1692 + __ movptr(member_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); 1.1693 + } else { 1.1694 + // no data motion is needed 1.1695 + member_reg = r->as_Register(); 1.1696 + } 1.1697 + } 1.1698 + 1.1699 + if (has_receiver) { 1.1700 + // Make sure the receiver is loaded into a register. 1.1701 + assert(method->size_of_parameters() > 0, "oob"); 1.1702 + assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); 1.1703 + VMReg r = regs[0].first(); 1.1704 + assert(r->is_valid(), "bad receiver arg"); 1.1705 + if (r->is_stack()) { 1.1706 + // Porting note: This assumes that compiled calling conventions always 1.1707 + // pass the receiver oop in a register. 
If this is not true on some
1.1708 +      // platform, pick a temp and load the receiver from stack.
1.1709 +      fatal("receiver always in a register");
1.1710 +      receiver_reg = j_rarg0;  // known to be free at this point
1.1711 +      __ movptr(receiver_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
1.1712 +    } else {
1.1713 +      // no data motion is needed
1.1714 +      receiver_reg = r->as_Register();
1.1715 +    }
1.1716 +  }
1.1717 +
1.1718 +  // Figure out which address we are really jumping to:
1.1719 +  MethodHandles::generate_method_handle_dispatch(masm, iid,
1.1720 +                                                 receiver_reg, member_reg, /*for_compiler_entry:*/ true);
1.1721 +}
1.1722 +
1.1723 +// ---------------------------------------------------------------------------
1.1724 +// Generate a native wrapper for a given method. The method takes arguments
1.1725 +// in the Java compiled code convention, marshals them to the native
1.1726 +// convention (handlizes oops, etc), transitions to native, makes the call,
1.1727 +// returns to java state (possibly blocking), unhandlizes any result and
1.1728 +// returns.
1.1729 +//
1.1730 +// Critical native functions are a shorthand for the use of
1.1731 +// GetPrimitiveArrayCritical and disallow the use of any other JNI
1.1732 +// functions. The wrapper is expected to unpack the arguments before
1.1733 +// passing them to the callee and perform checks before and after the
1.1734 +// native call to ensure that the GC_locker
1.1735 +// lock_critical/unlock_critical semantics are followed. Some other
1.1736 +// parts of JNI setup are skipped, like the tear-down of the JNI handle
1.1737 +// block and the check for pending exceptions, since it's impossible for
1.1738 +// them to be thrown.
1.1739 +//
1.1740 +// They are roughly structured like this:
1.1741 +//   if (GC_locker::needs_gc())
1.1742 +//     SharedRuntime::block_for_jni_critical();
1.1743 +//   transition to thread_in_native
1.1744 +//   unpack array arguments and call native entry point
1.1745 +//   check for safepoint in progress
1.1746 +//   check if any thread suspend flags are set
1.1747 +//   call into JVM and possibly unlock the JNI critical region
1.1748 +//   if a GC was suppressed while in the critical native.
1.1749 +//   transition back to thread_in_Java
1.1750 +//   return to caller
1.1751 +//
1.1752 +nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
1.1753 +                                                methodHandle method,
1.1754 +                                                int compile_id,
1.1755 +                                                BasicType* in_sig_bt,
1.1756 +                                                VMRegPair* in_regs,
1.1757 +                                                BasicType ret_type) {
1.1758 +  if (method->is_method_handle_intrinsic()) {
1.1759 +    vmIntrinsics::ID iid = method->intrinsic_id();
1.1760 +    intptr_t start = (intptr_t)__ pc();
1.1761 +    int vep_offset = ((intptr_t)__ pc()) - start;
1.1762 +    gen_special_dispatch(masm,
1.1763 +                         method,
1.1764 +                         in_sig_bt,
1.1765 +                         in_regs);
1.1766 +    int frame_complete = ((intptr_t)__ pc()) - start;  // not complete, period
1.1767 +    __ flush();
1.1768 +    int stack_slots = SharedRuntime::out_preserve_stack_slots();  // no out slots at all, actually
1.1769 +    return nmethod::new_native_nmethod(method,
1.1770 +                                       compile_id,
1.1771 +                                       masm->code(),
1.1772 +                                       vep_offset,
1.1773 +                                       frame_complete,
1.1774 +                                       stack_slots / VMRegImpl::slots_per_word,
1.1775 +                                       in_ByteSize(-1),
1.1776 +                                       in_ByteSize(-1),
1.1777 +                                       (OopMapSet*)NULL);
1.1778 +  }
1.1779 +  bool is_critical_native = true;
1.1780 +  address native_func = method->critical_native_function();
1.1781 +  if (native_func == NULL) {
1.1782 +    native_func = method->native_function();
1.1783 +    is_critical_native = false;
1.1784 +  }
1.1785 +  assert(native_func != NULL, "must have function");
1.1786 +
1.1787 +  // An OopMap for lock (and class if static)
1.1788 +  OopMapSet *oop_maps = new OopMapSet();
1.1789 +  intptr_t start = (intptr_t)__ pc();
1.1790 +
1.1791 +  // We have received a description of where all the java args are located
1.1792 +  // on entry to the wrapper. We need to convert these args to where
1.1793 +  // the jni function will expect them. To figure out where they go
1.1794 +  // we convert the java signature to a C signature by inserting
1.1795 +  // the hidden arguments as arg[0] and possibly arg[1] (static method)
1.1796 +
1.1797 +  const int total_in_args = method->size_of_parameters();
1.1798 +  int total_c_args = total_in_args;
1.1799 +  if (!is_critical_native) {
1.1800 +    total_c_args += 1;
1.1801 +    if (method->is_static()) {
1.1802 +      total_c_args++;
1.1803 +    }
1.1804 +  } else {
1.1805 +    for (int i = 0; i < total_in_args; i++) {
1.1806 +      if (in_sig_bt[i] == T_ARRAY) {
1.1807 +        total_c_args++;
1.1808 +      }
1.1809 +    }
1.1810 +  }
1.1811 +
1.1812 +  BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
1.1813 +  VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
1.1814 +  BasicType* in_elem_bt = NULL;
1.1815 +
1.1816 +  int argc = 0;
1.1817 +  if (!is_critical_native) {
1.1818 +    out_sig_bt[argc++] = T_ADDRESS;
1.1819 +    if (method->is_static()) {
1.1820 +      out_sig_bt[argc++] = T_OBJECT;
1.1821 +    }
1.1822 +
1.1823 +    for (int i = 0; i < total_in_args ; i++ ) {
1.1824 +      out_sig_bt[argc++] = in_sig_bt[i];
1.1825 +    }
1.1826 +  } else {
1.1827 +    Thread* THREAD = Thread::current();
1.1828 +    in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args);
1.1829 +    SignatureStream ss(method->signature());
1.1830 +    for (int i = 0; i < total_in_args ; i++ ) {
1.1831 +      if (in_sig_bt[i] == T_ARRAY) {
1.1832 +        // Arrays are passed as int, elem* pair
1.1833 +        out_sig_bt[argc++] = T_INT;
1.1834 +        out_sig_bt[argc++] = T_ADDRESS;
1.1835 +        Symbol* atype = ss.as_symbol(CHECK_NULL);
1.1836 +        const char* at = atype->as_C_string();
1.1837 +        if (strlen(at) == 2) {
1.1838 +          assert(at[0] == '[', "must be");
1.1839 +          switch (at[1]) {
1.1840 +            case 'B': in_elem_bt[i] = T_BYTE; break;
1.1841 +            case 'C':
in_elem_bt[i] = T_CHAR; break; 1.1842 + case 'D': in_elem_bt[i] = T_DOUBLE; break; 1.1843 + case 'F': in_elem_bt[i] = T_FLOAT; break; 1.1844 + case 'I': in_elem_bt[i] = T_INT; break; 1.1845 + case 'J': in_elem_bt[i] = T_LONG; break; 1.1846 + case 'S': in_elem_bt[i] = T_SHORT; break; 1.1847 + case 'Z': in_elem_bt[i] = T_BOOLEAN; break; 1.1848 + default: ShouldNotReachHere(); 1.1849 + } 1.1850 + } 1.1851 + } else { 1.1852 + out_sig_bt[argc++] = in_sig_bt[i]; 1.1853 + in_elem_bt[i] = T_VOID; 1.1854 + } 1.1855 + if (in_sig_bt[i] != T_VOID) { 1.1856 + assert(in_sig_bt[i] == ss.type(), "must match"); 1.1857 + ss.next(); 1.1858 + } 1.1859 + } 1.1860 + } 1.1861 + 1.1862 + // Now figure out where the args must be stored and how much stack space 1.1863 + // they require. 1.1864 + int out_arg_slots; 1.1865 + out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); 1.1866 + 1.1867 + // Compute framesize for the wrapper. We need to handlize all oops in 1.1868 + // incoming registers 1.1869 + 1.1870 + // Calculate the total number of stack slots we will need. 1.1871 + 1.1872 + // First count the abi requirement plus all of the outgoing args 1.1873 + int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; 1.1874 + 1.1875 + // Now the space for the inbound oop handle area 1.1876 + int total_save_slots = 6 * VMRegImpl::slots_per_word; // 6 arguments passed in registers 1.1877 + if (is_critical_native) { 1.1878 + // Critical natives may have to call out so they need a save area 1.1879 + // for register arguments. 1.1880 + int double_slots = 0; 1.1881 + int single_slots = 0; 1.1882 + for ( int i = 0; i < total_in_args; i++) { 1.1883 + if (in_regs[i].first()->is_Register()) { 1.1884 + const Register reg = in_regs[i].first()->as_Register(); 1.1885 + switch (in_sig_bt[i]) { 1.1886 + case T_BOOLEAN: 1.1887 + case T_BYTE: 1.1888 + case T_SHORT: 1.1889 + case T_CHAR: 1.1890 + case T_INT: single_slots++; break; 1.1891 + case T_ARRAY: // specific to LP64 (7145024) 1.1892 + case T_LONG: double_slots++; break; 1.1893 + default: ShouldNotReachHere(); 1.1894 + } 1.1895 + } else if (in_regs[i].first()->is_XMMRegister()) { 1.1896 + switch (in_sig_bt[i]) { 1.1897 + case T_FLOAT: single_slots++; break; 1.1898 + case T_DOUBLE: double_slots++; break; 1.1899 + default: ShouldNotReachHere(); 1.1900 + } 1.1901 + } else if (in_regs[i].first()->is_FloatRegister()) { 1.1902 + ShouldNotReachHere(); 1.1903 + } 1.1904 + } 1.1905 + total_save_slots = double_slots * 2 + single_slots; 1.1906 + // align the save area 1.1907 + if (double_slots != 0) { 1.1908 + stack_slots = round_to(stack_slots, 2); 1.1909 + } 1.1910 + } 1.1911 + 1.1912 + int oop_handle_offset = stack_slots; 1.1913 + stack_slots += total_save_slots; 1.1914 + 1.1915 + // Now any space we need for handlizing a klass if static method 1.1916 + 1.1917 + int klass_slot_offset = 0; 1.1918 + int klass_offset = -1; 1.1919 + int lock_slot_offset = 0; 1.1920 + bool is_static = false; 1.1921 + 1.1922 + if (method->is_static()) { 1.1923 + klass_slot_offset = stack_slots; 1.1924 + stack_slots += VMRegImpl::slots_per_word; 1.1925 + klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; 1.1926 + is_static = true; 1.1927 + } 1.1928 + 1.1929 + // Plus a lock if needed 1.1930 + 1.1931 + if (method->is_synchronized()) { 1.1932 + lock_slot_offset = stack_slots; 1.1933 + stack_slots += VMRegImpl::slots_per_word; 1.1934 + } 1.1935 + 1.1936 + // Now a place (+2) to save return values or temp during shuffling 1.1937 + // + 4 for return address 
(which we own) and saved rbp
1.1938 +  stack_slots += 6;
1.1939 +
1.1940 +  // Ok, the space we have allocated will look like:
1.1941 +  //
1.1942 +  //
1.1943 +  // FP-> |                     |
1.1944 +  //      |---------------------|
1.1945 +  //      | 2 slots for moves   |
1.1946 +  //      |---------------------|
1.1947 +  //      | lock box (if sync)  |
1.1948 +  //      |---------------------| <- lock_slot_offset
1.1949 +  //      | klass (if static)   |
1.1950 +  //      |---------------------| <- klass_slot_offset
1.1951 +  //      | oopHandle area      |
1.1952 +  //      |---------------------| <- oop_handle_offset (6 java arg registers)
1.1953 +  //      | outbound memory     |
1.1954 +  //      | based arguments     |
1.1955 +  //      |                     |
1.1956 +  //      |---------------------|
1.1957 +  //      |                     |
1.1958 +  // SP-> | out_preserved_slots |
1.1959 +  //
1.1960 +  //
1.1961 +
1.1962 +
1.1963 +  // Now compute actual number of stack words we need, rounding to make
1.1964 +  // the stack properly aligned.
1.1965 +  stack_slots = round_to(stack_slots, StackAlignmentInSlots);
1.1966 +
1.1967 +  int stack_size = stack_slots * VMRegImpl::stack_slot_size;
1.1968 +
1.1969 +  // First thing make an ic check to see if we should even be here
1.1970 +
1.1971 +  // We are free to use all registers as temps without saving them and
1.1972 +  // restoring them except rbp. rbp is the only callee save register
1.1973 +  // as far as the interpreter and the compiler(s) are concerned.
1.1974 +
1.1975 +
1.1976 +  const Register ic_reg = rax;
1.1977 +  const Register receiver = j_rarg0;
1.1978 +
1.1979 +  Label hit;
1.1980 +  Label exception_pending;
1.1981 +
1.1982 +  assert_different_registers(ic_reg, receiver, rscratch1);
1.1983 +  __ verify_oop(receiver);
1.1984 +  __ load_klass(rscratch1, receiver);
1.1985 +  __ cmpq(ic_reg, rscratch1);
1.1986 +  __ jcc(Assembler::equal, hit);
1.1987 +
1.1988 +  __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1.1989 +
1.1990 +  // Verified entry point must be aligned
1.1991 +  __ align(8);
1.1992 +
1.1993 +  __ bind(hit);
1.1994 +
1.1995 +  int vep_offset = ((intptr_t)__ pc()) - start;
1.1996 +
1.1997 +  // The instruction at the verified entry point must be 5 bytes or longer
1.1998 +  // because it can be patched on the fly by make_non_entrant. The stack bang
1.1999 +  // instruction fits that requirement.
1.2000 +
1.2001 +  // Generate stack overflow check
1.2002 +
1.2003 +  if (UseStackBanging) {
1.2004 +    __ bang_stack_with_offset(StackShadowPages*os::vm_page_size());
1.2005 +  } else {
1.2006 +    // need a 5 byte instruction to allow MT safe patching to non-entrant
1.2007 +    __ fat_nop();
1.2008 +  }
1.2009 +
1.2010 +  // Generate a new frame for the wrapper.
1.2011 +  __ enter();
1.2012 +  // -2 because return address is already present and so is saved rbp
1.2013 +  __ subptr(rsp, stack_size - 2*wordSize);
1.2014 +
1.2015 +  // Frame is now completed as far as size and linkage.
1.2016 +  int frame_complete = ((intptr_t)__ pc()) - start;
1.2017 +
1.2018 +  if (UseRTMLocking) {
1.2019 +    // Abort RTM transaction before calling JNI
1.2020 +    // because critical section will be large and will be
1.2021 +    // aborted anyway. Also nmethod could be deoptimized.
1.2022 +    __ xabort(0);
1.2023 +  }
1.2024 +
1.2025 +#ifdef ASSERT
1.2026 +  {
1.2027 +    Label L;
1.2028 +    __ mov(rax, rsp);
1.2029 +    __ andptr(rax, -16); // must be 16 byte boundary (see amd64 ABI)
1.2030 +    __ cmpptr(rax, rsp);
1.2031 +    __ jcc(Assembler::equal, L);
1.2032 +    __ stop("improperly aligned stack");
1.2033 +    __ bind(L);
1.2034 +  }
1.2035 +#endif /* ASSERT */
1.2036 +
1.2037 +
1.2038 +  // We use r14 as the oop handle for the receiver/klass
1.2039 +  // It is callee save so it survives the call to native
1.2040 +
1.2041 +  const Register oop_handle_reg = r14;
1.2042 +
1.2043 +  if (is_critical_native) {
1.2044 +    check_needs_gc_for_critical_native(masm, stack_slots, total_c_args, total_in_args,
1.2045 +                                       oop_handle_offset, oop_maps, in_regs, in_sig_bt);
1.2046 +  }
1.2047 +
1.2048 +  //
1.2049 +  // We immediately shuffle the arguments so that for any vm call we have to
1.2050 +  // make from here on out (sync slow path, jvmti, etc.) we will have
1.2051 +  // captured the oops from our caller and have a valid oopMap for
1.2052 +  // them.
1.2053 +
1.2054 +  // -----------------
1.2055 +  // The Grand Shuffle
1.2056 +
1.2057 +  // The Java calling convention is either equal (linux) or denser (win64) than the
1.2058 +  // c calling convention. However, because of the jni_env argument the c calling
1.2059 +  // convention always has at least one more (and two for static) arguments than Java.
1.2060 +  // Therefore if we move the args from java -> c backwards then we will never have
1.2061 +  // a register->register conflict and we don't have to build a dependency graph
1.2062 +  // and figure out how to break any cycles.
1.2063 +  //
1.2064 +
1.2065 +  // Record esp-based slot for receiver on stack for non-static methods
1.2066 +  int receiver_offset = -1;
1.2067 +
1.2068 +  // This is a trick. We double the stack slots so we can claim
1.2069 +  // the oops in the caller's frame. Since we are sure to have
1.2070 +  // more args than the caller doubling is enough to make
1.2071 +  // sure we can capture all the incoming oop args from the
1.2072 +  // caller.
1.2073 +  //
1.2074 +  OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
1.2075 +
1.2076 +  // Mark location of rbp (someday)
1.2077 +  // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(rbp));
1.2078 +
1.2079 +  // Use eax, ebx as temporaries during any memory-memory moves we have to do
1.2080 +  // All inbound args are referenced based on rbp and all outbound args via rsp.
1.2081 +
1.2082 +
1.2083 +#ifdef ASSERT
1.2084 +  bool reg_destroyed[RegisterImpl::number_of_registers];
1.2085 +  bool freg_destroyed[XMMRegisterImpl::number_of_registers];
1.2086 +  for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) {
1.2087 +    reg_destroyed[r] = false;
1.2088 +  }
1.2089 +  for ( int f = 0 ; f < XMMRegisterImpl::number_of_registers ; f++ ) {
1.2090 +    freg_destroyed[f] = false;
1.2091 +  }
1.2092 +
1.2093 +#endif /* ASSERT */
1.2094 +
1.2095 +  // This may iterate in two different directions depending on the
1.2096 +  // kind of native it is. The reason is that for regular JNI natives
1.2097 +  // the incoming and outgoing registers are offset upwards and for
1.2098 +  // critical natives they are offset down.
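A tiny standalone demonstration (illustrative only) of the Grand Shuffle claim above for the regular JNI direction: when every outgoing position is the incoming position shifted up by the hidden JNIEnv (and possibly class) slots, copying from the last argument backwards can never overwrite a value that is still needed.

#include <cstdio>

int main() {
  // "argument registers" 0..4: java args sit in 0..2, the C convention
  // wants them in 1..3 (slot 0 will later receive JNIEnv*).
  long regs[5] = {10, 20, 30, 0, 0};
  for (int i = 2; i >= 0; i--) {
    regs[i + 1] = regs[i];   // dst i+1 was either copied already or was
  }                          // never a source, so nothing is lost
  printf("%ld %ld %ld\n", regs[1], regs[2], regs[3]);  // 10 20 30
  return 0;
}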
1.2099 + GrowableArray<int> arg_order(2 * total_in_args); 1.2100 + VMRegPair tmp_vmreg; 1.2101 + tmp_vmreg.set1(rbx->as_VMReg()); 1.2102 + 1.2103 + if (!is_critical_native) { 1.2104 + for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { 1.2105 + arg_order.push(i); 1.2106 + arg_order.push(c_arg); 1.2107 + } 1.2108 + } else { 1.2109 + // Compute a valid move order, using tmp_vmreg to break any cycles 1.2110 + ComputeMoveOrder cmo(total_in_args, in_regs, total_c_args, out_regs, in_sig_bt, arg_order, tmp_vmreg); 1.2111 + } 1.2112 + 1.2113 + int temploc = -1; 1.2114 + for (int ai = 0; ai < arg_order.length(); ai += 2) { 1.2115 + int i = arg_order.at(ai); 1.2116 + int c_arg = arg_order.at(ai + 1); 1.2117 + __ block_comment(err_msg("move %d -> %d", i, c_arg)); 1.2118 + if (c_arg == -1) { 1.2119 + assert(is_critical_native, "should only be required for critical natives"); 1.2120 + // This arg needs to be moved to a temporary 1.2121 + __ mov(tmp_vmreg.first()->as_Register(), in_regs[i].first()->as_Register()); 1.2122 + in_regs[i] = tmp_vmreg; 1.2123 + temploc = i; 1.2124 + continue; 1.2125 + } else if (i == -1) { 1.2126 + assert(is_critical_native, "should only be required for critical natives"); 1.2127 + // Read from the temporary location 1.2128 + assert(temploc != -1, "must be valid"); 1.2129 + i = temploc; 1.2130 + temploc = -1; 1.2131 + } 1.2132 +#ifdef ASSERT 1.2133 + if (in_regs[i].first()->is_Register()) { 1.2134 + assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!"); 1.2135 + } else if (in_regs[i].first()->is_XMMRegister()) { 1.2136 + assert(!freg_destroyed[in_regs[i].first()->as_XMMRegister()->encoding()], "destroyed reg!"); 1.2137 + } 1.2138 + if (out_regs[c_arg].first()->is_Register()) { 1.2139 + reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; 1.2140 + } else if (out_regs[c_arg].first()->is_XMMRegister()) { 1.2141 + freg_destroyed[out_regs[c_arg].first()->as_XMMRegister()->encoding()] = true; 1.2142 + } 1.2143 +#endif /* ASSERT */ 1.2144 + switch (in_sig_bt[i]) { 1.2145 + case T_ARRAY: 1.2146 + if (is_critical_native) { 1.2147 + unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]); 1.2148 + c_arg++; 1.2149 +#ifdef ASSERT 1.2150 + if (out_regs[c_arg].first()->is_Register()) { 1.2151 + reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; 1.2152 + } else if (out_regs[c_arg].first()->is_XMMRegister()) { 1.2153 + freg_destroyed[out_regs[c_arg].first()->as_XMMRegister()->encoding()] = true; 1.2154 + } 1.2155 +#endif 1.2156 + break; 1.2157 + } 1.2158 + case T_OBJECT: 1.2159 + assert(!is_critical_native, "no oop arguments"); 1.2160 + object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], 1.2161 + ((i == 0) && (!is_static)), 1.2162 + &receiver_offset); 1.2163 + break; 1.2164 + case T_VOID: 1.2165 + break; 1.2166 + 1.2167 + case T_FLOAT: 1.2168 + float_move(masm, in_regs[i], out_regs[c_arg]); 1.2169 + break; 1.2170 + 1.2171 + case T_DOUBLE: 1.2172 + assert( i + 1 < total_in_args && 1.2173 + in_sig_bt[i + 1] == T_VOID && 1.2174 + out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); 1.2175 + double_move(masm, in_regs[i], out_regs[c_arg]); 1.2176 + break; 1.2177 + 1.2178 + case T_LONG : 1.2179 + long_move(masm, in_regs[i], out_regs[c_arg]); 1.2180 + break; 1.2181 + 1.2182 + case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); 1.2183 + 1.2184 + default: 1.2185 + move32_64(masm, in_regs[i], out_regs[c_arg]); 1.2186 + } 
1.2187 +  }
1.2188 +
1.2189 +  int c_arg;
1.2190 +
1.2191 +  // Pre-load a static method's oop into r14. Used both by locking code and
1.2192 +  // the normal JNI call code.
1.2193 +  if (!is_critical_native) {
1.2194 +    // point c_arg at the first arg that is already loaded in case we
1.2195 +    // need to spill before we call out
1.2196 +    c_arg = total_c_args - total_in_args;
1.2197 +
1.2198 +    if (method->is_static()) {
1.2199 +
1.2200 +      // load oop into a register
1.2201 +      __ movoop(oop_handle_reg, JNIHandles::make_local(method->method_holder()->java_mirror()));
1.2202 +
1.2203 +      // Now handlize the static class mirror; it's known not-null.
1.2204 +      __ movptr(Address(rsp, klass_offset), oop_handle_reg);
1.2205 +      map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
1.2206 +
1.2207 +      // Now get the handle
1.2208 +      __ lea(oop_handle_reg, Address(rsp, klass_offset));
1.2209 +      // store the klass handle as second argument
1.2210 +      __ movptr(c_rarg1, oop_handle_reg);
1.2211 +      // and protect the arg if we must spill
1.2212 +      c_arg--;
1.2213 +    }
1.2214 +  } else {
1.2215 +    // For JNI critical methods we need to save all registers in save_args.
1.2216 +    c_arg = 0;
1.2217 +  }
1.2218 +
1.2219 +  // Change state to native (we save the return address in the thread, since it might not
1.2220 +  // be pushed on the stack when we do a stack traversal). It is enough that the pc()
1.2221 +  // points into the right code segment. It does not have to be the correct return pc.
1.2222 +  // We use the same pc/oopMap repeatedly when we call out.
1.2223 +
1.2224 +  intptr_t the_pc = (intptr_t) __ pc();
1.2225 +  oop_maps->add_gc_map(the_pc - start, map);
1.2226 +
1.2227 +  __ set_last_Java_frame(rsp, noreg, (address)the_pc);
1.2228 +
1.2229 +
1.2230 +  // We have all of the arguments set up at this point. We must not touch any of the
1.2231 +  // argument registers now (if we were to save/restore them, no oopMap would describe them).
1.2232 + 1.2233 + { 1.2234 + SkipIfEqual skip(masm, &DTraceMethodProbes, false); 1.2235 + // protect the args we've loaded 1.2236 + save_args(masm, total_c_args, c_arg, out_regs); 1.2237 + __ mov_metadata(c_rarg1, method()); 1.2238 + __ call_VM_leaf( 1.2239 + CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), 1.2240 + r15_thread, c_rarg1); 1.2241 + restore_args(masm, total_c_args, c_arg, out_regs); 1.2242 + } 1.2243 + 1.2244 + // RedefineClasses() tracing support for obsolete method entry 1.2245 + if (RC_TRACE_IN_RANGE(0x00001000, 0x00002000)) { 1.2246 + // protect the args we've loaded 1.2247 + save_args(masm, total_c_args, c_arg, out_regs); 1.2248 + __ mov_metadata(c_rarg1, method()); 1.2249 + __ call_VM_leaf( 1.2250 + CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), 1.2251 + r15_thread, c_rarg1); 1.2252 + restore_args(masm, total_c_args, c_arg, out_regs); 1.2253 + } 1.2254 + 1.2255 + // Lock a synchronized method 1.2256 + 1.2257 + // Register definitions used by locking and unlocking 1.2258 + 1.2259 + const Register swap_reg = rax; // Must use rax for cmpxchg instruction 1.2260 + const Register obj_reg = rbx; // Will contain the oop 1.2261 + const Register lock_reg = r13; // Address of compiler lock object (BasicLock) 1.2262 + const Register old_hdr = r13; // value of old header at unlock time 1.2263 + 1.2264 + Label slow_path_lock; 1.2265 + Label lock_done; 1.2266 + 1.2267 + if (method->is_synchronized()) { 1.2268 + assert(!is_critical_native, "unhandled"); 1.2269 + 1.2270 + 1.2271 + const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); 1.2272 + 1.2273 + // Get the handle (the 2nd argument) 1.2274 + __ mov(oop_handle_reg, c_rarg1); 1.2275 + 1.2276 + // Get address of the box 1.2277 + 1.2278 + __ lea(lock_reg, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size)); 1.2279 + 1.2280 + // Load the oop from the handle 1.2281 + __ movptr(obj_reg, Address(oop_handle_reg, 0)); 1.2282 + 1.2283 + if (UseBiasedLocking) { 1.2284 + __ biased_locking_enter(lock_reg, obj_reg, swap_reg, rscratch1, false, lock_done, &slow_path_lock); 1.2285 + } 1.2286 + 1.2287 + // Load immediate 1 into swap_reg %rax 1.2288 + __ movl(swap_reg, 1); 1.2289 + 1.2290 + // Load (object->mark() | 1) into swap_reg %rax 1.2291 + __ orptr(swap_reg, Address(obj_reg, 0)); 1.2292 + 1.2293 + // Save (object->mark() | 1) into BasicLock's displaced header 1.2294 + __ movptr(Address(lock_reg, mark_word_offset), swap_reg); 1.2295 + 1.2296 + if (os::is_MP()) { 1.2297 + __ lock(); 1.2298 + } 1.2299 + 1.2300 + // src -> dest iff dest == rax else rax <- dest 1.2301 + __ cmpxchgptr(lock_reg, Address(obj_reg, 0)); 1.2302 + __ jcc(Assembler::equal, lock_done); 1.2303 + 1.2304 + // Hmm should this move to the slow path code area??? 1.2305 + 1.2306 + // Test if the oopMark is an obvious stack pointer, i.e., 1.2307 + // 1) (mark & 3) == 0, and 1.2308 + // 2) rsp <= mark < mark + os::pagesize() 1.2309 + // These 3 tests can be done by evaluating the following 1.2310 + // expression: ((mark - rsp) & (3 - os::vm_page_size())), 1.2311 + // assuming both stack pointer and pagesize have their 1.2312 + // least significant 2 bits clear. 
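The expression just described can be checked exhaustively in a few lines. A standalone sketch (illustrative), using the same assumption that the stack pointer and the page size have their two low bits clear:

#include <cassert>
#include <cstdint>

int main() {
  const uint64_t page = 4096;
  const uint64_t rsp  = 0x7ffff000;   // 4-byte aligned, like a real sp
  for (uint64_t mark = rsp - page; mark < rsp + 2 * page; mark++) {
    bool fast = ((mark - rsp) & (3 - page)) == 0;   // the two-instruction test
    bool slow = (mark & 3) == 0 && mark >= rsp && mark < rsp + page;
    assert(fast == slow);   // the mask keeps the low 2 bits plus everything >= page
  }
  return 0;
}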
1.2313 +  // NOTE: the oopMark is in swap_reg %rax as the result of cmpxchg
1.2314 +
1.2315 +    __ subptr(swap_reg, rsp);
1.2316 +    __ andptr(swap_reg, 3 - os::vm_page_size());
1.2317 +
1.2318 +    // Save the test result, for recursive case, the result is zero
1.2319 +    __ movptr(Address(lock_reg, mark_word_offset), swap_reg);
1.2320 +    __ jcc(Assembler::notEqual, slow_path_lock);
1.2321 +
1.2322 +    // Slow path will re-enter here
1.2323 +
1.2324 +    __ bind(lock_done);
1.2325 +  }
1.2326 +
1.2327 +
1.2328 +  // Finally just about ready to make the JNI call
1.2329 +
1.2330 +
1.2331 +  // get JNIEnv* which is first argument to native
1.2332 +  if (!is_critical_native) {
1.2333 +    __ lea(c_rarg0, Address(r15_thread, in_bytes(JavaThread::jni_environment_offset())));
1.2334 +  }
1.2335 +
1.2336 +  // Now set thread in native
1.2337 +  __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_native);
1.2338 +
1.2339 +  __ call(RuntimeAddress(native_func));
1.2340 +
1.2341 +  // Verify or restore cpu control state after JNI call
1.2342 +  __ restore_cpu_control_state_after_jni();
1.2343 +
1.2344 +  // Unpack native results.
1.2345 +  switch (ret_type) {
1.2346 +  case T_BOOLEAN: __ c2bool(rax);            break;
1.2347 +  case T_CHAR   : __ movzwl(rax, rax);       break;
1.2348 +  case T_BYTE   : __ sign_extend_byte (rax); break;
1.2349 +  case T_SHORT  : __ sign_extend_short(rax); break;
1.2350 +  case T_INT    : /* nothing to do */        break;
1.2351 +  case T_DOUBLE :
1.2352 +  case T_FLOAT  :
1.2353 +    // Result is in xmm0 we'll save as needed
1.2354 +    break;
1.2355 +  case T_ARRAY:                 // Really a handle
1.2356 +  case T_OBJECT:                // Really a handle
1.2357 +    break; // can't de-handlize until after safepoint check
1.2358 +  case T_VOID: break;
1.2359 +  case T_LONG: break;
1.2360 +  default       : ShouldNotReachHere();
1.2361 +  }
1.2362 +
1.2363 +  // Switch thread to "native transition" state before reading the synchronization state.
1.2364 +  // This additional state is necessary because reading and testing the synchronization
1.2365 +  // state is not atomic w.r.t. GC, as this scenario demonstrates:
1.2366 +  //     Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
1.2367 +  //     VM thread changes sync state to synchronizing and suspends threads for GC.
1.2368 +  //     Thread A is resumed to finish this native method, but doesn't block here since it
1.2369 +  //     didn't see any synchronization in progress, and escapes.
1.2370 +  __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_native_trans);
1.2371 +
1.2372 +  if (os::is_MP()) {
1.2373 +    if (UseMembar) {
1.2374 +      // Force this write out before the read below
1.2375 +      __ membar(Assembler::Membar_mask_bits(
1.2376 +                  Assembler::LoadLoad | Assembler::LoadStore |
1.2377 +                  Assembler::StoreLoad | Assembler::StoreStore));
1.2378 +    } else {
1.2379 +      // Write serialization page so VM thread can do a pseudo remote membar.
1.2380 +      // We use the current thread pointer to calculate a thread specific
1.2381 +      // offset to write to within the page. This minimizes bus traffic
1.2382 +      // due to cache line collision.
1.2383 + __ serialize_memory(r15_thread, rcx); 1.2384 + } 1.2385 + } 1.2386 + 1.2387 + Label after_transition; 1.2388 + 1.2389 + // check for safepoint operation in progress and/or pending suspend requests 1.2390 + { 1.2391 + Label Continue; 1.2392 + 1.2393 + __ cmp32(ExternalAddress((address)SafepointSynchronize::address_of_state()), 1.2394 + SafepointSynchronize::_not_synchronized); 1.2395 + 1.2396 + Label L; 1.2397 + __ jcc(Assembler::notEqual, L); 1.2398 + __ cmpl(Address(r15_thread, JavaThread::suspend_flags_offset()), 0); 1.2399 + __ jcc(Assembler::equal, Continue); 1.2400 + __ bind(L); 1.2401 + 1.2402 + // Don't use call_VM as it will see a possible pending exception and forward it 1.2403 + // and never return here preventing us from clearing _last_native_pc down below. 1.2404 + // Also can't use call_VM_leaf either as it will check to see if rsi & rdi are 1.2405 + // preserved and correspond to the bcp/locals pointers. So we do a runtime call 1.2406 + // by hand. 1.2407 + // 1.2408 + save_native_result(masm, ret_type, stack_slots); 1.2409 + __ mov(c_rarg0, r15_thread); 1.2410 + __ mov(r12, rsp); // remember sp 1.2411 + __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows 1.2412 + __ andptr(rsp, -16); // align stack as required by ABI 1.2413 + if (!is_critical_native) { 1.2414 + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans))); 1.2415 + } else { 1.2416 + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition))); 1.2417 + } 1.2418 + __ mov(rsp, r12); // restore sp 1.2419 + __ reinit_heapbase(); 1.2420 + // Restore any method result value 1.2421 + restore_native_result(masm, ret_type, stack_slots); 1.2422 + 1.2423 + if (is_critical_native) { 1.2424 + // The call above performed the transition to thread_in_Java so 1.2425 + // skip the transition logic below. 1.2426 + __ jmpb(after_transition); 1.2427 + } 1.2428 + 1.2429 + __ bind(Continue); 1.2430 + } 1.2431 + 1.2432 + // change thread state 1.2433 + __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_Java); 1.2434 + __ bind(after_transition); 1.2435 + 1.2436 + Label reguard; 1.2437 + Label reguard_done; 1.2438 + __ cmpl(Address(r15_thread, JavaThread::stack_guard_state_offset()), JavaThread::stack_guard_yellow_disabled); 1.2439 + __ jcc(Assembler::equal, reguard); 1.2440 + __ bind(reguard_done); 1.2441 + 1.2442 + // native result if any is live 1.2443 + 1.2444 + // Unlock 1.2445 + Label unlock_done; 1.2446 + Label slow_path_unlock; 1.2447 + if (method->is_synchronized()) { 1.2448 + 1.2449 + // Get locked oop from the handle we passed to jni 1.2450 + __ movptr(obj_reg, Address(oop_handle_reg, 0)); 1.2451 + 1.2452 + Label done; 1.2453 + 1.2454 + if (UseBiasedLocking) { 1.2455 + __ biased_locking_exit(obj_reg, old_hdr, done); 1.2456 + } 1.2457 + 1.2458 + // Simple recursive lock? 
1.2459 +
1.2460 +    __ cmpptr(Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size), (int32_t)NULL_WORD);
1.2461 +    __ jcc(Assembler::equal, done);
1.2462 +
1.2463 +    // Must save rax if it is live now because cmpxchg must use it
1.2464 +    if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) {
1.2465 +      save_native_result(masm, ret_type, stack_slots);
1.2466 +    }
1.2467 +
1.2468 +
1.2469 +    // get address of the stack lock
1.2470 +    __ lea(rax, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size));
1.2471 +    // get old displaced header
1.2472 +    __ movptr(old_hdr, Address(rax, 0));
1.2473 +
1.2474 +    // Atomic swap old header if oop still contains the stack lock
1.2475 +    if (os::is_MP()) {
1.2476 +      __ lock();
1.2477 +    }
1.2478 +    __ cmpxchgptr(old_hdr, Address(obj_reg, 0));
1.2479 +    __ jcc(Assembler::notEqual, slow_path_unlock);
1.2480 +
1.2481 +    // slow path re-enters here
1.2482 +    __ bind(unlock_done);
1.2483 +    if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) {
1.2484 +      restore_native_result(masm, ret_type, stack_slots);
1.2485 +    }
1.2486 +
1.2487 +    __ bind(done);
1.2488 +
1.2489 +  }
1.2490 +  {
1.2491 +    SkipIfEqual skip(masm, &DTraceMethodProbes, false);
1.2492 +    save_native_result(masm, ret_type, stack_slots);
1.2493 +    __ mov_metadata(c_rarg1, method());
1.2494 +    __ call_VM_leaf(
1.2495 +         CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit),
1.2496 +         r15_thread, c_rarg1);
1.2497 +    restore_native_result(masm, ret_type, stack_slots);
1.2498 +  }
1.2499 +
1.2500 +  __ reset_last_Java_frame(false, true);
1.2501 +
1.2502 +  // Unpack oop result
1.2503 +  if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
1.2504 +    Label L;
1.2505 +    __ testptr(rax, rax);
1.2506 +    __ jcc(Assembler::zero, L);
1.2507 +    __ movptr(rax, Address(rax, 0));
1.2508 +    __ bind(L);
1.2509 +    __ verify_oop(rax);
1.2510 +  }
1.2511 +
1.2512 +  if (!is_critical_native) {
1.2513 +    // reset handle block
1.2514 +    __ movptr(rcx, Address(r15_thread, JavaThread::active_handles_offset()));
1.2515 +    __ movl(Address(rcx, JNIHandleBlock::top_offset_in_bytes()), (int32_t)NULL_WORD);
1.2516 +  }
1.2517 +
1.2518 +  // pop our frame
1.2519 +
1.2520 +  __ leave();
1.2521 +
1.2522 +  if (!is_critical_native) {
1.2523 +    // Any exception pending?
1.2524 +    __ cmpptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD);
1.2525 +    __ jcc(Assembler::notEqual, exception_pending);
1.2526 +  }
1.2527 +
1.2528 +  // Return
1.2529 +
1.2530 +  __ ret(0);
1.2531 +
1.2532 +  // Unexpected paths are out of line and go here
1.2533 +
1.2534 +  if (!is_critical_native) {
1.2535 +    // forward the exception
1.2536 +    __ bind(exception_pending);
1.2537 +
1.2538 +    // and forward the exception
1.2539 +    __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
1.2540 +  }
1.2541 +
1.2542 +  // Slow path locking & unlocking
1.2543 +  if (method->is_synchronized()) {
1.2544 +
1.2545 +    // BEGIN Slow path lock
1.2546 +    __ bind(slow_path_lock);
1.2547 +
1.2548 +    // has last_Java_frame setup.
No exceptions so do vanilla call not call_VM 1.2549 + // args are (oop obj, BasicLock* lock, JavaThread* thread) 1.2550 + 1.2551 + // protect the args we've loaded 1.2552 + save_args(masm, total_c_args, c_arg, out_regs); 1.2553 + 1.2554 + __ mov(c_rarg0, obj_reg); 1.2555 + __ mov(c_rarg1, lock_reg); 1.2556 + __ mov(c_rarg2, r15_thread); 1.2557 + 1.2558 + // Not a leaf but we have last_Java_frame setup as we want 1.2559 + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), 3); 1.2560 + restore_args(masm, total_c_args, c_arg, out_regs); 1.2561 + 1.2562 +#ifdef ASSERT 1.2563 + { Label L; 1.2564 + __ cmpptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD); 1.2565 + __ jcc(Assembler::equal, L); 1.2566 + __ stop("no pending exception allowed on exit from monitorenter"); 1.2567 + __ bind(L); 1.2568 + } 1.2569 +#endif 1.2570 + __ jmp(lock_done); 1.2571 + 1.2572 + // END Slow path lock 1.2573 + 1.2574 + // BEGIN Slow path unlock 1.2575 + __ bind(slow_path_unlock); 1.2576 + 1.2577 + // If we haven't already saved the native result we must save it now as xmm registers 1.2578 + // are still exposed. 1.2579 + 1.2580 + if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { 1.2581 + save_native_result(masm, ret_type, stack_slots); 1.2582 + } 1.2583 + 1.2584 + __ lea(c_rarg1, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size)); 1.2585 + 1.2586 + __ mov(c_rarg0, obj_reg); 1.2587 + __ mov(r12, rsp); // remember sp 1.2588 + __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows 1.2589 + __ andptr(rsp, -16); // align stack as required by ABI 1.2590 + 1.2591 + // Save pending exception around call to VM (which contains an EXCEPTION_MARK) 1.2592 + // NOTE that obj_reg == rbx currently 1.2593 + __ movptr(rbx, Address(r15_thread, in_bytes(Thread::pending_exception_offset()))); 1.2594 + __ movptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD); 1.2595 + 1.2596 + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C))); 1.2597 + __ mov(rsp, r12); // restore sp 1.2598 + __ reinit_heapbase(); 1.2599 +#ifdef ASSERT 1.2600 + { 1.2601 + Label L; 1.2602 + __ cmpptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), (int)NULL_WORD); 1.2603 + __ jcc(Assembler::equal, L); 1.2604 + __ stop("no pending exception allowed on exit complete_monitor_unlocking_C"); 1.2605 + __ bind(L); 1.2606 + } 1.2607 +#endif /* ASSERT */ 1.2608 + 1.2609 + __ movptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), rbx); 1.2610 + 1.2611 + if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { 1.2612 + restore_native_result(masm, ret_type, stack_slots); 1.2613 + } 1.2614 + __ jmp(unlock_done); 1.2615 + 1.2616 + // END Slow path unlock 1.2617 + 1.2618 + } // synchronized 1.2619 + 1.2620 + // SLOW PATH Reguard the stack if needed 1.2621 + 1.2622 + __ bind(reguard); 1.2623 + save_native_result(masm, ret_type, stack_slots); 1.2624 + __ mov(r12, rsp); // remember sp 1.2625 + __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows 1.2626 + __ andptr(rsp, -16); // align stack as required by ABI 1.2627 + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages))); 1.2628 + __ mov(rsp, r12); // restore sp 1.2629 + __ reinit_heapbase(); 1.2630 + restore_native_result(masm, ret_type, stack_slots); 1.2631 + // and continue 1.2632 + __ jmp(reguard_done); 1.2633 + 1.2634 + 1.2635 + 1.2636 + __ flush(); 1.2637 + 1.2638 + nmethod *nm = 
nmethod::new_native_nmethod(method,
1.2639 +                                            compile_id,
1.2640 +                                            masm->code(),
1.2641 +                                            vep_offset,
1.2642 +                                            frame_complete,
1.2643 +                                            stack_slots / VMRegImpl::slots_per_word,
1.2644 +                                            (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
1.2645 +                                            in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size),
1.2646 +                                            oop_maps);
1.2647 +
1.2648 +  if (is_critical_native) {
1.2649 +    nm->set_lazy_critical_native(true);
1.2650 +  }
1.2651 +
1.2652 +  return nm;
1.2653 +
1.2654 +}
1.2655 +
1.2656 +#ifdef HAVE_DTRACE_H
1.2657 +// ---------------------------------------------------------------------------
1.2658 +// Generate a dtrace nmethod for a given signature. The method takes arguments
1.2659 +// in the Java compiled code convention, marshals them to the native
1.2660 +// abi and then leaves nops at the position you would expect to call a native
1.2661 +// function. When the probe is enabled the nops are replaced with a trap
1.2662 +// instruction that dtrace inserts and the trace will cause a notification
1.2663 +// to dtrace.
1.2664 +//
1.2665 +// The probes are only able to take primitive types and java/lang/String as
1.2666 +// arguments. No other java types are allowed. Strings are converted to utf8
1.2667 +// strings, so from dtrace's point of view java strings become C strings.
1.2668 +// There is an arbitrary fixed limit on the total space that a method
1.2669 +// can use for converting the strings (256 chars per string in the signature),
1.2670 +// so any java string larger than this is truncated.
1.2671 +
1.2672 +static int fp_offset[ConcreteRegisterImpl::number_of_registers] = { 0 };
1.2673 +static bool offsets_initialized = false;
1.2674 +
1.2675 +
1.2676 +nmethod *SharedRuntime::generate_dtrace_nmethod(MacroAssembler *masm,
1.2677 +                                                methodHandle method) {
1.2678 +
1.2679 +
1.2680 +  // generate_dtrace_nmethod is guarded by a mutex so we are sure to
1.2681 +  // be single threaded in this method.
1.2682 +  assert(AdapterHandlerLibrary_lock->owned_by_self(), "must be");
1.2683 +
1.2684 +  if (!offsets_initialized) {
1.2685 +    fp_offset[c_rarg0->as_VMReg()->value()] = -1 * wordSize;
1.2686 +    fp_offset[c_rarg1->as_VMReg()->value()] = -2 * wordSize;
1.2687 +    fp_offset[c_rarg2->as_VMReg()->value()] = -3 * wordSize;
1.2688 +    fp_offset[c_rarg3->as_VMReg()->value()] = -4 * wordSize;
1.2689 +    fp_offset[c_rarg4->as_VMReg()->value()] = -5 * wordSize;
1.2690 +    fp_offset[c_rarg5->as_VMReg()->value()] = -6 * wordSize;
1.2691 +
1.2692 +    fp_offset[c_farg0->as_VMReg()->value()] = -7 * wordSize;
1.2693 +    fp_offset[c_farg1->as_VMReg()->value()] = -8 * wordSize;
1.2694 +    fp_offset[c_farg2->as_VMReg()->value()] = -9 * wordSize;
1.2695 +    fp_offset[c_farg3->as_VMReg()->value()] = -10 * wordSize;
1.2696 +    fp_offset[c_farg4->as_VMReg()->value()] = -11 * wordSize;
1.2697 +    fp_offset[c_farg5->as_VMReg()->value()] = -12 * wordSize;
1.2698 +    fp_offset[c_farg6->as_VMReg()->value()] = -13 * wordSize;
1.2699 +    fp_offset[c_farg7->as_VMReg()->value()] = -14 * wordSize;
1.2700 +
1.2701 +    offsets_initialized = true;
1.2702 +  }
1.2703 +  // Fill in the signature array, for the calling-convention call.
1.2704 +  int total_args_passed = method->size_of_parameters();
1.2705 +
1.2706 +  BasicType* in_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed);
1.2707 +  VMRegPair* in_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed);
1.2708 +
1.2709 +  // The signature we are going to use for the trap that dtrace will see:
1.2710 +  // java/lang/String is converted, we drop "this", and any other object
1.2711 +  // is passed as NULL. (A one-slot java/lang/Long object reference
1.2712 +  // is converted to a two-slot long, which is why we double the allocation).
1.2713 +  BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed * 2);
1.2714 +  VMRegPair* out_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed * 2);
1.2715 +
1.2716 +  int i=0;
1.2717 +  int total_strings = 0;
1.2718 +  int first_arg_to_pass = 0;
1.2719 +  int total_c_args = 0;
1.2720 +
1.2721 +  // Skip the receiver as dtrace doesn't want to see it
1.2722 +  if( !method->is_static() ) {
1.2723 +    in_sig_bt[i++] = T_OBJECT;
1.2724 +    first_arg_to_pass = 1;
1.2725 +  }
1.2726 +
1.2727 +  // We need to convert the java args to where a native (non-jni) function
1.2728 +  // would expect them. To figure out where they go we convert the java
1.2729 +  // signature to a C signature.
1.2730 +
1.2731 +  SignatureStream ss(method->signature());
1.2732 +  for ( ; !ss.at_return_type(); ss.next()) {
1.2733 +    BasicType bt = ss.type();
1.2734 +    in_sig_bt[i++] = bt;  // Collect remaining bits of signature
1.2735 +    out_sig_bt[total_c_args++] = bt;
1.2736 +    if( bt == T_OBJECT) {
1.2737 +      Symbol* s = ss.as_symbol_or_null();   // symbol is created
1.2738 +      if (s == vmSymbols::java_lang_String()) {
1.2739 +        total_strings++;
1.2740 +        out_sig_bt[total_c_args-1] = T_ADDRESS;
1.2741 +      } else if (s == vmSymbols::java_lang_Boolean() ||
1.2742 +                 s == vmSymbols::java_lang_Character() ||
1.2743 +                 s == vmSymbols::java_lang_Byte() ||
1.2744 +                 s == vmSymbols::java_lang_Short() ||
1.2745 +                 s == vmSymbols::java_lang_Integer() ||
1.2746 +                 s == vmSymbols::java_lang_Float()) {
1.2747 +        out_sig_bt[total_c_args-1] = T_INT;
1.2748 +      } else if (s == vmSymbols::java_lang_Long() ||
1.2749 +                 s == vmSymbols::java_lang_Double()) {
1.2750 +        out_sig_bt[total_c_args-1] = T_LONG;
1.2751 +        out_sig_bt[total_c_args++] = T_VOID;
1.2752 +      }
1.2753 +    } else if ( bt == T_LONG || bt == T_DOUBLE ) {
1.2754 +      in_sig_bt[i++] = T_VOID;   // Longs & doubles take 2 Java slots
1.2755 +      // We convert double to long
1.2756 +      out_sig_bt[total_c_args-1] = T_LONG;
1.2757 +      out_sig_bt[total_c_args++] = T_VOID;
1.2758 +    } else if ( bt == T_FLOAT) {
1.2759 +      // We convert float to int
1.2760 +      out_sig_bt[total_c_args-1] = T_INT;
1.2761 +    }
1.2762 +  }
1.2763 +
1.2764 +  assert(i==total_args_passed, "validly parsed signature");
1.2765 +
1.2766 +  // Now get the compiled-Java layout as input arguments
1.2767 +  int comp_args_on_stack;
1.2768 +  comp_args_on_stack = SharedRuntime::java_calling_convention(
1.2769 +      in_sig_bt, in_regs, total_args_passed, false);
1.2770 +
1.2771 +  // Now figure out where the args must be stored and how much stack space
1.2772 +  // they require (neglecting out_preserve_stack_slots but providing space
1.2773 +  // for storing the 1st six register arguments). It's weird; see int_stk_helper.
1.2774 +
1.2775 +  int out_arg_slots;
1.2776 +  out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args);
1.2777 +
1.2778 +  // Calculate the total number of stack slots we will need.
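A simplified standalone model (illustrative; the real loop above also tracks the 2-slot Java representation of longs and doubles) of the conversion rules dtrace sees: float becomes int, double/long become long plus a filler slot, String becomes a pointer, and other objects will later be passed as NULL.

#include <cstdio>
#include <vector>

enum BT { BT_INT, BT_LONG, BT_FLOAT, BT_DOUBLE, BT_STRING, BT_OBJ, BT_ADDRESS, BT_VOID };

static std::vector<BT> dtrace_sig(const std::vector<BT>& java_sig) {
  std::vector<BT> out;
  for (BT bt : java_sig) {
    switch (bt) {
      case BT_FLOAT:  out.push_back(BT_INT); break;              // float -> int
      case BT_DOUBLE:                                            // fall through
      case BT_LONG:   out.push_back(BT_LONG);
                      out.push_back(BT_VOID); break;             // 2 slots: long + filler
      case BT_STRING: out.push_back(BT_ADDRESS); break;          // utf8 char*
      default:        out.push_back(bt); break;                  // other objects -> NULL later
    }
  }
  return out;
}

int main() {
  std::vector<BT> out = dtrace_sig({BT_STRING, BT_FLOAT, BT_DOUBLE});
  printf("%zu c args\n", out.size());   // 4: ADDRESS, INT, LONG, VOID
  return 0;
}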
1.2779 + 1.2780 + // First count the abi requirement plus all of the outgoing args 1.2781 + int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; 1.2782 + 1.2783 + // Now space for the string(s) we must convert 1.2784 + int* string_locs = NEW_RESOURCE_ARRAY(int, total_strings + 1); 1.2785 + for (i = 0; i < total_strings ; i++) { 1.2786 + string_locs[i] = stack_slots; 1.2787 + stack_slots += max_dtrace_string_size / VMRegImpl::stack_slot_size; 1.2788 + } 1.2789 + 1.2790 + // Plus the temps we might need to juggle register args 1.2791 + // regs take two slots each 1.2792 + stack_slots += (Argument::n_int_register_parameters_c + 1.2793 + Argument::n_float_register_parameters_c) * 2; 1.2794 + 1.2795 + 1.2796 + // + 4 for return address (which we own) and saved rbp, 1.2797 + 1.2798 + stack_slots += 4; 1.2799 + 1.2800 + // OK, the space we have allocated will look like: 1.2801 + // 1.2802 + // 1.2803 + // FP-> | | 1.2804 + // |---------------------| 1.2805 + // | string[n] | 1.2806 + // |---------------------| <- string_locs[n] 1.2807 + // | string[n-1] | 1.2808 + // |---------------------| <- string_locs[n-1] 1.2809 + // | ... | 1.2810 + // | ... | 1.2811 + // |---------------------| <- string_locs[1] 1.2812 + // | string[0] | 1.2813 + // |---------------------| <- string_locs[0] 1.2814 + // | outbound memory | 1.2815 + // | based arguments | 1.2816 + // | | 1.2817 + // |---------------------| 1.2818 + // | | 1.2819 + // SP-> | out_preserved_slots | 1.2820 + // 1.2821 + // 1.2822 + 1.2823 + // Now compute the actual number of stack words we need, rounding to keep 1.2824 + // the stack properly aligned. 1.2825 + stack_slots = round_to(stack_slots, 4 * VMRegImpl::slots_per_word); 1.2826 + 1.2827 + int stack_size = stack_slots * VMRegImpl::stack_slot_size; 1.2828 + 1.2829 + intptr_t start = (intptr_t)__ pc(); 1.2830 + 1.2831 + // First thing: make an ic check to see if we should even be here 1.2832 + 1.2833 + // We are free to use all registers as temps without saving them and 1.2834 + // restoring them, except rbp. rbp is the only callee-save register 1.2835 + // as far as the interpreter and the compiler(s) are concerned. 1.2836 + 1.2837 + const Register ic_reg = rax; 1.2838 + const Register receiver = rcx; 1.2839 + Label hit; 1.2840 + Label exception_pending; 1.2841 + 1.2842 + 1.2843 + __ verify_oop(receiver); 1.2844 + __ cmpl(ic_reg, Address(receiver, oopDesc::klass_offset_in_bytes())); 1.2845 + __ jcc(Assembler::equal, hit); 1.2846 + 1.2847 + __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); 1.2848 + 1.2849 + // The verified entry must be aligned for code patching, 1.2850 + // and the first 5 bytes must be in the same cache line; 1.2851 + // if we align at 8 then we will be sure 5 bytes are in the same line 1.2852 + __ align(8); 1.2853 + 1.2854 + __ bind(hit); 1.2855 + 1.2856 + int vep_offset = ((intptr_t)__ pc()) - start; 1.2857 + 1.2858 + 1.2859 + // The instruction at the verified entry point must be 5 bytes or longer 1.2860 + // because it can be patched on the fly by make_non_entrant. The stack bang 1.2861 + // instruction fits that requirement.
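// A minimal sketch (hypothetical helper, not HotSpot code) of what the stack
// bang generated below amounts to: one store per page at negative offsets
// from the stack pointer, so a frame that would overflow faults here, at a
// known pc, rather than at some arbitrary store deeper in the stub:
//
//   static void bang_stack_sketch(volatile char* sp, int pages, int page_size) {
//     for (int i = 1; i <= pages; i++) {
//       sp[-(long)i * page_size] = 0;   // touch each shadow page once
//     }
//   }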
1.2862 + 1.2863 + // Generate stack overflow check 1.2864 + 1.2865 + if (UseStackBanging) { 1.2866 + if (stack_size <= StackShadowPages*os::vm_page_size()) { 1.2867 + __ bang_stack_with_offset(StackShadowPages*os::vm_page_size()); 1.2868 + } else { 1.2869 + __ movl(rax, stack_size); 1.2870 + __ bang_stack_size(rax, rbx); 1.2871 + } 1.2872 + } else { 1.2873 + // need a 5 byte instruction to allow MT safe patching to non-entrant 1.2874 + __ fat_nop(); 1.2875 + } 1.2876 + 1.2877 + assert(((uintptr_t)__ pc() - start - vep_offset) >= 5, 1.2878 + "valid size for make_non_entrant"); 1.2879 + 1.2880 + // Generate a new frame for the wrapper. 1.2881 + __ enter(); 1.2882 + 1.2883 + // -4 because return address is already present and so is saved rbp, 1.2884 + if (stack_size - 2*wordSize != 0) { 1.2885 + __ subq(rsp, stack_size - 2*wordSize); 1.2886 + } 1.2887 + 1.2888 + // Frame is now completed as far as size and linkage. 1.2889 + 1.2890 + int frame_complete = ((intptr_t)__ pc()) - start; 1.2891 + 1.2892 + int c_arg, j_arg; 1.2893 + 1.2894 + // State of input register args 1.2895 + 1.2896 + bool live[ConcreteRegisterImpl::number_of_registers]; 1.2897 + 1.2898 + live[j_rarg0->as_VMReg()->value()] = false; 1.2899 + live[j_rarg1->as_VMReg()->value()] = false; 1.2900 + live[j_rarg2->as_VMReg()->value()] = false; 1.2901 + live[j_rarg3->as_VMReg()->value()] = false; 1.2902 + live[j_rarg4->as_VMReg()->value()] = false; 1.2903 + live[j_rarg5->as_VMReg()->value()] = false; 1.2904 + 1.2905 + live[j_farg0->as_VMReg()->value()] = false; 1.2906 + live[j_farg1->as_VMReg()->value()] = false; 1.2907 + live[j_farg2->as_VMReg()->value()] = false; 1.2908 + live[j_farg3->as_VMReg()->value()] = false; 1.2909 + live[j_farg4->as_VMReg()->value()] = false; 1.2910 + live[j_farg5->as_VMReg()->value()] = false; 1.2911 + live[j_farg6->as_VMReg()->value()] = false; 1.2912 + live[j_farg7->as_VMReg()->value()] = false; 1.2913 + 1.2914 + 1.2915 + bool rax_is_zero = false; 1.2916 + 1.2917 + // All args (except strings) destined for the stack are moved first 1.2918 + for (j_arg = first_arg_to_pass, c_arg = 0 ; 1.2919 + j_arg < total_args_passed ; j_arg++, c_arg++ ) { 1.2920 + VMRegPair src = in_regs[j_arg]; 1.2921 + VMRegPair dst = out_regs[c_arg]; 1.2922 + 1.2923 + // Get the real reg value or a dummy (rsp) 1.2924 + 1.2925 + int src_reg = src.first()->is_reg() ? 1.2926 + src.first()->value() : 1.2927 + rsp->as_VMReg()->value(); 1.2928 + 1.2929 + bool useless = in_sig_bt[j_arg] == T_ARRAY || 1.2930 + (in_sig_bt[j_arg] == T_OBJECT && 1.2931 + out_sig_bt[c_arg] != T_INT && 1.2932 + out_sig_bt[c_arg] != T_ADDRESS && 1.2933 + out_sig_bt[c_arg] != T_LONG); 1.2934 + 1.2935 + live[src_reg] = !useless; 1.2936 + 1.2937 + if (dst.first()->is_stack()) { 1.2938 + 1.2939 + // Even though a string arg in a register is still live after this loop, 1.2940 + // after the string conversion loop (next) it will be dead, so we take 1.2941 + // advantage of that now for simpler code to manage liveness.
1.2942 + 1.2943 + live[src_reg] = false; 1.2944 + switch (in_sig_bt[j_arg]) { 1.2945 + 1.2946 + case T_ARRAY: 1.2947 + case T_OBJECT: 1.2948 + { 1.2949 + Address stack_dst(rsp, reg2offset_out(dst.first())); 1.2950 + 1.2951 + if (out_sig_bt[c_arg] == T_INT || out_sig_bt[c_arg] == T_LONG) { 1.2952 + // need to unbox a one-word value 1.2953 + Register in_reg = rax; 1.2954 + if ( src.first()->is_reg() ) { 1.2955 + in_reg = src.first()->as_Register(); 1.2956 + } else { 1.2957 + __ movq(rax, Address(rbp, reg2offset_in(src.first()))); 1.2958 + rax_is_zero = false; 1.2959 + } 1.2960 + Label skipUnbox; 1.2961 + __ movptr(Address(rsp, reg2offset_out(dst.first())), 1.2962 + (int32_t)NULL_WORD); 1.2963 + __ testq(in_reg, in_reg); 1.2964 + __ jcc(Assembler::zero, skipUnbox); 1.2965 + 1.2966 + BasicType bt = out_sig_bt[c_arg]; 1.2967 + int box_offset = java_lang_boxing_object::value_offset_in_bytes(bt); 1.2968 + Address src1(in_reg, box_offset); 1.2969 + if ( bt == T_LONG ) { 1.2970 + __ movq(in_reg, src1); 1.2971 + __ movq(stack_dst, in_reg); 1.2972 + assert(out_sig_bt[c_arg+1] == T_VOID, "must be"); 1.2973 + ++c_arg; // skip over T_VOID to keep the loop indices in sync 1.2974 + } else { 1.2975 + __ movl(in_reg, src1); 1.2976 + __ movl(stack_dst, in_reg); 1.2977 + } 1.2978 + 1.2979 + __ bind(skipUnbox); 1.2980 + } else if (out_sig_bt[c_arg] != T_ADDRESS) { 1.2981 + // Convert the arg to NULL 1.2982 + if (!rax_is_zero) { 1.2983 + __ xorq(rax, rax); 1.2984 + rax_is_zero = true; 1.2985 + } 1.2986 + __ movq(stack_dst, rax); 1.2987 + } 1.2988 + } 1.2989 + break; 1.2990 + 1.2991 + case T_VOID: 1.2992 + break; 1.2993 + 1.2994 + case T_FLOAT: 1.2995 + // This does the right thing since we know it is destined for the 1.2996 + // stack 1.2997 + float_move(masm, src, dst); 1.2998 + break; 1.2999 + 1.3000 + case T_DOUBLE: 1.3001 + // This does the right thing since we know it is destined for the 1.3002 + // stack 1.3003 + double_move(masm, src, dst); 1.3004 + break; 1.3005 + 1.3006 + case T_LONG : 1.3007 + long_move(masm, src, dst); 1.3008 + break; 1.3009 + 1.3010 + case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); 1.3011 + 1.3012 + default: 1.3013 + move32_64(masm, src, dst); 1.3014 + } 1.3015 + } 1.3016 + 1.3017 + } 1.3018 + 1.3019 + // If we have any strings we must store any register based arg to the stack 1.3020 + // This includes any still live xmm registers too. 
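// Why the parking below is needed, in miniature: on x86_64 every argument
// register is caller-saved, so the upcoming call into the utf8 conversion
// routine may clobber all of them. A hedged sketch of the dance (the offset
// is invented; the real slots come from the fp_offset[] table set up above):
//
//   movq %rsi, -16(%rbp)    // park a live java arg in its rbp-relative slot
//   call <conversion>       // may clobber every argument register
//   movq -16(%rbp), %rsi    // the final move loop reloads it from the slot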
1.3021 + 1.3022 + int sid = 0; 1.3023 + 1.3024 + if (total_strings > 0 ) { 1.3025 + for (j_arg = first_arg_to_pass, c_arg = 0 ; 1.3026 + j_arg < total_args_passed ; j_arg++, c_arg++ ) { 1.3027 + VMRegPair src = in_regs[j_arg]; 1.3028 + VMRegPair dst = out_regs[c_arg]; 1.3029 + 1.3030 + if (src.first()->is_reg()) { 1.3031 + Address src_tmp(rbp, fp_offset[src.first()->value()]); 1.3032 + 1.3033 + // string oops were left untouched by the previous loop even if the 1.3034 + // eventual (converted) arg is destined for the stack, so park them 1.3035 + // away now (except for the first) 1.3036 + 1.3037 + if (out_sig_bt[c_arg] == T_ADDRESS) { 1.3038 + Address utf8_addr = Address( 1.3039 + rsp, string_locs[sid++] * VMRegImpl::stack_slot_size); 1.3040 + if (sid != 1) { 1.3041 + // The first string arg won't be killed until after the utf8 1.3042 + // conversion 1.3043 + __ movq(utf8_addr, src.first()->as_Register()); 1.3044 + } 1.3045 + } else if (dst.first()->is_reg()) { 1.3046 + if (in_sig_bt[j_arg] == T_FLOAT || in_sig_bt[j_arg] == T_DOUBLE) { 1.3047 + 1.3048 + // Convert the xmm register to an int and store it in the reserved 1.3049 + // location for the eventual c register arg 1.3050 + XMMRegister f = src.first()->as_XMMRegister(); 1.3051 + if (in_sig_bt[j_arg] == T_FLOAT) { 1.3052 + __ movflt(src_tmp, f); 1.3053 + } else { 1.3054 + __ movdbl(src_tmp, f); 1.3055 + } 1.3056 + } else { 1.3057 + // If the arg is an oop type we don't support, don't bother to store 1.3058 + // it; remember, strings were handled above. 1.3059 + bool useless = in_sig_bt[j_arg] == T_ARRAY || 1.3060 + (in_sig_bt[j_arg] == T_OBJECT && 1.3061 + out_sig_bt[c_arg] != T_INT && 1.3062 + out_sig_bt[c_arg] != T_LONG); 1.3063 + 1.3064 + if (!useless) { 1.3065 + __ movq(src_tmp, src.first()->as_Register()); 1.3066 + } 1.3067 + } 1.3068 + } 1.3069 + } 1.3070 + if (in_sig_bt[j_arg] == T_OBJECT && out_sig_bt[c_arg] == T_LONG) { 1.3071 + assert(out_sig_bt[c_arg+1] == T_VOID, "must be"); 1.3072 + ++c_arg; // skip over T_VOID to keep the loop indices in sync 1.3073 + } 1.3074 + } 1.3075 + 1.3076 + // Now that the volatile registers are safe, convert all the strings 1.3077 + sid = 0; 1.3078 + 1.3079 + for (j_arg = first_arg_to_pass, c_arg = 0 ; 1.3080 + j_arg < total_args_passed ; j_arg++, c_arg++ ) { 1.3081 + if (out_sig_bt[c_arg] == T_ADDRESS) { 1.3082 + // It's a string 1.3083 + Address utf8_addr = Address( 1.3084 + rsp, string_locs[sid++] * VMRegImpl::stack_slot_size); 1.3085 + // The first string we find might still be in the original java arg 1.3086 + // register 1.3087 + 1.3088 + VMReg src = in_regs[j_arg].first(); 1.3089 + 1.3090 + // We will need to eventually save the final argument to the trap 1.3091 + // in the non-volatile location dedicated to src. This is the offset 1.3092 + // from fp we will use. 1.3093 + int src_off = src->is_reg() ?
1.3094 + fp_offset[src->value()] : reg2offset_in(src); 1.3095 + 1.3096 + // This is where the argument will eventually reside 1.3097 + VMRegPair dst = out_regs[c_arg]; 1.3098 + 1.3099 + if (src->is_reg()) { 1.3100 + if (sid == 1) { 1.3101 + __ movq(c_rarg0, src->as_Register()); 1.3102 + } else { 1.3103 + __ movq(c_rarg0, utf8_addr); 1.3104 + } 1.3105 + } else { 1.3106 + // arg is still in the original location 1.3107 + __ movq(c_rarg0, Address(rbp, reg2offset_in(src))); 1.3108 + } 1.3109 + Label done, convert; 1.3110 + 1.3111 + // see if the oop is NULL 1.3112 + __ testq(c_rarg0, c_rarg0); 1.3113 + __ jcc(Assembler::notEqual, convert); 1.3114 + 1.3115 + if (dst.first()->is_reg()) { 1.3116 + // Save the ptr to the utf8 string in the original src loc or the tmp 1.3117 + // dedicated to it 1.3118 + __ movq(Address(rbp, src_off), c_rarg0); 1.3119 + } else { 1.3120 + __ movq(Address(rsp, reg2offset_out(dst.first())), c_rarg0); 1.3121 + } 1.3122 + __ jmp(done); 1.3123 + 1.3124 + __ bind(convert); 1.3125 + 1.3126 + __ lea(c_rarg1, utf8_addr); 1.3127 + if (dst.first()->is_reg()) { 1.3128 + __ movq(Address(rbp, src_off), c_rarg1); 1.3129 + } else { 1.3130 + __ movq(Address(rsp, reg2offset_out(dst.first())), c_rarg1); 1.3131 + } 1.3132 + // And do the conversion 1.3133 + __ call(RuntimeAddress( 1.3134 + CAST_FROM_FN_PTR(address, SharedRuntime::get_utf))); 1.3135 + 1.3136 + __ bind(done); 1.3137 + } 1.3138 + if (in_sig_bt[j_arg] == T_OBJECT && out_sig_bt[c_arg] == T_LONG) { 1.3139 + assert(out_sig_bt[c_arg+1] == T_VOID, "must be"); 1.3140 + ++c_arg; // skip over T_VOID to keep the loop indices in sync 1.3141 + } 1.3142 + } 1.3143 + // The get_utf call killed all the c_arg registers 1.3144 + live[c_rarg0->as_VMReg()->value()] = false; 1.3145 + live[c_rarg1->as_VMReg()->value()] = false; 1.3146 + live[c_rarg2->as_VMReg()->value()] = false; 1.3147 + live[c_rarg3->as_VMReg()->value()] = false; 1.3148 + live[c_rarg4->as_VMReg()->value()] = false; 1.3149 + live[c_rarg5->as_VMReg()->value()] = false; 1.3150 + 1.3151 + live[c_farg0->as_VMReg()->value()] = false; 1.3152 + live[c_farg1->as_VMReg()->value()] = false; 1.3153 + live[c_farg2->as_VMReg()->value()] = false; 1.3154 + live[c_farg3->as_VMReg()->value()] = false; 1.3155 + live[c_farg4->as_VMReg()->value()] = false; 1.3156 + live[c_farg5->as_VMReg()->value()] = false; 1.3157 + live[c_farg6->as_VMReg()->value()] = false; 1.3158 + live[c_farg7->as_VMReg()->value()] = false; 1.3159 + } 1.3160 + 1.3161 + // Now we can finally move the register args to their desired locations 1.3162 + 1.3163 + rax_is_zero = false; 1.3164 + 1.3165 + for (j_arg = first_arg_to_pass, c_arg = 0 ; 1.3166 + j_arg < total_args_passed ; j_arg++, c_arg++ ) { 1.3167 + 1.3168 + VMRegPair src = in_regs[j_arg]; 1.3169 + VMRegPair dst = out_regs[c_arg]; 1.3170 + 1.3171 + // Only need to look for args destined for the integer registers (since we 1.3172 + // convert float/double args to look like int/long outbound) 1.3173 + if (dst.first()->is_reg()) { 1.3174 + Register r = dst.first()->as_Register(); 1.3175 + 1.3176 + // Check if the java arg is unsupported and therefore useless 1.3177 + bool useless = in_sig_bt[j_arg] == T_ARRAY || 1.3178 + (in_sig_bt[j_arg] == T_OBJECT && 1.3179 + out_sig_bt[c_arg] != T_INT && 1.3180 + out_sig_bt[c_arg] != T_ADDRESS && 1.3181 + out_sig_bt[c_arg] != T_LONG); 1.3182 + 1.3183 + 1.3184 + // If we're going to kill an existing arg, save it first 1.3185 + if (live[dst.first()->value()]) { 1.3186 + // you can't kill yourself 1.3187 + if (src.first() != dst.first()) {
1.3188 + __ movq(Address(rbp, fp_offset[dst.first()->value()]), r); 1.3189 + } 1.3190 + } 1.3191 + if (src.first()->is_reg()) { 1.3192 + if (live[src.first()->value()] ) { 1.3193 + if (in_sig_bt[j_arg] == T_FLOAT) { 1.3194 + __ movdl(r, src.first()->as_XMMRegister()); 1.3195 + } else if (in_sig_bt[j_arg] == T_DOUBLE) { 1.3196 + __ movdq(r, src.first()->as_XMMRegister()); 1.3197 + } else if (r != src.first()->as_Register()) { 1.3198 + if (!useless) { 1.3199 + __ movq(r, src.first()->as_Register()); 1.3200 + } 1.3201 + } 1.3202 + } else { 1.3203 + // If the arg is an oop type we don't support, don't bother to store 1.3204 + // it 1.3205 + if (!useless) { 1.3206 + if (in_sig_bt[j_arg] == T_DOUBLE || 1.3207 + in_sig_bt[j_arg] == T_LONG || 1.3208 + in_sig_bt[j_arg] == T_OBJECT ) { 1.3209 + __ movq(r, Address(rbp, fp_offset[src.first()->value()])); 1.3210 + } else { 1.3211 + __ movl(r, Address(rbp, fp_offset[src.first()->value()])); 1.3212 + } 1.3213 + } 1.3214 + } 1.3215 + live[src.first()->value()] = false; 1.3216 + } else if (!useless) { 1.3217 + // full-sized move even for int should be ok 1.3218 + __ movq(r, Address(rbp, reg2offset_in(src.first()))); 1.3219 + } 1.3220 + 1.3221 + // At this point r has the original java arg in the final location 1.3222 + // (assuming it wasn't useless). If the java arg was an oop 1.3223 + // we have a bit more to do 1.3224 + 1.3225 + if (in_sig_bt[j_arg] == T_ARRAY || in_sig_bt[j_arg] == T_OBJECT ) { 1.3226 + if (out_sig_bt[c_arg] == T_INT || out_sig_bt[c_arg] == T_LONG) { 1.3227 + // need to unbox a one-word value 1.3228 + Label skip; 1.3229 + __ testq(r, r); 1.3230 + __ jcc(Assembler::equal, skip); 1.3231 + BasicType bt = out_sig_bt[c_arg]; 1.3232 + int box_offset = java_lang_boxing_object::value_offset_in_bytes(bt); 1.3233 + Address src1(r, box_offset); 1.3234 + if ( bt == T_LONG ) { 1.3235 + __ movq(r, src1); 1.3236 + } else { 1.3237 + __ movl(r, src1); 1.3238 + } 1.3239 + __ bind(skip); 1.3240 + 1.3241 + } else if (out_sig_bt[c_arg] != T_ADDRESS) { 1.3242 + // Convert the arg to NULL 1.3243 + __ xorq(r, r); 1.3244 + } 1.3245 + } 1.3246 + 1.3247 + // dst can no longer be holding an input value 1.3248 + live[dst.first()->value()] = false; 1.3249 + } 1.3250 + if (in_sig_bt[j_arg] == T_OBJECT && out_sig_bt[c_arg] == T_LONG) { 1.3251 + assert(out_sig_bt[c_arg+1] == T_VOID, "must be"); 1.3252 + ++c_arg; // skip over T_VOID to keep the loop indices in sync 1.3253 + } 1.3254 + } 1.3255 + 1.3256 + 1.3257 + // Ok now we are done.
Need to place the nop that dtrace wants in order to 1.3258 + // patch in the trap 1.3259 + int patch_offset = ((intptr_t)__ pc()) - start; 1.3260 + 1.3261 + __ nop(); 1.3262 + 1.3263 + 1.3264 + // Return 1.3265 + 1.3266 + __ leave(); 1.3267 + __ ret(0); 1.3268 + 1.3269 + __ flush(); 1.3270 + 1.3271 + nmethod *nm = nmethod::new_dtrace_nmethod( 1.3272 + method, masm->code(), vep_offset, patch_offset, frame_complete, 1.3273 + stack_slots / VMRegImpl::slots_per_word); 1.3274 + return nm; 1.3275 + 1.3276 +} 1.3277 + 1.3278 +#endif // HAVE_DTRACE_H 1.3279 + 1.3280 +// this function returns the adjustment size (in number of words) to a c2i adapter 1.3281 +// activation for use during deoptimization 1.3282 +int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals ) { 1.3283 + return (callee_locals - callee_parameters) * Interpreter::stackElementWords; 1.3284 +} 1.3285 + 1.3286 + 1.3287 +uint SharedRuntime::out_preserve_stack_slots() { 1.3288 + return 0; 1.3289 +} 1.3290 + 1.3291 +//------------------------------generate_deopt_blob---------------------------- 1.3292 +void SharedRuntime::generate_deopt_blob() { 1.3293 + // Allocate space for the code 1.3294 + ResourceMark rm; 1.3295 + // Setup code generation tools 1.3296 + CodeBuffer buffer("deopt_blob", 2048, 1024); 1.3297 + MacroAssembler* masm = new MacroAssembler(&buffer); 1.3298 + int frame_size_in_words; 1.3299 + OopMap* map = NULL; 1.3300 + OopMapSet *oop_maps = new OopMapSet(); 1.3301 + 1.3302 + // ------------- 1.3303 + // This code enters when returning to a de-optimized nmethod. A return 1.3304 + // address has been pushed on the stack, and return values are in 1.3305 + // registers. 1.3306 + // If we are doing a normal deopt then we were called from the patched 1.3307 + // nmethod from the point we returned to the nmethod. So the return 1.3308 + // address on the stack is wrong by NativeCall::instruction_size. 1.3309 + // We will adjust the value so it looks like we have the original return 1.3310 + // address on the stack (like when we eagerly deoptimized). 1.3311 + // In the case of an exception pending when deoptimizing, we enter 1.3312 + // with a return address on the stack that points after the call we patched 1.3313 + // into the exception handler. We have the following register state from, 1.3314 + // e.g., the forward exception stub (see stubGenerator_x86_64.cpp). 1.3315 + // rax: exception oop 1.3316 + // rbx: exception handler 1.3317 + // rdx: throwing pc 1.3318 + // So in this case we simply jam rdx into the useless return address and 1.3319 + // the stack looks just like we want. 1.3320 + // 1.3321 + // At this point we need to de-opt. We save the argument return 1.3322 + // registers. We call the first C routine, fetch_unroll_info(). This 1.3323 + // routine captures the return values and returns a structure which 1.3324 + // describes the current frame size and the sizes of all replacement frames. 1.3325 + // The current frame is compiled code and may contain many inlined 1.3326 + // functions, each with their own JVM state. We pop the current frame, then 1.3327 + // push all the new frames. Then we call the C routine unpack_frames() to 1.3328 + // populate these frames. Finally unpack_frames() returns us the new target 1.3329 + // address. Notice that callee-save registers are BLOWN here; they have 1.3330 + // already been captured in the vframeArray at the time the return PC was 1.3331 + // patched.
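// The hand-off described above, reduced to a sketch. The field names are
// modeled on Deoptimization::UnrollBlock, but this stand-in is illustrative
// only; the real type lives in deoptimization.hpp:
//
//   struct UnrollBlockSketch {
//     int       size_of_deoptimized_frame;  // bytes of compiled frame to pop
//     int       caller_adjustment;          // extra stack for callee locals
//     int       number_of_frames;           // replacement interpreter frames
//     intptr_t* frame_sizes;                // size of each skeletal frame
//     address*  frame_pcs;                  // return pc for each frame
//   };
//
// fetch_unroll_info() returns one of these; the code below pops the compiled
// frame, pushes number_of_frames skeletal frames from the two arrays, and
// finally calls unpack_frames() to populate them.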
1.3332 + address start = __ pc(); 1.3333 + Label cont; 1.3334 + 1.3335 + // Prolog for the non-exception case! 1.3336 + 1.3337 + // Save everything in sight. 1.3338 + map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words); 1.3339 + 1.3340 + // Normal deoptimization. Save exec mode for unpack_frames. 1.3341 + __ movl(r14, Deoptimization::Unpack_deopt); // callee-saved 1.3342 + __ jmp(cont); 1.3343 + 1.3344 + int reexecute_offset = __ pc() - start; 1.3345 + 1.3346 + // Reexecute case 1.3347 + // the return address is the pc that describes what bci to re-execute at 1.3348 + 1.3349 + // No need to update map as each call to save_live_registers will produce identical oopmap 1.3350 + (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words); 1.3351 + 1.3352 + __ movl(r14, Deoptimization::Unpack_reexecute); // callee-saved 1.3353 + __ jmp(cont); 1.3354 + 1.3355 + int exception_offset = __ pc() - start; 1.3356 + 1.3357 + // Prolog for exception case 1.3358 + 1.3359 + // all registers are dead at this entry point, except for rax and 1.3360 + // rdx which contain the exception oop and exception pc 1.3361 + // respectively. Set them in TLS and fall thru to the 1.3362 + // unpack_with_exception_in_tls entry point. 1.3363 + 1.3364 + __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), rdx); 1.3365 + __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), rax); 1.3366 + 1.3367 + int exception_in_tls_offset = __ pc() - start; 1.3368 + 1.3369 + // new implementation because exception oop is now passed in JavaThread 1.3370 + 1.3371 + // Prolog for exception case 1.3372 + // All registers must be preserved because they might be used by LinearScan 1.3373 + // Exception oop and throwing PC are passed in JavaThread 1.3374 + // tos: stack at point of call to method that threw the exception (i.e. only 1.3375 + // args are on the stack, no return address) 1.3376 + 1.3377 + // make room on stack for the return address 1.3378 + // It will be patched later with the throwing pc. The correct value is not 1.3379 + // available now because loading it from memory would destroy registers. 1.3380 + __ push(0); 1.3381 + 1.3382 + // Save everything in sight. 1.3383 + map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words); 1.3384 + 1.3385 + // Now it is safe to overwrite any register 1.3386 + 1.3387 + // Deopt during an exception. Save exec mode for unpack_frames. 1.3388 + __ movl(r14, Deoptimization::Unpack_exception); // callee-saved 1.3389 + 1.3390 + // load throwing pc from JavaThread and patch it as the return address 1.3391 + // of the current frame.
Then clear the field in JavaThread 1.3392 + 1.3393 + __ movptr(rdx, Address(r15_thread, JavaThread::exception_pc_offset())); 1.3394 + __ movptr(Address(rbp, wordSize), rdx); 1.3395 + __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), (int32_t)NULL_WORD); 1.3396 + 1.3397 +#ifdef ASSERT 1.3398 + // verify that there is really an exception oop in JavaThread 1.3399 + __ movptr(rax, Address(r15_thread, JavaThread::exception_oop_offset())); 1.3400 + __ verify_oop(rax); 1.3401 + 1.3402 + // verify that there is no pending exception 1.3403 + Label no_pending_exception; 1.3404 + __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset())); 1.3405 + __ testptr(rax, rax); 1.3406 + __ jcc(Assembler::zero, no_pending_exception); 1.3407 + __ stop("must not have pending exception here"); 1.3408 + __ bind(no_pending_exception); 1.3409 +#endif 1.3410 + 1.3411 + __ bind(cont); 1.3412 + 1.3413 + // Call C code. Need thread and this frame, but NOT official VM entry 1.3414 + // crud. We cannot block on this call, no GC can happen. 1.3415 + // 1.3416 + // UnrollBlock* fetch_unroll_info(JavaThread* thread) 1.3417 + 1.3418 + // fetch_unroll_info needs to call last_java_frame(). 1.3419 + 1.3420 + __ set_last_Java_frame(noreg, noreg, NULL); 1.3421 +#ifdef ASSERT 1.3422 + { Label L; 1.3423 + __ cmpptr(Address(r15_thread, 1.3424 + JavaThread::last_Java_fp_offset()), 1.3425 + (int32_t)0); 1.3426 + __ jcc(Assembler::equal, L); 1.3427 + __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared"); 1.3428 + __ bind(L); 1.3429 + } 1.3430 +#endif // ASSERT 1.3431 + __ mov(c_rarg0, r15_thread); 1.3432 + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info))); 1.3433 + 1.3434 + // Need to have an oopmap that tells fetch_unroll_info where to 1.3435 + // find any register it might need. 1.3436 + oop_maps->add_gc_map(__ pc() - start, map); 1.3437 + 1.3438 + __ reset_last_Java_frame(false, false); 1.3439 + 1.3440 + // Load UnrollBlock* into rdi 1.3441 + __ mov(rdi, rax); 1.3442 + 1.3443 + Label noException; 1.3444 + __ cmpl(r14, Deoptimization::Unpack_exception); // Was exception pending? 1.3445 + __ jcc(Assembler::notEqual, noException); 1.3446 + __ movptr(rax, Address(r15_thread, JavaThread::exception_oop_offset())); 1.3447 + // QQQ this is useless; it was NULL above 1.3448 + __ movptr(rdx, Address(r15_thread, JavaThread::exception_pc_offset())); 1.3449 + __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), (int32_t)NULL_WORD); 1.3450 + __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), (int32_t)NULL_WORD); 1.3451 + 1.3452 + __ verify_oop(rax); 1.3453 + 1.3454 + // Overwrite the result registers with the exception results. 1.3455 + __ movptr(Address(rsp, RegisterSaver::rax_offset_in_bytes()), rax); 1.3456 + // I think this is useless 1.3457 + __ movptr(Address(rsp, RegisterSaver::rdx_offset_in_bytes()), rdx); 1.3458 + 1.3459 + __ bind(noException); 1.3460 + 1.3461 + // Only register save data is on the stack. 1.3462 + // Now restore the result registers. Everything else is either dead 1.3463 + // or captured in the vframeArray. 1.3464 + RegisterSaver::restore_result_registers(masm); 1.3465 + 1.3466 + // All of the register save area has been popped off the stack. Only the 1.3467 + // return address remains. 1.3468 + 1.3469 + // Pop all the frames we must move/replace.
1.3470 + // 1.3471 + // Frame picture (youngest to oldest) 1.3472 + // 1: self-frame (no frame link) 1.3473 + // 2: deopting frame (no frame link) 1.3474 + // 3: caller of deopting frame (could be compiled/interpreted). 1.3475 + // 1.3476 + // Note: by leaving the return address of self-frame on the stack 1.3477 + // and using the size of frame 2 to adjust the stack 1.3478 + // when we are done, the return to frame 3 will still be on the stack. 1.3479 + 1.3480 + // Pop deoptimized frame 1.3481 + __ movl(rcx, Address(rdi, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes())); 1.3482 + __ addptr(rsp, rcx); 1.3483 + 1.3484 + // rsp should be pointing at the return address to the caller (3) 1.3485 + 1.3486 + // Pick up the initial fp we should save; 1.3487 + // restore rbp before the stack bang because if a stack overflow is thrown it needs to be pushed (and preserved) 1.3488 + __ movptr(rbp, Address(rdi, Deoptimization::UnrollBlock::initial_info_offset_in_bytes())); 1.3489 + 1.3490 +#ifdef ASSERT 1.3491 + // Compilers generate code that bangs the stack by as much as the 1.3492 + // interpreter would need. So this stack banging should never 1.3493 + // trigger a fault. Verify that it does not on non-product builds. 1.3494 + if (UseStackBanging) { 1.3495 + __ movl(rbx, Address(rdi, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes())); 1.3496 + __ bang_stack_size(rbx, rcx); 1.3497 + } 1.3498 +#endif 1.3499 + 1.3500 + // Load address of array of frame pcs into rcx 1.3501 + __ movptr(rcx, Address(rdi, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); 1.3502 + 1.3503 + // Trash the old pc 1.3504 + __ addptr(rsp, wordSize); 1.3505 + 1.3506 + // Load address of array of frame sizes into rsi 1.3507 + __ movptr(rsi, Address(rdi, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes())); 1.3508 + 1.3509 + // Load counter into rdx 1.3510 + __ movl(rdx, Address(rdi, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes())); 1.3511 + 1.3512 + // Now adjust the caller's stack to make up for the extra locals 1.3513 + // but record the original sp so that we can save it in the skeletal interpreter 1.3514 + // frame and the stack walking of interpreter_sender will get the unextended sp 1.3515 + // value and not the "real" sp value.
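// Concretely (numbers invented for illustration): a deoptimized callee
// compiled with 5 parameters but needing 9 interpreter locals yields
// last_frame_adjust(5, 9) = (9 - 5) * Interpreter::stackElementWords words
// (see the helper defined earlier in this file); converted to bytes, that is
// the caller_adjustment value subtracted from rsp just below.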
1.3516 + 1.3517 + const Register sender_sp = r8; 1.3518 + 1.3519 + __ mov(sender_sp, rsp); 1.3520 + __ movl(rbx, Address(rdi, 1.3521 + Deoptimization::UnrollBlock:: 1.3522 + caller_adjustment_offset_in_bytes())); 1.3523 + __ subptr(rsp, rbx); 1.3524 + 1.3525 + // Push interpreter frames in a loop 1.3526 + Label loop; 1.3527 + __ bind(loop); 1.3528 + __ movptr(rbx, Address(rsi, 0)); // Load frame size 1.3529 +#ifdef CC_INTERP 1.3530 + __ subptr(rbx, 4*wordSize); // we'll push pc and ebp by hand and 1.3531 +#ifdef ASSERT 1.3532 + __ push(0xDEADDEAD); // Make a recognizable pattern 1.3533 + __ push(0xDEADDEAD); 1.3534 +#else /* ASSERT */ 1.3535 + __ subptr(rsp, 2*wordSize); // skip the "static long no_param" 1.3536 +#endif /* ASSERT */ 1.3537 +#else 1.3538 + __ subptr(rbx, 2*wordSize); // We'll push pc and ebp by hand 1.3539 +#endif // CC_INTERP 1.3540 + __ pushptr(Address(rcx, 0)); // Save return address 1.3541 + __ enter(); // Save old & set new ebp 1.3542 + __ subptr(rsp, rbx); // Prolog 1.3543 +#ifdef CC_INTERP 1.3544 + __ movptr(Address(rbp, 1.3545 + -(sizeof(BytecodeInterpreter)) + in_bytes(byte_offset_of(BytecodeInterpreter, _sender_sp))), 1.3546 + sender_sp); // Make it walkable 1.3547 +#else /* CC_INTERP */ 1.3548 + // This value is corrected by layout_activation_impl 1.3549 + __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD ); 1.3550 + __ movptr(Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize), sender_sp); // Make it walkable 1.3551 +#endif /* CC_INTERP */ 1.3552 + __ mov(sender_sp, rsp); // Pass sender_sp to next frame 1.3553 + __ addptr(rsi, wordSize); // Bump array pointer (sizes) 1.3554 + __ addptr(rcx, wordSize); // Bump array pointer (pcs) 1.3555 + __ decrementl(rdx); // Decrement counter 1.3556 + __ jcc(Assembler::notZero, loop); 1.3557 + __ pushptr(Address(rcx, 0)); // Save final return address 1.3558 + 1.3559 + // Re-push self-frame 1.3560 + __ enter(); // Save old & set new ebp 1.3561 + 1.3562 + // Allocate a full-sized register save area. 1.3563 + // Return address and rbp are in place, so we allocate two fewer words. 1.3564 + __ subptr(rsp, (frame_size_in_words - 2) * wordSize); 1.3565 + 1.3566 + // Restore frame locals after moving the frame 1.3567 + __ movdbl(Address(rsp, RegisterSaver::xmm0_offset_in_bytes()), xmm0); 1.3568 + __ movptr(Address(rsp, RegisterSaver::rax_offset_in_bytes()), rax); 1.3569 + 1.3570 + // Call C code. Need thread but NOT official VM entry 1.3571 + // crud. We cannot block on this call, no GC can happen. Call should 1.3572 + // restore return values to their stack-slots with the new SP. 1.3573 + // 1.3574 + // void Deoptimization::unpack_frames(JavaThread* thread, int exec_mode) 1.3575 + 1.3576 + // Use rbp because the frames look interpreted now 1.3577 + // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP. 1.3578 + // Don't need the precise return PC here, just precise enough to point into this code blob.
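// What recording the_pc buys us, as a sketch: oopmaps are keyed by a pc's
// offset within the blob, so the runtime can recover the map for a call site
// from (pc - code_begin). Stand-in illustration, not the real OopMapSet:
//
//   std::map<int, OopMap*> gc_maps;          // pc offset -> oopmap
//   gc_maps[the_pc - start] = map;           // what add_gc_map() records
//   OopMap* m = gc_maps[query_pc - start];   // what a stack walk performs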
1.3579 + address the_pc = __ pc(); 1.3580 + __ set_last_Java_frame(noreg, rbp, the_pc); 1.3581 + 1.3582 + __ andptr(rsp, -(StackAlignmentInBytes)); // Fix stack alignment as required by ABI 1.3583 + __ mov(c_rarg0, r15_thread); 1.3584 + __ movl(c_rarg1, r14); // second arg: exec_mode 1.3585 + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames))); 1.3586 + // Revert SP alignment after call since we're going to do some SP relative addressing below 1.3587 + __ movptr(rsp, Address(r15_thread, JavaThread::last_Java_sp_offset())); 1.3588 + 1.3589 + // Set an oopmap for the call site 1.3590 + // Use the same PC we used for the last java frame 1.3591 + oop_maps->add_gc_map(the_pc - start, 1.3592 + new OopMap( frame_size_in_words, 0 )); 1.3593 + 1.3594 + // Clear fp AND pc 1.3595 + __ reset_last_Java_frame(true, true); 1.3596 + 1.3597 + // Collect return values 1.3598 + __ movdbl(xmm0, Address(rsp, RegisterSaver::xmm0_offset_in_bytes())); 1.3599 + __ movptr(rax, Address(rsp, RegisterSaver::rax_offset_in_bytes())); 1.3600 + // I think this is useless (throwing pc?) 1.3601 + __ movptr(rdx, Address(rsp, RegisterSaver::rdx_offset_in_bytes())); 1.3602 + 1.3603 + // Pop self-frame. 1.3604 + __ leave(); // Epilog 1.3605 + 1.3606 + // Jump to interpreter 1.3607 + __ ret(0); 1.3608 + 1.3609 + // Make sure all code is generated 1.3610 + masm->flush(); 1.3611 + 1.3612 + _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words); 1.3613 + _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); 1.3614 +} 1.3615 + 1.3616 +#ifdef COMPILER2 1.3617 +//------------------------------generate_uncommon_trap_blob-------------------- 1.3618 +void SharedRuntime::generate_uncommon_trap_blob() { 1.3619 + // Allocate space for the code 1.3620 + ResourceMark rm; 1.3621 + // Setup code generation tools 1.3622 + CodeBuffer buffer("uncommon_trap_blob", 2048, 1024); 1.3623 + MacroAssembler* masm = new MacroAssembler(&buffer); 1.3624 + 1.3625 + assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned"); 1.3626 + 1.3627 + address start = __ pc(); 1.3628 + 1.3629 + if (UseRTMLocking) { 1.3630 + // Abort RTM transaction before possible nmethod deoptimization. 1.3631 + __ xabort(0); 1.3632 + } 1.3633 + 1.3634 + // Push self-frame. We get here with a return address on the 1.3635 + // stack, so rsp is 8-byte aligned until we allocate our frame. 1.3636 + __ subptr(rsp, SimpleRuntimeFrame::return_off << LogBytesPerInt); // Prolog! 1.3637 + 1.3638 + // No callee saved registers. rbp is assumed implicitly saved 1.3639 + __ movptr(Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt), rbp); 1.3640 + 1.3641 + // compiler left unloaded_class_index in j_rarg0; move it to where the 1.3642 + // runtime expects it. 1.3643 + __ movl(c_rarg1, j_rarg0); 1.3644 + 1.3645 + __ set_last_Java_frame(noreg, noreg, NULL); 1.3646 + 1.3647 + // Call C code. Need thread but NOT official VM entry 1.3648 + // crud. We cannot block on this call, no GC can happen. Call should 1.3649 + // capture callee-saved registers as well as return values. 1.3650 + // Thread is in rdi already.
1.3651 + // 1.3652 + // UnrollBlock* uncommon_trap(JavaThread* thread, jint unloaded_class_index); 1.3653 + 1.3654 + __ mov(c_rarg0, r15_thread); 1.3655 + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap))); 1.3656 + 1.3657 + // Set an oopmap for the call site 1.3658 + OopMapSet* oop_maps = new OopMapSet(); 1.3659 + OopMap* map = new OopMap(SimpleRuntimeFrame::framesize, 0); 1.3660 + 1.3661 + // location of rbp is known implicitly by the frame sender code 1.3662 + 1.3663 + oop_maps->add_gc_map(__ pc() - start, map); 1.3664 + 1.3665 + __ reset_last_Java_frame(false, false); 1.3666 + 1.3667 + // Load UnrollBlock* into rdi 1.3668 + __ mov(rdi, rax); 1.3669 + 1.3670 + // Pop all the frames we must move/replace. 1.3671 + // 1.3672 + // Frame picture (youngest to oldest) 1.3673 + // 1: self-frame (no frame link) 1.3674 + // 2: deopting frame (no frame link) 1.3675 + // 3: caller of deopting frame (could be compiled/interpreted). 1.3676 + 1.3677 + // Pop self-frame. We have no frame, and must rely only on rax and rsp. 1.3678 + __ addptr(rsp, (SimpleRuntimeFrame::framesize - 2) << LogBytesPerInt); // Epilog! 1.3679 + 1.3680 + // Pop deoptimized frame (int) 1.3681 + __ movl(rcx, Address(rdi, 1.3682 + Deoptimization::UnrollBlock:: 1.3683 + size_of_deoptimized_frame_offset_in_bytes())); 1.3684 + __ addptr(rsp, rcx); 1.3685 + 1.3686 + // rsp should be pointing at the return address to the caller (3) 1.3687 + 1.3688 + // Pick up the initial fp we should save; 1.3689 + // restore rbp before the stack bang because if a stack overflow is thrown it needs to be pushed (and preserved) 1.3690 + __ movptr(rbp, Address(rdi, Deoptimization::UnrollBlock::initial_info_offset_in_bytes())); 1.3691 + 1.3692 +#ifdef ASSERT 1.3693 + // Compilers generate code that bangs the stack by as much as the 1.3694 + // interpreter would need. So this stack banging should never 1.3695 + // trigger a fault. Verify that it does not on non-product builds. 1.3696 + if (UseStackBanging) { 1.3697 + __ movl(rbx, Address(rdi, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes())); 1.3698 + __ bang_stack_size(rbx, rcx); 1.3699 + } 1.3700 +#endif 1.3701 + 1.3702 + // Load address of array of frame pcs into rcx (address*) 1.3703 + __ movptr(rcx, Address(rdi, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); 1.3704 + 1.3705 + // Trash the return pc 1.3706 + __ addptr(rsp, wordSize); 1.3707 + 1.3708 + // Load address of array of frame sizes into rsi (intptr_t*) 1.3709 + __ movptr(rsi, Address(rdi, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes())); 1.3710 + 1.3711 + // Counter 1.3712 + __ movl(rdx, Address(rdi, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes())); // (int) 1.3713 + 1.3714 + // Now adjust the caller's stack to make up for the extra locals but 1.3715 + // record the original sp so that we can save it in the skeletal 1.3716 + // interpreter frame and the stack walking of interpreter_sender 1.3717 + // will get the unextended sp value and not the "real" sp value.
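// "Unextended sp" in miniature (illustrative layout, not the real frame):
// each skeletal frame records the sp from before the caller_adjustment
// subtraction so interpreter stack walking sees a consistent sender sp:
//
//   struct SkeletalFrameSketch {
//     void* sender_sp;  // rsp prior to subptr(rsp, caller_adjustment)
//     void* last_sp;    // NULL_WORD for now; fixed up by layout_activation_impl
//   };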
1.3718 + 1.3719 + const Register sender_sp = r8; 1.3720 + 1.3721 + __ mov(sender_sp, rsp); 1.3722 + __ movl(rbx, Address(rdi, Deoptimization::UnrollBlock:: caller_adjustment_offset_in_bytes())); // (int) 1.3723 + __ subptr(rsp, rbx); 1.3724 + 1.3725 + // Push interpreter frames in a loop 1.3726 + Label loop; 1.3727 + __ bind(loop); 1.3728 + __ movptr(rbx, Address(rsi, 0)); // Load frame size 1.3729 + __ subptr(rbx, 2 * wordSize); // We'll push pc and rbp by hand 1.3730 + __ pushptr(Address(rcx, 0)); // Save return address 1.3731 + __ enter(); // Save old & set new rbp 1.3732 + __ subptr(rsp, rbx); // Prolog 1.3733 +#ifdef CC_INTERP 1.3734 + __ movptr(Address(rbp, 1.3735 + -(sizeof(BytecodeInterpreter)) + in_bytes(byte_offset_of(BytecodeInterpreter, _sender_sp))), 1.3736 + sender_sp); // Make it walkable 1.3737 +#else // CC_INTERP 1.3738 + __ movptr(Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize), 1.3739 + sender_sp); // Make it walkable 1.3740 + // This value is corrected by layout_activation_impl 1.3741 + __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD ); 1.3742 +#endif // CC_INTERP 1.3743 + __ mov(sender_sp, rsp); // Pass sender_sp to next frame 1.3744 + __ addptr(rsi, wordSize); // Bump array pointer (sizes) 1.3745 + __ addptr(rcx, wordSize); // Bump array pointer (pcs) 1.3746 + __ decrementl(rdx); // Decrement counter 1.3747 + __ jcc(Assembler::notZero, loop); 1.3748 + __ pushptr(Address(rcx, 0)); // Save final return address 1.3749 + 1.3750 + // Re-push self-frame 1.3751 + __ enter(); // Save old & set new rbp 1.3752 + __ subptr(rsp, (SimpleRuntimeFrame::framesize - 4) << LogBytesPerInt); 1.3753 + // Prolog 1.3754 + 1.3755 + // Use rbp because the frames look interpreted now 1.3756 + // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP. 1.3757 + // Don't need the precise return PC here, just precise enough to point into this code blob. 1.3758 + address the_pc = __ pc(); 1.3759 + __ set_last_Java_frame(noreg, rbp, the_pc); 1.3760 + 1.3761 + // Call C code. Need thread but NOT official VM entry 1.3762 + // crud. We cannot block on this call, no GC can happen. Call should 1.3763 + // restore return values to their stack-slots with the new SP. 1.3764 + // Thread is in rdi already. 1.3765 + // 1.3766 + // BasicType unpack_frames(JavaThread* thread, int exec_mode); 1.3767 + 1.3768 + __ andptr(rsp, -(StackAlignmentInBytes)); // Align SP as required by ABI 1.3769 + __ mov(c_rarg0, r15_thread); 1.3770 + __ movl(c_rarg1, Deoptimization::Unpack_uncommon_trap); 1.3771 + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames))); 1.3772 + 1.3773 + // Set an oopmap for the call site 1.3774 + // Use the same PC we used for the last java frame 1.3775 + oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0)); 1.3776 + 1.3777 + // Clear fp AND pc 1.3778 + __ reset_last_Java_frame(true, true); 1.3779 + 1.3780 + // Pop self-frame. 
1.3781 + __ leave(); // Epilog 1.3782 + 1.3783 + // Jump to interpreter 1.3784 + __ ret(0); 1.3785 + 1.3786 + // Make sure all code is generated 1.3787 + masm->flush(); 1.3788 + 1.3789 + _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, 1.3790 + SimpleRuntimeFrame::framesize >> 1); 1.3791 +} 1.3792 +#endif // COMPILER2 1.3793 + 1.3794 + 1.3795 +//------------------------------generate_handler_blob------ 1.3796 +// 1.3797 +// Generate a special Compile2Runtime blob that saves all registers, 1.3798 +// and sets up an oopmap. 1.3799 +// 1.3800 +SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) { 1.3801 + assert(StubRoutines::forward_exception_entry() != NULL, 1.3802 + "must be generated before"); 1.3803 + 1.3804 + ResourceMark rm; 1.3805 + OopMapSet *oop_maps = new OopMapSet(); 1.3806 + OopMap* map; 1.3807 + 1.3808 + // Allocate space for the code. Setup code generation tools. 1.3809 + CodeBuffer buffer("handler_blob", 2048, 1024); 1.3810 + MacroAssembler* masm = new MacroAssembler(&buffer); 1.3811 + 1.3812 + address start = __ pc(); 1.3813 + address call_pc = NULL; 1.3814 + int frame_size_in_words; 1.3815 + bool cause_return = (poll_type == POLL_AT_RETURN); 1.3816 + bool save_vectors = (poll_type == POLL_AT_VECTOR_LOOP); 1.3817 + 1.3818 + if (UseRTMLocking) { 1.3819 + // Abort RTM transaction before calling runtime 1.3820 + // because critical section will be large and will be 1.3821 + // aborted anyway. Also nmethod could be deoptimized. 1.3822 + __ xabort(0); 1.3823 + } 1.3824 + 1.3825 + // Make room for return address (or push it again) 1.3826 + if (!cause_return) { 1.3827 + __ push(rbx); 1.3828 + } 1.3829 + 1.3830 + // Save registers, fpu state, and flags 1.3831 + map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, save_vectors); 1.3832 + 1.3833 + // The following is basically a call_VM. However, we need the precise 1.3834 + // address of the call in order to generate an oopmap. Hence, we do all the 1.3835 + // work ourselves. 1.3836 + 1.3837 + __ set_last_Java_frame(noreg, noreg, NULL); 1.3838 + 1.3839 + // The return address must always be correct so that the frame constructor 1.3840 + // never sees an invalid pc. 1.3841 + 1.3842 + if (!cause_return) { 1.3843 + // overwrite the dummy value we pushed on entry 1.3844 + __ movptr(c_rarg0, Address(r15_thread, JavaThread::saved_exception_pc_offset())); 1.3845 + __ movptr(Address(rbp, wordSize), c_rarg0); 1.3846 + } 1.3847 + 1.3848 + // Do the call 1.3849 + __ mov(c_rarg0, r15_thread); 1.3850 + __ call(RuntimeAddress(call_ptr)); 1.3851 + 1.3852 + // Set an oopmap for the call site. This oopmap will map all 1.3853 + // oop-registers and debug-info registers as callee-saved. This 1.3854 + // will allow deoptimization at this safepoint to find all possible 1.3855 + // debug-info recordings, as well as let GC find all oops. 1.3856 + 1.3857 + oop_maps->add_gc_map( __ pc() - start, map); 1.3858 + 1.3859 + Label noException; 1.3860 + 1.3861 + __ reset_last_Java_frame(false, false); 1.3862 + 1.3863 + __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD); 1.3864 + __ jcc(Assembler::equal, noException); 1.3865 + 1.3866 + // Exception pending 1.3867 + 1.3868 + RegisterSaver::restore_live_registers(masm, save_vectors); 1.3869 + 1.3870 + __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); 1.3871 + 1.3872 + // No exception case 1.3873 + __ bind(noException); 1.3874 + 1.3875 + // Normal exit, restore registers and exit.
1.3876 + RegisterSaver::restore_live_registers(masm, save_vectors); 1.3877 + 1.3878 + __ ret(0); 1.3879 + 1.3880 + // Make sure all code is generated 1.3881 + masm->flush(); 1.3882 + 1.3883 + // Fill-out other meta info 1.3884 + return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words); 1.3885 +} 1.3886 + 1.3887 +// 1.3888 +// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss) 1.3889 +// 1.3890 +// Generate a stub that calls into the vm to find out the proper destination 1.3891 +// of a java call. All the argument registers are live at this point 1.3892 +// but since this is generic code we don't know what they are and the caller 1.3893 +// must do any gc of the args. 1.3894 +// 1.3895 +RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) { 1.3896 + assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); 1.3897 + 1.3898 + // allocate space for the code 1.3899 + ResourceMark rm; 1.3900 + 1.3901 + CodeBuffer buffer(name, 1000, 512); 1.3902 + MacroAssembler* masm = new MacroAssembler(&buffer); 1.3903 + 1.3904 + int frame_size_in_words; 1.3905 + 1.3906 + OopMapSet *oop_maps = new OopMapSet(); 1.3907 + OopMap* map = NULL; 1.3908 + 1.3909 + int start = __ offset(); 1.3910 + 1.3911 + map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words); 1.3912 + 1.3913 + int frame_complete = __ offset(); 1.3914 + 1.3915 + __ set_last_Java_frame(noreg, noreg, NULL); 1.3916 + 1.3917 + __ mov(c_rarg0, r15_thread); 1.3918 + 1.3919 + __ call(RuntimeAddress(destination)); 1.3920 + 1.3921 + 1.3922 + // Set an oopmap for the call site. 1.3923 + // We need this not only for callee-saved registers, but also for volatile 1.3924 + // registers that the compiler might be keeping live across a safepoint. 1.3925 + 1.3926 + oop_maps->add_gc_map( __ offset() - start, map); 1.3927 + 1.3928 + // rax contains the address we are going to jump to, assuming no exception got installed 1.3929 + 1.3930 + // clear last_Java_sp 1.3931 + __ reset_last_Java_frame(false, false); 1.3932 + // check for pending exceptions 1.3933 + Label pending; 1.3934 + __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD); 1.3935 + __ jcc(Assembler::notEqual, pending); 1.3936 + 1.3937 + // get the returned Method* 1.3938 + __ get_vm_result_2(rbx, r15_thread); 1.3939 + __ movptr(Address(rsp, RegisterSaver::rbx_offset_in_bytes()), rbx); 1.3940 + 1.3941 + __ movptr(Address(rsp, RegisterSaver::rax_offset_in_bytes()), rax); 1.3942 + 1.3943 + RegisterSaver::restore_live_registers(masm); 1.3944 + 1.3945 + // We are back to the original state on entry and ready to go. 1.3946 + 1.3947 + __ jmp(rax); 1.3948 + 1.3949 + // Pending exception after the safepoint 1.3950 + 1.3951 + __ bind(pending); 1.3952 + 1.3953 + RegisterSaver::restore_live_registers(masm); 1.3954 + 1.3955 + // exception pending => remove activation and forward to exception handler 1.3956 + 1.3957 + __ movptr(Address(r15_thread, JavaThread::vm_result_offset()), (int)NULL_WORD); 1.3958 + 1.3959 + __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset())); 1.3960 + __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); 1.3961 + 1.3962 + // ------------- 1.3963 + // make sure all code is generated 1.3964 + masm->flush(); 1.3965 + 1.3966 + // return the blob 1.3967 + // frame_size_words or bytes??
1.3968 + return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true); 1.3969 +} 1.3970 + 1.3971 + 1.3972 +#ifdef COMPILER2 1.3973 +// This is here instead of runtime_x86_64.cpp because it uses SimpleRuntimeFrame 1.3974 +// 1.3975 +//------------------------------generate_exception_blob--------------------------- 1.3976 +// creates exception blob at the end 1.3977 +// Using the exception blob, this code is jumped to from a compiled method. 1.3978 +// (see emit_exception_handler in x86_64.ad file) 1.3979 +// 1.3980 +// Given an exception pc at a call we call into the runtime for the 1.3981 +// handler in this method. This handler might merely restore state 1.3982 +// (i.e. callee save registers), unwind the frame, and jump to the 1.3983 +// exception handler for the nmethod if there is no Java level handler 1.3984 +// for the nmethod. 1.3985 +// 1.3986 +// This code is entered with a jmp. 1.3987 +// 1.3988 +// Arguments: 1.3989 +// rax: exception oop 1.3990 +// rdx: exception pc 1.3991 +// 1.3992 +// Results: 1.3993 +// rax: exception oop 1.3994 +// rdx: exception pc in caller or ??? 1.3995 +// destination: exception handler of caller 1.3996 +// 1.3997 +// Note: the exception pc MUST be at a call (precise debug information) 1.3998 +// Registers rax, rdx, rcx, rsi, rdi, r8-r11 are not callee saved. 1.3999 +// 1.4000 + 1.4001 +void OptoRuntime::generate_exception_blob() { 1.4002 + assert(!OptoRuntime::is_callee_saved_register(RDX_num), ""); 1.4003 + assert(!OptoRuntime::is_callee_saved_register(RAX_num), ""); 1.4004 + assert(!OptoRuntime::is_callee_saved_register(RCX_num), ""); 1.4005 + 1.4006 + assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned"); 1.4007 + 1.4008 + // Allocate space for the code 1.4009 + ResourceMark rm; 1.4010 + // Setup code generation tools 1.4011 + CodeBuffer buffer("exception_blob", 2048, 1024); 1.4012 + MacroAssembler* masm = new MacroAssembler(&buffer); 1.4013 + 1.4014 + 1.4015 + address start = __ pc(); 1.4016 + 1.4017 + // Exception pc is 'return address' for stack walker 1.4018 + __ push(rdx); 1.4019 + __ subptr(rsp, SimpleRuntimeFrame::return_off << LogBytesPerInt); // Prolog 1.4020 + 1.4021 + // Save callee-saved registers. See x86_64.ad. 1.4022 + 1.4023 + // rbp is an implicitly saved callee saved register (i.e. the calling 1.4024 + // convention will save/restore it in prolog/epilog). Other than that 1.4025 + // there are no callee save registers now that adapter frames are gone. 1.4026 + 1.4027 + __ movptr(Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt), rbp); 1.4028 + 1.4029 + // Store exception in Thread object. We cannot pass any arguments to the 1.4030 + // handle_exception call, since we do not want to make any assumption 1.4031 + // about the size of the frame where the exception happened. 1.4032 + // c_rarg0 is either rdi (Linux) or rcx (Windows). 1.4033 + __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()),rax); 1.4034 + __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), rdx); 1.4035 + 1.4036 + // This call does all the hard work. It checks if an exception handler 1.4037 + // exists in the method. 1.4038 + // If so, it returns the handler address. 1.4039 + // If not, it prepares for stack-unwinding, restoring the callee-save 1.4040 + // registers of the frame being removed.
1.4041 + // 1.4042 + // address OptoRuntime::handle_exception_C(JavaThread* thread) 1.4043 + 1.4044 + // At a method handle call, the stack may not be properly aligned 1.4045 + // when returning with an exception. 1.4046 + address the_pc = __ pc(); 1.4047 + __ set_last_Java_frame(noreg, noreg, the_pc); 1.4048 + __ mov(c_rarg0, r15_thread); 1.4049 + __ andptr(rsp, -(StackAlignmentInBytes)); // Align stack 1.4050 + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, OptoRuntime::handle_exception_C))); 1.4051 + 1.4052 + // Set an oopmap for the call site. This oopmap will only be used if we 1.4053 + // are unwinding the stack. Hence, all locations will be dead. 1.4054 + // Callee-saved registers will be the same as the frame above (i.e., 1.4055 + // handle_exception_stub), since they were restored when we got the 1.4056 + // exception. 1.4057 + 1.4058 + OopMapSet* oop_maps = new OopMapSet(); 1.4059 + 1.4060 + oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0)); 1.4061 + 1.4062 + __ reset_last_Java_frame(false, true); 1.4063 + 1.4064 + // Restore callee-saved registers 1.4065 + 1.4066 + // rbp is an implicitly saved callee saved register (i.e. the calling 1.4067 + // convention will save/restore it in prolog/epilog). Other than that 1.4068 + // there are no callee save registers now that adapter frames are gone. 1.4069 + 1.4070 + __ movptr(rbp, Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt)); 1.4071 + 1.4072 + __ addptr(rsp, SimpleRuntimeFrame::return_off << LogBytesPerInt); // Epilog 1.4073 + __ pop(rdx); // No need for exception pc anymore 1.4074 + 1.4075 + // rax: exception handler 1.4076 + 1.4077 + // Restore SP from BP if the exception PC is a MethodHandle call site. 1.4078 + __ cmpl(Address(r15_thread, JavaThread::is_method_handle_return_offset()), 0); 1.4079 + __ cmovptr(Assembler::notEqual, rsp, rbp_mh_SP_save); 1.4080 + 1.4081 + // We have a handler in rax (could be deopt blob). 1.4082 + __ mov(r8, rax); 1.4083 + 1.4084 + // Get the exception oop 1.4085 + __ movptr(rax, Address(r15_thread, JavaThread::exception_oop_offset())); 1.4086 + // Get the exception pc in case we are deoptimized 1.4087 + __ movptr(rdx, Address(r15_thread, JavaThread::exception_pc_offset())); 1.4088 +#ifdef ASSERT 1.4089 + __ movptr(Address(r15_thread, JavaThread::exception_handler_pc_offset()), (int)NULL_WORD); 1.4090 + __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), (int)NULL_WORD); 1.4091 +#endif 1.4092 + // Clear the exception oop so GC no longer processes it as a root. 1.4093 + __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), (int)NULL_WORD); 1.4094 + 1.4095 + // rax: exception oop 1.4096 + // r8: exception handler 1.4097 + // rdx: exception pc 1.4098 + // Jump to handler 1.4099 + 1.4100 + __ jmp(r8); 1.4101 + 1.4102 + // Make sure all code is generated 1.4103 + masm->flush(); 1.4104 + 1.4105 + // Set exception blob 1.4106 + _exception_blob = ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1); 1.4107 +} 1.4108 +#endif // COMPILER2
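// The register contract of the exception blob above, restated as a sketch
// (stand-in signature; the real hand-off happens entirely in registers):
//
//   typedef address (*HandleExceptionC)(JavaThread* thread);
//   address handler = handle_exception_C(thread);  // nmethod handler or deopt blob
//   // the blob then reloads the exception oop into rax and the pc into rdx,
//   // and jmps to handler with exactly those registers live.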