--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/sparc/vm/sharedRuntime_sparc.cpp	Wed Apr 27 01:25:04 2016 +0800
@@ -0,0 +1,3923 @@
+/*
+ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "code/debugInfoRec.hpp"
+#include "code/icBuffer.hpp"
+#include "code/vtableStubs.hpp"
+#include "interpreter/interpreter.hpp"
+#include "oops/compiledICHolder.hpp"
+#include "prims/jvmtiRedefineClassesTrace.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/vframeArray.hpp"
+#include "vmreg_sparc.inline.hpp"
+#ifdef COMPILER1
+#include "c1/c1_Runtime1.hpp"
+#endif
+#ifdef COMPILER2
+#include "opto/runtime.hpp"
+#endif
+#ifdef SHARK
+#include "compiler/compileBroker.hpp"
+#include "shark/sharkCompiler.hpp"
+#endif
+
+#define __ masm->
+
+
+class RegisterSaver {
+
+  // Used for saving volatile registers. This is Gregs, Fregs, I/L/O.
+  // The Oregs are problematic. In the 32-bit build the compiler can
+  // have O registers live with 64-bit quantities. A window save will
+  // cut the heads off of the registers. We have to do a very extensive
+  // stack dance to save and restore these properly.
+
+  // Note that the Oregs problem only exists if we block at either a polling
+  // page exception or a compiled-code safepoint that was not originally a
+  // call, or if we deoptimize following one of these kinds of safepoints.
+
+  // Lots of registers to save. For all builds, a window save will preserve
+  // the %i and %l registers. For the 32-bit longs-in-two-entries and 64-bit
+  // builds a window-save will preserve the %o registers. In the LION build
+  // we need to save the 64-bit %o registers, which requires we save them
+  // before the window-save (as then they become %i registers and get their
+  // heads chopped off on interrupt). We have to save some %g registers here
+  // as well.
+  enum {
+    // This frame's save area. Includes extra space for the native call:
+    // vararg's layout space and the like. Briefly holds the caller's
+    // register save area.
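+    // (Annotation, roughly sketching the enumerators below: the save area is
+    // laid out low-to-high as the call-args area, the %g save slots, the %o
+    // save slots, the %ccr/%fsr slots, then the 32-double FP save area.)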
+    call_args_area = frame::register_save_words_sp_offset +
+                     frame::memory_parameter_word_sp_offset*wordSize,
+    // Make sure save locations are always 8 byte aligned.
+    // can't use round_to because it doesn't produce a compile-time constant
+    start_of_extra_save_area = ((call_args_area + 7) & ~7),
+    g1_offset = start_of_extra_save_area, // g-regs needing saving
+    g3_offset = g1_offset+8,
+    g4_offset = g3_offset+8,
+    g5_offset = g4_offset+8,
+    o0_offset = g5_offset+8,
+    o1_offset = o0_offset+8,
+    o2_offset = o1_offset+8,
+    o3_offset = o2_offset+8,
+    o4_offset = o3_offset+8,
+    o5_offset = o4_offset+8,
+    start_of_flags_save_area = o5_offset+8,
+    ccr_offset = start_of_flags_save_area,
+    fsr_offset = ccr_offset + 8,
+    d00_offset = fsr_offset+8,  // Start of float save area
+    register_save_size = d00_offset+8*32
+  };
+
+
+  public:
+
+  static int Oexception_offset() { return o0_offset; };
+  static int G3_offset() { return g3_offset; };
+  static int G5_offset() { return g5_offset; };
+  static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words);
+  static void restore_live_registers(MacroAssembler* masm);
+
+  // During deoptimization only the result registers need to be restored;
+  // all the other values have already been extracted.
+
+  static void restore_result_registers(MacroAssembler* masm);
+};
+
+OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) {
+  // Record volatile registers as callee-save values in an OopMap so their save locations will be
+  // propagated to the caller frame's RegisterMap during StackFrameStream construction (needed for
+  // deoptimization; see compiledVFrame::create_stack_value). The caller's I, L and O registers
+  // are saved in register windows - I's and L's in the caller's frame and O's in the stub frame
+  // (as the stub's I's) when the runtime routine called by the stub creates its frame.
+  int i;
+  // Always make the frame size 16 byte aligned.
+  int frame_size = round_to(additional_frame_words + register_save_size, 16);
+  // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words
+  int frame_size_in_slots = frame_size / sizeof(jint);
+  // CodeBlob frame size is in words.
+  *total_frame_words = frame_size / wordSize;
+  // OopMap* map = new OopMap(*total_frame_words, 0);
+  OopMap* map = new OopMap(frame_size_in_slots, 0);
+
+#if !defined(_LP64)
+
+  // Save 64-bit O registers; they will get their heads chopped off on a 'save'.
+  __ stx(O0, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8);
+  __ stx(O1, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8);
+  __ stx(O2, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+2*8);
+  __ stx(O3, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+3*8);
+  __ stx(O4, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+4*8);
+  __ stx(O5, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+5*8);
+#endif /* _LP64 */
+
+  __ save(SP, -frame_size, SP);
+
+#ifndef _LP64
+  // Reload the 64-bit Oregs. Although they are now Iregs we load them
+  // to Oregs here to avoid interrupts cutting off their heads.
+
+  __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8, O0);
+  __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8, O1);
+  __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+2*8, O2);
+  __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+3*8, O3);
+  __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+4*8, O4);
+  __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+5*8, O5);
+
+  __ stx(O0, SP, o0_offset+STACK_BIAS);
+  map->set_callee_saved(VMRegImpl::stack2reg((o0_offset + 4)>>2), O0->as_VMReg());
+
+  __ stx(O1, SP, o1_offset+STACK_BIAS);
+  map->set_callee_saved(VMRegImpl::stack2reg((o1_offset + 4)>>2), O1->as_VMReg());
+
+  __ stx(O2, SP, o2_offset+STACK_BIAS);
+  map->set_callee_saved(VMRegImpl::stack2reg((o2_offset + 4)>>2), O2->as_VMReg());
+
+  __ stx(O3, SP, o3_offset+STACK_BIAS);
+  map->set_callee_saved(VMRegImpl::stack2reg((o3_offset + 4)>>2), O3->as_VMReg());
+
+  __ stx(O4, SP, o4_offset+STACK_BIAS);
+  map->set_callee_saved(VMRegImpl::stack2reg((o4_offset + 4)>>2), O4->as_VMReg());
+
+  __ stx(O5, SP, o5_offset+STACK_BIAS);
+  map->set_callee_saved(VMRegImpl::stack2reg((o5_offset + 4)>>2), O5->as_VMReg());
+#endif /* _LP64 */
+
+
+#ifdef _LP64
+  int debug_offset = 0;
+#else
+  int debug_offset = 4;
+#endif
+  // Save the G's
+  __ stx(G1, SP, g1_offset+STACK_BIAS);
+  map->set_callee_saved(VMRegImpl::stack2reg((g1_offset + debug_offset)>>2), G1->as_VMReg());
+
+  __ stx(G3, SP, g3_offset+STACK_BIAS);
+  map->set_callee_saved(VMRegImpl::stack2reg((g3_offset + debug_offset)>>2), G3->as_VMReg());
+
+  __ stx(G4, SP, g4_offset+STACK_BIAS);
+  map->set_callee_saved(VMRegImpl::stack2reg((g4_offset + debug_offset)>>2), G4->as_VMReg());
+
+  __ stx(G5, SP, g5_offset+STACK_BIAS);
+  map->set_callee_saved(VMRegImpl::stack2reg((g5_offset + debug_offset)>>2), G5->as_VMReg());
+
+  // This is really a waste but we'll keep things as they were for now
+  if (true) {
+#ifndef _LP64
+    map->set_callee_saved(VMRegImpl::stack2reg((o0_offset)>>2), O0->as_VMReg()->next());
+    map->set_callee_saved(VMRegImpl::stack2reg((o1_offset)>>2), O1->as_VMReg()->next());
+    map->set_callee_saved(VMRegImpl::stack2reg((o2_offset)>>2), O2->as_VMReg()->next());
+    map->set_callee_saved(VMRegImpl::stack2reg((o3_offset)>>2), O3->as_VMReg()->next());
+    map->set_callee_saved(VMRegImpl::stack2reg((o4_offset)>>2), O4->as_VMReg()->next());
+    map->set_callee_saved(VMRegImpl::stack2reg((o5_offset)>>2), O5->as_VMReg()->next());
+    map->set_callee_saved(VMRegImpl::stack2reg((g1_offset)>>2), G1->as_VMReg()->next());
+    map->set_callee_saved(VMRegImpl::stack2reg((g3_offset)>>2), G3->as_VMReg()->next());
+    map->set_callee_saved(VMRegImpl::stack2reg((g4_offset)>>2), G4->as_VMReg()->next());
+    map->set_callee_saved(VMRegImpl::stack2reg((g5_offset)>>2), G5->as_VMReg()->next());
+#endif /* _LP64 */
+  }
+
+
+  // Save the flags
+  __ rdccr( G5 );
+  __ stx(G5, SP, ccr_offset+STACK_BIAS);
+  __ stxfsr(SP, fsr_offset+STACK_BIAS);
+
+  // Save all the FP registers: 32 doubles (32 floats correspond to the
+  // 2 halves of the first 16 doubles)
+  int offset = d00_offset;
+  for( int i=0; i<FloatRegisterImpl::number_of_registers; i+=2 ) {
+    FloatRegister f = as_FloatRegister(i);
+    __ stf(FloatRegisterImpl::D, f, SP, offset+STACK_BIAS);
+    // Record as callee saved both halves of double registers (2 float registers).
+    map->set_callee_saved(VMRegImpl::stack2reg(offset>>2), f->as_VMReg());
+    map->set_callee_saved(VMRegImpl::stack2reg((offset + sizeof(float))>>2), f->as_VMReg()->next());
+    offset += sizeof(double);
+  }
+
+  // And we're done.
+
+  return map;
+}
+
+
+// Pop the current frame and restore all the registers that we
+// saved.
+void RegisterSaver::restore_live_registers(MacroAssembler* masm) {
+
+  // Restore all the FP registers
+  for( int i=0; i<FloatRegisterImpl::number_of_registers; i+=2 ) {
+    __ ldf(FloatRegisterImpl::D, SP, d00_offset+i*sizeof(float)+STACK_BIAS, as_FloatRegister(i));
+  }
+
+  __ ldx(SP, ccr_offset+STACK_BIAS, G1);
+  __ wrccr (G1) ;
+
+  // Restore the G's
+  // Note that G2 (AKA GThread) must be saved and restored separately.
+  // TODO-FIXME: save and restore some of the other ASRs, viz., %asi and %gsr.
+
+  __ ldx(SP, g1_offset+STACK_BIAS, G1);
+  __ ldx(SP, g3_offset+STACK_BIAS, G3);
+  __ ldx(SP, g4_offset+STACK_BIAS, G4);
+  __ ldx(SP, g5_offset+STACK_BIAS, G5);
+
+
+#if !defined(_LP64)
+  // Restore the 64-bit O's.
+  __ ldx(SP, o0_offset+STACK_BIAS, O0);
+  __ ldx(SP, o1_offset+STACK_BIAS, O1);
+  __ ldx(SP, o2_offset+STACK_BIAS, O2);
+  __ ldx(SP, o3_offset+STACK_BIAS, O3);
+  __ ldx(SP, o4_offset+STACK_BIAS, O4);
+  __ ldx(SP, o5_offset+STACK_BIAS, O5);
+
+  // And temporarily place them in TLS
+
+  __ stx(O0, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8);
+  __ stx(O1, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8);
+  __ stx(O2, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+2*8);
+  __ stx(O3, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+3*8);
+  __ stx(O4, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+4*8);
+  __ stx(O5, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+5*8);
+#endif /* _LP64 */
+
+  // Restore flags
+
+  __ ldxfsr(SP, fsr_offset+STACK_BIAS);
+
+  __ restore();
+
+#if !defined(_LP64)
+  // Now reload the 64-bit Oregs after we've restored the window.
+  __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8, O0);
+  __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8, O1);
+  __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+2*8, O2);
+  __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+3*8, O3);
+  __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+4*8, O4);
+  __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+5*8, O5);
+#endif /* _LP64 */
+
+}
+
+// Pop the current frame and restore the registers that might be holding
+// a result.
+void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
+
+#if !defined(_LP64)
+  // 32-bit build returns longs in G1
+  __ ldx(SP, g1_offset+STACK_BIAS, G1);
+
+  // Retrieve the 64-bit O's.
+  __ ldx(SP, o0_offset+STACK_BIAS, O0);
+  __ ldx(SP, o1_offset+STACK_BIAS, O1);
+  // and save to TLS
+  __ stx(O0, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8);
+  __ stx(O1, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8);
+#endif /* _LP64 */
+
+  __ ldf(FloatRegisterImpl::D, SP, d00_offset+STACK_BIAS, as_FloatRegister(0));
+
+  __ restore();
+
+#if !defined(_LP64)
+  // Now reload the 64-bit Oregs after we've restored the window.
+  __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8, O0);
+  __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8, O1);
+#endif /* _LP64 */
+
+}
+
+// Is vector's size (in bytes) bigger than a size saved by default?
+// 8 bytes FP registers are saved by default on SPARC.
+bool SharedRuntime::is_wide_vector(int size) {
+  // Note, MaxVectorSize == 8 on SPARC.
+  assert(size <= 8, err_msg_res("%d bytes vectors are not supported", size));
+  return size > 8;
+}
+
+// The java_calling_convention describes stack locations as ideal slots on
+// a frame with no abi restrictions. Since we must observe abi restrictions
+// (like the placement of the register window) the slots must be biased by
+// the following value.
+static int reg2offset(VMReg r) {
+  return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
+}
+
+static VMRegPair reg64_to_VMRegPair(Register r) {
+  VMRegPair ret;
+  if (wordSize == 8) {
+    ret.set2(r->as_VMReg());
+  } else {
+    ret.set_pair(r->successor()->as_VMReg(), r->as_VMReg());
+  }
+  return ret;
+}
+
+// ---------------------------------------------------------------------------
+// Read the array of BasicTypes from a signature, and compute where the
+// arguments should go. Values in the VMRegPair regs array refer to 4-byte
+// (VMRegImpl::stack_slot_size) quantities. Values less than VMRegImpl::stack0
+// are registers, those above refer to 4-byte stack slots. All stack slots are
+// based off of the window top. VMRegImpl::stack0 refers to the first slot past
+// the 16-word window, and VMRegImpl::stack0+1 refers to the memory word
+// 4 bytes higher. Register values 0-63 (up to RegisterImpl::number_of_registers)
+// are the 64-bit integer registers. Values 64-95 are the (32-bit only) float
+// registers. Each 32-bit quantity is given its own number, so the integer
+// registers (in either 32- or 64-bit builds) use 2 numbers. For example, there
+// is an O0-low and an O0-high. Essentially, all int register numbers are doubled.
+
+// Register results are passed in O0-O5, for outgoing call arguments. To
+// convert to incoming arguments, convert all O's to I's. The regs array
+// refers to the low and hi 32-bit words of 64-bit registers or stack slots.
+// If the regs[].second() field is set to VMRegImpl::Bad(), it means it's unused (a
+// 32-bit value was passed). If both are VMRegImpl::Bad(), it means no value was
+// passed (used as a placeholder for the other half of longs and doubles in
+// the 64-bit build). regs[].second() is either VMRegImpl::Bad() or
+// regs[].first()+1 (regs[].first() may be misaligned in the C calling convention).
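+// (For example, roughly: a plain 32-bit int in O0 has regs[].first() set to
+// O0's VMReg and regs[].second() == VMRegImpl::Bad(), while a 64-bit value
+// occupies both halves, with regs[].second() == regs[].first()->next().)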
+// Sparc never passes a value in regs[].second() but not regs[].first()
+// (i.e., regs[].first() == VMRegImpl::Bad() && regs[].second() != VMRegImpl::Bad()),
+// nor unrelated values in the same VMRegPair.
+
+// Note: the INPUTS in sig_bt are in units of Java argument words, which are
+// either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit
+// units regardless of build.
+
+
+// ---------------------------------------------------------------------------
+// The compiled Java calling convention. The Java convention always passes
+// 64-bit values in adjacent aligned locations (either registers or stack),
+// floats in float registers and doubles in aligned float pairs. There is
+// no backing varargs store for values in registers.
+// In the 32-bit build, longs are passed on the stack (cannot be
+// passed in I's, because longs in I's get their heads chopped off at
+// interrupt).
+int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
+                                           VMRegPair *regs,
+                                           int total_args_passed,
+                                           int is_outgoing) {
+  assert(F31->as_VMReg()->is_reg(), "overlapping stack/register numbers");
+
+  const int int_reg_max = SPARC_ARGS_IN_REGS_NUM;
+  const int flt_reg_max = 8;
+
+  int int_reg = 0;
+  int flt_reg = 0;
+  int slot = 0;
+
+  for (int i = 0; i < total_args_passed; i++) {
+    switch (sig_bt[i]) {
+    case T_INT:
+    case T_SHORT:
+    case T_CHAR:
+    case T_BYTE:
+    case T_BOOLEAN:
+#ifndef _LP64
+    case T_OBJECT:
+    case T_ARRAY:
+    case T_ADDRESS: // Used, e.g., in slow-path locking for the lock's stack address
+#endif // _LP64
+      if (int_reg < int_reg_max) {
+        Register r = is_outgoing ? as_oRegister(int_reg++) : as_iRegister(int_reg++);
+        regs[i].set1(r->as_VMReg());
+      } else {
+        regs[i].set1(VMRegImpl::stack2reg(slot++));
+      }
+      break;
+
+#ifdef _LP64
+    case T_LONG:
+      assert(sig_bt[i+1] == T_VOID, "expecting VOID in other half");
+      // fall-through
+    case T_OBJECT:
+    case T_ARRAY:
+    case T_ADDRESS: // Used, e.g., in slow-path locking for the lock's stack address
+      if (int_reg < int_reg_max) {
+        Register r = is_outgoing ? as_oRegister(int_reg++) : as_iRegister(int_reg++);
+        regs[i].set2(r->as_VMReg());
+      } else {
+        slot = round_to(slot, 2);  // align
+        regs[i].set2(VMRegImpl::stack2reg(slot));
+        slot += 2;
+      }
+      break;
+#else
+    case T_LONG:
+      assert(sig_bt[i+1] == T_VOID, "expecting VOID in other half");
+      // On 32-bit SPARC put longs always on the stack to keep the pressure off
+      // integer argument registers. They should be used for oops.
+      slot = round_to(slot, 2);  // align
+      regs[i].set2(VMRegImpl::stack2reg(slot));
+      slot += 2;
+#endif
+      break;
+
+    case T_FLOAT:
+      if (flt_reg < flt_reg_max) {
+        FloatRegister r = as_FloatRegister(flt_reg++);
+        regs[i].set1(r->as_VMReg());
+      } else {
+        regs[i].set1(VMRegImpl::stack2reg(slot++));
+      }
+      break;
+
+    case T_DOUBLE:
+      assert(sig_bt[i+1] == T_VOID, "expecting half");
+      if (round_to(flt_reg, 2) + 1 < flt_reg_max) {
+        flt_reg = round_to(flt_reg, 2);  // align
+        FloatRegister r = as_FloatRegister(flt_reg);
+        regs[i].set2(r->as_VMReg());
+        flt_reg += 2;
+      } else {
+        slot = round_to(slot, 2);  // align
+        regs[i].set2(VMRegImpl::stack2reg(slot));
+        slot += 2;
+      }
+      break;
+
+    case T_VOID:
+      regs[i].set_bad();  // Halves of longs & doubles
+      break;
+
+    default:
+      fatal(err_msg_res("unknown basic type %d", sig_bt[i]));
+      break;
+    }
+  }
+
+  // Return the amount of stack space these arguments will need.
+  return slot;
+}
+
+// Helper class mostly to avoid passing masm everywhere, and handle
+// store displacement overflow logic.
+class AdapterGenerator {
+  MacroAssembler *masm;
+  Register Rdisp;
+  void set_Rdisp(Register r) { Rdisp = r; }
+
+  void patch_callers_callsite();
+
+  // base+st_off points to top of argument
+  int arg_offset(const int st_off) { return st_off; }
+  int next_arg_offset(const int st_off) {
+    return st_off - Interpreter::stackElementSize;
+  }
+
+  // Argument slot values may be loaded first into a register because
+  // they might not fit into displacement.
+  RegisterOrConstant arg_slot(const int st_off);
+  RegisterOrConstant next_arg_slot(const int st_off);
+
+  // Stores long into offset pointed to by base
+  void store_c2i_long(Register r, Register base,
+                      const int st_off, bool is_stack);
+  void store_c2i_object(Register r, Register base,
+                        const int st_off);
+  void store_c2i_int(Register r, Register base,
+                     const int st_off);
+  void store_c2i_double(VMReg r_2,
+                        VMReg r_1, Register base, const int st_off);
+  void store_c2i_float(FloatRegister f, Register base,
+                       const int st_off);
+
+ public:
+  void gen_c2i_adapter(int total_args_passed,
+                       // VMReg max_arg,
+                       int comp_args_on_stack, // VMRegStackSlots
+                       const BasicType *sig_bt,
+                       const VMRegPair *regs,
+                       Label& skip_fixup);
+  void gen_i2c_adapter(int total_args_passed,
+                       // VMReg max_arg,
+                       int comp_args_on_stack, // VMRegStackSlots
+                       const BasicType *sig_bt,
+                       const VMRegPair *regs);
+
+  AdapterGenerator(MacroAssembler *_masm) : masm(_masm) {}
+};
+
+
+// Patch the caller's callsite with entry to compiled code if it exists.
+void AdapterGenerator::patch_callers_callsite() {
+  Label L;
+  __ ld_ptr(G5_method, in_bytes(Method::code_offset()), G3_scratch);
+  __ br_null(G3_scratch, false, Assembler::pt, L);
+  __ delayed()->nop();
+  // Call into the VM to patch the caller, then jump to compiled callee
+  __ save_frame(4);     // Args in compiled layout; do not blow them
+
+  // Must save all the live Gregs; the list is:
+  // G1: 1st Long arg (32-bit build)
+  // G2: global allocated to TLS
+  // G3: used in inline cache check (scratch)
+  // G4: 2nd Long arg (32-bit build);
+  // G5: used in inline cache check (Method*)
+
+  // The longs must go to the stack by hand since in the 32-bit build they can be trashed by window ops.
+
+#ifdef _LP64
+  // mov(s,d)
+  __ mov(G1, L1);
+  __ mov(G4, L4);
+  __ mov(G5_method, L5);
+  __ mov(G5_method, O0);         // VM needs target method
+  __ mov(I7, O1);                // VM needs caller's callsite
+  // Must be a leaf call...
+  // can be very far once the blob has been relocated
+  AddressLiteral dest(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite));
+  __ relocate(relocInfo::runtime_call_type);
+  __ jumpl_to(dest, O7, O7);
+  __ delayed()->mov(G2_thread, L7_thread_cache);
+  __ mov(L7_thread_cache, G2_thread);
+  __ mov(L1, G1);
+  __ mov(L4, G4);
+  __ mov(L5, G5_method);
+#else
+  __ stx(G1, FP, -8 + STACK_BIAS);
+  __ stx(G4, FP, -16 + STACK_BIAS);
+  __ mov(G5_method, L5);
+  __ mov(G5_method, O0);         // VM needs target method
+  __ mov(I7, O1);                // VM needs caller's callsite
+  // Must be a leaf call...
+  __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), relocInfo::runtime_call_type);
+  __ delayed()->mov(G2_thread, L7_thread_cache);
+  __ mov(L7_thread_cache, G2_thread);
+  __ ldx(FP, -8 + STACK_BIAS, G1);
+  __ ldx(FP, -16 + STACK_BIAS, G4);
+  __ mov(L5, G5_method);
+#endif /* _LP64 */
+
+  __ restore();      // Restore args
+  __ bind(L);
+}
+
+
+RegisterOrConstant AdapterGenerator::arg_slot(const int st_off) {
+  RegisterOrConstant roc(arg_offset(st_off));
+  return __ ensure_simm13_or_reg(roc, Rdisp);
+}
+
+RegisterOrConstant AdapterGenerator::next_arg_slot(const int st_off) {
+  RegisterOrConstant roc(next_arg_offset(st_off));
+  return __ ensure_simm13_or_reg(roc, Rdisp);
+}
+
+
+// Stores long into offset pointed to by base
+void AdapterGenerator::store_c2i_long(Register r, Register base,
+                                      const int st_off, bool is_stack) {
+#ifdef _LP64
+  // In V9, longs are given 2 64-bit slots in the interpreter, but the
+  // data is passed in only 1 slot.
+  __ stx(r, base, next_arg_slot(st_off));
+#else
+#ifdef COMPILER2
+  // Misaligned store of 64-bit data
+  __ stw(r, base, arg_slot(st_off));       // lo bits
+  __ srlx(r, 32, r);
+  __ stw(r, base, next_arg_slot(st_off));  // hi bits
+#else
+  if (is_stack) {
+    // Misaligned store of 64-bit data
+    __ stw(r, base, arg_slot(st_off));       // lo bits
+    __ srlx(r, 32, r);
+    __ stw(r, base, next_arg_slot(st_off));  // hi bits
+  } else {
+    __ stw(r->successor(), base, arg_slot(st_off)     ); // lo bits
+    __ stw(r             , base, next_arg_slot(st_off)); // hi bits
+  }
+#endif // COMPILER2
+#endif // _LP64
+}
+
+void AdapterGenerator::store_c2i_object(Register r, Register base,
+                                        const int st_off) {
+  __ st_ptr (r, base, arg_slot(st_off));
+}
+
+void AdapterGenerator::store_c2i_int(Register r, Register base,
+                                     const int st_off) {
+  __ st (r, base, arg_slot(st_off));
+}
+
+// Stores into offset pointed to by base
+void AdapterGenerator::store_c2i_double(VMReg r_2,
+                                        VMReg r_1, Register base, const int st_off) {
+#ifdef _LP64
+  // In V9, doubles are given 2 64-bit slots in the interpreter, but the
+  // data is passed in only 1 slot.
+  __ stf(FloatRegisterImpl::D, r_1->as_FloatRegister(), base, next_arg_slot(st_off));
+#else
+  // Need to marshal 64-bit value from misaligned Lesp loads
+  __ stf(FloatRegisterImpl::S, r_1->as_FloatRegister(), base, next_arg_slot(st_off));
+  __ stf(FloatRegisterImpl::S, r_2->as_FloatRegister(), base, arg_slot(st_off)     );
+#endif
+}
+
+void AdapterGenerator::store_c2i_float(FloatRegister f, Register base,
+                                       const int st_off) {
+  __ stf(FloatRegisterImpl::S, f, base, arg_slot(st_off));
+}
+
+void AdapterGenerator::gen_c2i_adapter(
+                            int total_args_passed,
+                            // VMReg max_arg,
+                            int comp_args_on_stack, // VMRegStackSlots
+                            const BasicType *sig_bt,
+                            const VMRegPair *regs,
+                            Label& L_skip_fixup) {
+
+  // Before we get into the guts of the C2I adapter, see if we should be here
+  // at all. We've come from compiled code and are attempting to jump to the
+  // interpreter, which means the caller made a static call to get here
+  // (vcalls always get a compiled target if there is one). Check for a
+  // compiled target. If there is one, we need to patch the caller's call.
+  // However we will run interpreted if we come thru here. The next pass
+  // thru the call site will run compiled. If we ran compiled here then
+  // we can (theoretically) do endless i2c->c2i->i2c transitions during
+  // deopt/uncommon trap cycles. If we always go interpreted here then
+  // we can have at most one and don't need to play any tricks to keep
+  // from endlessly growing the stack.
+  //
+  // Actually if we detected that we had an i2c->c2i transition here we
+  // ought to be able to reset the world back to the state of the interpreted
+  // call and not bother building another interpreter arg area. We don't
+  // do that at this point.
+
+  patch_callers_callsite();
+
+  __ bind(L_skip_fixup);
+
+  // Since all args are passed on the stack, total_args_passed*wordSize is the
+  // space we need. Add in varargs area needed by the interpreter. Round up
+  // to stack alignment.
+  const int arg_size = total_args_passed * Interpreter::stackElementSize;
+  const int varargs_area =
+                 (frame::varargs_offset - frame::register_save_words)*wordSize;
+  const int extraspace = round_to(arg_size + varargs_area, 2*wordSize);
+
+  const int bias = STACK_BIAS;
+  const int interp_arg_offset = frame::varargs_offset*wordSize +
+                        (total_args_passed-1)*Interpreter::stackElementSize;
+
+  const Register base = SP;
+
+  // Make some extra space on the stack.
+  __ sub(SP, __ ensure_simm13_or_reg(extraspace, G3_scratch), SP);
+  set_Rdisp(G3_scratch);
+
+  // Write the args into the outgoing interpreter space.
+  for (int i = 0; i < total_args_passed; i++) {
+    const int st_off = interp_arg_offset - (i*Interpreter::stackElementSize) + bias;
+    VMReg r_1 = regs[i].first();
+    VMReg r_2 = regs[i].second();
+    if (!r_1->is_valid()) {
+      assert(!r_2->is_valid(), "");
+      continue;
+    }
+    if (r_1->is_stack()) {        // Pretend stack targets are loaded into G1
+      RegisterOrConstant ld_off = reg2offset(r_1) + extraspace + bias;
+      ld_off = __ ensure_simm13_or_reg(ld_off, Rdisp);
+      r_1 = G1_scratch->as_VMReg(); // as part of the load/store shuffle
+      if (!r_2->is_valid()) __ ld (base, ld_off, G1_scratch);
+      else                  __ ldx(base, ld_off, G1_scratch);
+    }
+
+    if (r_1->is_Register()) {
+      Register r = r_1->as_Register()->after_restore();
+      if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ARRAY) {
+        store_c2i_object(r, base, st_off);
+      } else if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
+        store_c2i_long(r, base, st_off, r_2->is_stack());
+      } else {
+        store_c2i_int(r, base, st_off);
+      }
+    } else {
+      assert(r_1->is_FloatRegister(), "");
+      if (sig_bt[i] == T_FLOAT) {
+        store_c2i_float(r_1->as_FloatRegister(), base, st_off);
+      } else {
+        assert(sig_bt[i] == T_DOUBLE, "wrong type");
+        store_c2i_double(r_2, r_1, base, st_off);
+      }
+    }
+  }
+
+  // Load the interpreter entry point.
+  __ ld_ptr(G5_method, in_bytes(Method::interpreter_entry_offset()), G3_scratch);
+
+  // Pass O5_savedSP as an argument to the interpreter.
+  // The interpreter will restore SP to this value before returning.
+  __ add(SP, __ ensure_simm13_or_reg(extraspace, G1), O5_savedSP);
+
+  __ mov((frame::varargs_offset)*wordSize -
+         1*Interpreter::stackElementSize+bias+BytesPerWord, G1);
+  // Jump to the interpreter just as if interpreter was doing it.
+  __ jmpl(G3_scratch, 0, G0);
+  // Setup Lesp for the call. Cannot actually set Lesp as the current Lesp
+  // (really L0) is in use by the compiled frame as a generic temp. However,
+  // the interpreter does not know where its args are without some kind of
+  // arg pointer being passed in. Pass it in Gargs.
+  __ delayed()->add(SP, G1, Gargs);
+}
+
+static void range_check(MacroAssembler* masm, Register pc_reg, Register temp_reg, Register temp2_reg,
+                        address code_start, address code_end,
+                        Label& L_ok) {
+  Label L_fail;
+  __ set(ExternalAddress(code_start), temp_reg);
+  __ set(pointer_delta(code_end, code_start, 1), temp2_reg);
+  __ cmp(pc_reg, temp_reg);
+  __ brx(Assembler::lessEqualUnsigned, false, Assembler::pn, L_fail);
+  __ delayed()->add(temp_reg, temp2_reg, temp_reg);
+  __ cmp(pc_reg, temp_reg);
+  __ cmp_and_brx_short(pc_reg, temp_reg, Assembler::lessUnsigned, Assembler::pt, L_ok);
+  __ bind(L_fail);
+}
+
+void AdapterGenerator::gen_i2c_adapter(
+                            int total_args_passed,
+                            // VMReg max_arg,
+                            int comp_args_on_stack, // VMRegStackSlots
+                            const BasicType *sig_bt,
+                            const VMRegPair *regs) {
+
+  // Generate an I2C adapter: adjust the I-frame to make space for the C-frame
+  // layout. Lesp was saved by the calling I-frame and will be restored on
+  // return. Meanwhile, outgoing arg space is all owned by the callee
+  // C-frame, so we can mangle it at will. After adjusting the frame size,
+  // hoist register arguments and repack other args according to the compiled
+  // code convention. Finally, end in a jump to the compiled code. The entry
+  // point address is the start of the buffer.
+
+  // We will only enter here from an interpreted frame and never from after
+  // passing thru a c2i. Azul allowed this but we do not. If we lose the
+  // race and use a c2i we will remain interpreted for the race loser(s).
+  // This removes all sorts of headaches on the x86 side and also eliminates
+  // the possibility of having c2i -> i2c -> c2i -> ... endless transitions.
+
+  // More detail:
+  // Adapters can be frameless because they do not require the caller
+  // to perform additional cleanup work, such as correcting the stack pointer.
+  // An i2c adapter is frameless because the *caller* frame, which is interpreted,
+  // routinely repairs its own stack pointer (from interpreter_frame_last_sp),
+  // even if a callee has modified the stack pointer.
+  // A c2i adapter is frameless because the *callee* frame, which is interpreted,
+  // routinely repairs its caller's stack pointer (from sender_sp, which is set
+  // up via the senderSP register).
+  // In other words, if *either* the caller or callee is interpreted, we can
+  // get the stack pointer repaired after a call.
+  // This is why c2i and i2c adapters cannot be indefinitely composed.
+  // In particular, if a c2i adapter were to somehow call an i2c adapter,
+  // both caller and callee would be compiled methods, and neither would
+  // clean up the stack pointer changes performed by the two adapters.
+  // If this happens, control eventually transfers back to the compiled
+  // caller, but with an uncorrected stack, causing delayed havoc.
+
+  if (VerifyAdapterCalls &&
+      (Interpreter::code() != NULL || StubRoutines::code1() != NULL)) {
+    // So, let's test for cascading c2i/i2c adapters right now.
+    //  assert(Interpreter::contains($return_addr) ||
+    //         StubRoutines::contains($return_addr),
+    //         "i2c adapter must return to an interpreter frame");
+    __ block_comment("verify_i2c { ");
+    Label L_ok;
+    if (Interpreter::code() != NULL)
+      range_check(masm, O7, O0, O1,
+                  Interpreter::code()->code_start(), Interpreter::code()->code_end(),
+                  L_ok);
+    if (StubRoutines::code1() != NULL)
+      range_check(masm, O7, O0, O1,
+                  StubRoutines::code1()->code_begin(), StubRoutines::code1()->code_end(),
+                  L_ok);
+    if (StubRoutines::code2() != NULL)
+      range_check(masm, O7, O0, O1,
+                  StubRoutines::code2()->code_begin(), StubRoutines::code2()->code_end(),
+                  L_ok);
+    const char* msg = "i2c adapter must return to an interpreter frame";
+    __ block_comment(msg);
+    __ stop(msg);
+    __ bind(L_ok);
+    __ block_comment("} verify_i2c ");
+  }
+
+  // As you can see from the list of inputs & outputs there are not a lot
+  // of temp registers to work with: mostly G1, G3 & G4.
+
+  // Inputs:
+  // G2_thread      - TLS
+  // G5_method      - Method oop
+  // G4 (Gargs)     - Pointer to interpreter's args
+  // O0..O4         - free for scratch
+  // O5_savedSP     - Caller's saved SP, to be restored if needed
+  // O6             - Current SP!
+  // O7             - Valid return address
+  // L0-L7, I0-I7   - Caller's temps (no frame pushed yet)
+
+  // Outputs:
+  // G2_thread      - TLS
+  // O0-O5          - Outgoing args in compiled layout
+  // O6             - Adjusted or restored SP
+  // O7             - Valid return address
+  // L0-L7, I0-I7   - Caller's temps (no frame pushed yet)
+  // F0-F7          - more outgoing args
+
+
+  // Gargs is the incoming argument base, and also an outgoing argument.
+  __ sub(Gargs, BytesPerWord, Gargs);
+
+  // ON ENTRY TO THE CODE WE ARE MAKING, WE HAVE AN INTERPRETED FRAME
+  // WITH O7 HOLDING A VALID RETURN PC
+  //
+  // |              |
+  // :  java stack  :
+  // |              |
+  // +--------------+ <--- start of outgoing args
+  // | receiver     |   |
+  // : rest of args :   |---size is java-arg-words
+  // |              |   |
+  // +--------------+ <--- O4_args (misaligned) and Lesp if prior is not C2I
+  // |              |   |
+  // :    unused    :   |---Space for max Java stack, plus stack alignment
+  // |              |   |
+  // +--------------+ <--- SP + 16*wordsize
+  // |              |
+  // :    window    :
+  // |              |
+  // +--------------+ <--- SP
+
+  // WE REPACK THE STACK. We use the common calling convention layout as
+  // discovered by calling SharedRuntime::calling_convention. We assume it
+  // causes an arbitrary shuffle of memory, which may require some register
+  // temps to do the shuffle. We hope for (and optimize for) the case where
+  // temps are not needed. We may have to resize the stack slightly, in case
+  // we need alignment padding (32-bit interpreter can pass longs & doubles
+  // misaligned, but the compilers expect them aligned).
+  //
+  // |              |
+  // :  java stack  :
+  // |              |
+  // +--------------+ <--- start of outgoing args
+  // |  pad, align  |   |
+  // +--------------+   |
+  // | ints, longs, |   |
+  // |    floats,   |   |---Outgoing stack args.
+  // :    doubles   :   |   First few args in registers.
+  // |              |   |
+  // +--------------+ <--- SP' + 16*wordsize
+  // |              |
+  // :    window    :
+  // |              |
+  // +--------------+ <--- SP'
+
+  // ON EXIT FROM THE CODE WE ARE MAKING, WE STILL HAVE AN INTERPRETED FRAME
+  // WITH O7 HOLDING A VALID RETURN PC - IT'S JUST THAT THE ARGS ARE NOW SET UP
+  // FOR COMPILED CODE AND THE FRAME SLIGHTLY GROWN.
+
+  // Cut-out for having no stack args. Since up to 6 args are passed
+  // in registers, we will commonly have no stack args.
+  if (comp_args_on_stack > 0) {
+    // Convert VMReg stack slots to words.
+    int comp_words_on_stack = round_to(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
+    // Round up to minimum stack alignment, in wordSize
+    comp_words_on_stack = round_to(comp_words_on_stack, 2);
+    // Now compute the distance from Lesp to SP. This calculation does not
+    // include the space for total_args_passed because Lesp has not yet popped
+    // the arguments.
+    __ sub(SP, (comp_words_on_stack)*wordSize, SP);
+  }
+
+  // Now generate the shuffle code. Pick up all register args and move the
+  // rest through G1_scratch.
+  for (int i = 0; i < total_args_passed; i++) {
+    if (sig_bt[i] == T_VOID) {
+      // Longs and doubles are passed in native word order, but misaligned
+      // in the 32-bit build.
+      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
+      continue;
+    }
+
+    // Pick up 0, 1 or 2 words from Lesp+offset. Assume mis-aligned in the
+    // 32-bit build and aligned in the 64-bit build. Look for the obvious
+    // ldx/lddf optimizations.
+
+    // Load in argument order going down.
+    const int ld_off = (total_args_passed-i)*Interpreter::stackElementSize;
+    set_Rdisp(G1_scratch);
+
+    VMReg r_1 = regs[i].first();
+    VMReg r_2 = regs[i].second();
+    if (!r_1->is_valid()) {
+      assert(!r_2->is_valid(), "");
+      continue;
+    }
+    if (r_1->is_stack()) {        // Pretend stack targets are loaded into F8/F9
+      r_1 = F8->as_VMReg();       // as part of the load/store shuffle
+      if (r_2->is_valid()) r_2 = r_1->next();
+    }
+    if (r_1->is_Register()) {  // Register argument
+      Register r = r_1->as_Register()->after_restore();
+      if (!r_2->is_valid()) {
+        __ ld(Gargs, arg_slot(ld_off), r);
+      } else {
+#ifdef _LP64
+        // In V9, longs are given 2 64-bit slots in the interpreter, but the
+        // data is passed in only 1 slot.
+        RegisterOrConstant slot = (sig_bt[i] == T_LONG) ?
+              next_arg_slot(ld_off) : arg_slot(ld_off);
+        __ ldx(Gargs, slot, r);
+#else
+        fatal("longs should be on stack");
+#endif
+      }
+    } else {
+      assert(r_1->is_FloatRegister(), "");
+      if (!r_2->is_valid()) {
+        __ ldf(FloatRegisterImpl::S, Gargs, arg_slot(ld_off), r_1->as_FloatRegister());
+      } else {
+#ifdef _LP64
+        // In V9, doubles are given 2 64-bit slots in the interpreter, but the
+        // data is passed in only 1 slot. This code also handles longs that
+        // are passed on the stack, but need a stack-to-stack move through a
+        // spare float register.
+        RegisterOrConstant slot = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ?
+              next_arg_slot(ld_off) : arg_slot(ld_off);
+        __ ldf(FloatRegisterImpl::D, Gargs, slot, r_1->as_FloatRegister());
+#else
+        // Need to marshal 64-bit value from misaligned Lesp loads
+        __ ldf(FloatRegisterImpl::S, Gargs, next_arg_slot(ld_off), r_1->as_FloatRegister());
+        __ ldf(FloatRegisterImpl::S, Gargs, arg_slot(ld_off),      r_2->as_FloatRegister());
+#endif
+      }
+    }
+    // Was the argument really intended to be on the stack, but was loaded
+    // into F8/F9?
+    if (regs[i].first()->is_stack()) {
+      assert(r_1->as_FloatRegister() == F8, "fix this code");
+      // Convert stack slot to an SP offset
+      int st_off = reg2offset(regs[i].first()) + STACK_BIAS;
+      // Store down the shuffled stack word. Target address _is_ aligned.
+      RegisterOrConstant slot = __ ensure_simm13_or_reg(st_off, Rdisp);
+      if (!r_2->is_valid()) __ stf(FloatRegisterImpl::S, r_1->as_FloatRegister(), SP, slot);
+      else                  __ stf(FloatRegisterImpl::D, r_1->as_FloatRegister(), SP, slot);
+    }
+  }
+
+  // Jump to the compiled code just as if compiled code was doing it.
+  __ ld_ptr(G5_method, in_bytes(Method::from_compiled_offset()), G3);
+
+  // 6243940 We might end up in handle_wrong_method if
+  // the callee is deoptimized as we race thru here. If that
+  // happens we don't want to take a safepoint because the
+  // caller frame will look interpreted and arguments are now
+  // "compiled" so it is much better to make this transition
+  // invisible to the stack walking code. Unfortunately if
+  // we try and find the callee by normal means a safepoint
+  // is possible. So we stash the desired callee in the thread
+  // and the vm will find it there should this case occur.
+  Address callee_target_addr(G2_thread, JavaThread::callee_target_offset());
+  __ st_ptr(G5_method, callee_target_addr);
+  __ jmpl(G3, 0, G0);
+  __ delayed()->nop();
+}
+
+// ---------------------------------------------------------------
+AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
+                                                            int total_args_passed,
+                                                            // VMReg max_arg,
+                                                            int comp_args_on_stack, // VMRegStackSlots
+                                                            const BasicType *sig_bt,
+                                                            const VMRegPair *regs,
+                                                            AdapterFingerPrint* fingerprint) {
+  address i2c_entry = __ pc();
+
+  AdapterGenerator agen(masm);
+
+  agen.gen_i2c_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs);
+
+
+  // -------------------------------------------------------------------------
+  // Generate a C2I adapter. On entry we know G5 holds the Method*. The
+  // args start out packed in the compiled layout. They need to be unpacked
+  // into the interpreter layout. This will almost always require some stack
+  // space. We grow the current (compiled) stack, then repack the args. We
+  // finally end in a jump to the generic interpreter entry point. On exit
+  // from the interpreter, the interpreter will restore our SP (lest the
+  // compiled code, which relies solely on SP and not FP, get sick).
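+  // (Roughly, per the code below: the unverified c2i entry emitted first
+  // performs the inline-cache klass check against the CompiledICHolder and
+  // only then falls into the verified c2i path.)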
+
+  address c2i_unverified_entry = __ pc();
+  Label L_skip_fixup;
+  {
+    Register R_temp = G1;  // another scratch register
+
+    AddressLiteral ic_miss(SharedRuntime::get_ic_miss_stub());
+
+    __ verify_oop(O0);
+    __ load_klass(O0, G3_scratch);
+
+    __ ld_ptr(G5_method, CompiledICHolder::holder_klass_offset(), R_temp);
+    __ cmp(G3_scratch, R_temp);
+
+    Label ok, ok2;
+    __ brx(Assembler::equal, false, Assembler::pt, ok);
+    __ delayed()->ld_ptr(G5_method, CompiledICHolder::holder_method_offset(), G5_method);
+    __ jump_to(ic_miss, G3_scratch);
+    __ delayed()->nop();
+
+    __ bind(ok);
+    // Method might have been compiled since the call site was patched to
+    // interpreted; if that is the case treat it as a miss so we can get
+    // the call site corrected.
+    __ ld_ptr(G5_method, in_bytes(Method::code_offset()), G3_scratch);
+    __ bind(ok2);
+    __ br_null(G3_scratch, false, Assembler::pt, L_skip_fixup);
+    __ delayed()->nop();
+    __ jump_to(ic_miss, G3_scratch);
+    __ delayed()->nop();
+
+  }
+
+  address c2i_entry = __ pc();
+
+  agen.gen_c2i_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs, L_skip_fixup);
+
+  __ flush();
+  return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
+
+}
+
+// Helper function for native calling conventions
+static VMReg int_stk_helper( int i ) {
+  // Bias any stack based VMReg we get by ignoring the window area
+  // but not the register parameter save area.
+  //
+  // This is strange for the following reasons. We'd normally expect
+  // the calling convention to return a VMReg for a stack slot
+  // completely ignoring any abi reserved area. C2 thinks of that
+  // abi area as only out_preserve_stack_slots. This does not include
+  // the area allocated by the C abi to store down integer arguments
+  // because the java calling convention does not use it. So
+  // since c2 assumes that there are only out_preserve_stack_slots
+  // to bias the optoregs (which impacts VMRegs), when actually referencing
+  // any stack location the c calling convention must add in this bias amount
+  // to make up for the fact that out_preserve_stack_slots is
+  // insufficient for C calls. What a mess. I sure hope those 6
+  // stack words were worth it on every java call!
+
+  // Another way of cleaning this up would be for out_preserve_stack_slots
+  // to take a parameter to say whether it was C or java calling conventions.
+  // Then things might look a little better (but not much).
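+
+  // (Net effect, roughly, per the code below: for i < SPARC_ARGS_IN_REGS_NUM
+  // the argument maps to %o0..%o5; otherwise it maps to a memory-parameter
+  // stack slot whose offset is pre-biased so that adding
+  // out_preserve_stack_slots() back in yields the real ABI location.)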
+
+  int mem_parm_offset = i - SPARC_ARGS_IN_REGS_NUM;
+  if( mem_parm_offset < 0 ) {
+    return as_oRegister(i)->as_VMReg();
+  } else {
+    int actual_offset = (mem_parm_offset + frame::memory_parameter_word_sp_offset) * VMRegImpl::slots_per_word;
+    // Now return a biased offset that will be correct when out_preserve_slots is added back in
+    return VMRegImpl::stack2reg(actual_offset - SharedRuntime::out_preserve_stack_slots());
+  }
+}
+
+
+int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
+                                        VMRegPair *regs,
+                                        VMRegPair *regs2,
+                                        int total_args_passed) {
+  assert(regs2 == NULL, "not needed on sparc");
+
+  // Return the number of VMReg stack_slots needed for the args.
+  // This value does not include an abi space (like register window
+  // save area).
+
+  // The native convention is V8 if !LP64.
+  // The LP64 convention is the V9 convention, which is slightly more sane.
+
+  // We return the amount of VMReg stack slots we need to reserve for all
+  // the arguments NOT counting out_preserve_stack_slots. Since we always
+  // have space for storing at least 6 registers to memory we start with that.
+  // See int_stk_helper for a further discussion.
+  int max_stack_slots = (frame::varargs_offset * VMRegImpl::slots_per_word) - SharedRuntime::out_preserve_stack_slots();
+
+#ifdef _LP64
+  // V9 convention: All things "as-if" on double-wide stack slots.
+  // Hoist any int/ptr/long's in the first 6 to int regs.
+  // Hoist any flt/dbl's in the first 16 dbl regs.
+  int j = 0;                    // Count of actual args, not HALVES
+  for( int i=0; i<total_args_passed; i++, j++ ) {
+    switch( sig_bt[i] ) {
+    case T_BOOLEAN:
+    case T_BYTE:
+    case T_CHAR:
+    case T_INT:
+    case T_SHORT:
+      regs[i].set1( int_stk_helper( j ) ); break;
+    case T_LONG:
+      assert( sig_bt[i+1] == T_VOID, "expecting half" );
+    case T_ADDRESS: // raw pointers, like current thread, for VM calls
+    case T_ARRAY:
+    case T_OBJECT:
+    case T_METADATA:
+      regs[i].set2( int_stk_helper( j ) );
+      break;
+    case T_FLOAT:
+      if ( j < 16 ) {
+        // V9ism: floats go in ODD registers
+        regs[i].set1(as_FloatRegister(1 + (j<<1))->as_VMReg());
+      } else {
+        // V9ism: floats go in ODD stack slot
+        regs[i].set1(VMRegImpl::stack2reg(1 + (j<<1)));
+      }
+      break;
+    case T_DOUBLE:
+      assert( sig_bt[i+1] == T_VOID, "expecting half" );
+      if ( j < 16 ) {
+        // V9ism: doubles go in EVEN/ODD regs
+        regs[i].set2(as_FloatRegister(j<<1)->as_VMReg());
+      } else {
+        // V9ism: doubles go in EVEN/ODD stack slots
+        regs[i].set2(VMRegImpl::stack2reg(j<<1));
+      }
+      break;
+    case T_VOID: regs[i].set_bad(); j--; break; // Do not count HALVES
+    default:
+      ShouldNotReachHere();
+    }
+    if (regs[i].first()->is_stack()) {
+      int off = regs[i].first()->reg2stack();
+      if (off > max_stack_slots) max_stack_slots = off;
+    }
+    if (regs[i].second()->is_stack()) {
+      int off = regs[i].second()->reg2stack();
+      if (off > max_stack_slots) max_stack_slots = off;
+    }
+  }
+
+#else // _LP64
+  // V8 convention: first 6 things in O-regs, rest on stack.
+  // Alignment is willy-nilly.
+  for( int i=0; i<total_args_passed; i++ ) {
+    switch( sig_bt[i] ) {
+    case T_ADDRESS: // raw pointers, like current thread, for VM calls
+    case T_ARRAY:
+    case T_BOOLEAN:
+    case T_BYTE:
+    case T_CHAR:
+    case T_FLOAT:
+    case T_INT:
+    case T_OBJECT:
+    case T_METADATA:
+    case T_SHORT:
+      regs[i].set1( int_stk_helper( i ) );
+      break;
+    case T_DOUBLE:
+    case T_LONG:
+      assert( sig_bt[i+1] == T_VOID, "expecting half" );
+      regs[i].set_pair( int_stk_helper( i+1 ), int_stk_helper( i ) );
+      break;
+    case T_VOID: regs[i].set_bad(); break;
+    default:
+      ShouldNotReachHere();
+    }
+    if (regs[i].first()->is_stack()) {
+      int off = regs[i].first()->reg2stack();
+      if (off > max_stack_slots) max_stack_slots = off;
+    }
+    if (regs[i].second()->is_stack()) {
+      int off = regs[i].second()->reg2stack();
+      if (off > max_stack_slots) max_stack_slots = off;
+    }
+  }
+#endif // _LP64
+
+  return round_to(max_stack_slots + 1, 2);
+
+}
+
+
+// ---------------------------------------------------------------------------
+void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
+  switch (ret_type) {
+  case T_FLOAT:
+    __ stf(FloatRegisterImpl::S, F0, SP, frame_slots*VMRegImpl::stack_slot_size - 4+STACK_BIAS);
+    break;
+  case T_DOUBLE:
+    __ stf(FloatRegisterImpl::D, F0, SP, frame_slots*VMRegImpl::stack_slot_size - 8+STACK_BIAS);
+    break;
+  }
+}
+
+void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
+  switch (ret_type) {
+  case T_FLOAT:
+    __ ldf(FloatRegisterImpl::S, SP, frame_slots*VMRegImpl::stack_slot_size - 4+STACK_BIAS, F0);
+    break;
+  case T_DOUBLE:
+    __ ldf(FloatRegisterImpl::D, SP, frame_slots*VMRegImpl::stack_slot_size - 8+STACK_BIAS, F0);
+    break;
+  }
+}
+
+// Check and forward any pending exception. Thread is stored in
+// L7_thread_cache and possibly NOT in G2_thread. Since this is a native call, there
+// is no exception handler. We merely pop this frame off and throw the
+// exception in the caller's frame.
+static void check_forward_pending_exception(MacroAssembler *masm, Register Rex_oop) {
+  Label L;
+  __ br_null(Rex_oop, false, Assembler::pt, L);
+  __ delayed()->mov(L7_thread_cache, G2_thread); // restore in case we have exception
+  // Since this is a native call, we *know* the proper exception handler
+  // without calling into the VM: it's the empty function. Just pop this
+  // frame and then jump to forward_exception_entry; O7 will contain the
+  // native caller's return PC.
+  AddressLiteral exception_entry(StubRoutines::forward_exception_entry());
+  __ jump_to(exception_entry, G3_scratch);
+  __ delayed()->restore();      // Pop this frame off.
+  __ bind(L);
+}
+
+// A simple move of integer like type
+static void simple_move32(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
+  if (src.first()->is_stack()) {
+    if (dst.first()->is_stack()) {
+      // stack to stack
+      __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5);
+      __ st(L5, SP, reg2offset(dst.first()) + STACK_BIAS);
+    } else {
+      // stack to reg
+      __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
+    }
+  } else if (dst.first()->is_stack()) {
+    // reg to stack
+    __ st(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS);
+  } else {
+    __ mov(src.first()->as_Register(), dst.first()->as_Register());
+  }
+}
+
+// On 64-bit we will store integer like items to the stack as
+// 64-bit items (sparc abi) even though java would only store
+// 32 bits for a parameter. On 32-bit it will simply be 32 bits.
+// So this routine will do 32->32 on 32-bit and 32->64 on 64-bit.
+static void move32_64(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
+  if (src.first()->is_stack()) {
+    if (dst.first()->is_stack()) {
+      // stack to stack
+      __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5);
+      __ st_ptr(L5, SP, reg2offset(dst.first()) + STACK_BIAS);
+    } else {
+      // stack to reg
+      __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
+    }
+  } else if (dst.first()->is_stack()) {
+    // reg to stack
+    __ st_ptr(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS);
+  } else {
+    __ mov(src.first()->as_Register(), dst.first()->as_Register());
+  }
+}
+
+
+static void move_ptr(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
+  if (src.first()->is_stack()) {
+    if (dst.first()->is_stack()) {
+      // stack to stack
+      __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, L5);
+      __ st_ptr(L5, SP, reg2offset(dst.first()) + STACK_BIAS);
+    } else {
+      // stack to reg
+      __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
+    }
+  } else if (dst.first()->is_stack()) {
+    // reg to stack
+    __ st_ptr(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS);
+  } else {
+    __ mov(src.first()->as_Register(), dst.first()->as_Register());
+  }
+}
+
+
+// An oop arg. Must pass a handle not the oop itself
+static void object_move(MacroAssembler* masm,
+                        OopMap* map,
+                        int oop_handle_offset,
+                        int framesize_in_slots,
+                        VMRegPair src,
+                        VMRegPair dst,
+                        bool is_receiver,
+                        int* receiver_offset) {
+
+  // must pass a handle. First figure out the location we use as a handle
+
+  if (src.first()->is_stack()) {
+    // Oop is already on the stack
L5 : dst.first()->as_Register(); 1.1342 + __ add(FP, reg2offset(src.first()) + STACK_BIAS, rHandle); 1.1343 + __ ld_ptr(rHandle, 0, L4); 1.1344 +#ifdef _LP64 1.1345 + __ movr( Assembler::rc_z, L4, G0, rHandle ); 1.1346 +#else 1.1347 + __ tst( L4 ); 1.1348 + __ movcc( Assembler::zero, false, Assembler::icc, G0, rHandle ); 1.1349 +#endif 1.1350 + if (dst.first()->is_stack()) { 1.1351 + __ st_ptr(rHandle, SP, reg2offset(dst.first()) + STACK_BIAS); 1.1352 + } 1.1353 + int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); 1.1354 + if (is_receiver) { 1.1355 + *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size; 1.1356 + } 1.1357 + map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); 1.1358 + } else { 1.1359 + // Oop is in an input register; we must flush it to the stack 1.1360 + const Register rOop = src.first()->as_Register(); 1.1361 + const Register rHandle = L5; 1.1362 + int oop_slot = rOop->input_number() * VMRegImpl::slots_per_word + oop_handle_offset; 1.1363 + int offset = oop_slot*VMRegImpl::stack_slot_size; 1.1364 + Label skip; 1.1365 + __ st_ptr(rOop, SP, offset + STACK_BIAS); 1.1366 + if (is_receiver) { 1.1367 + *receiver_offset = oop_slot * VMRegImpl::stack_slot_size; 1.1368 + } 1.1369 + map->set_oop(VMRegImpl::stack2reg(oop_slot)); 1.1370 + __ add(SP, offset + STACK_BIAS, rHandle); 1.1371 +#ifdef _LP64 1.1372 + __ movr( Assembler::rc_z, rOop, G0, rHandle ); 1.1373 +#else 1.1374 + __ tst( rOop ); 1.1375 + __ movcc( Assembler::zero, false, Assembler::icc, G0, rHandle ); 1.1376 +#endif 1.1377 + 1.1378 + if (dst.first()->is_stack()) { 1.1379 + __ st_ptr(rHandle, SP, reg2offset(dst.first()) + STACK_BIAS); 1.1380 + } else { 1.1381 + __ mov(rHandle, dst.first()->as_Register()); 1.1382 + } 1.1383 + } 1.1384 +} 1.1385 + 1.1386 +// A float arg may have to do a float reg to int reg conversion 1.1387 +static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { 1.1388 + assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move"); 1.1389 + 1.1390 + if (src.first()->is_stack()) { 1.1391 + if (dst.first()->is_stack()) { 1.1392 + // stack to stack the easiest of the bunch 1.1393 + __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5); 1.1394 + __ st(L5, SP, reg2offset(dst.first()) + STACK_BIAS); 1.1395 + } else { 1.1396 + // stack to reg 1.1397 + if (dst.first()->is_Register()) { 1.1398 + __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register()); 1.1399 + } else { 1.1400 + __ ldf(FloatRegisterImpl::S, FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_FloatRegister()); 1.1401 + } 1.1402 + } 1.1403 + } else if (dst.first()->is_stack()) { 1.1404 + // reg to stack 1.1405 + if (src.first()->is_Register()) { 1.1406 + __ st(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS); 1.1407 + } else { 1.1408 + __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), SP, reg2offset(dst.first()) + STACK_BIAS); 1.1409 + } 1.1410 + } else { 1.1411 + // reg to reg 1.1412 + if (src.first()->is_Register()) { 1.1413 + if (dst.first()->is_Register()) { 1.1414 + // gpr -> gpr 1.1415 + __ mov(src.first()->as_Register(), dst.first()->as_Register()); 1.1416 + } else { 1.1417 + // gpr -> fpr 1.1418 + __ st(src.first()->as_Register(), FP, -4 + STACK_BIAS); 1.1419 + __ ldf(FloatRegisterImpl::S, FP, -4 + STACK_BIAS, dst.first()->as_FloatRegister()); 1.1420 + } 1.1421 + } else if (dst.first()->is_Register()) { 1.1422 + // fpr -> gpr 1.1423
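// Why the moves here go through memory: the base SPARC ISA has no
// direct copy between the integer and floating point register files
// (VIS3's movstouw/movdtox arrived much later), so a fpr<->gpr move is
// a store to a scratch word just below FP followed by a load of the
// same word. Illustrative sketch (Fsrc/Rdst are placeholder names, and
// the -4 slot is assumed to be free scratch in this frame):
//   __ stf(FloatRegisterImpl::S, Fsrc, FP, -4 + STACK_BIAS); // spill fpr
//   __ ld(FP, -4 + STACK_BIAS, Rdst);                        // reload as gpr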
+ __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), FP, -4 + STACK_BIAS); 1.1424 + __ ld(FP, -4 + STACK_BIAS, dst.first()->as_Register()); 1.1425 + } else { 1.1426 + // fpr -> fpr 1.1427 + // In theory these overlap but the ordering is such that this is likely a nop 1.1428 + if ( src.first() != dst.first()) { 1.1429 + __ fmov(FloatRegisterImpl::S, src.first()->as_FloatRegister(), dst.first()->as_FloatRegister()); 1.1430 + } 1.1431 + } 1.1432 + } 1.1433 +} 1.1434 + 1.1435 +static void split_long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { 1.1436 + VMRegPair src_lo(src.first()); 1.1437 + VMRegPair src_hi(src.second()); 1.1438 + VMRegPair dst_lo(dst.first()); 1.1439 + VMRegPair dst_hi(dst.second()); 1.1440 + simple_move32(masm, src_lo, dst_lo); 1.1441 + simple_move32(masm, src_hi, dst_hi); 1.1442 +} 1.1443 + 1.1444 +// A long move 1.1445 +static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { 1.1446 + 1.1447 + // Do the simple ones here else do two int moves 1.1448 + if (src.is_single_phys_reg() ) { 1.1449 + if (dst.is_single_phys_reg()) { 1.1450 + __ mov(src.first()->as_Register(), dst.first()->as_Register()); 1.1451 + } else { 1.1452 + // split src into two separate registers 1.1453 + // Remember hi means hi address or lsw on sparc 1.1454 + // Move msw to lsw 1.1455 + if (dst.second()->is_reg()) { 1.1456 + // MSW -> MSW 1.1457 + __ srax(src.first()->as_Register(), 32, dst.first()->as_Register()); 1.1458 + // Now LSW -> LSW 1.1459 + // this will only move lo -> lo and ignore hi 1.1460 + VMRegPair split(dst.second()); 1.1461 + simple_move32(masm, src, split); 1.1462 + } else { 1.1463 + VMRegPair split(src.first(), L4->as_VMReg()); 1.1464 + // MSW -> MSW (lo ie. first word) 1.1465 + __ srax(src.first()->as_Register(), 32, L4); 1.1466 + split_long_move(masm, split, dst); 1.1467 + } 1.1468 + } 1.1469 + } else if (dst.is_single_phys_reg()) { 1.1470 + if (src.is_adjacent_aligned_on_stack(2)) { 1.1471 + __ ldx(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register()); 1.1472 + } else { 1.1473 + // dst is a single reg. 1.1474 + // Remember lo is low address not msb for stack slots 1.1475 + // and lo is the "real" register for registers 1.1476 + // src is 1.1477 + 1.1478 + VMRegPair split; 1.1479 + 1.1480 + if (src.first()->is_reg()) { 1.1481 + // src.lo (msw) is a reg, src.hi is stk/reg 1.1482 + // we will move: src.hi (LSW) -> dst.lo, src.lo (MSW) -> src.lo [the MSW is in the LSW of the reg] 1.1483 + split.set_pair(dst.first(), src.first()); 1.1484 + } else { 1.1485 + // msw is stack move to L5 1.1486 + // lsw is stack move to dst.lo (real reg) 1.1487 + // we will move: src.hi (LSW) -> dst.lo, src.lo (MSW) -> L5 1.1488 + split.set_pair(dst.first(), L5->as_VMReg()); 1.1489 + } 1.1490 + 1.1491 + // src.lo -> src.lo/L5, src.hi -> dst.lo (the real reg) 1.1492 + // msw -> src.lo/L5, lsw -> dst.lo 1.1493 + split_long_move(masm, src, split); 1.1494 + 1.1495 + // So dst now has the low order correct; position the 1.1496 + // msw half 1.1497 + __ sllx(split.first()->as_Register(), 32, L5); 1.1498 + 1.1499 + const Register d = dst.first()->as_Register(); 1.1500 + __ or3(L5, d, d); 1.1501 + } 1.1502 + } else { 1.1503 + // For LP64 we can probably do better.
1.1504 + split_long_move(masm, src, dst); 1.1505 + } 1.1506 +} 1.1507 + 1.1508 +// A double move 1.1509 +static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { 1.1510 + 1.1511 + // The painful thing here is that like long_move a VMRegPair might be 1.1512 + // 1: a single physical register 1.1513 + // 2: two physical registers (v8) 1.1514 + // 3: a physical reg [lo] and a stack slot [hi] (v8) 1.1515 + // 4: two stack slots 1.1516 + 1.1517 + // Since src is always a java calling convention we know that the src pair 1.1518 + // is always either all registers or all stack (and aligned?) 1.1519 + 1.1520 + // in a register [lo] and a stack slot [hi] 1.1521 + if (src.first()->is_stack()) { 1.1522 + if (dst.first()->is_stack()) { 1.1523 + // stack to stack the easiest of the bunch 1.1524 + // ought to be a way to do this where if alignment is ok we use ldd/std when possible 1.1525 + __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5); 1.1526 + __ ld(FP, reg2offset(src.second()) + STACK_BIAS, L4); 1.1527 + __ st(L5, SP, reg2offset(dst.first()) + STACK_BIAS); 1.1528 + __ st(L4, SP, reg2offset(dst.second()) + STACK_BIAS); 1.1529 + } else { 1.1530 + // stack to reg 1.1531 + if (dst.second()->is_stack()) { 1.1532 + // stack -> reg, stack -> stack 1.1533 + __ ld(FP, reg2offset(src.second()) + STACK_BIAS, L4); 1.1534 + if (dst.first()->is_Register()) { 1.1535 + __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register()); 1.1536 + } else { 1.1537 + __ ldf(FloatRegisterImpl::S, FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_FloatRegister()); 1.1538 + } 1.1539 + // This was missing. (very rare case) 1.1540 + __ st(L4, SP, reg2offset(dst.second()) + STACK_BIAS); 1.1541 + } else { 1.1542 + // stack -> reg 1.1543 + // Eventually optimize for alignment QQQ 1.1544 + if (dst.first()->is_Register()) { 1.1545 + __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register()); 1.1546 + __ ld(FP, reg2offset(src.second()) + STACK_BIAS, dst.second()->as_Register()); 1.1547 + } else { 1.1548 + __ ldf(FloatRegisterImpl::S, FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_FloatRegister()); 1.1549 + __ ldf(FloatRegisterImpl::S, FP, reg2offset(src.second()) + STACK_BIAS, dst.second()->as_FloatRegister()); 1.1550 + } 1.1551 + } 1.1552 + } 1.1553 + } else if (dst.first()->is_stack()) { 1.1554 + // reg to stack 1.1555 + if (src.first()->is_Register()) { 1.1556 + // Eventually optimize for alignment QQQ 1.1557 + __ st(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS); 1.1558 + if (src.second()->is_stack()) { 1.1559 + __ ld(FP, reg2offset(src.second()) + STACK_BIAS, L4); 1.1560 + __ st(L4, SP, reg2offset(dst.second()) + STACK_BIAS); 1.1561 + } else { 1.1562 + __ st(src.second()->as_Register(), SP, reg2offset(dst.second()) + STACK_BIAS); 1.1563 + } 1.1564 + } else { 1.1565 + // fpr to stack 1.1566 + if (src.second()->is_stack()) { 1.1567 + ShouldNotReachHere(); 1.1568 + } else { 1.1569 + // Is the stack aligned? 
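// Note on the test just below: an 8-byte stf of a double
// (FloatRegisterImpl::D) traps with mem_address_not_aligned unless the
// effective address is doubleword aligned, so the code checks the low
// three bits of the offset and falls back to two 4-byte
// single-precision stores when the slot is not 8-byte aligned.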
1.1570 + if (reg2offset(dst.first()) & 0x7) { 1.1571 + // No, do as pairs 1.1572 + __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), SP, reg2offset(dst.first()) + STACK_BIAS); 1.1573 + __ stf(FloatRegisterImpl::S, src.second()->as_FloatRegister(), SP, reg2offset(dst.second()) + STACK_BIAS); 1.1574 + } else { 1.1575 + __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), SP, reg2offset(dst.first()) + STACK_BIAS); 1.1576 + } 1.1577 + } 1.1578 + } 1.1579 + } else { 1.1580 + // reg to reg 1.1581 + if (src.first()->is_Register()) { 1.1582 + if (dst.first()->is_Register()) { 1.1583 + // gpr -> gpr 1.1584 + __ mov(src.first()->as_Register(), dst.first()->as_Register()); 1.1585 + __ mov(src.second()->as_Register(), dst.second()->as_Register()); 1.1586 + } else { 1.1587 + // gpr -> fpr 1.1588 + // ought to be able to do a single store 1.1589 + __ stx(src.first()->as_Register(), FP, -8 + STACK_BIAS); 1.1590 + __ stx(src.second()->as_Register(), FP, -4 + STACK_BIAS); 1.1591 + // ought to be able to do a single load 1.1592 + __ ldf(FloatRegisterImpl::S, FP, -8 + STACK_BIAS, dst.first()->as_FloatRegister()); 1.1593 + __ ldf(FloatRegisterImpl::S, FP, -4 + STACK_BIAS, dst.second()->as_FloatRegister()); 1.1594 + } 1.1595 + } else if (dst.first()->is_Register()) { 1.1596 + // fpr -> gpr 1.1597 + // ought to be able to do a single store 1.1598 + __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), FP, -8 + STACK_BIAS); 1.1599 + // ought to be able to do a single load 1.1600 + // REMEMBER first() is low address not LSB 1.1601 + __ ld(FP, -8 + STACK_BIAS, dst.first()->as_Register()); 1.1602 + if (dst.second()->is_Register()) { 1.1603 + __ ld(FP, -4 + STACK_BIAS, dst.second()->as_Register()); 1.1604 + } else { 1.1605 + __ ld(FP, -4 + STACK_BIAS, L4); 1.1606 + __ st(L4, SP, reg2offset(dst.second()) + STACK_BIAS); 1.1607 + } 1.1608 + } else { 1.1609 + // fpr -> fpr 1.1610 + // In theory these overlap but the ordering is such that this is likely a nop 1.1611 + if ( src.first() != dst.first()) { 1.1612 + __ fmov(FloatRegisterImpl::D, src.first()->as_FloatRegister(), dst.first()->as_FloatRegister()); 1.1613 + } 1.1614 + } 1.1615 + } 1.1616 +} 1.1617 + 1.1618 +// Creates an inner frame if one hasn't already been created, and 1.1619 +// saves a copy of the thread in L7_thread_cache 1.1620 +static void create_inner_frame(MacroAssembler* masm, bool* already_created) { 1.1621 + if (!*already_created) { 1.1622 + __ save_frame(0); 1.1623 + // Save thread in L7 (INNER FRAME); it crosses a bunch of VM calls below 1.1624 + // Don't use save_thread because it smashes G2 and we merely want to save a 1.1625 + // copy 1.1626 + __ mov(G2_thread, L7_thread_cache); 1.1627 + *already_created = true; 1.1628 + } 1.1629 +} 1.1630 + 1.1631 + 1.1632 +static void save_or_restore_arguments(MacroAssembler* masm, 1.1633 + const int stack_slots, 1.1634 + const int total_in_args, 1.1635 + const int arg_save_area, 1.1636 + OopMap* map, 1.1637 + VMRegPair* in_regs, 1.1638 + BasicType* in_sig_bt) { 1.1639 + // if map is non-NULL then the code should store the values, 1.1640 + // otherwise it should load them.
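// Illustrative sketch of the intended call pattern (see
// check_needs_gc_for_critical_native below for the real caller; the
// "..." arguments are elided here, not part of the real signature):
//   OopMap* map = new OopMap(stack_slots * 2, 0);
//   save_or_restore_arguments(masm, ..., map,  in_regs, in_sig_bt); // save
//   ... emit the blocking runtime call ...
//   save_or_restore_arguments(masm, ..., NULL, in_regs, in_sig_bt); // reload
// Both passes walk the arguments in the same order, so every value is
// reloaded from exactly the slot it was saved to.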
1.1641 + if (map != NULL) { 1.1642 + // Fill in the map 1.1643 + for (int i = 0; i < total_in_args; i++) { 1.1644 + if (in_sig_bt[i] == T_ARRAY) { 1.1645 + if (in_regs[i].first()->is_stack()) { 1.1646 + int offset_in_older_frame = in_regs[i].first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); 1.1647 + map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots)); 1.1648 + } else if (in_regs[i].first()->is_Register()) { 1.1649 + map->set_oop(in_regs[i].first()); 1.1650 + } else { 1.1651 + ShouldNotReachHere(); 1.1652 + } 1.1653 + } 1.1654 + } 1.1655 + } 1.1656 + 1.1657 + // Save or restore double word values 1.1658 + int handle_index = 0; 1.1659 + for (int i = 0; i < total_in_args; i++) { 1.1660 + int slot = handle_index + arg_save_area; 1.1661 + int offset = slot * VMRegImpl::stack_slot_size; 1.1662 + if (in_sig_bt[i] == T_LONG && in_regs[i].first()->is_Register()) { 1.1663 + const Register reg = in_regs[i].first()->as_Register(); 1.1664 + if (reg->is_global()) { 1.1665 + handle_index += 2; 1.1666 + assert(handle_index <= stack_slots, "overflow"); 1.1667 + if (map != NULL) { 1.1668 + __ stx(reg, SP, offset + STACK_BIAS); 1.1669 + } else { 1.1670 + __ ldx(SP, offset + STACK_BIAS, reg); 1.1671 + } 1.1672 + } 1.1673 + } else if (in_sig_bt[i] == T_DOUBLE && in_regs[i].first()->is_FloatRegister()) { 1.1674 + handle_index += 2; 1.1675 + assert(handle_index <= stack_slots, "overflow"); 1.1676 + if (map != NULL) { 1.1677 + __ stf(FloatRegisterImpl::D, in_regs[i].first()->as_FloatRegister(), SP, offset + STACK_BIAS); 1.1678 + } else { 1.1679 + __ ldf(FloatRegisterImpl::D, SP, offset + STACK_BIAS, in_regs[i].first()->as_FloatRegister()); 1.1680 + } 1.1681 + } 1.1682 + } 1.1683 + // Save floats 1.1684 + for (int i = 0; i < total_in_args; i++) { 1.1685 + int slot = handle_index + arg_save_area; 1.1686 + int offset = slot * VMRegImpl::stack_slot_size; 1.1687 + if (in_sig_bt[i] == T_FLOAT && in_regs[i].first()->is_FloatRegister()) { 1.1688 + handle_index++; 1.1689 + assert(handle_index <= stack_slots, "overflow"); 1.1690 + if (map != NULL) { 1.1691 + __ stf(FloatRegisterImpl::S, in_regs[i].first()->as_FloatRegister(), SP, offset + STACK_BIAS); 1.1692 + } else { 1.1693 + __ ldf(FloatRegisterImpl::S, SP, offset + STACK_BIAS, in_regs[i].first()->as_FloatRegister()); 1.1694 + } 1.1695 + } 1.1696 + } 1.1697 + 1.1698 +} 1.1699 + 1.1700 + 1.1701 +// Check GC_locker::needs_gc and enter the runtime if it's true. This 1.1702 +// keeps a new JNI critical region from starting until a GC has been 1.1703 +// forced. Save down any oops in registers and describe them in an 1.1704 +// OopMap. 
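// Note: the OopMap built on this path is sized stack_slots * 2 (see
// below), seemingly so that slots addressed relative to the caller's
// frame - offset_in_older_frame + stack_slots in
// save_or_restore_arguments above - still index into the map; compare
// the "double the stack slots" trick commented in
// generate_native_wrapper further down.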
1.1705 +static void check_needs_gc_for_critical_native(MacroAssembler* masm, 1.1706 + const int stack_slots, 1.1707 + const int total_in_args, 1.1708 + const int arg_save_area, 1.1709 + OopMapSet* oop_maps, 1.1710 + VMRegPair* in_regs, 1.1711 + BasicType* in_sig_bt) { 1.1712 + __ block_comment("check GC_locker::needs_gc"); 1.1713 + Label cont; 1.1714 + AddressLiteral sync_state(GC_locker::needs_gc_address()); 1.1715 + __ load_bool_contents(sync_state, G3_scratch); 1.1716 + __ cmp_zero_and_br(Assembler::equal, G3_scratch, cont); 1.1717 + __ delayed()->nop(); 1.1718 + 1.1719 + // Save down any values that are live in registers and call into the 1.1720 + // runtime to halt for a GC 1.1721 + OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); 1.1722 + save_or_restore_arguments(masm, stack_slots, total_in_args, 1.1723 + arg_save_area, map, in_regs, in_sig_bt); 1.1724 + 1.1725 + __ mov(G2_thread, L7_thread_cache); 1.1726 + 1.1727 + __ set_last_Java_frame(SP, noreg); 1.1728 + 1.1729 + __ block_comment("block_for_jni_critical"); 1.1730 + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::block_for_jni_critical), relocInfo::runtime_call_type); 1.1731 + __ delayed()->mov(L7_thread_cache, O0); 1.1732 + oop_maps->add_gc_map( __ offset(), map); 1.1733 + 1.1734 + __ restore_thread(L7_thread_cache); // restore G2_thread 1.1735 + __ reset_last_Java_frame(); 1.1736 + 1.1737 + // Reload all the register arguments 1.1738 + save_or_restore_arguments(masm, stack_slots, total_in_args, 1.1739 + arg_save_area, NULL, in_regs, in_sig_bt); 1.1740 + 1.1741 + __ bind(cont); 1.1742 +#ifdef ASSERT 1.1743 + if (StressCriticalJNINatives) { 1.1744 + // Stress register saving 1.1745 + OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); 1.1746 + save_or_restore_arguments(masm, stack_slots, total_in_args, 1.1747 + arg_save_area, map, in_regs, in_sig_bt); 1.1748 + // Destroy argument registers 1.1749 + for (int i = 0; i < total_in_args; i++) { 1.1750 + if (in_regs[i].first()->is_Register()) { 1.1751 + const Register reg = in_regs[i].first()->as_Register(); 1.1752 + if (reg->is_global()) { 1.1753 + __ mov(G0, reg); 1.1754 + } 1.1755 + } else if (in_regs[i].first()->is_FloatRegister()) { 1.1756 + __ fneg(FloatRegisterImpl::D, in_regs[i].first()->as_FloatRegister(), in_regs[i].first()->as_FloatRegister()); 1.1757 + } 1.1758 + } 1.1759 + 1.1760 + save_or_restore_arguments(masm, stack_slots, total_in_args, 1.1761 + arg_save_area, NULL, in_regs, in_sig_bt); 1.1762 + } 1.1763 +#endif 1.1764 +} 1.1765 + 1.1766 +// Unpack an array argument into a pointer to the body and the length 1.1767 +// if the array is non-null, otherwise pass 0 for both. 
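// Illustrative example (hypothetical names): for a critical native
// taking a byte[], the one Java array argument expands into an
// (int, address) pair, so
//   static native int sum(byte[] data);
// would be entered roughly as
//   jint JavaCritical_pkg_Cls_sum(jint length, jbyte* body);
// with the length passed before the body, matching the T_INT/T_ADDRESS
// order pushed into out_sig_bt in generate_native_wrapper below.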
1.1768 +static void unpack_array_argument(MacroAssembler* masm, VMRegPair reg, BasicType in_elem_type, VMRegPair body_arg, VMRegPair length_arg) { 1.1769 + // Pass the length, ptr pair 1.1770 + Label is_null, done; 1.1771 + if (reg.first()->is_stack()) { 1.1772 + VMRegPair tmp = reg64_to_VMRegPair(L2); 1.1773 + // Load the arg up from the stack 1.1774 + move_ptr(masm, reg, tmp); 1.1775 + reg = tmp; 1.1776 + } 1.1777 + __ cmp(reg.first()->as_Register(), G0); 1.1778 + __ brx(Assembler::equal, false, Assembler::pt, is_null); 1.1779 + __ delayed()->add(reg.first()->as_Register(), arrayOopDesc::base_offset_in_bytes(in_elem_type), L4); 1.1780 + move_ptr(masm, reg64_to_VMRegPair(L4), body_arg); 1.1781 + __ ld(reg.first()->as_Register(), arrayOopDesc::length_offset_in_bytes(), L4); 1.1782 + move32_64(masm, reg64_to_VMRegPair(L4), length_arg); 1.1783 + __ ba_short(done); 1.1784 + __ bind(is_null); 1.1785 + // Pass zeros 1.1786 + move_ptr(masm, reg64_to_VMRegPair(G0), body_arg); 1.1787 + move32_64(masm, reg64_to_VMRegPair(G0), length_arg); 1.1788 + __ bind(done); 1.1789 +} 1.1790 + 1.1791 +static void verify_oop_args(MacroAssembler* masm, 1.1792 + methodHandle method, 1.1793 + const BasicType* sig_bt, 1.1794 + const VMRegPair* regs) { 1.1795 + Register temp_reg = G5_method; // not part of any compiled calling seq 1.1796 + if (VerifyOops) { 1.1797 + for (int i = 0; i < method->size_of_parameters(); i++) { 1.1798 + if (sig_bt[i] == T_OBJECT || 1.1799 + sig_bt[i] == T_ARRAY) { 1.1800 + VMReg r = regs[i].first(); 1.1801 + assert(r->is_valid(), "bad oop arg"); 1.1802 + if (r->is_stack()) { 1.1803 + RegisterOrConstant ld_off = reg2offset(r) + STACK_BIAS; 1.1804 + ld_off = __ ensure_simm13_or_reg(ld_off, temp_reg); 1.1805 + __ ld_ptr(SP, ld_off, temp_reg); 1.1806 + __ verify_oop(temp_reg); 1.1807 + } else { 1.1808 + __ verify_oop(r->as_Register()); 1.1809 + } 1.1810 + } 1.1811 + } 1.1812 + } 1.1813 +} 1.1814 + 1.1815 +static void gen_special_dispatch(MacroAssembler* masm, 1.1816 + methodHandle method, 1.1817 + const BasicType* sig_bt, 1.1818 + const VMRegPair* regs) { 1.1819 + verify_oop_args(masm, method, sig_bt, regs); 1.1820 + vmIntrinsics::ID iid = method->intrinsic_id(); 1.1821 + 1.1822 + // Now write the args into the outgoing interpreter space 1.1823 + bool has_receiver = false; 1.1824 + Register receiver_reg = noreg; 1.1825 + int member_arg_pos = -1; 1.1826 + Register member_reg = noreg; 1.1827 + int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); 1.1828 + if (ref_kind != 0) { 1.1829 + member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument 1.1830 + member_reg = G5_method; // known to be free at this point 1.1831 + has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); 1.1832 + } else if (iid == vmIntrinsics::_invokeBasic) { 1.1833 + has_receiver = true; 1.1834 + } else { 1.1835 + fatal(err_msg_res("unexpected intrinsic id %d", iid)); 1.1836 + } 1.1837 + 1.1838 + if (member_reg != noreg) { 1.1839 + // Load the member_arg into register, if necessary. 
SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); 1.1841 + VMReg r = regs[member_arg_pos].first(); 1.1842 + if (r->is_stack()) { 1.1843 + RegisterOrConstant ld_off = reg2offset(r) + STACK_BIAS; 1.1844 + ld_off = __ ensure_simm13_or_reg(ld_off, member_reg); 1.1845 + __ ld_ptr(SP, ld_off, member_reg); 1.1846 + } else { 1.1847 + // no data motion is needed 1.1848 + member_reg = r->as_Register(); 1.1849 + } 1.1850 + } 1.1851 + 1.1852 + if (has_receiver) { 1.1853 + // Make sure the receiver is loaded into a register. 1.1854 + assert(method->size_of_parameters() > 0, "oob"); 1.1855 + assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); 1.1856 + VMReg r = regs[0].first(); 1.1857 + assert(r->is_valid(), "bad receiver arg"); 1.1858 + if (r->is_stack()) { 1.1859 + // Porting note: This assumes that compiled calling conventions always 1.1860 + // pass the receiver oop in a register. If this is not true on some 1.1861 + // platform, pick a temp and load the receiver from stack. 1.1862 + fatal("receiver always in a register"); 1.1863 + receiver_reg = G3_scratch; // known to be free at this point 1.1864 + RegisterOrConstant ld_off = reg2offset(r) + STACK_BIAS; 1.1865 + ld_off = __ ensure_simm13_or_reg(ld_off, member_reg); 1.1866 + __ ld_ptr(SP, ld_off, receiver_reg); 1.1867 + } else { 1.1868 + // no data motion is needed 1.1869 + receiver_reg = r->as_Register(); 1.1870 + } 1.1871 + } 1.1872 + 1.1873 + // Figure out which address we are really jumping to: 1.1874 + MethodHandles::generate_method_handle_dispatch(masm, iid, 1.1875 + receiver_reg, member_reg, /*for_compiler_entry:*/ true); 1.1876 +} 1.1877 + 1.1878 +// --------------------------------------------------------------------------- 1.1879 +// Generate a native wrapper for a given method. The method takes arguments 1.1880 +// in the Java compiled code convention, marshals them to the native 1.1881 +// convention (handlizes oops, etc), transitions to native, makes the call, 1.1882 +// returns to java state (possibly blocking), unhandlizes any result and 1.1883 +// returns. 1.1884 +// 1.1885 +// Critical native functions are a shorthand for the use of 1.1886 +// GetPrimitiveArrayCritical and disallow the use of any other JNI 1.1887 +// functions. The wrapper is expected to unpack the arguments before 1.1888 +// passing them to the callee and perform checks before and after the 1.1889 +// native call to ensure that the GC_locker 1.1890 +// lock_critical/unlock_critical semantics are followed. Some other 1.1891 +// parts of JNI setup are skipped, like the tear down of the JNI handle 1.1892 +// block and the check for pending exceptions, because it's impossible for them 1.1893 +// to be thrown. 1.1894 +// 1.1895 +// They are roughly structured like this: 1.1896 +// if (GC_locker::needs_gc()) 1.1897 +// SharedRuntime::block_for_jni_critical(); 1.1898 +// transition to thread_in_native 1.1899 +// unpack array arguments and call native entry point 1.1900 +// check for safepoint in progress 1.1901 +// check if any thread suspend flags are set 1.1902 +// call into JVM and possibly unlock the JNI critical 1.1903 +// if a GC was suppressed while in the critical native. 1.1904 +// transition back to thread_in_Java 1.1905 +// return to caller 1.1906 +// 1.1907 +nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, 1.1908 + methodHandle method, 1.1909 + int compile_id, 1.1910 + BasicType* in_sig_bt, 1.1911 + VMRegPair* in_regs, 1.1912 + BasicType ret_type) { 1.1913 + if (method->is_method_handle_intrinsic()) { 1.1914 + vmIntrinsics::ID iid = method->intrinsic_id(); 1.1915 + intptr_t start = (intptr_t)__ pc(); 1.1916 + int vep_offset = ((intptr_t)__ pc()) - start; 1.1917 + gen_special_dispatch(masm, 1.1918 + method, 1.1919 + in_sig_bt, 1.1920 + in_regs); 1.1921 + int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period 1.1922 + __ flush(); 1.1923 + int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually 1.1924 + return nmethod::new_native_nmethod(method, 1.1925 + compile_id, 1.1926 + masm->code(), 1.1927 + vep_offset, 1.1928 + frame_complete, 1.1929 + stack_slots / VMRegImpl::slots_per_word, 1.1930 + in_ByteSize(-1), 1.1931 + in_ByteSize(-1), 1.1932 + (OopMapSet*)NULL); 1.1933 + } 1.1934 + bool is_critical_native = true; 1.1935 + address native_func = method->critical_native_function(); 1.1936 + if (native_func == NULL) { 1.1937 + native_func = method->native_function(); 1.1938 + is_critical_native = false; 1.1939 + } 1.1940 + assert(native_func != NULL, "must have function"); 1.1941 + 1.1942 + // Native nmethod wrappers never take possession of the oop arguments. 1.1943 + // So the caller will gc the arguments. The only thing we need an 1.1944 + // oopMap for is if the call is static 1.1945 + // 1.1946 + // An OopMap for lock (and class if static), and one for the VM call itself 1.1947 + OopMapSet *oop_maps = new OopMapSet(); 1.1948 + intptr_t start = (intptr_t)__ pc(); 1.1949 + 1.1950 + // First thing, make an ic check to see if we should even be here 1.1951 + { 1.1952 + Label L; 1.1953 + const Register temp_reg = G3_scratch; 1.1954 + AddressLiteral ic_miss(SharedRuntime::get_ic_miss_stub()); 1.1955 + __ verify_oop(O0); 1.1956 + __ load_klass(O0, temp_reg); 1.1957 + __ cmp_and_brx_short(temp_reg, G5_inline_cache_reg, Assembler::equal, Assembler::pt, L); 1.1958 + 1.1959 + __ jump_to(ic_miss, temp_reg); 1.1960 + __ delayed()->nop(); 1.1961 + __ align(CodeEntryAlignment); 1.1962 + __ bind(L); 1.1963 + } 1.1964 + 1.1965 + int vep_offset = ((intptr_t)__ pc()) - start; 1.1966 + 1.1967 +#ifdef COMPILER1 1.1968 + if (InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) { 1.1969 + // Object.hashCode can pull the hashCode from the header word 1.1970 + // instead of doing a full VM transition once it's been computed. 1.1971 + // Since hashCode is usually polymorphic at call sites we can't do 1.1972 + // this optimization at the call site without a lot of work. 1.1973 + Label slowCase; 1.1974 + Register receiver = O0; 1.1975 + Register result = O0; 1.1976 + Register header = G3_scratch; 1.1977 + Register hash = G3_scratch; // overwrite header value with hash value 1.1978 + Register mask = G1; // to get hash field from header 1.1979 + 1.1980 + // Read the header and build a mask to get its hash field. Give up if the object is not unlocked. 1.1981 + // We depend on hash_mask being at most 32 bits and avoid the use of 1.1982 + // hash_mask_in_place because it could be larger than 32 bits in a 64-bit 1.1983 + // vm: see markOop.hpp.
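// Note on the sethi/or3 pair below: sethi loads bits 31..10 of a 32-bit
// constant and zeroes the low 10 bits, so the mask is assembled in two
// steps - sethi supplies the high part and the or3 (sitting in a branch
// delay slot) fills in hash_mask & 0x3ff, the low 10 bits.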
1.1984 + __ ld_ptr(receiver, oopDesc::mark_offset_in_bytes(), header); 1.1985 + __ sethi(markOopDesc::hash_mask, mask); 1.1986 + __ btst(markOopDesc::unlocked_value, header); 1.1987 + __ br(Assembler::zero, false, Assembler::pn, slowCase); 1.1988 + if (UseBiasedLocking) { 1.1989 + // Check if biased and fall through to runtime if so 1.1990 + __ delayed()->nop(); 1.1991 + __ btst(markOopDesc::biased_lock_bit_in_place, header); 1.1992 + __ br(Assembler::notZero, false, Assembler::pn, slowCase); 1.1993 + } 1.1994 + __ delayed()->or3(mask, markOopDesc::hash_mask & 0x3ff, mask); 1.1995 + 1.1996 + // Check for a valid (non-zero) hash code and get its value. 1.1997 +#ifdef _LP64 1.1998 + __ srlx(header, markOopDesc::hash_shift, hash); 1.1999 +#else 1.2000 + __ srl(header, markOopDesc::hash_shift, hash); 1.2001 +#endif 1.2002 + __ andcc(hash, mask, hash); 1.2003 + __ br(Assembler::equal, false, Assembler::pn, slowCase); 1.2004 + __ delayed()->nop(); 1.2005 + 1.2006 + // leaf return. 1.2007 + __ retl(); 1.2008 + __ delayed()->mov(hash, result); 1.2009 + __ bind(slowCase); 1.2010 + } 1.2011 +#endif // COMPILER1 1.2012 + 1.2013 + 1.2014 + // We have received a description of where all the java args are located 1.2015 + // on entry to the wrapper. We need to convert these args to where 1.2016 + // the jni function will expect them. To figure out where they go 1.2017 + // we convert the java signature to a C signature by inserting 1.2018 + // the hidden arguments as arg[0] and possibly arg[1] (static method) 1.2019 + 1.2020 + const int total_in_args = method->size_of_parameters(); 1.2021 + int total_c_args = total_in_args; 1.2022 + int total_save_slots = 6 * VMRegImpl::slots_per_word; 1.2023 + if (!is_critical_native) { 1.2024 + total_c_args += 1; 1.2025 + if (method->is_static()) { 1.2026 + total_c_args++; 1.2027 + } 1.2028 + } else { 1.2029 + for (int i = 0; i < total_in_args; i++) { 1.2030 + if (in_sig_bt[i] == T_ARRAY) { 1.2031 + // These have to be saved and restored across the safepoint 1.2032 + total_c_args++; 1.2033 + } 1.2034 + } 1.2035 + } 1.2036 + 1.2037 + BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); 1.2038 + VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); 1.2039 + BasicType* in_elem_bt = NULL; 1.2040 + 1.2041 + int argc = 0; 1.2042 + if (!is_critical_native) { 1.2043 + out_sig_bt[argc++] = T_ADDRESS; 1.2044 + if (method->is_static()) { 1.2045 + out_sig_bt[argc++] = T_OBJECT; 1.2046 + } 1.2047 + 1.2048 + for (int i = 0; i < total_in_args ; i++ ) { 1.2049 + out_sig_bt[argc++] = in_sig_bt[i]; 1.2050 + } 1.2051 + } else { 1.2052 + Thread* THREAD = Thread::current(); 1.2053 + in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args); 1.2054 + SignatureStream ss(method->signature()); 1.2055 + for (int i = 0; i < total_in_args ; i++ ) { 1.2056 + if (in_sig_bt[i] == T_ARRAY) { 1.2057 + // Arrays are passed as int, elem* pair 1.2058 + out_sig_bt[argc++] = T_INT; 1.2059 + out_sig_bt[argc++] = T_ADDRESS; 1.2060 + Symbol* atype = ss.as_symbol(CHECK_NULL); 1.2061 + const char* at = atype->as_C_string(); 1.2062 + if (strlen(at) == 2) { 1.2063 + assert(at[0] == '[', "must be"); 1.2064 + switch (at[1]) { 1.2065 + case 'B': in_elem_bt[i] = T_BYTE; break; 1.2066 + case 'C': in_elem_bt[i] = T_CHAR; break; 1.2067 + case 'D': in_elem_bt[i] = T_DOUBLE; break; 1.2068 + case 'F': in_elem_bt[i] = T_FLOAT; break; 1.2069 + case 'I': in_elem_bt[i] = T_INT; break; 1.2070 + case 'J': in_elem_bt[i] = T_LONG; break; 1.2071 + case 'S': in_elem_bt[i] = T_SHORT; break; 1.2072 + case 'Z': in_elem_bt[i] = T_BOOLEAN; break; 1.2073 + default: ShouldNotReachHere(); 1.2074 + } 1.2075 + } 1.2076 + } else { 1.2077 + out_sig_bt[argc++] = in_sig_bt[i]; 1.2078 + in_elem_bt[i] = T_VOID; 1.2079 + } 1.2080 + if (in_sig_bt[i] != T_VOID) { 1.2081 + assert(in_sig_bt[i] == ss.type(), "must match"); 1.2082 + ss.next(); 1.2083 + } 1.2084 + } 1.2085 + } 1.2086 + 1.2087 + // Now figure out where the args must be stored and how much stack space 1.2088 + // they require (neglecting out_preserve_stack_slots but space for storing 1.2089 + // the 1st six register arguments). It's weird; see int_stk_helper. 1.2090 + // 1.2091 + int out_arg_slots; 1.2092 + out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); 1.2093 + 1.2094 + if (is_critical_native) { 1.2095 + // Critical natives may have to call out so they need a save area 1.2096 + // for register arguments. 1.2097 + int double_slots = 0; 1.2098 + int single_slots = 0; 1.2099 + for ( int i = 0; i < total_in_args; i++) { 1.2100 + if (in_regs[i].first()->is_Register()) { 1.2101 + const Register reg = in_regs[i].first()->as_Register(); 1.2102 + switch (in_sig_bt[i]) { 1.2103 + case T_ARRAY: 1.2104 + case T_BOOLEAN: 1.2105 + case T_BYTE: 1.2106 + case T_SHORT: 1.2107 + case T_CHAR: 1.2108 + case T_INT: assert(reg->is_in(), "don't need to save these"); break; 1.2109 + case T_LONG: if (reg->is_global()) double_slots++; break; 1.2110 + default: ShouldNotReachHere(); 1.2111 + } 1.2112 + } else if (in_regs[i].first()->is_FloatRegister()) { 1.2113 + switch (in_sig_bt[i]) { 1.2114 + case T_FLOAT: single_slots++; break; 1.2115 + case T_DOUBLE: double_slots++; break; 1.2116 + default: ShouldNotReachHere(); 1.2117 + } 1.2118 + } 1.2119 + } 1.2120 + total_save_slots = double_slots * 2 + single_slots; 1.2121 + } 1.2122 + 1.2123 + // Compute framesize for the wrapper. We need to handlize all oops in 1.2124 + // registers. We must create space for them here that is disjoint from 1.2125 + // the windowed save area because we have no control over when we might 1.2126 + // flush the window again and overwrite values that gc has since modified. 1.2127 + // (The live window race) 1.2128 + // 1.2129 + // We always just allocate 6 words for storing down these objects. This allows 1.2130 + // us to simply record the base and use the Ireg number to decide which 1.2131 + // slot to use. (Note that the reg number is the inbound number not the 1.2132 + // outbound number). 1.2133 + // We must shuffle args to match the native convention, and include var-args space. 1.2134 + 1.2135 + // Calculate the total number of stack slots we will need.
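// Summary sketch of the accounting that follows (all values are 4-byte
// stack slots; this list is a condensation of the code below, not extra
// logic):
//   stack_slots  = out_preserve_stack_slots() + out_arg_slots;
//   oop_handle_offset = round_to(stack_slots, 2);
//   stack_slots += total_save_slots;                 // oop handle area
//   if (static)       stack_slots += slots_per_word; // klass handle
//   if (synchronized) stack_slots += slots_per_word; // lock box
//   stack_slots += 2;                                // result/gpr->fpr temp
//   stack_slots  = round_to(stack_slots, 2 * VMRegImpl::slots_per_word);
// The final rounding keeps the frame 8-byte (32-bit VM) or 16-byte
// (64-bit VM) aligned; stack_size is stack_slots * stack_slot_size.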
1.2136 + 1.2137 + // First count the abi requirement plus all of the outgoing args 1.2138 + int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; 1.2139 + 1.2140 + // Now the space for the inbound oop handle area 1.2141 + 1.2142 + int oop_handle_offset = round_to(stack_slots, 2); 1.2143 + stack_slots += total_save_slots; 1.2144 + 1.2145 + // Now any space we need for handlizing a klass if static method 1.2146 + 1.2147 + int klass_slot_offset = 0; 1.2148 + int klass_offset = -1; 1.2149 + int lock_slot_offset = 0; 1.2150 + bool is_static = false; 1.2151 + 1.2152 + if (method->is_static()) { 1.2153 + klass_slot_offset = stack_slots; 1.2154 + stack_slots += VMRegImpl::slots_per_word; 1.2155 + klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; 1.2156 + is_static = true; 1.2157 + } 1.2158 + 1.2159 + // Plus a lock if needed 1.2160 + 1.2161 + if (method->is_synchronized()) { 1.2162 + lock_slot_offset = stack_slots; 1.2163 + stack_slots += VMRegImpl::slots_per_word; 1.2164 + } 1.2165 + 1.2166 + // Now a place to save return value or as a temporary for any gpr -> fpr moves 1.2167 + stack_slots += 2; 1.2168 + 1.2169 + // Ok The space we have allocated will look like: 1.2170 + // 1.2171 + // 1.2172 + // FP-> | | 1.2173 + // |---------------------| 1.2174 + // | 2 slots for moves | 1.2175 + // |---------------------| 1.2176 + // | lock box (if sync) | 1.2177 + // |---------------------| <- lock_slot_offset 1.2178 + // | klass (if static) | 1.2179 + // |---------------------| <- klass_slot_offset 1.2180 + // | oopHandle area | 1.2181 + // |---------------------| <- oop_handle_offset 1.2182 + // | outbound memory | 1.2183 + // | based arguments | 1.2184 + // | | 1.2185 + // |---------------------| 1.2186 + // | vararg area | 1.2187 + // |---------------------| 1.2188 + // | | 1.2189 + // SP-> | out_preserved_slots | 1.2190 + // 1.2191 + // 1.2192 + 1.2193 + 1.2194 + // Now compute actual number of stack words we need rounding to make 1.2195 + // stack properly aligned. 1.2196 + stack_slots = round_to(stack_slots, 2 * VMRegImpl::slots_per_word); 1.2197 + 1.2198 + int stack_size = stack_slots * VMRegImpl::stack_slot_size; 1.2199 + 1.2200 + // Generate stack overflow check before creating frame 1.2201 + __ generate_stack_overflow_check(stack_size); 1.2202 + 1.2203 + // Generate a new frame for the wrapper. 1.2204 + __ save(SP, -stack_size, SP); 1.2205 + 1.2206 + int frame_complete = ((intptr_t)__ pc()) - start; 1.2207 + 1.2208 + __ verify_thread(); 1.2209 + 1.2210 + if (is_critical_native) { 1.2211 + check_needs_gc_for_critical_native(masm, stack_slots, total_in_args, 1.2212 + oop_handle_offset, oop_maps, in_regs, in_sig_bt); 1.2213 + } 1.2214 + 1.2215 + // 1.2216 + // We immediately shuffle the arguments so that any vm call we have to 1.2217 + // make from here on out (sync slow path, jvmti, etc.) we will have 1.2218 + // captured the oops from our caller and have a valid oopMap for 1.2219 + // them. 1.2220 + 1.2221 + // ----------------- 1.2222 + // The Grand Shuffle 1.2223 + // 1.2224 + // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv* 1.2225 + // (derived from JavaThread* which is in L7_thread_cache) and, if static, 1.2226 + // the class mirror instead of a receiver. This pretty much guarantees that 1.2227 + // register layout will not match. We ignore these extra arguments during 1.2228 + // the shuffle. The shuffle is described by the two calling convention 1.2229 + // vectors we have in our possession. 
We simply walk the java vector to 1.2230 + // get the source locations and the c vector to get the destinations. 1.2231 + // Because we have a new window and the argument registers are completely 1.2232 + // disjoint ( I0 -> O1, I1 -> O2, ...) we have nothing to worry about 1.2233 + // here. 1.2234 + 1.2235 + // This is a trick. We double the stack slots so we can claim 1.2236 + // the oops in the caller's frame. Since we are sure to have 1.2237 + // more args than the caller doubling is enough to make 1.2238 + // sure we can capture all the incoming oop args from the 1.2239 + // caller. 1.2240 + // 1.2241 + OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); 1.2242 + // Record sp-based slot for receiver on stack for non-static methods 1.2243 + int receiver_offset = -1; 1.2244 + 1.2245 + // We move the arguments backward because the floating point registers 1.2246 + // destination will always be to a register with a greater or equal register 1.2247 + // number or the stack. 1.2248 + 1.2249 +#ifdef ASSERT 1.2250 + bool reg_destroyed[RegisterImpl::number_of_registers]; 1.2251 + bool freg_destroyed[FloatRegisterImpl::number_of_registers]; 1.2252 + for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { 1.2253 + reg_destroyed[r] = false; 1.2254 + } 1.2255 + for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { 1.2256 + freg_destroyed[f] = false; 1.2257 + } 1.2258 + 1.2259 +#endif /* ASSERT */ 1.2260 + 1.2261 + for ( int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0 ; i--, c_arg-- ) { 1.2262 + 1.2263 +#ifdef ASSERT 1.2264 + if (in_regs[i].first()->is_Register()) { 1.2265 + assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "ack!"); 1.2266 + } else if (in_regs[i].first()->is_FloatRegister()) { 1.2267 + assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding(FloatRegisterImpl::S)], "ack!"); 1.2268 + } 1.2269 + if (out_regs[c_arg].first()->is_Register()) { 1.2270 + reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; 1.2271 + } else if (out_regs[c_arg].first()->is_FloatRegister()) { 1.2272 + freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding(FloatRegisterImpl::S)] = true; 1.2273 + } 1.2274 +#endif /* ASSERT */ 1.2275 + 1.2276 + switch (in_sig_bt[i]) { 1.2277 + case T_ARRAY: 1.2278 + if (is_critical_native) { 1.2279 + unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg], out_regs[c_arg - 1]); 1.2280 + c_arg--; 1.2281 + break; 1.2282 + } 1.2283 + case T_OBJECT: 1.2284 + assert(!is_critical_native, "no oop arguments"); 1.2285 + object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], 1.2286 + ((i == 0) && (!is_static)), 1.2287 + &receiver_offset); 1.2288 + break; 1.2289 + case T_VOID: 1.2290 + break; 1.2291 + 1.2292 + case T_FLOAT: 1.2293 + float_move(masm, in_regs[i], out_regs[c_arg]); 1.2294 + break; 1.2295 + 1.2296 + case T_DOUBLE: 1.2297 + assert( i + 1 < total_in_args && 1.2298 + in_sig_bt[i + 1] == T_VOID && 1.2299 + out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); 1.2300 + double_move(masm, in_regs[i], out_regs[c_arg]); 1.2301 + break; 1.2302 + 1.2303 + case T_LONG : 1.2304 + long_move(masm, in_regs[i], out_regs[c_arg]); 1.2305 + break; 1.2306 + 1.2307 + case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); 1.2308 + 1.2309 + default: 1.2310 + move32_64(masm, in_regs[i], out_regs[c_arg]); 1.2311 + } 1.2312 + } 1.2313 + 1.2314 + // Pre-load a static method's oop into O1. 
Used both by locking code and 1.2315 + // the normal JNI call code. 1.2316 + if (method->is_static() && !is_critical_native) { 1.2317 + __ set_oop_constant(JNIHandles::make_local(method->method_holder()->java_mirror()), O1); 1.2318 + 1.2319 + // Now handlize the static class mirror in O1. It's known not-null. 1.2320 + __ st_ptr(O1, SP, klass_offset + STACK_BIAS); 1.2321 + map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); 1.2322 + __ add(SP, klass_offset + STACK_BIAS, O1); 1.2323 + } 1.2324 + 1.2325 + 1.2326 + const Register L6_handle = L6; 1.2327 + 1.2328 + if (method->is_synchronized()) { 1.2329 + assert(!is_critical_native, "unhandled"); 1.2330 + __ mov(O1, L6_handle); 1.2331 + } 1.2332 + 1.2333 + // We have all of the arguments setup at this point. We MUST NOT touch any Oregs 1.2334 + // except O6/O7. So if we must call out we must push a new frame. We immediately 1.2335 + // push a new frame and flush the windows. 1.2336 +#ifdef _LP64 1.2337 + intptr_t thepc = (intptr_t) __ pc(); 1.2338 + { 1.2339 + address here = __ pc(); 1.2340 + // Call the next instruction 1.2341 + __ call(here + 8, relocInfo::none); 1.2342 + __ delayed()->nop(); 1.2343 + } 1.2344 +#else 1.2345 + intptr_t thepc = __ load_pc_address(O7, 0); 1.2346 +#endif /* _LP64 */ 1.2347 + 1.2348 + // We use the same pc/oopMap repeatedly when we call out 1.2349 + oop_maps->add_gc_map(thepc - start, map); 1.2350 + 1.2351 + // O7 now has the pc loaded that we will use when we finally call to native. 1.2352 + 1.2353 + // Save thread in L7; it crosses a bunch of VM calls below 1.2354 + // Don't use save_thread because it smashes G2 and we merely 1.2355 + // want to save a copy 1.2356 + __ mov(G2_thread, L7_thread_cache); 1.2357 + 1.2358 + 1.2359 + // If we create an inner frame once is plenty 1.2360 + // when we create it we must also save G2_thread 1.2361 + bool inner_frame_created = false; 1.2362 + 1.2363 + // dtrace method entry support 1.2364 + { 1.2365 + SkipIfEqual skip_if( 1.2366 + masm, G3_scratch, &DTraceMethodProbes, Assembler::zero); 1.2367 + // create inner frame 1.2368 + __ save_frame(0); 1.2369 + __ mov(G2_thread, L7_thread_cache); 1.2370 + __ set_metadata_constant(method(), O1); 1.2371 + __ call_VM_leaf(L7_thread_cache, 1.2372 + CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), 1.2373 + G2_thread, O1); 1.2374 + __ restore(); 1.2375 + } 1.2376 + 1.2377 + // RedefineClasses() tracing support for obsolete method entry 1.2378 + if (RC_TRACE_IN_RANGE(0x00001000, 0x00002000)) { 1.2379 + // create inner frame 1.2380 + __ save_frame(0); 1.2381 + __ mov(G2_thread, L7_thread_cache); 1.2382 + __ set_metadata_constant(method(), O1); 1.2383 + __ call_VM_leaf(L7_thread_cache, 1.2384 + CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), 1.2385 + G2_thread, O1); 1.2386 + __ restore(); 1.2387 + } 1.2388 + 1.2389 + // We are in the jni frame unless saved_frame is true in which case 1.2390 + // we are in one frame deeper (the "inner" frame). If we are in the 1.2391 + // "inner" frames the args are in the Iregs and if the jni frame then 1.2392 + // they are in the Oregs. 1.2393 + // If we ever need to go to the VM (for locking, jvmti) then 1.2394 + // we will always be in the "inner" frame. 
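// Sketch of the register window discipline assumed here: after this
// wrapper's save(), the shuffled native args live in its O registers;
// a further save_frame(0) for a VM call renames them to the inner
// frame's I registers:
//   wrapper frame:  O0..O5 = outgoing native args
//   inner frame:    I0..I5 = the same values, one window deeper
// which is why the locking code below reads the receiver handle from I1
// rather than O1.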
1.2395 + 1.2396 + // Lock a synchronized method 1.2397 + int lock_offset = -1; // Set if locked 1.2398 + if (method->is_synchronized()) { 1.2399 + Register Roop = O1; 1.2400 + const Register L3_box = L3; 1.2401 + 1.2402 + create_inner_frame(masm, &inner_frame_created); 1.2403 + 1.2404 + __ ld_ptr(I1, 0, O1); 1.2405 + Label done; 1.2406 + 1.2407 + lock_offset = (lock_slot_offset * VMRegImpl::stack_slot_size); 1.2408 + __ add(FP, lock_offset+STACK_BIAS, L3_box); 1.2409 +#ifdef ASSERT 1.2410 + if (UseBiasedLocking) { 1.2411 + // making the box point to itself will make it clear it went unused 1.2412 + // but also be obviously invalid 1.2413 + __ st_ptr(L3_box, L3_box, 0); 1.2414 + } 1.2415 +#endif // ASSERT 1.2416 + // 1.2417 + // Compiler_lock_object (Roop, Rmark, Rbox, Rscratch) -- kills Rmark, Rbox, Rscratch 1.2418 + // 1.2419 + __ compiler_lock_object(Roop, L1, L3_box, L2); 1.2420 + __ br(Assembler::equal, false, Assembler::pt, done); 1.2421 + __ delayed() -> add(FP, lock_offset+STACK_BIAS, L3_box); 1.2422 + 1.2423 + 1.2424 + // None of the above fast optimizations worked so we have to get into the 1.2425 + // slow case of monitor enter. Inline a special case of call_VM that 1.2426 + // disallows any pending_exception. 1.2427 + __ mov(Roop, O0); // Need oop in O0 1.2428 + __ mov(L3_box, O1); 1.2429 + 1.2430 + // Record last_Java_sp, in case the VM code releases the JVM lock. 1.2431 + 1.2432 + __ set_last_Java_frame(FP, I7); 1.2433 + 1.2434 + // do the call 1.2435 + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), relocInfo::runtime_call_type); 1.2436 + __ delayed()->mov(L7_thread_cache, O2); 1.2437 + 1.2438 + __ restore_thread(L7_thread_cache); // restore G2_thread 1.2439 + __ reset_last_Java_frame(); 1.2440 + 1.2441 +#ifdef ASSERT 1.2442 + { Label L; 1.2443 + __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), O0); 1.2444 + __ br_null_short(O0, Assembler::pt, L); 1.2445 + __ stop("no pending exception allowed on exit from IR::monitorenter"); 1.2446 + __ bind(L); 1.2447 + } 1.2448 +#endif 1.2449 + __ bind(done); 1.2450 + } 1.2451 + 1.2452 + 1.2453 + // Finally just about ready to make the JNI call 1.2454 + 1.2455 + __ flushw(); 1.2456 + if (inner_frame_created) { 1.2457 + __ restore(); 1.2458 + } else { 1.2459 + // Store only what we need from this frame 1.2460 + // QQQ I think that non-v9 (like we care) we don't need these saves 1.2461 + // either as the flush traps and the current window goes too. 1.2462 + __ st_ptr(FP, SP, FP->sp_offset_in_saved_window()*wordSize + STACK_BIAS); 1.2463 + __ st_ptr(I7, SP, I7->sp_offset_in_saved_window()*wordSize + STACK_BIAS); 1.2464 + } 1.2465 + 1.2466 + // get JNIEnv* which is first argument to native 1.2467 + if (!is_critical_native) { 1.2468 + __ add(G2_thread, in_bytes(JavaThread::jni_environment_offset()), O0); 1.2469 + } 1.2470 + 1.2471 + // Use that pc we placed in O7 a while back as the current frame anchor 1.2472 + __ set_last_Java_frame(SP, O7); 1.2473 + 1.2474 + // We flushed the windows ages ago now mark them as flushed before transitioning. 1.2475 + __ set(JavaFrameAnchor::flushed, G3_scratch); 1.2476 + __ st(G3_scratch, G2_thread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::flags_offset()); 1.2477 + 1.2478 + // Transition from _thread_in_Java to _thread_in_native. 
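// Note: the store of _thread_in_native into the thread-state field is
// placed in the delay slot of the call/jumpl below, so it executes
// immediately before control enters the native function; there is no
// window where the call has been made but the state is still
// _thread_in_Java.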
1.2479 + __ set(_thread_in_native, G3_scratch); 1.2480 + 1.2481 +#ifdef _LP64 1.2482 + AddressLiteral dest(native_func); 1.2483 + __ relocate(relocInfo::runtime_call_type); 1.2484 + __ jumpl_to(dest, O7, O7); 1.2485 +#else 1.2486 + __ call(native_func, relocInfo::runtime_call_type); 1.2487 +#endif 1.2488 + __ delayed()->st(G3_scratch, G2_thread, JavaThread::thread_state_offset()); 1.2489 + 1.2490 + __ restore_thread(L7_thread_cache); // restore G2_thread 1.2491 + 1.2492 + // Unpack native results. For int-types, we do any needed sign-extension 1.2493 + // and move things into I0. The return value there will survive any VM 1.2494 + // calls for blocking or unlocking. An FP or OOP result (handle) is done 1.2495 + // specially in the slow-path code. 1.2496 + switch (ret_type) { 1.2497 + case T_VOID: break; // Nothing to do! 1.2498 + case T_FLOAT: break; // Got it where we want it (unless slow-path) 1.2499 + case T_DOUBLE: break; // Got it where we want it (unless slow-path) 1.2500 + // In the 64-bit build the result is in O0; in the 32-bit build it is in O0/O1 1.2501 + case T_LONG: 1.2502 +#ifndef _LP64 1.2503 + __ mov(O1, I1); 1.2504 +#endif 1.2505 + // Fall thru 1.2506 + case T_OBJECT: // Really a handle 1.2507 + case T_ARRAY: 1.2508 + case T_INT: 1.2509 + __ mov(O0, I0); 1.2510 + break; 1.2511 + case T_BOOLEAN: __ subcc(G0, O0, G0); __ addc(G0, 0, I0); break; // !0 => true; 0 => false 1.2512 + case T_BYTE : __ sll(O0, 24, O0); __ sra(O0, 24, I0); break; 1.2513 + case T_CHAR : __ sll(O0, 16, O0); __ srl(O0, 16, I0); break; // cannot use and3, 0xFFFF too big as immediate value! 1.2514 + case T_SHORT : __ sll(O0, 16, O0); __ sra(O0, 16, I0); break; 1.2515 + break; // Cannot de-handlize until after reclaiming jvm_lock 1.2516 + default: 1.2517 + ShouldNotReachHere(); 1.2518 + } 1.2519 + 1.2520 + Label after_transition; 1.2521 + // must we block? 1.2522 + 1.2523 + // Block, if necessary, before resuming in _thread_in_Java state. 1.2524 + // In order for GC to work, don't clear the last_Java_sp until after blocking. 1.2525 + { Label no_block; 1.2526 + AddressLiteral sync_state(SafepointSynchronize::address_of_state()); 1.2527 + 1.2528 + // Switch thread to "native transition" state before reading the synchronization state. 1.2529 + // This additional state is necessary because reading and testing the synchronization 1.2530 + // state is not atomic w.r.t. GC, as this scenario demonstrates: 1.2531 + // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. 1.2532 + // VM thread changes sync state to synchronizing and suspends threads for GC. 1.2533 + // Thread A is resumed to finish this native method, but doesn't block here since it 1.2534 + // didn't see any synchronization in progress, and escapes. 1.2535 + __ set(_thread_in_native_trans, G3_scratch); 1.2536 + __ st(G3_scratch, G2_thread, JavaThread::thread_state_offset()); 1.2537 + if(os::is_MP()) { 1.2538 + if (UseMembar) { 1.2539 + // Force this write out before the read below 1.2540 + __ membar(Assembler::StoreLoad); 1.2541 + } else { 1.2542 + // Write serialization page so VM thread can do a pseudo remote membar. 1.2543 + // We use the current thread pointer to calculate a thread specific 1.2544 + // offset to write to within the page. This minimizes bus traffic 1.2545 + // due to cache line collision. 1.2546 + __ serialize_memory(G2_thread, G1_scratch, G3_scratch); 1.2547 + } 1.2548 + } 1.2549 + __ load_contents(sync_state, G3_scratch); 1.2550 + __ cmp(G3_scratch, SafepointSynchronize::_not_synchronized); 1.2551 + 1.2552 + Label L; 1.2553 + Address suspend_state(G2_thread, JavaThread::suspend_flags_offset()); 1.2554 + __ br(Assembler::notEqual, false, Assembler::pn, L); 1.2555 + __ delayed()->ld(suspend_state, G3_scratch); 1.2556 + __ cmp_and_br_short(G3_scratch, 0, Assembler::equal, Assembler::pt, no_block); 1.2557 + __ bind(L); 1.2558 + 1.2559 + // Block. Save any potential method result value before the operation and 1.2560 + // use a leaf call to leave the last_Java_frame setup undisturbed. Doing this 1.2561 + // lets us share the oopMap we used when we went native rather than create 1.2562 + // a distinct one for this pc. 1.2563 + // 1.2564 + save_native_result(masm, ret_type, stack_slots); 1.2565 + if (!is_critical_native) { 1.2566 + __ call_VM_leaf(L7_thread_cache, 1.2567 + CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), 1.2568 + G2_thread); 1.2569 + } else { 1.2570 + __ call_VM_leaf(L7_thread_cache, 1.2571 + CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition), 1.2572 + G2_thread); 1.2573 + } 1.2574 + 1.2575 + // Restore any method result value 1.2576 + restore_native_result(masm, ret_type, stack_slots); 1.2577 + 1.2578 + if (is_critical_native) { 1.2579 + // The call above performed the transition to thread_in_Java so 1.2580 + // skip the transition logic below. 1.2581 + __ ba(after_transition); 1.2582 + __ delayed()->nop(); 1.2583 + } 1.2584 + 1.2585 + __ bind(no_block); 1.2586 + } 1.2587 + 1.2588 + // thread state is thread_in_native_trans. Any safepoint blocking has already 1.2589 + // happened so we can now change state to _thread_in_Java. 1.2590 + __ set(_thread_in_Java, G3_scratch); 1.2591 + __ st(G3_scratch, G2_thread, JavaThread::thread_state_offset()); 1.2592 + __ bind(after_transition); 1.2593 + 1.2594 + Label no_reguard; 1.2595 + __ ld(G2_thread, JavaThread::stack_guard_state_offset(), G3_scratch); 1.2596 + __ cmp_and_br_short(G3_scratch, JavaThread::stack_guard_yellow_disabled, Assembler::notEqual, Assembler::pt, no_reguard); 1.2597 + 1.2598 + save_native_result(masm, ret_type, stack_slots); 1.2599 + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)); 1.2600 + __ delayed()->nop(); 1.2601 + 1.2602 + __ restore_thread(L7_thread_cache); // restore G2_thread 1.2603 + restore_native_result(masm, ret_type, stack_slots); 1.2604 + 1.2605 + __ bind(no_reguard); 1.2606 + 1.2607 + // Handle possible exception (will unlock if necessary) 1.2608 + 1.2609 + // native result if any is live in freg or I0 (and I1 if long and 32bit vm) 1.2610 + 1.2611 + // Unlock 1.2612 + if (method->is_synchronized()) { 1.2613 + Label done; 1.2614 + Register I2_ex_oop = I2; 1.2615 + const Register L3_box = L3; 1.2616 + // Get locked oop from the handle we passed to jni 1.2617 + __ ld_ptr(L6_handle, 0, L4); 1.2618 + __ add(SP, lock_offset+STACK_BIAS, L3_box); 1.2619 + // Must save pending exception around the slow-path VM call. Since it's a 1.2620 + // leaf call, the pending exception (if any) can be kept in a register.
1.2621 + __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), I2_ex_oop); 1.2622 + // Now unlock 1.2623 + // (Roop, Rmark, Rbox, Rscratch) 1.2624 + __ compiler_unlock_object(L4, L1, L3_box, L2); 1.2625 + __ br(Assembler::equal, false, Assembler::pt, done); 1.2626 + __ delayed()-> add(SP, lock_offset+STACK_BIAS, L3_box); 1.2627 + 1.2628 + // save and restore any potential method result value around the unlocking 1.2629 + // operation. Will save in I0 (or stack for FP returns). 1.2630 + save_native_result(masm, ret_type, stack_slots); 1.2631 + 1.2632 + // Must clear pending-exception before re-entering the VM. Since this is 1.2633 + // a leaf call, pending-exception-oop can be safely kept in a register. 1.2634 + __ st_ptr(G0, G2_thread, in_bytes(Thread::pending_exception_offset())); 1.2635 + 1.2636 + // slow case of monitor exit. Inline a special case of call_VM that 1.2637 + // disallows any pending_exception. 1.2638 + __ mov(L3_box, O1); 1.2639 + 1.2640 + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), relocInfo::runtime_call_type); 1.2641 + __ delayed()->mov(L4, O0); // Need oop in O0 1.2642 + 1.2643 + __ restore_thread(L7_thread_cache); // restore G2_thread 1.2644 + 1.2645 +#ifdef ASSERT 1.2646 + { Label L; 1.2647 + __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), O0); 1.2648 + __ br_null_short(O0, Assembler::pt, L); 1.2649 + __ stop("no pending exception allowed on exit from IR::monitorexit"); 1.2650 + __ bind(L); 1.2651 + } 1.2652 +#endif 1.2653 + restore_native_result(masm, ret_type, stack_slots); 1.2654 + // check_forward_pending_exception jumps to forward_exception if any pending 1.2655 + // exception is set. The forward_exception routine expects to see the 1.2656 + // exception in pending_exception and not in a register. Kind of clumsy, 1.2657 + // since all folks who branch to forward_exception must have tested 1.2658 + // pending_exception first and hence have it in a register already. 1.2659 + __ st_ptr(I2_ex_oop, G2_thread, in_bytes(Thread::pending_exception_offset())); 1.2660 + __ bind(done); 1.2661 + } 1.2662 + 1.2663 + // Tell dtrace about this method exit 1.2664 + { 1.2665 + SkipIfEqual skip_if( 1.2666 + masm, G3_scratch, &DTraceMethodProbes, Assembler::zero); 1.2667 + save_native_result(masm, ret_type, stack_slots); 1.2668 + __ set_metadata_constant(method(), O1); 1.2669 + __ call_VM_leaf(L7_thread_cache, 1.2670 + CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), 1.2671 + G2_thread, O1); 1.2672 + restore_native_result(masm, ret_type, stack_slots); 1.2673 + } 1.2674 + 1.2675 + // Clear "last Java frame" SP and PC.
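// Note on the oop unpacking a few lines below: a JNI oop result is a
// handle - the address of a storage slot holding the real oop - or NULL.
// The annulled brx (a == true) executes the ld_ptr in its delay slot
// only when the branch is taken, so only a non-zero handle is
// dereferenced; for NULL the delay slot is annulled and I0 is zeroed by
// the fall-through mov.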
1.2676 +  __ verify_thread(); // G2_thread must be correct
1.2677 +  __ reset_last_Java_frame();
1.2678 +
1.2679 +  // Unpack oop result
1.2680 +  if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
1.2681 +      Label L;
1.2682 +      __ addcc(G0, I0, G0);
1.2683 +      __ brx(Assembler::notZero, true, Assembler::pt, L);
1.2684 +      __ delayed()->ld_ptr(I0, 0, I0);
1.2685 +      __ mov(G0, I0);
1.2686 +      __ bind(L);
1.2687 +      __ verify_oop(I0);
1.2688 +  }
1.2689 +
1.2690 +  if (!is_critical_native) {
1.2691 +    // reset handle block
1.2692 +    __ ld_ptr(G2_thread, in_bytes(JavaThread::active_handles_offset()), L5);
1.2693 +    __ st(G0, L5, JNIHandleBlock::top_offset_in_bytes());
1.2694 +
1.2695 +    __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), G3_scratch);
1.2696 +    check_forward_pending_exception(masm, G3_scratch);
1.2697 +  }
1.2698 +
1.2699 +
1.2700 +  // Return
1.2701 +
1.2702 +#ifndef _LP64
1.2703 +  if (ret_type == T_LONG) {
1.2704 +
1.2705 +    // Must leave proper result in O0,O1 and G1 (c2/tiered only)
1.2706 +    __ sllx(I0, 32, G1); // Shift bits into high G1
1.2707 +    __ srl (I1, 0, I1); // Zero extend O1 (harmless?)
1.2708 +    __ or3 (I1, G1, G1); // OR 64 bits into G1
1.2709 +  }
1.2710 +#endif
1.2711 +
1.2712 +  __ ret();
1.2713 +  __ delayed()->restore();
1.2714 +
1.2715 +  __ flush();
1.2716 +
1.2717 +  nmethod *nm = nmethod::new_native_nmethod(method,
1.2718 +                                            compile_id,
1.2719 +                                            masm->code(),
1.2720 +                                            vep_offset,
1.2721 +                                            frame_complete,
1.2722 +                                            stack_slots / VMRegImpl::slots_per_word,
1.2723 +                                            (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
1.2724 +                                            in_ByteSize(lock_offset),
1.2725 +                                            oop_maps);
1.2726 +
1.2727 +  if (is_critical_native) {
1.2728 +    nm->set_lazy_critical_native(true);
1.2729 +  }
1.2730 +  return nm;
1.2731 +
1.2732 +}
1.2733 +
1.2734 +#ifdef HAVE_DTRACE_H
1.2735 +// ---------------------------------------------------------------------------
1.2736 +// Generate a dtrace nmethod for a given signature. The method takes arguments
1.2737 +// in the Java compiled code convention, marshals them to the native
1.2738 +// abi and then leaves nops at the position you would expect to call a native
1.2739 +// function. When the probe is enabled, dtrace replaces the nops with a trap
1.2740 +// instruction, and the resulting trap causes a notification
1.2741 +// to dtrace.
1.2742 +//
1.2743 +// The probes are only able to take primitive types and java/lang/String as
1.2744 +// arguments. No other java types are allowed. Strings are converted to utf8
1.2745 +// strings, so from dtrace's point of view java strings become C
1.2746 +// strings. There is an arbitrary fixed limit on the total space that a method
1.2747 +// can use for converting the strings (256 chars per string in the signature),
1.2748 +// so any java string larger than this is truncated.
1.2749 +
1.2750 +static int fp_offset[ConcreteRegisterImpl::number_of_registers] = { 0 };
1.2751 +static bool offsets_initialized = false;
1.2752 +
1.2753 +nmethod *SharedRuntime::generate_dtrace_nmethod(
1.2754 +    MacroAssembler *masm, methodHandle method) {
1.2755 +
1.2756 +
1.2757 +  // generate_dtrace_nmethod is guarded by a mutex so we are sure to
1.2758 +  // be single threaded in this method.
1.2759 +  assert(AdapterHandlerLibrary_lock->owned_by_self(), "must be");
1.2760 +
1.2761 +  // Fill in the signature array, for the calling-convention call.
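The conversion loop below rewrites the Java signature into the C signature dtrace sees: String becomes a utf8 char* (T_ADDRESS) and box classes are narrowed to their primitive. The mapping, restated compactly in C++ (std::strcmp stands in for the vmSymbols identity checks; the enum mirrors the BasicType names used below):

  #include <cstring>
  enum SigType { SIG_BYTE, SIG_SHORT, SIG_INT, SIG_LONG, SIG_ADDRESS, SIG_NULLED };
  static SigType dtrace_c_type(const char* klass) {
    if (!std::strcmp(klass, "java/lang/String"))    return SIG_ADDRESS; // utf8 char*
    if (!std::strcmp(klass, "java/lang/Boolean") ||
        !std::strcmp(klass, "java/lang/Byte"))      return SIG_BYTE;
    if (!std::strcmp(klass, "java/lang/Character") ||
        !std::strcmp(klass, "java/lang/Short"))     return SIG_SHORT;
    if (!std::strcmp(klass, "java/lang/Integer") ||
        !std::strcmp(klass, "java/lang/Float"))     return SIG_INT;
    if (!std::strcmp(klass, "java/lang/Long") ||
        !std::strcmp(klass, "java/lang/Double"))    return SIG_LONG; // two slots
    return SIG_NULLED; // any other object is passed as NULL
  }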
1.2762 +  int total_args_passed = method->size_of_parameters();
1.2763 +
1.2764 +  BasicType* in_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed);
1.2765 +  VMRegPair *in_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed);
1.2766 +
1.2767 +  // The signature we are going to use for the trap that dtrace will see;
1.2768 +  // java/lang/String is converted. We drop "this" and any other object
1.2769 +  // is converted to NULL. (A one-slot java/lang/Long object reference
1.2770 +  // is converted to a two-slot long, which is why we double the allocation).
1.2771 +  BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed * 2);
1.2772 +  VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed * 2);
1.2773 +
1.2774 +  int i=0;
1.2775 +  int total_strings = 0;
1.2776 +  int first_arg_to_pass = 0;
1.2777 +  int total_c_args = 0;
1.2778 +
1.2779 +  // Skip the receiver as dtrace doesn't want to see it
1.2780 +  if( !method->is_static() ) {
1.2781 +    in_sig_bt[i++] = T_OBJECT;
1.2782 +    first_arg_to_pass = 1;
1.2783 +  }
1.2784 +
1.2785 +  SignatureStream ss(method->signature());
1.2786 +  for ( ; !ss.at_return_type(); ss.next()) {
1.2787 +    BasicType bt = ss.type();
1.2788 +    in_sig_bt[i++] = bt; // Collect remaining bits of signature
1.2789 +    out_sig_bt[total_c_args++] = bt;
1.2790 +    if( bt == T_OBJECT) {
1.2791 +      Symbol* s = ss.as_symbol_or_null();
1.2792 +      if (s == vmSymbols::java_lang_String()) {
1.2793 +        total_strings++;
1.2794 +        out_sig_bt[total_c_args-1] = T_ADDRESS;
1.2795 +      } else if (s == vmSymbols::java_lang_Boolean() ||
1.2796 +                 s == vmSymbols::java_lang_Byte()) {
1.2797 +        out_sig_bt[total_c_args-1] = T_BYTE;
1.2798 +      } else if (s == vmSymbols::java_lang_Character() ||
1.2799 +                 s == vmSymbols::java_lang_Short()) {
1.2800 +        out_sig_bt[total_c_args-1] = T_SHORT;
1.2801 +      } else if (s == vmSymbols::java_lang_Integer() ||
1.2802 +                 s == vmSymbols::java_lang_Float()) {
1.2803 +        out_sig_bt[total_c_args-1] = T_INT;
1.2804 +      } else if (s == vmSymbols::java_lang_Long() ||
1.2805 +                 s == vmSymbols::java_lang_Double()) {
1.2806 +        out_sig_bt[total_c_args-1] = T_LONG;
1.2807 +        out_sig_bt[total_c_args++] = T_VOID;
1.2808 +      }
1.2809 +    } else if ( bt == T_LONG || bt == T_DOUBLE ) {
1.2810 +      in_sig_bt[i++] = T_VOID; // Longs & doubles take 2 Java slots
1.2811 +      // We convert double to long
1.2812 +      out_sig_bt[total_c_args-1] = T_LONG;
1.2813 +      out_sig_bt[total_c_args++] = T_VOID;
1.2814 +    } else if ( bt == T_FLOAT) {
1.2815 +      // We convert float to int
1.2816 +      out_sig_bt[total_c_args-1] = T_INT;
1.2817 +    }
1.2818 +  }
1.2819 +
1.2820 +  assert(i==total_args_passed, "validly parsed signature");
1.2821 +
1.2822 +  // Now get the compiled-Java layout as input arguments
1.2823 +  int comp_args_on_stack;
1.2824 +  comp_args_on_stack = SharedRuntime::java_calling_convention(
1.2825 +      in_sig_bt, in_regs, total_args_passed, false);
1.2826 +
1.2827 +  // We have received a description of where all the java args are located
1.2828 +  // on entry to the wrapper. We need to convert these args to where
1.2829 +  // a native (non-JNI) function would expect them. To figure out
1.2830 +  // where they go we convert the java signature to a C signature and remove
1.2831 +  // T_VOID for any long/double we might have received.
1.2832 +
1.2833 +
1.2834 +  // Now figure out where the args must be stored and how much stack space
1.2835 +  // they require (neglecting out_preserve_stack_slots but space for storing
1.2836 +  // the 1st six register arguments). It's weird; see int_stk_helper.
1.2837 +  //
1.2838 +  int out_arg_slots;
1.2839 +  out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args);
1.2840 +
1.2841 +  // Calculate the total number of stack slots we will need.
1.2842 +
1.2843 +  // First count the abi requirement plus all of the outgoing args
1.2844 +  int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
1.2845 +
1.2846 +  // Plus a temp for possible conversion of float/double/long register args
1.2847 +
1.2848 +  int conversion_temp = stack_slots;
1.2849 +  stack_slots += 2;
1.2850 +
1.2851 +
1.2852 +  // Now space for the string(s) we must convert
1.2853 +
1.2854 +  int string_locs = stack_slots;
1.2855 +  stack_slots += total_strings *
1.2856 +                        (max_dtrace_string_size / VMRegImpl::stack_slot_size);
1.2857 +
1.2858 +  // OK, the space we have allocated will look like this:
1.2859 +  //
1.2860 +  //
1.2861 +  // FP-> |                     |
1.2862 +  //      |---------------------|
1.2863 +  //      | string[n]           |
1.2864 +  //      |---------------------| <- string_locs[n]
1.2865 +  //      | string[n-1]         |
1.2866 +  //      |---------------------| <- string_locs[n-1]
1.2867 +  //      |  ...                |
1.2868 +  //      | ...                 |
1.2869 +  //      |---------------------| <- string_locs[1]
1.2870 +  //      | string[0]           |
1.2871 +  //      |---------------------| <- string_locs[0]
1.2872 +  //      | temp                |
1.2873 +  //      |---------------------| <- conversion_temp
1.2874 +  //      | outbound memory     |
1.2875 +  //      | based arguments     |
1.2876 +  //      |                     |
1.2877 +  //      |---------------------|
1.2878 +  //      |                     |
1.2879 +  // SP-> | out_preserved_slots |
1.2880 +  //
1.2881 +  //
1.2882 +
1.2883 +  // Now compute the actual number of stack words we need, rounding to keep the
1.2884 +  // stack properly aligned.
1.2885 +  stack_slots = round_to(stack_slots, 4 * VMRegImpl::slots_per_word);
1.2886 +
1.2887 +  int stack_size = stack_slots * VMRegImpl::stack_slot_size;
1.2888 +
1.2889 +  intptr_t start = (intptr_t)__ pc();
1.2890 +
1.2891 +  // First thing make an ic check to see if we should even be here
1.2892 +
1.2893 +  {
1.2894 +    Label L;
1.2895 +    const Register temp_reg = G3_scratch;
1.2896 +    AddressLiteral ic_miss(SharedRuntime::get_ic_miss_stub());
1.2897 +    __ verify_oop(O0);
1.2898 +    __ ld_ptr(O0, oopDesc::klass_offset_in_bytes(), temp_reg);
1.2899 +    __ cmp_and_brx_short(temp_reg, G5_inline_cache_reg, Assembler::equal, Assembler::pt, L);
1.2900 +
1.2901 +    __ jump_to(ic_miss, temp_reg);
1.2902 +    __ delayed()->nop();
1.2903 +    __ align(CodeEntryAlignment);
1.2904 +    __ bind(L);
1.2905 +  }
1.2906 +
1.2907 +  int vep_offset = ((intptr_t)__ pc()) - start;
1.2908 +
1.2909 +
1.2910 +  // The instruction at the verified entry point must be 5 bytes or longer
1.2911 +  // because it can be patched on the fly by make_non_entrant. The stack bang
1.2912 +  // instruction fits that requirement.
1.2913 +
1.2914 +  // Generate stack overflow check before creating frame
1.2915 +  __ generate_stack_overflow_check(stack_size);
1.2916 +
1.2917 +  assert(((intptr_t)__ pc() - start - vep_offset) >= 5,
1.2918 +         "valid size for make_non_entrant");
1.2919 +
1.2920 +  // Generate a new frame for the wrapper.
1.2921 +  __ save(SP, -stack_size, SP);
1.2922 +
1.2923 +  // Frame is now completed as far as size and linkage.
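To make the rounding above concrete: round_to aligns up to a power-of-two unit, so with the illustrative 32-bit values stack_slot_size == 4 and slots_per_word == 2, the frame is aligned to 4 * 2 = 8 slots (32 bytes). A worked example with invented numbers, assuming max_dtrace_string_size is the 256 bytes the comment earlier mentions:

  // Same formula as round_to; unit must be a power of two.
  static int round_to_unit(int x, int unit) {
    return (x + unit - 1) & ~(unit - 1);
  }
  // e.g. 19 preserved + out-arg slots, +2 conversion temp, +64 slots for one
  // 256-byte string buffer = 85 slots; round_to_unit(85, 8) == 88 slots,
  // i.e. a stack_size of 88 * 4 = 352 bytes.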
1.2924 + 1.2925 + int frame_complete = ((intptr_t)__ pc()) - start; 1.2926 + 1.2927 +#ifdef ASSERT 1.2928 + bool reg_destroyed[RegisterImpl::number_of_registers]; 1.2929 + bool freg_destroyed[FloatRegisterImpl::number_of_registers]; 1.2930 + for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { 1.2931 + reg_destroyed[r] = false; 1.2932 + } 1.2933 + for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { 1.2934 + freg_destroyed[f] = false; 1.2935 + } 1.2936 + 1.2937 +#endif /* ASSERT */ 1.2938 + 1.2939 + VMRegPair zero; 1.2940 + const Register g0 = G0; // without this we get a compiler warning (why??) 1.2941 + zero.set2(g0->as_VMReg()); 1.2942 + 1.2943 + int c_arg, j_arg; 1.2944 + 1.2945 + Register conversion_off = noreg; 1.2946 + 1.2947 + for (j_arg = first_arg_to_pass, c_arg = 0 ; 1.2948 + j_arg < total_args_passed ; j_arg++, c_arg++ ) { 1.2949 + 1.2950 + VMRegPair src = in_regs[j_arg]; 1.2951 + VMRegPair dst = out_regs[c_arg]; 1.2952 + 1.2953 +#ifdef ASSERT 1.2954 + if (src.first()->is_Register()) { 1.2955 + assert(!reg_destroyed[src.first()->as_Register()->encoding()], "ack!"); 1.2956 + } else if (src.first()->is_FloatRegister()) { 1.2957 + assert(!freg_destroyed[src.first()->as_FloatRegister()->encoding( 1.2958 + FloatRegisterImpl::S)], "ack!"); 1.2959 + } 1.2960 + if (dst.first()->is_Register()) { 1.2961 + reg_destroyed[dst.first()->as_Register()->encoding()] = true; 1.2962 + } else if (dst.first()->is_FloatRegister()) { 1.2963 + freg_destroyed[dst.first()->as_FloatRegister()->encoding( 1.2964 + FloatRegisterImpl::S)] = true; 1.2965 + } 1.2966 +#endif /* ASSERT */ 1.2967 + 1.2968 + switch (in_sig_bt[j_arg]) { 1.2969 + case T_ARRAY: 1.2970 + case T_OBJECT: 1.2971 + { 1.2972 + if (out_sig_bt[c_arg] == T_BYTE || out_sig_bt[c_arg] == T_SHORT || 1.2973 + out_sig_bt[c_arg] == T_INT || out_sig_bt[c_arg] == T_LONG) { 1.2974 + // need to unbox a one-slot value 1.2975 + Register in_reg = L0; 1.2976 + Register tmp = L2; 1.2977 + if ( src.first()->is_reg() ) { 1.2978 + in_reg = src.first()->as_Register(); 1.2979 + } else { 1.2980 + assert(Assembler::is_simm13(reg2offset(src.first()) + STACK_BIAS), 1.2981 + "must be"); 1.2982 + __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, in_reg); 1.2983 + } 1.2984 + // If the final destination is an acceptable register 1.2985 + if ( dst.first()->is_reg() ) { 1.2986 + if ( dst.is_single_phys_reg() || out_sig_bt[c_arg] != T_LONG ) { 1.2987 + tmp = dst.first()->as_Register(); 1.2988 + } 1.2989 + } 1.2990 + 1.2991 + Label skipUnbox; 1.2992 + if ( wordSize == 4 && out_sig_bt[c_arg] == T_LONG ) { 1.2993 + __ mov(G0, tmp->successor()); 1.2994 + } 1.2995 + __ br_null(in_reg, true, Assembler::pn, skipUnbox); 1.2996 + __ delayed()->mov(G0, tmp); 1.2997 + 1.2998 + BasicType bt = out_sig_bt[c_arg]; 1.2999 + int box_offset = java_lang_boxing_object::value_offset_in_bytes(bt); 1.3000 + switch (bt) { 1.3001 + case T_BYTE: 1.3002 + __ ldub(in_reg, box_offset, tmp); break; 1.3003 + case T_SHORT: 1.3004 + __ lduh(in_reg, box_offset, tmp); break; 1.3005 + case T_INT: 1.3006 + __ ld(in_reg, box_offset, tmp); break; 1.3007 + case T_LONG: 1.3008 + __ ld_long(in_reg, box_offset, tmp); break; 1.3009 + default: ShouldNotReachHere(); 1.3010 + } 1.3011 + 1.3012 + __ bind(skipUnbox); 1.3013 + // If tmp wasn't final destination copy to final destination 1.3014 + if (tmp == L2) { 1.3015 + VMRegPair tmp_as_VM = reg64_to_VMRegPair(L2); 1.3016 + if (out_sig_bt[c_arg] == T_LONG) { 1.3017 + long_move(masm, tmp_as_VM, dst); 1.3018 + } else { 1.3019 + 
move32_64(masm, tmp_as_VM, out_regs[c_arg]); 1.3020 + } 1.3021 + } 1.3022 + if (out_sig_bt[c_arg] == T_LONG) { 1.3023 + assert(out_sig_bt[c_arg+1] == T_VOID, "must be"); 1.3024 + ++c_arg; // move over the T_VOID to keep the loop indices in sync 1.3025 + } 1.3026 + } else if (out_sig_bt[c_arg] == T_ADDRESS) { 1.3027 + Register s = 1.3028 + src.first()->is_reg() ? src.first()->as_Register() : L2; 1.3029 + Register d = 1.3030 + dst.first()->is_reg() ? dst.first()->as_Register() : L2; 1.3031 + 1.3032 + // We store the oop now so that the conversion pass can reach 1.3033 + // while in the inner frame. This will be the only store if 1.3034 + // the oop is NULL. 1.3035 + if (s != L2) { 1.3036 + // src is register 1.3037 + if (d != L2) { 1.3038 + // dst is register 1.3039 + __ mov(s, d); 1.3040 + } else { 1.3041 + assert(Assembler::is_simm13(reg2offset(dst.first()) + 1.3042 + STACK_BIAS), "must be"); 1.3043 + __ st_ptr(s, SP, reg2offset(dst.first()) + STACK_BIAS); 1.3044 + } 1.3045 + } else { 1.3046 + // src not a register 1.3047 + assert(Assembler::is_simm13(reg2offset(src.first()) + 1.3048 + STACK_BIAS), "must be"); 1.3049 + __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, d); 1.3050 + if (d == L2) { 1.3051 + assert(Assembler::is_simm13(reg2offset(dst.first()) + 1.3052 + STACK_BIAS), "must be"); 1.3053 + __ st_ptr(d, SP, reg2offset(dst.first()) + STACK_BIAS); 1.3054 + } 1.3055 + } 1.3056 + } else if (out_sig_bt[c_arg] != T_VOID) { 1.3057 + // Convert the arg to NULL 1.3058 + if (dst.first()->is_reg()) { 1.3059 + __ mov(G0, dst.first()->as_Register()); 1.3060 + } else { 1.3061 + assert(Assembler::is_simm13(reg2offset(dst.first()) + 1.3062 + STACK_BIAS), "must be"); 1.3063 + __ st_ptr(G0, SP, reg2offset(dst.first()) + STACK_BIAS); 1.3064 + } 1.3065 + } 1.3066 + } 1.3067 + break; 1.3068 + case T_VOID: 1.3069 + break; 1.3070 + 1.3071 + case T_FLOAT: 1.3072 + if (src.first()->is_stack()) { 1.3073 + // Stack to stack/reg is simple 1.3074 + move32_64(masm, src, dst); 1.3075 + } else { 1.3076 + if (dst.first()->is_reg()) { 1.3077 + // freg -> reg 1.3078 + int off = 1.3079 + STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; 1.3080 + Register d = dst.first()->as_Register(); 1.3081 + if (Assembler::is_simm13(off)) { 1.3082 + __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), 1.3083 + SP, off); 1.3084 + __ ld(SP, off, d); 1.3085 + } else { 1.3086 + if (conversion_off == noreg) { 1.3087 + __ set(off, L6); 1.3088 + conversion_off = L6; 1.3089 + } 1.3090 + __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), 1.3091 + SP, conversion_off); 1.3092 + __ ld(SP, conversion_off , d); 1.3093 + } 1.3094 + } else { 1.3095 + // freg -> mem 1.3096 + int off = STACK_BIAS + reg2offset(dst.first()); 1.3097 + if (Assembler::is_simm13(off)) { 1.3098 + __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), 1.3099 + SP, off); 1.3100 + } else { 1.3101 + if (conversion_off == noreg) { 1.3102 + __ set(off, L6); 1.3103 + conversion_off = L6; 1.3104 + } 1.3105 + __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), 1.3106 + SP, conversion_off); 1.3107 + } 1.3108 + } 1.3109 + } 1.3110 + break; 1.3111 + 1.3112 + case T_DOUBLE: 1.3113 + assert( j_arg + 1 < total_args_passed && 1.3114 + in_sig_bt[j_arg + 1] == T_VOID && 1.3115 + out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); 1.3116 + if (src.first()->is_stack()) { 1.3117 + // Stack to stack/reg is simple 1.3118 + long_move(masm, src, dst); 1.3119 + } else { 1.3120 + Register d = dst.first()->is_reg() ? 
dst.first()->as_Register() : L2;
1.3121 +
1.3122 +      // Destination could be an odd reg on 32bit in which case
1.3123 +      // we can't load directly to the destination.
1.3124 +
1.3125 +      if (!d->is_even() && wordSize == 4) {
1.3126 +        d = L2;
1.3127 +      }
1.3128 +      int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size;
1.3129 +      if (Assembler::is_simm13(off)) {
1.3130 +        __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(),
1.3131 +               SP, off);
1.3132 +        __ ld_long(SP, off, d);
1.3133 +      } else {
1.3134 +        if (conversion_off == noreg) {
1.3135 +          __ set(off, L6);
1.3136 +          conversion_off = L6;
1.3137 +        }
1.3138 +        __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(),
1.3139 +               SP, conversion_off);
1.3140 +        __ ld_long(SP, conversion_off, d);
1.3141 +      }
1.3142 +      if (d == L2) {
1.3143 +        long_move(masm, reg64_to_VMRegPair(L2), dst);
1.3144 +      }
1.3145 +    }
1.3146 +    break;
1.3147 +
1.3148 +    case T_LONG :
1.3149 +      // 32bit can't do a split move of something like g1 -> O0, O1,
1.3150 +      // so use a memory temp
1.3151 +      if (src.is_single_phys_reg() && wordSize == 4) {
1.3152 +        Register tmp = L2;
1.3153 +        if (dst.first()->is_reg() &&
1.3154 +            (wordSize == 8 || dst.first()->as_Register()->is_even())) {
1.3155 +          tmp = dst.first()->as_Register();
1.3156 +        }
1.3157 +
1.3158 +        int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size;
1.3159 +        if (Assembler::is_simm13(off)) {
1.3160 +          __ stx(src.first()->as_Register(), SP, off);
1.3161 +          __ ld_long(SP, off, tmp);
1.3162 +        } else {
1.3163 +          if (conversion_off == noreg) {
1.3164 +            __ set(off, L6);
1.3165 +            conversion_off = L6;
1.3166 +          }
1.3167 +          __ stx(src.first()->as_Register(), SP, conversion_off);
1.3168 +          __ ld_long(SP, conversion_off, tmp);
1.3169 +        }
1.3170 +
1.3171 +        if (tmp == L2) {
1.3172 +          long_move(masm, reg64_to_VMRegPair(L2), dst);
1.3173 +        }
1.3174 +      } else {
1.3175 +        long_move(masm, src, dst);
1.3176 +      }
1.3177 +      break;
1.3178 +
1.3179 +    case T_ADDRESS: assert(false, "found T_ADDRESS in java args");
1.3180 +
1.3181 +    default:
1.3182 +      move32_64(masm, src, dst);
1.3183 +  }
1.3184 +  }
1.3185 +
1.3186 +
1.3187 +  // If we have any strings we must store any register based arg to the stack
1.3188 +  // This includes any still-live float registers too.
1.3189 +
1.3190 +  if (total_strings > 0 ) {
1.3191 +
1.3192 +    // protect all the arg registers
1.3193 +    __ save_frame(0);
1.3194 +    __ mov(G2_thread, L7_thread_cache);
1.3195 +    const Register L2_string_off = L2;
1.3196 +
1.3197 +    // Get first string offset
1.3198 +    __ set(string_locs * VMRegImpl::stack_slot_size, L2_string_off);
1.3199 +
1.3200 +    for (c_arg = 0 ; c_arg < total_c_args ; c_arg++ ) {
1.3201 +      if (out_sig_bt[c_arg] == T_ADDRESS) {
1.3202 +
1.3203 +        VMRegPair dst = out_regs[c_arg];
1.3204 +        const Register d = dst.first()->is_reg() ?
1.3205 +          dst.first()->as_Register()->after_save() : noreg;
1.3206 +
1.3207 +        // It's a string; the oop was already copied to the out-arg
1.3208 +        // position
1.3209 +        if (d != noreg) {
1.3210 +          __ mov(d, O0);
1.3211 +        } else {
1.3212 +          assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS),
1.3213 +                 "must be");
1.3214 +          __ ld_ptr(FP, reg2offset(dst.first()) + STACK_BIAS, O0);
1.3215 +        }
1.3216 +        Label skip;
1.3217 +
1.3218 +        __ br_null(O0, false, Assembler::pn, skip);
1.3219 +        __ delayed()->add(FP, L2_string_off, O1);
1.3220 +
1.3221 +        if (d != noreg) {
1.3222 +          __ mov(O1, d);
1.3223 +        } else {
1.3224 +          assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS),
1.3225 +                 "must be");
1.3226 +          __ st_ptr(O1, FP, reg2offset(dst.first()) + STACK_BIAS);
1.3227 +        }
1.3228 +
1.3229 +        __ call(CAST_FROM_FN_PTR(address, SharedRuntime::get_utf),
1.3230 +                relocInfo::runtime_call_type);
1.3231 +        __ delayed()->add(L2_string_off, max_dtrace_string_size, L2_string_off);
1.3232 +
1.3233 +        __ bind(skip);
1.3234 +
1.3235 +      }
1.3236 +
1.3237 +    }
1.3238 +    __ mov(L7_thread_cache, G2_thread);
1.3239 +    __ restore();
1.3240 +
1.3241 +  }
1.3242 +
1.3243 +
1.3244 +  // OK, now we are done. Place the nop that dtrace wants so it can
1.3245 +  // patch in the trap.
1.3246 +
1.3247 +  int patch_offset = ((intptr_t)__ pc()) - start;
1.3248 +
1.3249 +  __ nop();
1.3250 +
1.3251 +
1.3252 +  // Return
1.3253 +
1.3254 +  __ ret();
1.3255 +  __ delayed()->restore();
1.3256 +
1.3257 +  __ flush();
1.3258 +
1.3259 +  nmethod *nm = nmethod::new_dtrace_nmethod(
1.3260 +      method, masm->code(), vep_offset, patch_offset, frame_complete,
1.3261 +      stack_slots / VMRegImpl::slots_per_word);
1.3262 +  return nm;
1.3263 +
1.3264 +}
1.3265 +
1.3266 +#endif // HAVE_DTRACE_H
1.3267 +
1.3268 +// this function returns the adjusted size (in number of words) of a c2i adapter
1.3269 +// activation for use during deoptimization
1.3270 +int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
1.3271 +  assert(callee_locals >= callee_parameters,
1.3272 +         "test and remove; got more parms than locals");
1.3273 +  if (callee_locals < callee_parameters)
1.3274 +    return 0; // No adjustment for negative locals
1.3275 +  int diff = (callee_locals - callee_parameters) * Interpreter::stackElementWords;
1.3276 +  return round_to(diff, WordsPerLong);
1.3277 +}
1.3278 +
1.3279 +// "Top of Stack" slots that may be unused by the calling convention but must
1.3280 +// otherwise be preserved.
1.3281 +// On Intel these are not necessary and the value can be zero.
1.3282 +// On Sparc this describes the words reserved for storing a register window
1.3283 +// when an interrupt occurs.
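Given the comment above, a quick arithmetic check of the function that follows (assuming frame::register_save_words == 16 on SPARC, i.e. the eight %i plus eight %l window registers, and slots_per_word of 1 on 32-bit or 2 on 64-bit; these values are stated here as assumptions, not read from this file):

  // Illustrative only: 16 words * 1 slot/word = 16 slots on 32-bit,
  // 16 words * 2 slots/word = 32 slots on 64-bit.
  static unsigned out_preserve_slots(unsigned register_save_words,
                                     unsigned slots_per_word) {
    return register_save_words * slots_per_word;
  }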
1.3284 +uint SharedRuntime::out_preserve_stack_slots() {
1.3285 +  return frame::register_save_words * VMRegImpl::slots_per_word;
1.3286 +}
1.3287 +
1.3288 +static void gen_new_frame(MacroAssembler* masm, bool deopt) {
1.3289 +//
1.3290 +// Common out the new frame generation for deopt and uncommon trap
1.3291 +//
1.3292 +  Register G3pcs = G3_scratch; // Array of new pcs (input)
1.3293 +  Register Oreturn0 = O0;
1.3294 +  Register Oreturn1 = O1;
1.3295 +  Register O2UnrollBlock = O2;
1.3296 +  Register O3array = O3; // Array of frame sizes (input)
1.3297 +  Register O4array_size = O4; // number of frames (input)
1.3298 +  Register O7frame_size = O7; // frame size (input)
1.3299 +
1.3300 +  __ ld_ptr(O3array, 0, O7frame_size);
1.3301 +  __ sub(G0, O7frame_size, O7frame_size);
1.3302 +  __ save(SP, O7frame_size, SP);
1.3303 +  __ ld_ptr(G3pcs, 0, I7); // load frame's new pc
1.3304 +
1.3305 +  #ifdef ASSERT
1.3306 +  // make sure that the frames are aligned properly
1.3307 +#ifndef _LP64
1.3308 +  __ btst(wordSize*2-1, SP);
1.3309 +  __ breakpoint_trap(Assembler::notZero, Assembler::ptr_cc);
1.3310 +#endif
1.3311 +  #endif
1.3312 +
1.3313 +  // Deopt needs to pass some extra live values from frame to frame
1.3314 +
1.3315 +  if (deopt) {
1.3316 +    __ mov(Oreturn0->after_save(), Oreturn0);
1.3317 +    __ mov(Oreturn1->after_save(), Oreturn1);
1.3318 +  }
1.3319 +
1.3320 +  __ mov(O4array_size->after_save(), O4array_size);
1.3321 +  __ sub(O4array_size, 1, O4array_size);
1.3322 +  __ mov(O3array->after_save(), O3array);
1.3323 +  __ mov(O2UnrollBlock->after_save(), O2UnrollBlock);
1.3324 +  __ add(G3pcs, wordSize, G3pcs); // point to next pc value
1.3325 +
1.3326 +  #ifdef ASSERT
1.3327 +  // trash registers to show a clear pattern in backtraces
1.3328 +  __ set(0xDEAD0000, I0);
1.3329 +  __ add(I0,  2, I1);
1.3330 +  __ add(I0,  4, I2);
1.3331 +  __ add(I0,  6, I3);
1.3332 +  __ add(I0,  8, I4);
1.3333 +  // Don't touch I5; it could hold the valuable savedSP
1.3334 +  __ set(0xDEADBEEF, L0);
1.3335 +  __ mov(L0, L1);
1.3336 +  __ mov(L0, L2);
1.3337 +  __ mov(L0, L3);
1.3338 +  __ mov(L0, L4);
1.3339 +  __ mov(L0, L5);
1.3340 +
1.3341 +  // trash the return value as there is nothing to return yet
1.3342 +  __ set(0xDEAD0001, O7);
1.3343 +  #endif
1.3344 +
1.3345 +  __ mov(SP, O5_savedSP);
1.3346 +}
1.3347 +
1.3348 +
1.3349 +static void make_new_frames(MacroAssembler* masm, bool deopt) {
1.3350 +  //
1.3351 +  // loop through the UnrollBlock info and create new frames
1.3352 +  //
1.3353 +  Register G3pcs = G3_scratch;
1.3354 +  Register Oreturn0 = O0;
1.3355 +  Register Oreturn1 = O1;
1.3356 +  Register O2UnrollBlock = O2;
1.3357 +  Register O3array = O3;
1.3358 +  Register O4array_size = O4;
1.3359 +  Label loop;
1.3360 +
1.3361 +#ifdef ASSERT
1.3362 +  // Compilers generate code that bangs the stack by as much as the
1.3363 +  // interpreter would need. So this stack banging should never
1.3364 +  // trigger a fault. Verify that it does not on non-product builds.
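An aside now that gen_new_frame is in place (the UseStackBanging assertion resumes just below): the loop in make_new_frames drives one gen_new_frame per interpreter frame, consuming the UnrollBlock's arrays. A simplified C++ model of that traversal (the struct fields mirror the offsets used below; allocate_frame is an illustrative callback, not VM code):

  struct UnrollBlockModel {
    int          number_of_frames;
    const long*  frame_sizes; // one size per interpreter frame
    void* const* frame_pcs;   // one resume pc per frame, plus a final pc
  };
  static void push_frames(const UnrollBlockModel* ub,
                          void (*allocate_frame)(long size, void* pc)) {
    for (int i = 0; i < ub->number_of_frames; i++) {
      allocate_frame(ub->frame_sizes[i], ub->frame_pcs[i]); // 'save' + new I7
    }
    // ub->frame_pcs[number_of_frames] is loaded last into O7 as the final
    // return pc (the ld_ptr after the loop below)
  }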
1.3365 +  if (UseStackBanging) {
1.3366 +    // Get total frame size for interpreted frames
1.3367 +    __ ld(O2UnrollBlock, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes(), O4);
1.3368 +    __ bang_stack_size(O4, O3, G3_scratch);
1.3369 +  }
1.3370 +#endif
1.3371 +
1.3372 +  __ ld(O2UnrollBlock, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes(), O4array_size);
1.3373 +  __ ld_ptr(O2UnrollBlock, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes(), G3pcs);
1.3374 +  __ ld_ptr(O2UnrollBlock, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes(), O3array);
1.3375 +
1.3376 +  // Adjust old interpreter frame to make space for new frame's extra java locals
1.3377 +  //
1.3378 +  // We capture the original sp for the transition frame only because it is needed in
1.3379 +  // order to properly calculate interpreter_sp_adjustment. Even though in real life
1.3380 +  // every interpreter frame captures a savedSP it is only needed at the transition
1.3381 +  // (fortunately). If we had to have it correct everywhere then we would need to
1.3382 +  // be told the sp_adjustment for each frame we create. If the frame size array
1.3383 +  // were to have twice the frame count entries then we could have pairs [sp_adjustment, frame_size]
1.3384 +  // for each frame we create and keep up the illusion everywhere.
1.3385 +  //
1.3386 +
1.3387 +  __ ld(O2UnrollBlock, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes(), O7);
1.3388 +  __ mov(SP, O5_savedSP); // remember initial sender's original sp before adjustment
1.3389 +  __ sub(SP, O7, SP);
1.3390 +
1.3391 +#ifdef ASSERT
1.3392 +  // make sure that there is at least one entry in the array
1.3393 +  __ tst(O4array_size);
1.3394 +  __ breakpoint_trap(Assembler::zero, Assembler::icc);
1.3395 +#endif
1.3396 +
1.3397 +  // Now push the new interpreter frames
1.3398 +  __ bind(loop);
1.3399 +
1.3400 +  // allocate a new frame, filling the registers
1.3401 +
1.3402 +  gen_new_frame(masm, deopt); // allocate an interpreter frame
1.3403 +
1.3404 +  __ cmp_zero_and_br(Assembler::notZero, O4array_size, loop);
1.3405 +  __ delayed()->add(O3array, wordSize, O3array);
1.3406 +  __ ld_ptr(G3pcs, 0, O7); // load final frame new pc
1.3407 +
1.3408 +}
1.3409 +
1.3410 +//------------------------------generate_deopt_blob----------------------------
1.3411 +// Ought to generate an ideal graph & compile, but here's some SPARC ASM
1.3412 +// instead.
1.3413 +void SharedRuntime::generate_deopt_blob() {
1.3414 +  // allocate space for the code
1.3415 +  ResourceMark rm;
1.3416 +  // setup code generation tools
1.3417 +  int pad = VerifyThread ?
512 : 0; // Extra slop space for more verify code
1.3418 +#ifdef ASSERT
1.3419 +  if (UseStackBanging) {
1.3420 +    pad += StackShadowPages*16 + 32;
1.3421 +  }
1.3422 +#endif
1.3423 +#ifdef _LP64
1.3424 +  CodeBuffer buffer("deopt_blob", 2100+pad, 512);
1.3425 +#else
1.3426 +  // Measured 8/7/03 at 1212 in 32bit debug build (no VerifyThread)
1.3427 +  // Measured 8/7/03 at 1396 in 32bit debug build (VerifyThread)
1.3428 +  CodeBuffer buffer("deopt_blob", 1600+pad, 512);
1.3429 +#endif /* _LP64 */
1.3430 +  MacroAssembler* masm = new MacroAssembler(&buffer);
1.3431 +  FloatRegister Freturn0 = F0;
1.3432 +  Register Greturn1 = G1;
1.3433 +  Register Oreturn0 = O0;
1.3434 +  Register Oreturn1 = O1;
1.3435 +  Register O2UnrollBlock = O2;
1.3436 +  Register L0deopt_mode = L0;
1.3437 +  Register G4deopt_mode = G4_scratch;
1.3438 +  int frame_size_words;
1.3439 +  Address saved_Freturn0_addr(FP, -sizeof(double) + STACK_BIAS);
1.3440 +#if !defined(_LP64) && defined(COMPILER2)
1.3441 +  Address saved_Greturn1_addr(FP, -sizeof(double) -sizeof(jlong) + STACK_BIAS);
1.3442 +#endif
1.3443 +  Label cont;
1.3444 +
1.3445 +  OopMapSet *oop_maps = new OopMapSet();
1.3446 +
1.3447 +  //
1.3448 +  // This is the entry point for code which is returning to a de-optimized
1.3449 +  // frame.
1.3450 +  // The steps taken by this frame are as follows:
1.3451 +  //   - push a dummy "register_save" and save the return values (O0, O1, F0/F1, G1)
1.3452 +  //     and all potentially live registers (at a pollpoint many registers can be live).
1.3453 +  //
1.3454 +  //   - call the C routine: Deoptimization::fetch_unroll_info (this function
1.3455 +  //     returns information about the number and size of interpreter frames
1.3456 +  //     which are equivalent to the frame which is being deoptimized)
1.3457 +  //   - deallocate the unpack frame, restoring only result values. Other
1.3458 +  //     volatile registers will now be captured in the vframeArray as needed.
1.3459 +  //   - deallocate the deoptimization frame
1.3460 +  //   - in a loop using the information returned in the previous step
1.3461 +  //     push new interpreter frames (take care to propagate the return
1.3462 +  //     values through each new frame pushed)
1.3463 +  //   - create a dummy "unpack_frame" and save the return values (O0, O1, F0)
1.3464 +  //   - call the C routine: Deoptimization::unpack_frames (this function
1.3465 +  //     lays out values on the interpreter frame which was just created)
1.3466 +  //   - deallocate the dummy unpack_frame
1.3467 +  //   - ensure that all the return values are correctly set and then do
1.3468 +  //     a return to the interpreter entry point
1.3469 +  //
1.3470 +  // Refer to the following methods for more information:
1.3471 +  //   - Deoptimization::fetch_unroll_info
1.3472 +  //   - Deoptimization::unpack_frames
1.3473 +
1.3474 +  OopMap* map = NULL;
1.3475 +
1.3476 +  int start = __ offset();
1.3477 +
1.3478 +  // restore G2, the trampoline destroyed it
1.3479 +  __ get_thread();
1.3480 +
1.3481 +  // On entry we have been called by the deoptimized nmethod with a call that
1.3482 +  // replaced the original call (or safepoint polling location) so the deoptimizing
1.3483 +  // pc is now in O7.
Return values are still in the expected places.
1.3484 +
1.3485 +  map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words);
1.3486 +  __ ba(cont);
1.3487 +  __ delayed()->mov(Deoptimization::Unpack_deopt, L0deopt_mode);
1.3488 +
1.3489 +  int exception_offset = __ offset() - start;
1.3490 +
1.3491 +  // restore G2, the trampoline destroyed it
1.3492 +  __ get_thread();
1.3493 +
1.3494 +  // On entry we have been jumped to by the exception handler (or exception_blob
1.3495 +  // for server). O0 contains the exception oop and O7 contains the original
1.3496 +  // exception pc. So if we push a frame here it will look to the
1.3497 +  // stack walking code (fetch_unroll_info) just like a normal call so
1.3498 +  // state will be extracted normally.
1.3499 +
1.3500 +  // save exception oop in JavaThread and fall through into the
1.3501 +  // exception_in_tls case since they are handled in same way except
1.3502 +  // for where the pending exception is kept.
1.3503 +  __ st_ptr(Oexception, G2_thread, JavaThread::exception_oop_offset());
1.3504 +
1.3505 +  //
1.3506 +  // Vanilla deoptimization with an exception pending in exception_oop
1.3507 +  //
1.3508 +  int exception_in_tls_offset = __ offset() - start;
1.3509 +
1.3510 +  // No need to update oop_map as each call to save_live_registers will produce identical oopmap
1.3511 +  (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_words);
1.3512 +
1.3513 +  // Restore G2_thread
1.3514 +  __ get_thread();
1.3515 +
1.3516 +#ifdef ASSERT
1.3517 +  {
1.3518 +    // verify that there is really an exception oop in exception_oop
1.3519 +    Label has_exception;
1.3520 +    __ ld_ptr(G2_thread, JavaThread::exception_oop_offset(), Oexception);
1.3521 +    __ br_notnull_short(Oexception, Assembler::pt, has_exception);
1.3522 +    __ stop("no exception in thread");
1.3523 +    __ bind(has_exception);
1.3524 +
1.3525 +    // verify that there is no pending exception
1.3526 +    Label no_pending_exception;
1.3527 +    Address exception_addr(G2_thread, Thread::pending_exception_offset());
1.3528 +    __ ld_ptr(exception_addr, Oexception);
1.3529 +    __ br_null_short(Oexception, Assembler::pt, no_pending_exception);
1.3530 +    __ stop("must not have pending exception here");
1.3531 +    __ bind(no_pending_exception);
1.3532 +  }
1.3533 +#endif
1.3534 +
1.3535 +  __ ba(cont);
1.3536 +  __ delayed()->mov(Deoptimization::Unpack_exception, L0deopt_mode);
1.3537 +
1.3538 +  //
1.3539 +  // Reexecute entry, similar to c2 uncommon trap
1.3540 +  //
1.3541 +  int reexecute_offset = __ offset() - start;
1.3542 +
1.3543 +  // No need to update oop_map as each call to save_live_registers will produce identical oopmap
1.3544 +  (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_words);
1.3545 +
1.3546 +  __ mov(Deoptimization::Unpack_reexecute, L0deopt_mode);
1.3547 +
1.3548 +  __ bind(cont);
1.3549 +
1.3550 +  __ set_last_Java_frame(SP, noreg);
1.3551 +
1.3552 +  // do the call by hand so we can get the oopmap
1.3553 +
1.3554 +  __ mov(G2_thread, L7_thread_cache);
1.3555 +  __ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), relocInfo::runtime_call_type);
1.3556 +  __ delayed()->mov(G2_thread, O0);
1.3557 +
1.3558 +  // Set an oopmap for the call site; this describes all our saved volatile registers
1.3559 +
1.3560 +  oop_maps->add_gc_map( __ offset()-start, map);
1.3561 +
1.3562 +  __ mov(L7_thread_cache, G2_thread);
1.3563 +
1.3564 +  __ reset_last_Java_frame();
1.3565 +
1.3566 +  // NOTE: we know that only O0/O1 will be reloaded by restore_result_registers
1.3567 +  // so this move will survive
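For orientation before the mode value is consumed below: the three entry points generated above differ only in the Unpack_* mode left in L0deopt_mode before control meets at cont. Sketched as a C++ enum (the names mirror Deoptimization's constants; the numeric values and ordering here are illustrative only):

  enum UnpackMode {
    Unpack_deopt,     // plain deopt: the return address was patched at a call/poll
    Unpack_exception, // entered from the exception handler; oop parked in TLS
    Unpack_reexecute  // re-execute the bytecode, like a C2 uncommon trap
  };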
1.3568 +
1.3569 +  __ mov(L0deopt_mode, G4deopt_mode);
1.3570 +
1.3571 +  __ mov(O0, O2UnrollBlock->after_save());
1.3572 +
1.3573 +  RegisterSaver::restore_result_registers(masm);
1.3574 +
1.3575 +  Label noException;
1.3576 +  __ cmp_and_br_short(G4deopt_mode, Deoptimization::Unpack_exception, Assembler::notEqual, Assembler::pt, noException);
1.3577 +
1.3578 +  // Move the pending exception from exception_oop to Oexception so
1.3579 +  // the pending exception will be picked up by the interpreter.
1.3580 +  __ ld_ptr(G2_thread, in_bytes(JavaThread::exception_oop_offset()), Oexception);
1.3581 +  __ st_ptr(G0, G2_thread, in_bytes(JavaThread::exception_oop_offset()));
1.3582 +  __ st_ptr(G0, G2_thread, in_bytes(JavaThread::exception_pc_offset()));
1.3583 +  __ bind(noException);
1.3584 +
1.3585 +  // deallocate the deoptimization frame taking care to preserve the return values
1.3586 +  __ mov(Oreturn0, Oreturn0->after_save());
1.3587 +  __ mov(Oreturn1, Oreturn1->after_save());
1.3588 +  __ mov(O2UnrollBlock, O2UnrollBlock->after_save());
1.3589 +  __ restore();
1.3590 +
1.3591 +  // Allocate new interpreter frame(s) and possible c2i adapter frame
1.3592 +
1.3593 +  make_new_frames(masm, true);
1.3594 +
1.3595 +  // push a dummy "unpack_frame" taking care of float return values and
1.3596 +  // call Deoptimization::unpack_frames to have the unpacker layout
1.3597 +  // information in the interpreter frames just created and then return
1.3598 +  // to the interpreter entry point
1.3599 +  __ save(SP, -frame_size_words*wordSize, SP);
1.3600 +  __ stf(FloatRegisterImpl::D, Freturn0, saved_Freturn0_addr);
1.3601 +#if !defined(_LP64)
1.3602 +#if defined(COMPILER2)
1.3603 +  // 32-bit 1-register longs return longs in G1
1.3604 +  __ stx(Greturn1, saved_Greturn1_addr);
1.3605 +#endif
1.3606 +  __ set_last_Java_frame(SP, noreg);
1.3607 +  __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), G2_thread, G4deopt_mode);
1.3608 +#else
1.3609 +  // LP64 uses g4 in set_last_Java_frame
1.3610 +  __ mov(G4deopt_mode, O1);
1.3611 +  __ set_last_Java_frame(SP, G0);
1.3612 +  __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), G2_thread, O1);
1.3613 +#endif
1.3614 +  __ reset_last_Java_frame();
1.3615 +  __ ldf(FloatRegisterImpl::D, saved_Freturn0_addr, Freturn0);
1.3616 +
1.3617 +#if !defined(_LP64) && defined(COMPILER2)
1.3618 +  // In 32 bit, C2 returns longs in G1 so restore the saved G1 into
1.3619 +  // I0/I1 if the return value is long.
1.3620 +  Label not_long;
1.3621 +  __ cmp_and_br_short(O0, T_LONG, Assembler::notEqual, Assembler::pt, not_long);
1.3622 +  __ ldd(saved_Greturn1_addr, I0);
1.3623 +  __ bind(not_long);
1.3624 +#endif
1.3625 +  __ ret();
1.3626 +  __ delayed()->restore();
1.3627 +
1.3628 +  masm->flush();
1.3629 +  _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_words);
1.3630 +  _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
1.3631 +}
1.3632 +
1.3633 +#ifdef COMPILER2
1.3634 +
1.3635 +//------------------------------generate_uncommon_trap_blob--------------------
1.3636 +// Ought to generate an ideal graph & compile, but here's some SPARC ASM
1.3637 +// instead.
1.3638 +void SharedRuntime::generate_uncommon_trap_blob() {
1.3639 +  // allocate space for the code
1.3640 +  ResourceMark rm;
1.3641 +  // setup code generation tools
1.3642 +  int pad = VerifyThread ?
512 : 0;
1.3643 +#ifdef ASSERT
1.3644 +  if (UseStackBanging) {
1.3645 +    pad += StackShadowPages*16 + 32;
1.3646 +  }
1.3647 +#endif
1.3648 +#ifdef _LP64
1.3649 +  CodeBuffer buffer("uncommon_trap_blob", 2700+pad, 512);
1.3650 +#else
1.3651 +  // Measured 8/7/03 at 660 in 32bit debug build (no VerifyThread)
1.3652 +  // Measured 8/7/03 at 1028 in 32bit debug build (VerifyThread)
1.3653 +  CodeBuffer buffer("uncommon_trap_blob", 2000+pad, 512);
1.3654 +#endif
1.3655 +  MacroAssembler* masm = new MacroAssembler(&buffer);
1.3656 +  Register O2UnrollBlock = O2;
1.3657 +  Register O2klass_index = O2;
1.3658 +
1.3659 +  //
1.3660 +  // This is the entry point for all traps the compiler takes when it thinks
1.3661 +  // it cannot handle further execution of compiled code. The frame is
1.3662 +  // deoptimized in these cases and converted into interpreter frames for
1.3663 +  // execution.
1.3664 +  // The steps taken by this frame are as follows:
1.3665 +  //   - push a fake "unpack_frame"
1.3666 +  //   - call the C routine Deoptimization::uncommon_trap (this function
1.3667 +  //     packs the current compiled frame into vframe arrays and returns
1.3668 +  //     information about the number and size of interpreter frames which
1.3669 +  //     are equivalent to the frame which is being deoptimized)
1.3670 +  //   - deallocate the "unpack_frame"
1.3671 +  //   - deallocate the deoptimization frame
1.3672 +  //   - in a loop using the information returned in the previous step
1.3673 +  //     push interpreter frames;
1.3674 +  //   - create a dummy "unpack_frame"
1.3675 +  //   - call the C routine: Deoptimization::unpack_frames (this function
1.3676 +  //     lays out values on the interpreter frame which was just created)
1.3677 +  //   - deallocate the dummy unpack_frame
1.3678 +  //   - return to the interpreter entry point
1.3679 +  //
1.3680 +  // Refer to the following methods for more information:
1.3681 +  //   - Deoptimization::uncommon_trap
1.3682 +  //   - Deoptimization::unpack_frames
1.3683 +
1.3684 +  // the unloaded class index is in O0 (first parameter to this blob)
1.3685 +
1.3686 +  // push a dummy "unpack_frame"
1.3687 +  // and call Deoptimization::uncommon_trap to pack the compiled frame into
1.3688 +  // vframe array and return the UnrollBlock information
1.3689 +  __ save_frame(0);
1.3690 +  __ set_last_Java_frame(SP, noreg);
1.3691 +  __ mov(I0, O2klass_index);
1.3692 +  __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap), G2_thread, O2klass_index);
1.3693 +  __ reset_last_Java_frame();
1.3694 +  __ mov(O0, O2UnrollBlock->after_save());
1.3695 +  __ restore();
1.3696 +
1.3697 +  // deallocate the deoptimized frame taking care to preserve the return values
1.3698 +  __ mov(O2UnrollBlock, O2UnrollBlock->after_save());
1.3699 +  __ restore();
1.3700 +
1.3701 +  // Allocate new interpreter frame(s) and possible c2i adapter frame
1.3702 +
1.3703 +  make_new_frames(masm, false);
1.3704 +
1.3705 +  // push a dummy "unpack_frame" taking care of float return values and
1.3706 +  // call Deoptimization::unpack_frames to have the unpacker layout
1.3707 +  // information in the interpreter frames just created and then return
1.3708 +  // to the interpreter entry point
1.3709 +  __ save_frame(0);
1.3710 +  __ set_last_Java_frame(SP, noreg);
1.3711 +  __ mov(Deoptimization::Unpack_uncommon_trap, O3); // indicate it is the uncommon trap case
1.3712 +  __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), G2_thread, O3);
1.3713 +  __ reset_last_Java_frame();
1.3714 +  __ ret();
1.3715 +  __ delayed()->restore();
1.3716 +
1.3717 +  masm->flush();
1.3718 +  _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, NULL, __ total_frame_size_in_bytes(0)/wordSize);
1.3719 +}
1.3720 +
1.3721 +#endif // COMPILER2
1.3722 +
1.3723 +//------------------------------generate_handler_blob-------------------
1.3724 +//
1.3725 +// Generate a special Compile2Runtime blob that saves all registers, and sets
1.3726 +// up an OopMap.
1.3727 +//
1.3728 +// This blob is jumped to (via a breakpoint and the signal handler) from a
1.3729 +// safepoint in compiled code. On entry to this blob, O7 contains the
1.3730 +// address in the original nmethod at which we should resume normal execution.
1.3731 +// Thus, this blob looks like a subroutine which must preserve lots of
1.3732 +// registers and return normally. Note that O7 is never register-allocated,
1.3733 +// so it is guaranteed to be free here.
1.3734 +//
1.3735 +
1.3736 +// The hardest part of what this blob must do is to save the 64-bit %o
1.3737 +// registers in the 32-bit build. A simple 'save' turns the %o's to %i's and
1.3738 +// an interrupt will chop off their heads. Making space in the caller's frame
1.3739 +// first will let us save the 64-bit %o's before save'ing, but we cannot hand
1.3740 +// the adjusted FP off to the GC stack-crawler: this will modify the caller's
1.3741 +// SP and mess up HIS OopMaps. So we first adjust the caller's SP, then save
1.3742 +// the 64-bit %o's, then do a save, then fixup the caller's SP (our FP).
1.3743 +// Tricky, tricky, tricky...
1.3744 +
1.3745 +SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) {
1.3746 +  assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");
1.3747 +
1.3748 +  // allocate space for the code
1.3749 +  ResourceMark rm;
1.3750 +  // setup code generation tools
1.3751 +  // Measured 8/7/03 at 896 in 32bit debug build (no VerifyThread)
1.3752 +  // Measured 8/7/03 at 1080 in 32bit debug build (VerifyThread)
1.3753 +  // even larger with TraceJumps
1.3754 +  int pad = TraceJumps ? 512 : 0;
1.3755 +  CodeBuffer buffer("handler_blob", 1600 + pad, 512);
1.3756 +  MacroAssembler* masm = new MacroAssembler(&buffer);
1.3757 +  int frame_size_words;
1.3758 +  OopMapSet *oop_maps = new OopMapSet();
1.3759 +  OopMap* map = NULL;
1.3760 +
1.3761 +  int start = __ offset();
1.3762 +
1.3763 +  bool cause_return = (poll_type == POLL_AT_RETURN);
1.3764 +  // If this causes a return before the processing, then do a "restore"
1.3765 +  if (cause_return) {
1.3766 +    __ restore();
1.3767 +  } else {
1.3768 +    // Make it look like we were called via the poll
1.3769 +    // so that frame constructor always sees a valid return address
1.3770 +    __ ld_ptr(G2_thread, in_bytes(JavaThread::saved_exception_pc_offset()), O7);
1.3771 +    __ sub(O7, frame::pc_return_offset, O7);
1.3772 +  }
1.3773 +
1.3774 +  map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words);
1.3775 +
1.3776 +  // setup last_Java_sp (blows G4)
1.3777 +  __ set_last_Java_frame(SP, noreg);
1.3778 +
1.3779 +  // call into the runtime to handle the safepoint poll.
1.3780 +  // Do not use call_VM_leaf, because we need to make a GC map at this call site.
1.3781 +  __ mov(G2_thread, O0);
1.3782 +  __ save_thread(L7_thread_cache);
1.3783 +  __ call(call_ptr);
1.3784 +  __ delayed()->nop();
1.3785 +
1.3786 +  // Set an oopmap for the call site.
1.3787 +  // We need this not only for callee-saved registers, but also for volatile
1.3788 +  // registers that the compiler might be keeping live across a safepoint.
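Summarizing the blob being assembled here (the oopmap registration follows just below): a hedged C++ model of its control flow, with the emitted sequences reduced to comments on a tiny struct (nothing here is a real VM type):

  struct BlobState { bool cause_return; bool pending_exception; };
  // Returns true when control goes back into the nmethod, false when the
  // pending exception is forwarded instead.
  static bool handler_blob_shape(const BlobState& s) {
    // 1. cause_return: drop the current frame ('restore'); otherwise load the
    //    saved exception pc into O7 so the frame looks like a real call.
    // 2. save all live registers (described by the oopmap above).
    // 3. call the runtime handler; a GC map is recorded at the call pc.
    // 4. restore registers, then either return via retl (O7 still holds the
    //    resume pc) or tail-call forward_exception_entry.
    return !s.pending_exception;
  }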
1.3789 +
1.3790 +  oop_maps->add_gc_map( __ offset() - start, map);
1.3791 +
1.3792 +  __ restore_thread(L7_thread_cache);
1.3793 +  // clear last_Java_sp
1.3794 +  __ reset_last_Java_frame();
1.3795 +
1.3796 +  // Check for exceptions
1.3797 +  Label pending;
1.3798 +
1.3799 +  __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), O1);
1.3800 +  __ br_notnull_short(O1, Assembler::pn, pending);
1.3801 +
1.3802 +  RegisterSaver::restore_live_registers(masm);
1.3803 +
1.3804 +  // We are back to the original state on entry and ready to go.
1.3805 +
1.3806 +  __ retl();
1.3807 +  __ delayed()->nop();
1.3808 +
1.3809 +  // Pending exception after the safepoint
1.3810 +
1.3811 +  __ bind(pending);
1.3812 +
1.3813 +  RegisterSaver::restore_live_registers(masm);
1.3814 +
1.3815 +  // We are back to the original state on entry.
1.3816 +
1.3817 +  // Tail-call forward_exception_entry, with the issuing PC in O7,
1.3818 +  // so it looks like the original nmethod called forward_exception_entry.
1.3819 +  __ set((intptr_t)StubRoutines::forward_exception_entry(), O0);
1.3820 +  __ JMP(O0, 0);
1.3821 +  __ delayed()->nop();
1.3822 +
1.3823 +  // -------------
1.3824 +  // make sure all code is generated
1.3825 +  masm->flush();
1.3826 +
1.3827 +  // return exception blob
1.3828 +  return SafepointBlob::create(&buffer, oop_maps, frame_size_words);
1.3829 +}
1.3830 +
1.3831 +//
1.3832 +// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss)
1.3833 +//
1.3834 +// Generate a stub that calls into vm to find out the proper destination
1.3835 +// of a java call. All the argument registers are live at this point
1.3836 +// but since this is generic code we don't know what they are and the caller
1.3837 +// must do any gc of the args.
1.3838 +//
1.3839 +RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) {
1.3840 +  assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");
1.3841 +
1.3842 +  // allocate space for the code
1.3843 +  ResourceMark rm;
1.3844 +  // setup code generation tools
1.3845 +  // Measured 8/7/03 at 896 in 32bit debug build (no VerifyThread)
1.3846 +  // Measured 8/7/03 at 1080 in 32bit debug build (VerifyThread)
1.3847 +  // even larger with TraceJumps
1.3848 +  int pad = TraceJumps ? 512 : 0;
1.3849 +  CodeBuffer buffer(name, 1600 + pad, 512);
1.3850 +  MacroAssembler* masm = new MacroAssembler(&buffer);
1.3851 +  int frame_size_words;
1.3852 +  OopMapSet *oop_maps = new OopMapSet();
1.3853 +  OopMap* map = NULL;
1.3854 +
1.3855 +  int start = __ offset();
1.3856 +
1.3857 +  map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words);
1.3858 +
1.3859 +  int frame_complete = __ offset();
1.3860 +
1.3861 +  // setup last_Java_sp (blows G4)
1.3862 +  __ set_last_Java_frame(SP, noreg);
1.3863 +
1.3864 +  // call into the runtime to resolve the call destination.
1.3865 +  // Do not use call_VM_leaf, because we need to make a GC map at this call site.
1.3866 +  __ mov(G2_thread, O0);
1.3867 +  __ save_thread(L7_thread_cache);
1.3868 +  __ call(destination, relocInfo::runtime_call_type);
1.3869 +  __ delayed()->nop();
1.3870 +
1.3871 +  // O0 contains the address we are going to jump to assuming no exception got installed
1.3872 +
1.3873 +  // Set an oopmap for the call site.
1.3874 +  // We need this not only for callee-saved registers, but also for volatile
1.3875 +  // registers that the compiler might be keeping live across a safepoint.
1.3876 +
1.3877 +  oop_maps->add_gc_map( __ offset() - start, map);
1.3878 +
1.3879 +  __ restore_thread(L7_thread_cache);
1.3880 +  // clear last_Java_sp
1.3881 +  __ reset_last_Java_frame();
1.3882 +
1.3883 +  // Check for exceptions
1.3884 +  Label pending;
1.3885 +
1.3886 +  __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), O1);
1.3887 +  __ br_notnull_short(O1, Assembler::pn, pending);
1.3888 +
1.3889 +  // get the returned Method*
1.3890 +
1.3891 +  __ get_vm_result_2(G5_method);
1.3892 +  __ stx(G5_method, SP, RegisterSaver::G5_offset()+STACK_BIAS);
1.3893 +
1.3894 +  // O0 is where we want to jump, overwrite G3 which is saved and scratch
1.3895 +
1.3896 +  __ stx(O0, SP, RegisterSaver::G3_offset()+STACK_BIAS);
1.3897 +
1.3898 +  RegisterSaver::restore_live_registers(masm);
1.3899 +
1.3900 +  // We are back to the original state on entry and ready to go.
1.3901 +
1.3902 +  __ JMP(G3, 0);
1.3903 +  __ delayed()->nop();
1.3904 +
1.3905 +  // Pending exception after the safepoint
1.3906 +
1.3907 +  __ bind(pending);
1.3908 +
1.3909 +  RegisterSaver::restore_live_registers(masm);
1.3910 +
1.3911 +  // We are back to the original state on entry.
1.3912 +
1.3913 +  // Tail-call forward_exception_entry, with the issuing PC in O7,
1.3914 +  // so it looks like the original nmethod called forward_exception_entry.
1.3915 +  __ set((intptr_t)StubRoutines::forward_exception_entry(), O0);
1.3916 +  __ JMP(O0, 0);
1.3917 +  __ delayed()->nop();
1.3918 +
1.3919 +  // -------------
1.3920 +  // make sure all code is generated
1.3921 +  masm->flush();
1.3922 +
1.3923 +  // return the blob
1.3924 +  // frame_size_words or bytes??
1.3925 +  return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true);
1.3926 +}
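To close, the resolve stub above reduced to a compact model (illustrative only; the real code spills the Method* into G5's register-save slot and the jump target into G3's slot so that restore_live_registers reloads them before the jump):

  struct ResolveResult { void* method; void* entry; bool pending_exception; };
  // Returns the address the stub jumps to, or nullptr when the pending
  // exception is forwarded to forward_exception_entry instead.
  static void* resolve_shape(const ResolveResult& r) {
    // save_live_registers -> call 'destination' -> add_gc_map at the call pc
    if (r.pending_exception) return nullptr; // forward_exception path
    return r.entry;                          // JMP G3 with registers restored
  }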