--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/sparc/vm/sharedRuntime_sparc.cpp	Sat Dec 01 00:00:00 2007 +0000
@@ -0,0 +1,3199 @@
+/*
+ * Copyright 2003-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_sharedRuntime_sparc.cpp.incl"
+
+#define __ masm->
+
+#ifdef COMPILER2
+UncommonTrapBlob*   SharedRuntime::_uncommon_trap_blob;
+#endif // COMPILER2
+
+DeoptimizationBlob* SharedRuntime::_deopt_blob;
+SafepointBlob*      SharedRuntime::_polling_page_safepoint_handler_blob;
+SafepointBlob*      SharedRuntime::_polling_page_return_handler_blob;
+RuntimeStub*        SharedRuntime::_wrong_method_blob;
+RuntimeStub*        SharedRuntime::_ic_miss_blob;
+RuntimeStub*        SharedRuntime::_resolve_opt_virtual_call_blob;
+RuntimeStub*        SharedRuntime::_resolve_virtual_call_blob;
+RuntimeStub*        SharedRuntime::_resolve_static_call_blob;
+
+class RegisterSaver {
+
+  // Used for saving volatile registers. This is Gregs, Fregs, I/L/O.
+  // The Oregs are problematic. In the 32bit build the compiler can
+  // have O registers live with 64 bit quantities. A window save will
+  // cut the heads off of the registers. We have to do a very extensive
+  // stack dance to save and restore these properly.
+
+  // Note that the Oregs problem only exists if we block at either a polling
+  // page exception or a compiled code safepoint that was not originally a
+  // call, or if we deoptimize following one of these kinds of safepoints.
+
+  // Lots of registers to save. For all builds, a window save will preserve
+  // the %i and %l registers. For the 32-bit longs-in-two-entries and 64-bit
+  // builds a window-save will preserve the %o registers. In the LION build
+  // we need to save the 64-bit %o registers which requires we save them
+  // before the window-save (as then they become %i registers and get their
+  // heads chopped off on interrupt). We have to save some %g registers here
+  // as well.
+  enum {
+    // This frame's save area. Includes extra space for the native call:
+    // vararg's layout space and the like. Briefly holds the caller's
+    // register save area.
+    call_args_area = frame::register_save_words_sp_offset +
+                     frame::memory_parameter_word_sp_offset*wordSize,
+    // Make sure save locations are always 8 byte aligned.
+    // can't use round_to because it doesn't produce compile time constant
+    start_of_extra_save_area = ((call_args_area + 7) & ~7),
+    g1_offset = start_of_extra_save_area, // g-regs needing saving
+    g3_offset = g1_offset+8,
+    g4_offset = g3_offset+8,
+    g5_offset = g4_offset+8,
+    o0_offset = g5_offset+8,
+    o1_offset = o0_offset+8,
+    o2_offset = o1_offset+8,
+    o3_offset = o2_offset+8,
+    o4_offset = o3_offset+8,
+    o5_offset = o4_offset+8,
+    start_of_flags_save_area = o5_offset+8,
+    ccr_offset = start_of_flags_save_area,
+    fsr_offset = ccr_offset + 8,
+    d00_offset = fsr_offset+8,  // Start of float save area
+    register_save_size = d00_offset+8*32
+  };
+
+
+  public:
+
+    static int Oexception_offset() { return o0_offset; };
+    static int G3_offset() { return g3_offset; };
+    static int G5_offset() { return g5_offset; };
+    static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words);
+    static void restore_live_registers(MacroAssembler* masm);
+
+    // During deoptimization only the result register needs to be restored;
+    // all the other values have already been extracted.
+
+    static void restore_result_registers(MacroAssembler* masm);
+};
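
The ((call_args_area + 7) & ~7) expression above rounds the save-area start up to the next multiple of 8 without calling round_to, which (as the comment notes) does not produce a compile-time constant here. A minimal standalone sketch of the same idiom; the names are illustrative, not from this file:

    #include <cassert>

    // Round x up to the next multiple of a power-of-two alignment.
    // Equivalent to the enum's ((call_args_area + 7) & ~7) with align == 8.
    constexpr int align_up(int x, int align) {
      return (x + align - 1) & ~(align - 1);
    }

    int main() {
      assert(align_up(0, 8)  == 0);   // already aligned
      assert(align_up(1, 8)  == 8);   // rounds up
      assert(align_up(17, 8) == 24);  // 17 -> next multiple of 8
      return 0;
    }
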
+
+OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) {
+  // Record volatile registers as callee-save values in an OopMap so their save locations will be
+  // propagated to the caller frame's RegisterMap during StackFrameStream construction (needed for
+  // deoptimization; see compiledVFrame::create_stack_value).  The caller's I, L and O registers
+  // are saved in register windows - I's and L's in the caller's frame and O's in the stub frame
+  // (as the stub's I's) when the runtime routine called by the stub creates its frame.
+  int i;
+  // Always make the frame size 16 byte aligned.
+  int frame_size = round_to(additional_frame_words + register_save_size, 16);
+  // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words
+  int frame_size_in_slots = frame_size / sizeof(jint);
+  // CodeBlob frame size is in words.
+  *total_frame_words = frame_size / wordSize;
+  // OopMap* map = new OopMap(*total_frame_words, 0);
+  OopMap* map = new OopMap(frame_size_in_slots, 0);
+
+#if !defined(_LP64)
+
+  // Save 64-bit O registers; they will get their heads chopped off on a 'save'.
+  __ stx(O0, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8);
+  __ stx(O1, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8);
+  __ stx(O2, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+2*8);
+  __ stx(O3, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+3*8);
+  __ stx(O4, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+4*8);
+  __ stx(O5, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+5*8);
+#endif /* _LP64 */
+
+  __ save(SP, -frame_size, SP);
+
+#ifndef _LP64
+  // Reload the 64 bit Oregs. Although they are now Iregs we load them
+  // to Oregs here to avoid interrupts cutting off their heads
+
+  __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8, O0);
+  __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8, O1);
+  __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+2*8, O2);
+  __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+3*8, O3);
+  __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+4*8, O4);
+  __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+5*8, O5);
+
+  __ stx(O0, SP, o0_offset+STACK_BIAS);
+  map->set_callee_saved(VMRegImpl::stack2reg((o0_offset + 4)>>2), O0->as_VMReg());
+
+  __ stx(O1, SP, o1_offset+STACK_BIAS);
+
+  map->set_callee_saved(VMRegImpl::stack2reg((o1_offset + 4)>>2), O1->as_VMReg());
+
+  __ stx(O2, SP, o2_offset+STACK_BIAS);
+  map->set_callee_saved(VMRegImpl::stack2reg((o2_offset + 4)>>2), O2->as_VMReg());
+
+  __ stx(O3, SP, o3_offset+STACK_BIAS);
+  map->set_callee_saved(VMRegImpl::stack2reg((o3_offset + 4)>>2), O3->as_VMReg());
+
+  __ stx(O4, SP, o4_offset+STACK_BIAS);
+  map->set_callee_saved(VMRegImpl::stack2reg((o4_offset + 4)>>2), O4->as_VMReg());
+
+  __ stx(O5, SP, o5_offset+STACK_BIAS);
+  map->set_callee_saved(VMRegImpl::stack2reg((o5_offset + 4)>>2), O5->as_VMReg());
+#endif /* _LP64 */
+
+  // Save the G's
+  __ stx(G1, SP, g1_offset+STACK_BIAS);
+  map->set_callee_saved(VMRegImpl::stack2reg((g1_offset + 4)>>2), G1->as_VMReg());
+
+  __ stx(G3, SP, g3_offset+STACK_BIAS);
+  map->set_callee_saved(VMRegImpl::stack2reg((g3_offset + 4)>>2), G3->as_VMReg());
+
+  __ stx(G4, SP, g4_offset+STACK_BIAS);
+  map->set_callee_saved(VMRegImpl::stack2reg((g4_offset + 4)>>2), G4->as_VMReg());
+
+  __ stx(G5, SP, g5_offset+STACK_BIAS);
+  map->set_callee_saved(VMRegImpl::stack2reg((g5_offset + 4)>>2), G5->as_VMReg());
+
+  // This is really a waste but we'll keep things as they were for now
+  if (true) {
+#ifndef _LP64
+    map->set_callee_saved(VMRegImpl::stack2reg((o0_offset)>>2), O0->as_VMReg()->next());
+    map->set_callee_saved(VMRegImpl::stack2reg((o1_offset)>>2), O1->as_VMReg()->next());
+    map->set_callee_saved(VMRegImpl::stack2reg((o2_offset)>>2), O2->as_VMReg()->next());
+    map->set_callee_saved(VMRegImpl::stack2reg((o3_offset)>>2), O3->as_VMReg()->next());
+    map->set_callee_saved(VMRegImpl::stack2reg((o4_offset)>>2), O4->as_VMReg()->next());
+    map->set_callee_saved(VMRegImpl::stack2reg((o5_offset)>>2), O5->as_VMReg()->next());
+#endif /* _LP64 */
+    map->set_callee_saved(VMRegImpl::stack2reg((g1_offset)>>2), G1->as_VMReg()->next());
+    map->set_callee_saved(VMRegImpl::stack2reg((g3_offset)>>2), G3->as_VMReg()->next());
+    map->set_callee_saved(VMRegImpl::stack2reg((g4_offset)>>2), G4->as_VMReg()->next());
+    map->set_callee_saved(VMRegImpl::stack2reg((g5_offset)>>2), G5->as_VMReg()->next());
+  }
+
+
+  // Save the flags
+  __ rdccr( G5 );
+  __ stx(G5, SP, ccr_offset+STACK_BIAS);
+  __ stxfsr(SP, fsr_offset+STACK_BIAS);
+
+  // Save all the FP registers
+  int offset = d00_offset;
+  for( int i=0; i<64; i+=2 ) {
+    FloatRegister f = as_FloatRegister(i);
+    __ stf(FloatRegisterImpl::D, f, SP, offset+STACK_BIAS);
+    map->set_callee_saved(VMRegImpl::stack2reg(offset>>2), f->as_VMReg());
+    if (true) {
+      map->set_callee_saved(VMRegImpl::stack2reg((offset + sizeof(float))>>2), f->as_VMReg()->next());
+    }
+    offset += sizeof(double);
+  }
+
+  // And we're done.
+
+  return map;
+}
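
The (oN_offset + 4) >> 2 expressions record where the 32-bit view of each saved 64-bit register lives: offsets in this file are byte offsets, OopMap slots are 4-byte jint slots, and on big-endian 32-bit SPARC the narrow view of a 64-bit value is its higher-addressed word. A small sketch of that conversion, with an illustrative offset rather than a value from this build:

    #include <cstdio>

    // Byte offset of an 8-byte save slot -> 4-byte OopMap slot indices.
    // On big-endian 32-bit SPARC the 32-bit view of the register is the
    // second (higher-addressed) word of the 8-byte slot.
    int low_word_slot(int byte_offset) { return (byte_offset + 4) >> 2; }
    int full_slot(int byte_offset)     { return byte_offset >> 2; }

    int main() {
      int o0_offset = 96;  // hypothetical byte offset of O0's save slot
      printf("64-bit value starts at slot %d\n", full_slot(o0_offset));      // 24
      printf("32-bit view recorded at slot %d\n", low_word_slot(o0_offset)); // 25
      return 0;
    }
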
+
+
+// Pop the current frame and restore all the registers that we
+// saved.
+void RegisterSaver::restore_live_registers(MacroAssembler* masm) {
+
+  // Restore all the FP registers
+  for( int i=0; i<64; i+=2 ) {
+    __ ldf(FloatRegisterImpl::D, SP, d00_offset+i*sizeof(float)+STACK_BIAS, as_FloatRegister(i));
+  }
+
+  __ ldx(SP, ccr_offset+STACK_BIAS, G1);
+  __ wrccr (G1) ;
+
+  // Restore the G's
+  // Note that G2 (AKA GThread) must be saved and restored separately.
+  // TODO-FIXME: save and restore some of the other ASRs, viz., %asi and %gsr.
+
+  __ ldx(SP, g1_offset+STACK_BIAS, G1);
+  __ ldx(SP, g3_offset+STACK_BIAS, G3);
+  __ ldx(SP, g4_offset+STACK_BIAS, G4);
+  __ ldx(SP, g5_offset+STACK_BIAS, G5);
+
+
+#if !defined(_LP64)
+  // Restore the 64-bit O's.
+  __ ldx(SP, o0_offset+STACK_BIAS, O0);
+  __ ldx(SP, o1_offset+STACK_BIAS, O1);
+  __ ldx(SP, o2_offset+STACK_BIAS, O2);
+  __ ldx(SP, o3_offset+STACK_BIAS, O3);
+  __ ldx(SP, o4_offset+STACK_BIAS, O4);
+  __ ldx(SP, o5_offset+STACK_BIAS, O5);
+
+  // And temporarily place them in TLS
+
+  __ stx(O0, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8);
+  __ stx(O1, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8);
+  __ stx(O2, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+2*8);
+  __ stx(O3, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+3*8);
+  __ stx(O4, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+4*8);
+  __ stx(O5, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+5*8);
+#endif /* _LP64 */
+
+  // Restore flags
+
+  __ ldxfsr(SP, fsr_offset+STACK_BIAS);
+
+  __ restore();
+
+#if !defined(_LP64)
+  // Now reload the 64bit Oregs after we've restored the window.
+  __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8, O0);
+  __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8, O1);
+  __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+2*8, O2);
+  __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+3*8, O3);
+  __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+4*8, O4);
+  __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+5*8, O5);
+#endif /* _LP64 */
+
+}
+
+// Pop the current frame and restore the registers that might be holding
+// a result.
+void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
+
+#if !defined(_LP64)
+  // 32bit build returns longs in G1
+  __ ldx(SP, g1_offset+STACK_BIAS, G1);
+
+  // Retrieve the 64-bit O's.
+  __ ldx(SP, o0_offset+STACK_BIAS, O0);
+  __ ldx(SP, o1_offset+STACK_BIAS, O1);
+  // and save to TLS
+  __ stx(O0, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8);
+  __ stx(O1, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8);
+#endif /* _LP64 */
+
+  __ ldf(FloatRegisterImpl::D, SP, d00_offset+STACK_BIAS, as_FloatRegister(0));
+
+  __ restore();
+
+#if !defined(_LP64)
+  // Now reload the 64bit Oregs after we've restored the window.
+  __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8, O0);
+  __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8, O1);
+#endif /* _LP64 */
+
+}
+
+// The java_calling_convention describes stack locations as ideal slots on
+// a frame with no abi restrictions. Since we must observe abi restrictions
+// (like the placement of the register window) the slots must be biased by
+// the following value.
+static int reg2offset(VMReg r) {
+  return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
+}
+
+// ---------------------------------------------------------------------------
+// Read the array of BasicTypes from a signature, and compute where the
+// arguments should go.  Values in the VMRegPair regs array refer to 4-byte
+// (VMRegImpl::stack_slot_size) quantities.  Values less than VMRegImpl::stack0
+// are registers, those above refer to 4-byte stack slots.  All stack slots are
+// based off of the window top.  VMRegImpl::stack0 refers to the first slot past
+// the 16-word window, and VMRegImpl::stack0+1 refers to the memory word 4 bytes
+// higher.  Register values 0-63 (up to RegisterImpl::number_of_registers) are
+// the 64-bit integer registers.  Values 64-95 are the (32-bit only) float
+// registers.  Each 32-bit quantity is given its own number, so the integer
+// registers (in either 32- or 64-bit builds) use 2 numbers.  For example, there
+// is an O0-low and an O0-high.  Essentially, all int register numbers are
+// doubled.
+
+// Register results are passed in O0-O5 for outgoing call arguments.  To
+// convert to incoming arguments, convert all O's to I's.  The regs array
+// refers to the low and hi 32-bit words of 64-bit registers or stack slots.
+// If the regs[].second() field is set to VMRegImpl::Bad(), it means it's unused (a
+// 32-bit value was passed).  If both are VMRegImpl::Bad(), it means no value was
+// passed (used as a placeholder for the other half of longs and doubles in
+// the 64-bit build).  regs[].second() is either VMRegImpl::Bad() or regs[].second() is
+// regs[].first()+1 (regs[].first() may be misaligned in the C calling convention).
+// Sparc never passes a value in regs[].second() but not regs[].first() (regs[].first()
+// == VMRegImpl::Bad() && regs[].second() != VMRegImpl::Bad()) nor unrelated values in the
+// same VMRegPair.
+
+// Note: the INPUTS in sig_bt are in units of Java argument words, which are
+// either 32-bit or 64-bit depending on the build.  The OUTPUTS are in 32-bit
+// units regardless of build.
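
reg2offset folds the reserved out-preserve area into a stack-slot index and scales to bytes. A worked sketch of that arithmetic with an illustrative out-preserve value (the real number comes from SharedRuntime::out_preserve_stack_slots(), not hardcoded here):

    #include <cstdio>

    // Mirror of reg2offset: bias a stack-slot index by the ABI-reserved
    // out-preserve slots, then scale to bytes (stack_slot_size == 4).
    int reg2offset(int reg2stack_index, int out_preserve_slots) {
      return (reg2stack_index + out_preserve_slots) * 4;
    }

    int main() {
      // With a hypothetical out-preserve of 16 slots, stack slot 0 of the
      // Java calling convention lands 64 bytes above the window top.
      printf("%d\n", reg2offset(0, 16));  // 64
      printf("%d\n", reg2offset(3, 16));  // 76
      return 0;
    }
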
+
+
+// ---------------------------------------------------------------------------
+// The compiled Java calling convention.  The Java convention always passes
+// 64-bit values in adjacent aligned locations (either registers or stack),
+// floats in float registers and doubles in aligned float pairs.  Values are
+// packed in the registers.  There is no backing varargs store for values in
+// registers.  In the 32-bit build, longs are passed in G1 and G4 (cannot be
+// passed in I's, because longs in I's get their heads chopped off at
+// interrupt).
+int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
+                                           VMRegPair *regs,
+                                           int total_args_passed,
+                                           int is_outgoing) {
+  assert(F31->as_VMReg()->is_reg(), "overlapping stack/register numbers");
+
+  // Convention is to pack the first 6 int/oop args into the first 6 registers
+  // (I0-I5), extras spill to the stack.  Then pack the first 8 float args
+  // into F0-F7, extras spill to the stack.  Then pad all register sets to
+  // align.  Then put longs and doubles into the same registers as they fit,
+  // else spill to the stack.
+  const int int_reg_max = SPARC_ARGS_IN_REGS_NUM;
+  const int flt_reg_max = 8;
+  //
+  // Where 32-bit 1-reg longs start being passed
+  // In tiered we must pass on stack because c1 can't use a "pair" in a single reg.
+  // So make it look like we've filled all the G regs that c2 wants to use.
+  Register g_reg = TieredCompilation ? noreg : G1;
+
+  // Count int/oop and float args.  See how many stack slots we'll need and
+  // where the longs & doubles will go.
+  int int_reg_cnt = 0;
+  int flt_reg_cnt = 0;
+  // int stk_reg_pairs = frame::register_save_words*(wordSize>>2);
+  // int stk_reg_pairs = SharedRuntime::out_preserve_stack_slots();
+  int stk_reg_pairs = 0;
+  for (int i = 0; i < total_args_passed; i++) {
+    switch (sig_bt[i]) {
+    case T_LONG:                // LP64, longs compete with int args
+      assert(sig_bt[i+1] == T_VOID, "");
+#ifdef _LP64
+      if (int_reg_cnt < int_reg_max) int_reg_cnt++;
+#endif
+      break;
+    case T_OBJECT:
+    case T_ARRAY:
+    case T_ADDRESS: // Used, e.g., in slow-path locking for the lock's stack address
+      if (int_reg_cnt < int_reg_max) int_reg_cnt++;
+#ifndef _LP64
+      else                           stk_reg_pairs++;
+#endif
+      break;
+    case T_INT:
+    case T_SHORT:
+    case T_CHAR:
+    case T_BYTE:
+    case T_BOOLEAN:
+      if (int_reg_cnt < int_reg_max) int_reg_cnt++;
+      else                           stk_reg_pairs++;
+      break;
+    case T_FLOAT:
+      if (flt_reg_cnt < flt_reg_max) flt_reg_cnt++;
+      else                           stk_reg_pairs++;
+      break;
+    case T_DOUBLE:
+      assert(sig_bt[i+1] == T_VOID, "");
+      break;
+    case T_VOID:
+      break;
+    default:
+      ShouldNotReachHere();
+    }
+  }
+
+  // This is where the longs/doubles start on the stack.
+  stk_reg_pairs = (stk_reg_pairs+1) & ~1; // Round
+
+  int int_reg_pairs = (int_reg_cnt+1) & ~1; // 32-bit 2-reg longs only
+  int flt_reg_pairs = (flt_reg_cnt+1) & ~1;
+
+  // int stk_reg = frame::register_save_words*(wordSize>>2);
+  // int stk_reg = SharedRuntime::out_preserve_stack_slots();
+  int stk_reg = 0;
+  int int_reg = 0;
+  int flt_reg = 0;
+
+  // Now do the signature layout
+  for (int i = 0; i < total_args_passed; i++) {
+    switch (sig_bt[i]) {
+    case T_INT:
+    case T_SHORT:
+    case T_CHAR:
+    case T_BYTE:
+    case T_BOOLEAN:
+#ifndef _LP64
+    case T_OBJECT:
+    case T_ARRAY:
+    case T_ADDRESS: // Used, e.g., in slow-path locking for the lock's stack address
+#endif // _LP64
+      if (int_reg < int_reg_max) {
+        Register r = is_outgoing ? as_oRegister(int_reg++) : as_iRegister(int_reg++);
+        regs[i].set1(r->as_VMReg());
+      } else {
+        regs[i].set1(VMRegImpl::stack2reg(stk_reg++));
+      }
+      break;
+
+#ifdef _LP64
+    case T_OBJECT:
+    case T_ARRAY:
+    case T_ADDRESS: // Used, e.g., in slow-path locking for the lock's stack address
+      if (int_reg < int_reg_max) {
+        Register r = is_outgoing ? as_oRegister(int_reg++) : as_iRegister(int_reg++);
+        regs[i].set2(r->as_VMReg());
+      } else {
+        regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs));
+        stk_reg_pairs += 2;
+      }
+      break;
+#endif // _LP64
+
+    case T_LONG:
+      assert(sig_bt[i+1] == T_VOID, "expecting VOID in other half");
+#ifdef COMPILER2
+#ifdef _LP64
+      // Can't be tiered (yet)
+      if (int_reg < int_reg_max) {
+        Register r = is_outgoing ? as_oRegister(int_reg++) : as_iRegister(int_reg++);
+        regs[i].set2(r->as_VMReg());
+      } else {
+        regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs));
+        stk_reg_pairs += 2;
+      }
+#else
+      // For 32-bit build, can't pass longs in O-regs because they become
+      // I-regs and get trashed.  Use G-regs instead.  G1 and G4 are almost
+      // spare and available.  This convention isn't used by the Sparc ABI or
+      // anywhere else. If we're tiered then we don't use G-regs because c1
+      // can't deal with them as a "pair".
+      // G0: zero
+      // G1: 1st Long arg
+      // G2: global allocated to TLS
+      // G3: used in inline cache check
+      // G4: 2nd Long arg
+      // G5: used in inline cache check
+      // G6: used by OS
+      // G7: used by OS
+
+      if (g_reg == G1) {
+        regs[i].set2(G1->as_VMReg()); // This long arg in G1
+        g_reg = G4;                   // Where the next arg goes
+      } else if (g_reg == G4) {
+        regs[i].set2(G4->as_VMReg()); // The 2nd long arg in G4
+        g_reg = noreg;                // No more longs in registers
+      } else {
+        regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs));
+        stk_reg_pairs += 2;
+      }
+#endif // _LP64
+#else // COMPILER2
+      if (int_reg_pairs + 1 < int_reg_max) {
+        if (is_outgoing) {
+          regs[i].set_pair(as_oRegister(int_reg_pairs + 1)->as_VMReg(), as_oRegister(int_reg_pairs)->as_VMReg());
+        } else {
+          regs[i].set_pair(as_iRegister(int_reg_pairs + 1)->as_VMReg(), as_iRegister(int_reg_pairs)->as_VMReg());
+        }
+        int_reg_pairs += 2;
+      } else {
+        regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs));
+        stk_reg_pairs += 2;
+      }
+#endif // COMPILER2
+      break;
+
+    case T_FLOAT:
+      if (flt_reg < flt_reg_max) regs[i].set1(as_FloatRegister(flt_reg++)->as_VMReg());
+      else                       regs[i].set1(VMRegImpl::stack2reg(stk_reg++));
+      break;
+    case T_DOUBLE:
+      assert(sig_bt[i+1] == T_VOID, "expecting half");
+      if (flt_reg_pairs + 1 < flt_reg_max) {
+        regs[i].set2(as_FloatRegister(flt_reg_pairs)->as_VMReg());
+        flt_reg_pairs += 2;
+      } else {
+        regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs));
+        stk_reg_pairs += 2;
+      }
+      break;
+    case T_VOID: regs[i].set_bad(); break; // Halves of longs & doubles
+    default:
+      ShouldNotReachHere();
+    }
+  }
+
+  // Return the amount of stack space these arguments will need.
+  return stk_reg_pairs;
+
+}
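
To make the two-pass layout above concrete, here is a tiny hand-worked model for one hypothetical signature under the 64-bit COMPILER2 rules (incoming call, so I-registers). The signature and the simplified counters are illustrative only, derived by hand from the code above, not output captured from the VM:

    #include <cstdio>
    #include <cstring>

    // Simplified model: ints/oops/longs share I0-I5, floats pack into F0-F7,
    // and doubles take aligned F-pairs starting after the packed floats.
    int main() {
      const char* sig[] = {"int", "long", "Object", "float", "double"};
      int int_reg = 0, flt_reg = 0;
      int flt_reg_pairs = 2;  // one float in the signature -> doubles start at F2
      for (const char* t : sig) {
        if (strcmp(t, "float") == 0) {
          printf("%-6s -> F%d\n", t, flt_reg++);
        } else if (strcmp(t, "double") == 0) {
          printf("%-6s -> F%d:F%d\n", t, flt_reg_pairs, flt_reg_pairs + 1);
          flt_reg_pairs += 2;
        } else {
          printf("%-6s -> I%d\n", t, int_reg++);
        }
      }
      return 0;  // prints I0, I1, I2, F0, F2:F3 for this signature
    }
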
+
+// Helper class mostly to avoid passing masm everywhere, and handle store
+// displacement overflow logic for LP64
+class AdapterGenerator {
+  MacroAssembler *masm;
+#ifdef _LP64
+  Register Rdisp;
+  void set_Rdisp(Register r) { Rdisp = r; }
+#endif // _LP64
+
+  void patch_callers_callsite();
+  void tag_c2i_arg(frame::Tag t, Register base, int st_off, Register scratch);
+
+  // base+st_off points to top of argument
+  int arg_offset(const int st_off) { return st_off + Interpreter::value_offset_in_bytes(); }
+  int next_arg_offset(const int st_off) {
+    return st_off - Interpreter::stackElementSize() + Interpreter::value_offset_in_bytes();
+  }
+
+#ifdef _LP64
+  // On _LP64 argument slot values are loaded first into a register
+  // because they might not fit into displacement.
+  Register arg_slot(const int st_off);
+  Register next_arg_slot(const int st_off);
+#else
+  int arg_slot(const int st_off)      { return arg_offset(st_off); }
+  int next_arg_slot(const int st_off) { return next_arg_offset(st_off); }
+#endif // _LP64
+
+  // Stores long into offset pointed to by base
+  void store_c2i_long(Register r, Register base,
+                      const int st_off, bool is_stack);
+  void store_c2i_object(Register r, Register base,
+                        const int st_off);
+  void store_c2i_int(Register r, Register base,
+                     const int st_off);
+  void store_c2i_double(VMReg r_2,
+                        VMReg r_1, Register base, const int st_off);
+  void store_c2i_float(FloatRegister f, Register base,
+                       const int st_off);
+
+ public:
+  void gen_c2i_adapter(int total_args_passed,
+                       // VMReg max_arg,
+                       int comp_args_on_stack, // VMRegStackSlots
+                       const BasicType *sig_bt,
+                       const VMRegPair *regs,
+                       Label& skip_fixup);
+  void gen_i2c_adapter(int total_args_passed,
+                       // VMReg max_arg,
+                       int comp_args_on_stack, // VMRegStackSlots
+                       const BasicType *sig_bt,
+                       const VMRegPair *regs);
+
+  AdapterGenerator(MacroAssembler *_masm) : masm(_masm) {}
+};
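
arg_offset and next_arg_offset turn an interpreter stack-element offset into byte displacements for the current argument and the next (lower) one. A sketch with illustrative constants standing in for Interpreter::stackElementSize() and Interpreter::value_offset_in_bytes(), assuming a 32-bit, untagged layout:

    #include <cstdio>

    // Illustrative stand-ins, not this build's actual values.
    const int kStackElementSize = 4;
    const int kValueOffset      = 0;

    int arg_offset(int st_off)      { return st_off + kValueOffset; }
    int next_arg_offset(int st_off) { return st_off - kStackElementSize + kValueOffset; }

    int main() {
      // Interpreter arguments grow toward lower offsets, so the "next"
      // argument slot sits one stack element below the current one.
      printf("%d %d\n", arg_offset(24), next_arg_offset(24));  // 24 20
      return 0;
    }
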
+
+
+// Patch the caller's callsite with entry to compiled code if it exists.
+void AdapterGenerator::patch_callers_callsite() {
+  Label L;
+  __ ld_ptr(G5_method, in_bytes(methodOopDesc::code_offset()), G3_scratch);
+  __ br_null(G3_scratch, false, __ pt, L);
+  // Schedule the branch target address early.
+  __ delayed()->ld_ptr(G5_method, in_bytes(methodOopDesc::interpreter_entry_offset()), G3_scratch);
+  // Call into the VM to patch the caller, then jump to compiled callee
+  __ save_frame(4);     // Args in compiled layout; do not blow them
+
+  // Must save all the live Gregs the list is:
+  // G1: 1st Long arg (32bit build)
+  // G2: global allocated to TLS
+  // G3: used in inline cache check (scratch)
+  // G4: 2nd Long arg (32bit build);
+  // G5: used in inline cache check (methodOop)
+
+  // The longs must go to the stack by hand since in the 32 bit build they can be trashed by window ops.
+
+#ifdef _LP64
+  // mov(s,d)
+  __ mov(G1, L1);
+  __ mov(G4, L4);
+  __ mov(G5_method, L5);
+  __ mov(G5_method, O0);         // VM needs target method
+  __ mov(I7, O1);                // VM needs caller's callsite
+  // Must be a leaf call...
+  // can be very far once the blob has been relocated
+  Address dest(O7, CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite));
+  __ relocate(relocInfo::runtime_call_type);
+  __ jumpl_to(dest, O7);
+  __ delayed()->mov(G2_thread, L7_thread_cache);
+  __ mov(L7_thread_cache, G2_thread);
+  __ mov(L1, G1);
+  __ mov(L4, G4);
+  __ mov(L5, G5_method);
+#else
+  __ stx(G1, FP, -8 + STACK_BIAS);
+  __ stx(G4, FP, -16 + STACK_BIAS);
+  __ mov(G5_method, L5);
+  __ mov(G5_method, O0);         // VM needs target method
+  __ mov(I7, O1);                // VM needs caller's callsite
+  // Must be a leaf call...
+  __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), relocInfo::runtime_call_type);
+  __ delayed()->mov(G2_thread, L7_thread_cache);
+  __ mov(L7_thread_cache, G2_thread);
+  __ ldx(FP, -8 + STACK_BIAS, G1);
+  __ ldx(FP, -16 + STACK_BIAS, G4);
+  __ mov(L5, G5_method);
+  __ ld_ptr(G5_method, in_bytes(methodOopDesc::interpreter_entry_offset()), G3_scratch);
+#endif /* _LP64 */
+
+  __ restore();      // Restore args
+  __ bind(L);
+}
+
+void AdapterGenerator::tag_c2i_arg(frame::Tag t, Register base, int st_off,
+                                   Register scratch) {
+  if (TaggedStackInterpreter) {
+    int tag_off = st_off + Interpreter::tag_offset_in_bytes();
+#ifdef _LP64
+    Register tag_slot = Rdisp;
+    __ set(tag_off, tag_slot);
+#else
+    int tag_slot = tag_off;
+#endif // _LP64
+    // have to store zero because local slots can be reused (rats!)
+    if (t == frame::TagValue) {
+      __ st_ptr(G0, base, tag_slot);
+    } else if (t == frame::TagCategory2) {
+      __ st_ptr(G0, base, tag_slot);
+      int next_tag_off = st_off - Interpreter::stackElementSize() +
+                         Interpreter::tag_offset_in_bytes();
+#ifdef _LP64
+      __ set(next_tag_off, tag_slot);
+#else
+      tag_slot = next_tag_off;
+#endif // _LP64
+      __ st_ptr(G0, base, tag_slot);
+    } else {
+      __ mov(t, scratch);
+      __ st_ptr(scratch, base, tag_slot);
+    }
+  }
+}
+
+#ifdef _LP64
+Register AdapterGenerator::arg_slot(const int st_off) {
+  __ set( arg_offset(st_off), Rdisp);
+  return Rdisp;
+}
+
+Register AdapterGenerator::next_arg_slot(const int st_off){
+  __ set( next_arg_offset(st_off), Rdisp);
+  return Rdisp;
+}
+#endif // _LP64
+
+// Stores long into offset pointed to by base
+void AdapterGenerator::store_c2i_long(Register r, Register base,
+                                      const int st_off, bool is_stack) {
+#ifdef COMPILER2
+#ifdef _LP64
+  // In V9, longs are given 2 64-bit slots in the interpreter, but the
+  // data is passed in only 1 slot.
+  __ stx(r, base, next_arg_slot(st_off));
+#else
+  // Misaligned store of 64-bit data
+  __ stw(r, base, arg_slot(st_off));       // lo bits
+  __ srlx(r, 32, r);
+  __ stw(r, base, next_arg_slot(st_off));  // hi bits
+#endif // _LP64
+#else
+  if (is_stack) {
+    // Misaligned store of 64-bit data
+    __ stw(r, base, arg_slot(st_off));       // lo bits
+    __ srlx(r, 32, r);
+    __ stw(r, base, next_arg_slot(st_off));  // hi bits
+  } else {
+    __ stw(r->successor(), base, arg_slot(st_off)     ); // lo bits
+    __ stw(r             , base, next_arg_slot(st_off)); // hi bits
+  }
+#endif // COMPILER2
+  tag_c2i_arg(frame::TagCategory2, base, st_off, r);
+}
+
+void AdapterGenerator::store_c2i_object(Register r, Register base,
+                                        const int st_off) {
+  __ st_ptr (r, base, arg_slot(st_off));
+  tag_c2i_arg(frame::TagReference, base, st_off, r);
+}
+
+void AdapterGenerator::store_c2i_int(Register r, Register base,
+                                     const int st_off) {
+  __ st (r, base, arg_slot(st_off));
+  tag_c2i_arg(frame::TagValue, base, st_off, r);
+}
+
+// Stores into offset pointed to by base
+void AdapterGenerator::store_c2i_double(VMReg r_2,
+                 VMReg r_1, Register base, const int st_off) {
+#ifdef _LP64
+  // In V9, doubles are given 2 64-bit slots in the interpreter, but the
+  // data is passed in only 1 slot.
+  __ stf(FloatRegisterImpl::D, r_1->as_FloatRegister(), base, next_arg_slot(st_off));
+#else
+  // Need to marshal 64-bit value from misaligned Lesp loads
+  __ stf(FloatRegisterImpl::S, r_1->as_FloatRegister(), base, next_arg_slot(st_off));
+  __ stf(FloatRegisterImpl::S, r_2->as_FloatRegister(), base, arg_slot(st_off) );
+#endif
+  tag_c2i_arg(frame::TagCategory2, base, st_off, G1_scratch);
+}
+
+void AdapterGenerator::store_c2i_float(FloatRegister f, Register base,
+                                       const int st_off) {
+  __ stf(FloatRegisterImpl::S, f, base, arg_slot(st_off));
+  tag_c2i_arg(frame::TagValue, base, st_off, G1_scratch);
+}
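
The misaligned 32-bit path above splits the 64-bit register into two word stores: the low word first, then the register shifted right by 32 for the high word. The same split expressed in portable C++ (a sketch; which slot counts as "next" is decided by the slot arithmetic above and is not repeated here):

    #include <cstdint>
    #include <cstdio>

    // Mirror of the misaligned 64-bit store: write the low word at the
    // current slot, then shift right 32 and write the high word at the
    // next (lower-offset) slot -- two 32-bit stores instead of one stx.
    void store_split(uint32_t* lo_slot, uint32_t* hi_slot, uint64_t r) {
      *lo_slot = (uint32_t)r;  // stw r  -> lo bits
      r >>= 32;                // srlx r, 32, r
      *hi_slot = (uint32_t)r;  // stw r  -> hi bits
    }

    int main() {
      uint32_t lo, hi;
      store_split(&lo, &hi, 0x0123456789abcdefULL);
      printf("hi=%08x lo=%08x\n", hi, lo);  // hi=01234567 lo=89abcdef
      return 0;
    }
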
We don't 1.780 + // do that at this point. 1.781 + 1.782 + patch_callers_callsite(); 1.783 + 1.784 + __ bind(skip_fixup); 1.785 + 1.786 + // Since all args are passed on the stack, total_args_passed*wordSize is the 1.787 + // space we need. Add in varargs area needed by the interpreter. Round up 1.788 + // to stack alignment. 1.789 + const int arg_size = total_args_passed * Interpreter::stackElementSize(); 1.790 + const int varargs_area = 1.791 + (frame::varargs_offset - frame::register_save_words)*wordSize; 1.792 + const int extraspace = round_to(arg_size + varargs_area, 2*wordSize); 1.793 + 1.794 + int bias = STACK_BIAS; 1.795 + const int interp_arg_offset = frame::varargs_offset*wordSize + 1.796 + (total_args_passed-1)*Interpreter::stackElementSize(); 1.797 + 1.798 + Register base = SP; 1.799 + 1.800 +#ifdef _LP64 1.801 + // In the 64bit build because of wider slots and STACKBIAS we can run 1.802 + // out of bits in the displacement to do loads and stores. Use g3 as 1.803 + // temporary displacement. 1.804 + if (! __ is_simm13(extraspace)) { 1.805 + __ set(extraspace, G3_scratch); 1.806 + __ sub(SP, G3_scratch, SP); 1.807 + } else { 1.808 + __ sub(SP, extraspace, SP); 1.809 + } 1.810 + set_Rdisp(G3_scratch); 1.811 +#else 1.812 + __ sub(SP, extraspace, SP); 1.813 +#endif // _LP64 1.814 + 1.815 + // First write G1 (if used) to where ever it must go 1.816 + for (int i=0; i<total_args_passed; i++) { 1.817 + const int st_off = interp_arg_offset - (i*Interpreter::stackElementSize()) + bias; 1.818 + VMReg r_1 = regs[i].first(); 1.819 + VMReg r_2 = regs[i].second(); 1.820 + if (r_1 == G1_scratch->as_VMReg()) { 1.821 + if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ARRAY) { 1.822 + store_c2i_object(G1_scratch, base, st_off); 1.823 + } else if (sig_bt[i] == T_LONG) { 1.824 + assert(!TieredCompilation, "should not use register args for longs"); 1.825 + store_c2i_long(G1_scratch, base, st_off, false); 1.826 + } else { 1.827 + store_c2i_int(G1_scratch, base, st_off); 1.828 + } 1.829 + } 1.830 + } 1.831 + 1.832 + // Now write the args into the outgoing interpreter space 1.833 + for (int i=0; i<total_args_passed; i++) { 1.834 + const int st_off = interp_arg_offset - (i*Interpreter::stackElementSize()) + bias; 1.835 + VMReg r_1 = regs[i].first(); 1.836 + VMReg r_2 = regs[i].second(); 1.837 + if (!r_1->is_valid()) { 1.838 + assert(!r_2->is_valid(), ""); 1.839 + continue; 1.840 + } 1.841 + // Skip G1 if found as we did it first in order to free it up 1.842 + if (r_1 == G1_scratch->as_VMReg()) { 1.843 + continue; 1.844 + } 1.845 +#ifdef ASSERT 1.846 + bool G1_forced = false; 1.847 +#endif // ASSERT 1.848 + if (r_1->is_stack()) { // Pretend stack targets are loaded into G1 1.849 +#ifdef _LP64 1.850 + Register ld_off = Rdisp; 1.851 + __ set(reg2offset(r_1) + extraspace + bias, ld_off); 1.852 +#else 1.853 + int ld_off = reg2offset(r_1) + extraspace + bias; 1.854 +#ifdef ASSERT 1.855 + G1_forced = true; 1.856 +#endif // ASSERT 1.857 +#endif // _LP64 1.858 + r_1 = G1_scratch->as_VMReg();// as part of the load/store shuffle 1.859 + if (!r_2->is_valid()) __ ld (base, ld_off, G1_scratch); 1.860 + else __ ldx(base, ld_off, G1_scratch); 1.861 + } 1.862 + 1.863 + if (r_1->is_Register()) { 1.864 + Register r = r_1->as_Register()->after_restore(); 1.865 + if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ARRAY) { 1.866 + store_c2i_object(r, base, st_off); 1.867 + } else if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { 1.868 + if (TieredCompilation) { 1.869 + assert(G1_forced || sig_bt[i] != T_LONG, "should not use register 
args for longs"); 1.870 + } 1.871 + store_c2i_long(r, base, st_off, r_2->is_stack()); 1.872 + } else { 1.873 + store_c2i_int(r, base, st_off); 1.874 + } 1.875 + } else { 1.876 + assert(r_1->is_FloatRegister(), ""); 1.877 + if (sig_bt[i] == T_FLOAT) { 1.878 + store_c2i_float(r_1->as_FloatRegister(), base, st_off); 1.879 + } else { 1.880 + assert(sig_bt[i] == T_DOUBLE, "wrong type"); 1.881 + store_c2i_double(r_2, r_1, base, st_off); 1.882 + } 1.883 + } 1.884 + } 1.885 + 1.886 +#ifdef _LP64 1.887 + // Need to reload G3_scratch, used for temporary displacements. 1.888 + __ ld_ptr(G5_method, in_bytes(methodOopDesc::interpreter_entry_offset()), G3_scratch); 1.889 + 1.890 + // Pass O5_savedSP as an argument to the interpreter. 1.891 + // The interpreter will restore SP to this value before returning. 1.892 + __ set(extraspace, G1); 1.893 + __ add(SP, G1, O5_savedSP); 1.894 +#else 1.895 + // Pass O5_savedSP as an argument to the interpreter. 1.896 + // The interpreter will restore SP to this value before returning. 1.897 + __ add(SP, extraspace, O5_savedSP); 1.898 +#endif // _LP64 1.899 + 1.900 + __ mov((frame::varargs_offset)*wordSize - 1.901 + 1*Interpreter::stackElementSize()+bias+BytesPerWord, G1); 1.902 + // Jump to the interpreter just as if interpreter was doing it. 1.903 + __ jmpl(G3_scratch, 0, G0); 1.904 + // Setup Lesp for the call. Cannot actually set Lesp as the current Lesp 1.905 + // (really L0) is in use by the compiled frame as a generic temp. However, 1.906 + // the interpreter does not know where its args are without some kind of 1.907 + // arg pointer being passed in. Pass it in Gargs. 1.908 + __ delayed()->add(SP, G1, Gargs); 1.909 +} 1.910 + 1.911 +void AdapterGenerator::gen_i2c_adapter( 1.912 + int total_args_passed, 1.913 + // VMReg max_arg, 1.914 + int comp_args_on_stack, // VMRegStackSlots 1.915 + const BasicType *sig_bt, 1.916 + const VMRegPair *regs) { 1.917 + 1.918 + // Generate an I2C adapter: adjust the I-frame to make space for the C-frame 1.919 + // layout. Lesp was saved by the calling I-frame and will be restored on 1.920 + // return. Meanwhile, outgoing arg space is all owned by the callee 1.921 + // C-frame, so we can mangle it at will. After adjusting the frame size, 1.922 + // hoist register arguments and repack other args according to the compiled 1.923 + // code convention. Finally, end in a jump to the compiled code. The entry 1.924 + // point address is the start of the buffer. 1.925 + 1.926 + // We will only enter here from an interpreted frame and never from after 1.927 + // passing thru a c2i. Azul allowed this but we do not. If we lose the 1.928 + // race and use a c2i we will remain interpreted for the race loser(s). 1.929 + // This removes all sorts of headaches on the x86 side and also eliminates 1.930 + // the possibility of having c2i -> i2c -> c2i -> ... endless transitions. 1.931 + 1.932 + // As you can see from the list of inputs & outputs there are not a lot 1.933 + // of temp registers to work with: mostly G1, G3 & G4. 1.934 + 1.935 + // Inputs: 1.936 + // G2_thread - TLS 1.937 + // G5_method - Method oop 1.938 + // O0 - Flag telling us to restore SP from O5 1.939 + // O4_args - Pointer to interpreter's args 1.940 + // O5 - Caller's saved SP, to be restored if needed 1.941 + // O6 - Current SP! 
+
+void AdapterGenerator::gen_i2c_adapter(
+                            int total_args_passed,
+                            // VMReg max_arg,
+                            int comp_args_on_stack, // VMRegStackSlots
+                            const BasicType *sig_bt,
+                            const VMRegPair *regs) {
+
+  // Generate an I2C adapter: adjust the I-frame to make space for the C-frame
+  // layout.  Lesp was saved by the calling I-frame and will be restored on
+  // return.  Meanwhile, outgoing arg space is all owned by the callee
+  // C-frame, so we can mangle it at will.  After adjusting the frame size,
+  // hoist register arguments and repack other args according to the compiled
+  // code convention.  Finally, end in a jump to the compiled code.  The entry
+  // point address is the start of the buffer.
+
+  // We will only enter here from an interpreted frame and never from after
+  // passing thru a c2i.  Azul allowed this but we do not.  If we lose the
+  // race and use a c2i we will remain interpreted for the race loser(s).
+  // This removes all sorts of headaches on the x86 side and also eliminates
+  // the possibility of having c2i -> i2c -> c2i -> ... endless transitions.
+
+  // As you can see from the list of inputs & outputs there are not a lot
+  // of temp registers to work with: mostly G1, G3 & G4.
+
+  // Inputs:
+  // G2_thread      - TLS
+  // G5_method      - Method oop
+  // O0             - Flag telling us to restore SP from O5
+  // O4_args        - Pointer to interpreter's args
+  // O5             - Caller's saved SP, to be restored if needed
+  // O6             - Current SP!
+  // O7             - Valid return address
+  // L0-L7, I0-I7   - Caller's temps (no frame pushed yet)
+
+  // Outputs:
+  // G2_thread      - TLS
+  // G1, G4         - Outgoing long args in 32-bit build
+  // O0-O5          - Outgoing args in compiled layout
+  // O6             - Adjusted or restored SP
+  // O7             - Valid return address
+  // L0-L7, I0-I7   - Caller's temps (no frame pushed yet)
+  // F0-F7          - more outgoing args
+
+
+  // O4 is about to get loaded up with compiled callee's args
+  __ sub(Gargs, BytesPerWord, Gargs);
+
+#ifdef ASSERT
+  {
+    // on entry OsavedSP and SP should be equal
+    Label ok;
+    __ cmp(O5_savedSP, SP);
+    __ br(Assembler::equal, false, Assembler::pt, ok);
+    __ delayed()->nop();
+    __ stop("I5_savedSP not set");
+    __ should_not_reach_here();
+    __ bind(ok);
+  }
+#endif
+
+  // ON ENTRY TO THE CODE WE ARE MAKING, WE HAVE AN INTERPRETED FRAME
+  // WITH O7 HOLDING A VALID RETURN PC
+  //
+  // |              |
+  // : java stack   :
+  // |              |
+  // +--------------+ <--- start of outgoing args
+  // | receiver     |   |
+  // : rest of args :   |---size is java-arg-words
+  // |              |   |
+  // +--------------+ <--- O4_args (misaligned) and Lesp if prior is not C2I
+  // |              |   |
+  // :    unused    :   |---Space for max Java stack, plus stack alignment
+  // |              |   |
+  // +--------------+ <--- SP + 16*wordsize
+  // |              |
+  // :    window    :
+  // |              |
+  // +--------------+ <--- SP
+
+  // WE REPACK THE STACK.  We use the common calling convention layout as
+  // discovered by calling SharedRuntime::calling_convention.  We assume it
+  // causes an arbitrary shuffle of memory, which may require some register
+  // temps to do the shuffle.  We hope for (and optimize for) the case where
+  // temps are not needed.  We may have to resize the stack slightly, in case
+  // we need alignment padding (32-bit interpreter can pass longs & doubles
+  // misaligned, but the compilers expect them aligned).
+  //
+  // |              |
+  // : java stack   :
+  // |              |
+  // +--------------+ <--- start of outgoing args
+  // | pad, align   |   |
+  // +--------------+   |
+  // | ints, floats |   |---Outgoing stack args, packed low.
+  // +--------------+   |   First few args in registers.
+  // :   doubles    :   |
+  // |   longs      |   |
+  // +--------------+ <--- SP' + 16*wordsize
+  // |              |
+  // :    window    :
+  // |              |
+  // +--------------+ <--- SP'
+
+  // ON EXIT FROM THE CODE WE ARE MAKING, WE STILL HAVE AN INTERPRETED FRAME
+  // WITH O7 HOLDING A VALID RETURN PC - ITS JUST THAT THE ARGS ARE NOW SETUP
+  // FOR COMPILED CODE AND THE FRAME SLIGHTLY GROWN.
+
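
The frame resize below converts compiled-convention stack slots into words and pads to the 2-word stack alignment. The same arithmetic as a standalone sketch (LP64 constants assumed; the argument count is hypothetical):

    #include <cstdio>

    int round_to(int x, int m) { return (x + m - 1) & ~(m - 1); }

    int main() {
      const int slot = 4, wordSize = 8, LogBytesPerWord = 3;  // LP64 values
      int comp_args_on_stack = 5;  // hypothetical slot count
      // Slots -> bytes, round to a whole word, convert to words...
      int words = round_to(comp_args_on_stack * slot, wordSize) >> LogBytesPerWord;
      // ...then pad to the 2-word (16-byte) stack alignment.
      words = round_to(words, 2);
      printf("%d words\n", words);  // 5 slots -> 3 words -> 4 words
      return 0;
    }
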
This calculation does not 1.1028 + // include the space for total_args_passed because Lesp has not yet popped 1.1029 + // the arguments. 1.1030 + __ sub(SP, (comp_words_on_stack)*wordSize, SP); 1.1031 + } 1.1032 + 1.1033 + // Will jump to the compiled code just as if compiled code was doing it. 1.1034 + // Pre-load the register-jump target early, to schedule it better. 1.1035 + __ ld_ptr(G5_method, in_bytes(methodOopDesc::from_compiled_offset()), G3); 1.1036 + 1.1037 + // Now generate the shuffle code. Pick up all register args and move the 1.1038 + // rest through G1_scratch. 1.1039 + for (int i=0; i<total_args_passed; i++) { 1.1040 + if (sig_bt[i] == T_VOID) { 1.1041 + // Longs and doubles are passed in native word order, but misaligned 1.1042 + // in the 32-bit build. 1.1043 + assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); 1.1044 + continue; 1.1045 + } 1.1046 + 1.1047 + // Pick up 0, 1 or 2 words from Lesp+offset. Assume mis-aligned in the 1.1048 + // 32-bit build and aligned in the 64-bit build. Look for the obvious 1.1049 + // ldx/lddf optimizations. 1.1050 + 1.1051 + // Load in argument order going down. 1.1052 + const int ld_off = (total_args_passed-i)*Interpreter::stackElementSize(); 1.1053 +#ifdef _LP64 1.1054 + set_Rdisp(G1_scratch); 1.1055 +#endif // _LP64 1.1056 + 1.1057 + VMReg r_1 = regs[i].first(); 1.1058 + VMReg r_2 = regs[i].second(); 1.1059 + if (!r_1->is_valid()) { 1.1060 + assert(!r_2->is_valid(), ""); 1.1061 + continue; 1.1062 + } 1.1063 + if (r_1->is_stack()) { // Pretend stack targets are loaded into F8/F9 1.1064 + r_1 = F8->as_VMReg(); // as part of the load/store shuffle 1.1065 + if (r_2->is_valid()) r_2 = r_1->next(); 1.1066 + } 1.1067 + if (r_1->is_Register()) { // Register argument 1.1068 + Register r = r_1->as_Register()->after_restore(); 1.1069 + if (!r_2->is_valid()) { 1.1070 + __ ld(Gargs, arg_slot(ld_off), r); 1.1071 + } else { 1.1072 +#ifdef _LP64 1.1073 + // In V9, longs are given 2 64-bit slots in the interpreter, but the 1.1074 + // data is passed in only 1 slot. 1.1075 + Register slot = (sig_bt[i]==T_LONG) ? 1.1076 + next_arg_slot(ld_off) : arg_slot(ld_off); 1.1077 + __ ldx(Gargs, slot, r); 1.1078 +#else 1.1079 + // Need to load a 64-bit value into G1/G4, but G1/G4 is being used in the 1.1080 + // stack shuffle. Load the first 2 longs into G1/G4 later. 1.1081 +#endif 1.1082 + } 1.1083 + } else { 1.1084 + assert(r_1->is_FloatRegister(), ""); 1.1085 + if (!r_2->is_valid()) { 1.1086 + __ ldf(FloatRegisterImpl::S, Gargs, arg_slot(ld_off), r_1->as_FloatRegister()); 1.1087 + } else { 1.1088 +#ifdef _LP64 1.1089 + // In V9, doubles are given 2 64-bit slots in the interpreter, but the 1.1090 + // data is passed in only 1 slot. This code also handles longs that 1.1091 + // are passed on the stack, but need a stack-to-stack move through a 1.1092 + // spare float register. 1.1093 + Register slot = (sig_bt[i]==T_LONG || sig_bt[i] == T_DOUBLE) ? 1.1094 + next_arg_slot(ld_off) : arg_slot(ld_off); 1.1095 + __ ldf(FloatRegisterImpl::D, Gargs, slot, r_1->as_FloatRegister()); 1.1096 +#else 1.1097 + // Need to marshal 64-bit value from misaligned Lesp loads 1.1098 + __ ldf(FloatRegisterImpl::S, Gargs, next_arg_slot(ld_off), r_1->as_FloatRegister()); 1.1099 + __ ldf(FloatRegisterImpl::S, Gargs, arg_slot(ld_off), r_2->as_FloatRegister()); 1.1100 +#endif 1.1101 + } 1.1102 + } 1.1103 + // Was the argument really intended to be on the stack, but was loaded 1.1104 + // into F8/F9? 
+  bool made_space = false;
+#ifndef _LP64
+  // May need to pick up a few long args in G1/G4
+  bool g4_crushed = false;
+  bool g3_crushed = false;
+  for (int i=0; i<total_args_passed; i++) {
+    if (regs[i].first()->is_Register() && regs[i].second()->is_valid()) {
+      // Load in argument order going down
+      int ld_off = (total_args_passed-i)*Interpreter::stackElementSize();
+      // Need to marshal 64-bit value from misaligned Lesp loads
+      Register r = regs[i].first()->as_Register()->after_restore();
+      if (r == G1 || r == G4) {
+        assert(!g4_crushed, "ordering problem");
+        if (r == G4){
+          g4_crushed = true;
+          __ lduw(Gargs, arg_slot(ld_off)     , G3_scratch); // Load lo bits
+          __ ld  (Gargs, next_arg_slot(ld_off), r);          // Load hi bits
+        } else {
+          // better schedule this way
+          __ ld  (Gargs, next_arg_slot(ld_off), r);          // Load hi bits
+          __ lduw(Gargs, arg_slot(ld_off)     , G3_scratch); // Load lo bits
+        }
+        g3_crushed = true;
+        __ sllx(r, 32, r);
+        __ or3(G3_scratch, r, r);
+      } else {
+        assert(r->is_out(), "longs passed in two O registers");
+        __ ld  (Gargs, arg_slot(ld_off)     , r->successor()); // Load lo bits
+        __ ld  (Gargs, next_arg_slot(ld_off), r);              // Load hi bits
+      }
+    }
+  }
+#endif
+
+  // Jump to the compiled code just as if compiled code was doing it.
+  //
+#ifndef _LP64
+  if (g3_crushed) {
+    // Rats load was wasted, at least it is in cache...
+    __ ld_ptr(G5_method, in_bytes(methodOopDesc::from_compiled_offset()), G3);
+  }
+#endif /* _LP64 */
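
The G1/G4 pickup above reassembles a 64-bit long from two 32-bit interpreter words with an sllx/or3 pair. The equivalent combination in portable C++ (a sketch):

    #include <cstdint>
    #include <cstdio>

    // Mirror of the sllx/or3 sequence: the high word shifted into the upper
    // half, the low word OR'ed into the lower half of the 64-bit register.
    uint64_t combine(uint32_t hi, uint32_t lo) {
      uint64_t r = hi;  // ld   -> hi bits
      r <<= 32;         // sllx r, 32, r
      return r | lo;    // or3  scratch, r, r
    }

    int main() {
      printf("%016llx\n", (unsigned long long)combine(0x01234567, 0x89abcdef));
      return 0;  // 0123456789abcdef
    }
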
+
+  // 6243940 We might end up in handle_wrong_method if
+  // the callee is deoptimized as we race thru here. If that
+  // happens we don't want to take a safepoint because the
+  // caller frame will look interpreted and arguments are now
+  // "compiled" so it is much better to make this transition
+  // invisible to the stack walking code. Unfortunately if
+  // we try and find the callee by normal means a safepoint
+  // is possible. So we stash the desired callee in the thread
+  // and the vm will find it there should this case occur.
+  Address callee_target_addr(G2_thread, 0, in_bytes(JavaThread::callee_target_offset()));
+  __ st_ptr(G5_method, callee_target_addr);
+
+  if (StressNonEntrant) {
+    // Open a big window for deopt failure
+    __ save_frame(0);
+    __ mov(G0, L0);
+    Label loop;
+    __ bind(loop);
+    __ sub(L0, 1, L0);
+    __ br_null(L0, false, Assembler::pt, loop);
+    __ delayed()->nop();
+
+    __ restore();
+  }
+
+
+  __ jmpl(G3, 0, G0);
+  __ delayed()->nop();
+}
+
+// ---------------------------------------------------------------
+AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
+                                                            int total_args_passed,
+                                                            // VMReg max_arg,
+                                                            int comp_args_on_stack, // VMRegStackSlots
+                                                            const BasicType *sig_bt,
+                                                            const VMRegPair *regs) {
+  address i2c_entry = __ pc();
+
+  AdapterGenerator agen(masm);
+
+  agen.gen_i2c_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs);
+
+
+  // -------------------------------------------------------------------------
+  // Generate a C2I adapter.  On entry we know G5 holds the methodOop.  The
+  // args start out packed in the compiled layout.  They need to be unpacked
+  // into the interpreter layout.  This will almost always require some stack
+  // space.  We grow the current (compiled) stack, then repack the args.  We
+  // finally end in a jump to the generic interpreter entry point.  On exit
+  // from the interpreter, the interpreter will restore our SP (lest the
+  // compiled code, which relies solely on SP and not FP, get sick).
+
+  address c2i_unverified_entry = __ pc();
+  Label skip_fixup;
+  {
+#if !defined(_LP64) && defined(COMPILER2)
+    Register R_temp = L0;   // another scratch register
+#else
+    Register R_temp = G1;   // another scratch register
+#endif
+
+    Address ic_miss(G3_scratch, SharedRuntime::get_ic_miss_stub());
+
+    __ verify_oop(O0);
+    __ verify_oop(G5_method);
+    __ ld_ptr(O0, oopDesc::klass_offset_in_bytes(), G3_scratch);
+    __ verify_oop(G3_scratch);
+
+#if !defined(_LP64) && defined(COMPILER2)
+    __ save(SP, -frame::register_save_words*wordSize, SP);
+    __ ld_ptr(G5_method, compiledICHolderOopDesc::holder_klass_offset(), R_temp);
+    __ verify_oop(R_temp);
+    __ cmp(G3_scratch, R_temp);
+    __ restore();
+#else
+    __ ld_ptr(G5_method, compiledICHolderOopDesc::holder_klass_offset(), R_temp);
+    __ verify_oop(R_temp);
+    __ cmp(G3_scratch, R_temp);
+#endif
+
+    Label ok, ok2;
+    __ brx(Assembler::equal, false, Assembler::pt, ok);
+    __ delayed()->ld_ptr(G5_method, compiledICHolderOopDesc::holder_method_offset(), G5_method);
+    __ jump_to(ic_miss);
+    __ delayed()->nop();
+
+    __ bind(ok);
+    // Method might have been compiled since the call site was patched to
+    // interpreted; if that is the case, treat it as a miss so we can get
+    // the call site corrected.
+    __ ld_ptr(G5_method, in_bytes(methodOopDesc::code_offset()), G3_scratch);
+    __ bind(ok2);
+    __ br_null(G3_scratch, false, __ pt, skip_fixup);
+    __ delayed()->ld_ptr(G5_method, in_bytes(methodOopDesc::interpreter_entry_offset()), G3_scratch);
+    __ jump_to(ic_miss);
+    __ delayed()->nop();
+
+  }
+
+  address c2i_entry = __ pc();
+
+  agen.gen_c2i_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
+
+  __ flush();
+  return new AdapterHandlerEntry(i2c_entry, c2i_entry, c2i_unverified_entry);
+
+}
+
+// Helper function for native calling conventions
+static VMReg int_stk_helper( int i ) {
+  // Bias any stack based VMReg we get by ignoring the window area
+  // but not the register parameter save area.
+  //
+  // This is strange for the following reasons. We'd normally expect
+  // the calling convention to return a VMReg for a stack slot
+  // completely ignoring any abi reserved area.  C2 thinks of that
+  // abi area as only out_preserve_stack_slots. This does not include
+  // the area allocated by the C abi to store down integer arguments
+  // because the java calling convention does not use it. So, since c2
+  // assumes that there are only out_preserve_stack_slots used to bias
+  // the optoregs (which impacts VMRegs), when actually referencing any
+  // stack location the C calling convention must add in this bias amount
+  // to make up for the fact that out_preserve_stack_slots is
+  // insufficient for C calls. What a mess. I sure hope those 6
+  // stack words were worth it on every java call!
+
+  // Another way of cleaning this up would be for out_preserve_stack_slots
+  // to take a parameter to say whether it was C or java calling conventions.
+  // Then things might look a little better (but not much).
+
+  int mem_parm_offset = i - SPARC_ARGS_IN_REGS_NUM;
+  if( mem_parm_offset < 0 ) {
+    return as_oRegister(i)->as_VMReg();
+  } else {
+    int actual_offset = (mem_parm_offset + frame::memory_parameter_word_sp_offset) * VMRegImpl::slots_per_word;
+    // Now return a biased offset that will be correct when out_preserve_slots is added back in
+    return VMRegImpl::stack2reg(actual_offset - SharedRuntime::out_preserve_stack_slots());
+  }
+}
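
int_stk_helper maps the i-th C argument either to an O register (the first 6) or to a stack slot biased so that adding out_preserve_stack_slots back yields the real location. A sketch of that math; all frame constants here are illustrative stand-ins, not necessarily this port's values:

    #include <cstdio>

    int main() {
      // Illustrative stand-ins for the constants used by int_stk_helper:
      // 6 register args, memory parameters start 23 words into the frame,
      // 2 slots per 64-bit word, 16 out-preserve slots.
      const int kArgsInRegs = 6, kMemParamWordOff = 23;
      const int kSlotsPerWord = 2, kOutPreserve = 16;
      for (int i = 4; i < 9; i++) {
        int mem = i - kArgsInRegs;
        if (mem < 0)
          printf("arg %d -> O%d\n", i, i);
        else  // biased so that adding out_preserve back yields the real slot
          printf("arg %d -> stack slot %d (biased)\n",
                 i, (mem + kMemParamWordOff) * kSlotsPerWord - kOutPreserve);
      }
      return 0;
    }
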
1.1318 + // Hoist any int/ptr/long's in the first 6 to int regs. 1.1319 + // Hoist any flt/dbl's in the first 16 dbl regs. 1.1320 + int j = 0; // Count of actual args, not HALVES 1.1321 + for( int i=0; i<total_args_passed; i++, j++ ) { 1.1322 + switch( sig_bt[i] ) { 1.1323 + case T_BOOLEAN: 1.1324 + case T_BYTE: 1.1325 + case T_CHAR: 1.1326 + case T_INT: 1.1327 + case T_SHORT: 1.1328 + regs[i].set1( int_stk_helper( j ) ); break; 1.1329 + case T_LONG: 1.1330 + assert( sig_bt[i+1] == T_VOID, "expecting half" ); 1.1331 + case T_ADDRESS: // raw pointers, like current thread, for VM calls 1.1332 + case T_ARRAY: 1.1333 + case T_OBJECT: 1.1334 + regs[i].set2( int_stk_helper( j ) ); 1.1335 + break; 1.1336 + case T_FLOAT: 1.1337 + if ( j < 16 ) { 1.1338 + // V9ism: floats go in ODD registers 1.1339 + regs[i].set1(as_FloatRegister(1 + (j<<1))->as_VMReg()); 1.1340 + } else { 1.1341 + // V9ism: floats go in ODD stack slot 1.1342 + regs[i].set1(VMRegImpl::stack2reg(1 + (j<<1))); 1.1343 + } 1.1344 + break; 1.1345 + case T_DOUBLE: 1.1346 + assert( sig_bt[i+1] == T_VOID, "expecting half" ); 1.1347 + if ( j < 16 ) { 1.1348 + // V9ism: doubles go in EVEN/ODD regs 1.1349 + regs[i].set2(as_FloatRegister(j<<1)->as_VMReg()); 1.1350 + } else { 1.1351 + // V9ism: doubles go in EVEN/ODD stack slots 1.1352 + regs[i].set2(VMRegImpl::stack2reg(j<<1)); 1.1353 + } 1.1354 + break; 1.1355 + case T_VOID: regs[i].set_bad(); j--; break; // Do not count HALVES 1.1356 + default: 1.1357 + ShouldNotReachHere(); 1.1358 + } 1.1359 + if (regs[i].first()->is_stack()) { 1.1360 + int off = regs[i].first()->reg2stack(); 1.1361 + if (off > max_stack_slots) max_stack_slots = off; 1.1362 + } 1.1363 + if (regs[i].second()->is_stack()) { 1.1364 + int off = regs[i].second()->reg2stack(); 1.1365 + if (off > max_stack_slots) max_stack_slots = off; 1.1366 + } 1.1367 + } 1.1368 + 1.1369 +#else // _LP64 1.1370 + // V8 convention: first 6 things in O-regs, rest on stack. 1.1371 + // Alignment is willy-nilly. 
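// As a worked example (illustrative values only), a native signature of
// (jint, jlong, jfloat) is laid out by the loop below as:
//   arg 0 (jint)   -> O0
//   arg 1 (jlong)  -> the O1/O2 pair, one 32-bit half each
//   arg 2 (jfloat) -> O3 (under V8 floats travel in the integer registers)
// with anything beyond the sixth argument word going to the stack,
// biased as described in int_stk_helper.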
1.1372 + for( int i=0; i<total_args_passed; i++ ) {
1.1373 + switch( sig_bt[i] ) {
1.1374 + case T_ADDRESS: // raw pointers, like current thread, for VM calls
1.1375 + case T_ARRAY:
1.1376 + case T_BOOLEAN:
1.1377 + case T_BYTE:
1.1378 + case T_CHAR:
1.1379 + case T_FLOAT:
1.1380 + case T_INT:
1.1381 + case T_OBJECT:
1.1382 + case T_SHORT:
1.1383 + regs[i].set1( int_stk_helper( i ) );
1.1384 + break;
1.1385 + case T_DOUBLE:
1.1386 + case T_LONG:
1.1387 + assert( sig_bt[i+1] == T_VOID, "expecting half" );
1.1388 + regs[i].set_pair( int_stk_helper( i+1 ), int_stk_helper( i ) );
1.1389 + break;
1.1390 + case T_VOID: regs[i].set_bad(); break;
1.1391 + default:
1.1392 + ShouldNotReachHere();
1.1393 + }
1.1394 + if (regs[i].first()->is_stack()) {
1.1395 + int off = regs[i].first()->reg2stack();
1.1396 + if (off > max_stack_slots) max_stack_slots = off;
1.1397 + }
1.1398 + if (regs[i].second()->is_stack()) {
1.1399 + int off = regs[i].second()->reg2stack();
1.1400 + if (off > max_stack_slots) max_stack_slots = off;
1.1401 + }
1.1402 + }
1.1403 +#endif // _LP64
1.1404 +
1.1405 + return round_to(max_stack_slots + 1, 2);
1.1406 +
1.1407 +}
1.1408 +
1.1409 +
1.1410 +// ---------------------------------------------------------------------------
1.1411 +void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
1.1412 + switch (ret_type) {
1.1413 + case T_FLOAT:
1.1414 + __ stf(FloatRegisterImpl::S, F0, SP, frame_slots*VMRegImpl::stack_slot_size - 4+STACK_BIAS);
1.1415 + break;
1.1416 + case T_DOUBLE:
1.1417 + __ stf(FloatRegisterImpl::D, F0, SP, frame_slots*VMRegImpl::stack_slot_size - 8+STACK_BIAS);
1.1418 + break;
1.1419 + }
1.1420 +}
1.1421 +
1.1422 +void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
1.1423 + switch (ret_type) {
1.1424 + case T_FLOAT:
1.1425 + __ ldf(FloatRegisterImpl::S, SP, frame_slots*VMRegImpl::stack_slot_size - 4+STACK_BIAS, F0);
1.1426 + break;
1.1427 + case T_DOUBLE:
1.1428 + __ ldf(FloatRegisterImpl::D, SP, frame_slots*VMRegImpl::stack_slot_size - 8+STACK_BIAS, F0);
1.1429 + break;
1.1430 + }
1.1431 +}
1.1432 +
1.1433 +// Check and forward any pending exception. The thread is stored in
1.1434 +// L7_thread_cache and possibly NOT in G2_thread. Since this is a native call, there
1.1435 +// is no exception handler. We merely pop this frame off and throw the
1.1436 +// exception in the caller's frame.
1.1437 +static void check_forward_pending_exception(MacroAssembler *masm, Register Rex_oop) {
1.1438 + Label L;
1.1439 + __ br_null(Rex_oop, false, Assembler::pt, L);
1.1440 + __ delayed()->mov(L7_thread_cache, G2_thread); // restore in case we have exception
1.1441 + // Since this is a native call, we *know* the proper exception handler
1.1442 + // without calling into the VM: it's the empty function. Just pop this
1.1443 + // frame and then jump to forward_exception_entry; O7 will contain the
1.1444 + // native caller's return PC.
1.1445 + Address exception_entry(G3_scratch, StubRoutines::forward_exception_entry());
1.1446 + __ jump_to(exception_entry);
1.1447 + __ delayed()->restore(); // Pop this frame off.
1.1448 + __ bind(L); 1.1449 +} 1.1450 + 1.1451 +// A simple move of integer like type 1.1452 +static void simple_move32(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { 1.1453 + if (src.first()->is_stack()) { 1.1454 + if (dst.first()->is_stack()) { 1.1455 + // stack to stack 1.1456 + __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5); 1.1457 + __ st(L5, SP, reg2offset(dst.first()) + STACK_BIAS); 1.1458 + } else { 1.1459 + // stack to reg 1.1460 + __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register()); 1.1461 + } 1.1462 + } else if (dst.first()->is_stack()) { 1.1463 + // reg to stack 1.1464 + __ st(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS); 1.1465 + } else { 1.1466 + __ mov(src.first()->as_Register(), dst.first()->as_Register()); 1.1467 + } 1.1468 +} 1.1469 + 1.1470 +// On 64 bit we will store integer like items to the stack as 1.1471 +// 64 bits items (sparc abi) even though java would only store 1.1472 +// 32bits for a parameter. On 32bit it will simply be 32 bits 1.1473 +// So this routine will do 32->32 on 32bit and 32->64 on 64bit 1.1474 +static void move32_64(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { 1.1475 + if (src.first()->is_stack()) { 1.1476 + if (dst.first()->is_stack()) { 1.1477 + // stack to stack 1.1478 + __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5); 1.1479 + __ st_ptr(L5, SP, reg2offset(dst.first()) + STACK_BIAS); 1.1480 + } else { 1.1481 + // stack to reg 1.1482 + __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register()); 1.1483 + } 1.1484 + } else if (dst.first()->is_stack()) { 1.1485 + // reg to stack 1.1486 + __ st_ptr(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS); 1.1487 + } else { 1.1488 + __ mov(src.first()->as_Register(), dst.first()->as_Register()); 1.1489 + } 1.1490 +} 1.1491 + 1.1492 + 1.1493 +// An oop arg. Must pass a handle not the oop itself 1.1494 +static void object_move(MacroAssembler* masm, 1.1495 + OopMap* map, 1.1496 + int oop_handle_offset, 1.1497 + int framesize_in_slots, 1.1498 + VMRegPair src, 1.1499 + VMRegPair dst, 1.1500 + bool is_receiver, 1.1501 + int* receiver_offset) { 1.1502 + 1.1503 + // must pass a handle. First figure out the location we use as a handle 1.1504 + 1.1505 + if (src.first()->is_stack()) { 1.1506 + // Oop is already on the stack 1.1507 + Register rHandle = dst.first()->is_stack() ? 
L5 : dst.first()->as_Register();
1.1508 + __ add(FP, reg2offset(src.first()) + STACK_BIAS, rHandle);
1.1509 + __ ld_ptr(rHandle, 0, L4);
1.1510 +#ifdef _LP64
1.1511 + __ movr( Assembler::rc_z, L4, G0, rHandle );
1.1512 +#else
1.1513 + __ tst( L4 );
1.1514 + __ movcc( Assembler::zero, false, Assembler::icc, G0, rHandle );
1.1515 +#endif
1.1516 + if (dst.first()->is_stack()) {
1.1517 + __ st_ptr(rHandle, SP, reg2offset(dst.first()) + STACK_BIAS);
1.1518 + }
1.1519 + int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
1.1520 + if (is_receiver) {
1.1521 + *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size;
1.1522 + }
1.1523 + map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots));
1.1524 + } else {
1.1525 + // Oop is in an input register; we must flush it to the stack
1.1526 + const Register rOop = src.first()->as_Register();
1.1527 + const Register rHandle = L5;
1.1528 + int oop_slot = rOop->input_number() * VMRegImpl::slots_per_word + oop_handle_offset;
1.1529 + int offset = oop_slot*VMRegImpl::stack_slot_size;
1.1530 + Label skip;
1.1531 + __ st_ptr(rOop, SP, offset + STACK_BIAS);
1.1532 + if (is_receiver) {
1.1533 + *receiver_offset = oop_slot * VMRegImpl::stack_slot_size;
1.1534 + }
1.1535 + map->set_oop(VMRegImpl::stack2reg(oop_slot));
1.1536 + __ add(SP, offset + STACK_BIAS, rHandle);
1.1537 +#ifdef _LP64
1.1538 + __ movr( Assembler::rc_z, rOop, G0, rHandle );
1.1539 +#else
1.1540 + __ tst( rOop );
1.1541 + __ movcc( Assembler::zero, false, Assembler::icc, G0, rHandle );
1.1542 +#endif
1.1543 +
1.1544 + if (dst.first()->is_stack()) {
1.1545 + __ st_ptr(rHandle, SP, reg2offset(dst.first()) + STACK_BIAS);
1.1546 + } else {
1.1547 + __ mov(rHandle, dst.first()->as_Register());
1.1548 + }
1.1549 + }
1.1550 +}
1.1551 +
1.1552 +// A float arg may have to do a float reg to int reg conversion
1.1553 +static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
1.1554 + assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move");
1.1555 +
1.1556 + if (src.first()->is_stack()) {
1.1557 + if (dst.first()->is_stack()) {
1.1558 + // stack to stack, the easiest of the bunch
1.1559 + __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5);
1.1560 + __ st(L5, SP, reg2offset(dst.first()) + STACK_BIAS);
1.1561 + } else {
1.1562 + // stack to reg
1.1563 + if (dst.first()->is_Register()) {
1.1564 + __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
1.1565 + } else {
1.1566 + __ ldf(FloatRegisterImpl::S, FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_FloatRegister());
1.1567 + }
1.1568 + }
1.1569 + } else if (dst.first()->is_stack()) {
1.1570 + // reg to stack
1.1571 + if (src.first()->is_Register()) {
1.1572 + __ st(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS);
1.1573 + } else {
1.1574 + __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), SP, reg2offset(dst.first()) + STACK_BIAS);
1.1575 + }
1.1576 + } else {
1.1577 + // reg to reg
1.1578 + if (src.first()->is_Register()) {
1.1579 + if (dst.first()->is_Register()) {
1.1580 + // gpr -> gpr
1.1581 + __ mov(src.first()->as_Register(), dst.first()->as_Register());
1.1582 + } else {
1.1583 + // gpr -> fpr
1.1584 + __ st(src.first()->as_Register(), FP, -4 + STACK_BIAS);
1.1585 + __ ldf(FloatRegisterImpl::S, FP, -4 + STACK_BIAS, dst.first()->as_FloatRegister());
1.1586 + }
1.1587 + } else if (dst.first()->is_Register()) {
1.1588 + // fpr -> gpr
1.1589
+ __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), FP, -4 + STACK_BIAS); 1.1590 + __ ld(FP, -4 + STACK_BIAS, dst.first()->as_Register()); 1.1591 + } else { 1.1592 + // fpr -> fpr 1.1593 + // In theory these overlap but the ordering is such that this is likely a nop 1.1594 + if ( src.first() != dst.first()) { 1.1595 + __ fmov(FloatRegisterImpl::S, src.first()->as_FloatRegister(), dst.first()->as_FloatRegister()); 1.1596 + } 1.1597 + } 1.1598 + } 1.1599 +} 1.1600 + 1.1601 +static void split_long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { 1.1602 + VMRegPair src_lo(src.first()); 1.1603 + VMRegPair src_hi(src.second()); 1.1604 + VMRegPair dst_lo(dst.first()); 1.1605 + VMRegPair dst_hi(dst.second()); 1.1606 + simple_move32(masm, src_lo, dst_lo); 1.1607 + simple_move32(masm, src_hi, dst_hi); 1.1608 +} 1.1609 + 1.1610 +// A long move 1.1611 +static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { 1.1612 + 1.1613 + // Do the simple ones here else do two int moves 1.1614 + if (src.is_single_phys_reg() ) { 1.1615 + if (dst.is_single_phys_reg()) { 1.1616 + __ mov(src.first()->as_Register(), dst.first()->as_Register()); 1.1617 + } else { 1.1618 + // split src into two separate registers 1.1619 + // Remember hi means hi address or lsw on sparc 1.1620 + // Move msw to lsw 1.1621 + if (dst.second()->is_reg()) { 1.1622 + // MSW -> MSW 1.1623 + __ srax(src.first()->as_Register(), 32, dst.first()->as_Register()); 1.1624 + // Now LSW -> LSW 1.1625 + // this will only move lo -> lo and ignore hi 1.1626 + VMRegPair split(dst.second()); 1.1627 + simple_move32(masm, src, split); 1.1628 + } else { 1.1629 + VMRegPair split(src.first(), L4->as_VMReg()); 1.1630 + // MSW -> MSW (lo ie. first word) 1.1631 + __ srax(src.first()->as_Register(), 32, L4); 1.1632 + split_long_move(masm, split, dst); 1.1633 + } 1.1634 + } 1.1635 + } else if (dst.is_single_phys_reg()) { 1.1636 + if (src.is_adjacent_aligned_on_stack(2)) { 1.1637 + __ ldd(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register()); 1.1638 + } else { 1.1639 + // dst is a single reg. 1.1640 + // Remember lo is low address not msb for stack slots 1.1641 + // and lo is the "real" register for registers 1.1642 + // src is 1.1643 + 1.1644 + VMRegPair split; 1.1645 + 1.1646 + if (src.first()->is_reg()) { 1.1647 + // src.lo (msw) is a reg, src.hi is stk/reg 1.1648 + // we will move: src.hi (LSW) -> dst.lo, src.lo (MSW) -> src.lo [the MSW is in the LSW of the reg] 1.1649 + split.set_pair(dst.first(), src.first()); 1.1650 + } else { 1.1651 + // msw is stack move to L5 1.1652 + // lsw is stack move to dst.lo (real reg) 1.1653 + // we will move: src.hi (LSW) -> dst.lo, src.lo (MSW) -> L5 1.1654 + split.set_pair(dst.first(), L5->as_VMReg()); 1.1655 + } 1.1656 + 1.1657 + // src.lo -> src.lo/L5, src.hi -> dst.lo (the real reg) 1.1658 + // msw -> src.lo/L5, lsw -> dst.lo 1.1659 + split_long_move(masm, src, split); 1.1660 + 1.1661 + // So dst now has the low order correct position the 1.1662 + // msw half 1.1663 + __ sllx(split.first()->as_Register(), 32, L5); 1.1664 + 1.1665 + const Register d = dst.first()->as_Register(); 1.1666 + __ or3(L5, d, d); 1.1667 + } 1.1668 + } else { 1.1669 + // For LP64 we can probably do better. 
1.1670 + split_long_move(masm, src, dst); 1.1671 + } 1.1672 +} 1.1673 + 1.1674 +// A double move 1.1675 +static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { 1.1676 + 1.1677 + // The painful thing here is that like long_move a VMRegPair might be 1.1678 + // 1: a single physical register 1.1679 + // 2: two physical registers (v8) 1.1680 + // 3: a physical reg [lo] and a stack slot [hi] (v8) 1.1681 + // 4: two stack slots 1.1682 + 1.1683 + // Since src is always a java calling convention we know that the src pair 1.1684 + // is always either all registers or all stack (and aligned?) 1.1685 + 1.1686 + // in a register [lo] and a stack slot [hi] 1.1687 + if (src.first()->is_stack()) { 1.1688 + if (dst.first()->is_stack()) { 1.1689 + // stack to stack the easiest of the bunch 1.1690 + // ought to be a way to do this where if alignment is ok we use ldd/std when possible 1.1691 + __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5); 1.1692 + __ ld(FP, reg2offset(src.second()) + STACK_BIAS, L4); 1.1693 + __ st(L5, SP, reg2offset(dst.first()) + STACK_BIAS); 1.1694 + __ st(L4, SP, reg2offset(dst.second()) + STACK_BIAS); 1.1695 + } else { 1.1696 + // stack to reg 1.1697 + if (dst.second()->is_stack()) { 1.1698 + // stack -> reg, stack -> stack 1.1699 + __ ld(FP, reg2offset(src.second()) + STACK_BIAS, L4); 1.1700 + if (dst.first()->is_Register()) { 1.1701 + __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register()); 1.1702 + } else { 1.1703 + __ ldf(FloatRegisterImpl::S, FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_FloatRegister()); 1.1704 + } 1.1705 + // This was missing. (very rare case) 1.1706 + __ st(L4, SP, reg2offset(dst.second()) + STACK_BIAS); 1.1707 + } else { 1.1708 + // stack -> reg 1.1709 + // Eventually optimize for alignment QQQ 1.1710 + if (dst.first()->is_Register()) { 1.1711 + __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register()); 1.1712 + __ ld(FP, reg2offset(src.second()) + STACK_BIAS, dst.second()->as_Register()); 1.1713 + } else { 1.1714 + __ ldf(FloatRegisterImpl::S, FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_FloatRegister()); 1.1715 + __ ldf(FloatRegisterImpl::S, FP, reg2offset(src.second()) + STACK_BIAS, dst.second()->as_FloatRegister()); 1.1716 + } 1.1717 + } 1.1718 + } 1.1719 + } else if (dst.first()->is_stack()) { 1.1720 + // reg to stack 1.1721 + if (src.first()->is_Register()) { 1.1722 + // Eventually optimize for alignment QQQ 1.1723 + __ st(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS); 1.1724 + if (src.second()->is_stack()) { 1.1725 + __ ld(FP, reg2offset(src.second()) + STACK_BIAS, L4); 1.1726 + __ st(L4, SP, reg2offset(dst.second()) + STACK_BIAS); 1.1727 + } else { 1.1728 + __ st(src.second()->as_Register(), SP, reg2offset(dst.second()) + STACK_BIAS); 1.1729 + } 1.1730 + } else { 1.1731 + // fpr to stack 1.1732 + if (src.second()->is_stack()) { 1.1733 + ShouldNotReachHere(); 1.1734 + } else { 1.1735 + // Is the stack aligned? 
1.1736 + if (reg2offset(dst.first()) & 0x7) { 1.1737 + // No do as pairs 1.1738 + __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), SP, reg2offset(dst.first()) + STACK_BIAS); 1.1739 + __ stf(FloatRegisterImpl::S, src.second()->as_FloatRegister(), SP, reg2offset(dst.second()) + STACK_BIAS); 1.1740 + } else { 1.1741 + __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), SP, reg2offset(dst.first()) + STACK_BIAS); 1.1742 + } 1.1743 + } 1.1744 + } 1.1745 + } else { 1.1746 + // reg to reg 1.1747 + if (src.first()->is_Register()) { 1.1748 + if (dst.first()->is_Register()) { 1.1749 + // gpr -> gpr 1.1750 + __ mov(src.first()->as_Register(), dst.first()->as_Register()); 1.1751 + __ mov(src.second()->as_Register(), dst.second()->as_Register()); 1.1752 + } else { 1.1753 + // gpr -> fpr 1.1754 + // ought to be able to do a single store 1.1755 + __ stx(src.first()->as_Register(), FP, -8 + STACK_BIAS); 1.1756 + __ stx(src.second()->as_Register(), FP, -4 + STACK_BIAS); 1.1757 + // ought to be able to do a single load 1.1758 + __ ldf(FloatRegisterImpl::S, FP, -8 + STACK_BIAS, dst.first()->as_FloatRegister()); 1.1759 + __ ldf(FloatRegisterImpl::S, FP, -4 + STACK_BIAS, dst.second()->as_FloatRegister()); 1.1760 + } 1.1761 + } else if (dst.first()->is_Register()) { 1.1762 + // fpr -> gpr 1.1763 + // ought to be able to do a single store 1.1764 + __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), FP, -8 + STACK_BIAS); 1.1765 + // ought to be able to do a single load 1.1766 + // REMEMBER first() is low address not LSB 1.1767 + __ ld(FP, -8 + STACK_BIAS, dst.first()->as_Register()); 1.1768 + if (dst.second()->is_Register()) { 1.1769 + __ ld(FP, -4 + STACK_BIAS, dst.second()->as_Register()); 1.1770 + } else { 1.1771 + __ ld(FP, -4 + STACK_BIAS, L4); 1.1772 + __ st(L4, SP, reg2offset(dst.second()) + STACK_BIAS); 1.1773 + } 1.1774 + } else { 1.1775 + // fpr -> fpr 1.1776 + // In theory these overlap but the ordering is such that this is likely a nop 1.1777 + if ( src.first() != dst.first()) { 1.1778 + __ fmov(FloatRegisterImpl::D, src.first()->as_FloatRegister(), dst.first()->as_FloatRegister()); 1.1779 + } 1.1780 + } 1.1781 + } 1.1782 +} 1.1783 + 1.1784 +// Creates an inner frame if one hasn't already been created, and 1.1785 +// saves a copy of the thread in L7_thread_cache 1.1786 +static void create_inner_frame(MacroAssembler* masm, bool* already_created) { 1.1787 + if (!*already_created) { 1.1788 + __ save_frame(0); 1.1789 + // Save thread in L7 (INNER FRAME); it crosses a bunch of VM calls below 1.1790 + // Don't use save_thread because it smashes G2 and we merely want to save a 1.1791 + // copy 1.1792 + __ mov(G2_thread, L7_thread_cache); 1.1793 + *already_created = true; 1.1794 + } 1.1795 +} 1.1796 + 1.1797 +// --------------------------------------------------------------------------- 1.1798 +// Generate a native wrapper for a given method. The method takes arguments 1.1799 +// in the Java compiled code convention, marshals them to the native 1.1800 +// convention (handlizes oops, etc), transitions to native, makes the call, 1.1801 +// returns to java state (possibly blocking), unhandlizes any result and 1.1802 +// returns. 
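// In outline, the sequence emitted below is (a sketch of the steps,
// not additional entry points):
//   1. inline cache check
//   2. shuffle the java args into the C locations, handlizing oops
//   3. handlize the class mirror if the method is static
//   4. lock the receiver if the method is synchronized
//   5. transition to _thread_in_native and call the native function
//   6. on return, block if a safepoint is in progress
//   7. unlock if needed, unhandlize any oop result and forward any
//      pending exception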
1.1803 +nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
1.1804 + methodHandle method,
1.1805 + int total_in_args,
1.1806 + int comp_args_on_stack, // in VMRegStackSlots
1.1807 + BasicType *in_sig_bt,
1.1808 + VMRegPair *in_regs,
1.1809 + BasicType ret_type) {
1.1810 +
1.1811 +
1.1812 + // Native nmethod wrappers never take possession of the oop arguments.
1.1813 + // So the caller will gc the arguments. The only thing we need an
1.1814 + // oopMap for is if the call is static.
1.1815 + //
1.1816 + // An OopMap for lock (and class if static), and one for the VM call itself
1.1817 + OopMapSet *oop_maps = new OopMapSet();
1.1818 + intptr_t start = (intptr_t)__ pc();
1.1819 +
1.1820 + // First thing make an ic check to see if we should even be here
1.1821 + {
1.1822 + Label L;
1.1823 + const Register temp_reg = G3_scratch;
1.1824 + Address ic_miss(temp_reg, SharedRuntime::get_ic_miss_stub());
1.1825 + __ verify_oop(O0);
1.1826 + __ ld_ptr(O0, oopDesc::klass_offset_in_bytes(), temp_reg);
1.1827 + __ cmp(temp_reg, G5_inline_cache_reg);
1.1828 + __ brx(Assembler::equal, true, Assembler::pt, L);
1.1829 + __ delayed()->nop();
1.1830 +
1.1831 + __ jump_to(ic_miss, 0);
1.1832 + __ delayed()->nop();
1.1833 + __ align(CodeEntryAlignment);
1.1834 + __ bind(L);
1.1835 + }
1.1836 +
1.1837 + int vep_offset = ((intptr_t)__ pc()) - start;
1.1838 +
1.1839 +#ifdef COMPILER1
1.1840 + if (InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) {
1.1841 + // Object.hashCode can pull the hashCode from the header word
1.1842 + // instead of doing a full VM transition once it's been computed.
1.1843 + // Since hashCode is usually polymorphic at call sites we can't do
1.1844 + // this optimization at the call site without a lot of work.
1.1845 + Label slowCase;
1.1846 + Register receiver = O0;
1.1847 + Register result = O0;
1.1848 + Register header = G3_scratch;
1.1849 + Register hash = G3_scratch; // overwrite header value with hash value
1.1850 + Register mask = G1; // to get hash field from header
1.1851 +
1.1852 + // Read the header and build a mask to get its hash field. Give up if the object is not unlocked.
1.1853 + // We depend on hash_mask being at most 32 bits and avoid the use of
1.1854 + // hash_mask_in_place because it could be larger than 32 bits in a 64-bit
1.1855 + // vm: see markOop.hpp.
1.1856 + __ ld_ptr(receiver, oopDesc::mark_offset_in_bytes(), header);
1.1857 + __ sethi(markOopDesc::hash_mask, mask);
1.1858 + __ btst(markOopDesc::unlocked_value, header);
1.1859 + __ br(Assembler::zero, false, Assembler::pn, slowCase);
1.1860 + if (UseBiasedLocking) {
1.1861 + // Check if biased and fall through to runtime if so
1.1862 + __ delayed()->nop();
1.1863 + __ btst(markOopDesc::biased_lock_bit_in_place, header);
1.1864 + __ br(Assembler::notZero, false, Assembler::pn, slowCase);
1.1865 + }
1.1866 + __ delayed()->or3(mask, markOopDesc::hash_mask & 0x3ff, mask);
1.1867 +
1.1868 + // Check for a valid (non-zero) hash code and get its value.
1.1869 +#ifdef _LP64
1.1870 + __ srlx(header, markOopDesc::hash_shift, hash);
1.1871 +#else
1.1872 + __ srl(header, markOopDesc::hash_shift, hash);
1.1873 +#endif
1.1874 + __ andcc(hash, mask, hash);
1.1875 + __ br(Assembler::equal, false, Assembler::pn, slowCase);
1.1876 + __ delayed()->nop();
1.1877 +
1.1878 + // leaf return.
1.1879 + __ retl();
1.1880 + __ delayed()->mov(hash, result);
1.1881 + __ bind(slowCase);
1.1882 + }
1.1883 +#endif // COMPILER1
1.1884 +
1.1885 +
1.1886 + // We have received a description of where all the java args are located
1.1887 + // on entry to the wrapper. We need to convert these args to where
1.1888 + // the jni function will expect them. To figure out where they go
1.1889 + // we convert the java signature to a C signature by inserting
1.1890 + // the hidden arguments as arg[0] and possibly arg[1] (static method)
1.1891 +
1.1892 + int total_c_args = total_in_args + 1;
1.1893 + if (method->is_static()) {
1.1894 + total_c_args++;
1.1895 + }
1.1896 +
1.1897 + BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
1.1898 + VMRegPair * out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
1.1899 +
1.1900 + int argc = 0;
1.1901 + out_sig_bt[argc++] = T_ADDRESS;
1.1902 + if (method->is_static()) {
1.1903 + out_sig_bt[argc++] = T_OBJECT;
1.1904 + }
1.1905 +
1.1906 + for (int i = 0; i < total_in_args ; i++ ) {
1.1907 + out_sig_bt[argc++] = in_sig_bt[i];
1.1908 + }
1.1909 +
1.1910 + // Now figure out where the args must be stored and how much stack space
1.1911 + // they require (neglecting out_preserve_stack_slots but space for storing
1.1912 + // the 1st six register arguments). It's weird; see int_stk_helper.
1.1913 + //
1.1914 + int out_arg_slots;
1.1915 + out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args);
1.1916 +
1.1917 + // Compute framesize for the wrapper. We need to handlize all oops in
1.1918 + // registers. We must create space for them here that is disjoint from
1.1919 + // the windowed save area because we have no control over when we might
1.1920 + // flush the window again and overwrite values that gc has since modified.
1.1921 + // (The live window race)
1.1922 + //
1.1923 + // We always just allocate 6 words for storing down these objects. This allows
1.1924 + // us to simply record the base and use the Ireg number to decide which
1.1925 + // slot to use. (Note that the reg number is the inbound number, not the
1.1926 + // outbound number.)
1.1927 + // We must shuffle args to match the native convention, and include var-args space.
1.1928 +
1.1929 + // Calculate the total number of stack slots we will need.
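// A condensed sketch of the accounting performed below (the names mirror
// the locals that follow; the layout diagram further down shows the result):
//
//   stack_slots = out_preserve_stack_slots() + out_arg_slots;             // abi + outgoing args
//   oop_handle_offset = stack_slots; stack_slots += 6 * slots_per_word;   // inbound oop handles
//   if (static)       { klass_slot_offset = stack_slots; stack_slots += slots_per_word; }
//   if (synchronized) { lock_slot_offset  = stack_slots; stack_slots += slots_per_word; }
//   stack_slots += 2;                                                     // return value / gpr->fpr move temp
//   stack_slots = round_to(stack_slots, 2 * slots_per_word);              // keep the frame aligned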
1.1930 + 1.1931 + // First count the abi requirement plus all of the outgoing args 1.1932 + int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; 1.1933 + 1.1934 + // Now the space for the inbound oop handle area 1.1935 + 1.1936 + int oop_handle_offset = stack_slots; 1.1937 + stack_slots += 6*VMRegImpl::slots_per_word; 1.1938 + 1.1939 + // Now any space we need for handlizing a klass if static method 1.1940 + 1.1941 + int oop_temp_slot_offset = 0; 1.1942 + int klass_slot_offset = 0; 1.1943 + int klass_offset = -1; 1.1944 + int lock_slot_offset = 0; 1.1945 + bool is_static = false; 1.1946 + 1.1947 + if (method->is_static()) { 1.1948 + klass_slot_offset = stack_slots; 1.1949 + stack_slots += VMRegImpl::slots_per_word; 1.1950 + klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; 1.1951 + is_static = true; 1.1952 + } 1.1953 + 1.1954 + // Plus a lock if needed 1.1955 + 1.1956 + if (method->is_synchronized()) { 1.1957 + lock_slot_offset = stack_slots; 1.1958 + stack_slots += VMRegImpl::slots_per_word; 1.1959 + } 1.1960 + 1.1961 + // Now a place to save return value or as a temporary for any gpr -> fpr moves 1.1962 + stack_slots += 2; 1.1963 + 1.1964 + // Ok The space we have allocated will look like: 1.1965 + // 1.1966 + // 1.1967 + // FP-> | | 1.1968 + // |---------------------| 1.1969 + // | 2 slots for moves | 1.1970 + // |---------------------| 1.1971 + // | lock box (if sync) | 1.1972 + // |---------------------| <- lock_slot_offset 1.1973 + // | klass (if static) | 1.1974 + // |---------------------| <- klass_slot_offset 1.1975 + // | oopHandle area | 1.1976 + // |---------------------| <- oop_handle_offset 1.1977 + // | outbound memory | 1.1978 + // | based arguments | 1.1979 + // | | 1.1980 + // |---------------------| 1.1981 + // | vararg area | 1.1982 + // |---------------------| 1.1983 + // | | 1.1984 + // SP-> | out_preserved_slots | 1.1985 + // 1.1986 + // 1.1987 + 1.1988 + 1.1989 + // Now compute actual number of stack words we need rounding to make 1.1990 + // stack properly aligned. 1.1991 + stack_slots = round_to(stack_slots, 2 * VMRegImpl::slots_per_word); 1.1992 + 1.1993 + int stack_size = stack_slots * VMRegImpl::stack_slot_size; 1.1994 + 1.1995 + // Generate stack overflow check before creating frame 1.1996 + __ generate_stack_overflow_check(stack_size); 1.1997 + 1.1998 + // Generate a new frame for the wrapper. 1.1999 + __ save(SP, -stack_size, SP); 1.2000 + 1.2001 + int frame_complete = ((intptr_t)__ pc()) - start; 1.2002 + 1.2003 + __ verify_thread(); 1.2004 + 1.2005 + 1.2006 + // 1.2007 + // We immediately shuffle the arguments so that any vm call we have to 1.2008 + // make from here on out (sync slow path, jvmti, etc.) we will have 1.2009 + // captured the oops from our caller and have a valid oopMap for 1.2010 + // them. 1.2011 + 1.2012 + // ----------------- 1.2013 + // The Grand Shuffle 1.2014 + // 1.2015 + // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv* 1.2016 + // (derived from JavaThread* which is in L7_thread_cache) and, if static, 1.2017 + // the class mirror instead of a receiver. This pretty much guarantees that 1.2018 + // register layout will not match. We ignore these extra arguments during 1.2019 + // the shuffle. The shuffle is described by the two calling convention 1.2020 + // vectors we have in our possession. We simply walk the java vector to 1.2021 + // get the source locations and the c vector to get the destinations. 
1.2022 + // Because we have a new window and the argument registers are completely 1.2023 + // disjoint ( I0 -> O1, I1 -> O2, ...) we have nothing to worry about 1.2024 + // here. 1.2025 + 1.2026 + // This is a trick. We double the stack slots so we can claim 1.2027 + // the oops in the caller's frame. Since we are sure to have 1.2028 + // more args than the caller doubling is enough to make 1.2029 + // sure we can capture all the incoming oop args from the 1.2030 + // caller. 1.2031 + // 1.2032 + OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); 1.2033 + int c_arg = total_c_args - 1; 1.2034 + // Record sp-based slot for receiver on stack for non-static methods 1.2035 + int receiver_offset = -1; 1.2036 + 1.2037 + // We move the arguments backward because the floating point registers 1.2038 + // destination will always be to a register with a greater or equal register 1.2039 + // number or the stack. 1.2040 + 1.2041 +#ifdef ASSERT 1.2042 + bool reg_destroyed[RegisterImpl::number_of_registers]; 1.2043 + bool freg_destroyed[FloatRegisterImpl::number_of_registers]; 1.2044 + for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { 1.2045 + reg_destroyed[r] = false; 1.2046 + } 1.2047 + for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { 1.2048 + freg_destroyed[f] = false; 1.2049 + } 1.2050 + 1.2051 +#endif /* ASSERT */ 1.2052 + 1.2053 + for ( int i = total_in_args - 1; i >= 0 ; i--, c_arg-- ) { 1.2054 + 1.2055 +#ifdef ASSERT 1.2056 + if (in_regs[i].first()->is_Register()) { 1.2057 + assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "ack!"); 1.2058 + } else if (in_regs[i].first()->is_FloatRegister()) { 1.2059 + assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding(FloatRegisterImpl::S)], "ack!"); 1.2060 + } 1.2061 + if (out_regs[c_arg].first()->is_Register()) { 1.2062 + reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; 1.2063 + } else if (out_regs[c_arg].first()->is_FloatRegister()) { 1.2064 + freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding(FloatRegisterImpl::S)] = true; 1.2065 + } 1.2066 +#endif /* ASSERT */ 1.2067 + 1.2068 + switch (in_sig_bt[i]) { 1.2069 + case T_ARRAY: 1.2070 + case T_OBJECT: 1.2071 + object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], 1.2072 + ((i == 0) && (!is_static)), 1.2073 + &receiver_offset); 1.2074 + break; 1.2075 + case T_VOID: 1.2076 + break; 1.2077 + 1.2078 + case T_FLOAT: 1.2079 + float_move(masm, in_regs[i], out_regs[c_arg]); 1.2080 + break; 1.2081 + 1.2082 + case T_DOUBLE: 1.2083 + assert( i + 1 < total_in_args && 1.2084 + in_sig_bt[i + 1] == T_VOID && 1.2085 + out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); 1.2086 + double_move(masm, in_regs[i], out_regs[c_arg]); 1.2087 + break; 1.2088 + 1.2089 + case T_LONG : 1.2090 + long_move(masm, in_regs[i], out_regs[c_arg]); 1.2091 + break; 1.2092 + 1.2093 + case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); 1.2094 + 1.2095 + default: 1.2096 + move32_64(masm, in_regs[i], out_regs[c_arg]); 1.2097 + } 1.2098 + } 1.2099 + 1.2100 + // Pre-load a static method's oop into O1. Used both by locking code and 1.2101 + // the normal JNI call code. 1.2102 + if (method->is_static()) { 1.2103 + __ set_oop_constant(JNIHandles::make_local(Klass::cast(method->method_holder())->java_mirror()), O1); 1.2104 + 1.2105 + // Now handlize the static class mirror in O1. It's known not-null. 
1.2106 + __ st_ptr(O1, SP, klass_offset + STACK_BIAS);
1.2107 + map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
1.2108 + __ add(SP, klass_offset + STACK_BIAS, O1);
1.2109 + }
1.2110 +
1.2111 +
1.2112 + const Register L6_handle = L6;
1.2113 +
1.2114 + if (method->is_synchronized()) {
1.2115 + __ mov(O1, L6_handle);
1.2116 + }
1.2117 +
1.2118 + // We have all of the arguments setup at this point. We MUST NOT touch any Oregs
1.2119 + // except O6/O7. So if we must call out we must push a new frame. We immediately
1.2120 + // push a new frame and flush the windows.
1.2121 +
1.2122 +#ifdef _LP64
1.2123 + intptr_t thepc = (intptr_t) __ pc();
1.2124 + {
1.2125 + address here = __ pc();
1.2126 + // Call the next instruction
1.2127 + __ call(here + 8, relocInfo::none);
1.2128 + __ delayed()->nop();
1.2129 + }
1.2130 +#else
1.2131 + intptr_t thepc = __ load_pc_address(O7, 0);
1.2132 +#endif /* _LP64 */
1.2133 +
1.2134 + // We use the same pc/oopMap repeatedly when we call out
1.2135 + oop_maps->add_gc_map(thepc - start, map);
1.2136 +
1.2137 + // O7 now has the pc loaded that we will use when we finally call to native.
1.2138 +
1.2139 + // Save thread in L7; it crosses a bunch of VM calls below
1.2140 + // Don't use save_thread because it smashes G2 and we merely
1.2141 + // want to save a copy
1.2142 + __ mov(G2_thread, L7_thread_cache);
1.2143 +
1.2144 +
1.2145 + // If we create an inner frame, once is plenty; when we create it
1.2146 + // we must also save G2_thread.
1.2147 + bool inner_frame_created = false;
1.2148 +
1.2149 + // dtrace method entry support
1.2150 + {
1.2151 + SkipIfEqual skip_if(
1.2152 + masm, G3_scratch, &DTraceMethodProbes, Assembler::zero);
1.2153 + // create inner frame
1.2154 + __ save_frame(0);
1.2155 + __ mov(G2_thread, L7_thread_cache);
1.2156 + __ set_oop_constant(JNIHandles::make_local(method()), O1);
1.2157 + __ call_VM_leaf(L7_thread_cache,
1.2158 + CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry),
1.2159 + G2_thread, O1);
1.2160 + __ restore();
1.2161 + }
1.2162 +
1.2163 + // We are in the jni frame unless inner_frame_created is true, in which
1.2164 + // case we are one frame deeper (the "inner" frame). If we are in the
1.2165 + // "inner" frame the args are in the Iregs; if in the jni frame they
1.2166 + // are in the Oregs.
1.2167 + // If we ever need to go to the VM (for locking, jvmti) then
1.2168 + // we will always be in the "inner" frame.
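// Pictorially (a sketch): with no inner frame the native args have been
// assembled in this frame's O-registers; once create_inner_frame() runs,
// those same values are visible one window down as the I-registers (for
// example, the ld_ptr(I1, 0, O1) below re-loads the receiver handle that
// way), and L7_thread_cache carries the thread across the window.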
1.2169 + 1.2170 + // Lock a synchronized method 1.2171 + int lock_offset = -1; // Set if locked 1.2172 + if (method->is_synchronized()) { 1.2173 + Register Roop = O1; 1.2174 + const Register L3_box = L3; 1.2175 + 1.2176 + create_inner_frame(masm, &inner_frame_created); 1.2177 + 1.2178 + __ ld_ptr(I1, 0, O1); 1.2179 + Label done; 1.2180 + 1.2181 + lock_offset = (lock_slot_offset * VMRegImpl::stack_slot_size); 1.2182 + __ add(FP, lock_offset+STACK_BIAS, L3_box); 1.2183 +#ifdef ASSERT 1.2184 + if (UseBiasedLocking) { 1.2185 + // making the box point to itself will make it clear it went unused 1.2186 + // but also be obviously invalid 1.2187 + __ st_ptr(L3_box, L3_box, 0); 1.2188 + } 1.2189 +#endif // ASSERT 1.2190 + // 1.2191 + // Compiler_lock_object (Roop, Rmark, Rbox, Rscratch) -- kills Rmark, Rbox, Rscratch 1.2192 + // 1.2193 + __ compiler_lock_object(Roop, L1, L3_box, L2); 1.2194 + __ br(Assembler::equal, false, Assembler::pt, done); 1.2195 + __ delayed() -> add(FP, lock_offset+STACK_BIAS, L3_box); 1.2196 + 1.2197 + 1.2198 + // None of the above fast optimizations worked so we have to get into the 1.2199 + // slow case of monitor enter. Inline a special case of call_VM that 1.2200 + // disallows any pending_exception. 1.2201 + __ mov(Roop, O0); // Need oop in O0 1.2202 + __ mov(L3_box, O1); 1.2203 + 1.2204 + // Record last_Java_sp, in case the VM code releases the JVM lock. 1.2205 + 1.2206 + __ set_last_Java_frame(FP, I7); 1.2207 + 1.2208 + // do the call 1.2209 + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), relocInfo::runtime_call_type); 1.2210 + __ delayed()->mov(L7_thread_cache, O2); 1.2211 + 1.2212 + __ restore_thread(L7_thread_cache); // restore G2_thread 1.2213 + __ reset_last_Java_frame(); 1.2214 + 1.2215 +#ifdef ASSERT 1.2216 + { Label L; 1.2217 + __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), O0); 1.2218 + __ br_null(O0, false, Assembler::pt, L); 1.2219 + __ delayed()->nop(); 1.2220 + __ stop("no pending exception allowed on exit from IR::monitorenter"); 1.2221 + __ bind(L); 1.2222 + } 1.2223 +#endif 1.2224 + __ bind(done); 1.2225 + } 1.2226 + 1.2227 + 1.2228 + // Finally just about ready to make the JNI call 1.2229 + 1.2230 + __ flush_windows(); 1.2231 + if (inner_frame_created) { 1.2232 + __ restore(); 1.2233 + } else { 1.2234 + // Store only what we need from this frame 1.2235 + // QQQ I think that non-v9 (like we care) we don't need these saves 1.2236 + // either as the flush traps and the current window goes too. 1.2237 + __ st_ptr(FP, SP, FP->sp_offset_in_saved_window()*wordSize + STACK_BIAS); 1.2238 + __ st_ptr(I7, SP, I7->sp_offset_in_saved_window()*wordSize + STACK_BIAS); 1.2239 + } 1.2240 + 1.2241 + // get JNIEnv* which is first argument to native 1.2242 + 1.2243 + __ add(G2_thread, in_bytes(JavaThread::jni_environment_offset()), O0); 1.2244 + 1.2245 + // Use that pc we placed in O7 a while back as the current frame anchor 1.2246 + 1.2247 + __ set_last_Java_frame(SP, O7); 1.2248 + 1.2249 + // Transition from _thread_in_Java to _thread_in_native. 
1.2250 + __ set(_thread_in_native, G3_scratch);
1.2251 + __ st(G3_scratch, G2_thread, in_bytes(JavaThread::thread_state_offset()));
1.2252 +
1.2253 + // We flushed the windows ages ago; now mark them as flushed
1.2254 +
1.2255 + // mark windows as flushed
1.2256 + __ set(JavaFrameAnchor::flushed, G3_scratch);
1.2257 +
1.2258 + Address flags(G2_thread,
1.2259 + 0,
1.2260 + in_bytes(JavaThread::frame_anchor_offset()) + in_bytes(JavaFrameAnchor::flags_offset()));
1.2261 +
1.2262 +#ifdef _LP64
1.2263 + Address dest(O7, method->native_function());
1.2264 + __ relocate(relocInfo::runtime_call_type);
1.2265 + __ jumpl_to(dest, O7);
1.2266 +#else
1.2267 + __ call(method->native_function(), relocInfo::runtime_call_type);
1.2268 +#endif
1.2269 + __ delayed()->st(G3_scratch, flags);
1.2270 +
1.2271 + __ restore_thread(L7_thread_cache); // restore G2_thread
1.2272 +
1.2273 + // Unpack native results. For int-types, we do any needed sign-extension
1.2274 + // and move things into I0. The return value there will survive any VM
1.2275 + // calls for blocking or unlocking. An FP or OOP result (handle) is done
1.2276 + // specially in the slow-path code.
1.2277 + switch (ret_type) {
1.2278 + case T_VOID: break; // Nothing to do!
1.2279 + case T_FLOAT: break; // Got it where we want it (unless slow-path)
1.2280 + case T_DOUBLE: break; // Got it where we want it (unless slow-path)
1.2281 + // In the 64-bit build the result is in O0; in the 32-bit build it is in O0,O1
1.2282 + case T_LONG:
1.2283 +#ifndef _LP64
1.2284 + __ mov(O1, I1);
1.2285 +#endif
1.2286 + // Fall thru
1.2287 + case T_OBJECT: // Really a handle
1.2288 + case T_ARRAY:
1.2289 + case T_INT:
1.2290 + __ mov(O0, I0);
1.2291 + break;
1.2292 + case T_BOOLEAN: __ subcc(G0, O0, G0); __ addc(G0, 0, I0); break; // !0 => true; 0 => false
1.2293 + case T_BYTE : __ sll(O0, 24, O0); __ sra(O0, 24, I0); break;
1.2294 + case T_CHAR : __ sll(O0, 16, O0); __ srl(O0, 16, I0); break; // cannot use and3, 0xFFFF too big as immediate value!
1.2295 + case T_SHORT : __ sll(O0, 16, O0); __ sra(O0, 16, I0); break;
1.2296 + break; // Cannot de-handlize until after reclaiming jvm_lock
1.2297 + default:
1.2298 + ShouldNotReachHere();
1.2299 + }
1.2300 +
1.2301 + // must we block?
1.2302 +
1.2303 + // Block, if necessary, before resuming in _thread_in_Java state.
1.2304 + // In order for GC to work, don't clear the last_Java_sp until after blocking.
1.2305 + { Label no_block;
1.2306 + Address sync_state(G3_scratch, SafepointSynchronize::address_of_state());
1.2307 +
1.2308 + // Switch thread to "native transition" state before reading the synchronization state.
1.2309 + // This additional state is necessary because reading and testing the synchronization
1.2310 + // state is not atomic w.r.t. GC, as this scenario demonstrates:
1.2311 + // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
1.2312 + // VM thread changes sync state to synchronizing and suspends threads for GC.
1.2313 + // Thread A is resumed to finish this native method, but doesn't block here since it
1.2314 + // didn't see any synchronization in progress, and escapes.
1.2315 + __ set(_thread_in_native_trans, G3_scratch);
1.2316 + __ st(G3_scratch, G2_thread, in_bytes(JavaThread::thread_state_offset()));
1.2317 + if(os::is_MP()) {
1.2318 + if (UseMembar) {
1.2319 + // Force this write out before the read below
1.2320 + __ membar(Assembler::StoreLoad);
1.2321 + } else {
1.2322 + // Write serialization page so VM thread can do a pseudo remote membar.
1.2323 + // We use the current thread pointer to calculate a thread specific 1.2324 + // offset to write to within the page. This minimizes bus traffic 1.2325 + // due to cache line collision. 1.2326 + __ serialize_memory(G2_thread, G1_scratch, G3_scratch); 1.2327 + } 1.2328 + } 1.2329 + __ load_contents(sync_state, G3_scratch); 1.2330 + __ cmp(G3_scratch, SafepointSynchronize::_not_synchronized); 1.2331 + 1.2332 + Label L; 1.2333 + Address suspend_state(G2_thread, 0, in_bytes(JavaThread::suspend_flags_offset())); 1.2334 + __ br(Assembler::notEqual, false, Assembler::pn, L); 1.2335 + __ delayed()-> 1.2336 + ld(suspend_state, G3_scratch); 1.2337 + __ cmp(G3_scratch, 0); 1.2338 + __ br(Assembler::equal, false, Assembler::pt, no_block); 1.2339 + __ delayed()->nop(); 1.2340 + __ bind(L); 1.2341 + 1.2342 + // Block. Save any potential method result value before the operation and 1.2343 + // use a leaf call to leave the last_Java_frame setup undisturbed. Doing this 1.2344 + // lets us share the oopMap we used when we went native rather the create 1.2345 + // a distinct one for this pc 1.2346 + // 1.2347 + save_native_result(masm, ret_type, stack_slots); 1.2348 + __ call_VM_leaf(L7_thread_cache, 1.2349 + CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), 1.2350 + G2_thread); 1.2351 + 1.2352 + // Restore any method result value 1.2353 + restore_native_result(masm, ret_type, stack_slots); 1.2354 + __ bind(no_block); 1.2355 + } 1.2356 + 1.2357 + // thread state is thread_in_native_trans. Any safepoint blocking has already 1.2358 + // happened so we can now change state to _thread_in_Java. 1.2359 + 1.2360 + 1.2361 + __ set(_thread_in_Java, G3_scratch); 1.2362 + __ st(G3_scratch, G2_thread, in_bytes(JavaThread::thread_state_offset())); 1.2363 + 1.2364 + 1.2365 + Label no_reguard; 1.2366 + __ ld(G2_thread, in_bytes(JavaThread::stack_guard_state_offset()), G3_scratch); 1.2367 + __ cmp(G3_scratch, JavaThread::stack_guard_yellow_disabled); 1.2368 + __ br(Assembler::notEqual, false, Assembler::pt, no_reguard); 1.2369 + __ delayed()->nop(); 1.2370 + 1.2371 + save_native_result(masm, ret_type, stack_slots); 1.2372 + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)); 1.2373 + __ delayed()->nop(); 1.2374 + 1.2375 + __ restore_thread(L7_thread_cache); // restore G2_thread 1.2376 + restore_native_result(masm, ret_type, stack_slots); 1.2377 + 1.2378 + __ bind(no_reguard); 1.2379 + 1.2380 + // Handle possible exception (will unlock if necessary) 1.2381 + 1.2382 + // native result if any is live in freg or I0 (and I1 if long and 32bit vm) 1.2383 + 1.2384 + // Unlock 1.2385 + if (method->is_synchronized()) { 1.2386 + Label done; 1.2387 + Register I2_ex_oop = I2; 1.2388 + const Register L3_box = L3; 1.2389 + // Get locked oop from the handle we passed to jni 1.2390 + __ ld_ptr(L6_handle, 0, L4); 1.2391 + __ add(SP, lock_offset+STACK_BIAS, L3_box); 1.2392 + // Must save pending exception around the slow-path VM call. Since it's a 1.2393 + // leaf call, the pending exception (if any) can be kept in a register. 1.2394 + __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), I2_ex_oop); 1.2395 + // Now unlock 1.2396 + // (Roop, Rmark, Rbox, Rscratch) 1.2397 + __ compiler_unlock_object(L4, L1, L3_box, L2); 1.2398 + __ br(Assembler::equal, false, Assembler::pt, done); 1.2399 + __ delayed()-> add(SP, lock_offset+STACK_BIAS, L3_box); 1.2400 + 1.2401 + // save and restore any potential method result value around the unlocking 1.2402 + // operation. 
Will save in I0 (or stack for FP returns). 1.2403 + save_native_result(masm, ret_type, stack_slots); 1.2404 + 1.2405 + // Must clear pending-exception before re-entering the VM. Since this is 1.2406 + // a leaf call, pending-exception-oop can be safely kept in a register. 1.2407 + __ st_ptr(G0, G2_thread, in_bytes(Thread::pending_exception_offset())); 1.2408 + 1.2409 + // slow case of monitor enter. Inline a special case of call_VM that 1.2410 + // disallows any pending_exception. 1.2411 + __ mov(L3_box, O1); 1.2412 + 1.2413 + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), relocInfo::runtime_call_type); 1.2414 + __ delayed()->mov(L4, O0); // Need oop in O0 1.2415 + 1.2416 + __ restore_thread(L7_thread_cache); // restore G2_thread 1.2417 + 1.2418 +#ifdef ASSERT 1.2419 + { Label L; 1.2420 + __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), O0); 1.2421 + __ br_null(O0, false, Assembler::pt, L); 1.2422 + __ delayed()->nop(); 1.2423 + __ stop("no pending exception allowed on exit from IR::monitorexit"); 1.2424 + __ bind(L); 1.2425 + } 1.2426 +#endif 1.2427 + restore_native_result(masm, ret_type, stack_slots); 1.2428 + // check_forward_pending_exception jump to forward_exception if any pending 1.2429 + // exception is set. The forward_exception routine expects to see the 1.2430 + // exception in pending_exception and not in a register. Kind of clumsy, 1.2431 + // since all folks who branch to forward_exception must have tested 1.2432 + // pending_exception first and hence have it in a register already. 1.2433 + __ st_ptr(I2_ex_oop, G2_thread, in_bytes(Thread::pending_exception_offset())); 1.2434 + __ bind(done); 1.2435 + } 1.2436 + 1.2437 + // Tell dtrace about this method exit 1.2438 + { 1.2439 + SkipIfEqual skip_if( 1.2440 + masm, G3_scratch, &DTraceMethodProbes, Assembler::zero); 1.2441 + save_native_result(masm, ret_type, stack_slots); 1.2442 + __ set_oop_constant(JNIHandles::make_local(method()), O1); 1.2443 + __ call_VM_leaf(L7_thread_cache, 1.2444 + CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), 1.2445 + G2_thread, O1); 1.2446 + restore_native_result(masm, ret_type, stack_slots); 1.2447 + } 1.2448 + 1.2449 + // Clear "last Java frame" SP and PC. 1.2450 + __ verify_thread(); // G2_thread must be correct 1.2451 + __ reset_last_Java_frame(); 1.2452 + 1.2453 + // Unpack oop result 1.2454 + if (ret_type == T_OBJECT || ret_type == T_ARRAY) { 1.2455 + Label L; 1.2456 + __ addcc(G0, I0, G0); 1.2457 + __ brx(Assembler::notZero, true, Assembler::pt, L); 1.2458 + __ delayed()->ld_ptr(I0, 0, I0); 1.2459 + __ mov(G0, I0); 1.2460 + __ bind(L); 1.2461 + __ verify_oop(I0); 1.2462 + } 1.2463 + 1.2464 + // reset handle block 1.2465 + __ ld_ptr(G2_thread, in_bytes(JavaThread::active_handles_offset()), L5); 1.2466 + __ st_ptr(G0, L5, JNIHandleBlock::top_offset_in_bytes()); 1.2467 + 1.2468 + __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), G3_scratch); 1.2469 + check_forward_pending_exception(masm, G3_scratch); 1.2470 + 1.2471 + 1.2472 + // Return 1.2473 + 1.2474 +#ifndef _LP64 1.2475 + if (ret_type == T_LONG) { 1.2476 + 1.2477 + // Must leave proper result in O0,O1 and G1 (c2/tiered only) 1.2478 + __ sllx(I0, 32, G1); // Shift bits into high G1 1.2479 + __ srl (I1, 0, I1); // Zero extend O1 (harmless?) 
1.2480 + __ or3 (I1, G1, G1); // OR 64 bits into G1 1.2481 + } 1.2482 +#endif 1.2483 + 1.2484 + __ ret(); 1.2485 + __ delayed()->restore(); 1.2486 + 1.2487 + __ flush(); 1.2488 + 1.2489 + nmethod *nm = nmethod::new_native_nmethod(method, 1.2490 + masm->code(), 1.2491 + vep_offset, 1.2492 + frame_complete, 1.2493 + stack_slots / VMRegImpl::slots_per_word, 1.2494 + (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), 1.2495 + in_ByteSize(lock_offset), 1.2496 + oop_maps); 1.2497 + return nm; 1.2498 + 1.2499 +} 1.2500 + 1.2501 +// this function returns the adjust size (in number of words) to a c2i adapter 1.2502 +// activation for use during deoptimization 1.2503 +int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) { 1.2504 + assert(callee_locals >= callee_parameters, 1.2505 + "test and remove; got more parms than locals"); 1.2506 + if (callee_locals < callee_parameters) 1.2507 + return 0; // No adjustment for negative locals 1.2508 + int diff = (callee_locals - callee_parameters) * Interpreter::stackElementWords(); 1.2509 + return round_to(diff, WordsPerLong); 1.2510 +} 1.2511 + 1.2512 +// "Top of Stack" slots that may be unused by the calling convention but must 1.2513 +// otherwise be preserved. 1.2514 +// On Intel these are not necessary and the value can be zero. 1.2515 +// On Sparc this describes the words reserved for storing a register window 1.2516 +// when an interrupt occurs. 1.2517 +uint SharedRuntime::out_preserve_stack_slots() { 1.2518 + return frame::register_save_words * VMRegImpl::slots_per_word; 1.2519 +} 1.2520 + 1.2521 +static void gen_new_frame(MacroAssembler* masm, bool deopt) { 1.2522 +// 1.2523 +// Common out the new frame generation for deopt and uncommon trap 1.2524 +// 1.2525 + Register G3pcs = G3_scratch; // Array of new pcs (input) 1.2526 + Register Oreturn0 = O0; 1.2527 + Register Oreturn1 = O1; 1.2528 + Register O2UnrollBlock = O2; 1.2529 + Register O3array = O3; // Array of frame sizes (input) 1.2530 + Register O4array_size = O4; // number of frames (input) 1.2531 + Register O7frame_size = O7; // number of frames (input) 1.2532 + 1.2533 + __ ld_ptr(O3array, 0, O7frame_size); 1.2534 + __ sub(G0, O7frame_size, O7frame_size); 1.2535 + __ save(SP, O7frame_size, SP); 1.2536 + __ ld_ptr(G3pcs, 0, I7); // load frame's new pc 1.2537 + 1.2538 + #ifdef ASSERT 1.2539 + // make sure that the frames are aligned properly 1.2540 +#ifndef _LP64 1.2541 + __ btst(wordSize*2-1, SP); 1.2542 + __ breakpoint_trap(Assembler::notZero); 1.2543 +#endif 1.2544 + #endif 1.2545 + 1.2546 + // Deopt needs to pass some extra live values from frame to frame 1.2547 + 1.2548 + if (deopt) { 1.2549 + __ mov(Oreturn0->after_save(), Oreturn0); 1.2550 + __ mov(Oreturn1->after_save(), Oreturn1); 1.2551 + } 1.2552 + 1.2553 + __ mov(O4array_size->after_save(), O4array_size); 1.2554 + __ sub(O4array_size, 1, O4array_size); 1.2555 + __ mov(O3array->after_save(), O3array); 1.2556 + __ mov(O2UnrollBlock->after_save(), O2UnrollBlock); 1.2557 + __ add(G3pcs, wordSize, G3pcs); // point to next pc value 1.2558 + 1.2559 + #ifdef ASSERT 1.2560 + // trash registers to show a clear pattern in backtraces 1.2561 + __ set(0xDEAD0000, I0); 1.2562 + __ add(I0, 2, I1); 1.2563 + __ add(I0, 4, I2); 1.2564 + __ add(I0, 6, I3); 1.2565 + __ add(I0, 8, I4); 1.2566 + // Don't touch I5 could have valuable savedSP 1.2567 + __ set(0xDEADBEEF, L0); 1.2568 + __ mov(L0, L1); 1.2569 + __ mov(L0, L2); 1.2570 + __ mov(L0, L3); 1.2571 + __ mov(L0, L4); 1.2572 + __ mov(L0, L5); 1.2573 + 1.2574 
+ // trash the return value as there is nothing to return yet 1.2575 + __ set(0xDEAD0001, O7); 1.2576 + #endif 1.2577 + 1.2578 + __ mov(SP, O5_savedSP); 1.2579 +} 1.2580 + 1.2581 + 1.2582 +static void make_new_frames(MacroAssembler* masm, bool deopt) { 1.2583 + // 1.2584 + // loop through the UnrollBlock info and create new frames 1.2585 + // 1.2586 + Register G3pcs = G3_scratch; 1.2587 + Register Oreturn0 = O0; 1.2588 + Register Oreturn1 = O1; 1.2589 + Register O2UnrollBlock = O2; 1.2590 + Register O3array = O3; 1.2591 + Register O4array_size = O4; 1.2592 + Label loop; 1.2593 + 1.2594 + // Before we make new frames, check to see if stack is available. 1.2595 + // Do this after the caller's return address is on top of stack 1.2596 + if (UseStackBanging) { 1.2597 + // Get total frame size for interpreted frames 1.2598 + __ ld(Address(O2UnrollBlock, 0, 1.2599 + Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()), O4); 1.2600 + __ bang_stack_size(O4, O3, G3_scratch); 1.2601 + } 1.2602 + 1.2603 + __ ld(Address(O2UnrollBlock, 0, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()), O4array_size); 1.2604 + __ ld_ptr(Address(O2UnrollBlock, 0, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()), G3pcs); 1.2605 + 1.2606 + __ ld_ptr(Address(O2UnrollBlock, 0, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()), O3array); 1.2607 + 1.2608 + // Adjust old interpreter frame to make space for new frame's extra java locals 1.2609 + // 1.2610 + // We capture the original sp for the transition frame only because it is needed in 1.2611 + // order to properly calculate interpreter_sp_adjustment. Even though in real life 1.2612 + // every interpreter frame captures a savedSP it is only needed at the transition 1.2613 + // (fortunately). If we had to have it correct everywhere then we would need to 1.2614 + // be told the sp_adjustment for each frame we create. If the frame size array 1.2615 + // were to have twice the frame count entries then we could have pairs [sp_adjustment, frame_size] 1.2616 + // for each frame we create and keep up the illusion every where. 1.2617 + // 1.2618 + 1.2619 + __ ld(Address(O2UnrollBlock, 0, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()), O7); 1.2620 + __ mov(SP, O5_savedSP); // remember initial sender's original sp before adjustment 1.2621 + __ sub(SP, O7, SP); 1.2622 + 1.2623 +#ifdef ASSERT 1.2624 + // make sure that there is at least one entry in the array 1.2625 + __ tst(O4array_size); 1.2626 + __ breakpoint_trap(Assembler::zero); 1.2627 +#endif 1.2628 + 1.2629 + // Now push the new interpreter frames 1.2630 + __ bind(loop); 1.2631 + 1.2632 + // allocate a new frame, filling the registers 1.2633 + 1.2634 + gen_new_frame(masm, deopt); // allocate an interpreter frame 1.2635 + 1.2636 + __ tst(O4array_size); 1.2637 + __ br(Assembler::notZero, false, Assembler::pn, loop); 1.2638 + __ delayed()->add(O3array, wordSize, O3array); 1.2639 + __ ld_ptr(G3pcs, 0, O7); // load final frame new pc 1.2640 + 1.2641 +} 1.2642 + 1.2643 +//------------------------------generate_deopt_blob---------------------------- 1.2644 +// Ought to generate an ideal graph & compile, but here's some SPARC ASM 1.2645 +// instead. 1.2646 +void SharedRuntime::generate_deopt_blob() { 1.2647 + // allocate space for the code 1.2648 + ResourceMark rm; 1.2649 + // setup code generation tools 1.2650 + int pad = VerifyThread ? 
512 : 0;// Extra slop space for more verify code 1.2651 +#ifdef _LP64 1.2652 + CodeBuffer buffer("deopt_blob", 2100+pad, 512); 1.2653 +#else 1.2654 + // Measured 8/7/03 at 1212 in 32bit debug build (no VerifyThread) 1.2655 + // Measured 8/7/03 at 1396 in 32bit debug build (VerifyThread) 1.2656 + CodeBuffer buffer("deopt_blob", 1600+pad, 512); 1.2657 +#endif /* _LP64 */ 1.2658 + MacroAssembler* masm = new MacroAssembler(&buffer); 1.2659 + FloatRegister Freturn0 = F0; 1.2660 + Register Greturn1 = G1; 1.2661 + Register Oreturn0 = O0; 1.2662 + Register Oreturn1 = O1; 1.2663 + Register O2UnrollBlock = O2; 1.2664 + Register O3tmp = O3; 1.2665 + Register I5exception_tmp = I5; 1.2666 + Register G4exception_tmp = G4_scratch; 1.2667 + int frame_size_words; 1.2668 + Address saved_Freturn0_addr(FP, 0, -sizeof(double) + STACK_BIAS); 1.2669 +#if !defined(_LP64) && defined(COMPILER2) 1.2670 + Address saved_Greturn1_addr(FP, 0, -sizeof(double) -sizeof(jlong) + STACK_BIAS); 1.2671 +#endif 1.2672 + Label cont; 1.2673 + 1.2674 + OopMapSet *oop_maps = new OopMapSet(); 1.2675 + 1.2676 + // 1.2677 + // This is the entry point for code which is returning to a de-optimized 1.2678 + // frame. 1.2679 + // The steps taken by this frame are as follows: 1.2680 + // - push a dummy "register_save" and save the return values (O0, O1, F0/F1, G1) 1.2681 + // and all potentially live registers (at a pollpoint many registers can be live). 1.2682 + // 1.2683 + // - call the C routine: Deoptimization::fetch_unroll_info (this function 1.2684 + // returns information about the number and size of interpreter frames 1.2685 + // which are equivalent to the frame which is being deoptimized) 1.2686 + // - deallocate the unpack frame, restoring only results values. Other 1.2687 + // volatile registers will now be captured in the vframeArray as needed. 1.2688 + // - deallocate the deoptimization frame 1.2689 + // - in a loop using the information returned in the previous step 1.2690 + // push new interpreter frames (take care to propagate the return 1.2691 + // values through each new frame pushed) 1.2692 + // - create a dummy "unpack_frame" and save the return values (O0, O1, F0) 1.2693 + // - call the C routine: Deoptimization::unpack_frames (this function 1.2694 + // lays out values on the interpreter frame which was just created) 1.2695 + // - deallocate the dummy unpack_frame 1.2696 + // - ensure that all the return values are correctly set and then do 1.2697 + // a return to the interpreter entry point 1.2698 + // 1.2699 + // Refer to the following methods for more information: 1.2700 + // - Deoptimization::fetch_unroll_info 1.2701 + // - Deoptimization::unpack_frames 1.2702 + 1.2703 + OopMap* map = NULL; 1.2704 + 1.2705 + int start = __ offset(); 1.2706 + 1.2707 + // restore G2, the trampoline destroyed it 1.2708 + __ get_thread(); 1.2709 + 1.2710 + // On entry we have been called by the deoptimized nmethod with a call that 1.2711 + // replaced the original call (or safepoint polling location) so the deoptimizing 1.2712 + // pc is now in O7. 
1.2703 +  OopMap* map = NULL;
1.2704 +
1.2705 +  int start = __ offset();
1.2706 +
1.2707 +  // restore G2, the trampoline destroyed it
1.2708 +  __ get_thread();
1.2709 +
1.2710 +  // On entry we have been called by the deoptimized nmethod with a call that
1.2711 +  // replaced the original call (or safepoint polling location) so the deoptimizing
1.2712 +  // pc is now in O7. Return values are still in the expected places.
1.2713 +
1.2714 +  map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words);
1.2715 +  __ ba(false, cont);
1.2716 +  __ delayed()->mov(Deoptimization::Unpack_deopt, I5exception_tmp);
1.2717 +
1.2718 +  int exception_offset = __ offset() - start;
1.2719 +
1.2720 +  // restore G2, the trampoline destroyed it
1.2721 +  __ get_thread();
1.2722 +
1.2723 +  // On entry we have been jumped to by the exception handler (or exception_blob
1.2724 +  // for server). O0 contains the exception oop and O7 contains the original
1.2725 +  // exception pc. So if we push a frame here it will look to the
1.2726 +  // stack walking code (fetch_unroll_info) just like a normal call so
1.2727 +  // state will be extracted normally.
1.2728 +
1.2729 +  // save exception oop in JavaThread and fall through into the
1.2730 +  // exception_in_tls case since they are handled in the same way except
1.2731 +  // for where the pending exception is kept.
1.2732 +  __ st_ptr(Oexception, G2_thread, in_bytes(JavaThread::exception_oop_offset()));
1.2733 +
1.2734 +  //
1.2735 +  // Vanilla deoptimization with an exception pending in exception_oop
1.2736 +  //
1.2737 +  int exception_in_tls_offset = __ offset() - start;
1.2738 +
1.2739 +  // No need to update oop_map as each call to save_live_registers will produce an identical oopmap
1.2740 +  (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_words);
1.2741 +
1.2742 +  // Restore G2_thread
1.2743 +  __ get_thread();
1.2744 +
1.2745 +#ifdef ASSERT
1.2746 +  {
1.2747 +    // verify that there is really an exception oop in exception_oop
1.2748 +    Label has_exception;
1.2749 +    __ ld_ptr(G2_thread, in_bytes(JavaThread::exception_oop_offset()), Oexception);
1.2750 +    __ br_notnull(Oexception, false, Assembler::pt, has_exception);
1.2751 +    __ delayed()->nop();
1.2752 +    __ stop("no exception in thread");
1.2753 +    __ bind(has_exception);
1.2754 +
1.2755 +    // verify that there is no pending exception
1.2756 +    Label no_pending_exception;
1.2757 +    Address exception_addr(G2_thread, 0, in_bytes(Thread::pending_exception_offset()));
1.2758 +    __ ld_ptr(exception_addr, Oexception);
1.2759 +    __ br_null(Oexception, false, Assembler::pt, no_pending_exception);
1.2760 +    __ delayed()->nop();
1.2761 +    __ stop("must not have pending exception here");
1.2762 +    __ bind(no_pending_exception);
1.2763 +  }
1.2764 +#endif
1.2765 +
1.2766 +  __ ba(false, cont);
1.2767 +  __ delayed()->mov(Deoptimization::Unpack_exception, I5exception_tmp);
1.2768 +
1.2769 +  //
1.2770 +  // Reexecute entry, similar to c2 uncommon trap
1.2771 +  //
1.2772 +  int reexecute_offset = __ offset() - start;
1.2773 +
1.2774 +  // No need to update oop_map as each call to save_live_registers will produce an identical oopmap
1.2775 +  (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_words);
1.2776 +
1.2777 +  __ mov(Deoptimization::Unpack_reexecute, I5exception_tmp);
1.2778 +
1.2779 +  __ bind(cont);
1.2780 +
1.2781 +  __ set_last_Java_frame(SP, noreg);
1.2782 +
1.2783 +  // do the call by hand so we can get the oopmap
1.2784 +
1.2785 +  __ mov(G2_thread, L7_thread_cache);
1.2786 +  __ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), relocInfo::runtime_call_type);
1.2787 +  __ delayed()->mov(G2_thread, O0);
1.2788 +
1.2789 +  // Set an oopmap for the call site; this describes all our saved volatile registers
1.2790 +
1.2791 +  oop_maps->add_gc_map(__ offset() - start, map);
1.2792 +
1.2793 +  __ mov(L7_thread_cache, G2_thread);
1.2794 +
1.2795 +  __ reset_last_Java_frame();
1.2796 +
1.2797 +
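// The "call by hand" above exists so a GC map can be registered at the exact
// return-address offset of the call. A hedged sketch of that pattern, using
// only calls that already appear in this file (the helper name is invented):
static void call_with_gc_map_sketch(MacroAssembler* masm, OopMapSet* oop_maps,
                                    OopMap* map, address target, int start) {
  masm->call(target, relocInfo::runtime_call_type);
  masm->delayed()->nop();                             // SPARC delay slot
  // Record the map under the offset just past the call and its delay slot;
  // this is the pc a stack walk reconstructs for this frame while the callee
  // is running, so the saved-register map must be keyed here.
  oop_maps->add_gc_map(masm->offset() - start, map);
}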
1.2798 +  // NOTE: we know that only O0/O1 will be reloaded by restore_result_registers
1.2799 +  // so this move will survive
1.2800 +
1.2801 +  __ mov(I5exception_tmp, G4exception_tmp);
1.2802 +
1.2803 +  __ mov(O0, O2UnrollBlock->after_save());
1.2804 +
1.2805 +  RegisterSaver::restore_result_registers(masm);
1.2806 +
1.2807 +  Label noException;
1.2808 +  __ cmp(G4exception_tmp, Deoptimization::Unpack_exception);   // Was exception pending?
1.2809 +  __ br(Assembler::notEqual, false, Assembler::pt, noException);
1.2810 +  __ delayed()->nop();
1.2811 +
1.2812 +  // Move the pending exception from exception_oop to Oexception so
1.2813 +  // the pending exception will be picked up by the interpreter.
1.2814 +  __ ld_ptr(G2_thread, in_bytes(JavaThread::exception_oop_offset()), Oexception);
1.2815 +  __ st_ptr(G0, G2_thread, in_bytes(JavaThread::exception_oop_offset()));
1.2816 +  __ bind(noException);
1.2817 +
1.2818 +  // deallocate the deoptimization frame taking care to preserve the return values
1.2819 +  __ mov(Oreturn0,      Oreturn0->after_save());
1.2820 +  __ mov(Oreturn1,      Oreturn1->after_save());
1.2821 +  __ mov(O2UnrollBlock, O2UnrollBlock->after_save());
1.2822 +  __ restore();
1.2823 +
1.2824 +  // Allocate new interpreter frame(s) and possible c2i adapter frame
1.2825 +
1.2826 +  make_new_frames(masm, true);
1.2827 +
1.2828 +  // push a dummy "unpack_frame" taking care of float return values and
1.2829 +  // call Deoptimization::unpack_frames to have the unpacker layout
1.2830 +  // information in the interpreter frames just created and then return
1.2831 +  // to the interpreter entry point
1.2832 +  __ save(SP, -frame_size_words*wordSize, SP);
1.2833 +  __ stf(FloatRegisterImpl::D, Freturn0, saved_Freturn0_addr);
1.2834 +#if !defined(_LP64)
1.2835 +#if defined(COMPILER2)
1.2836 +  if (!TieredCompilation) {
1.2837 +    // 32-bit 1-register longs return longs in G1
1.2838 +    __ stx(Greturn1, saved_Greturn1_addr);
1.2839 +  }
1.2840 +#endif
1.2841 +  __ set_last_Java_frame(SP, noreg);
1.2842 +  __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), G2_thread, G4exception_tmp);
1.2843 +#else
1.2844 +  // LP64 uses g4 in set_last_Java_frame
1.2845 +  __ mov(G4exception_tmp, O1);
1.2846 +  __ set_last_Java_frame(SP, G0);
1.2847 +  __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), G2_thread, O1);
1.2848 +#endif
1.2849 +  __ reset_last_Java_frame();
1.2850 +  __ ldf(FloatRegisterImpl::D, saved_Freturn0_addr, Freturn0);
1.2851 +
1.2852 +  // In tiered we never use C2 to compile methods returning longs, so
1.2853 +  // the result is where we expect it already.
1.2854 +
1.2855 +#if !defined(_LP64) && defined(COMPILER2)
1.2856 +  // In 32 bit, C2 returns longs in G1 so restore the saved G1 into
1.2857 +  // I0/I1 if the return value is long. In the tiered world there is a
1.2858 +  // mismatch between how C1- and C2-compiled code return longs, so
1.2859 +  // compilation of methods which return longs is currently disabled for
1.2860 +  // C2, and so is this code. Eventually C1 and C2 will do the same thing.
1.2861 +  if (!TieredCompilation) {
1.2862 +    Label not_long;
1.2863 +    __ cmp(O0, T_LONG);
1.2864 +    __ br(Assembler::notEqual, false, Assembler::pt, not_long);
1.2865 +    __ delayed()->nop();
1.2866 +    __ ldd(saved_Greturn1_addr, I0);
1.2867 +    __ bind(not_long);
1.2868 +  }
1.2869 +#endif
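// Background for the block above: in the 32-bit build a jlong returned by C2
// lives whole in G1, while the interpreter expects it split across the I0/I1
// pair. The single ldd suffices because SPARC is big-endian and ldd fills an
// even/odd register pair in one instruction. A plain C++ sketch of the split
// (hypothetical helper, illustrative only):
static inline void split_long_sketch(long long g1_value, int* i0_high, int* i1_low) {
  *i0_high = (int)(g1_value >> 32);           // I0 receives the high word
  *i1_low  = (int)(g1_value & 0xFFFFFFFFLL);  // I1 receives the low word
}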
1.2870 +  __ ret();
1.2871 +  __ delayed()->restore();
1.2872 +
1.2873 +  masm->flush();
1.2874 +  _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_words);
1.2875 +  _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
1.2876 +}
1.2877 +
1.2878 +#ifdef COMPILER2
1.2879 +
1.2880 +//------------------------------generate_uncommon_trap_blob--------------------
1.2881 +// Ought to generate an ideal graph & compile, but here's some SPARC ASM
1.2882 +// instead.
1.2883 +void SharedRuntime::generate_uncommon_trap_blob() {
1.2884 +  // allocate space for the code
1.2885 +  ResourceMark rm;
1.2886 +  // setup code generation tools
1.2887 +  int pad = VerifyThread ? 512 : 0;
1.2888 +#ifdef _LP64
1.2889 +  CodeBuffer buffer("uncommon_trap_blob", 2700+pad, 512);
1.2890 +#else
1.2891 +  // Measured 8/7/03 at 660 in 32bit debug build (no VerifyThread)
1.2892 +  // Measured 8/7/03 at 1028 in 32bit debug build (VerifyThread)
1.2893 +  CodeBuffer buffer("uncommon_trap_blob", 2000+pad, 512);
1.2894 +#endif
1.2895 +  MacroAssembler* masm               = new MacroAssembler(&buffer);
1.2896 +  Register        O2UnrollBlock      = O2;
1.2897 +  Register        O3tmp              = O3;
1.2898 +  Register        O2klass_index      = O2;
1.2899 +
1.2900 +  //
1.2901 +  // This is the entry point for all traps the compiler takes when it thinks
1.2902 +  // it cannot handle further execution of compiled code. The frame is
1.2903 +  // deoptimized in these cases and converted into interpreter frames for
1.2904 +  // execution.
1.2905 +  // The steps taken by this frame are as follows:
1.2906 +  //   - push a fake "unpack_frame"
1.2907 +  //   - call the C routine Deoptimization::uncommon_trap (this function
1.2908 +  //     packs the current compiled frame into vframe arrays and returns
1.2909 +  //     information about the number and size of interpreter frames which
1.2910 +  //     are equivalent to the frame which is being deoptimized)
1.2911 +  //   - deallocate the "unpack_frame"
1.2912 +  //   - deallocate the deoptimization frame
1.2913 +  //   - in a loop using the information returned in the previous step
1.2914 +  //     push interpreter frames
1.2915 +  //   - create a dummy "unpack_frame"
1.2916 +  //   - call the C routine: Deoptimization::unpack_frames (this function
1.2917 +  //     lays out values on the interpreter frame which was just created)
1.2918 +  //   - deallocate the dummy unpack_frame
1.2919 +  //   - return to the interpreter entry point
1.2920 +  //
1.2921 +  // Refer to the following methods for more information:
1.2922 +  //   - Deoptimization::uncommon_trap
1.2923 +  //   - Deoptimization::unpack_frames
1.2924 +
1.2925 +  // the unloaded class index is in O0 (first parameter to this blob)
1.2926 +
1.2927 +  // push a dummy "unpack_frame"
1.2928 +  // and call Deoptimization::uncommon_trap to pack the compiled frame into
1.2929 +  // a vframe array and return the UnrollBlock information
1.2930 +  __ save_frame(0);
1.2931 +  __ set_last_Java_frame(SP, noreg);
1.2932 +  __ mov(I0, O2klass_index);
1.2933 +  __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap), G2_thread, O2klass_index);
1.2934 +  __ reset_last_Java_frame();
1.2935 +  __ mov(O0, O2UnrollBlock->after_save());
1.2936 +  __ restore();
1.2937 +
1.2938 +  // deallocate the deoptimized frame taking care to preserve the return values
1.2939 +  __ mov(O2UnrollBlock, O2UnrollBlock->after_save());
1.2940 +  __ restore();
1.2941 +
1.2942 +  // Allocate new interpreter frame(s) and possible c2i adapter frame
1.2943 +
1.2944 +  make_new_frames(masm, false);
1.2945 +
1.2946 +  // push a dummy "unpack_frame" taking care of float return values and
1.2947 +  // call Deoptimization::unpack_frames to have the unpacker layout
1.2948 +  // information in the interpreter frames just created and then return
1.2949 +  // to the interpreter entry point
1.2950 +  __ save_frame(0);
1.2951 +  __ set_last_Java_frame(SP, noreg);
1.2952 +  __ mov(Deoptimization::Unpack_uncommon_trap, O3);   // indicate it is the uncommon trap case
1.2953 +  __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), G2_thread, O3);
1.2954 +  __ reset_last_Java_frame();
1.2955 +  __ ret();
1.2956 +  __ delayed()->restore();
1.2957 +
1.2958 +  masm->flush();
1.2959 +  _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, NULL, __ total_frame_size_in_bytes(0)/wordSize);
1.2960 +}
1.2961 +
1.2962 +#endif // COMPILER2
1.2963 +
1.2964 +//------------------------------generate_handler_blob-------------------
1.2965 +//
1.2966 +// Generate a special Compile2Runtime blob that saves all registers and sets
1.2967 +// up an OopMap.
1.2968 +//
1.2969 +// This blob is jumped to (via a breakpoint and the signal handler) from a
1.2970 +// safepoint in compiled code. On entry to this blob, O7 contains the
1.2971 +// address in the original nmethod at which we should resume normal execution.
1.2972 +// Thus, this blob looks like a subroutine which must preserve lots of
1.2973 +// registers and return normally. Note that O7 is never register-allocated,
1.2974 +// so it is guaranteed to be free here.
1.2975 +//
1.2976 +
1.2977 +// The hardest part of what this blob must do is to save the 64-bit %o
1.2978 +// registers in the 32-bit build. A simple 'save' turns the %o's into %i's and
1.2979 +// an interrupt will chop off their heads. Making space in the caller's frame
1.2980 +// first will let us save the 64-bit %o's before save'ing, but we cannot hand
1.2981 +// the adjusted FP off to the GC stack-crawler: this will modify the caller's
1.2982 +// SP and mess up HIS OopMaps. So we first adjust the caller's SP, then save
1.2983 +// the 64-bit %o's, then do a save, then fixup the caller's SP (our FP).
1.2984 +// Tricky, tricky, tricky...
1.2985 +
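// A compact restatement of the four-step dance prescribed above, as hedged
// pseudo-assembly (placeholder offsets; the register-saving code this file
// actually emits lives in RegisterSaver::save_live_registers):
//
//   sub   %sp, extra, %sp      // 1. grow the caller's frame while the %o's
//                              //    are still whole 64-bit values
//   stx   %o0, [%sp + off]     // 2. store each 64-bit %o (o1..o5 likewise)
//   save  %sp, -frame, %sp     // 3. 'save': the %o's become this frame's %i's
//   add   %fp, extra, %fp      // 4. fix up our FP so the GC stack-crawler
//                              //    sees the caller's original, unadjusted SP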
1.2986 +static SafepointBlob* generate_handler_blob(address call_ptr, bool cause_return) {
1.2987 +  assert(StubRoutines::forward_exception_entry() != NULL, "must be generated before");
1.2988 +
1.2989 +  // allocate space for the code
1.2990 +  ResourceMark rm;
1.2991 +  // setup code generation tools
1.2992 +  // Measured 8/7/03 at 896 in 32bit debug build (no VerifyThread)
1.2993 +  // Measured 8/7/03 at 1080 in 32bit debug build (VerifyThread)
1.2994 +  // even larger with TraceJumps
1.2995 +  int pad = TraceJumps ? 512 : 0;
1.2996 +  CodeBuffer buffer("handler_blob", 1600 + pad, 512);
1.2997 +  MacroAssembler* masm                = new MacroAssembler(&buffer);
1.2998 +  int             frame_size_words;
1.2999 +  OopMapSet *oop_maps = new OopMapSet();
1.3000 +  OopMap* map = NULL;
1.3001 +
1.3002 +  int start = __ offset();
1.3003 +
1.3004 +  // If this blob causes a return before the processing, then do a "restore"
1.3005 +  if (cause_return) {
1.3006 +    __ restore();
1.3007 +  } else {
1.3008 +    // Make it look like we were called via the poll
1.3009 +    // so that the frame constructor always sees a valid return address
1.3010 +    __ ld_ptr(G2_thread, in_bytes(JavaThread::saved_exception_pc_offset()), O7);
1.3011 +    __ sub(O7, frame::pc_return_offset, O7);
1.3012 +  }
1.3013 +
1.3014 +  map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words);
1.3015 +
1.3016 +  // setup last_Java_sp (blows G4)
1.3017 +  __ set_last_Java_frame(SP, noreg);
1.3018 +
1.3019 +  // call into the runtime to handle the safepoint polling page exception.
1.3020 +  // Do not use call_VM_leaf, because we need to make a GC map at this call site.
1.3021 +  __ mov(G2_thread, O0);
1.3022 +  __ save_thread(L7_thread_cache);
1.3023 +  __ call(call_ptr);
1.3024 +  __ delayed()->nop();
1.3025 +
1.3026 +  // Set an oopmap for the call site.
1.3027 +  // We need this not only for callee-saved registers, but also for volatile
1.3028 +  // registers that the compiler might be keeping live across a safepoint.
1.3029 +
1.3030 +  oop_maps->add_gc_map(__ offset() - start, map);
1.3031 +
1.3032 +  __ restore_thread(L7_thread_cache);
1.3033 +  // clear last_Java_sp
1.3034 +  __ reset_last_Java_frame();
1.3035 +
1.3036 +  // Check for exceptions
1.3037 +  Label pending;
1.3038 +
1.3039 +  __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), O1);
1.3040 +  __ tst(O1);
1.3041 +  __ brx(Assembler::notEqual, true, Assembler::pn, pending);
1.3042 +  __ delayed()->nop();
1.3043 +
1.3044 +  RegisterSaver::restore_live_registers(masm);
1.3045 +
1.3046 +  // We are back to the original state on entry and ready to go.
1.3047 +
1.3048 +  __ retl();
1.3049 +  __ delayed()->nop();
1.3050 +
1.3051 +  // Pending exception after the safepoint
1.3052 +
1.3053 +  __ bind(pending);
1.3054 +
1.3055 +  RegisterSaver::restore_live_registers(masm);
1.3056 +
1.3057 +  // We are back to the original state on entry.
1.3058 +
1.3059 +  // Tail-call forward_exception_entry, with the issuing PC in O7,
1.3060 +  // so it looks like the original nmethod called forward_exception_entry.
1.3061 +  __ set((intptr_t)StubRoutines::forward_exception_entry(), O0);
1.3062 +  __ JMP(O0, 0);
1.3063 +  __ delayed()->nop();
1.3064 +
1.3065 +  // -------------
1.3066 +  // make sure all code is generated
1.3067 +  masm->flush();
1.3068 +
1.3069 +  // return exception blob
1.3070 +  return SafepointBlob::create(&buffer, oop_maps, frame_size_words);
1.3071 +}
1.3072 +
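// This handler blob and the resolve stubs below share the same epilogue
// shape. A hedged C++ rendering of that control flow (the pending-exception
// slot is real and is checked above via its offset; the function itself is
// illustrative, not VM code):
static void safepoint_epilogue_sketch(JavaThread* thread) {
  if (!thread->has_pending_exception()) {
    // restore_live_registers(), then 'retl': resume the nmethod at the pc the
    // poll interrupted (the return address parked in O7).
  } else {
    // restore_live_registers(), then jump to
    // StubRoutines::forward_exception_entry() with O7 left intact, so the
    // nmethod appears to have called the exception forwarder directly.
  }
}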
1.3073 +//
1.3074 +// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss)
1.3075 +//
1.3076 +// Generate a stub that calls into the vm to find out the proper destination
1.3077 +// of a java call. All the argument registers are live at this point, but
1.3078 +// since this is generic code we don't know what they are, so the caller
1.3079 +// must do any gc of the args.
1.3080 +//
1.3081 +static RuntimeStub* generate_resolve_blob(address destination, const char* name) {
1.3082 +  assert(StubRoutines::forward_exception_entry() != NULL, "must be generated before");
1.3083 +
1.3084 +  // allocate space for the code
1.3085 +  ResourceMark rm;
1.3086 +  // setup code generation tools
1.3087 +  // Measured 8/7/03 at 896 in 32bit debug build (no VerifyThread)
1.3088 +  // Measured 8/7/03 at 1080 in 32bit debug build (VerifyThread)
1.3089 +  // even larger with TraceJumps
1.3090 +  int pad = TraceJumps ? 512 : 0;
1.3091 +  CodeBuffer buffer(name, 1600 + pad, 512);
1.3092 +  MacroAssembler* masm                = new MacroAssembler(&buffer);
1.3093 +  int             frame_size_words;
1.3094 +  OopMapSet *oop_maps = new OopMapSet();
1.3095 +  OopMap* map = NULL;
1.3096 +
1.3097 +  int start = __ offset();
1.3098 +
1.3099 +  map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words);
1.3100 +
1.3101 +  int frame_complete = __ offset();
1.3102 +
1.3103 +  // setup last_Java_sp (blows G4)
1.3104 +  __ set_last_Java_frame(SP, noreg);
1.3105 +
1.3106 +  // call into the runtime to resolve the call site.
1.3107 +  // Do not use call_VM_leaf, because we need to make a GC map at this call site.
1.3108 +  __ mov(G2_thread, O0);
1.3109 +  __ save_thread(L7_thread_cache);
1.3110 +  __ call(destination, relocInfo::runtime_call_type);
1.3111 +  __ delayed()->nop();
1.3112 +
1.3113 +  // O0 contains the address we are going to jump to assuming no exception got installed
1.3114 +
1.3115 +  // Set an oopmap for the call site.
1.3116 +  // We need this not only for callee-saved registers, but also for volatile
1.3117 +  // registers that the compiler might be keeping live across a safepoint.
1.3118 +
1.3119 +  oop_maps->add_gc_map(__ offset() - start, map);
1.3120 +
1.3121 +  __ restore_thread(L7_thread_cache);
1.3122 +  // clear last_Java_sp
1.3123 +  __ reset_last_Java_frame();
1.3124 +
1.3125 +  // Check for exceptions
1.3126 +  Label pending;
1.3127 +
1.3128 +  __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), O1);
1.3129 +  __ tst(O1);
1.3130 +  __ brx(Assembler::notEqual, true, Assembler::pn, pending);
1.3131 +  __ delayed()->nop();
1.3132 +
1.3133 +  // get the returned methodOop
1.3134 +
1.3135 +  __ get_vm_result(G5_method);
1.3136 +  __ stx(G5_method, SP, RegisterSaver::G5_offset()+STACK_BIAS);
1.3137 +
1.3138 +  // O0 is where we want to jump; overwrite G3 which is saved and scratch
1.3139 +
1.3140 +  __ stx(O0, SP, RegisterSaver::G3_offset()+STACK_BIAS);
1.3141 +
1.3142 +  RegisterSaver::restore_live_registers(masm);
1.3143 +
1.3144 +  // We are back to the original state on entry and ready to go.
1.3145 +
1.3146 +  __ JMP(G3, 0);
1.3147 +  __ delayed()->nop();
1.3148 +
1.3149 +  // Pending exception after the safepoint
1.3150 +
1.3151 +  __ bind(pending);
1.3152 +
1.3153 +  RegisterSaver::restore_live_registers(masm);
1.3154 +
1.3155 +  // We are back to the original state on entry.
1.3156 +
1.3157 +  // Tail-call forward_exception_entry, with the issuing PC in O7,
1.3158 +  // so it looks like the original nmethod called forward_exception_entry.
1.3159 +  __ set((intptr_t)StubRoutines::forward_exception_entry(), O0);
1.3160 +  __ JMP(O0, 0);
1.3161 +  __ delayed()->nop();
1.3162 +
1.3163 +  // -------------
1.3164 +  // make sure all code is generated
1.3165 +  masm->flush();
1.3166 +
1.3167 +  // return the blob
1.3168 +  // frame_size_words or bytes??
1.3169 +  return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true);
1.3170 +}
1.3171 +
1.3172 +void SharedRuntime::generate_stubs() {
1.3173 +
1.3174 +  _wrong_method_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::handle_wrong_method),
1.3175 +                                             "wrong_method_stub");
1.3176 +
1.3177 +  _ic_miss_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::handle_wrong_method_ic_miss),
1.3178 +                                        "ic_miss_stub");
1.3179 +
1.3180 +  _resolve_opt_virtual_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::resolve_opt_virtual_call_C),
1.3181 +                                                         "resolve_opt_virtual_call");
1.3182 +
1.3183 +  _resolve_virtual_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::resolve_virtual_call_C),
1.3184 +                                                     "resolve_virtual_call");
1.3185 +
1.3186 +  _resolve_static_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::resolve_static_call_C),
1.3187 +                                                    "resolve_static_call");
1.3188 +
1.3189 +  _polling_page_safepoint_handler_blob =
1.3190 +      generate_handler_blob(CAST_FROM_FN_PTR(address,
1.3191 +                            SafepointSynchronize::handle_polling_page_exception), false);
1.3192 +
1.3193 +  _polling_page_return_handler_blob =
1.3194 +      generate_handler_blob(CAST_FROM_FN_PTR(address,
1.3195 +                            SafepointSynchronize::handle_polling_page_exception), true);
1.3196 +
1.3197 +  generate_deopt_blob();
1.3198 +
1.3199 +#ifdef COMPILER2
1.3200 +  generate_uncommon_trap_blob();
1.3201 +#endif // COMPILER2
1.3202 +}
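// For orientation, a hedged summary of the contract every resolve stub above
// implements; the struct is illustrative, not a VM type -- the real values
// travel in the saved G5/G3 stack slots so restore_live_registers() reloads
// them with all Java argument registers intact before the jump through G3.
struct ResolveResultSketch {
  void* method;       // what the resolver left for G5_method (via get_vm_result)
  void* destination;  // the code entry returned in O0 and jumped to through G3
};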