src/cpu/sparc/vm/sharedRuntime_sparc.cpp

author        johnc
date          Thu, 07 Apr 2011 09:53:20 -0700
changeset     2781:e1162778c1c8
parent        2497:3582bf76420e
child         2687:3d58a4983660
permissions   -rw-r--r--

7009266: G1: assert(obj->is_oop_or_null(true )) failed: Error
Summary: A referent object that is only weakly reachable at the start of concurrent marking but is re-attached to the strongly reachable object graph during marking may not be marked as live. This can cause the reference object to be processed prematurely and leave dangling pointers to the referent object. Implement a read barrier for the java.lang.ref.Reference::referent field by intrinsifying the Reference.get() method, and intercepting accesses through JNI, reflection, and Unsafe, so that when a non-null referent object is read it is also logged in an SATB buffer.
Reviewed-by: kvn, iveresov, never, tonyp, dholmes
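
A minimal sketch of the barrier the summary describes, for orientation only: this is not code from the changeset, load_referent_field is a hypothetical helper, and the enqueue call is written against the SATB barrier-set interface assumed for this HotSpot vintage.

// Illustrative shape of the referent read barrier (sketch only):
oop referent = load_referent_field(reference);      // hypothetical accessor
if (UseG1GC && referent != NULL) {
  // Log the just-read value in the thread's SATB buffer so concurrent marking
  // treats it as live even if it is re-attached to the strongly reachable
  // graph after marking has started.
  G1SATBCardTableModRefBS::enqueue(referent);
}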

duke@435 1 /*
trims@1907 2 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
duke@435 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
duke@435 4 *
duke@435 5 * This code is free software; you can redistribute it and/or modify it
duke@435 6 * under the terms of the GNU General Public License version 2 only, as
duke@435 7 * published by the Free Software Foundation.
duke@435 8 *
duke@435 9 * This code is distributed in the hope that it will be useful, but WITHOUT
duke@435 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
duke@435 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
duke@435 12 * version 2 for more details (a copy is included in the LICENSE file that
duke@435 13 * accompanied this code).
duke@435 14 *
duke@435 15 * You should have received a copy of the GNU General Public License version
duke@435 16 * 2 along with this work; if not, write to the Free Software Foundation,
duke@435 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
duke@435 18 *
trims@1907 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
trims@1907 20 * or visit www.oracle.com if you need additional information or have any
trims@1907 21 * questions.
duke@435 22 *
duke@435 23 */
duke@435 24
stefank@2314 25 #include "precompiled.hpp"
stefank@2314 26 #include "asm/assembler.hpp"
stefank@2314 27 #include "assembler_sparc.inline.hpp"
stefank@2314 28 #include "code/debugInfoRec.hpp"
stefank@2314 29 #include "code/icBuffer.hpp"
stefank@2314 30 #include "code/vtableStubs.hpp"
stefank@2314 31 #include "interpreter/interpreter.hpp"
stefank@2314 32 #include "oops/compiledICHolderOop.hpp"
stefank@2314 33 #include "prims/jvmtiRedefineClassesTrace.hpp"
stefank@2314 34 #include "runtime/sharedRuntime.hpp"
stefank@2314 35 #include "runtime/vframeArray.hpp"
stefank@2314 36 #include "vmreg_sparc.inline.hpp"
stefank@2314 37 #ifdef COMPILER1
stefank@2314 38 #include "c1/c1_Runtime1.hpp"
stefank@2314 39 #endif
stefank@2314 40 #ifdef COMPILER2
stefank@2314 41 #include "opto/runtime.hpp"
stefank@2314 42 #endif
stefank@2314 43 #ifdef SHARK
stefank@2314 44 #include "compiler/compileBroker.hpp"
stefank@2314 45 #include "shark/sharkCompiler.hpp"
stefank@2314 46 #endif
duke@435 47
duke@435 48 #define __ masm->
duke@435 49
duke@435 50 #ifdef COMPILER2
duke@435 51 UncommonTrapBlob* SharedRuntime::_uncommon_trap_blob;
duke@435 52 #endif // COMPILER2
duke@435 53
duke@435 54 DeoptimizationBlob* SharedRuntime::_deopt_blob;
duke@435 55 SafepointBlob* SharedRuntime::_polling_page_safepoint_handler_blob;
duke@435 56 SafepointBlob* SharedRuntime::_polling_page_return_handler_blob;
duke@435 57 RuntimeStub* SharedRuntime::_wrong_method_blob;
duke@435 58 RuntimeStub* SharedRuntime::_ic_miss_blob;
duke@435 59 RuntimeStub* SharedRuntime::_resolve_opt_virtual_call_blob;
duke@435 60 RuntimeStub* SharedRuntime::_resolve_virtual_call_blob;
duke@435 61 RuntimeStub* SharedRuntime::_resolve_static_call_blob;
duke@435 62
duke@435 63 class RegisterSaver {
duke@435 64
duke@435 65 // Used for saving volatile registers. This is Gregs, Fregs, I/L/O.
duke@435 66 // The Oregs are problematic. In the 32bit build the compiler can
duke@435 67 // have O registers live with 64 bit quantities. A window save will
duke@435 68 // cut the heads off of the registers. We have to do a very extensive
duke@435 69 // stack dance to save and restore these properly.
duke@435 70
duke@435 71 // Note that the Oregs problem only exists if we block at either a polling
duke@435 72 // page exception or a compiled code safepoint that was not originally a call,
duke@435 73 // or deoptimize following one of these kinds of safepoints.
duke@435 74
duke@435 75 // Lots of registers to save. For all builds, a window save will preserve
duke@435 76 // the %i and %l registers. For the 32-bit longs-in-two entries and 64-bit
duke@435 77 // builds a window-save will preserve the %o registers. In the LION build
duke@435 78 // we need to save the 64-bit %o registers which requires we save them
duke@435 79 // before the window-save (as then they become %i registers and get their
duke@435 80 // heads chopped off on interrupt). We have to save some %g registers here
duke@435 81 // as well.
duke@435 82 enum {
duke@435 83 // This frame's save area. Includes extra space for the native call:
duke@435 84 // vararg's layout space and the like. Briefly holds the caller's
duke@435 85 // register save area.
duke@435 86 call_args_area = frame::register_save_words_sp_offset +
duke@435 87 frame::memory_parameter_word_sp_offset*wordSize,
duke@435 88 // Make sure save locations are always 8 byte aligned.
duke@435 89 // can't use round_to because it doesn't produce a compile-time constant (worked example below the enum)
duke@435 90 start_of_extra_save_area = ((call_args_area + 7) & ~7),
duke@435 91 g1_offset = start_of_extra_save_area, // g-regs needing saving
duke@435 92 g3_offset = g1_offset+8,
duke@435 93 g4_offset = g3_offset+8,
duke@435 94 g5_offset = g4_offset+8,
duke@435 95 o0_offset = g5_offset+8,
duke@435 96 o1_offset = o0_offset+8,
duke@435 97 o2_offset = o1_offset+8,
duke@435 98 o3_offset = o2_offset+8,
duke@435 99 o4_offset = o3_offset+8,
duke@435 100 o5_offset = o4_offset+8,
duke@435 101 start_of_flags_save_area = o5_offset+8,
duke@435 102 ccr_offset = start_of_flags_save_area,
duke@435 103 fsr_offset = ccr_offset + 8,
duke@435 104 d00_offset = fsr_offset+8, // Start of float save area
duke@435 105 register_save_size = d00_offset+8*32
duke@435 106 };
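  // Worked example of the rounding used for start_of_extra_save_area above
  // (illustrative values only): if call_args_area were 0x5c, then
  // ((0x5c + 7) & ~7) == 0x60, the next multiple of 8. Adding 7 and clearing
  // the low three bits rounds any value up to an 8-byte boundary at compile time.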
duke@435 107
duke@435 108
duke@435 109 public:
duke@435 110
duke@435 111 static int Oexception_offset() { return o0_offset; };
duke@435 112 static int G3_offset() { return g3_offset; };
duke@435 113 static int G5_offset() { return g5_offset; };
duke@435 114 static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words);
duke@435 115 static void restore_live_registers(MacroAssembler* masm);
duke@435 116
duke@435 117 // During deoptimization only the result registers need to be restored;
duke@435 118 // all the other values have already been extracted.
duke@435 119
duke@435 120 static void restore_result_registers(MacroAssembler* masm);
duke@435 121 };
duke@435 122
duke@435 123 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) {
duke@435 124 // Record volatile registers as callee-save values in an OopMap so their save locations will be
duke@435 125 // propagated to the caller frame's RegisterMap during StackFrameStream construction (needed for
duke@435 126 // deoptimization; see compiledVFrame::create_stack_value). The caller's I, L and O registers
duke@435 127 // are saved in register windows - I's and L's in the caller's frame and O's in the stub frame
duke@435 128 // (as the stub's I's) when the runtime routine called by the stub creates its frame.
duke@435 129 int i;
kvn@1442 130 // Always make the frame size 16 byte aligned.
duke@435 131 int frame_size = round_to(additional_frame_words + register_save_size, 16);
duke@435 132 // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words
duke@435 133 int frame_size_in_slots = frame_size / sizeof(jint);
duke@435 134 // CodeBlob frame size is in words.
duke@435 135 *total_frame_words = frame_size / wordSize;
duke@435 136 // OopMap* map = new OopMap(*total_frame_words, 0);
duke@435 137 OopMap* map = new OopMap(frame_size_in_slots, 0);
duke@435 138
duke@435 139 #if !defined(_LP64)
duke@435 140
duke@435 141 // Save 64-bit O registers; they will get their heads chopped off on a 'save'.
duke@435 142 __ stx(O0, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8);
duke@435 143 __ stx(O1, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8);
duke@435 144 __ stx(O2, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+2*8);
duke@435 145 __ stx(O3, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+3*8);
duke@435 146 __ stx(O4, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+4*8);
duke@435 147 __ stx(O5, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+5*8);
duke@435 148 #endif /* _LP64 */
duke@435 149
duke@435 150 __ save(SP, -frame_size, SP);
duke@435 151
duke@435 152 #ifndef _LP64
duke@435 153 // Reload the 64 bit Oregs. Although they are now Iregs we load them
duke@435 154 // to Oregs here to avoid interrupts cutting off their heads
duke@435 155
duke@435 156 __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8, O0);
duke@435 157 __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8, O1);
duke@435 158 __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+2*8, O2);
duke@435 159 __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+3*8, O3);
duke@435 160 __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+4*8, O4);
duke@435 161 __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+5*8, O5);
duke@435 162
duke@435 163 __ stx(O0, SP, o0_offset+STACK_BIAS);
duke@435 164 map->set_callee_saved(VMRegImpl::stack2reg((o0_offset + 4)>>2), O0->as_VMReg());
duke@435 165
duke@435 166 __ stx(O1, SP, o1_offset+STACK_BIAS);
duke@435 167
duke@435 168 map->set_callee_saved(VMRegImpl::stack2reg((o1_offset + 4)>>2), O1->as_VMReg());
duke@435 169
duke@435 170 __ stx(O2, SP, o2_offset+STACK_BIAS);
duke@435 171 map->set_callee_saved(VMRegImpl::stack2reg((o2_offset + 4)>>2), O2->as_VMReg());
duke@435 172
duke@435 173 __ stx(O3, SP, o3_offset+STACK_BIAS);
duke@435 174 map->set_callee_saved(VMRegImpl::stack2reg((o3_offset + 4)>>2), O3->as_VMReg());
duke@435 175
duke@435 176 __ stx(O4, SP, o4_offset+STACK_BIAS);
duke@435 177 map->set_callee_saved(VMRegImpl::stack2reg((o4_offset + 4)>>2), O4->as_VMReg());
duke@435 178
duke@435 179 __ stx(O5, SP, o5_offset+STACK_BIAS);
duke@435 180 map->set_callee_saved(VMRegImpl::stack2reg((o5_offset + 4)>>2), O5->as_VMReg());
duke@435 181 #endif /* _LP64 */
duke@435 182
coleenp@548 183
coleenp@548 184 #ifdef _LP64
coleenp@548 185 int debug_offset = 0;
coleenp@548 186 #else
coleenp@548 187 int debug_offset = 4;
coleenp@548 188 #endif
duke@435 189 // Save the G's
duke@435 190 __ stx(G1, SP, g1_offset+STACK_BIAS);
coleenp@548 191 map->set_callee_saved(VMRegImpl::stack2reg((g1_offset + debug_offset)>>2), G1->as_VMReg());
duke@435 192
duke@435 193 __ stx(G3, SP, g3_offset+STACK_BIAS);
coleenp@548 194 map->set_callee_saved(VMRegImpl::stack2reg((g3_offset + debug_offset)>>2), G3->as_VMReg());
duke@435 195
duke@435 196 __ stx(G4, SP, g4_offset+STACK_BIAS);
coleenp@548 197 map->set_callee_saved(VMRegImpl::stack2reg((g4_offset + debug_offset)>>2), G4->as_VMReg());
duke@435 198
duke@435 199 __ stx(G5, SP, g5_offset+STACK_BIAS);
coleenp@548 200 map->set_callee_saved(VMRegImpl::stack2reg((g5_offset + debug_offset)>>2), G5->as_VMReg());
duke@435 201
duke@435 202 // This is really a waste but we'll keep things as they were for now
duke@435 203 if (true) {
duke@435 204 #ifndef _LP64
duke@435 205 map->set_callee_saved(VMRegImpl::stack2reg((o0_offset)>>2), O0->as_VMReg()->next());
duke@435 206 map->set_callee_saved(VMRegImpl::stack2reg((o1_offset)>>2), O1->as_VMReg()->next());
duke@435 207 map->set_callee_saved(VMRegImpl::stack2reg((o2_offset)>>2), O2->as_VMReg()->next());
duke@435 208 map->set_callee_saved(VMRegImpl::stack2reg((o3_offset)>>2), O3->as_VMReg()->next());
duke@435 209 map->set_callee_saved(VMRegImpl::stack2reg((o4_offset)>>2), O4->as_VMReg()->next());
duke@435 210 map->set_callee_saved(VMRegImpl::stack2reg((o5_offset)>>2), O5->as_VMReg()->next());
duke@435 211 map->set_callee_saved(VMRegImpl::stack2reg((g1_offset)>>2), G1->as_VMReg()->next());
duke@435 212 map->set_callee_saved(VMRegImpl::stack2reg((g3_offset)>>2), G3->as_VMReg()->next());
duke@435 213 map->set_callee_saved(VMRegImpl::stack2reg((g4_offset)>>2), G4->as_VMReg()->next());
duke@435 214 map->set_callee_saved(VMRegImpl::stack2reg((g5_offset)>>2), G5->as_VMReg()->next());
coleenp@548 215 #endif /* _LP64 */
duke@435 216 }
duke@435 217
duke@435 218
duke@435 219 // Save the flags
duke@435 220 __ rdccr( G5 );
duke@435 221 __ stx(G5, SP, ccr_offset+STACK_BIAS);
duke@435 222 __ stxfsr(SP, fsr_offset+STACK_BIAS);
duke@435 223
kvn@1442 224 // Save all the FP registers: 32 doubles (32 floats correspond to the 2 halves of the first 16 doubles)
duke@435 225 int offset = d00_offset;
kvn@1442 226 for( int i=0; i<FloatRegisterImpl::number_of_registers; i+=2 ) {
duke@435 227 FloatRegister f = as_FloatRegister(i);
duke@435 228 __ stf(FloatRegisterImpl::D, f, SP, offset+STACK_BIAS);
kvn@1442 229 // Record as callee saved both halves of double registers (2 float registers).
duke@435 230 map->set_callee_saved(VMRegImpl::stack2reg(offset>>2), f->as_VMReg());
kvn@1442 231 map->set_callee_saved(VMRegImpl::stack2reg((offset + sizeof(float))>>2), f->as_VMReg()->next());
duke@435 232 offset += sizeof(double);
duke@435 233 }
duke@435 234
duke@435 235 // And we're done.
duke@435 236
duke@435 237 return map;
duke@435 238 }
duke@435 239
duke@435 240
duke@435 241 // Pop the current frame and restore all the registers that we
duke@435 242 // saved.
duke@435 243 void RegisterSaver::restore_live_registers(MacroAssembler* masm) {
duke@435 244
duke@435 245 // Restore all the FP registers
kvn@1442 246 for( int i=0; i<FloatRegisterImpl::number_of_registers; i+=2 ) {
duke@435 247 __ ldf(FloatRegisterImpl::D, SP, d00_offset+i*sizeof(float)+STACK_BIAS, as_FloatRegister(i));
duke@435 248 }
duke@435 249
duke@435 250 __ ldx(SP, ccr_offset+STACK_BIAS, G1);
duke@435 251 __ wrccr (G1) ;
duke@435 252
duke@435 253 // Restore the G's
duke@435 254 // Note that G2 (AKA GThread) must be saved and restored separately.
duke@435 255 // TODO-FIXME: save and restore some of the other ASRs, viz., %asi and %gsr.
duke@435 256
duke@435 257 __ ldx(SP, g1_offset+STACK_BIAS, G1);
duke@435 258 __ ldx(SP, g3_offset+STACK_BIAS, G3);
duke@435 259 __ ldx(SP, g4_offset+STACK_BIAS, G4);
duke@435 260 __ ldx(SP, g5_offset+STACK_BIAS, G5);
duke@435 261
duke@435 262
duke@435 263 #if !defined(_LP64)
duke@435 264 // Restore the 64-bit O's.
duke@435 265 __ ldx(SP, o0_offset+STACK_BIAS, O0);
duke@435 266 __ ldx(SP, o1_offset+STACK_BIAS, O1);
duke@435 267 __ ldx(SP, o2_offset+STACK_BIAS, O2);
duke@435 268 __ ldx(SP, o3_offset+STACK_BIAS, O3);
duke@435 269 __ ldx(SP, o4_offset+STACK_BIAS, O4);
duke@435 270 __ ldx(SP, o5_offset+STACK_BIAS, O5);
duke@435 271
duke@435 272 // And temporarily place them in TLS
duke@435 273
duke@435 274 __ stx(O0, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8);
duke@435 275 __ stx(O1, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8);
duke@435 276 __ stx(O2, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+2*8);
duke@435 277 __ stx(O3, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+3*8);
duke@435 278 __ stx(O4, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+4*8);
duke@435 279 __ stx(O5, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+5*8);
duke@435 280 #endif /* _LP64 */
duke@435 281
duke@435 282 // Restore flags
duke@435 283
duke@435 284 __ ldxfsr(SP, fsr_offset+STACK_BIAS);
duke@435 285
duke@435 286 __ restore();
duke@435 287
duke@435 288 #if !defined(_LP64)
duke@435 289 // Now reload the 64bit Oregs after we've restored the window.
duke@435 290 __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8, O0);
duke@435 291 __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8, O1);
duke@435 292 __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+2*8, O2);
duke@435 293 __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+3*8, O3);
duke@435 294 __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+4*8, O4);
duke@435 295 __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+5*8, O5);
duke@435 296 #endif /* _LP64 */
duke@435 297
duke@435 298 }
duke@435 299
duke@435 300 // Pop the current frame and restore the registers that might be holding
duke@435 301 // a result.
duke@435 302 void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
duke@435 303
duke@435 304 #if !defined(_LP64)
duke@435 305 // 32bit build returns longs in G1
duke@435 306 __ ldx(SP, g1_offset+STACK_BIAS, G1);
duke@435 307
duke@435 308 // Retrieve the 64-bit O's.
duke@435 309 __ ldx(SP, o0_offset+STACK_BIAS, O0);
duke@435 310 __ ldx(SP, o1_offset+STACK_BIAS, O1);
duke@435 311 // and save to TLS
duke@435 312 __ stx(O0, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8);
duke@435 313 __ stx(O1, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8);
duke@435 314 #endif /* _LP64 */
duke@435 315
duke@435 316 __ ldf(FloatRegisterImpl::D, SP, d00_offset+STACK_BIAS, as_FloatRegister(0));
duke@435 317
duke@435 318 __ restore();
duke@435 319
duke@435 320 #if !defined(_LP64)
duke@435 321 // Now reload the 64bit Oregs after we've restored the window.
duke@435 322 __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8, O0);
duke@435 323 __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8, O1);
duke@435 324 #endif /* _LP64 */
duke@435 325
duke@435 326 }
duke@435 327
duke@435 328 // The java_calling_convention describes stack locations as ideal slots on
duke@435 329 // a frame with no abi restrictions. Since we must observe abi restrictions
duke@435 330 // (like the placement of the register window) the slots must be biased by
duke@435 331 // the following value.
duke@435 332 static int reg2offset(VMReg r) {
duke@435 333 return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
duke@435 334 }
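// Descriptive example for reg2offset (symbolic, the actual constants are
// platform-defined): a stack VMReg with reg2stack() == 3 maps to byte offset
// (3 + out_preserve_stack_slots()) * VMRegImpl::stack_slot_size from the
// unbiased SP; callers add STACK_BIAS when forming a V9 address.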
duke@435 335
duke@435 336 // ---------------------------------------------------------------------------
duke@435 337 // Read the array of BasicTypes from a signature, and compute where the
duke@435 338 // arguments should go. Values in the VMRegPair regs array refer to 4-byte (VMRegImpl::stack_slot_size)
duke@435 339 // quantities. Values less than VMRegImpl::stack0 are registers, those above
duke@435 340 // refer to 4-byte stack slots. All stack slots are based off of the window
duke@435 341 // top. VMRegImpl::stack0 refers to the first slot past the 16-word window,
duke@435 342 // and VMRegImpl::stack0+1 refers to the memory word 4-bytes higher. Register
duke@435 343 // values 0-63 (up to RegisterImpl::number_of_registers) are the 64-bit
duke@435 344 // integer registers. Values 64-95 are the (32-bit only) float registers.
duke@435 345 // Each 32-bit quantity is given its own number, so the integer registers
duke@435 346 // (in either 32- or 64-bit builds) use 2 numbers. For example, there is
duke@435 347 // an O0-low and an O0-high. Essentially, all int register numbers are doubled.
duke@435 348
duke@435 349 // Register results are passed in O0-O5, for outgoing call arguments. To
duke@435 350 // convert to incoming arguments, convert all O's to I's. The regs array
duke@435 351 // refer to the low and hi 32-bit words of 64-bit registers or stack slots.
duke@435 352 // If the regs[].second() field is set to VMRegImpl::Bad(), it means it's unused (a
duke@435 353 // 32-bit value was passed). If both are VMRegImpl::Bad(), it means no value was
duke@435 354 // passed (used as a placeholder for the other half of longs and doubles in
duke@435 355 // the 64-bit build). regs[].second() is either VMRegImpl::Bad() or regs[].second() is
duke@435 356 // regs[].first()+1 (regs[].first() may be misaligned in the C calling convention).
duke@435 357 // Sparc never passes a value in regs[].second() but not regs[].first() (regs[].first()
duke@435 358 // == VMRegImpl::Bad() && regs[].second() != VMRegImpl::Bad()) nor unrelated values in the
duke@435 359 // same VMRegPair.
duke@435 360
duke@435 361 // Note: the INPUTS in sig_bt are in units of Java argument words, which are
duke@435 362 // either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit
duke@435 363 // units regardless of build.
duke@435 364
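// Sketch of the pairing described above (illustrative only): a 64-bit value
// placed with regs[i].set2(O0->as_VMReg()) has first() naming one 32-bit half
// of O0 and second() naming the adjacent half (O0->as_VMReg()->next()), while
// a 32-bit int placed with set1() leaves second() as VMRegImpl::Bad().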
duke@435 365
duke@435 366 // ---------------------------------------------------------------------------
duke@435 367 // The compiled Java calling convention. The Java convention always passes
duke@435 368 // 64-bit values in adjacent aligned locations (either registers or stack),
duke@435 369 // floats in float registers and doubles in aligned float pairs. Values are
duke@435 370 // packed in the registers. There is no backing varargs store for values in
duke@435 371 // registers. In the 32-bit build, longs are passed in G1 and G4 (cannot be
duke@435 372 // passed in I's, because longs in I's get their heads chopped off at
duke@435 373 // interrupt).
duke@435 374 int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
duke@435 375 VMRegPair *regs,
duke@435 376 int total_args_passed,
duke@435 377 int is_outgoing) {
duke@435 378 assert(F31->as_VMReg()->is_reg(), "overlapping stack/register numbers");
duke@435 379
duke@435 380 // Convention is to pack the first 6 int/oop args into the first 6 registers
duke@435 381 // (I0-I5), extras spill to the stack. Then pack the first 8 float args
duke@435 382 // into F0-F7, extras spill to the stack. Then pad all register sets to
duke@435 383 // align. Then put longs and doubles into the same registers as they fit,
duke@435 384 // else spill to the stack.
duke@435 385 const int int_reg_max = SPARC_ARGS_IN_REGS_NUM;
duke@435 386 const int flt_reg_max = 8;
duke@435 387 //
duke@435 388 // Where 32-bit 1-reg longs start being passed
duke@435 389 // In tiered we must pass on stack because c1 can't use a "pair" in a single reg.
duke@435 390 // So make it look like we've filled all the G regs that c2 wants to use.
duke@435 391 Register g_reg = TieredCompilation ? noreg : G1;
duke@435 392
duke@435 393 // Count int/oop and float args. See how many stack slots we'll need and
duke@435 394 // where the longs & doubles will go.
duke@435 395 int int_reg_cnt = 0;
duke@435 396 int flt_reg_cnt = 0;
duke@435 397 // int stk_reg_pairs = frame::register_save_words*(wordSize>>2);
duke@435 398 // int stk_reg_pairs = SharedRuntime::out_preserve_stack_slots();
duke@435 399 int stk_reg_pairs = 0;
duke@435 400 for (int i = 0; i < total_args_passed; i++) {
duke@435 401 switch (sig_bt[i]) {
duke@435 402 case T_LONG: // LP64, longs compete with int args
duke@435 403 assert(sig_bt[i+1] == T_VOID, "");
duke@435 404 #ifdef _LP64
duke@435 405 if (int_reg_cnt < int_reg_max) int_reg_cnt++;
duke@435 406 #endif
duke@435 407 break;
duke@435 408 case T_OBJECT:
duke@435 409 case T_ARRAY:
duke@435 410 case T_ADDRESS: // Used, e.g., in slow-path locking for the lock's stack address
duke@435 411 if (int_reg_cnt < int_reg_max) int_reg_cnt++;
duke@435 412 #ifndef _LP64
duke@435 413 else stk_reg_pairs++;
duke@435 414 #endif
duke@435 415 break;
duke@435 416 case T_INT:
duke@435 417 case T_SHORT:
duke@435 418 case T_CHAR:
duke@435 419 case T_BYTE:
duke@435 420 case T_BOOLEAN:
duke@435 421 if (int_reg_cnt < int_reg_max) int_reg_cnt++;
duke@435 422 else stk_reg_pairs++;
duke@435 423 break;
duke@435 424 case T_FLOAT:
duke@435 425 if (flt_reg_cnt < flt_reg_max) flt_reg_cnt++;
duke@435 426 else stk_reg_pairs++;
duke@435 427 break;
duke@435 428 case T_DOUBLE:
duke@435 429 assert(sig_bt[i+1] == T_VOID, "");
duke@435 430 break;
duke@435 431 case T_VOID:
duke@435 432 break;
duke@435 433 default:
duke@435 434 ShouldNotReachHere();
duke@435 435 }
duke@435 436 }
duke@435 437
duke@435 438 // This is where the longs/doubles start on the stack.
duke@435 439 stk_reg_pairs = (stk_reg_pairs+1) & ~1; // Round
duke@435 440
duke@435 441 int int_reg_pairs = (int_reg_cnt+1) & ~1; // 32-bit 2-reg longs only
duke@435 442 int flt_reg_pairs = (flt_reg_cnt+1) & ~1;
duke@435 443
duke@435 444 // int stk_reg = frame::register_save_words*(wordSize>>2);
duke@435 445 // int stk_reg = SharedRuntime::out_preserve_stack_slots();
duke@435 446 int stk_reg = 0;
duke@435 447 int int_reg = 0;
duke@435 448 int flt_reg = 0;
duke@435 449
duke@435 450 // Now do the signature layout
duke@435 451 for (int i = 0; i < total_args_passed; i++) {
duke@435 452 switch (sig_bt[i]) {
duke@435 453 case T_INT:
duke@435 454 case T_SHORT:
duke@435 455 case T_CHAR:
duke@435 456 case T_BYTE:
duke@435 457 case T_BOOLEAN:
duke@435 458 #ifndef _LP64
duke@435 459 case T_OBJECT:
duke@435 460 case T_ARRAY:
duke@435 461 case T_ADDRESS: // Used, e.g., in slow-path locking for the lock's stack address
duke@435 462 #endif // _LP64
duke@435 463 if (int_reg < int_reg_max) {
duke@435 464 Register r = is_outgoing ? as_oRegister(int_reg++) : as_iRegister(int_reg++);
duke@435 465 regs[i].set1(r->as_VMReg());
duke@435 466 } else {
duke@435 467 regs[i].set1(VMRegImpl::stack2reg(stk_reg++));
duke@435 468 }
duke@435 469 break;
duke@435 470
duke@435 471 #ifdef _LP64
duke@435 472 case T_OBJECT:
duke@435 473 case T_ARRAY:
duke@435 474 case T_ADDRESS: // Used, e.g., in slow-path locking for the lock's stack address
duke@435 475 if (int_reg < int_reg_max) {
duke@435 476 Register r = is_outgoing ? as_oRegister(int_reg++) : as_iRegister(int_reg++);
duke@435 477 regs[i].set2(r->as_VMReg());
duke@435 478 } else {
duke@435 479 regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs));
duke@435 480 stk_reg_pairs += 2;
duke@435 481 }
duke@435 482 break;
duke@435 483 #endif // _LP64
duke@435 484
duke@435 485 case T_LONG:
duke@435 486 assert(sig_bt[i+1] == T_VOID, "expecting VOID in other half");
duke@435 487 #ifdef _LP64
duke@435 488 if (int_reg < int_reg_max) {
duke@435 489 Register r = is_outgoing ? as_oRegister(int_reg++) : as_iRegister(int_reg++);
duke@435 490 regs[i].set2(r->as_VMReg());
duke@435 491 } else {
duke@435 492 regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs));
duke@435 493 stk_reg_pairs += 2;
duke@435 494 }
duke@435 495 #else
never@739 496 #ifdef COMPILER2
duke@435 497 // For 32-bit build, can't pass longs in O-regs because they become
duke@435 498 // I-regs and get trashed. Use G-regs instead. G1 and G4 are almost
duke@435 499 // spare and available. This convention isn't used by the Sparc ABI or
duke@435 500 // anywhere else. If we're tiered then we don't use G-regs because c1
never@739 501 // can't deal with them as a "pair". (Tiered makes this code think g's are filled)
duke@435 502 // G0: zero
duke@435 503 // G1: 1st Long arg
duke@435 504 // G2: global allocated to TLS
duke@435 505 // G3: used in inline cache check
duke@435 506 // G4: 2nd Long arg
duke@435 507 // G5: used in inline cache check
duke@435 508 // G6: used by OS
duke@435 509 // G7: used by OS
duke@435 510
duke@435 511 if (g_reg == G1) {
duke@435 512 regs[i].set2(G1->as_VMReg()); // This long arg in G1
duke@435 513 g_reg = G4; // Where the next arg goes
duke@435 514 } else if (g_reg == G4) {
duke@435 515 regs[i].set2(G4->as_VMReg()); // The 2nd long arg in G4
duke@435 516 g_reg = noreg; // No more longs in registers
duke@435 517 } else {
duke@435 518 regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs));
duke@435 519 stk_reg_pairs += 2;
duke@435 520 }
duke@435 521 #else // COMPILER2
duke@435 522 if (int_reg_pairs + 1 < int_reg_max) {
duke@435 523 if (is_outgoing) {
duke@435 524 regs[i].set_pair(as_oRegister(int_reg_pairs + 1)->as_VMReg(), as_oRegister(int_reg_pairs)->as_VMReg());
duke@435 525 } else {
duke@435 526 regs[i].set_pair(as_iRegister(int_reg_pairs + 1)->as_VMReg(), as_iRegister(int_reg_pairs)->as_VMReg());
duke@435 527 }
duke@435 528 int_reg_pairs += 2;
duke@435 529 } else {
duke@435 530 regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs));
duke@435 531 stk_reg_pairs += 2;
duke@435 532 }
duke@435 533 #endif // COMPILER2
never@739 534 #endif // _LP64
duke@435 535 break;
duke@435 536
duke@435 537 case T_FLOAT:
duke@435 538 if (flt_reg < flt_reg_max) regs[i].set1(as_FloatRegister(flt_reg++)->as_VMReg());
duke@435 539 else regs[i].set1( VMRegImpl::stack2reg(stk_reg++));
duke@435 540 break;
duke@435 541 case T_DOUBLE:
duke@435 542 assert(sig_bt[i+1] == T_VOID, "expecting half");
duke@435 543 if (flt_reg_pairs + 1 < flt_reg_max) {
duke@435 544 regs[i].set2(as_FloatRegister(flt_reg_pairs)->as_VMReg());
duke@435 545 flt_reg_pairs += 2;
duke@435 546 } else {
duke@435 547 regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs));
duke@435 548 stk_reg_pairs += 2;
duke@435 549 }
duke@435 550 break;
duke@435 551 case T_VOID: regs[i].set_bad(); break; // Halves of longs & doubles
duke@435 552 default:
duke@435 553 ShouldNotReachHere();
duke@435 554 }
duke@435 555 }
duke@435 556
duke@435 557 // return the amount of stack space these arguments will need.
duke@435 558 return stk_reg_pairs;
duke@435 559
duke@435 560 }
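// Illustrative use of the convention above (sketch only, nothing in this file
// calls it this way): for the Java signature (long, int) a caller builds
//   BasicType sig_bt[] = { T_LONG, T_VOID, T_INT };  // T_VOID is the placeholder half
//   VMRegPair  regs[3];
//   int stack_slots = SharedRuntime::java_calling_convention(sig_bt, regs, 3, 1);
// On a 64-bit build the long lands in O0 (I0 if is_outgoing is false) and the
// int in O1/I1; the return value reports stack slots needed for overflow args.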
duke@435 561
twisti@1441 562 // Helper class mostly to avoid passing masm everywhere, and handle
twisti@1441 563 // store displacement overflow logic.
duke@435 564 class AdapterGenerator {
duke@435 565 MacroAssembler *masm;
duke@435 566 Register Rdisp;
duke@435 567 void set_Rdisp(Register r) { Rdisp = r; }
duke@435 568
duke@435 569 void patch_callers_callsite();
duke@435 570
duke@435 571 // base+st_off points to top of argument
twisti@1861 572 int arg_offset(const int st_off) { return st_off; }
duke@435 573 int next_arg_offset(const int st_off) {
twisti@1861 574 return st_off - Interpreter::stackElementSize;
twisti@1441 575 }
twisti@1441 576
twisti@1441 577 // Argument slot values may be loaded first into a register because
twisti@1441 578 // they might not fit into displacement.
twisti@1441 579 RegisterOrConstant arg_slot(const int st_off);
twisti@1441 580 RegisterOrConstant next_arg_slot(const int st_off);
twisti@1441 581
duke@435 582 // Stores long into offset pointed to by base
duke@435 583 void store_c2i_long(Register r, Register base,
duke@435 584 const int st_off, bool is_stack);
duke@435 585 void store_c2i_object(Register r, Register base,
duke@435 586 const int st_off);
duke@435 587 void store_c2i_int(Register r, Register base,
duke@435 588 const int st_off);
duke@435 589 void store_c2i_double(VMReg r_2,
duke@435 590 VMReg r_1, Register base, const int st_off);
duke@435 591 void store_c2i_float(FloatRegister f, Register base,
duke@435 592 const int st_off);
duke@435 593
duke@435 594 public:
duke@435 595 void gen_c2i_adapter(int total_args_passed,
duke@435 596 // VMReg max_arg,
duke@435 597 int comp_args_on_stack, // VMRegStackSlots
duke@435 598 const BasicType *sig_bt,
duke@435 599 const VMRegPair *regs,
duke@435 600 Label& skip_fixup);
duke@435 601 void gen_i2c_adapter(int total_args_passed,
duke@435 602 // VMReg max_arg,
duke@435 603 int comp_args_on_stack, // VMRegStackSlots
duke@435 604 const BasicType *sig_bt,
duke@435 605 const VMRegPair *regs);
duke@435 606
duke@435 607 AdapterGenerator(MacroAssembler *_masm) : masm(_masm) {}
duke@435 608 };
duke@435 609
duke@435 610
duke@435 611 // Patch the callers callsite with entry to compiled code if it exists.
duke@435 612 void AdapterGenerator::patch_callers_callsite() {
duke@435 613 Label L;
duke@435 614 __ ld_ptr(G5_method, in_bytes(methodOopDesc::code_offset()), G3_scratch);
duke@435 615 __ br_null(G3_scratch, false, __ pt, L);
duke@435 616 // Schedule the branch target address early.
duke@435 617 __ delayed()->ld_ptr(G5_method, in_bytes(methodOopDesc::interpreter_entry_offset()), G3_scratch);
duke@435 618 // Call into the VM to patch the caller, then jump to compiled callee
duke@435 619 __ save_frame(4); // Args in compiled layout; do not blow them
duke@435 620
duke@435 621 // Must save all the live Gregs the list is:
duke@435 622 // G1: 1st Long arg (32bit build)
duke@435 623 // G2: global allocated to TLS
duke@435 624 // G3: used in inline cache check (scratch)
duke@435 625 // G4: 2nd Long arg (32bit build);
duke@435 626 // G5: used in inline cache check (methodOop)
duke@435 627
duke@435 628 // The longs must go to the stack by hand since in the 32 bit build they can be trashed by window ops.
duke@435 629
duke@435 630 #ifdef _LP64
duke@435 631 // mov(s,d)
duke@435 632 __ mov(G1, L1);
duke@435 633 __ mov(G4, L4);
duke@435 634 __ mov(G5_method, L5);
duke@435 635 __ mov(G5_method, O0); // VM needs target method
duke@435 636 __ mov(I7, O1); // VM needs caller's callsite
duke@435 637 // Must be a leaf call...
duke@435 638 // can be very far once the blob has been relocated
twisti@1162 639 AddressLiteral dest(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite));
duke@435 640 __ relocate(relocInfo::runtime_call_type);
twisti@1162 641 __ jumpl_to(dest, O7, O7);
duke@435 642 __ delayed()->mov(G2_thread, L7_thread_cache);
duke@435 643 __ mov(L7_thread_cache, G2_thread);
duke@435 644 __ mov(L1, G1);
duke@435 645 __ mov(L4, G4);
duke@435 646 __ mov(L5, G5_method);
duke@435 647 #else
duke@435 648 __ stx(G1, FP, -8 + STACK_BIAS);
duke@435 649 __ stx(G4, FP, -16 + STACK_BIAS);
duke@435 650 __ mov(G5_method, L5);
duke@435 651 __ mov(G5_method, O0); // VM needs target method
duke@435 652 __ mov(I7, O1); // VM needs caller's callsite
duke@435 653 // Must be a leaf call...
duke@435 654 __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), relocInfo::runtime_call_type);
duke@435 655 __ delayed()->mov(G2_thread, L7_thread_cache);
duke@435 656 __ mov(L7_thread_cache, G2_thread);
duke@435 657 __ ldx(FP, -8 + STACK_BIAS, G1);
duke@435 658 __ ldx(FP, -16 + STACK_BIAS, G4);
duke@435 659 __ mov(L5, G5_method);
duke@435 660 __ ld_ptr(G5_method, in_bytes(methodOopDesc::interpreter_entry_offset()), G3_scratch);
duke@435 661 #endif /* _LP64 */
duke@435 662
duke@435 663 __ restore(); // Restore args
duke@435 664 __ bind(L);
duke@435 665 }
duke@435 666
twisti@1441 667
twisti@1441 668 RegisterOrConstant AdapterGenerator::arg_slot(const int st_off) {
twisti@1441 669 RegisterOrConstant roc(arg_offset(st_off));
twisti@1441 670 return __ ensure_simm13_or_reg(roc, Rdisp);
duke@435 671 }
duke@435 672
twisti@1441 673 RegisterOrConstant AdapterGenerator::next_arg_slot(const int st_off) {
twisti@1441 674 RegisterOrConstant roc(next_arg_offset(st_off));
twisti@1441 675 return __ ensure_simm13_or_reg(roc, Rdisp);
duke@435 676 }
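// Descriptive note on the two helpers above: ensure_simm13_or_reg keeps the
// common case cheap. When the computed offset fits in SPARC's signed 13-bit
// immediate it is returned as a constant displacement; otherwise it is
// materialized into Rdisp and the register form of the addressing mode is used.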
twisti@1441 677
twisti@1441 678
duke@435 679 // Stores long into offset pointed to by base
duke@435 680 void AdapterGenerator::store_c2i_long(Register r, Register base,
duke@435 681 const int st_off, bool is_stack) {
duke@435 682 #ifdef _LP64
duke@435 683 // In V9, longs are given 2 64-bit slots in the interpreter, but the
duke@435 684 // data is passed in only 1 slot.
duke@435 685 __ stx(r, base, next_arg_slot(st_off));
duke@435 686 #else
ysr@777 687 #ifdef COMPILER2
duke@435 688 // Misaligned store of 64-bit data
duke@435 689 __ stw(r, base, arg_slot(st_off)); // lo bits
duke@435 690 __ srlx(r, 32, r);
duke@435 691 __ stw(r, base, next_arg_slot(st_off)); // hi bits
duke@435 692 #else
duke@435 693 if (is_stack) {
duke@435 694 // Misaligned store of 64-bit data
duke@435 695 __ stw(r, base, arg_slot(st_off)); // lo bits
duke@435 696 __ srlx(r, 32, r);
duke@435 697 __ stw(r, base, next_arg_slot(st_off)); // hi bits
duke@435 698 } else {
duke@435 699 __ stw(r->successor(), base, arg_slot(st_off) ); // lo bits
duke@435 700 __ stw(r , base, next_arg_slot(st_off)); // hi bits
duke@435 701 }
duke@435 702 #endif // COMPILER2
ysr@777 703 #endif // _LP64
duke@435 704 }
duke@435 705
duke@435 706 void AdapterGenerator::store_c2i_object(Register r, Register base,
duke@435 707 const int st_off) {
duke@435 708 __ st_ptr (r, base, arg_slot(st_off));
duke@435 709 }
duke@435 710
duke@435 711 void AdapterGenerator::store_c2i_int(Register r, Register base,
duke@435 712 const int st_off) {
duke@435 713 __ st (r, base, arg_slot(st_off));
duke@435 714 }
duke@435 715
duke@435 716 // Stores into offset pointed to by base
duke@435 717 void AdapterGenerator::store_c2i_double(VMReg r_2,
duke@435 718 VMReg r_1, Register base, const int st_off) {
duke@435 719 #ifdef _LP64
duke@435 720 // In V9, doubles are given 2 64-bit slots in the interpreter, but the
duke@435 721 // data is passed in only 1 slot.
duke@435 722 __ stf(FloatRegisterImpl::D, r_1->as_FloatRegister(), base, next_arg_slot(st_off));
duke@435 723 #else
duke@435 724 // Need to marshal 64-bit value from misaligned Lesp loads
duke@435 725 __ stf(FloatRegisterImpl::S, r_1->as_FloatRegister(), base, next_arg_slot(st_off));
duke@435 726 __ stf(FloatRegisterImpl::S, r_2->as_FloatRegister(), base, arg_slot(st_off) );
duke@435 727 #endif
duke@435 728 }
duke@435 729
duke@435 730 void AdapterGenerator::store_c2i_float(FloatRegister f, Register base,
duke@435 731 const int st_off) {
duke@435 732 __ stf(FloatRegisterImpl::S, f, base, arg_slot(st_off));
duke@435 733 }
duke@435 734
duke@435 735 void AdapterGenerator::gen_c2i_adapter(
duke@435 736 int total_args_passed,
duke@435 737 // VMReg max_arg,
duke@435 738 int comp_args_on_stack, // VMRegStackSlots
duke@435 739 const BasicType *sig_bt,
duke@435 740 const VMRegPair *regs,
duke@435 741 Label& skip_fixup) {
duke@435 742
duke@435 743 // Before we get into the guts of the C2I adapter, see if we should be here
duke@435 744 // at all. We've come from compiled code and are attempting to jump to the
duke@435 745 // interpreter, which means the caller made a static call to get here
duke@435 746 // (vcalls always get a compiled target if there is one). Check for a
duke@435 747 // compiled target. If there is one, we need to patch the caller's call.
duke@435 748 // However we will run interpreted if we come thru here. The next pass
duke@435 749 // thru the call site will run compiled. If we ran compiled here then
duke@435 750 // we can (theoretically) do endless i2c->c2i->i2c transitions during
duke@435 751 // deopt/uncommon trap cycles. If we always go interpreted here then
duke@435 752 // we can have at most one and don't need to play any tricks to keep
duke@435 753 // from endlessly growing the stack.
duke@435 754 //
duke@435 755 // Actually if we detected that we had an i2c->c2i transition here we
duke@435 756 // ought to be able to reset the world back to the state of the interpreted
duke@435 757 // call and not bother building another interpreter arg area. We don't
duke@435 758 // do that at this point.
duke@435 759
duke@435 760 patch_callers_callsite();
duke@435 761
duke@435 762 __ bind(skip_fixup);
duke@435 763
duke@435 764 // Since all args are passed on the stack, total_args_passed*wordSize is the
duke@435 765 // space we need. Add in varargs area needed by the interpreter. Round up
duke@435 766 // to stack alignment.
twisti@1861 767 const int arg_size = total_args_passed * Interpreter::stackElementSize;
duke@435 768 const int varargs_area =
duke@435 769 (frame::varargs_offset - frame::register_save_words)*wordSize;
duke@435 770 const int extraspace = round_to(arg_size + varargs_area, 2*wordSize);
duke@435 771
duke@435 772 int bias = STACK_BIAS;
duke@435 773 const int interp_arg_offset = frame::varargs_offset*wordSize +
twisti@1861 774 (total_args_passed-1)*Interpreter::stackElementSize;
duke@435 775
duke@435 776 Register base = SP;
duke@435 777
duke@435 778 #ifdef _LP64
duke@435 779 // In the 64bit build because of wider slots and STACKBIAS we can run
duke@435 780 // out of bits in the displacement to do loads and stores. Use g3 as
duke@435 781 // temporary displacement.
duke@435 782 if (! __ is_simm13(extraspace)) {
duke@435 783 __ set(extraspace, G3_scratch);
duke@435 784 __ sub(SP, G3_scratch, SP);
duke@435 785 } else {
duke@435 786 __ sub(SP, extraspace, SP);
duke@435 787 }
duke@435 788 set_Rdisp(G3_scratch);
duke@435 789 #else
duke@435 790 __ sub(SP, extraspace, SP);
duke@435 791 #endif // _LP64
duke@435 792
duke@435 793 // First write G1 (if used) to wherever it must go
duke@435 794 for (int i=0; i<total_args_passed; i++) {
twisti@1861 795 const int st_off = interp_arg_offset - (i*Interpreter::stackElementSize) + bias;
duke@435 796 VMReg r_1 = regs[i].first();
duke@435 797 VMReg r_2 = regs[i].second();
duke@435 798 if (r_1 == G1_scratch->as_VMReg()) {
duke@435 799 if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ARRAY) {
duke@435 800 store_c2i_object(G1_scratch, base, st_off);
duke@435 801 } else if (sig_bt[i] == T_LONG) {
duke@435 802 assert(!TieredCompilation, "should not use register args for longs");
duke@435 803 store_c2i_long(G1_scratch, base, st_off, false);
duke@435 804 } else {
duke@435 805 store_c2i_int(G1_scratch, base, st_off);
duke@435 806 }
duke@435 807 }
duke@435 808 }
duke@435 809
duke@435 810 // Now write the args into the outgoing interpreter space
duke@435 811 for (int i=0; i<total_args_passed; i++) {
twisti@1861 812 const int st_off = interp_arg_offset - (i*Interpreter::stackElementSize) + bias;
duke@435 813 VMReg r_1 = regs[i].first();
duke@435 814 VMReg r_2 = regs[i].second();
duke@435 815 if (!r_1->is_valid()) {
duke@435 816 assert(!r_2->is_valid(), "");
duke@435 817 continue;
duke@435 818 }
duke@435 819 // Skip G1 if found as we did it first in order to free it up
duke@435 820 if (r_1 == G1_scratch->as_VMReg()) {
duke@435 821 continue;
duke@435 822 }
duke@435 823 #ifdef ASSERT
duke@435 824 bool G1_forced = false;
duke@435 825 #endif // ASSERT
duke@435 826 if (r_1->is_stack()) { // Pretend stack targets are loaded into G1
duke@435 827 #ifdef _LP64
duke@435 828 Register ld_off = Rdisp;
duke@435 829 __ set(reg2offset(r_1) + extraspace + bias, ld_off);
duke@435 830 #else
duke@435 831 int ld_off = reg2offset(r_1) + extraspace + bias;
kvn@1686 832 #endif // _LP64
duke@435 833 #ifdef ASSERT
duke@435 834 G1_forced = true;
duke@435 835 #endif // ASSERT
duke@435 836 r_1 = G1_scratch->as_VMReg();// as part of the load/store shuffle
duke@435 837 if (!r_2->is_valid()) __ ld (base, ld_off, G1_scratch);
duke@435 838 else __ ldx(base, ld_off, G1_scratch);
duke@435 839 }
duke@435 840
duke@435 841 if (r_1->is_Register()) {
duke@435 842 Register r = r_1->as_Register()->after_restore();
duke@435 843 if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ARRAY) {
duke@435 844 store_c2i_object(r, base, st_off);
duke@435 845 } else if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
kvn@1686 846 #ifndef _LP64
duke@435 847 if (TieredCompilation) {
duke@435 848 assert(G1_forced || sig_bt[i] != T_LONG, "should not use register args for longs");
duke@435 849 }
kvn@1686 850 #endif // _LP64
duke@435 851 store_c2i_long(r, base, st_off, r_2->is_stack());
duke@435 852 } else {
duke@435 853 store_c2i_int(r, base, st_off);
duke@435 854 }
duke@435 855 } else {
duke@435 856 assert(r_1->is_FloatRegister(), "");
duke@435 857 if (sig_bt[i] == T_FLOAT) {
duke@435 858 store_c2i_float(r_1->as_FloatRegister(), base, st_off);
duke@435 859 } else {
duke@435 860 assert(sig_bt[i] == T_DOUBLE, "wrong type");
duke@435 861 store_c2i_double(r_2, r_1, base, st_off);
duke@435 862 }
duke@435 863 }
duke@435 864 }
duke@435 865
duke@435 866 #ifdef _LP64
duke@435 867 // Need to reload G3_scratch, used for temporary displacements.
duke@435 868 __ ld_ptr(G5_method, in_bytes(methodOopDesc::interpreter_entry_offset()), G3_scratch);
duke@435 869
duke@435 870 // Pass O5_savedSP as an argument to the interpreter.
duke@435 871 // The interpreter will restore SP to this value before returning.
duke@435 872 __ set(extraspace, G1);
duke@435 873 __ add(SP, G1, O5_savedSP);
duke@435 874 #else
duke@435 875 // Pass O5_savedSP as an argument to the interpreter.
duke@435 876 // The interpreter will restore SP to this value before returning.
duke@435 877 __ add(SP, extraspace, O5_savedSP);
duke@435 878 #endif // _LP64
duke@435 879
duke@435 880 __ mov((frame::varargs_offset)*wordSize -
twisti@1861 881 1*Interpreter::stackElementSize+bias+BytesPerWord, G1);
duke@435 882 // Jump to the interpreter just as if interpreter was doing it.
duke@435 883 __ jmpl(G3_scratch, 0, G0);
duke@435 884 // Setup Lesp for the call. Cannot actually set Lesp as the current Lesp
duke@435 885 // (really L0) is in use by the compiled frame as a generic temp. However,
duke@435 886 // the interpreter does not know where its args are without some kind of
duke@435 887 // arg pointer being passed in. Pass it in Gargs.
duke@435 888 __ delayed()->add(SP, G1, Gargs);
duke@435 889 }
duke@435 890
duke@435 891 void AdapterGenerator::gen_i2c_adapter(
duke@435 892 int total_args_passed,
duke@435 893 // VMReg max_arg,
duke@435 894 int comp_args_on_stack, // VMRegStackSlots
duke@435 895 const BasicType *sig_bt,
duke@435 896 const VMRegPair *regs) {
duke@435 897
duke@435 898 // Generate an I2C adapter: adjust the I-frame to make space for the C-frame
duke@435 899 // layout. Lesp was saved by the calling I-frame and will be restored on
duke@435 900 // return. Meanwhile, outgoing arg space is all owned by the callee
duke@435 901 // C-frame, so we can mangle it at will. After adjusting the frame size,
duke@435 902 // hoist register arguments and repack other args according to the compiled
duke@435 903 // code convention. Finally, end in a jump to the compiled code. The entry
duke@435 904 // point address is the start of the buffer.
duke@435 905
duke@435 906 // We will only enter here from an interpreted frame and never from after
duke@435 907 // passing thru a c2i. Azul allowed this but we do not. If we lose the
duke@435 908 // race and use a c2i we will remain interpreted for the race loser(s).
duke@435 909 // This removes all sorts of headaches on the x86 side and also eliminates
duke@435 910 // the possibility of having c2i -> i2c -> c2i -> ... endless transitions.
duke@435 911
duke@435 912 // As you can see from the list of inputs & outputs there are not a lot
duke@435 913 // of temp registers to work with: mostly G1, G3 & G4.
duke@435 914
duke@435 915 // Inputs:
duke@435 916 // G2_thread - TLS
duke@435 917 // G5_method - Method oop
jrose@1145 918 // G4 (Gargs) - Pointer to interpreter's args
jrose@1145 919 // O0..O4 - free for scratch
jrose@1145 920 // O5_savedSP - Caller's saved SP, to be restored if needed
duke@435 921 // O6 - Current SP!
duke@435 922 // O7 - Valid return address
jrose@1145 923 // L0-L7, I0-I7 - Caller's temps (no frame pushed yet)
duke@435 924
duke@435 925 // Outputs:
duke@435 926 // G2_thread - TLS
duke@435 927 // G1, G4 - Outgoing long args in 32-bit build
duke@435 928 // O0-O5 - Outgoing args in compiled layout
duke@435 929 // O6 - Adjusted or restored SP
duke@435 930 // O7 - Valid return address
twisti@1919 931 // L0-L7, I0-I7 - Caller's temps (no frame pushed yet)
duke@435 932 // F0-F7 - more outgoing args
duke@435 933
duke@435 934
jrose@1145 935 // Gargs is the incoming argument base, and also an outgoing argument.
duke@435 936 __ sub(Gargs, BytesPerWord, Gargs);
duke@435 937
duke@435 938 // ON ENTRY TO THE CODE WE ARE MAKING, WE HAVE AN INTERPRETED FRAME
duke@435 939 // WITH O7 HOLDING A VALID RETURN PC
duke@435 940 //
duke@435 941 // | |
duke@435 942 // : java stack :
duke@435 943 // | |
duke@435 944 // +--------------+ <--- start of outgoing args
duke@435 945 // | receiver | |
duke@435 946 // : rest of args : |---size is java-arg-words
duke@435 947 // | | |
duke@435 948 // +--------------+ <--- O4_args (misaligned) and Lesp if prior is not C2I
duke@435 949 // | | |
duke@435 950 // : unused : |---Space for max Java stack, plus stack alignment
duke@435 951 // | | |
duke@435 952 // +--------------+ <--- SP + 16*wordsize
duke@435 953 // | |
duke@435 954 // : window :
duke@435 955 // | |
duke@435 956 // +--------------+ <--- SP
duke@435 957
duke@435 958 // WE REPACK THE STACK. We use the common calling convention layout as
duke@435 959 // discovered by calling SharedRuntime::calling_convention. We assume it
duke@435 960 // causes an arbitrary shuffle of memory, which may require some register
duke@435 961 // temps to do the shuffle. We hope for (and optimize for) the case where
duke@435 962 // temps are not needed. We may have to resize the stack slightly, in case
duke@435 963 // we need alignment padding (32-bit interpreter can pass longs & doubles
duke@435 964 // misaligned, but the compilers expect them aligned).
duke@435 965 //
duke@435 966 // | |
duke@435 967 // : java stack :
duke@435 968 // | |
duke@435 969 // +--------------+ <--- start of outgoing args
duke@435 970 // | pad, align | |
duke@435 971 // +--------------+ |
duke@435 972 // | ints, floats | |---Outgoing stack args, packed low.
duke@435 973 // +--------------+ | First few args in registers.
duke@435 974 // : doubles : |
duke@435 975 // | longs | |
duke@435 976 // +--------------+ <--- SP' + 16*wordsize
duke@435 977 // | |
duke@435 978 // : window :
duke@435 979 // | |
duke@435 980 // +--------------+ <--- SP'
duke@435 981
duke@435 982 // ON EXIT FROM THE CODE WE ARE MAKING, WE STILL HAVE AN INTERPRETED FRAME
duke@435 983 // WITH O7 HOLDING A VALID RETURN PC - ITS JUST THAT THE ARGS ARE NOW SETUP
duke@435 984 // FOR COMPILED CODE AND THE FRAME SLIGHTLY GROWN.
duke@435 985
duke@435 986 // Cut-out for having no stack args. Since up to 6 args are passed
duke@435 987 // in registers, we will commonly have no stack args.
duke@435 988 if (comp_args_on_stack > 0) {
duke@435 989
duke@435 990 // Convert VMReg stack slots to words.
duke@435 991 int comp_words_on_stack = round_to(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
duke@435 992 // Round up to minimum stack alignment, in wordSize (worked example after this block)
duke@435 993 comp_words_on_stack = round_to(comp_words_on_stack, 2);
duke@435 994 // Now compute the distance from Lesp to SP. This calculation does not
duke@435 995 // include the space for total_args_passed because Lesp has not yet popped
duke@435 996 // the arguments.
duke@435 997 __ sub(SP, (comp_words_on_stack)*wordSize, SP);
duke@435 998 }
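  // Worked example of the sizing above (illustrative): with 5 compiled stack
  // slots, 5*4 = 20 bytes rounds up to 3 words on a 64-bit build, the alignment
  // round makes that 4 words, and SP is dropped by 32 bytes.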
duke@435 999
duke@435 1000 // Will jump to the compiled code just as if compiled code was doing it.
duke@435 1001 // Pre-load the register-jump target early, to schedule it better.
duke@435 1002 __ ld_ptr(G5_method, in_bytes(methodOopDesc::from_compiled_offset()), G3);
duke@435 1003
duke@435 1004 // Now generate the shuffle code. Pick up all register args and move the
duke@435 1005 // rest through G1_scratch.
duke@435 1006 for (int i=0; i<total_args_passed; i++) {
duke@435 1007 if (sig_bt[i] == T_VOID) {
duke@435 1008 // Longs and doubles are passed in native word order, but misaligned
duke@435 1009 // in the 32-bit build.
duke@435 1010 assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
duke@435 1011 continue;
duke@435 1012 }
duke@435 1013
duke@435 1014 // Pick up 0, 1 or 2 words from Lesp+offset. Assume mis-aligned in the
duke@435 1015 // 32-bit build and aligned in the 64-bit build. Look for the obvious
duke@435 1016 // ldx/lddf optimizations.
duke@435 1017
duke@435 1018 // Load in argument order going down.
twisti@1861 1019 const int ld_off = (total_args_passed-i)*Interpreter::stackElementSize;
duke@435 1020 set_Rdisp(G1_scratch);
duke@435 1021
duke@435 1022 VMReg r_1 = regs[i].first();
duke@435 1023 VMReg r_2 = regs[i].second();
duke@435 1024 if (!r_1->is_valid()) {
duke@435 1025 assert(!r_2->is_valid(), "");
duke@435 1026 continue;
duke@435 1027 }
duke@435 1028 if (r_1->is_stack()) { // Pretend stack targets are loaded into F8/F9
duke@435 1029 r_1 = F8->as_VMReg(); // as part of the load/store shuffle
duke@435 1030 if (r_2->is_valid()) r_2 = r_1->next();
duke@435 1031 }
duke@435 1032 if (r_1->is_Register()) { // Register argument
duke@435 1033 Register r = r_1->as_Register()->after_restore();
duke@435 1034 if (!r_2->is_valid()) {
duke@435 1035 __ ld(Gargs, arg_slot(ld_off), r);
duke@435 1036 } else {
duke@435 1037 #ifdef _LP64
duke@435 1038 // In V9, longs are given 2 64-bit slots in the interpreter, but the
duke@435 1039 // data is passed in only 1 slot.
twisti@1441 1040 RegisterOrConstant slot = (sig_bt[i] == T_LONG) ?
duke@435 1041 next_arg_slot(ld_off) : arg_slot(ld_off);
duke@435 1042 __ ldx(Gargs, slot, r);
duke@435 1043 #else
duke@435 1044 // Need to load a 64-bit value into G1/G4, but G1/G4 is being used in the
duke@435 1045 // stack shuffle. Load the first 2 longs into G1/G4 later.
duke@435 1046 #endif
duke@435 1047 }
duke@435 1048 } else {
duke@435 1049 assert(r_1->is_FloatRegister(), "");
duke@435 1050 if (!r_2->is_valid()) {
duke@435 1051 __ ldf(FloatRegisterImpl::S, Gargs, arg_slot(ld_off), r_1->as_FloatRegister());
duke@435 1052 } else {
duke@435 1053 #ifdef _LP64
duke@435 1054 // In V9, doubles are given 2 64-bit slots in the interpreter, but the
duke@435 1055 // data is passed in only 1 slot. This code also handles longs that
duke@435 1056 // are passed on the stack, but need a stack-to-stack move through a
duke@435 1057 // spare float register.
twisti@1441 1058 RegisterOrConstant slot = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ?
duke@435 1059 next_arg_slot(ld_off) : arg_slot(ld_off);
duke@435 1060 __ ldf(FloatRegisterImpl::D, Gargs, slot, r_1->as_FloatRegister());
duke@435 1061 #else
duke@435 1062 // Need to marshal 64-bit value from misaligned Lesp loads
duke@435 1063 __ ldf(FloatRegisterImpl::S, Gargs, next_arg_slot(ld_off), r_1->as_FloatRegister());
duke@435 1064 __ ldf(FloatRegisterImpl::S, Gargs, arg_slot(ld_off), r_2->as_FloatRegister());
duke@435 1065 #endif
duke@435 1066 }
duke@435 1067 }
duke@435 1068 // Was the argument really intended to be on the stack, but was loaded
duke@435 1069 // into F8/F9?
duke@435 1070 if (regs[i].first()->is_stack()) {
duke@435 1071 assert(r_1->as_FloatRegister() == F8, "fix this code");
duke@435 1072 // Convert stack slot to an SP offset
duke@435 1073 int st_off = reg2offset(regs[i].first()) + STACK_BIAS;
duke@435 1074 // Store down the shuffled stack word. Target address _is_ aligned.
twisti@1441 1075 RegisterOrConstant slot = __ ensure_simm13_or_reg(st_off, Rdisp);
twisti@1441 1076 if (!r_2->is_valid()) __ stf(FloatRegisterImpl::S, r_1->as_FloatRegister(), SP, slot);
twisti@1441 1077 else __ stf(FloatRegisterImpl::D, r_1->as_FloatRegister(), SP, slot);
duke@435 1078 }
duke@435 1079 }
duke@435 1080 bool made_space = false;
duke@435 1081 #ifndef _LP64
duke@435 1082 // May need to pick up a few long args in G1/G4
duke@435 1083 bool g4_crushed = false;
duke@435 1084 bool g3_crushed = false;
duke@435 1085 for (int i=0; i<total_args_passed; i++) {
duke@435 1086 if (regs[i].first()->is_Register() && regs[i].second()->is_valid()) {
duke@435 1087 // Load in argument order going down
twisti@1861 1088 int ld_off = (total_args_passed-i)*Interpreter::stackElementSize;
duke@435 1089 // Need to marshal 64-bit value from misaligned Lesp loads
duke@435 1090 Register r = regs[i].first()->as_Register()->after_restore();
duke@435 1091 if (r == G1 || r == G4) {
duke@435 1092 assert(!g4_crushed, "ordering problem");
duke@435 1093 if (r == G4){
duke@435 1094 g4_crushed = true;
duke@435 1095 __ lduw(Gargs, arg_slot(ld_off) , G3_scratch); // Load lo bits
duke@435 1096 __ ld (Gargs, next_arg_slot(ld_off), r); // Load hi bits
duke@435 1097 } else {
duke@435 1098 // better schedule this way
duke@435 1099 __ ld (Gargs, next_arg_slot(ld_off), r); // Load hi bits
duke@435 1100 __ lduw(Gargs, arg_slot(ld_off) , G3_scratch); // Load lo bits
duke@435 1101 }
duke@435 1102 g3_crushed = true;
duke@435 1103 __ sllx(r, 32, r);
duke@435 1104 __ or3(G3_scratch, r, r);
duke@435 1105 } else {
duke@435 1106 assert(r->is_out(), "longs passed in two O registers");
duke@435 1107 __ ld (Gargs, arg_slot(ld_off) , r->successor()); // Load lo bits
duke@435 1108 __ ld (Gargs, next_arg_slot(ld_off), r); // Load hi bits
duke@435 1109 }
duke@435 1110 }
duke@435 1111 }
duke@435 1112 #endif
duke@435 1113
duke@435 1114 // Jump to the compiled code just as if compiled code was doing it.
duke@435 1115 //
duke@435 1116 #ifndef _LP64
duke@435 1117 if (g3_crushed) {
duke@435 1118 // Rats load was wasted, at least it is in cache...
twisti@1162 1119 __ ld_ptr(G5_method, methodOopDesc::from_compiled_offset(), G3);
duke@435 1120 }
duke@435 1121 #endif /* _LP64 */
duke@435 1122
duke@435 1123 // 6243940 We might end up in handle_wrong_method if
duke@435 1124 // the callee is deoptimized as we race thru here. If that
duke@435 1125 // happens we don't want to take a safepoint because the
duke@435 1126 // caller frame will look interpreted and arguments are now
duke@435 1127 // "compiled" so it is much better to make this transition
duke@435 1128 // invisible to the stack walking code. Unfortunately if
duke@435 1129 // we try and find the callee by normal means a safepoint
duke@435 1130 // is possible. So we stash the desired callee in the thread
duke@435 1131 // and the vm will find it there should this case occur.
twisti@1162 1132 Address callee_target_addr(G2_thread, JavaThread::callee_target_offset());
duke@435 1133 __ st_ptr(G5_method, callee_target_addr);
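// Illustrative sketch (assumed consumer, not part of this stub): the handle_wrong_method
// path mentioned above is expected to read the stashed methodOop back out of the thread,
// roughly
//   methodOop callee = thread->callee_target();
//   /* ... re-resolve / patch the call site ... */
//   thread->set_callee_target(NULL);
// so the hand-off goes purely through JavaThread and stays invisible to stack walking.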
duke@435 1134
duke@435 1135 if (StressNonEntrant) {
duke@435 1136 // Open a big window for deopt failure
duke@435 1137 __ save_frame(0);
duke@435 1138 __ mov(G0, L0);
duke@435 1139 Label loop;
duke@435 1140 __ bind(loop);
duke@435 1141 __ sub(L0, 1, L0);
duke@435 1142 __ br_null(L0, false, Assembler::pt, loop);
duke@435 1143 __ delayed()->nop();
duke@435 1144
duke@435 1145 __ restore();
duke@435 1146 }
duke@435 1147
duke@435 1148
duke@435 1149 __ jmpl(G3, 0, G0);
duke@435 1150 __ delayed()->nop();
duke@435 1151 }
duke@435 1152
duke@435 1153 // ---------------------------------------------------------------
duke@435 1154 AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
duke@435 1155 int total_args_passed,
duke@435 1156 // VMReg max_arg,
duke@435 1157 int comp_args_on_stack, // VMRegStackSlots
duke@435 1158 const BasicType *sig_bt,
never@1622 1159 const VMRegPair *regs,
never@1622 1160 AdapterFingerPrint* fingerprint) {
duke@435 1161 address i2c_entry = __ pc();
duke@435 1162
duke@435 1163 AdapterGenerator agen(masm);
duke@435 1164
duke@435 1165 agen.gen_i2c_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs);
duke@435 1166
duke@435 1167
duke@435 1168 // -------------------------------------------------------------------------
duke@435 1169 // Generate a C2I adapter. On entry we know G5 holds the methodOop. The
duke@435 1170 // args start out packed in the compiled layout. They need to be unpacked
duke@435 1171 // into the interpreter layout. This will almost always require some stack
duke@435 1172 // space. We grow the current (compiled) stack, then repack the args. We
duke@435 1173 // finally end in a jump to the generic interpreter entry point. On exit
duke@435 1174 // from the interpreter, the interpreter will restore our SP (lest the
duke@435 1175 // compiled code, which relies solely on SP and not FP, get sick).
duke@435 1176
duke@435 1177 address c2i_unverified_entry = __ pc();
duke@435 1178 Label skip_fixup;
duke@435 1179 {
duke@435 1180 #if !defined(_LP64) && defined(COMPILER2)
duke@435 1181 Register R_temp = L0; // another scratch register
duke@435 1182 #else
duke@435 1183 Register R_temp = G1; // another scratch register
duke@435 1184 #endif
duke@435 1185
twisti@1162 1186 AddressLiteral ic_miss(SharedRuntime::get_ic_miss_stub());
duke@435 1187
duke@435 1188 __ verify_oop(O0);
duke@435 1189 __ verify_oop(G5_method);
coleenp@548 1190 __ load_klass(O0, G3_scratch);
duke@435 1191 __ verify_oop(G3_scratch);
duke@435 1192
duke@435 1193 #if !defined(_LP64) && defined(COMPILER2)
duke@435 1194 __ save(SP, -frame::register_save_words*wordSize, SP);
duke@435 1195 __ ld_ptr(G5_method, compiledICHolderOopDesc::holder_klass_offset(), R_temp);
duke@435 1196 __ verify_oop(R_temp);
duke@435 1197 __ cmp(G3_scratch, R_temp);
duke@435 1198 __ restore();
duke@435 1199 #else
duke@435 1200 __ ld_ptr(G5_method, compiledICHolderOopDesc::holder_klass_offset(), R_temp);
duke@435 1201 __ verify_oop(R_temp);
duke@435 1202 __ cmp(G3_scratch, R_temp);
duke@435 1203 #endif
duke@435 1204
duke@435 1205 Label ok, ok2;
duke@435 1206 __ brx(Assembler::equal, false, Assembler::pt, ok);
duke@435 1207 __ delayed()->ld_ptr(G5_method, compiledICHolderOopDesc::holder_method_offset(), G5_method);
twisti@1162 1208 __ jump_to(ic_miss, G3_scratch);
duke@435 1209 __ delayed()->nop();
duke@435 1210
duke@435 1211 __ bind(ok);
duke@435 1212 // Method might have been compiled since the call site was patched to
duke@435 1213 // interpreted; if that is the case, treat it as a miss so we can get
duke@435 1214 // the call site corrected.
duke@435 1215 __ ld_ptr(G5_method, in_bytes(methodOopDesc::code_offset()), G3_scratch);
duke@435 1216 __ bind(ok2);
duke@435 1217 __ br_null(G3_scratch, false, __ pt, skip_fixup);
duke@435 1218 __ delayed()->ld_ptr(G5_method, in_bytes(methodOopDesc::interpreter_entry_offset()), G3_scratch);
twisti@1162 1219 __ jump_to(ic_miss, G3_scratch);
duke@435 1220 __ delayed()->nop();
duke@435 1221
duke@435 1222 }
duke@435 1223
duke@435 1224 address c2i_entry = __ pc();
duke@435 1225
duke@435 1226 agen.gen_c2i_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
duke@435 1227
duke@435 1228 __ flush();
never@1622 1229 return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
duke@435 1230
duke@435 1231 }
duke@435 1232
duke@435 1233 // Helper function for native calling conventions
duke@435 1234 static VMReg int_stk_helper( int i ) {
duke@435 1235 // Bias any stack based VMReg we get by ignoring the window area
duke@435 1236 // but not the register parameter save area.
duke@435 1237 //
duke@435 1238 // This is strange for the following reasons. We'd normally expect
duke@435 1239 // the calling convention to return a VMReg for a stack slot,
duke@435 1240 // completely ignoring any abi reserved area. C2 thinks of that
duke@435 1241 // abi area as only out_preserve_stack_slots. This does not include
duke@435 1242 // the area allocated by the C abi to store down integer arguments
duke@435 1243 // because the java calling convention does not use it. So,
duke@435 1244 // since c2 assumes that there are only out_preserve_stack_slots
duke@435 1245 // to bias the optoregs (which impacts VMRegs), the c calling
duke@435 1246 // convention must add in this bias amount whenever it references
duke@435 1247 // an actual stack location, to make up for the fact that
duke@435 1248 // out_preserve_stack_slots is insufficient for C calls. What a mess.
duke@435 1249 // I sure hope those 6 stack words were worth it on every java call!
duke@435 1250
duke@435 1251 // Another way of cleaning this up would be for out_preserve_stack_slots
duke@435 1252 // to take a parameter to say whether it was C or java calling conventions.
duke@435 1253 // Then things might look a little better (but not much).
duke@435 1254
duke@435 1255 int mem_parm_offset = i - SPARC_ARGS_IN_REGS_NUM;
duke@435 1256 if( mem_parm_offset < 0 ) {
duke@435 1257 return as_oRegister(i)->as_VMReg();
duke@435 1258 } else {
duke@435 1259 int actual_offset = (mem_parm_offset + frame::memory_parameter_word_sp_offset) * VMRegImpl::slots_per_word;
duke@435 1260 // Now return a biased offset that will be correct when out_preserve_slots is added back in
duke@435 1261 return VMRegImpl::stack2reg(actual_offset - SharedRuntime::out_preserve_stack_slots());
duke@435 1262 }
duke@435 1263 }
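// Illustrative mapping (assuming the usual SPARC_ARGS_IN_REGS_NUM == 6); values are for
// explanation only:
//   int_stk_helper(0..5) -> O0..O5 (as VMRegs)
//   int_stk_helper(6)    -> a stack VMReg biased so that adding
//                           out_preserve_stack_slots() back in lands on the first
//                           memory parameter word of the frame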
duke@435 1264
duke@435 1265
duke@435 1266 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
duke@435 1267 VMRegPair *regs,
duke@435 1268 int total_args_passed) {
duke@435 1269
duke@435 1270 // Return the number of VMReg stack_slots needed for the args.
duke@435 1271 // This value does not include an abi space (like register window
duke@435 1272 // save area).
duke@435 1273
duke@435 1274 // The native convention is V8 if !LP64
duke@435 1275 // The LP64 convention is the V9 convention which is slightly more sane.
duke@435 1276
duke@435 1277 // We return the amount of VMReg stack slots we need to reserve for all
duke@435 1278 // the arguments NOT counting out_preserve_stack_slots. Since we always
duke@435 1279 // have space for storing at least 6 registers to memory we start with that.
duke@435 1280 // See int_stk_helper for a further discussion.
duke@435 1281 int max_stack_slots = (frame::varargs_offset * VMRegImpl::slots_per_word) - SharedRuntime::out_preserve_stack_slots();
duke@435 1282
duke@435 1283 #ifdef _LP64
duke@435 1284 // V9 convention: All things "as-if" on double-wide stack slots.
duke@435 1285 // Hoist any int/ptr/long's in the first 6 to int regs.
duke@435 1286 // Hoist any flt/dbl's in the first 16 dbl regs.
duke@435 1287 int j = 0; // Count of actual args, not HALVES
duke@435 1288 for( int i=0; i<total_args_passed; i++, j++ ) {
duke@435 1289 switch( sig_bt[i] ) {
duke@435 1290 case T_BOOLEAN:
duke@435 1291 case T_BYTE:
duke@435 1292 case T_CHAR:
duke@435 1293 case T_INT:
duke@435 1294 case T_SHORT:
duke@435 1295 regs[i].set1( int_stk_helper( j ) ); break;
duke@435 1296 case T_LONG:
duke@435 1297 assert( sig_bt[i+1] == T_VOID, "expecting half" );
duke@435 1298 case T_ADDRESS: // raw pointers, like current thread, for VM calls
duke@435 1299 case T_ARRAY:
duke@435 1300 case T_OBJECT:
duke@435 1301 regs[i].set2( int_stk_helper( j ) );
duke@435 1302 break;
duke@435 1303 case T_FLOAT:
duke@435 1304 if ( j < 16 ) {
duke@435 1305 // V9ism: floats go in ODD registers
duke@435 1306 regs[i].set1(as_FloatRegister(1 + (j<<1))->as_VMReg());
duke@435 1307 } else {
duke@435 1308 // V9ism: floats go in ODD stack slot
duke@435 1309 regs[i].set1(VMRegImpl::stack2reg(1 + (j<<1)));
duke@435 1310 }
duke@435 1311 break;
duke@435 1312 case T_DOUBLE:
duke@435 1313 assert( sig_bt[i+1] == T_VOID, "expecting half" );
duke@435 1314 if ( j < 16 ) {
duke@435 1315 // V9ism: doubles go in EVEN/ODD regs
duke@435 1316 regs[i].set2(as_FloatRegister(j<<1)->as_VMReg());
duke@435 1317 } else {
duke@435 1318 // V9ism: doubles go in EVEN/ODD stack slots
duke@435 1319 regs[i].set2(VMRegImpl::stack2reg(j<<1));
duke@435 1320 }
duke@435 1321 break;
duke@435 1322 case T_VOID: regs[i].set_bad(); j--; break; // Do not count HALVES
duke@435 1323 default:
duke@435 1324 ShouldNotReachHere();
duke@435 1325 }
duke@435 1326 if (regs[i].first()->is_stack()) {
duke@435 1327 int off = regs[i].first()->reg2stack();
duke@435 1328 if (off > max_stack_slots) max_stack_slots = off;
duke@435 1329 }
duke@435 1330 if (regs[i].second()->is_stack()) {
duke@435 1331 int off = regs[i].second()->reg2stack();
duke@435 1332 if (off > max_stack_slots) max_stack_slots = off;
duke@435 1333 }
duke@435 1334 }
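  // Worked example (illustrative only) of the V9 assignment above for a native
  // signature (int, double, float), i.e. sig_bt = {T_INT, T_DOUBLE, T_VOID, T_FLOAT}:
  //   j=0  int    -> int_stk_helper(0)            -> O0
  //   j=1  double -> as_FloatRegister(1<<1)       -> F2 (with its odd partner F3)
  //   j=2  float  -> as_FloatRegister(1 + (2<<1)) -> F5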
duke@435 1335
duke@435 1336 #else // _LP64
duke@435 1337 // V8 convention: first 6 things in O-regs, rest on stack.
duke@435 1338 // Alignment is willy-nilly.
duke@435 1339 for( int i=0; i<total_args_passed; i++ ) {
duke@435 1340 switch( sig_bt[i] ) {
duke@435 1341 case T_ADDRESS: // raw pointers, like current thread, for VM calls
duke@435 1342 case T_ARRAY:
duke@435 1343 case T_BOOLEAN:
duke@435 1344 case T_BYTE:
duke@435 1345 case T_CHAR:
duke@435 1346 case T_FLOAT:
duke@435 1347 case T_INT:
duke@435 1348 case T_OBJECT:
duke@435 1349 case T_SHORT:
duke@435 1350 regs[i].set1( int_stk_helper( i ) );
duke@435 1351 break;
duke@435 1352 case T_DOUBLE:
duke@435 1353 case T_LONG:
duke@435 1354 assert( sig_bt[i+1] == T_VOID, "expecting half" );
duke@435 1355 regs[i].set_pair( int_stk_helper( i+1 ), int_stk_helper( i ) );
duke@435 1356 break;
duke@435 1357 case T_VOID: regs[i].set_bad(); break;
duke@435 1358 default:
duke@435 1359 ShouldNotReachHere();
duke@435 1360 }
duke@435 1361 if (regs[i].first()->is_stack()) {
duke@435 1362 int off = regs[i].first()->reg2stack();
duke@435 1363 if (off > max_stack_slots) max_stack_slots = off;
duke@435 1364 }
duke@435 1365 if (regs[i].second()->is_stack()) {
duke@435 1366 int off = regs[i].second()->reg2stack();
duke@435 1367 if (off > max_stack_slots) max_stack_slots = off;
duke@435 1368 }
duke@435 1369 }
duke@435 1370 #endif // _LP64
duke@435 1371
duke@435 1372 return round_to(max_stack_slots + 1, 2);
duke@435 1373
duke@435 1374 }
duke@435 1375
duke@435 1376
duke@435 1377 // ---------------------------------------------------------------------------
duke@435 1378 void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
duke@435 1379 switch (ret_type) {
duke@435 1380 case T_FLOAT:
duke@435 1381 __ stf(FloatRegisterImpl::S, F0, SP, frame_slots*VMRegImpl::stack_slot_size - 4+STACK_BIAS);
duke@435 1382 break;
duke@435 1383 case T_DOUBLE:
duke@435 1384 __ stf(FloatRegisterImpl::D, F0, SP, frame_slots*VMRegImpl::stack_slot_size - 8+STACK_BIAS);
duke@435 1385 break;
duke@435 1386 }
duke@435 1387 }
duke@435 1388
duke@435 1389 void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
duke@435 1390 switch (ret_type) {
duke@435 1391 case T_FLOAT:
duke@435 1392 __ ldf(FloatRegisterImpl::S, SP, frame_slots*VMRegImpl::stack_slot_size - 4+STACK_BIAS, F0);
duke@435 1393 break;
duke@435 1394 case T_DOUBLE:
duke@435 1395 __ ldf(FloatRegisterImpl::D, SP, frame_slots*VMRegImpl::stack_slot_size - 8+STACK_BIAS, F0);
duke@435 1396 break;
duke@435 1397 }
duke@435 1398 }
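// Illustrative note (an assumption about how callers use these helpers): frame_slots is
// the wrapper's full frame size in slots, so the offsets above address the word(s) just
// below FP (FP-4 / FP-8 plus STACK_BIAS), scratch space the wrapper frame reserves near
// its top; integer results simply stay in O0/I0 and need no spill.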
duke@435 1399
duke@435 1400 // Check and forward any pending exception. Thread is stored in
duke@435 1401 // L7_thread_cache and possibly NOT in G2_thread. Since this is a native call, there
duke@435 1402 // is no exception handler. We merely pop this frame off and throw the
duke@435 1403 // exception in the caller's frame.
duke@435 1404 static void check_forward_pending_exception(MacroAssembler *masm, Register Rex_oop) {
duke@435 1405 Label L;
duke@435 1406 __ br_null(Rex_oop, false, Assembler::pt, L);
duke@435 1407 __ delayed()->mov(L7_thread_cache, G2_thread); // restore in case we have exception
duke@435 1408 // Since this is a native call, we *know* the proper exception handler
duke@435 1409 // without calling into the VM: it's the empty function. Just pop this
duke@435 1410 // frame and then jump to forward_exception_entry; O7 will contain the
duke@435 1411 // native caller's return PC.
twisti@1162 1412 AddressLiteral exception_entry(StubRoutines::forward_exception_entry());
twisti@1162 1413 __ jump_to(exception_entry, G3_scratch);
duke@435 1414 __ delayed()->restore(); // Pop this frame off.
duke@435 1415 __ bind(L);
duke@435 1416 }
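// Typical use later in this file (a sketch only; the register holding the pending
// exception oop may differ):
//   __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), O0);
//   check_forward_pending_exception(masm, O0);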
duke@435 1417
duke@435 1418 // A simple move of an integer-like type
duke@435 1419 static void simple_move32(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
duke@435 1420 if (src.first()->is_stack()) {
duke@435 1421 if (dst.first()->is_stack()) {
duke@435 1422 // stack to stack
duke@435 1423 __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5);
duke@435 1424 __ st(L5, SP, reg2offset(dst.first()) + STACK_BIAS);
duke@435 1425 } else {
duke@435 1426 // stack to reg
duke@435 1427 __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
duke@435 1428 }
duke@435 1429 } else if (dst.first()->is_stack()) {
duke@435 1430 // reg to stack
duke@435 1431 __ st(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS);
duke@435 1432 } else {
duke@435 1433 __ mov(src.first()->as_Register(), dst.first()->as_Register());
duke@435 1434 }
duke@435 1435 }
duke@435 1436
duke@435 1437 // On 64 bit we will store integer-like items to the stack as
duke@435 1438 // 64-bit items (sparc abi) even though java would only store
duke@435 1439 // 32 bits for a parameter. On 32 bit it will simply be 32 bits.
duke@435 1440 // So this routine will do 32->32 on 32 bit and 32->64 on 64 bit.
duke@435 1441 static void move32_64(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
duke@435 1442 if (src.first()->is_stack()) {
duke@435 1443 if (dst.first()->is_stack()) {
duke@435 1444 // stack to stack
duke@435 1445 __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5);
duke@435 1446 __ st_ptr(L5, SP, reg2offset(dst.first()) + STACK_BIAS);
duke@435 1447 } else {
duke@435 1448 // stack to reg
duke@435 1449 __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
duke@435 1450 }
duke@435 1451 } else if (dst.first()->is_stack()) {
duke@435 1452 // reg to stack
duke@435 1453 __ st_ptr(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS);
duke@435 1454 } else {
duke@435 1455 __ mov(src.first()->as_Register(), dst.first()->as_Register());
duke@435 1456 }
duke@435 1457 }
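// For illustration: the only difference from simple_move32() above is that stores to
// the stack go through st_ptr (an 8-byte stx under _LP64), so a 32-bit java value
// fills a whole 64-bit C abi slot; the 32-bit source is still loaded with ld.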
duke@435 1458
duke@435 1459
duke@435 1460 // An oop arg. Must pass a handle not the oop itself
duke@435 1461 static void object_move(MacroAssembler* masm,
duke@435 1462 OopMap* map,
duke@435 1463 int oop_handle_offset,
duke@435 1464 int framesize_in_slots,
duke@435 1465 VMRegPair src,
duke@435 1466 VMRegPair dst,
duke@435 1467 bool is_receiver,
duke@435 1468 int* receiver_offset) {
duke@435 1469
duke@435 1470 // must pass a handle. First figure out the location we use as a handle
duke@435 1471
duke@435 1472 if (src.first()->is_stack()) {
duke@435 1473 // Oop is already on the stack
duke@435 1474 Register rHandle = dst.first()->is_stack() ? L5 : dst.first()->as_Register();
duke@435 1475 __ add(FP, reg2offset(src.first()) + STACK_BIAS, rHandle);
duke@435 1476 __ ld_ptr(rHandle, 0, L4);
duke@435 1477 #ifdef _LP64
duke@435 1478 __ movr( Assembler::rc_z, L4, G0, rHandle );
duke@435 1479 #else
duke@435 1480 __ tst( L4 );
duke@435 1481 __ movcc( Assembler::zero, false, Assembler::icc, G0, rHandle );
duke@435 1482 #endif
duke@435 1483 if (dst.first()->is_stack()) {
duke@435 1484 __ st_ptr(rHandle, SP, reg2offset(dst.first()) + STACK_BIAS);
duke@435 1485 }
duke@435 1486 int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
duke@435 1487 if (is_receiver) {
duke@435 1488 *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size;
duke@435 1489 }
duke@435 1490 map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots));
duke@435 1491 } else {
duke@435 1492 // Oop is in an input register; we must flush it to the stack
duke@435 1493 const Register rOop = src.first()->as_Register();
duke@435 1494 const Register rHandle = L5;
duke@435 1495 int oop_slot = rOop->input_number() * VMRegImpl::slots_per_word + oop_handle_offset;
duke@435 1496 int offset = oop_slot*VMRegImpl::stack_slot_size;
duke@435 1497 Label skip;
duke@435 1498 __ st_ptr(rOop, SP, offset + STACK_BIAS);
duke@435 1499 if (is_receiver) {
duke@435 1500 *receiver_offset = oop_slot * VMRegImpl::stack_slot_size;
duke@435 1501 }
duke@435 1502 map->set_oop(VMRegImpl::stack2reg(oop_slot));
duke@435 1503 __ add(SP, offset + STACK_BIAS, rHandle);
duke@435 1504 #ifdef _LP64
duke@435 1505 __ movr( Assembler::rc_z, rOop, G0, rHandle );
duke@435 1506 #else
duke@435 1507 __ tst( rOop );
duke@435 1508 __ movcc( Assembler::zero, false, Assembler::icc, G0, rHandle );
duke@435 1509 #endif
duke@435 1510
duke@435 1511 if (dst.first()->is_stack()) {
duke@435 1512 __ st_ptr(rHandle, SP, reg2offset(dst.first()) + STACK_BIAS);
duke@435 1513 } else {
duke@435 1514 __ mov(rHandle, dst.first()->as_Register());
duke@435 1515 }
duke@435 1516 }
duke@435 1517 }
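// Illustrative summary (not generated code): after object_move the native side sees a
// jobject that is either NULL (when the java oop was NULL) or the address of a stack
// slot holding the oop, conceptually
//   jobject handle = (oop_value == NULL) ? NULL : &slot_holding_oop;
// which is what the movr/movcc-on-zero sequences above implement; the slot itself is
// recorded in the OopMap so gc can update it.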
duke@435 1518
duke@435 1519 // A float arg may have to do float reg to int reg conversion
duke@435 1520 static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
duke@435 1521 assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move");
duke@435 1522
duke@435 1523 if (src.first()->is_stack()) {
duke@435 1524 if (dst.first()->is_stack()) {
duke@435 1525 // stack to stack the easiest of the bunch
duke@435 1526 __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5);
duke@435 1527 __ st(L5, SP, reg2offset(dst.first()) + STACK_BIAS);
duke@435 1528 } else {
duke@435 1529 // stack to reg
duke@435 1530 if (dst.first()->is_Register()) {
duke@435 1531 __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
duke@435 1532 } else {
duke@435 1533 __ ldf(FloatRegisterImpl::S, FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_FloatRegister());
duke@435 1534 }
duke@435 1535 }
duke@435 1536 } else if (dst.first()->is_stack()) {
duke@435 1537 // reg to stack
duke@435 1538 if (src.first()->is_Register()) {
duke@435 1539 __ st(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS);
duke@435 1540 } else {
duke@435 1541 __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), SP, reg2offset(dst.first()) + STACK_BIAS);
duke@435 1542 }
duke@435 1543 } else {
duke@435 1544 // reg to reg
duke@435 1545 if (src.first()->is_Register()) {
duke@435 1546 if (dst.first()->is_Register()) {
duke@435 1547 // gpr -> gpr
duke@435 1548 __ mov(src.first()->as_Register(), dst.first()->as_Register());
duke@435 1549 } else {
duke@435 1550 // gpr -> fpr
duke@435 1551 __ st(src.first()->as_Register(), FP, -4 + STACK_BIAS);
duke@435 1552 __ ldf(FloatRegisterImpl::S, FP, -4 + STACK_BIAS, dst.first()->as_FloatRegister());
duke@435 1553 }
duke@435 1554 } else if (dst.first()->is_Register()) {
duke@435 1555 // fpr -> gpr
duke@435 1556 __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), FP, -4 + STACK_BIAS);
duke@435 1557 __ ld(FP, -4 + STACK_BIAS, dst.first()->as_Register());
duke@435 1558 } else {
duke@435 1559 // fpr -> fpr
duke@435 1560 // In theory these overlap but the ordering is such that this is likely a nop
duke@435 1561 if ( src.first() != dst.first()) {
duke@435 1562 __ fmov(FloatRegisterImpl::S, src.first()->as_FloatRegister(), dst.first()->as_FloatRegister());
duke@435 1563 }
duke@435 1564 }
duke@435 1565 }
duke@435 1566 }
duke@435 1567
duke@435 1568 static void split_long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
duke@435 1569 VMRegPair src_lo(src.first());
duke@435 1570 VMRegPair src_hi(src.second());
duke@435 1571 VMRegPair dst_lo(dst.first());
duke@435 1572 VMRegPair dst_hi(dst.second());
duke@435 1573 simple_move32(masm, src_lo, dst_lo);
duke@435 1574 simple_move32(masm, src_hi, dst_hi);
duke@435 1575 }
duke@435 1576
duke@435 1577 // A long move
duke@435 1578 static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
duke@435 1579
duke@435 1580 // Do the simple ones here else do two int moves
duke@435 1581 if (src.is_single_phys_reg() ) {
duke@435 1582 if (dst.is_single_phys_reg()) {
duke@435 1583 __ mov(src.first()->as_Register(), dst.first()->as_Register());
duke@435 1584 } else {
duke@435 1585 // split src into two separate registers
duke@435 1586 // Remember hi means hi address or lsw on sparc
duke@435 1587 // Move msw to lsw
duke@435 1588 if (dst.second()->is_reg()) {
duke@435 1589 // MSW -> MSW
duke@435 1590 __ srax(src.first()->as_Register(), 32, dst.first()->as_Register());
duke@435 1591 // Now LSW -> LSW
duke@435 1592 // this will only move lo -> lo and ignore hi
duke@435 1593 VMRegPair split(dst.second());
duke@435 1594 simple_move32(masm, src, split);
duke@435 1595 } else {
duke@435 1596 VMRegPair split(src.first(), L4->as_VMReg());
duke@435 1597 // MSW -> MSW (lo ie. first word)
duke@435 1598 __ srax(src.first()->as_Register(), 32, L4);
duke@435 1599 split_long_move(masm, split, dst);
duke@435 1600 }
duke@435 1601 }
duke@435 1602 } else if (dst.is_single_phys_reg()) {
duke@435 1603 if (src.is_adjacent_aligned_on_stack(2)) {
never@739 1604 __ ldx(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
duke@435 1605 } else {
duke@435 1606 // dst is a single reg.
duke@435 1607 // Remember lo is low address not msb for stack slots
duke@435 1608 // and lo is the "real" register for registers
duke@435 1609 // src is
duke@435 1610
duke@435 1611 VMRegPair split;
duke@435 1612
duke@435 1613 if (src.first()->is_reg()) {
duke@435 1614 // src.lo (msw) is a reg, src.hi is stk/reg
duke@435 1615 // we will move: src.hi (LSW) -> dst.lo, src.lo (MSW) -> src.lo [the MSW is in the LSW of the reg]
duke@435 1616 split.set_pair(dst.first(), src.first());
duke@435 1617 } else {
duke@435 1618 // msw is stack move to L5
duke@435 1619 // lsw is stack move to dst.lo (real reg)
duke@435 1620 // we will move: src.hi (LSW) -> dst.lo, src.lo (MSW) -> L5
duke@435 1621 split.set_pair(dst.first(), L5->as_VMReg());
duke@435 1622 }
duke@435 1623
duke@435 1624 // src.lo -> src.lo/L5, src.hi -> dst.lo (the real reg)
duke@435 1625 // msw -> src.lo/L5, lsw -> dst.lo
duke@435 1626 split_long_move(masm, src, split);
duke@435 1627
duke@435 1628 // So dst now has the low order correct; now position
duke@435 1629 // the msw half
duke@435 1630 __ sllx(split.first()->as_Register(), 32, L5);
duke@435 1631
duke@435 1632 const Register d = dst.first()->as_Register();
duke@435 1633 __ or3(L5, d, d);
duke@435 1634 }
duke@435 1635 } else {
duke@435 1636 // For LP64 we can probably do better.
duke@435 1637 split_long_move(masm, src, dst);
duke@435 1638 }
duke@435 1639 }
duke@435 1640
duke@435 1641 // A double move
duke@435 1642 static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
duke@435 1643
duke@435 1644 // The painful thing here is that like long_move a VMRegPair might be
duke@435 1645 // 1: a single physical register
duke@435 1646 // 2: two physical registers (v8)
duke@435 1647 // 3: a physical reg [lo] and a stack slot [hi] (v8)
duke@435 1648 // 4: two stack slots
duke@435 1649
duke@435 1650 // Since src is always a java calling convention we know that the src pair
duke@435 1651 // is always either all registers or all stack (and aligned?)
duke@435 1652
duke@435 1653 // in a register [lo] and a stack slot [hi]
duke@435 1654 if (src.first()->is_stack()) {
duke@435 1655 if (dst.first()->is_stack()) {
duke@435 1656 // stack to stack the easiest of the bunch
duke@435 1657 // ought to be a way to do this where if alignment is ok we use ldd/std when possible
duke@435 1658 __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5);
duke@435 1659 __ ld(FP, reg2offset(src.second()) + STACK_BIAS, L4);
duke@435 1660 __ st(L5, SP, reg2offset(dst.first()) + STACK_BIAS);
duke@435 1661 __ st(L4, SP, reg2offset(dst.second()) + STACK_BIAS);
duke@435 1662 } else {
duke@435 1663 // stack to reg
duke@435 1664 if (dst.second()->is_stack()) {
duke@435 1665 // stack -> reg, stack -> stack
duke@435 1666 __ ld(FP, reg2offset(src.second()) + STACK_BIAS, L4);
duke@435 1667 if (dst.first()->is_Register()) {
duke@435 1668 __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
duke@435 1669 } else {
duke@435 1670 __ ldf(FloatRegisterImpl::S, FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_FloatRegister());
duke@435 1671 }
duke@435 1672 // This was missing. (very rare case)
duke@435 1673 __ st(L4, SP, reg2offset(dst.second()) + STACK_BIAS);
duke@435 1674 } else {
duke@435 1675 // stack -> reg
duke@435 1676 // Eventually optimize for alignment QQQ
duke@435 1677 if (dst.first()->is_Register()) {
duke@435 1678 __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
duke@435 1679 __ ld(FP, reg2offset(src.second()) + STACK_BIAS, dst.second()->as_Register());
duke@435 1680 } else {
duke@435 1681 __ ldf(FloatRegisterImpl::S, FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_FloatRegister());
duke@435 1682 __ ldf(FloatRegisterImpl::S, FP, reg2offset(src.second()) + STACK_BIAS, dst.second()->as_FloatRegister());
duke@435 1683 }
duke@435 1684 }
duke@435 1685 }
duke@435 1686 } else if (dst.first()->is_stack()) {
duke@435 1687 // reg to stack
duke@435 1688 if (src.first()->is_Register()) {
duke@435 1689 // Eventually optimize for alignment QQQ
duke@435 1690 __ st(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS);
duke@435 1691 if (src.second()->is_stack()) {
duke@435 1692 __ ld(FP, reg2offset(src.second()) + STACK_BIAS, L4);
duke@435 1693 __ st(L4, SP, reg2offset(dst.second()) + STACK_BIAS);
duke@435 1694 } else {
duke@435 1695 __ st(src.second()->as_Register(), SP, reg2offset(dst.second()) + STACK_BIAS);
duke@435 1696 }
duke@435 1697 } else {
duke@435 1698 // fpr to stack
duke@435 1699 if (src.second()->is_stack()) {
duke@435 1700 ShouldNotReachHere();
duke@435 1701 } else {
duke@435 1702 // Is the stack aligned?
duke@435 1703 if (reg2offset(dst.first()) & 0x7) {
duke@435 1704 // No do as pairs
duke@435 1705 __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), SP, reg2offset(dst.first()) + STACK_BIAS);
duke@435 1706 __ stf(FloatRegisterImpl::S, src.second()->as_FloatRegister(), SP, reg2offset(dst.second()) + STACK_BIAS);
duke@435 1707 } else {
duke@435 1708 __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), SP, reg2offset(dst.first()) + STACK_BIAS);
duke@435 1709 }
duke@435 1710 }
duke@435 1711 }
duke@435 1712 } else {
duke@435 1713 // reg to reg
duke@435 1714 if (src.first()->is_Register()) {
duke@435 1715 if (dst.first()->is_Register()) {
duke@435 1716 // gpr -> gpr
duke@435 1717 __ mov(src.first()->as_Register(), dst.first()->as_Register());
duke@435 1718 __ mov(src.second()->as_Register(), dst.second()->as_Register());
duke@435 1719 } else {
duke@435 1720 // gpr -> fpr
duke@435 1721 // ought to be able to do a single store
duke@435 1722 __ stx(src.first()->as_Register(), FP, -8 + STACK_BIAS);
duke@435 1723 __ stx(src.second()->as_Register(), FP, -4 + STACK_BIAS);
duke@435 1724 // ought to be able to do a single load
duke@435 1725 __ ldf(FloatRegisterImpl::S, FP, -8 + STACK_BIAS, dst.first()->as_FloatRegister());
duke@435 1726 __ ldf(FloatRegisterImpl::S, FP, -4 + STACK_BIAS, dst.second()->as_FloatRegister());
duke@435 1727 }
duke@435 1728 } else if (dst.first()->is_Register()) {
duke@435 1729 // fpr -> gpr
duke@435 1730 // ought to be able to do a single store
duke@435 1731 __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), FP, -8 + STACK_BIAS);
duke@435 1732 // ought to be able to do a single load
duke@435 1733 // REMEMBER first() is low address not LSB
duke@435 1734 __ ld(FP, -8 + STACK_BIAS, dst.first()->as_Register());
duke@435 1735 if (dst.second()->is_Register()) {
duke@435 1736 __ ld(FP, -4 + STACK_BIAS, dst.second()->as_Register());
duke@435 1737 } else {
duke@435 1738 __ ld(FP, -4 + STACK_BIAS, L4);
duke@435 1739 __ st(L4, SP, reg2offset(dst.second()) + STACK_BIAS);
duke@435 1740 }
duke@435 1741 } else {
duke@435 1742 // fpr -> fpr
duke@435 1743 // In theory these overlap but the ordering is such that this is likely a nop
duke@435 1744 if ( src.first() != dst.first()) {
duke@435 1745 __ fmov(FloatRegisterImpl::D, src.first()->as_FloatRegister(), dst.first()->as_FloatRegister());
duke@435 1746 }
duke@435 1747 }
duke@435 1748 }
duke@435 1749 }
duke@435 1750
duke@435 1751 // Creates an inner frame if one hasn't already been created, and
duke@435 1752 // saves a copy of the thread in L7_thread_cache
duke@435 1753 static void create_inner_frame(MacroAssembler* masm, bool* already_created) {
duke@435 1754 if (!*already_created) {
duke@435 1755 __ save_frame(0);
duke@435 1756 // Save thread in L7 (INNER FRAME); it crosses a bunch of VM calls below
duke@435 1757 // Don't use save_thread because it smashes G2 and we merely want to save a
duke@435 1758 // copy
duke@435 1759 __ mov(G2_thread, L7_thread_cache);
duke@435 1760 *already_created = true;
duke@435 1761 }
duke@435 1762 }
duke@435 1763
duke@435 1764 // ---------------------------------------------------------------------------
duke@435 1765 // Generate a native wrapper for a given method. The method takes arguments
duke@435 1766 // in the Java compiled code convention, marshals them to the native
duke@435 1767 // convention (handlizes oops, etc), transitions to native, makes the call,
duke@435 1768 // returns to java state (possibly blocking), unhandlizes any result and
duke@435 1769 // returns.
duke@435 1770 nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
duke@435 1771 methodHandle method,
duke@435 1772 int total_in_args,
duke@435 1773 int comp_args_on_stack, // in VMRegStackSlots
duke@435 1774 BasicType *in_sig_bt,
duke@435 1775 VMRegPair *in_regs,
duke@435 1776 BasicType ret_type) {
duke@435 1777
duke@435 1778 // Native nmethod wrappers never take possession of the oop arguments.
duke@435 1779 // So the caller will gc the arguments. The only thing we need an
duke@435 1780 // oopMap for is if the call is static
duke@435 1781 //
duke@435 1782 // An OopMap for lock (and class if static), and one for the VM call itself
duke@435 1783 OopMapSet *oop_maps = new OopMapSet();
duke@435 1784 intptr_t start = (intptr_t)__ pc();
duke@435 1785
duke@435 1786 // First thing make an ic check to see if we should even be here
duke@435 1787 {
duke@435 1788 Label L;
duke@435 1789 const Register temp_reg = G3_scratch;
twisti@1162 1790 AddressLiteral ic_miss(SharedRuntime::get_ic_miss_stub());
duke@435 1791 __ verify_oop(O0);
coleenp@548 1792 __ load_klass(O0, temp_reg);
duke@435 1793 __ cmp(temp_reg, G5_inline_cache_reg);
duke@435 1794 __ brx(Assembler::equal, true, Assembler::pt, L);
duke@435 1795 __ delayed()->nop();
duke@435 1796
twisti@1162 1797 __ jump_to(ic_miss, temp_reg);
duke@435 1798 __ delayed()->nop();
duke@435 1799 __ align(CodeEntryAlignment);
duke@435 1800 __ bind(L);
duke@435 1801 }
duke@435 1802
duke@435 1803 int vep_offset = ((intptr_t)__ pc()) - start;
duke@435 1804
duke@435 1805 #ifdef COMPILER1
duke@435 1806 if (InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) {
duke@435 1807 // Object.hashCode can pull the hashCode from the header word
duke@435 1808 // instead of doing a full VM transition once it's been computed.
duke@435 1809 // Since hashCode is usually polymorphic at call sites we can't do
duke@435 1810 // this optimization at the call site without a lot of work.
duke@435 1811 Label slowCase;
duke@435 1812 Register receiver = O0;
duke@435 1813 Register result = O0;
duke@435 1814 Register header = G3_scratch;
duke@435 1815 Register hash = G3_scratch; // overwrite header value with hash value
duke@435 1816 Register mask = G1; // to get hash field from header
duke@435 1817
duke@435 1818 // Read the header and build a mask to get its hash field. Give up if the object is not unlocked.
duke@435 1819 // We depend on hash_mask being at most 32 bits and avoid the use of
duke@435 1820 // hash_mask_in_place because it could be larger than 32 bits in a 64-bit
duke@435 1821 // vm: see markOop.hpp.
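  // For illustration: the mask is materialized in two pieces, roughly
  //   mask = sethi(hash_mask)        // upper bits, low 10 bits cleared
  //   mask |= (hash_mask & 0x3ff)    // low 10 bits, via the or3 in a delay slot below
  // which relies on the assumption stated above that hash_mask fits in 32 bits.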
duke@435 1822 __ ld_ptr(receiver, oopDesc::mark_offset_in_bytes(), header);
duke@435 1823 __ sethi(markOopDesc::hash_mask, mask);
duke@435 1824 __ btst(markOopDesc::unlocked_value, header);
duke@435 1825 __ br(Assembler::zero, false, Assembler::pn, slowCase);
duke@435 1826 if (UseBiasedLocking) {
duke@435 1827 // Check if biased and fall through to runtime if so
duke@435 1828 __ delayed()->nop();
duke@435 1829 __ btst(markOopDesc::biased_lock_bit_in_place, header);
duke@435 1830 __ br(Assembler::notZero, false, Assembler::pn, slowCase);
duke@435 1831 }
duke@435 1832 __ delayed()->or3(mask, markOopDesc::hash_mask & 0x3ff, mask);
duke@435 1833
duke@435 1834 // Check for a valid (non-zero) hash code and get its value.
duke@435 1835 #ifdef _LP64
duke@435 1836 __ srlx(header, markOopDesc::hash_shift, hash);
duke@435 1837 #else
duke@435 1838 __ srl(header, markOopDesc::hash_shift, hash);
duke@435 1839 #endif
duke@435 1840 __ andcc(hash, mask, hash);
duke@435 1841 __ br(Assembler::equal, false, Assembler::pn, slowCase);
duke@435 1842 __ delayed()->nop();
duke@435 1843
duke@435 1844 // leaf return.
duke@435 1845 __ retl();
duke@435 1846 __ delayed()->mov(hash, result);
duke@435 1847 __ bind(slowCase);
duke@435 1848 }
duke@435 1849 #endif // COMPILER1
duke@435 1850
duke@435 1851
duke@435 1852 // We have received a description of where all the java args are located
duke@435 1853 // on entry to the wrapper. We need to convert these args to where
duke@435 1854 // the jni function will expect them. To figure out where they go
duke@435 1855 // we convert the java signature to a C signature by inserting
duke@435 1856 // the hidden arguments as arg[0] and possibly arg[1] (static method)
duke@435 1857
duke@435 1858 int total_c_args = total_in_args + 1;
duke@435 1859 if (method->is_static()) {
duke@435 1860 total_c_args++;
duke@435 1861 }
duke@435 1862
duke@435 1863 BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
duke@435 1864 VMRegPair * out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
duke@435 1865
duke@435 1866 int argc = 0;
duke@435 1867 out_sig_bt[argc++] = T_ADDRESS;
duke@435 1868 if (method->is_static()) {
duke@435 1869 out_sig_bt[argc++] = T_OBJECT;
duke@435 1870 }
duke@435 1871
duke@435 1872 for (int i = 0; i < total_in_args ; i++ ) {
duke@435 1873 out_sig_bt[argc++] = in_sig_bt[i];
duke@435 1874 }
duke@435 1875
duke@435 1876 // Now figure out where the args must be stored and how much stack space
duke@435 1877 // they require (neglecting out_preserve_stack_slots but including space
duke@435 1878 // for storing the 1st six register arguments). It's weird; see int_stk_helper.
duke@435 1879 //
duke@435 1880 int out_arg_slots;
duke@435 1881 out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args);
duke@435 1882
duke@435 1883 // Compute framesize for the wrapper. We need to handlize all oops in
duke@435 1884 // registers. We must create space for them here that is disjoint from
duke@435 1885 // the windowed save area because we have no control over when we might
duke@435 1886 // flush the window again and overwrite values that gc has since modified.
duke@435 1887 // (The live window race)
duke@435 1888 //
duke@435 1889 // We always just allocate 6 words for storing down these objects. This allows
duke@435 1890 // us to simply record the base and use the Ireg number to decide which
duke@435 1891 // slot to use. (Note that the reg number is the inbound number not the
duke@435 1892 // outbound number).
duke@435 1893 // We must shuffle args to match the native convention, and include var-args space.
duke@435 1894
duke@435 1895 // Calculate the total number of stack slots we will need.
duke@435 1896
duke@435 1897 // First count the abi requirement plus all of the outgoing args
duke@435 1898 int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
duke@435 1899
duke@435 1900 // Now the space for the inbound oop handle area
duke@435 1901
duke@435 1902 int oop_handle_offset = stack_slots;
duke@435 1903 stack_slots += 6*VMRegImpl::slots_per_word;
duke@435 1904
duke@435 1905 // Now any space we need for handlizing a klass if static method
duke@435 1906
duke@435 1907 int oop_temp_slot_offset = 0;
duke@435 1908 int klass_slot_offset = 0;
duke@435 1909 int klass_offset = -1;
duke@435 1910 int lock_slot_offset = 0;
duke@435 1911 bool is_static = false;
duke@435 1912
duke@435 1913 if (method->is_static()) {
duke@435 1914 klass_slot_offset = stack_slots;
duke@435 1915 stack_slots += VMRegImpl::slots_per_word;
duke@435 1916 klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size;
duke@435 1917 is_static = true;
duke@435 1918 }
duke@435 1919
duke@435 1920 // Plus a lock if needed
duke@435 1921
duke@435 1922 if (method->is_synchronized()) {
duke@435 1923 lock_slot_offset = stack_slots;
duke@435 1924 stack_slots += VMRegImpl::slots_per_word;
duke@435 1925 }
duke@435 1926
duke@435 1927 // Now a place to save the return value or to use as a temporary for any gpr -> fpr moves
duke@435 1928 stack_slots += 2;
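  // Illustrative running total at this point (symbolic, before rounding):
  //   stack_slots = out_preserve_stack_slots()        // abi area
  //               + out_arg_slots                     // outgoing C args
  //               + 6 * VMRegImpl::slots_per_word     // oop handle area
  //               + (static?       VMRegImpl::slots_per_word : 0)  // klass handle
  //               + (synchronized? VMRegImpl::slots_per_word : 0)  // lock box
  //               + 2                                 // return value / move temp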
duke@435 1929
duke@435 1930 // OK, the space we have allocated will look like:
duke@435 1931 //
duke@435 1932 //
duke@435 1933 // FP-> | |
duke@435 1934 // |---------------------|
duke@435 1935 // | 2 slots for moves |
duke@435 1936 // |---------------------|
duke@435 1937 // | lock box (if sync) |
duke@435 1938 // |---------------------| <- lock_slot_offset
duke@435 1939 // | klass (if static) |
duke@435 1940 // |---------------------| <- klass_slot_offset
duke@435 1941 // | oopHandle area |
duke@435 1942 // |---------------------| <- oop_handle_offset
duke@435 1943 // | outbound memory |
duke@435 1944 // | based arguments |
duke@435 1945 // | |
duke@435 1946 // |---------------------|
duke@435 1947 // | vararg area |
duke@435 1948 // |---------------------|
duke@435 1949 // | |
duke@435 1950 // SP-> | out_preserved_slots |
duke@435 1951 //
duke@435 1952 //
duke@435 1953
duke@435 1954
duke@435 1955 // Now compute actual number of stack words we need rounding to make
duke@435 1956 // stack properly aligned.
duke@435 1957 stack_slots = round_to(stack_slots, 2 * VMRegImpl::slots_per_word);
duke@435 1958
duke@435 1959 int stack_size = stack_slots * VMRegImpl::stack_slot_size;
duke@435 1960
duke@435 1961 // Generate stack overflow check before creating frame
duke@435 1962 __ generate_stack_overflow_check(stack_size);
duke@435 1963
duke@435 1964 // Generate a new frame for the wrapper.
duke@435 1965 __ save(SP, -stack_size, SP);
duke@435 1966
duke@435 1967 int frame_complete = ((intptr_t)__ pc()) - start;
duke@435 1968
duke@435 1969 __ verify_thread();
duke@435 1970
duke@435 1971
duke@435 1972 //
duke@435 1973 // We immediately shuffle the arguments so that for any vm call we have to
duke@435 1974 // make from here on out (sync slow path, jvmti, etc.) we will have
duke@435 1975 // captured the oops from our caller and have a valid oopMap for
duke@435 1976 // them.
duke@435 1977
duke@435 1978 // -----------------
duke@435 1979 // The Grand Shuffle
duke@435 1980 //
duke@435 1981 // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv*
duke@435 1982 // (derived from JavaThread* which is in L7_thread_cache) and, if static,
duke@435 1983 // the class mirror instead of a receiver. This pretty much guarantees that
duke@435 1984 // register layout will not match. We ignore these extra arguments during
duke@435 1985 // the shuffle. The shuffle is described by the two calling convention
duke@435 1986 // vectors we have in our possession. We simply walk the java vector to
duke@435 1987 // get the source locations and the c vector to get the destinations.
duke@435 1988 // Because we have a new window and the argument registers are completely
duke@435 1989 // disjoint ( I0 -> O1, I1 -> O2, ...) we have nothing to worry about
duke@435 1990 // here.
duke@435 1991
duke@435 1992 // This is a trick. We double the stack slots so we can claim
duke@435 1993 // the oops in the caller's frame. Since we are sure to have
duke@435 1994 // more args than the caller, doubling is enough to make
duke@435 1995 // sure we can capture all the incoming oop args from the
duke@435 1996 // caller.
duke@435 1997 //
duke@435 1998 OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
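  // For illustration: the object_move() calls below record a caller-frame oop at map
  // index src_slot + out_preserve_stack_slots() + stack_slots, i.e. past this frame's
  // own slots, which is why the map is sized at stack_slots * 2 rather than stack_slots.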
duke@435 1999 int c_arg = total_c_args - 1;
duke@435 2000 // Record sp-based slot for receiver on stack for non-static methods
duke@435 2001 int receiver_offset = -1;
duke@435 2002
duke@435 2003 // We move the arguments backward because the floating point register
duke@435 2004 // destination will always be a register with a greater or equal register
duke@435 2005 // number, or the stack.
duke@435 2006
duke@435 2007 #ifdef ASSERT
duke@435 2008 bool reg_destroyed[RegisterImpl::number_of_registers];
duke@435 2009 bool freg_destroyed[FloatRegisterImpl::number_of_registers];
duke@435 2010 for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) {
duke@435 2011 reg_destroyed[r] = false;
duke@435 2012 }
duke@435 2013 for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) {
duke@435 2014 freg_destroyed[f] = false;
duke@435 2015 }
duke@435 2016
duke@435 2017 #endif /* ASSERT */
duke@435 2018
duke@435 2019 for ( int i = total_in_args - 1; i >= 0 ; i--, c_arg-- ) {
duke@435 2020
duke@435 2021 #ifdef ASSERT
duke@435 2022 if (in_regs[i].first()->is_Register()) {
duke@435 2023 assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "ack!");
duke@435 2024 } else if (in_regs[i].first()->is_FloatRegister()) {
duke@435 2025 assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding(FloatRegisterImpl::S)], "ack!");
duke@435 2026 }
duke@435 2027 if (out_regs[c_arg].first()->is_Register()) {
duke@435 2028 reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
duke@435 2029 } else if (out_regs[c_arg].first()->is_FloatRegister()) {
duke@435 2030 freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding(FloatRegisterImpl::S)] = true;
duke@435 2031 }
duke@435 2032 #endif /* ASSERT */
duke@435 2033
duke@435 2034 switch (in_sig_bt[i]) {
duke@435 2035 case T_ARRAY:
duke@435 2036 case T_OBJECT:
duke@435 2037 object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
duke@435 2038 ((i == 0) && (!is_static)),
duke@435 2039 &receiver_offset);
duke@435 2040 break;
duke@435 2041 case T_VOID:
duke@435 2042 break;
duke@435 2043
duke@435 2044 case T_FLOAT:
duke@435 2045 float_move(masm, in_regs[i], out_regs[c_arg]);
duke@435 2046 break;
duke@435 2047
duke@435 2048 case T_DOUBLE:
duke@435 2049 assert( i + 1 < total_in_args &&
duke@435 2050 in_sig_bt[i + 1] == T_VOID &&
duke@435 2051 out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
duke@435 2052 double_move(masm, in_regs[i], out_regs[c_arg]);
duke@435 2053 break;
duke@435 2054
duke@435 2055 case T_LONG :
duke@435 2056 long_move(masm, in_regs[i], out_regs[c_arg]);
duke@435 2057 break;
duke@435 2058
duke@435 2059 case T_ADDRESS: assert(false, "found T_ADDRESS in java args");
duke@435 2060
duke@435 2061 default:
duke@435 2062 move32_64(masm, in_regs[i], out_regs[c_arg]);
duke@435 2063 }
duke@435 2064 }
duke@435 2065
duke@435 2066 // Pre-load a static method's oop into O1. Used both by locking code and
duke@435 2067 // the normal JNI call code.
duke@435 2068 if (method->is_static()) {
duke@435 2069 __ set_oop_constant(JNIHandles::make_local(Klass::cast(method->method_holder())->java_mirror()), O1);
duke@435 2070
duke@435 2071 // Now handlize the static class mirror in O1. It's known not-null.
duke@435 2072 __ st_ptr(O1, SP, klass_offset + STACK_BIAS);
duke@435 2073 map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
duke@435 2074 __ add(SP, klass_offset + STACK_BIAS, O1);
duke@435 2075 }
duke@435 2076
duke@435 2077
duke@435 2078 const Register L6_handle = L6;
duke@435 2079
duke@435 2080 if (method->is_synchronized()) {
duke@435 2081 __ mov(O1, L6_handle);
duke@435 2082 }
duke@435 2083
duke@435 2084 // We have all of the arguments set up at this point. We MUST NOT touch any Oregs
duke@435 2085 // except O6/O7. So if we must call out we must push a new frame. We immediately
duke@435 2086 // push a new frame and flush the windows.
duke@435 2087
duke@435 2088 #ifdef _LP64
duke@435 2089 intptr_t thepc = (intptr_t) __ pc();
duke@435 2090 {
duke@435 2091 address here = __ pc();
duke@435 2092 // Call the next instruction
duke@435 2093 __ call(here + 8, relocInfo::none);
duke@435 2094 __ delayed()->nop();
duke@435 2095 }
duke@435 2096 #else
duke@435 2097 intptr_t thepc = __ load_pc_address(O7, 0);
duke@435 2098 #endif /* _LP64 */
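  // For illustration: under _LP64 the call to "here + 8" above is executed only for its
  // side effect of leaving the current pc in O7 (the call's return-address register);
  // the 32-bit path gets the same pc via load_pc_address().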
duke@435 2099
duke@435 2100 // We use the same pc/oopMap repeatedly when we call out
duke@435 2101 oop_maps->add_gc_map(thepc - start, map);
duke@435 2102
duke@435 2103 // O7 now has the pc loaded that we will use when we finally call to native.
duke@435 2104
duke@435 2105 // Save thread in L7; it crosses a bunch of VM calls below
duke@435 2106 // Don't use save_thread because it smashes G2 and we merely
duke@435 2107 // want to save a copy
duke@435 2108 __ mov(G2_thread, L7_thread_cache);
duke@435 2109
duke@435 2110
duke@435 2111 // If we create an inner frame, once is plenty;
duke@435 2112 // when we create it we must also save G2_thread
duke@435 2113 bool inner_frame_created = false;
duke@435 2114
duke@435 2115 // dtrace method entry support
duke@435 2116 {
duke@435 2117 SkipIfEqual skip_if(
duke@435 2118 masm, G3_scratch, &DTraceMethodProbes, Assembler::zero);
duke@435 2119 // create inner frame
duke@435 2120 __ save_frame(0);
duke@435 2121 __ mov(G2_thread, L7_thread_cache);
duke@435 2122 __ set_oop_constant(JNIHandles::make_local(method()), O1);
duke@435 2123 __ call_VM_leaf(L7_thread_cache,
duke@435 2124 CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry),
duke@435 2125 G2_thread, O1);
duke@435 2126 __ restore();
duke@435 2127 }
duke@435 2128
dcubed@1045 2129 // RedefineClasses() tracing support for obsolete method entry
dcubed@1045 2130 if (RC_TRACE_IN_RANGE(0x00001000, 0x00002000)) {
dcubed@1045 2131 // create inner frame
dcubed@1045 2132 __ save_frame(0);
dcubed@1045 2133 __ mov(G2_thread, L7_thread_cache);
dcubed@1045 2134 __ set_oop_constant(JNIHandles::make_local(method()), O1);
dcubed@1045 2135 __ call_VM_leaf(L7_thread_cache,
dcubed@1045 2136 CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry),
dcubed@1045 2137 G2_thread, O1);
dcubed@1045 2138 __ restore();
dcubed@1045 2139 }
dcubed@1045 2140
duke@435 2141 // We are in the jni frame unless inner_frame_created is true, in which
duke@435 2142 // case we are one frame deeper (the "inner" frame). If we are in the
duke@435 2143 // "inner" frame the args are in the Iregs; if in the jni frame then
duke@435 2144 // they are in the Oregs.
duke@435 2145 // If we ever need to go to the VM (for locking, jvmti) then
duke@435 2146 // we will always be in the "inner" frame.
duke@435 2147
duke@435 2148 // Lock a synchronized method
duke@435 2149 int lock_offset = -1; // Set if locked
duke@435 2150 if (method->is_synchronized()) {
duke@435 2151 Register Roop = O1;
duke@435 2152 const Register L3_box = L3;
duke@435 2153
duke@435 2154 create_inner_frame(masm, &inner_frame_created);
duke@435 2155
duke@435 2156 __ ld_ptr(I1, 0, O1);
duke@435 2157 Label done;
duke@435 2158
duke@435 2159 lock_offset = (lock_slot_offset * VMRegImpl::stack_slot_size);
duke@435 2160 __ add(FP, lock_offset+STACK_BIAS, L3_box);
duke@435 2161 #ifdef ASSERT
duke@435 2162 if (UseBiasedLocking) {
duke@435 2163 // making the box point to itself will make it clear it went unused
duke@435 2164 // but also be obviously invalid
duke@435 2165 __ st_ptr(L3_box, L3_box, 0);
duke@435 2166 }
duke@435 2167 #endif // ASSERT
duke@435 2168 //
duke@435 2169 // Compiler_lock_object (Roop, Rmark, Rbox, Rscratch) -- kills Rmark, Rbox, Rscratch
duke@435 2170 //
duke@435 2171 __ compiler_lock_object(Roop, L1, L3_box, L2);
duke@435 2172 __ br(Assembler::equal, false, Assembler::pt, done);
duke@435 2173 __ delayed() -> add(FP, lock_offset+STACK_BIAS, L3_box);
duke@435 2174
duke@435 2175
duke@435 2176 // None of the above fast optimizations worked so we have to get into the
duke@435 2177 // slow case of monitor enter. Inline a special case of call_VM that
duke@435 2178 // disallows any pending_exception.
duke@435 2179 __ mov(Roop, O0); // Need oop in O0
duke@435 2180 __ mov(L3_box, O1);
duke@435 2181
duke@435 2182 // Record last_Java_sp, in case the VM code releases the JVM lock.
duke@435 2183
duke@435 2184 __ set_last_Java_frame(FP, I7);
duke@435 2185
duke@435 2186 // do the call
duke@435 2187 __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), relocInfo::runtime_call_type);
duke@435 2188 __ delayed()->mov(L7_thread_cache, O2);
duke@435 2189
duke@435 2190 __ restore_thread(L7_thread_cache); // restore G2_thread
duke@435 2191 __ reset_last_Java_frame();
duke@435 2192
duke@435 2193 #ifdef ASSERT
duke@435 2194 { Label L;
duke@435 2195 __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), O0);
duke@435 2196 __ br_null(O0, false, Assembler::pt, L);
duke@435 2197 __ delayed()->nop();
duke@435 2198 __ stop("no pending exception allowed on exit from IR::monitorenter");
duke@435 2199 __ bind(L);
duke@435 2200 }
duke@435 2201 #endif
duke@435 2202 __ bind(done);
duke@435 2203 }
duke@435 2204
duke@435 2205
duke@435 2206 // Finally just about ready to make the JNI call
duke@435 2207
duke@435 2208 __ flush_windows();
duke@435 2209 if (inner_frame_created) {
duke@435 2210 __ restore();
duke@435 2211 } else {
duke@435 2212 // Store only what we need from this frame
duke@435 2213 // QQQ I think that on non-v9 (like we care) we don't need these saves
duke@435 2214 // either as the flush traps and the current window goes too.
duke@435 2215 __ st_ptr(FP, SP, FP->sp_offset_in_saved_window()*wordSize + STACK_BIAS);
duke@435 2216 __ st_ptr(I7, SP, I7->sp_offset_in_saved_window()*wordSize + STACK_BIAS);
duke@435 2217 }
duke@435 2218
duke@435 2219 // get JNIEnv* which is first argument to native
duke@435 2220
duke@435 2221 __ add(G2_thread, in_bytes(JavaThread::jni_environment_offset()), O0);
duke@435 2222
duke@435 2223 // Use that pc we placed in O7 a while back as the current frame anchor
duke@435 2224
duke@435 2225 __ set_last_Java_frame(SP, O7);
duke@435 2226
duke@435 2227 // Transition from _thread_in_Java to _thread_in_native.
duke@435 2228 __ set(_thread_in_native, G3_scratch);
twisti@1162 2229 __ st(G3_scratch, G2_thread, JavaThread::thread_state_offset());
duke@435 2230
duke@435 2231 // We flushed the windows ages ago; now mark them as flushed
duke@435 2232
duke@435 2233 // mark windows as flushed
duke@435 2234 __ set(JavaFrameAnchor::flushed, G3_scratch);
duke@435 2235
twisti@1162 2236 Address flags(G2_thread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::flags_offset());
duke@435 2237
duke@435 2238 #ifdef _LP64
twisti@1162 2239 AddressLiteral dest(method->native_function());
duke@435 2240 __ relocate(relocInfo::runtime_call_type);
twisti@1162 2241 __ jumpl_to(dest, O7, O7);
duke@435 2242 #else
duke@435 2243 __ call(method->native_function(), relocInfo::runtime_call_type);
duke@435 2244 #endif
duke@435 2245 __ delayed()->st(G3_scratch, flags);
duke@435 2246
duke@435 2247 __ restore_thread(L7_thread_cache); // restore G2_thread
duke@435 2248
duke@435 2249 // Unpack native results. For int-types, we do any needed sign-extension
duke@435 2250 // and move things into I0. The return value there will survive any VM
duke@435 2251 // calls for blocking or unlocking. An FP or OOP result (handle) is done
duke@435 2252 // specially in the slow-path code.
duke@435 2253 switch (ret_type) {
duke@435 2254 case T_VOID: break; // Nothing to do!
duke@435 2255 case T_FLOAT: break; // Got it where we want it (unless slow-path)
duke@435 2256 case T_DOUBLE: break; // Got it where we want it (unless slow-path)
duke@435 2257 // In a 64-bit build the result is in O0; in a 32-bit build it is in O0,O1
duke@435 2258 case T_LONG:
duke@435 2259 #ifndef _LP64
duke@435 2260 __ mov(O1, I1);
duke@435 2261 #endif
duke@435 2262 // Fall thru
duke@435 2263 case T_OBJECT: // Really a handle
duke@435 2264 case T_ARRAY:
duke@435 2265 case T_INT:
duke@435 2266 __ mov(O0, I0);
duke@435 2267 break;
duke@435 2268 case T_BOOLEAN: __ subcc(G0, O0, G0); __ addc(G0, 0, I0); break; // !0 => true; 0 => false
duke@435 2269 case T_BYTE : __ sll(O0, 24, O0); __ sra(O0, 24, I0); break;
duke@435 2270 case T_CHAR : __ sll(O0, 16, O0); __ srl(O0, 16, I0); break; // cannot use and3, 0xFFFF too big as immediate value!
duke@435 2271 case T_SHORT : __ sll(O0, 16, O0); __ sra(O0, 16, I0); break;
duke@435 2272 break; // Cannot de-handlize until after reclaiming jvm_lock
duke@435 2273 default:
duke@435 2274 ShouldNotReachHere();
duke@435 2275 }
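  // For illustration, the T_BOOLEAN normalization above computes I0 = (O0 != 0) ? 1 : 0:
  //   subcc(G0, O0, G0)   // 0 - O0: sets the carry (borrow) flag iff O0 is non-zero
  //   addc (G0, 0, I0)    // I0 = 0 + 0 + carry, i.e. the 0/1 truth value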
duke@435 2276
duke@435 2277 // must we block?
duke@435 2278
duke@435 2279 // Block, if necessary, before resuming in _thread_in_Java state.
duke@435 2280 // In order for GC to work, don't clear the last_Java_sp until after blocking.
duke@435 2281 { Label no_block;
twisti@1162 2282 AddressLiteral sync_state(SafepointSynchronize::address_of_state());
duke@435 2283
duke@435 2284 // Switch thread to "native transition" state before reading the synchronization state.
duke@435 2285 // This additional state is necessary because reading and testing the synchronization
duke@435 2286 // state is not atomic w.r.t. GC, as this scenario demonstrates:
duke@435 2287 // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
duke@435 2288 // VM thread changes sync state to synchronizing and suspends threads for GC.
duke@435 2289 // Thread A is resumed to finish this native method, but doesn't block here since it
duke@435 2290 // didn't see any synchronization in progress, and escapes.
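    // Sketch of the transition protocol implemented below (illustrative only):
    //   1. thread_state = _thread_in_native_trans
    //   2. StoreLoad barrier (membar, or a serialization-page write when !UseMembar)
    //   3. if safepoint state != _not_synchronized or suspend_flags != 0:
    //        call JavaThread::check_special_condition_for_native_trans (may block)
    //   4. thread_state = _thread_in_Java (after the no_block label)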
duke@435 2291 __ set(_thread_in_native_trans, G3_scratch);
twisti@1162 2292 __ st(G3_scratch, G2_thread, JavaThread::thread_state_offset());
duke@435 2293 if(os::is_MP()) {
duke@435 2294 if (UseMembar) {
duke@435 2295 // Force this write out before the read below
duke@435 2296 __ membar(Assembler::StoreLoad);
duke@435 2297 } else {
duke@435 2298 // Write serialization page so VM thread can do a pseudo remote membar.
duke@435 2299 // We use the current thread pointer to calculate a thread specific
duke@435 2300 // offset to write to within the page. This minimizes bus traffic
duke@435 2301 // due to cache line collision.
duke@435 2302 __ serialize_memory(G2_thread, G1_scratch, G3_scratch);
duke@435 2303 }
duke@435 2304 }
duke@435 2305 __ load_contents(sync_state, G3_scratch);
duke@435 2306 __ cmp(G3_scratch, SafepointSynchronize::_not_synchronized);
duke@435 2307
duke@435 2308 Label L;
twisti@1162 2309 Address suspend_state(G2_thread, JavaThread::suspend_flags_offset());
duke@435 2310 __ br(Assembler::notEqual, false, Assembler::pn, L);
twisti@1162 2311 __ delayed()->ld(suspend_state, G3_scratch);
duke@435 2312 __ cmp(G3_scratch, 0);
duke@435 2313 __ br(Assembler::equal, false, Assembler::pt, no_block);
duke@435 2314 __ delayed()->nop();
duke@435 2315 __ bind(L);
duke@435 2316
duke@435 2317 // Block. Save any potential method result value before the operation and
duke@435 2318 // use a leaf call to leave the last_Java_frame setup undisturbed. Doing this
duke@435 2319 // lets us share the oopMap we used when we went native rather than create
duke@435 2320 // a distinct one for this pc.
duke@435 2321 //
duke@435 2322 save_native_result(masm, ret_type, stack_slots);
duke@435 2323 __ call_VM_leaf(L7_thread_cache,
duke@435 2324 CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans),
duke@435 2325 G2_thread);
duke@435 2326
duke@435 2327 // Restore any method result value
duke@435 2328 restore_native_result(masm, ret_type, stack_slots);
duke@435 2329 __ bind(no_block);
duke@435 2330 }
duke@435 2331
duke@435 2332 // thread state is thread_in_native_trans. Any safepoint blocking has already
duke@435 2333 // happened so we can now change state to _thread_in_Java.
duke@435 2334
duke@435 2335
duke@435 2336 __ set(_thread_in_Java, G3_scratch);
twisti@1162 2337 __ st(G3_scratch, G2_thread, JavaThread::thread_state_offset());
duke@435 2338
duke@435 2339
duke@435 2340 Label no_reguard;
twisti@1162 2341 __ ld(G2_thread, JavaThread::stack_guard_state_offset(), G3_scratch);
duke@435 2342 __ cmp(G3_scratch, JavaThread::stack_guard_yellow_disabled);
duke@435 2343 __ br(Assembler::notEqual, false, Assembler::pt, no_reguard);
duke@435 2344 __ delayed()->nop();
duke@435 2345
duke@435 2346 save_native_result(masm, ret_type, stack_slots);
duke@435 2347 __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages));
duke@435 2348 __ delayed()->nop();
duke@435 2349
duke@435 2350 __ restore_thread(L7_thread_cache); // restore G2_thread
duke@435 2351 restore_native_result(masm, ret_type, stack_slots);
duke@435 2352
duke@435 2353 __ bind(no_reguard);
duke@435 2354
duke@435 2355 // Handle possible exception (will unlock if necessary)
duke@435 2356
duke@435 2357 // native result if any is live in freg or I0 (and I1 if long and 32bit vm)
duke@435 2358
duke@435 2359 // Unlock
duke@435 2360 if (method->is_synchronized()) {
duke@435 2361 Label done;
duke@435 2362 Register I2_ex_oop = I2;
duke@435 2363 const Register L3_box = L3;
duke@435 2364 // Get locked oop from the handle we passed to jni
duke@435 2365 __ ld_ptr(L6_handle, 0, L4);
duke@435 2366 __ add(SP, lock_offset+STACK_BIAS, L3_box);
duke@435 2367 // Must save pending exception around the slow-path VM call. Since it's a
duke@435 2368 // leaf call, the pending exception (if any) can be kept in a register.
duke@435 2369 __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), I2_ex_oop);
duke@435 2370 // Now unlock
duke@435 2371 // (Roop, Rmark, Rbox, Rscratch)
duke@435 2372 __ compiler_unlock_object(L4, L1, L3_box, L2);
duke@435 2373 __ br(Assembler::equal, false, Assembler::pt, done);
duke@435 2374 __ delayed()-> add(SP, lock_offset+STACK_BIAS, L3_box);
duke@435 2375
duke@435 2376 // save and restore any potential method result value around the unlocking
duke@435 2377 // operation. Will save in I0 (or stack for FP returns).
duke@435 2378 save_native_result(masm, ret_type, stack_slots);
duke@435 2379
duke@435 2380 // Must clear pending-exception before re-entering the VM. Since this is
duke@435 2381 // a leaf call, pending-exception-oop can be safely kept in a register.
duke@435 2382 __ st_ptr(G0, G2_thread, in_bytes(Thread::pending_exception_offset()));
duke@435 2383
duke@435 2384 // slow case of monitor exit. Inline a special case of call_VM that
duke@435 2385 // disallows any pending_exception.
duke@435 2386 __ mov(L3_box, O1);
duke@435 2387
duke@435 2388 __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), relocInfo::runtime_call_type);
duke@435 2389 __ delayed()->mov(L4, O0); // Need oop in O0
duke@435 2390
duke@435 2391 __ restore_thread(L7_thread_cache); // restore G2_thread
duke@435 2392
duke@435 2393 #ifdef ASSERT
duke@435 2394 { Label L;
duke@435 2395 __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), O0);
duke@435 2396 __ br_null(O0, false, Assembler::pt, L);
duke@435 2397 __ delayed()->nop();
duke@435 2398 __ stop("no pending exception allowed on exit from IR::monitorexit");
duke@435 2399 __ bind(L);
duke@435 2400 }
duke@435 2401 #endif
duke@435 2402 restore_native_result(masm, ret_type, stack_slots);
duke@435 2403 // check_forward_pending_exception jumps to forward_exception if any pending
duke@435 2404 // exception is set. The forward_exception routine expects to see the
duke@435 2405 // exception in pending_exception and not in a register. Kind of clumsy,
duke@435 2406 // since all folks who branch to forward_exception must have tested
duke@435 2407 // pending_exception first and hence have it in a register already.
duke@435 2408 __ st_ptr(I2_ex_oop, G2_thread, in_bytes(Thread::pending_exception_offset()));
duke@435 2409 __ bind(done);
duke@435 2410 }
duke@435 2411
duke@435 2412 // Tell dtrace about this method exit
duke@435 2413 {
duke@435 2414 SkipIfEqual skip_if(
duke@435 2415 masm, G3_scratch, &DTraceMethodProbes, Assembler::zero);
duke@435 2416 save_native_result(masm, ret_type, stack_slots);
duke@435 2417 __ set_oop_constant(JNIHandles::make_local(method()), O1);
duke@435 2418 __ call_VM_leaf(L7_thread_cache,
duke@435 2419 CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit),
duke@435 2420 G2_thread, O1);
duke@435 2421 restore_native_result(masm, ret_type, stack_slots);
duke@435 2422 }
duke@435 2423
duke@435 2424 // Clear "last Java frame" SP and PC.
duke@435 2425 __ verify_thread(); // G2_thread must be correct
duke@435 2426 __ reset_last_Java_frame();
duke@435 2427
duke@435 2428 // Unpack oop result
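// The native call returned a JNI handle (or NULL); dereference it to get the real
// oop. Because the branch below is annulled, the delay-slot ld_ptr executes only
// when the branch is taken, i.e. only for a non-NULL handle; a NULL handle falls
// through to the mov that leaves NULL in I0.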
duke@435 2429 if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
duke@435 2430 Label L;
duke@435 2431 __ addcc(G0, I0, G0);
duke@435 2432 __ brx(Assembler::notZero, true, Assembler::pt, L);
duke@435 2433 __ delayed()->ld_ptr(I0, 0, I0);
duke@435 2434 __ mov(G0, I0);
duke@435 2435 __ bind(L);
duke@435 2436 __ verify_oop(I0);
duke@435 2437 }
duke@435 2438
duke@435 2439 // reset handle block
duke@435 2440 __ ld_ptr(G2_thread, in_bytes(JavaThread::active_handles_offset()), L5);
duke@435 2441 __ st_ptr(G0, L5, JNIHandleBlock::top_offset_in_bytes());
duke@435 2442
duke@435 2443 __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), G3_scratch);
duke@435 2444 check_forward_pending_exception(masm, G3_scratch);
duke@435 2445
duke@435 2446
duke@435 2447 // Return
duke@435 2448
duke@435 2449 #ifndef _LP64
duke@435 2450 if (ret_type == T_LONG) {
duke@435 2451
duke@435 2452 // Must leave proper result in O0,O1 and G1 (c2/tiered only)
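// I0 holds the high word and I1 the low word of the 64-bit result; the
// shift/zero-extend/or sequence below reassembles them into a single value in G1
// for compiled callers, while the two 32-bit halves stay in place for the interpreter.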
duke@435 2453 __ sllx(I0, 32, G1); // Shift bits into high G1
duke@435 2454 __ srl (I1, 0, I1); // Zero extend O1 (harmless?)
duke@435 2455 __ or3 (I1, G1, G1); // OR 64 bits into G1
duke@435 2456 }
duke@435 2457 #endif
duke@435 2458
duke@435 2459 __ ret();
duke@435 2460 __ delayed()->restore();
duke@435 2461
duke@435 2462 __ flush();
duke@435 2463
duke@435 2464 nmethod *nm = nmethod::new_native_nmethod(method,
duke@435 2465 masm->code(),
duke@435 2466 vep_offset,
duke@435 2467 frame_complete,
duke@435 2468 stack_slots / VMRegImpl::slots_per_word,
duke@435 2469 (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
duke@435 2470 in_ByteSize(lock_offset),
duke@435 2471 oop_maps);
duke@435 2472 return nm;
duke@435 2473
duke@435 2474 }
duke@435 2475
kamg@551 2476 #ifdef HAVE_DTRACE_H
kamg@551 2477 // ---------------------------------------------------------------------------
kamg@551 2478 // Generate a dtrace nmethod for a given signature. The method takes arguments
kamg@551 2479 // in the Java compiled code convention, marshals them to the native
kamg@551 2480 // ABI, and then leaves nops at the position where you would expect to call a native
kamg@551 2481 // function. When the probe is enabled the nops are replaced with a trap
kamg@551 2482 // instruction that dtrace inserts, and the trap will cause a notification
kamg@551 2483 // to dtrace.
kamg@551 2484 //
kamg@551 2485 // The probes are only able to take primitive types and java/lang/String as
kamg@551 2486 // arguments. No other java types are allowed. Strings are converted to utf8
kamg@551 2487 // strings so that from dtrace's point of view java strings are converted to C
kamg@551 2488 // strings. There is an arbitrary fixed limit on the total space that a method
kamg@551 2489 // can use for converting the strings. (256 chars per string in the signature).
kamg@551 2490 // So any java string larger than this is truncated.
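// For illustration only (not an exhaustive mapping): a static probe method declared
// as void probe(String s, double d, float f) would yield, per the conversion loop
// below, in_sig_bt = { T_OBJECT, T_DOUBLE, T_VOID, T_FLOAT } and
// out_sig_bt = { T_ADDRESS, T_LONG, T_VOID, T_INT }, since strings become C string
// addresses, doubles are passed as longs, and floats as ints.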
kamg@551 2491
kamg@551 2492 static int fp_offset[ConcreteRegisterImpl::number_of_registers] = { 0 };
kamg@551 2493 static bool offsets_initialized = false;
kamg@551 2494
kamg@551 2495 static VMRegPair reg64_to_VMRegPair(Register r) {
kamg@551 2496 VMRegPair ret;
kamg@551 2497 if (wordSize == 8) {
kamg@551 2498 ret.set2(r->as_VMReg());
kamg@551 2499 } else {
kamg@551 2500 ret.set_pair(r->successor()->as_VMReg(), r->as_VMReg());
kamg@551 2501 }
kamg@551 2502 return ret;
kamg@551 2503 }
kamg@551 2504
kamg@551 2505
kamg@551 2506 nmethod *SharedRuntime::generate_dtrace_nmethod(
kamg@551 2507 MacroAssembler *masm, methodHandle method) {
kamg@551 2508
kamg@551 2509
kamg@551 2510 // generate_dtrace_nmethod is guarded by a mutex so we are sure to
kamg@551 2511 // be single threaded in this method.
kamg@551 2512 assert(AdapterHandlerLibrary_lock->owned_by_self(), "must be");
kamg@551 2513
kamg@551 2514 // Fill in the signature array, for the calling-convention call.
kamg@551 2515 int total_args_passed = method->size_of_parameters();
kamg@551 2516
kamg@551 2517 BasicType* in_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed);
kamg@551 2518 VMRegPair *in_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed);
kamg@551 2519
kamg@551 2520 // The signature we are going to use for the trap that dtrace will see:
kamg@551 2521 // java/lang/String is converted, "this" is dropped, and any other object
kamg@551 2522 // is converted to NULL. (A one-slot java/lang/Long object reference
kamg@551 2523 // is converted to a two-slot long, which is why we double the allocation).
kamg@551 2524 BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed * 2);
kamg@551 2525 VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed * 2);
kamg@551 2526
kamg@551 2527 int i=0;
kamg@551 2528 int total_strings = 0;
kamg@551 2529 int first_arg_to_pass = 0;
kamg@551 2530 int total_c_args = 0;
kamg@551 2531
kamg@551 2532 // Skip the receiver as dtrace doesn't want to see it
kamg@551 2533 if( !method->is_static() ) {
kamg@551 2534 in_sig_bt[i++] = T_OBJECT;
kamg@551 2535 first_arg_to_pass = 1;
kamg@551 2536 }
kamg@551 2537
kamg@551 2538 SignatureStream ss(method->signature());
kamg@551 2539 for ( ; !ss.at_return_type(); ss.next()) {
kamg@551 2540 BasicType bt = ss.type();
kamg@551 2541 in_sig_bt[i++] = bt; // Collect remaining bits of signature
kamg@551 2542 out_sig_bt[total_c_args++] = bt;
kamg@551 2543 if( bt == T_OBJECT) {
coleenp@2497 2544 Symbol* s = ss.as_symbol_or_null();
kamg@551 2545 if (s == vmSymbols::java_lang_String()) {
kamg@551 2546 total_strings++;
kamg@551 2547 out_sig_bt[total_c_args-1] = T_ADDRESS;
kamg@551 2548 } else if (s == vmSymbols::java_lang_Boolean() ||
kamg@551 2549 s == vmSymbols::java_lang_Byte()) {
kamg@551 2550 out_sig_bt[total_c_args-1] = T_BYTE;
kamg@551 2551 } else if (s == vmSymbols::java_lang_Character() ||
kamg@551 2552 s == vmSymbols::java_lang_Short()) {
kamg@551 2553 out_sig_bt[total_c_args-1] = T_SHORT;
kamg@551 2554 } else if (s == vmSymbols::java_lang_Integer() ||
kamg@551 2555 s == vmSymbols::java_lang_Float()) {
kamg@551 2556 out_sig_bt[total_c_args-1] = T_INT;
kamg@551 2557 } else if (s == vmSymbols::java_lang_Long() ||
kamg@551 2558 s == vmSymbols::java_lang_Double()) {
kamg@551 2559 out_sig_bt[total_c_args-1] = T_LONG;
kamg@551 2560 out_sig_bt[total_c_args++] = T_VOID;
kamg@551 2561 }
kamg@551 2562 } else if ( bt == T_LONG || bt == T_DOUBLE ) {
kamg@551 2563 in_sig_bt[i++] = T_VOID; // Longs & doubles take 2 Java slots
kamg@551 2564 // We convert double to long
kamg@551 2565 out_sig_bt[total_c_args-1] = T_LONG;
kamg@551 2566 out_sig_bt[total_c_args++] = T_VOID;
kamg@551 2567 } else if ( bt == T_FLOAT) {
kamg@551 2568 // We convert float to int
kamg@551 2569 out_sig_bt[total_c_args-1] = T_INT;
kamg@551 2570 }
kamg@551 2571 }
kamg@551 2572
kamg@551 2573 assert(i==total_args_passed, "validly parsed signature");
kamg@551 2574
kamg@551 2575 // Now get the compiled-Java layout as input arguments
kamg@551 2576 int comp_args_on_stack;
kamg@551 2577 comp_args_on_stack = SharedRuntime::java_calling_convention(
kamg@551 2578 in_sig_bt, in_regs, total_args_passed, false);
kamg@551 2579
kamg@551 2580 // We have received a description of where all the java args are located
kamg@551 2581 // on entry to the wrapper. We need to convert these args to where
kamg@551 2582 // a native (non-jni) function would expect them. To figure out
kamg@551 2583 // where they go we convert the java signature to a C signature and remove
kamg@551 2584 // T_VOID for any long/double we might have received.
kamg@551 2585
kamg@551 2586
kamg@551 2587 // Now figure out where the args must be stored and how much stack space
kamg@551 2588 // they require (neglecting out_preserve_stack_slots but including space for storing
kamg@551 2589 // the 1st six register arguments). It's weird; see int_stk_helper.
kamg@551 2590 //
kamg@551 2591 int out_arg_slots;
kamg@551 2592 out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args);
kamg@551 2593
kamg@551 2594 // Calculate the total number of stack slots we will need.
kamg@551 2595
kamg@551 2596 // First count the abi requirement plus all of the outgoing args
kamg@551 2597 int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
kamg@551 2598
kamg@551 2599 // Plus a temp for possible conversion of float/double/long register args
kamg@551 2600
kamg@551 2601 int conversion_temp = stack_slots;
kamg@551 2602 stack_slots += 2;
kamg@551 2603
kamg@551 2604
kamg@551 2605 // Now space for the string(s) we must convert
kamg@551 2606
kamg@551 2607 int string_locs = stack_slots;
kamg@551 2608 stack_slots += total_strings *
kamg@551 2609 (max_dtrace_string_size / VMRegImpl::stack_slot_size);
kamg@551 2610
kamg@551 2611 // Ok The space we have allocated will look like:
kamg@551 2612 //
kamg@551 2613 //
kamg@551 2614 // FP-> | |
kamg@551 2615 // |---------------------|
kamg@551 2616 // | string[n] |
kamg@551 2617 // |---------------------| <- string_locs[n]
kamg@551 2618 // | string[n-1] |
kamg@551 2619 // |---------------------| <- string_locs[n-1]
kamg@551 2620 // | ... |
kamg@551 2621 // | ... |
kamg@551 2622 // |---------------------| <- string_locs[1]
kamg@551 2623 // | string[0] |
kamg@551 2624 // |---------------------| <- string_locs[0]
kamg@551 2625 // | temp |
kamg@551 2626 // |---------------------| <- conversion_temp
kamg@551 2627 // | outbound memory |
kamg@551 2628 // | based arguments |
kamg@551 2629 // | |
kamg@551 2630 // |---------------------|
kamg@551 2631 // | |
kamg@551 2632 // SP-> | out_preserved_slots |
kamg@551 2633 //
kamg@551 2634 //
kamg@551 2635
kamg@551 2636 // Now compute the actual number of stack words we need, rounding to keep the
kamg@551 2637 // stack properly aligned.
kamg@551 2638 stack_slots = round_to(stack_slots, 4 * VMRegImpl::slots_per_word);
kamg@551 2639
kamg@551 2640 int stack_size = stack_slots * VMRegImpl::stack_slot_size;
kamg@551 2641
kamg@551 2642 intptr_t start = (intptr_t)__ pc();
kamg@551 2643
kamg@551 2644 // First, make an ic check to see if we should even be here
kamg@551 2645
kamg@551 2646 {
kamg@551 2647 Label L;
kamg@551 2648 const Register temp_reg = G3_scratch;
twisti@1162 2649 AddressLiteral ic_miss(SharedRuntime::get_ic_miss_stub());
kamg@551 2650 __ verify_oop(O0);
kamg@551 2651 __ ld_ptr(O0, oopDesc::klass_offset_in_bytes(), temp_reg);
kamg@551 2652 __ cmp(temp_reg, G5_inline_cache_reg);
kamg@551 2653 __ brx(Assembler::equal, true, Assembler::pt, L);
kamg@551 2654 __ delayed()->nop();
kamg@551 2655
twisti@1162 2656 __ jump_to(ic_miss, temp_reg);
kamg@551 2657 __ delayed()->nop();
kamg@551 2658 __ align(CodeEntryAlignment);
kamg@551 2659 __ bind(L);
kamg@551 2660 }
kamg@551 2661
kamg@551 2662 int vep_offset = ((intptr_t)__ pc()) - start;
kamg@551 2663
kamg@551 2664
kamg@551 2665 // The instruction at the verified entry point must be 5 bytes or longer
kamg@551 2666 // because it can be patched on the fly by make_non_entrant. The stack bang
kamg@551 2667 // instruction fits that requirement.
kamg@551 2668
kamg@551 2669 // Generate stack overflow check before creating frame
kamg@551 2670 __ generate_stack_overflow_check(stack_size);
kamg@551 2671
kamg@551 2672 assert(((intptr_t)__ pc() - start - vep_offset) >= 5,
kamg@551 2673 "valid size for make_non_entrant");
kamg@551 2674
kamg@551 2675 // Generate a new frame for the wrapper.
kamg@551 2676 __ save(SP, -stack_size, SP);
kamg@551 2677
kamg@551 2678 // Frame is now completed as far as size and linkage.
kamg@551 2679
kamg@551 2680 int frame_complete = ((intptr_t)__ pc()) - start;
kamg@551 2681
kamg@551 2682 #ifdef ASSERT
kamg@551 2683 bool reg_destroyed[RegisterImpl::number_of_registers];
kamg@551 2684 bool freg_destroyed[FloatRegisterImpl::number_of_registers];
kamg@551 2685 for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) {
kamg@551 2686 reg_destroyed[r] = false;
kamg@551 2687 }
kamg@551 2688 for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) {
kamg@551 2689 freg_destroyed[f] = false;
kamg@551 2690 }
kamg@551 2691
kamg@551 2692 #endif /* ASSERT */
kamg@551 2693
kamg@551 2694 VMRegPair zero;
kamg@611 2695 const Register g0 = G0; // without this we get a compiler warning (why??)
kamg@611 2696 zero.set2(g0->as_VMReg());
kamg@551 2697
kamg@551 2698 int c_arg, j_arg;
kamg@551 2699
kamg@551 2700 Register conversion_off = noreg;
kamg@551 2701
kamg@551 2702 for (j_arg = first_arg_to_pass, c_arg = 0 ;
kamg@551 2703 j_arg < total_args_passed ; j_arg++, c_arg++ ) {
kamg@551 2704
kamg@551 2705 VMRegPair src = in_regs[j_arg];
kamg@551 2706 VMRegPair dst = out_regs[c_arg];
kamg@551 2707
kamg@551 2708 #ifdef ASSERT
kamg@551 2709 if (src.first()->is_Register()) {
kamg@551 2710 assert(!reg_destroyed[src.first()->as_Register()->encoding()], "ack!");
kamg@551 2711 } else if (src.first()->is_FloatRegister()) {
kamg@551 2712 assert(!freg_destroyed[src.first()->as_FloatRegister()->encoding(
kamg@551 2713 FloatRegisterImpl::S)], "ack!");
kamg@551 2714 }
kamg@551 2715 if (dst.first()->is_Register()) {
kamg@551 2716 reg_destroyed[dst.first()->as_Register()->encoding()] = true;
kamg@551 2717 } else if (dst.first()->is_FloatRegister()) {
kamg@551 2718 freg_destroyed[dst.first()->as_FloatRegister()->encoding(
kamg@551 2719 FloatRegisterImpl::S)] = true;
kamg@551 2720 }
kamg@551 2721 #endif /* ASSERT */
kamg@551 2722
kamg@551 2723 switch (in_sig_bt[j_arg]) {
kamg@551 2724 case T_ARRAY:
kamg@551 2725 case T_OBJECT:
kamg@551 2726 {
kamg@551 2727 if (out_sig_bt[c_arg] == T_BYTE || out_sig_bt[c_arg] == T_SHORT ||
kamg@551 2728 out_sig_bt[c_arg] == T_INT || out_sig_bt[c_arg] == T_LONG) {
kamg@551 2729 // need to unbox a one-slot value
kamg@551 2730 Register in_reg = L0;
kamg@551 2731 Register tmp = L2;
kamg@551 2732 if ( src.first()->is_reg() ) {
kamg@551 2733 in_reg = src.first()->as_Register();
kamg@551 2734 } else {
kamg@551 2735 assert(Assembler::is_simm13(reg2offset(src.first()) + STACK_BIAS),
kamg@551 2736 "must be");
kamg@551 2737 __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, in_reg);
kamg@551 2738 }
kamg@551 2739 // If the final destination is an acceptable register
kamg@551 2740 if ( dst.first()->is_reg() ) {
kamg@551 2741 if ( dst.is_single_phys_reg() || out_sig_bt[c_arg] != T_LONG ) {
kamg@551 2742 tmp = dst.first()->as_Register();
kamg@551 2743 }
kamg@551 2744 }
kamg@551 2745
kamg@551 2746 Label skipUnbox;
kamg@551 2747 if ( wordSize == 4 && out_sig_bt[c_arg] == T_LONG ) {
kamg@551 2748 __ mov(G0, tmp->successor());
kamg@551 2749 }
kamg@551 2750 __ br_null(in_reg, true, Assembler::pn, skipUnbox);
kamg@551 2751 __ delayed()->mov(G0, tmp);
kamg@551 2752
kvn@600 2753 BasicType bt = out_sig_bt[c_arg];
kvn@600 2754 int box_offset = java_lang_boxing_object::value_offset_in_bytes(bt);
kvn@600 2755 switch (bt) {
kamg@551 2756 case T_BYTE:
kamg@551 2757 __ ldub(in_reg, box_offset, tmp); break;
kamg@551 2758 case T_SHORT:
kamg@551 2759 __ lduh(in_reg, box_offset, tmp); break;
kamg@551 2760 case T_INT:
kamg@551 2761 __ ld(in_reg, box_offset, tmp); break;
kamg@551 2762 case T_LONG:
kamg@551 2763 __ ld_long(in_reg, box_offset, tmp); break;
kamg@551 2764 default: ShouldNotReachHere();
kamg@551 2765 }
kamg@551 2766
kamg@551 2767 __ bind(skipUnbox);
kamg@551 2768 // If tmp wasn't the final destination, copy to the final destination
kamg@551 2769 if (tmp == L2) {
kamg@551 2770 VMRegPair tmp_as_VM = reg64_to_VMRegPair(L2);
kamg@551 2771 if (out_sig_bt[c_arg] == T_LONG) {
kamg@551 2772 long_move(masm, tmp_as_VM, dst);
kamg@551 2773 } else {
kamg@551 2774 move32_64(masm, tmp_as_VM, out_regs[c_arg]);
kamg@551 2775 }
kamg@551 2776 }
kamg@551 2777 if (out_sig_bt[c_arg] == T_LONG) {
kamg@551 2778 assert(out_sig_bt[c_arg+1] == T_VOID, "must be");
kamg@551 2779 ++c_arg; // move over the T_VOID to keep the loop indices in sync
kamg@551 2780 }
kamg@551 2781 } else if (out_sig_bt[c_arg] == T_ADDRESS) {
kamg@551 2782 Register s =
kamg@551 2783 src.first()->is_reg() ? src.first()->as_Register() : L2;
kamg@551 2784 Register d =
kamg@551 2785 dst.first()->is_reg() ? dst.first()->as_Register() : L2;
kamg@551 2786
kamg@551 2787 // We store the oop now so that the conversion pass can reach it
kamg@551 2788 // while in the inner frame. This will be the only store if
kamg@551 2789 // the oop is NULL.
kamg@551 2790 if (s != L2) {
kamg@551 2791 // src is register
kamg@551 2792 if (d != L2) {
kamg@551 2793 // dst is register
kamg@551 2794 __ mov(s, d);
kamg@551 2795 } else {
kamg@551 2796 assert(Assembler::is_simm13(reg2offset(dst.first()) +
kamg@551 2797 STACK_BIAS), "must be");
kamg@551 2798 __ st_ptr(s, SP, reg2offset(dst.first()) + STACK_BIAS);
kamg@551 2799 }
kamg@551 2800 } else {
kamg@551 2801 // src not a register
kamg@551 2802 assert(Assembler::is_simm13(reg2offset(src.first()) +
kamg@551 2803 STACK_BIAS), "must be");
kamg@551 2804 __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, d);
kamg@551 2805 if (d == L2) {
kamg@551 2806 assert(Assembler::is_simm13(reg2offset(dst.first()) +
kamg@551 2807 STACK_BIAS), "must be");
kamg@551 2808 __ st_ptr(d, SP, reg2offset(dst.first()) + STACK_BIAS);
kamg@551 2809 }
kamg@551 2810 }
kamg@551 2811 } else if (out_sig_bt[c_arg] != T_VOID) {
kamg@551 2812 // Convert the arg to NULL
kamg@551 2813 if (dst.first()->is_reg()) {
kamg@551 2814 __ mov(G0, dst.first()->as_Register());
kamg@551 2815 } else {
kamg@551 2816 assert(Assembler::is_simm13(reg2offset(dst.first()) +
kamg@551 2817 STACK_BIAS), "must be");
kamg@551 2818 __ st_ptr(G0, SP, reg2offset(dst.first()) + STACK_BIAS);
kamg@551 2819 }
kamg@551 2820 }
kamg@551 2821 }
kamg@551 2822 break;
kamg@551 2823 case T_VOID:
kamg@551 2824 break;
kamg@551 2825
kamg@551 2826 case T_FLOAT:
kamg@551 2827 if (src.first()->is_stack()) {
kamg@551 2828 // Stack to stack/reg is simple
kamg@551 2829 move32_64(masm, src, dst);
kamg@551 2830 } else {
kamg@551 2831 if (dst.first()->is_reg()) {
kamg@551 2832 // freg -> reg
kamg@551 2833 int off =
kamg@551 2834 STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size;
kamg@551 2835 Register d = dst.first()->as_Register();
kamg@551 2836 if (Assembler::is_simm13(off)) {
kamg@551 2837 __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
kamg@551 2838 SP, off);
kamg@551 2839 __ ld(SP, off, d);
kamg@551 2840 } else {
kamg@551 2841 if (conversion_off == noreg) {
kamg@551 2842 __ set(off, L6);
kamg@551 2843 conversion_off = L6;
kamg@551 2844 }
kamg@551 2845 __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
kamg@551 2846 SP, conversion_off);
kamg@551 2847 __ ld(SP, conversion_off , d);
kamg@551 2848 }
kamg@551 2849 } else {
kamg@551 2850 // freg -> mem
kamg@551 2851 int off = STACK_BIAS + reg2offset(dst.first());
kamg@551 2852 if (Assembler::is_simm13(off)) {
kamg@551 2853 __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
kamg@551 2854 SP, off);
kamg@551 2855 } else {
kamg@551 2856 if (conversion_off == noreg) {
kamg@551 2857 __ set(off, L6);
kamg@551 2858 conversion_off = L6;
kamg@551 2859 }
kamg@551 2860 __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
kamg@551 2861 SP, conversion_off);
kamg@551 2862 }
kamg@551 2863 }
kamg@551 2864 }
kamg@551 2865 break;
kamg@551 2866
kamg@551 2867 case T_DOUBLE:
kamg@551 2868 assert( j_arg + 1 < total_args_passed &&
kamg@551 2869 in_sig_bt[j_arg + 1] == T_VOID &&
kamg@551 2870 out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
kamg@551 2871 if (src.first()->is_stack()) {
kamg@551 2872 // Stack to stack/reg is simple
kamg@551 2873 long_move(masm, src, dst);
kamg@551 2874 } else {
kamg@551 2875 Register d = dst.first()->is_reg() ? dst.first()->as_Register() : L2;
kamg@551 2876
kamg@551 2877 // Destination could be an odd reg on 32bit in which case
kamg@551 2878 // we can't load directly to the destination.
kamg@551 2879
kamg@551 2880 if (!d->is_even() && wordSize == 4) {
kamg@551 2881 d = L2;
kamg@551 2882 }
kamg@551 2883 int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size;
kamg@551 2884 if (Assembler::is_simm13(off)) {
kamg@551 2885 __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(),
kamg@551 2886 SP, off);
kamg@551 2887 __ ld_long(SP, off, d);
kamg@551 2888 } else {
kamg@551 2889 if (conversion_off == noreg) {
kamg@551 2890 __ set(off, L6);
kamg@551 2891 conversion_off = L6;
kamg@551 2892 }
kamg@551 2893 __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(),
kamg@551 2894 SP, conversion_off);
kamg@551 2895 __ ld_long(SP, conversion_off, d);
kamg@551 2896 }
kamg@551 2897 if (d == L2) {
kamg@551 2898 long_move(masm, reg64_to_VMRegPair(L2), dst);
kamg@551 2899 }
kamg@551 2900 }
kamg@551 2901 break;
kamg@551 2902
kamg@551 2903 case T_LONG :
kamg@551 2904 // 32bit can't do a split move of something like g1 -> O0, O1
kamg@551 2905 // so use a memory temp
kamg@551 2906 if (src.is_single_phys_reg() && wordSize == 4) {
kamg@551 2907 Register tmp = L2;
kamg@551 2908 if (dst.first()->is_reg() &&
kamg@551 2909 (wordSize == 8 || dst.first()->as_Register()->is_even())) {
kamg@551 2910 tmp = dst.first()->as_Register();
kamg@551 2911 }
kamg@551 2912
kamg@551 2913 int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size;
kamg@551 2914 if (Assembler::is_simm13(off)) {
kamg@551 2915 __ stx(src.first()->as_Register(), SP, off);
kamg@551 2916 __ ld_long(SP, off, tmp);
kamg@551 2917 } else {
kamg@551 2918 if (conversion_off == noreg) {
kamg@551 2919 __ set(off, L6);
kamg@551 2920 conversion_off = L6;
kamg@551 2921 }
kamg@551 2922 __ stx(src.first()->as_Register(), SP, conversion_off);
kamg@551 2923 __ ld_long(SP, conversion_off, tmp);
kamg@551 2924 }
kamg@551 2925
kamg@551 2926 if (tmp == L2) {
kamg@551 2927 long_move(masm, reg64_to_VMRegPair(L2), dst);
kamg@551 2928 }
kamg@551 2929 } else {
kamg@551 2930 long_move(masm, src, dst);
kamg@551 2931 }
kamg@551 2932 break;
kamg@551 2933
kamg@551 2934 case T_ADDRESS: assert(false, "found T_ADDRESS in java args");
kamg@551 2935
kamg@551 2936 default:
kamg@551 2937 move32_64(masm, src, dst);
kamg@551 2938 }
kamg@551 2939 }
kamg@551 2940
kamg@551 2941
kamg@551 2942 // If we have any strings we must store any register based arg to the stack.
kamg@551 2943 // This includes any still live float registers too.
kamg@551 2944
kamg@551 2945 if (total_strings > 0 ) {
kamg@551 2946
kamg@551 2947 // protect all the arg registers
kamg@551 2948 __ save_frame(0);
kamg@551 2949 __ mov(G2_thread, L7_thread_cache);
kamg@551 2950 const Register L2_string_off = L2;
kamg@551 2951
kamg@551 2952 // Get first string offset
kamg@551 2953 __ set(string_locs * VMRegImpl::stack_slot_size, L2_string_off);
kamg@551 2954
kamg@551 2955 for (c_arg = 0 ; c_arg < total_c_args ; c_arg++ ) {
kamg@551 2956 if (out_sig_bt[c_arg] == T_ADDRESS) {
kamg@551 2957
kamg@551 2958 VMRegPair dst = out_regs[c_arg];
kamg@551 2959 const Register d = dst.first()->is_reg() ?
kamg@551 2960 dst.first()->as_Register()->after_save() : noreg;
kamg@551 2961
kamg@551 2962 // It's a string; the oop was already copied to the out arg
kamg@551 2963 // position.
kamg@551 2964 if (d != noreg) {
kamg@551 2965 __ mov(d, O0);
kamg@551 2966 } else {
kamg@551 2967 assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS),
kamg@551 2968 "must be");
kamg@551 2969 __ ld_ptr(FP, reg2offset(dst.first()) + STACK_BIAS, O0);
kamg@551 2970 }
kamg@551 2971 Label skip;
kamg@551 2972
kamg@551 2973 __ br_null(O0, false, Assembler::pn, skip);
kamg@551 2974 __ delayed()->add(FP, L2_string_off, O1);
kamg@551 2975
kamg@551 2976 if (d != noreg) {
kamg@551 2977 __ mov(O1, d);
kamg@551 2978 } else {
kamg@551 2979 assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS),
kamg@551 2980 "must be");
kamg@551 2981 __ st_ptr(O1, FP, reg2offset(dst.first()) + STACK_BIAS);
kamg@551 2982 }
kamg@551 2983
kamg@551 2984 __ call(CAST_FROM_FN_PTR(address, SharedRuntime::get_utf),
kamg@551 2985 relocInfo::runtime_call_type);
kamg@551 2986 __ delayed()->add(L2_string_off, max_dtrace_string_size, L2_string_off);
kamg@551 2987
kamg@551 2988 __ bind(skip);
kamg@551 2989
kamg@551 2990 }
kamg@551 2991
kamg@551 2992 }
kamg@551 2993 __ mov(L7_thread_cache, G2_thread);
kamg@551 2994 __ restore();
kamg@551 2995
kamg@551 2996 }
kamg@551 2997
kamg@551 2998
kamg@551 2999 // Ok, now we are done. Place the nop that dtrace wants so that it can
kamg@551 3000 // patch in the trap.
kamg@551 3001
kamg@551 3002 int patch_offset = ((intptr_t)__ pc()) - start;
kamg@551 3003
kamg@551 3004 __ nop();
kamg@551 3005
kamg@551 3006
kamg@551 3007 // Return
kamg@551 3008
kamg@551 3009 __ ret();
kamg@551 3010 __ delayed()->restore();
kamg@551 3011
kamg@551 3012 __ flush();
kamg@551 3013
kamg@551 3014 nmethod *nm = nmethod::new_dtrace_nmethod(
kamg@551 3015 method, masm->code(), vep_offset, patch_offset, frame_complete,
kamg@551 3016 stack_slots / VMRegImpl::slots_per_word);
kamg@551 3017 return nm;
kamg@551 3018
kamg@551 3019 }
kamg@551 3020
kamg@551 3021 #endif // HAVE_DTRACE_H
kamg@551 3022
duke@435 3023 // This function returns the adjustment size (in number of words) to a c2i adapter
duke@435 3024 // activation for use during deoptimization.
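// For example (illustrative, assuming Interpreter::stackElementWords == 1): with
// 2 callee parameters and 5 callee locals the raw difference is 3 words, which
// round_to then pads to 4 so the adjustment stays a multiple of WordsPerLong.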
duke@435 3025 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
duke@435 3026 assert(callee_locals >= callee_parameters,
duke@435 3027 "test and remove; got more parms than locals");
duke@435 3028 if (callee_locals < callee_parameters)
duke@435 3029 return 0; // No adjustment for negative locals
twisti@1861 3030 int diff = (callee_locals - callee_parameters) * Interpreter::stackElementWords;
duke@435 3031 return round_to(diff, WordsPerLong);
duke@435 3032 }
duke@435 3033
duke@435 3034 // "Top of Stack" slots that may be unused by the calling convention but must
duke@435 3035 // otherwise be preserved.
duke@435 3036 // On Intel these are not necessary and the value can be zero.
duke@435 3037 // On Sparc this describes the words reserved for storing a register window
duke@435 3038 // when an interrupt occurs.
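// On SPARC frame::register_save_words is 16 (the 8 local and 8 in registers of a
// register window), so this typically works out to 16 stack slots in a 32-bit
// build and 32 in an LP64 build (where each word covers two 32-bit slots).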
duke@435 3039 uint SharedRuntime::out_preserve_stack_slots() {
duke@435 3040 return frame::register_save_words * VMRegImpl::slots_per_word;
duke@435 3041 }
duke@435 3042
duke@435 3043 static void gen_new_frame(MacroAssembler* masm, bool deopt) {
duke@435 3044 //
duke@435 3045 // Common out the new frame generation for deopt and uncommon trap
duke@435 3046 //
duke@435 3047 Register G3pcs = G3_scratch; // Array of new pcs (input)
duke@435 3048 Register Oreturn0 = O0;
duke@435 3049 Register Oreturn1 = O1;
duke@435 3050 Register O2UnrollBlock = O2;
duke@435 3051 Register O3array = O3; // Array of frame sizes (input)
duke@435 3052 Register O4array_size = O4; // number of frames (input)
duke@435 3053 Register O7frame_size = O7; // number of frames (input)
duke@435 3054
duke@435 3055 __ ld_ptr(O3array, 0, O7frame_size);
duke@435 3056 __ sub(G0, O7frame_size, O7frame_size);
duke@435 3057 __ save(SP, O7frame_size, SP);
duke@435 3058 __ ld_ptr(G3pcs, 0, I7); // load frame's new pc
duke@435 3059
duke@435 3060 #ifdef ASSERT
duke@435 3061 // make sure that the frames are aligned properly
duke@435 3062 #ifndef _LP64
duke@435 3063 __ btst(wordSize*2-1, SP);
duke@435 3064 __ breakpoint_trap(Assembler::notZero);
duke@435 3065 #endif
duke@435 3066 #endif
duke@435 3067
duke@435 3068 // Deopt needs to pass some extra live values from frame to frame
duke@435 3069
duke@435 3070 if (deopt) {
duke@435 3071 __ mov(Oreturn0->after_save(), Oreturn0);
duke@435 3072 __ mov(Oreturn1->after_save(), Oreturn1);
duke@435 3073 }
duke@435 3074
duke@435 3075 __ mov(O4array_size->after_save(), O4array_size);
duke@435 3076 __ sub(O4array_size, 1, O4array_size);
duke@435 3077 __ mov(O3array->after_save(), O3array);
duke@435 3078 __ mov(O2UnrollBlock->after_save(), O2UnrollBlock);
duke@435 3079 __ add(G3pcs, wordSize, G3pcs); // point to next pc value
duke@435 3080
duke@435 3081 #ifdef ASSERT
duke@435 3082 // trash registers to show a clear pattern in backtraces
duke@435 3083 __ set(0xDEAD0000, I0);
duke@435 3084 __ add(I0, 2, I1);
duke@435 3085 __ add(I0, 4, I2);
duke@435 3086 __ add(I0, 6, I3);
duke@435 3087 __ add(I0, 8, I4);
duke@435 3088 // Don't touch I5 could have valuable savedSP
duke@435 3089 __ set(0xDEADBEEF, L0);
duke@435 3090 __ mov(L0, L1);
duke@435 3091 __ mov(L0, L2);
duke@435 3092 __ mov(L0, L3);
duke@435 3093 __ mov(L0, L4);
duke@435 3094 __ mov(L0, L5);
duke@435 3095
duke@435 3096 // trash the return value as there is nothing to return yet
duke@435 3097 __ set(0xDEAD0001, O7);
duke@435 3098 #endif
duke@435 3099
duke@435 3100 __ mov(SP, O5_savedSP);
duke@435 3101 }
duke@435 3102
duke@435 3103
duke@435 3104 static void make_new_frames(MacroAssembler* masm, bool deopt) {
duke@435 3105 //
duke@435 3106 // loop through the UnrollBlock info and create new frames
duke@435 3107 //
duke@435 3108 Register G3pcs = G3_scratch;
duke@435 3109 Register Oreturn0 = O0;
duke@435 3110 Register Oreturn1 = O1;
duke@435 3111 Register O2UnrollBlock = O2;
duke@435 3112 Register O3array = O3;
duke@435 3113 Register O4array_size = O4;
duke@435 3114 Label loop;
duke@435 3115
duke@435 3116 // Before we make new frames, check to see if stack is available.
duke@435 3117 // Do this after the caller's return address is on top of stack
duke@435 3118 if (UseStackBanging) {
duke@435 3119 // Get total frame size for interpreted frames
twisti@1162 3120 __ ld(O2UnrollBlock, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes(), O4);
duke@435 3121 __ bang_stack_size(O4, O3, G3_scratch);
duke@435 3122 }
duke@435 3123
twisti@1162 3124 __ ld(O2UnrollBlock, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes(), O4array_size);
twisti@1162 3125 __ ld_ptr(O2UnrollBlock, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes(), G3pcs);
twisti@1162 3126 __ ld_ptr(O2UnrollBlock, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes(), O3array);
duke@435 3127
duke@435 3128 // Adjust old interpreter frame to make space for new frame's extra java locals
duke@435 3129 //
duke@435 3130 // We capture the original sp for the transition frame only because it is needed in
duke@435 3131 // order to properly calculate interpreter_sp_adjustment. Even though in real life
duke@435 3132 // every interpreter frame captures a savedSP it is only needed at the transition
duke@435 3133 // (fortunately). If we had to have it correct everywhere then we would need to
duke@435 3134 // be told the sp_adjustment for each frame we create. If the frame size array
duke@435 3135 // were to have twice the frame count entries then we could have pairs [sp_adjustment, frame_size]
duke@435 3136 // for each frame we create and keep up the illusion everywhere.
duke@435 3137 //
duke@435 3138
twisti@1162 3139 __ ld(O2UnrollBlock, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes(), O7);
duke@435 3140 __ mov(SP, O5_savedSP); // remember initial sender's original sp before adjustment
duke@435 3141 __ sub(SP, O7, SP);
duke@435 3142
duke@435 3143 #ifdef ASSERT
duke@435 3144 // make sure that there is at least one entry in the array
duke@435 3145 __ tst(O4array_size);
duke@435 3146 __ breakpoint_trap(Assembler::zero);
duke@435 3147 #endif
duke@435 3148
duke@435 3149 // Now push the new interpreter frames
duke@435 3150 __ bind(loop);
duke@435 3151
duke@435 3152 // allocate a new frame, filling the registers
duke@435 3153
duke@435 3154 gen_new_frame(masm, deopt); // allocate an interpreter frame
duke@435 3155
duke@435 3156 __ tst(O4array_size);
duke@435 3157 __ br(Assembler::notZero, false, Assembler::pn, loop);
duke@435 3158 __ delayed()->add(O3array, wordSize, O3array);
duke@435 3159 __ ld_ptr(G3pcs, 0, O7); // load final frame new pc
duke@435 3160
duke@435 3161 }
duke@435 3162
duke@435 3163 //------------------------------generate_deopt_blob----------------------------
duke@435 3164 // Ought to generate an ideal graph & compile, but here's some SPARC ASM
duke@435 3165 // instead.
duke@435 3166 void SharedRuntime::generate_deopt_blob() {
duke@435 3167 // allocate space for the code
duke@435 3168 ResourceMark rm;
duke@435 3169 // setup code generation tools
duke@435 3170 int pad = VerifyThread ? 512 : 0;// Extra slop space for more verify code
duke@435 3171 #ifdef _LP64
duke@435 3172 CodeBuffer buffer("deopt_blob", 2100+pad, 512);
duke@435 3173 #else
duke@435 3174 // Measured 8/7/03 at 1212 in 32bit debug build (no VerifyThread)
duke@435 3175 // Measured 8/7/03 at 1396 in 32bit debug build (VerifyThread)
duke@435 3176 CodeBuffer buffer("deopt_blob", 1600+pad, 512);
duke@435 3177 #endif /* _LP64 */
duke@435 3178 MacroAssembler* masm = new MacroAssembler(&buffer);
duke@435 3179 FloatRegister Freturn0 = F0;
duke@435 3180 Register Greturn1 = G1;
duke@435 3181 Register Oreturn0 = O0;
duke@435 3182 Register Oreturn1 = O1;
duke@435 3183 Register O2UnrollBlock = O2;
never@1472 3184 Register L0deopt_mode = L0;
never@1472 3185 Register G4deopt_mode = G4_scratch;
duke@435 3186 int frame_size_words;
twisti@1162 3187 Address saved_Freturn0_addr(FP, -sizeof(double) + STACK_BIAS);
duke@435 3188 #if !defined(_LP64) && defined(COMPILER2)
twisti@1162 3189 Address saved_Greturn1_addr(FP, -sizeof(double) -sizeof(jlong) + STACK_BIAS);
duke@435 3190 #endif
duke@435 3191 Label cont;
duke@435 3192
duke@435 3193 OopMapSet *oop_maps = new OopMapSet();
duke@435 3194
duke@435 3195 //
duke@435 3196 // This is the entry point for code which is returning to a de-optimized
duke@435 3197 // frame.
duke@435 3198 // The steps taken by this frame are as follows:
duke@435 3199 // - push a dummy "register_save" and save the return values (O0, O1, F0/F1, G1)
duke@435 3200 // and all potentially live registers (at a pollpoint many registers can be live).
duke@435 3201 //
duke@435 3202 // - call the C routine: Deoptimization::fetch_unroll_info (this function
duke@435 3203 // returns information about the number and size of interpreter frames
duke@435 3204 // which are equivalent to the frame which is being deoptimized)
duke@435 3205 // - deallocate the unpack frame, restoring only result values. Other
duke@435 3206 // volatile registers will now be captured in the vframeArray as needed.
duke@435 3207 // - deallocate the deoptimization frame
duke@435 3208 // - in a loop using the information returned in the previous step
duke@435 3209 // push new interpreter frames (take care to propagate the return
duke@435 3210 // values through each new frame pushed)
duke@435 3211 // - create a dummy "unpack_frame" and save the return values (O0, O1, F0)
duke@435 3212 // - call the C routine: Deoptimization::unpack_frames (this function
duke@435 3213 // lays out values on the interpreter frame which was just created)
duke@435 3214 // - deallocate the dummy unpack_frame
duke@435 3215 // - ensure that all the return values are correctly set and then do
duke@435 3216 // a return to the interpreter entry point
duke@435 3217 //
duke@435 3218 // Refer to the following methods for more information:
duke@435 3219 // - Deoptimization::fetch_unroll_info
duke@435 3220 // - Deoptimization::unpack_frames
duke@435 3221
duke@435 3222 OopMap* map = NULL;
duke@435 3223
duke@435 3224 int start = __ offset();
duke@435 3225
duke@435 3226 // restore G2, the trampoline destroyed it
duke@435 3227 __ get_thread();
duke@435 3228
duke@435 3229 // On entry we have been called by the deoptimized nmethod with a call that
duke@435 3230 // replaced the original call (or safepoint polling location) so the deoptimizing
duke@435 3231 // pc is now in O7. Return values are still in the expected places
duke@435 3232
duke@435 3233 map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words);
duke@435 3234 __ ba(false, cont);
never@1472 3235 __ delayed()->mov(Deoptimization::Unpack_deopt, L0deopt_mode);
duke@435 3236
duke@435 3237 int exception_offset = __ offset() - start;
duke@435 3238
duke@435 3239 // restore G2, the trampoline destroyed it
duke@435 3240 __ get_thread();
duke@435 3241
duke@435 3242 // On entry we have been jumped to by the exception handler (or exception_blob
duke@435 3243 // for server). O0 contains the exception oop and O7 contains the original
duke@435 3244 // exception pc. So if we push a frame here it will look to the
duke@435 3245 // stack walking code (fetch_unroll_info) just like a normal call so
duke@435 3246 // state will be extracted normally.
duke@435 3247
duke@435 3248 // save exception oop in JavaThread and fall through into the
duke@435 3249 // exception_in_tls case since they are handled in the same way except
duke@435 3250 // for where the pending exception is kept.
twisti@1162 3251 __ st_ptr(Oexception, G2_thread, JavaThread::exception_oop_offset());
duke@435 3252
duke@435 3253 //
duke@435 3254 // Vanilla deoptimization with an exception pending in exception_oop
duke@435 3255 //
duke@435 3256 int exception_in_tls_offset = __ offset() - start;
duke@435 3257
duke@435 3258 // No need to update oop_map as each call to save_live_registers will produce identical oopmap
duke@435 3259 (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_words);
duke@435 3260
duke@435 3261 // Restore G2_thread
duke@435 3262 __ get_thread();
duke@435 3263
duke@435 3264 #ifdef ASSERT
duke@435 3265 {
duke@435 3266 // verify that there is really an exception oop in exception_oop
duke@435 3267 Label has_exception;
twisti@1162 3268 __ ld_ptr(G2_thread, JavaThread::exception_oop_offset(), Oexception);
duke@435 3269 __ br_notnull(Oexception, false, Assembler::pt, has_exception);
duke@435 3270 __ delayed()-> nop();
duke@435 3271 __ stop("no exception in thread");
duke@435 3272 __ bind(has_exception);
duke@435 3273
duke@435 3274 // verify that there is no pending exception
duke@435 3275 Label no_pending_exception;
twisti@1162 3276 Address exception_addr(G2_thread, Thread::pending_exception_offset());
duke@435 3277 __ ld_ptr(exception_addr, Oexception);
duke@435 3278 __ br_null(Oexception, false, Assembler::pt, no_pending_exception);
duke@435 3279 __ delayed()->nop();
duke@435 3280 __ stop("must not have pending exception here");
duke@435 3281 __ bind(no_pending_exception);
duke@435 3282 }
duke@435 3283 #endif
duke@435 3284
duke@435 3285 __ ba(false, cont);
never@1472 3286 __ delayed()->mov(Deoptimization::Unpack_exception, L0deopt_mode);
duke@435 3287
duke@435 3288 //
duke@435 3289 // Reexecute entry, similar to c2 uncommon trap
duke@435 3290 //
duke@435 3291 int reexecute_offset = __ offset() - start;
duke@435 3292
duke@435 3293 // No need to update oop_map as each call to save_live_registers will produce identical oopmap
duke@435 3294 (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_words);
duke@435 3295
never@1472 3296 __ mov(Deoptimization::Unpack_reexecute, L0deopt_mode);
duke@435 3297
duke@435 3298 __ bind(cont);
duke@435 3299
duke@435 3300 __ set_last_Java_frame(SP, noreg);
duke@435 3301
duke@435 3302 // do the call by hand so we can get the oopmap
duke@435 3303
duke@435 3304 __ mov(G2_thread, L7_thread_cache);
duke@435 3305 __ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), relocInfo::runtime_call_type);
duke@435 3306 __ delayed()->mov(G2_thread, O0);
duke@435 3307
duke@435 3308 // Set an oopmap for the call site; this describes all our saved volatile registers
duke@435 3309
duke@435 3310 oop_maps->add_gc_map( __ offset()-start, map);
duke@435 3311
duke@435 3312 __ mov(L7_thread_cache, G2_thread);
duke@435 3313
duke@435 3314 __ reset_last_Java_frame();
duke@435 3315
duke@435 3316 // NOTE: we know that only O0/O1 will be reloaded by restore_result_registers
duke@435 3317 // so this move will survive
duke@435 3318
never@1472 3319 __ mov(L0deopt_mode, G4deopt_mode);
duke@435 3320
duke@435 3321 __ mov(O0, O2UnrollBlock->after_save());
duke@435 3322
duke@435 3323 RegisterSaver::restore_result_registers(masm);
duke@435 3324
duke@435 3325 Label noException;
never@1472 3326 __ cmp(G4deopt_mode, Deoptimization::Unpack_exception); // Was exception pending?
duke@435 3327 __ br(Assembler::notEqual, false, Assembler::pt, noException);
duke@435 3328 __ delayed()->nop();
duke@435 3329
duke@435 3330 // Move the pending exception from exception_oop to Oexception so
duke@435 3331 // the pending exception will be picked up by the interpreter.
duke@435 3332 __ ld_ptr(G2_thread, in_bytes(JavaThread::exception_oop_offset()), Oexception);
duke@435 3333 __ st_ptr(G0, G2_thread, in_bytes(JavaThread::exception_oop_offset()));
duke@435 3334 __ bind(noException);
duke@435 3335
duke@435 3336 // deallocate the deoptimization frame taking care to preserve the return values
duke@435 3337 __ mov(Oreturn0, Oreturn0->after_save());
duke@435 3338 __ mov(Oreturn1, Oreturn1->after_save());
duke@435 3339 __ mov(O2UnrollBlock, O2UnrollBlock->after_save());
duke@435 3340 __ restore();
duke@435 3341
duke@435 3342 // Allocate new interpreter frame(s) and possible c2i adapter frame
duke@435 3343
duke@435 3344 make_new_frames(masm, true);
duke@435 3345
duke@435 3346 // push a dummy "unpack_frame" taking care of float return values and
duke@435 3347 // call Deoptimization::unpack_frames to have the unpacker layout
duke@435 3348 // information in the interpreter frames just created and then return
duke@435 3349 // to the interpreter entry point
duke@435 3350 __ save(SP, -frame_size_words*wordSize, SP);
duke@435 3351 __ stf(FloatRegisterImpl::D, Freturn0, saved_Freturn0_addr);
duke@435 3352 #if !defined(_LP64)
duke@435 3353 #if defined(COMPILER2)
iveresov@2138 3354 // 32-bit 1-register longs return longs in G1
iveresov@2138 3355 __ stx(Greturn1, saved_Greturn1_addr);
duke@435 3356 #endif
duke@435 3357 __ set_last_Java_frame(SP, noreg);
never@1472 3358 __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), G2_thread, G4deopt_mode);
duke@435 3359 #else
duke@435 3360 // LP64 uses g4 in set_last_Java_frame
never@1472 3361 __ mov(G4deopt_mode, O1);
duke@435 3362 __ set_last_Java_frame(SP, G0);
duke@435 3363 __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), G2_thread, O1);
duke@435 3364 #endif
duke@435 3365 __ reset_last_Java_frame();
duke@435 3366 __ ldf(FloatRegisterImpl::D, saved_Freturn0_addr, Freturn0);
duke@435 3367
duke@435 3368 #if !defined(_LP64) && defined(COMPILER2)
duke@435 3369 // In 32 bit, C2 returns longs in G1 so restore the saved G1 into
iveresov@2138 3370 // I0/I1 if the return value is long.
iveresov@2138 3371 Label not_long;
iveresov@2138 3372 __ cmp(O0, T_LONG);
iveresov@2138 3373 __ br(Assembler::notEqual, false, Assembler::pt, not_long);
iveresov@2138 3374 __ delayed()->nop();
iveresov@2138 3375 __ ldd(saved_Greturn1_addr, I0);
iveresov@2138 3376 __ bind(not_long);
duke@435 3377 #endif
duke@435 3378 __ ret();
duke@435 3379 __ delayed()->restore();
duke@435 3380
duke@435 3381 masm->flush();
duke@435 3382 _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_words);
duke@435 3383 _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
duke@435 3384 }
duke@435 3385
duke@435 3386 #ifdef COMPILER2
duke@435 3387
duke@435 3388 //------------------------------generate_uncommon_trap_blob--------------------
duke@435 3389 // Ought to generate an ideal graph & compile, but here's some SPARC ASM
duke@435 3390 // instead.
duke@435 3391 void SharedRuntime::generate_uncommon_trap_blob() {
duke@435 3392 // allocate space for the code
duke@435 3393 ResourceMark rm;
duke@435 3394 // setup code generation tools
duke@435 3395 int pad = VerifyThread ? 512 : 0;
duke@435 3396 #ifdef _LP64
duke@435 3397 CodeBuffer buffer("uncommon_trap_blob", 2700+pad, 512);
duke@435 3398 #else
duke@435 3399 // Measured 8/7/03 at 660 in 32bit debug build (no VerifyThread)
duke@435 3400 // Measured 8/7/03 at 1028 in 32bit debug build (VerifyThread)
duke@435 3401 CodeBuffer buffer("uncommon_trap_blob", 2000+pad, 512);
duke@435 3402 #endif
duke@435 3403 MacroAssembler* masm = new MacroAssembler(&buffer);
duke@435 3404 Register O2UnrollBlock = O2;
duke@435 3405 Register O2klass_index = O2;
duke@435 3406
duke@435 3407 //
duke@435 3408 // This is the entry point for all traps the compiler takes when it thinks
duke@435 3409 // it cannot handle further execution of compiled code. The frame is
duke@435 3410 // deoptimized in these cases and converted into interpreter frames for
duke@435 3411 // execution.
duke@435 3412 // The steps taken by this frame are as follows:
duke@435 3413 // - push a fake "unpack_frame"
duke@435 3414 // - call the C routine Deoptimization::uncommon_trap (this function
duke@435 3415 // packs the current compiled frame into vframe arrays and returns
duke@435 3416 // information about the number and size of interpreter frames which
duke@435 3417 // are equivalent to the frame which is being deoptimized)
duke@435 3418 // - deallocate the "unpack_frame"
duke@435 3419 // - deallocate the deoptimization frame
duke@435 3420 // - in a loop using the information returned in the previous step
duke@435 3421 // push interpreter frames;
duke@435 3422 // - create a dummy "unpack_frame"
duke@435 3423 // - call the C routine: Deoptimization::unpack_frames (this function
duke@435 3424 // lays out values on the interpreter frame which was just created)
duke@435 3425 // - deallocate the dummy unpack_frame
duke@435 3426 // - return to the interpreter entry point
duke@435 3427 //
duke@435 3428 // Refer to the following methods for more information:
duke@435 3429 // - Deoptimization::uncommon_trap
duke@435 3430 // - Deoptimization::unpack_frame
duke@435 3431
duke@435 3432 // the unloaded class index is in O0 (first parameter to this blob)
duke@435 3433
duke@435 3434 // push a dummy "unpack_frame"
duke@435 3435 // and call Deoptimization::uncommon_trap to pack the compiled frame into
duke@435 3436 // vframe array and return the UnrollBlock information
duke@435 3437 __ save_frame(0);
duke@435 3438 __ set_last_Java_frame(SP, noreg);
duke@435 3439 __ mov(I0, O2klass_index);
duke@435 3440 __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap), G2_thread, O2klass_index);
duke@435 3441 __ reset_last_Java_frame();
duke@435 3442 __ mov(O0, O2UnrollBlock->after_save());
duke@435 3443 __ restore();
duke@435 3444
duke@435 3445 // deallocate the deoptimized frame taking care to preserve the return values
duke@435 3446 __ mov(O2UnrollBlock, O2UnrollBlock->after_save());
duke@435 3447 __ restore();
duke@435 3448
duke@435 3449 // Allocate new interpreter frame(s) and possible c2i adapter frame
duke@435 3450
duke@435 3451 make_new_frames(masm, false);
duke@435 3452
duke@435 3453 // push a dummy "unpack_frame" taking care of float return values and
duke@435 3454 // call Deoptimization::unpack_frames to have the unpacker layout
duke@435 3455 // information in the interpreter frames just created and then return
duke@435 3456 // to the interpreter entry point
duke@435 3457 __ save_frame(0);
duke@435 3458 __ set_last_Java_frame(SP, noreg);
duke@435 3459 __ mov(Deoptimization::Unpack_uncommon_trap, O3); // indicate it is the uncommon trap case
duke@435 3460 __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), G2_thread, O3);
duke@435 3461 __ reset_last_Java_frame();
duke@435 3462 __ ret();
duke@435 3463 __ delayed()->restore();
duke@435 3464
duke@435 3465 masm->flush();
duke@435 3466 _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, NULL, __ total_frame_size_in_bytes(0)/wordSize);
duke@435 3467 }
duke@435 3468
duke@435 3469 #endif // COMPILER2
duke@435 3470
duke@435 3471 //------------------------------generate_handler_blob-------------------
duke@435 3472 //
duke@435 3473 // Generate a special Compile2Runtime blob that saves all registers, and sets
duke@435 3474 // up an OopMap.
duke@435 3475 //
duke@435 3476 // This blob is jumped to (via a breakpoint and the signal handler) from a
duke@435 3477 // safepoint in compiled code. On entry to this blob, O7 contains the
duke@435 3478 // address in the original nmethod at which we should resume normal execution.
duke@435 3479 // Thus, this blob looks like a subroutine which must preserve lots of
duke@435 3480 // registers and return normally. Note that O7 is never register-allocated,
duke@435 3481 // so it is guaranteed to be free here.
duke@435 3482 //
duke@435 3483
duke@435 3484 // The hardest part of what this blob must do is to save the 64-bit %o
duke@435 3485 // registers in the 32-bit build. A simple 'save' turns the %o's into %i's and
duke@435 3486 // an interrupt will chop off their heads. Making space in the caller's frame
duke@435 3487 // first will let us save the 64-bit %o's before save'ing, but we cannot hand
duke@435 3488 // the adjusted FP off to the GC stack-crawler: this will modify the caller's
duke@435 3489 // SP and mess up HIS OopMaps. So we first adjust the caller's SP, then save
duke@435 3490 // the 64-bit %o's, then do a save, then fixup the caller's SP (our FP).
duke@435 3491 // Tricky, tricky, tricky...
duke@435 3492
duke@435 3493 static SafepointBlob* generate_handler_blob(address call_ptr, bool cause_return) {
duke@435 3494 assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");
duke@435 3495
duke@435 3496 // allocate space for the code
duke@435 3497 ResourceMark rm;
duke@435 3498 // setup code generation tools
duke@435 3499 // Measured 8/7/03 at 896 in 32bit debug build (no VerifyThread)
duke@435 3500 // Measured 8/7/03 at 1080 in 32bit debug build (VerifyThread)
duke@435 3501 // even larger with TraceJumps
duke@435 3502 int pad = TraceJumps ? 512 : 0;
duke@435 3503 CodeBuffer buffer("handler_blob", 1600 + pad, 512);
duke@435 3504 MacroAssembler* masm = new MacroAssembler(&buffer);
duke@435 3505 int frame_size_words;
duke@435 3506 OopMapSet *oop_maps = new OopMapSet();
duke@435 3507 OopMap* map = NULL;
duke@435 3508
duke@435 3509 int start = __ offset();
duke@435 3510
duke@435 3511 // If this causes a return before the processing, then do a "restore"
duke@435 3512 if (cause_return) {
duke@435 3513 __ restore();
duke@435 3514 } else {
duke@435 3515 // Make it look like we were called via the poll
duke@435 3516 // so that frame constructor always sees a valid return address
duke@435 3517 __ ld_ptr(G2_thread, in_bytes(JavaThread::saved_exception_pc_offset()), O7);
duke@435 3518 __ sub(O7, frame::pc_return_offset, O7);
duke@435 3519 }
duke@435 3520
duke@435 3521 map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words);
duke@435 3522
duke@435 3523 // setup last_Java_sp (blows G4)
duke@435 3524 __ set_last_Java_frame(SP, noreg);
duke@435 3525
duke@435 3526 // call into the runtime to handle illegal instructions exception
duke@435 3527 // Do not use call_VM_leaf, because we need to make a GC map at this call site.
duke@435 3528 __ mov(G2_thread, O0);
duke@435 3529 __ save_thread(L7_thread_cache);
duke@435 3530 __ call(call_ptr);
duke@435 3531 __ delayed()->nop();
duke@435 3532
duke@435 3533 // Set an oopmap for the call site.
duke@435 3534 // We need this not only for callee-saved registers, but also for volatile
duke@435 3535 // registers that the compiler might be keeping live across a safepoint.
duke@435 3536
duke@435 3537 oop_maps->add_gc_map( __ offset() - start, map);
duke@435 3538
duke@435 3539 __ restore_thread(L7_thread_cache);
duke@435 3540 // clear last_Java_sp
duke@435 3541 __ reset_last_Java_frame();
duke@435 3542
duke@435 3543 // Check for exceptions
duke@435 3544 Label pending;
duke@435 3545
duke@435 3546 __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), O1);
duke@435 3547 __ tst(O1);
duke@435 3548 __ brx(Assembler::notEqual, true, Assembler::pn, pending);
duke@435 3549 __ delayed()->nop();
duke@435 3550
duke@435 3551 RegisterSaver::restore_live_registers(masm);
duke@435 3552
duke@435 3553 // We are back to the original state on entry and ready to go.
duke@435 3554
duke@435 3555 __ retl();
duke@435 3556 __ delayed()->nop();
duke@435 3557
duke@435 3558 // Pending exception after the safepoint
duke@435 3559
duke@435 3560 __ bind(pending);
duke@435 3561
duke@435 3562 RegisterSaver::restore_live_registers(masm);
duke@435 3563
duke@435 3564   // We are back in the original state on entry.
duke@435 3565
duke@435 3566 // Tail-call forward_exception_entry, with the issuing PC in O7,
duke@435 3567 // so it looks like the original nmethod called forward_exception_entry.
duke@435 3568 __ set((intptr_t)StubRoutines::forward_exception_entry(), O0);
duke@435 3569 __ JMP(O0, 0);
duke@435 3570 __ delayed()->nop();
duke@435 3571
duke@435 3572 // -------------
duke@435 3573 // make sure all code is generated
duke@435 3574 masm->flush();
duke@435 3575
duke@435 3576 // return exception blob
duke@435 3577 return SafepointBlob::create(&buffer, oop_maps, frame_size_words);
duke@435 3578 }
duke@435 3579
duke@435 3580 //
duke@435 3581 // generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss)
duke@435 3582 //
duke@435 3583 // Generate a stub that calls into the VM to find out the proper destination
duke@435 3584 // of a Java call. All the argument registers are live at this point,
duke@435 3585 // but since this is generic code we don't know what they are and the caller
duke@435 3586 // must do any GC of the args.
duke@435 3587 //
duke@435 3588 static RuntimeStub* generate_resolve_blob(address destination, const char* name) {
duke@435 3589 assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");
duke@435 3590
duke@435 3591 // allocate space for the code
duke@435 3592 ResourceMark rm;
duke@435 3593 // setup code generation tools
duke@435 3594 // Measured 8/7/03 at 896 in 32bit debug build (no VerifyThread)
duke@435 3595 // Measured 8/7/03 at 1080 in 32bit debug build (VerifyThread)
duke@435 3596 // even larger with TraceJumps
duke@435 3597 int pad = TraceJumps ? 512 : 0;
duke@435 3598 CodeBuffer buffer(name, 1600 + pad, 512);
duke@435 3599 MacroAssembler* masm = new MacroAssembler(&buffer);
duke@435 3600 int frame_size_words;
duke@435 3601 OopMapSet *oop_maps = new OopMapSet();
duke@435 3602 OopMap* map = NULL;
duke@435 3603
duke@435 3604 int start = __ offset();
duke@435 3605
duke@435 3606 map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words);
duke@435 3607
duke@435 3608 int frame_complete = __ offset();
duke@435 3609
duke@435 3610 // setup last_Java_sp (blows G4)
duke@435 3611 __ set_last_Java_frame(SP, noreg);
duke@435 3612
duke@435 3613   // call into the runtime to find the proper destination of the call
duke@435 3614 // Do not use call_VM_leaf, because we need to make a GC map at this call site.
duke@435 3615 __ mov(G2_thread, O0);
duke@435 3616 __ save_thread(L7_thread_cache);
duke@435 3617 __ call(destination, relocInfo::runtime_call_type);
duke@435 3618 __ delayed()->nop();
duke@435 3619
duke@435 3620   // O0 contains the address we are going to jump to, assuming no exception got installed
duke@435 3621
duke@435 3622 // Set an oopmap for the call site.
duke@435 3623 // We need this not only for callee-saved registers, but also for volatile
duke@435 3624 // registers that the compiler might be keeping live across a safepoint.
duke@435 3625
duke@435 3626 oop_maps->add_gc_map( __ offset() - start, map);
duke@435 3627
duke@435 3628 __ restore_thread(L7_thread_cache);
duke@435 3629 // clear last_Java_sp
duke@435 3630 __ reset_last_Java_frame();
duke@435 3631
duke@435 3632 // Check for exceptions
duke@435 3633 Label pending;
duke@435 3634
duke@435 3635 __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), O1);
duke@435 3636 __ tst(O1);
duke@435 3637 __ brx(Assembler::notEqual, true, Assembler::pn, pending);
duke@435 3638 __ delayed()->nop();
duke@435 3639
duke@435 3640 // get the returned methodOop
duke@435 3641
duke@435 3642 __ get_vm_result(G5_method);
duke@435 3643 __ stx(G5_method, SP, RegisterSaver::G5_offset()+STACK_BIAS);
duke@435 3644
duke@435 3645   // O0 is where we want to jump; overwrite the G3 save slot, since G3 is saved and scratch
duke@435 3646
duke@435 3647 __ stx(O0, SP, RegisterSaver::G3_offset()+STACK_BIAS);
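  // Both values now live in the register save area, so restore_live_registers()
  // below reloads them into G5 and G3 for the jump that follows.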
duke@435 3648
duke@435 3649 RegisterSaver::restore_live_registers(masm);
duke@435 3650
duke@435 3651   // We are back in the original state on entry and ready to go.
duke@435 3652
duke@435 3653 __ JMP(G3, 0);
duke@435 3654 __ delayed()->nop();
duke@435 3655
duke@435 3656 // Pending exception after the safepoint
duke@435 3657
duke@435 3658 __ bind(pending);
duke@435 3659
duke@435 3660 RegisterSaver::restore_live_registers(masm);
duke@435 3661
duke@435 3662   // We are back in the original state on entry.
duke@435 3663
duke@435 3664 // Tail-call forward_exception_entry, with the issuing PC in O7,
duke@435 3665 // so it looks like the original nmethod called forward_exception_entry.
duke@435 3666 __ set((intptr_t)StubRoutines::forward_exception_entry(), O0);
duke@435 3667 __ JMP(O0, 0);
duke@435 3668 __ delayed()->nop();
duke@435 3669
duke@435 3670 // -------------
duke@435 3671 // make sure all code is generated
duke@435 3672 masm->flush();
duke@435 3673
duke@435 3674 // return the blob
duke@435 3675 // frame_size_words or bytes??
duke@435 3676 return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true);
duke@435 3677 }
duke@435 3678
duke@435 3679 void SharedRuntime::generate_stubs() {
duke@435 3680
duke@435 3681 _wrong_method_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::handle_wrong_method),
duke@435 3682 "wrong_method_stub");
duke@435 3683
duke@435 3684 _ic_miss_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::handle_wrong_method_ic_miss),
duke@435 3685 "ic_miss_stub");
duke@435 3686
duke@435 3687 _resolve_opt_virtual_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::resolve_opt_virtual_call_C),
duke@435 3688 "resolve_opt_virtual_call");
duke@435 3689
duke@435 3690 _resolve_virtual_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::resolve_virtual_call_C),
duke@435 3691 "resolve_virtual_call");
duke@435 3692
duke@435 3693 _resolve_static_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::resolve_static_call_C),
duke@435 3694 "resolve_static_call");
duke@435 3695
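  // The two polling page handler blobs share one generator; cause_return is
  // false for a poll taken inside a method and true for a poll taken at a return.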
duke@435 3696 _polling_page_safepoint_handler_blob =
duke@435 3697 generate_handler_blob(CAST_FROM_FN_PTR(address,
duke@435 3698 SafepointSynchronize::handle_polling_page_exception), false);
duke@435 3699
duke@435 3700 _polling_page_return_handler_blob =
duke@435 3701 generate_handler_blob(CAST_FROM_FN_PTR(address,
duke@435 3702 SafepointSynchronize::handle_polling_page_exception), true);
duke@435 3703
duke@435 3704 generate_deopt_blob();
duke@435 3705
duke@435 3706 #ifdef COMPILER2
duke@435 3707 generate_uncommon_trap_blob();
duke@435 3708 #endif // COMPILER2
duke@435 3709 }
