src/cpu/sparc/vm/sharedRuntime_sparc.cpp

author:       johnc
date:         Thu, 07 Apr 2011 09:53:20 -0700
changeset:    2781:e1162778c1c8
parent:       2497:3582bf76420e
child:        2687:3d58a4983660
permissions:  -rw-r--r--

7009266: G1: assert(obj->is_oop_or_null(true )) failed: Error
Summary: A referent object that is only weakly reachable at the start of concurrent marking, but is re-attached to the strongly reachable object graph during marking, may not be marked as live. This can cause the reference object to be processed prematurely and leave dangling pointers to the referent object. Implement a read barrier for the java.lang.ref.Reference::referent field by intrinsifying the Reference.get() method, and by intercepting accesses through JNI, reflection, and Unsafe, so that whenever a non-null referent object is read it is also logged in an SATB buffer.
Reviewed-by: kvn, iveresov, never, tonyp, dholmes
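
The essence of the change, for readers of the SPARC code below, is a G1 pre-barrier applied to loads of the referent field. The following is only a conceptual sketch of that idea, not code from this changeset; helper names such as load_referent_field, marking_is_active and satb_enqueue are placeholders:

    // Conceptual sketch of the SATB read barrier described in the summary.
    // All helper names here are placeholders, not actual HotSpot entry points.
    oop Reference_get(oop reference) {
      oop referent = load_referent_field(reference);   // ordinary field load
      if (referent != NULL && marking_is_active()) {
        // Log the referent so concurrent marking treats it as live even if it
        // was only weakly reachable when marking started.
        satb_enqueue(referent);
      }
      return referent;
    }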

     1 /*
     2  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.
     8  *
     9  * This code is distributed in the hope that it will be useful, but WITHOUT
    10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    12  * version 2 for more details (a copy is included in the LICENSE file that
    13  * accompanied this code).
    14  *
    15  * You should have received a copy of the GNU General Public License version
    16  * 2 along with this work; if not, write to the Free Software Foundation,
    17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    18  *
    19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    20  * or visit www.oracle.com if you need additional information or have any
    21  * questions.
    22  *
    23  */
    25 #include "precompiled.hpp"
    26 #include "asm/assembler.hpp"
    27 #include "assembler_sparc.inline.hpp"
    28 #include "code/debugInfoRec.hpp"
    29 #include "code/icBuffer.hpp"
    30 #include "code/vtableStubs.hpp"
    31 #include "interpreter/interpreter.hpp"
    32 #include "oops/compiledICHolderOop.hpp"
    33 #include "prims/jvmtiRedefineClassesTrace.hpp"
    34 #include "runtime/sharedRuntime.hpp"
    35 #include "runtime/vframeArray.hpp"
    36 #include "vmreg_sparc.inline.hpp"
    37 #ifdef COMPILER1
    38 #include "c1/c1_Runtime1.hpp"
    39 #endif
    40 #ifdef COMPILER2
    41 #include "opto/runtime.hpp"
    42 #endif
    43 #ifdef SHARK
    44 #include "compiler/compileBroker.hpp"
    45 #include "shark/sharkCompiler.hpp"
    46 #endif
    48 #define __ masm->
    50 #ifdef COMPILER2
    51 UncommonTrapBlob*   SharedRuntime::_uncommon_trap_blob;
    52 #endif // COMPILER2
    54 DeoptimizationBlob* SharedRuntime::_deopt_blob;
    55 SafepointBlob*      SharedRuntime::_polling_page_safepoint_handler_blob;
    56 SafepointBlob*      SharedRuntime::_polling_page_return_handler_blob;
    57 RuntimeStub*        SharedRuntime::_wrong_method_blob;
    58 RuntimeStub*        SharedRuntime::_ic_miss_blob;
    59 RuntimeStub*        SharedRuntime::_resolve_opt_virtual_call_blob;
    60 RuntimeStub*        SharedRuntime::_resolve_virtual_call_blob;
    61 RuntimeStub*        SharedRuntime::_resolve_static_call_blob;
    63 class RegisterSaver {
    65   // Used for saving volatile registers: the G regs, F regs, and I/L/O regs.
    66   // The Oregs are problematic. In the 32bit build the compiler can
    67   // have O registers live with 64 bit quantities. A window save will
    68   // cut the heads off of the registers. We have to do a very extensive
    69   // stack dance to save and restore these properly.
    71   // Note that the Oregs problem only exists if we block at either a polling
    72   // page exception or at a compiled code safepoint that was not originally a call,
    73   // or if we deoptimize following one of these kinds of safepoints.
    75   // Lots of registers to save.  For all builds, a window save will preserve
    76   // the %i and %l registers.  For the 32-bit longs-in-two entries and 64-bit
    77   // builds a window-save will preserve the %o registers.  In the LION build
    78   // we need to save the 64-bit %o registers which requires we save them
    79   // before the window-save (as then they become %i registers and get their
    80   // heads chopped off on interrupt).  We have to save some %g registers here
    81   // as well.
    82   enum {
    83     // This frame's save area.  Includes extra space for the native call:
    84     // vararg's layout space and the like.  Briefly holds the caller's
    85     // register save area.
    86     call_args_area = frame::register_save_words_sp_offset +
    87                      frame::memory_parameter_word_sp_offset*wordSize,
    88     // Make sure save locations are always 8-byte aligned.
    89     // We can't use round_to because it doesn't produce a compile-time constant.
    90     start_of_extra_save_area = ((call_args_area + 7) & ~7),
    91     g1_offset = start_of_extra_save_area, // g-regs needing saving
    92     g3_offset = g1_offset+8,
    93     g4_offset = g3_offset+8,
    94     g5_offset = g4_offset+8,
    95     o0_offset = g5_offset+8,
    96     o1_offset = o0_offset+8,
    97     o2_offset = o1_offset+8,
    98     o3_offset = o2_offset+8,
    99     o4_offset = o3_offset+8,
   100     o5_offset = o4_offset+8,
   101     start_of_flags_save_area = o5_offset+8,
   102     ccr_offset = start_of_flags_save_area,
   103     fsr_offset = ccr_offset + 8,
   104     d00_offset = fsr_offset+8,  // Start of float save area
   105     register_save_size = d00_offset+8*32
   106   };
   109   public:
   111   static int Oexception_offset() { return o0_offset; };
   112   static int G3_offset() { return g3_offset; };
   113   static int G5_offset() { return g5_offset; };
   114   static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words);
   115   static void restore_live_registers(MacroAssembler* masm);
   117   // During deoptimization only the result registers need to be restored;
   118   // all the other values have already been extracted.
   120   static void restore_result_registers(MacroAssembler* masm);
   121 };
   123 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) {
   124   // Record volatile registers as callee-save values in an OopMap so their save locations will be
   125   // propagated to the caller frame's RegisterMap during StackFrameStream construction (needed for
   126   // deoptimization; see compiledVFrame::create_stack_value).  The caller's I, L and O registers
   127   // are saved in register windows - I's and L's in the caller's frame and O's in the stub frame
   128   // (as the stub's I's) when the runtime routine called by the stub creates its frame.
   129   int i;
   130   // Always make the frame size 16 byte aligned.
   131   int frame_size = round_to(additional_frame_words + register_save_size, 16);
   132   // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words
   133   int frame_size_in_slots = frame_size / sizeof(jint);
   134   // CodeBlob frame size is in words.
   135   *total_frame_words = frame_size / wordSize;
   136   // OopMap* map = new OopMap(*total_frame_words, 0);
   137   OopMap* map = new OopMap(frame_size_in_slots, 0);
   139 #if !defined(_LP64)
   141   // Save 64-bit O registers; they will get their heads chopped off on a 'save'.
   142   __ stx(O0, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8);
   143   __ stx(O1, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8);
   144   __ stx(O2, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+2*8);
   145   __ stx(O3, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+3*8);
   146   __ stx(O4, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+4*8);
   147   __ stx(O5, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+5*8);
   148 #endif /* _LP64 */
   150   __ save(SP, -frame_size, SP);
   152 #ifndef _LP64
   153   // Reload the 64 bit Oregs. Although they are now Iregs we load them
   154   // to Oregs here to avoid interrupts cutting off their heads
   156   __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8, O0);
   157   __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8, O1);
   158   __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+2*8, O2);
   159   __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+3*8, O3);
   160   __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+4*8, O4);
   161   __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+5*8, O5);
   163   __ stx(O0, SP, o0_offset+STACK_BIAS);
   164   map->set_callee_saved(VMRegImpl::stack2reg((o0_offset + 4)>>2), O0->as_VMReg());
   166   __ stx(O1, SP, o1_offset+STACK_BIAS);
   168   map->set_callee_saved(VMRegImpl::stack2reg((o1_offset + 4)>>2), O1->as_VMReg());
   170   __ stx(O2, SP, o2_offset+STACK_BIAS);
   171   map->set_callee_saved(VMRegImpl::stack2reg((o2_offset + 4)>>2), O2->as_VMReg());
   173   __ stx(O3, SP, o3_offset+STACK_BIAS);
   174   map->set_callee_saved(VMRegImpl::stack2reg((o3_offset + 4)>>2), O3->as_VMReg());
   176   __ stx(O4, SP, o4_offset+STACK_BIAS);
   177   map->set_callee_saved(VMRegImpl::stack2reg((o4_offset + 4)>>2), O4->as_VMReg());
   179   __ stx(O5, SP, o5_offset+STACK_BIAS);
   180   map->set_callee_saved(VMRegImpl::stack2reg((o5_offset + 4)>>2), O5->as_VMReg());
   181 #endif /* _LP64 */
   184 #ifdef _LP64
   185   int debug_offset = 0;
   186 #else
   187   int debug_offset = 4;
   188 #endif
   189   // Save the G's
   190   __ stx(G1, SP, g1_offset+STACK_BIAS);
   191   map->set_callee_saved(VMRegImpl::stack2reg((g1_offset + debug_offset)>>2), G1->as_VMReg());
   193   __ stx(G3, SP, g3_offset+STACK_BIAS);
   194   map->set_callee_saved(VMRegImpl::stack2reg((g3_offset + debug_offset)>>2), G3->as_VMReg());
   196   __ stx(G4, SP, g4_offset+STACK_BIAS);
   197   map->set_callee_saved(VMRegImpl::stack2reg((g4_offset + debug_offset)>>2), G4->as_VMReg());
   199   __ stx(G5, SP, g5_offset+STACK_BIAS);
   200   map->set_callee_saved(VMRegImpl::stack2reg((g5_offset + debug_offset)>>2), G5->as_VMReg());
   202   // This is really a waste but we'll keep things as they were for now
   203   if (true) {
   204 #ifndef _LP64
   205     map->set_callee_saved(VMRegImpl::stack2reg((o0_offset)>>2), O0->as_VMReg()->next());
   206     map->set_callee_saved(VMRegImpl::stack2reg((o1_offset)>>2), O1->as_VMReg()->next());
   207     map->set_callee_saved(VMRegImpl::stack2reg((o2_offset)>>2), O2->as_VMReg()->next());
   208     map->set_callee_saved(VMRegImpl::stack2reg((o3_offset)>>2), O3->as_VMReg()->next());
   209     map->set_callee_saved(VMRegImpl::stack2reg((o4_offset)>>2), O4->as_VMReg()->next());
   210     map->set_callee_saved(VMRegImpl::stack2reg((o5_offset)>>2), O5->as_VMReg()->next());
   211     map->set_callee_saved(VMRegImpl::stack2reg((g1_offset)>>2), G1->as_VMReg()->next());
   212     map->set_callee_saved(VMRegImpl::stack2reg((g3_offset)>>2), G3->as_VMReg()->next());
   213     map->set_callee_saved(VMRegImpl::stack2reg((g4_offset)>>2), G4->as_VMReg()->next());
   214     map->set_callee_saved(VMRegImpl::stack2reg((g5_offset)>>2), G5->as_VMReg()->next());
   215 #endif /* _LP64 */
   216   }
   219   // Save the flags
   220   __ rdccr( G5 );
   221   __ stx(G5, SP, ccr_offset+STACK_BIAS);
   222   __ stxfsr(SP, fsr_offset+STACK_BIAS);
   224   // Save all the FP registers: 32 doubles (32 floats correspond to the 2 halves of the first 16 doubles)
   225   int offset = d00_offset;
   226   for( int i=0; i<FloatRegisterImpl::number_of_registers; i+=2 ) {
   227     FloatRegister f = as_FloatRegister(i);
   228     __ stf(FloatRegisterImpl::D,  f, SP, offset+STACK_BIAS);
   229     // Record as callee saved both halves of double registers (2 float registers).
   230     map->set_callee_saved(VMRegImpl::stack2reg(offset>>2), f->as_VMReg());
   231     map->set_callee_saved(VMRegImpl::stack2reg((offset + sizeof(float))>>2), f->as_VMReg()->next());
   232     offset += sizeof(double);
   233   }
   235   // And we're done.
   237   return map;
   238 }
   241 // Pop the current frame and restore all the registers that we
   242 // saved.
   243 void RegisterSaver::restore_live_registers(MacroAssembler* masm) {
   245   // Restore all the FP registers
   246   for( int i=0; i<FloatRegisterImpl::number_of_registers; i+=2 ) {
   247     __ ldf(FloatRegisterImpl::D, SP, d00_offset+i*sizeof(float)+STACK_BIAS, as_FloatRegister(i));
   248   }
   250   __ ldx(SP, ccr_offset+STACK_BIAS, G1);
   251   __ wrccr (G1) ;
   253   // Restore the G's
   254   // Note that G2 (AKA GThread) must be saved and restored separately.
   255   // TODO-FIXME: save and restore some of the other ASRs, viz., %asi and %gsr.
   257   __ ldx(SP, g1_offset+STACK_BIAS, G1);
   258   __ ldx(SP, g3_offset+STACK_BIAS, G3);
   259   __ ldx(SP, g4_offset+STACK_BIAS, G4);
   260   __ ldx(SP, g5_offset+STACK_BIAS, G5);
   263 #if !defined(_LP64)
   264   // Restore the 64-bit O's.
   265   __ ldx(SP, o0_offset+STACK_BIAS, O0);
   266   __ ldx(SP, o1_offset+STACK_BIAS, O1);
   267   __ ldx(SP, o2_offset+STACK_BIAS, O2);
   268   __ ldx(SP, o3_offset+STACK_BIAS, O3);
   269   __ ldx(SP, o4_offset+STACK_BIAS, O4);
   270   __ ldx(SP, o5_offset+STACK_BIAS, O5);
   272   // And temporarily place them in TLS
   274   __ stx(O0, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8);
   275   __ stx(O1, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8);
   276   __ stx(O2, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+2*8);
   277   __ stx(O3, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+3*8);
   278   __ stx(O4, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+4*8);
   279   __ stx(O5, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+5*8);
   280 #endif /* _LP64 */
   282   // Restore flags
   284   __ ldxfsr(SP, fsr_offset+STACK_BIAS);
   286   __ restore();
   288 #if !defined(_LP64)
   289   // Now reload the 64-bit Oregs after we've restored the window.
   290   __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8, O0);
   291   __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8, O1);
   292   __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+2*8, O2);
   293   __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+3*8, O3);
   294   __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+4*8, O4);
   295   __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+5*8, O5);
   296 #endif /* _LP64 */
   298 }
   300 // Pop the current frame and restore the registers that might be holding
   301 // a result.
   302 void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
   304 #if !defined(_LP64)
   305   // 32bit build returns longs in G1
   306   __ ldx(SP, g1_offset+STACK_BIAS, G1);
   308   // Retrieve the 64-bit O's.
   309   __ ldx(SP, o0_offset+STACK_BIAS, O0);
   310   __ ldx(SP, o1_offset+STACK_BIAS, O1);
   311   // and save to TLS
   312   __ stx(O0, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8);
   313   __ stx(O1, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8);
   314 #endif /* _LP64 */
   316   __ ldf(FloatRegisterImpl::D, SP, d00_offset+STACK_BIAS, as_FloatRegister(0));
   318   __ restore();
   320 #if !defined(_LP64)
   321   // Now reload the 64-bit Oregs after we've restored the window.
   322   __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8, O0);
   323   __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8, O1);
   324 #endif /* _LP64 */
   326 }
   328 // The java_calling_convention describes stack locations as ideal slots on
   329 // a frame with no abi restrictions. Since we must observe abi restrictions
   330 // (like the placement of the register window) the slots must be biased by
   331 // the following value.
   332 static int reg2offset(VMReg r) {
   333   return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
   334 }
   336 // ---------------------------------------------------------------------------
   337 // Read the array of BasicTypes from a signature, and compute where the
   338 // arguments should go.  Values in the VMRegPair regs array refer to 4-byte (VMRegImpl::stack_slot_size)
   339 // quantities.  Values less than VMRegImpl::stack0 are registers, those above
   340 // refer to 4-byte stack slots.  All stack slots are based off of the window
   341 // top.  VMRegImpl::stack0 refers to the first slot past the 16-word window,
   342 // and VMRegImpl::stack0+1 refers to the memory word 4 bytes higher.  Register
   343 // values 0-63 (up to RegisterImpl::number_of_registers) are the 64-bit
   344 // integer registers.  Values 64-95 are the (32-bit only) float registers.
   345 // Each 32-bit quantity is given its own number, so the integer registers
   346 // (in either 32- or 64-bit builds) use 2 numbers.  For example, there is
   347 // an O0-low and an O0-high.  Essentially, all int register numbers are doubled.
   349 // Register results are passed in O0-O5, for outgoing call arguments.  To
   350 // convert to incoming arguments, convert all O's to I's.  The regs array
   351 // refers to the low and high 32-bit words of 64-bit registers or stack slots.
   352 // If the regs[].second() field is set to VMRegImpl::Bad(), it means it's unused (a
   353 // 32-bit value was passed).  If both are VMRegImpl::Bad(), it means no value was
   354 // passed (used as a placeholder for the other half of longs and doubles in
   355 // the 64-bit build).  regs[].second() is either VMRegImpl::Bad() or regs[].second() is
   356 // regs[].first()+1 (regs[].first() may be misaligned in the C calling convention).
   357 // Sparc never passes a value in regs[].second() but not regs[].first() (regs[].first()
   358 // == VMRegImpl::Bad() && regs[].second() != VMRegImpl::Bad()) nor unrelated values in the
   359 // same VMRegPair.
   361 // Note: the INPUTS in sig_bt are in units of Java argument words, which are
   362 // either 32-bit or 64-bit depending on the build.  The OUTPUTS are in 32-bit
   363 // units regardless of build.
   366 // ---------------------------------------------------------------------------
   367 // The compiled Java calling convention.  The Java convention always passes
   368 // 64-bit values in adjacent aligned locations (either registers or stack),
   369 // floats in float registers and doubles in aligned float pairs.  Values are
   370 // packed in the registers.  There is no backing varargs store for values in
   371 // registers.  In the 32-bit build, longs are passed in G1 and G4 (cannot be
   372 // passed in I's, because longs in I's get their heads chopped off at
   373 // interrupt).
   374 int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
   375                                            VMRegPair *regs,
   376                                            int total_args_passed,
   377                                            int is_outgoing) {
   378   assert(F31->as_VMReg()->is_reg(), "overlapping stack/register numbers");
   380   // Convention is to pack the first 6 int/oop args into the first 6 registers
   381   // (I0-I5), extras spill to the stack.  Then pack the first 8 float args
   382   // into F0-F7, extras spill to the stack.  Then pad all register sets to
   383   // align.  Then put longs and doubles into the same registers as they fit,
   384   // else spill to the stack.
   385   const int int_reg_max = SPARC_ARGS_IN_REGS_NUM;
   386   const int flt_reg_max = 8;
   387   //
   388   // Where 32-bit 1-reg longs start being passed
   389   // In tiered we must pass on stack because c1 can't use a "pair" in a single reg.
   390   // So make it look like we've filled all the G regs that c2 wants to use.
   391   Register g_reg = TieredCompilation ? noreg : G1;
   393   // Count int/oop and float args.  See how many stack slots we'll need and
   394   // where the longs & doubles will go.
   395   int int_reg_cnt   = 0;
   396   int flt_reg_cnt   = 0;
   397   // int stk_reg_pairs = frame::register_save_words*(wordSize>>2);
   398   // int stk_reg_pairs = SharedRuntime::out_preserve_stack_slots();
   399   int stk_reg_pairs = 0;
   400   for (int i = 0; i < total_args_passed; i++) {
   401     switch (sig_bt[i]) {
   402     case T_LONG:                // LP64, longs compete with int args
   403       assert(sig_bt[i+1] == T_VOID, "");
   404 #ifdef _LP64
   405       if (int_reg_cnt < int_reg_max) int_reg_cnt++;
   406 #endif
   407       break;
   408     case T_OBJECT:
   409     case T_ARRAY:
   410     case T_ADDRESS: // Used, e.g., in slow-path locking for the lock's stack address
   411       if (int_reg_cnt < int_reg_max) int_reg_cnt++;
   412 #ifndef _LP64
   413       else                            stk_reg_pairs++;
   414 #endif
   415       break;
   416     case T_INT:
   417     case T_SHORT:
   418     case T_CHAR:
   419     case T_BYTE:
   420     case T_BOOLEAN:
   421       if (int_reg_cnt < int_reg_max) int_reg_cnt++;
   422       else                            stk_reg_pairs++;
   423       break;
   424     case T_FLOAT:
   425       if (flt_reg_cnt < flt_reg_max) flt_reg_cnt++;
   426       else                            stk_reg_pairs++;
   427       break;
   428     case T_DOUBLE:
   429       assert(sig_bt[i+1] == T_VOID, "");
   430       break;
   431     case T_VOID:
   432       break;
   433     default:
   434       ShouldNotReachHere();
   435     }
   436   }
   438   // This is where the longs/doubles start on the stack.
   439   stk_reg_pairs = (stk_reg_pairs+1) & ~1; // Round
   441   int int_reg_pairs = (int_reg_cnt+1) & ~1; // 32-bit 2-reg longs only
   442   int flt_reg_pairs = (flt_reg_cnt+1) & ~1;
   444   // int stk_reg = frame::register_save_words*(wordSize>>2);
   445   // int stk_reg = SharedRuntime::out_preserve_stack_slots();
   446   int stk_reg = 0;
   447   int int_reg = 0;
   448   int flt_reg = 0;
   450   // Now do the signature layout
   451   for (int i = 0; i < total_args_passed; i++) {
   452     switch (sig_bt[i]) {
   453     case T_INT:
   454     case T_SHORT:
   455     case T_CHAR:
   456     case T_BYTE:
   457     case T_BOOLEAN:
   458 #ifndef _LP64
   459     case T_OBJECT:
   460     case T_ARRAY:
   461     case T_ADDRESS: // Used, e.g., in slow-path locking for the lock's stack address
   462 #endif // _LP64
   463       if (int_reg < int_reg_max) {
   464         Register r = is_outgoing ? as_oRegister(int_reg++) : as_iRegister(int_reg++);
   465         regs[i].set1(r->as_VMReg());
   466       } else {
   467         regs[i].set1(VMRegImpl::stack2reg(stk_reg++));
   468       }
   469       break;
   471 #ifdef _LP64
   472     case T_OBJECT:
   473     case T_ARRAY:
   474     case T_ADDRESS: // Used, e.g., in slow-path locking for the lock's stack address
   475       if (int_reg < int_reg_max) {
   476         Register r = is_outgoing ? as_oRegister(int_reg++) : as_iRegister(int_reg++);
   477         regs[i].set2(r->as_VMReg());
   478       } else {
   479         regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs));
   480         stk_reg_pairs += 2;
   481       }
   482       break;
   483 #endif // _LP64
   485     case T_LONG:
   486       assert(sig_bt[i+1] == T_VOID, "expecting VOID in other half");
   487 #ifdef _LP64
   488         if (int_reg < int_reg_max) {
   489           Register r = is_outgoing ? as_oRegister(int_reg++) : as_iRegister(int_reg++);
   490           regs[i].set2(r->as_VMReg());
   491         } else {
   492           regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs));
   493           stk_reg_pairs += 2;
   494         }
   495 #else
   496 #ifdef COMPILER2
   497         // For 32-bit build, can't pass longs in O-regs because they become
   498         // I-regs and get trashed.  Use G-regs instead.  G1 and G4 are almost
   499         // spare and available.  This convention isn't used by the Sparc ABI or
   500         // anywhere else. If we're tiered then we don't use G-regs because c1
   501         // can't deal with them as a "pair". (Tiered makes this code think g's are filled)
   502         // G0: zero
   503         // G1: 1st Long arg
   504         // G2: global allocated to TLS
   505         // G3: used in inline cache check
   506         // G4: 2nd Long arg
   507         // G5: used in inline cache check
   508         // G6: used by OS
   509         // G7: used by OS
   511         if (g_reg == G1) {
   512           regs[i].set2(G1->as_VMReg()); // This long arg in G1
   513           g_reg = G4;                  // Where the next arg goes
   514         } else if (g_reg == G4) {
   515           regs[i].set2(G4->as_VMReg()); // The 2nd long arg in G4
   516           g_reg = noreg;               // No more longs in registers
   517         } else {
   518           regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs));
   519           stk_reg_pairs += 2;
   520         }
   521 #else // COMPILER2
   522         if (int_reg_pairs + 1 < int_reg_max) {
   523           if (is_outgoing) {
   524             regs[i].set_pair(as_oRegister(int_reg_pairs + 1)->as_VMReg(), as_oRegister(int_reg_pairs)->as_VMReg());
   525           } else {
   526             regs[i].set_pair(as_iRegister(int_reg_pairs + 1)->as_VMReg(), as_iRegister(int_reg_pairs)->as_VMReg());
   527           }
   528           int_reg_pairs += 2;
   529         } else {
   530           regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs));
   531           stk_reg_pairs += 2;
   532         }
   533 #endif // COMPILER2
   534 #endif // _LP64
   535       break;
   537     case T_FLOAT:
   538       if (flt_reg < flt_reg_max) regs[i].set1(as_FloatRegister(flt_reg++)->as_VMReg());
   539       else                       regs[i].set1(    VMRegImpl::stack2reg(stk_reg++));
   540       break;
   541     case T_DOUBLE:
   542       assert(sig_bt[i+1] == T_VOID, "expecting half");
   543       if (flt_reg_pairs + 1 < flt_reg_max) {
   544         regs[i].set2(as_FloatRegister(flt_reg_pairs)->as_VMReg());
   545         flt_reg_pairs += 2;
   546       } else {
   547         regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs));
   548         stk_reg_pairs += 2;
   549       }
   550       break;
   551     case T_VOID: regs[i].set_bad();  break; // Halves of longs & doubles
   552     default:
   553       ShouldNotReachHere();
   554     }
   555   }
   557   // Return the amount of stack space these arguments will need.
   558   return stk_reg_pairs;
   560 }
   562 // Helper class mostly to avoid passing masm everywhere, and handle
   563 // store displacement overflow logic.
   564 class AdapterGenerator {
   565   MacroAssembler *masm;
   566   Register Rdisp;
   567   void set_Rdisp(Register r)  { Rdisp = r; }
   569   void patch_callers_callsite();
   571   // base+st_off points to top of argument
   572   int arg_offset(const int st_off) { return st_off; }
   573   int next_arg_offset(const int st_off) {
   574     return st_off - Interpreter::stackElementSize;
   575   }
   577   // Argument slot values may be loaded first into a register because
   578   // they might not fit into displacement.
   579   RegisterOrConstant arg_slot(const int st_off);
   580   RegisterOrConstant next_arg_slot(const int st_off);
   582   // Stores long into offset pointed to by base
   583   void store_c2i_long(Register r, Register base,
   584                       const int st_off, bool is_stack);
   585   void store_c2i_object(Register r, Register base,
   586                         const int st_off);
   587   void store_c2i_int(Register r, Register base,
   588                      const int st_off);
   589   void store_c2i_double(VMReg r_2,
   590                         VMReg r_1, Register base, const int st_off);
   591   void store_c2i_float(FloatRegister f, Register base,
   592                        const int st_off);
   594  public:
   595   void gen_c2i_adapter(int total_args_passed,
   596                               // VMReg max_arg,
   597                               int comp_args_on_stack, // VMRegStackSlots
   598                               const BasicType *sig_bt,
   599                               const VMRegPair *regs,
   600                               Label& skip_fixup);
   601   void gen_i2c_adapter(int total_args_passed,
   602                               // VMReg max_arg,
   603                               int comp_args_on_stack, // VMRegStackSlots
   604                               const BasicType *sig_bt,
   605                               const VMRegPair *regs);
   607   AdapterGenerator(MacroAssembler *_masm) : masm(_masm) {}
   608 };
   611 // Patch the callers callsite with entry to compiled code if it exists.
   612 void AdapterGenerator::patch_callers_callsite() {
   613   Label L;
   614   __ ld_ptr(G5_method, in_bytes(methodOopDesc::code_offset()), G3_scratch);
   615   __ br_null(G3_scratch, false, __ pt, L);
   616   // Schedule the branch target address early.
   617   __ delayed()->ld_ptr(G5_method, in_bytes(methodOopDesc::interpreter_entry_offset()), G3_scratch);
   618   // Call into the VM to patch the caller, then jump to compiled callee
   619   __ save_frame(4);     // Args in compiled layout; do not blow them
   621   // Must save all the live Gregs the list is:
   622   // G1: 1st Long arg (32bit build)
   623   // G2: global allocated to TLS
   624   // G3: used in inline cache check (scratch)
   625   // G4: 2nd Long arg (32bit build);
   626   // G5: used in inline cache check (methodOop)
   628   // The longs must go to the stack by hand since in the 32 bit build they can be trashed by window ops.
   630 #ifdef _LP64
   631   // mov(s,d)
   632   __ mov(G1, L1);
   633   __ mov(G4, L4);
   634   __ mov(G5_method, L5);
   635   __ mov(G5_method, O0);         // VM needs target method
   636   __ mov(I7, O1);                // VM needs caller's callsite
   637   // Must be a leaf call...
   638   // can be very far once the blob has been relocated
   639   AddressLiteral dest(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite));
   640   __ relocate(relocInfo::runtime_call_type);
   641   __ jumpl_to(dest, O7, O7);
   642   __ delayed()->mov(G2_thread, L7_thread_cache);
   643   __ mov(L7_thread_cache, G2_thread);
   644   __ mov(L1, G1);
   645   __ mov(L4, G4);
   646   __ mov(L5, G5_method);
   647 #else
   648   __ stx(G1, FP, -8 + STACK_BIAS);
   649   __ stx(G4, FP, -16 + STACK_BIAS);
   650   __ mov(G5_method, L5);
   651   __ mov(G5_method, O0);         // VM needs target method
   652   __ mov(I7, O1);                // VM needs caller's callsite
   653   // Must be a leaf call...
   654   __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), relocInfo::runtime_call_type);
   655   __ delayed()->mov(G2_thread, L7_thread_cache);
   656   __ mov(L7_thread_cache, G2_thread);
   657   __ ldx(FP, -8 + STACK_BIAS, G1);
   658   __ ldx(FP, -16 + STACK_BIAS, G4);
   659   __ mov(L5, G5_method);
   660   __ ld_ptr(G5_method, in_bytes(methodOopDesc::interpreter_entry_offset()), G3_scratch);
   661 #endif /* _LP64 */
   663   __ restore();      // Restore args
   664   __ bind(L);
   665 }
   668 RegisterOrConstant AdapterGenerator::arg_slot(const int st_off) {
   669   RegisterOrConstant roc(arg_offset(st_off));
   670   return __ ensure_simm13_or_reg(roc, Rdisp);
   671 }
   673 RegisterOrConstant AdapterGenerator::next_arg_slot(const int st_off) {
   674   RegisterOrConstant roc(next_arg_offset(st_off));
   675   return __ ensure_simm13_or_reg(roc, Rdisp);
   676 }
   679 // Stores long into offset pointed to by base
   680 void AdapterGenerator::store_c2i_long(Register r, Register base,
   681                                       const int st_off, bool is_stack) {
   682 #ifdef _LP64
   683   // In V9, longs are given 2 64-bit slots in the interpreter, but the
   684   // data is passed in only 1 slot.
   685   __ stx(r, base, next_arg_slot(st_off));
   686 #else
   687 #ifdef COMPILER2
   688   // Misaligned store of 64-bit data
   689   __ stw(r, base, arg_slot(st_off));    // lo bits
   690   __ srlx(r, 32, r);
   691   __ stw(r, base, next_arg_slot(st_off));  // hi bits
   692 #else
   693   if (is_stack) {
   694     // Misaligned store of 64-bit data
   695     __ stw(r, base, arg_slot(st_off));    // lo bits
   696     __ srlx(r, 32, r);
   697     __ stw(r, base, next_arg_slot(st_off));  // hi bits
   698   } else {
   699     __ stw(r->successor(), base, arg_slot(st_off)     ); // lo bits
   700     __ stw(r             , base, next_arg_slot(st_off)); // hi bits
   701   }
   702 #endif // COMPILER2
   703 #endif // _LP64
   704 }
   706 void AdapterGenerator::store_c2i_object(Register r, Register base,
   707                       const int st_off) {
   708   __ st_ptr (r, base, arg_slot(st_off));
   709 }
   711 void AdapterGenerator::store_c2i_int(Register r, Register base,
   712                    const int st_off) {
   713   __ st (r, base, arg_slot(st_off));
   714 }
   716 // Stores into offset pointed to by base
   717 void AdapterGenerator::store_c2i_double(VMReg r_2,
   718                       VMReg r_1, Register base, const int st_off) {
   719 #ifdef _LP64
   720   // In V9, doubles are given 2 64-bit slots in the interpreter, but the
   721   // data is passed in only 1 slot.
   722   __ stf(FloatRegisterImpl::D, r_1->as_FloatRegister(), base, next_arg_slot(st_off));
   723 #else
   724   // Need to marshal 64-bit value from misaligned Lesp loads
   725   __ stf(FloatRegisterImpl::S, r_1->as_FloatRegister(), base, next_arg_slot(st_off));
   726   __ stf(FloatRegisterImpl::S, r_2->as_FloatRegister(), base, arg_slot(st_off) );
   727 #endif
   728 }
   730 void AdapterGenerator::store_c2i_float(FloatRegister f, Register base,
   731                                        const int st_off) {
   732   __ stf(FloatRegisterImpl::S, f, base, arg_slot(st_off));
   733 }
   735 void AdapterGenerator::gen_c2i_adapter(
   736                             int total_args_passed,
   737                             // VMReg max_arg,
   738                             int comp_args_on_stack, // VMRegStackSlots
   739                             const BasicType *sig_bt,
   740                             const VMRegPair *regs,
   741                             Label& skip_fixup) {
   743   // Before we get into the guts of the C2I adapter, see if we should be here
   744   // at all.  We've come from compiled code and are attempting to jump to the
   745   // interpreter, which means the caller made a static call to get here
   746   // (vcalls always get a compiled target if there is one).  Check for a
   747   // compiled target.  If there is one, we need to patch the caller's call.
   748   // However we will run interpreted if we come thru here. The next pass
   749   // thru the call site will run compiled. If we ran compiled here then
   750   // we can (theoretically) do endless i2c->c2i->i2c transitions during
   751   // deopt/uncommon trap cycles. If we always go interpreted here then
   752   // we can have at most one and don't need to play any tricks to keep
   753   // from endlessly growing the stack.
   754   //
   755   // Actually if we detected that we had an i2c->c2i transition here we
   756   // ought to be able to reset the world back to the state of the interpreted
   757   // call and not bother building another interpreter arg area. We don't
   758   // do that at this point.
   760   patch_callers_callsite();
   762   __ bind(skip_fixup);
   764   // Since all args are passed on the stack, total_args_passed*wordSize is the
   765   // space we need.  Add in varargs area needed by the interpreter. Round up
   766   // to stack alignment.
   767   const int arg_size = total_args_passed * Interpreter::stackElementSize;
   768   const int varargs_area =
   769                  (frame::varargs_offset - frame::register_save_words)*wordSize;
   770   const int extraspace = round_to(arg_size + varargs_area, 2*wordSize);
   772   int bias = STACK_BIAS;
   773   const int interp_arg_offset = frame::varargs_offset*wordSize +
   774                         (total_args_passed-1)*Interpreter::stackElementSize;
   776   Register base = SP;
   778 #ifdef _LP64
   779   // In the 64bit build because of wider slots and STACKBIAS we can run
   780   // out of bits in the displacement to do loads and stores.  Use g3 as
   781   // temporary displacement.
   782   if (! __ is_simm13(extraspace)) {
   783     __ set(extraspace, G3_scratch);
   784     __ sub(SP, G3_scratch, SP);
   785   } else {
   786     __ sub(SP, extraspace, SP);
   787   }
   788   set_Rdisp(G3_scratch);
   789 #else
   790   __ sub(SP, extraspace, SP);
   791 #endif // _LP64
   793   // First write G1 (if used) to wherever it must go
   794   for (int i=0; i<total_args_passed; i++) {
   795     const int st_off = interp_arg_offset - (i*Interpreter::stackElementSize) + bias;
   796     VMReg r_1 = regs[i].first();
   797     VMReg r_2 = regs[i].second();
   798     if (r_1 == G1_scratch->as_VMReg()) {
   799       if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ARRAY) {
   800         store_c2i_object(G1_scratch, base, st_off);
   801       } else if (sig_bt[i] == T_LONG) {
   802         assert(!TieredCompilation, "should not use register args for longs");
   803         store_c2i_long(G1_scratch, base, st_off, false);
   804       } else {
   805         store_c2i_int(G1_scratch, base, st_off);
   806       }
   807     }
   808   }
   810   // Now write the args into the outgoing interpreter space
   811   for (int i=0; i<total_args_passed; i++) {
   812     const int st_off = interp_arg_offset - (i*Interpreter::stackElementSize) + bias;
   813     VMReg r_1 = regs[i].first();
   814     VMReg r_2 = regs[i].second();
   815     if (!r_1->is_valid()) {
   816       assert(!r_2->is_valid(), "");
   817       continue;
   818     }
   819     // Skip G1 if found as we did it first in order to free it up
   820     if (r_1 == G1_scratch->as_VMReg()) {
   821       continue;
   822     }
   823 #ifdef ASSERT
   824     bool G1_forced = false;
   825 #endif // ASSERT
   826     if (r_1->is_stack()) {        // Pretend stack targets are loaded into G1
   827 #ifdef _LP64
   828       Register ld_off = Rdisp;
   829       __ set(reg2offset(r_1) + extraspace + bias, ld_off);
   830 #else
   831       int ld_off = reg2offset(r_1) + extraspace + bias;
   832 #endif // _LP64
   833 #ifdef ASSERT
   834       G1_forced = true;
   835 #endif // ASSERT
   836       r_1 = G1_scratch->as_VMReg();// as part of the load/store shuffle
   837       if (!r_2->is_valid()) __ ld (base, ld_off, G1_scratch);
   838       else                  __ ldx(base, ld_off, G1_scratch);
   839     }
   841     if (r_1->is_Register()) {
   842       Register r = r_1->as_Register()->after_restore();
   843       if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ARRAY) {
   844         store_c2i_object(r, base, st_off);
   845       } else if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
   846 #ifndef _LP64
   847         if (TieredCompilation) {
   848           assert(G1_forced || sig_bt[i] != T_LONG, "should not use register args for longs");
   849         }
   850 #endif // _LP64
   851         store_c2i_long(r, base, st_off, r_2->is_stack());
   852       } else {
   853         store_c2i_int(r, base, st_off);
   854       }
   855     } else {
   856       assert(r_1->is_FloatRegister(), "");
   857       if (sig_bt[i] == T_FLOAT) {
   858         store_c2i_float(r_1->as_FloatRegister(), base, st_off);
   859       } else {
   860         assert(sig_bt[i] == T_DOUBLE, "wrong type");
   861         store_c2i_double(r_2, r_1, base, st_off);
   862       }
   863     }
   864   }
   866 #ifdef _LP64
   867   // Need to reload G3_scratch, used for temporary displacements.
   868   __ ld_ptr(G5_method, in_bytes(methodOopDesc::interpreter_entry_offset()), G3_scratch);
   870   // Pass O5_savedSP as an argument to the interpreter.
   871   // The interpreter will restore SP to this value before returning.
   872   __ set(extraspace, G1);
   873   __ add(SP, G1, O5_savedSP);
   874 #else
   875   // Pass O5_savedSP as an argument to the interpreter.
   876   // The interpreter will restore SP to this value before returning.
   877   __ add(SP, extraspace, O5_savedSP);
   878 #endif // _LP64
   880   __ mov((frame::varargs_offset)*wordSize -
   881          1*Interpreter::stackElementSize+bias+BytesPerWord, G1);
   882   // Jump to the interpreter just as if interpreter was doing it.
   883   __ jmpl(G3_scratch, 0, G0);
   884   // Setup Lesp for the call.  Cannot actually set Lesp as the current Lesp
   885   // (really L0) is in use by the compiled frame as a generic temp.  However,
   886   // the interpreter does not know where its args are without some kind of
   887   // arg pointer being passed in.  Pass it in Gargs.
   888   __ delayed()->add(SP, G1, Gargs);
   889 }
   891 void AdapterGenerator::gen_i2c_adapter(
   892                             int total_args_passed,
   893                             // VMReg max_arg,
   894                             int comp_args_on_stack, // VMRegStackSlots
   895                             const BasicType *sig_bt,
   896                             const VMRegPair *regs) {
   898   // Generate an I2C adapter: adjust the I-frame to make space for the C-frame
   899   // layout.  Lesp was saved by the calling I-frame and will be restored on
   900   // return.  Meanwhile, outgoing arg space is all owned by the callee
   901   // C-frame, so we can mangle it at will.  After adjusting the frame size,
   902   // hoist register arguments and repack other args according to the compiled
   903   // code convention.  Finally, end in a jump to the compiled code.  The entry
   904   // point address is the start of the buffer.
   906   // We will only enter here from an interpreted frame and never from after
   907   // passing thru a c2i. Azul allowed this but we do not. If we lose the
   908   // race and use a c2i we will remain interpreted for the race loser(s).
   909   // This removes all sorts of headaches on the x86 side and also eliminates
   910   // the possibility of having c2i -> i2c -> c2i -> ... endless transitions.
   912   // As you can see from the list of inputs & outputs there are not a lot
   913   // of temp registers to work with: mostly G1, G3 & G4.
   915   // Inputs:
   916   // G2_thread      - TLS
   917   // G5_method      - Method oop
   918   // G4 (Gargs)     - Pointer to interpreter's args
   919   // O0..O4         - free for scratch
   920   // O5_savedSP     - Caller's saved SP, to be restored if needed
   921   // O6             - Current SP!
   922   // O7             - Valid return address
   923   // L0-L7, I0-I7   - Caller's temps (no frame pushed yet)
   925   // Outputs:
   926   // G2_thread      - TLS
   927   // G1, G4         - Outgoing long args in 32-bit build
   928   // O0-O5          - Outgoing args in compiled layout
   929   // O6             - Adjusted or restored SP
   930   // O7             - Valid return address
   931   // L0-L7, I0-I7   - Caller's temps (no frame pushed yet)
   932   // F0-F7          - more outgoing args
   935   // Gargs is the incoming argument base, and also an outgoing argument.
   936   __ sub(Gargs, BytesPerWord, Gargs);
   938   // ON ENTRY TO THE CODE WE ARE MAKING, WE HAVE AN INTERPRETED FRAME
   939   // WITH O7 HOLDING A VALID RETURN PC
   940   //
   941   // |              |
   942   // :  java stack  :
   943   // |              |
   944   // +--------------+ <--- start of outgoing args
   945   // |   receiver   |   |
   946   // : rest of args :   |---size is java-arg-words
   947   // |              |   |
   948   // +--------------+ <--- O4_args (misaligned) and Lesp if prior is not C2I
   949   // |              |   |
   950   // :    unused    :   |---Space for max Java stack, plus stack alignment
   951   // |              |   |
   952   // +--------------+ <--- SP + 16*wordsize
   953   // |              |
   954   // :    window    :
   955   // |              |
   956   // +--------------+ <--- SP
   958   // WE REPACK THE STACK.  We use the common calling convention layout as
   959   // discovered by calling SharedRuntime::calling_convention.  We assume it
   960   // causes an arbitrary shuffle of memory, which may require some register
   961   // temps to do the shuffle.  We hope for (and optimize for) the case where
   962   // temps are not needed.  We may have to resize the stack slightly, in case
   963   // we need alignment padding (32-bit interpreter can pass longs & doubles
   964   // misaligned, but the compilers expect them aligned).
   965   //
   966   // |              |
   967   // :  java stack  :
   968   // |              |
   969   // +--------------+ <--- start of outgoing args
   970   // |  pad, align  |   |
   971   // +--------------+   |
   972   // | ints, floats |   |---Outgoing stack args, packed low.
   973   // +--------------+   |   First few args in registers.
   974   // :   doubles    :   |
   975   // |   longs      |   |
   976   // +--------------+ <--- SP' + 16*wordsize
   977   // |              |
   978   // :    window    :
   979   // |              |
   980   // +--------------+ <--- SP'
   982   // ON EXIT FROM THE CODE WE ARE MAKING, WE STILL HAVE AN INTERPRETED FRAME
   983   // WITH O7 HOLDING A VALID RETURN PC - ITS JUST THAT THE ARGS ARE NOW SETUP
   984   // FOR COMPILED CODE AND THE FRAME SLIGHTLY GROWN.
   986   // Cut-out for having no stack args.  Since up to 6 args are passed
   987   // in registers, we will commonly have no stack args.
   988   if (comp_args_on_stack > 0) {
   990     // Convert VMReg stack slots to words.
   991     int comp_words_on_stack = round_to(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
   992     // Round up to minimum stack alignment, in wordSize
   993     comp_words_on_stack = round_to(comp_words_on_stack, 2);
   994     // Now compute the distance from Lesp to SP.  This calculation does not
   995     // include the space for total_args_passed because Lesp has not yet popped
   996     // the arguments.
   997     __ sub(SP, (comp_words_on_stack)*wordSize, SP);
   998   }
  1000   // Will jump to the compiled code just as if compiled code was doing it.
  1001   // Pre-load the register-jump target early, to schedule it better.
  1002   __ ld_ptr(G5_method, in_bytes(methodOopDesc::from_compiled_offset()), G3);
  1004   // Now generate the shuffle code.  Pick up all register args and move the
  1005   // rest through G1_scratch.
  1006   for (int i=0; i<total_args_passed; i++) {
  1007     if (sig_bt[i] == T_VOID) {
  1008       // Longs and doubles are passed in native word order, but misaligned
  1009       // in the 32-bit build.
  1010       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
  1011       continue;
  1012     }
  1014     // Pick up 0, 1 or 2 words from Lesp+offset.  Assume mis-aligned in the
  1015     // 32-bit build and aligned in the 64-bit build.  Look for the obvious
  1016     // ldx/lddf optimizations.
  1018     // Load in argument order going down.
  1019     const int ld_off = (total_args_passed-i)*Interpreter::stackElementSize;
  1020     set_Rdisp(G1_scratch);
  1022     VMReg r_1 = regs[i].first();
  1023     VMReg r_2 = regs[i].second();
  1024     if (!r_1->is_valid()) {
  1025       assert(!r_2->is_valid(), "");
  1026       continue;
  1027     }
  1028     if (r_1->is_stack()) {        // Pretend stack targets are loaded into F8/F9
  1029       r_1 = F8->as_VMReg();        // as part of the load/store shuffle
  1030       if (r_2->is_valid()) r_2 = r_1->next();
  1031     }
  1032     if (r_1->is_Register()) {  // Register argument
  1033       Register r = r_1->as_Register()->after_restore();
  1034       if (!r_2->is_valid()) {
  1035         __ ld(Gargs, arg_slot(ld_off), r);
  1036       } else {
  1037 #ifdef _LP64
  1038         // In V9, longs are given 2 64-bit slots in the interpreter, but the
  1039         // data is passed in only 1 slot.
  1040         RegisterOrConstant slot = (sig_bt[i] == T_LONG) ?
  1041               next_arg_slot(ld_off) : arg_slot(ld_off);
  1042         __ ldx(Gargs, slot, r);
  1043 #else
  1044         // Need to load a 64-bit value into G1/G4, but G1/G4 is being used in the
  1045         // stack shuffle.  Load the first 2 longs into G1/G4 later.
  1046 #endif
  1047       }
  1048     } else {
  1049       assert(r_1->is_FloatRegister(), "");
  1050       if (!r_2->is_valid()) {
  1051         __ ldf(FloatRegisterImpl::S, Gargs, arg_slot(ld_off), r_1->as_FloatRegister());
  1052       } else {
  1053 #ifdef _LP64
  1054         // In V9, doubles are given 2 64-bit slots in the interpreter, but the
  1055         // data is passed in only 1 slot.  This code also handles longs that
  1056         // are passed on the stack, but need a stack-to-stack move through a
  1057         // spare float register.
  1058         RegisterOrConstant slot = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ?
  1059               next_arg_slot(ld_off) : arg_slot(ld_off);
  1060         __ ldf(FloatRegisterImpl::D, Gargs, slot, r_1->as_FloatRegister());
  1061 #else
  1062         // Need to marshal 64-bit value from misaligned Lesp loads
  1063         __ ldf(FloatRegisterImpl::S, Gargs, next_arg_slot(ld_off), r_1->as_FloatRegister());
  1064         __ ldf(FloatRegisterImpl::S, Gargs, arg_slot(ld_off), r_2->as_FloatRegister());
  1065 #endif
  1066       }
  1067     }
  1068     // Was the argument really intended to be on the stack, but was loaded
  1069     // into F8/F9?
  1070     if (regs[i].first()->is_stack()) {
  1071       assert(r_1->as_FloatRegister() == F8, "fix this code");
  1072       // Convert stack slot to an SP offset
  1073       int st_off = reg2offset(regs[i].first()) + STACK_BIAS;
  1074       // Store down the shuffled stack word.  Target address _is_ aligned.
  1075       RegisterOrConstant slot = __ ensure_simm13_or_reg(st_off, Rdisp);
  1076       if (!r_2->is_valid()) __ stf(FloatRegisterImpl::S, r_1->as_FloatRegister(), SP, slot);
  1077       else                  __ stf(FloatRegisterImpl::D, r_1->as_FloatRegister(), SP, slot);
  1078     }
  1079   }
  1080   bool made_space = false;
  1081 #ifndef _LP64
  1082   // May need to pick up a few long args in G1/G4
  1083   bool g4_crushed = false;
  1084   bool g3_crushed = false;
  1085   for (int i=0; i<total_args_passed; i++) {
  1086     if (regs[i].first()->is_Register() && regs[i].second()->is_valid()) {
  1087       // Load in argument order going down
  1088       int ld_off = (total_args_passed-i)*Interpreter::stackElementSize;
  1089       // Need to marshal 64-bit value from misaligned Lesp loads
  1090       Register r = regs[i].first()->as_Register()->after_restore();
  1091       if (r == G1 || r == G4) {
  1092         assert(!g4_crushed, "ordering problem");
  1093         if (r == G4){
  1094           g4_crushed = true;
  1095           __ lduw(Gargs, arg_slot(ld_off)     , G3_scratch); // Load lo bits
  1096           __ ld  (Gargs, next_arg_slot(ld_off), r);          // Load hi bits
  1097         } else {
  1098           // better schedule this way
  1099           __ ld  (Gargs, next_arg_slot(ld_off), r);          // Load hi bits
  1100           __ lduw(Gargs, arg_slot(ld_off)     , G3_scratch); // Load lo bits
  1101         }
  1102         g3_crushed = true;
  1103         __ sllx(r, 32, r);
  1104         __ or3(G3_scratch, r, r);
  1105       } else {
  1106         assert(r->is_out(), "longs passed in two O registers");
  1107         __ ld  (Gargs, arg_slot(ld_off)     , r->successor()); // Load lo bits
  1108         __ ld  (Gargs, next_arg_slot(ld_off), r);              // Load hi bits
  1109       }
  1110     }
  1111   }
  1112 #endif
  1114   // Jump to the compiled code just as if compiled code was doing it.
  1115   //
  1116 #ifndef _LP64
  1117     if (g3_crushed) {
  1118       // Rats load was wasted, at least it is in cache...
  1119       __ ld_ptr(G5_method, methodOopDesc::from_compiled_offset(), G3);
  1120     }
  1121 #endif /* _LP64 */
  1123     // 6243940 We might end up in handle_wrong_method if
  1124     // the callee is deoptimized as we race thru here. If that
  1125     // happens we don't want to take a safepoint because the
  1126     // caller frame will look interpreted and arguments are now
  1127     // "compiled" so it is much better to make this transition
  1128     // invisible to the stack walking code. Unfortunately if
  1129     // we try and find the callee by normal means a safepoint
  1130     // is possible. So we stash the desired callee in the thread
  1131     // and the vm will find it there should this case occur.
  1132     Address callee_target_addr(G2_thread, JavaThread::callee_target_offset());
  1133     __ st_ptr(G5_method, callee_target_addr);
  1135     if (StressNonEntrant) {
  1136       // Open a big window for deopt failure
  1137       __ save_frame(0);
  1138       __ mov(G0, L0);
  1139       Label loop;
  1140       __ bind(loop);
  1141       __ sub(L0, 1, L0);
  1142       __ br_null(L0, false, Assembler::pt, loop);
  1143       __ delayed()->nop();
  1145       __ restore();
  1146     }
  1149     __ jmpl(G3, 0, G0);
  1150     __ delayed()->nop();
  1151 }
  1153 // ---------------------------------------------------------------
  1154 AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
  1155                                                             int total_args_passed,
  1156                                                             // VMReg max_arg,
  1157                                                             int comp_args_on_stack, // VMRegStackSlots
  1158                                                             const BasicType *sig_bt,
  1159                                                             const VMRegPair *regs,
  1160                                                             AdapterFingerPrint* fingerprint) {
  1161   address i2c_entry = __ pc();
  1163   AdapterGenerator agen(masm);
  1165   agen.gen_i2c_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs);
  1168   // -------------------------------------------------------------------------
  1169   // Generate a C2I adapter.  On entry we know G5 holds the methodOop.  The
  1170   // args start out packed in the compiled layout.  They need to be unpacked
  1171   // into the interpreter layout.  This will almost always require some stack
  1172   // space.  We grow the current (compiled) stack, then repack the args.  We
  1173   // finally end in a jump to the generic interpreter entry point.  On exit
  1174   // from the interpreter, the interpreter will restore our SP (lest the
  1175   // compiled code, which relies solely on SP and not FP, get sick).
  1177   address c2i_unverified_entry = __ pc();
  1178   Label skip_fixup;
  1179   {
  1180 #if !defined(_LP64) && defined(COMPILER2)
  1181     Register R_temp   = L0;   // another scratch register
  1182 #else
  1183     Register R_temp   = G1;   // another scratch register
  1184 #endif
  1186     AddressLiteral ic_miss(SharedRuntime::get_ic_miss_stub());
  1188     __ verify_oop(O0);
  1189     __ verify_oop(G5_method);
  1190     __ load_klass(O0, G3_scratch);
  1191     __ verify_oop(G3_scratch);
  1193 #if !defined(_LP64) && defined(COMPILER2)
  1194     __ save(SP, -frame::register_save_words*wordSize, SP);
  1195     __ ld_ptr(G5_method, compiledICHolderOopDesc::holder_klass_offset(), R_temp);
  1196     __ verify_oop(R_temp);
  1197     __ cmp(G3_scratch, R_temp);
  1198     __ restore();
  1199 #else
  1200     __ ld_ptr(G5_method, compiledICHolderOopDesc::holder_klass_offset(), R_temp);
  1201     __ verify_oop(R_temp);
  1202     __ cmp(G3_scratch, R_temp);
  1203 #endif
  1205     Label ok, ok2;
  1206     __ brx(Assembler::equal, false, Assembler::pt, ok);
  1207     __ delayed()->ld_ptr(G5_method, compiledICHolderOopDesc::holder_method_offset(), G5_method);
  1208     __ jump_to(ic_miss, G3_scratch);
  1209     __ delayed()->nop();
  1211     __ bind(ok);
  1212     // Method might have been compiled since the call site was patched to
   1213     // interpreted; if that is the case, treat it as a miss so we can get
  1214     // the call site corrected.
  1215     __ ld_ptr(G5_method, in_bytes(methodOopDesc::code_offset()), G3_scratch);
  1216     __ bind(ok2);
  1217     __ br_null(G3_scratch, false, __ pt, skip_fixup);
  1218     __ delayed()->ld_ptr(G5_method, in_bytes(methodOopDesc::interpreter_entry_offset()), G3_scratch);
  1219     __ jump_to(ic_miss, G3_scratch);
  1220     __ delayed()->nop();
  1224   address c2i_entry = __ pc();
  1226   agen.gen_c2i_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
  1228   __ flush();
  1229   return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
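// To recap the three entry points handed to new_entry above: i2c_entry is used
// when an interpreted caller invokes this compiled method; c2i_entry is used
// when a compiled caller must drop into the interpreter; c2i_unverified_entry
// is the same as c2i_entry but first performs the inline-cache klass check,
// branching to the ic_miss stub on a mismatch or when the callee has been
// compiled in the meantime so the call site can be corrected.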
  1233 // Helper function for native calling conventions
  1234 static VMReg int_stk_helper( int i ) {
  1235   // Bias any stack based VMReg we get by ignoring the window area
  1236   // but not the register parameter save area.
  1237   //
  1238   // This is strange for the following reasons. We'd normally expect
   1239   // the calling convention to return a VMReg for a stack slot
   1240   // completely ignoring any abi reserved area. C2 thinks of that
   1241   // abi area as only out_preserve_stack_slots. This does not include
   1242   // the area allocated by the C abi to store down integer arguments
   1243   // because the java calling convention does not use it. So,
   1244   // since c2 assumes that there are only out_preserve_stack_slots
   1245   // to bias the optoregs (which impacts VMRegs), when actually referencing
   1246   // any stack location the C calling convention must add in this bias amount
   1247   // to make up for the fact that out_preserve_stack_slots is
   1248   // insufficient for C calls. What a mess. I sure hope those 6
  1249   // stack words were worth it on every java call!
  1251   // Another way of cleaning this up would be for out_preserve_stack_slots
  1252   // to take a parameter to say whether it was C or java calling conventions.
  1253   // Then things might look a little better (but not much).
  1255   int mem_parm_offset = i - SPARC_ARGS_IN_REGS_NUM;
  1256   if( mem_parm_offset < 0 ) {
  1257     return as_oRegister(i)->as_VMReg();
  1258   } else {
  1259     int actual_offset = (mem_parm_offset + frame::memory_parameter_word_sp_offset) * VMRegImpl::slots_per_word;
  1260     // Now return a biased offset that will be correct when out_preserve_slots is added back in
  1261     return VMRegImpl::stack2reg(actual_offset - SharedRuntime::out_preserve_stack_slots());
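// A quick worked example of the helper above (a sketch; it assumes
// SPARC_ARGS_IN_REGS_NUM is 6, i.e. the six outgoing O registers):
//   i = 0..5  ->  O0..O5
//   i = 6     ->  mem_parm_offset == 0, so the slot returned is
//                 frame::memory_parameter_word_sp_offset * VMRegImpl::slots_per_word
//                   - SharedRuntime::out_preserve_stack_slots()
// which becomes the correct SP-relative slot once out_preserve_stack_slots
// is added back in by the frame layout code.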
  1266 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
  1267                                          VMRegPair *regs,
  1268                                          int total_args_passed) {
  1270     // Return the number of VMReg stack_slots needed for the args.
  1271     // This value does not include an abi space (like register window
  1272     // save area).
  1274     // The native convention is V8 if !LP64
  1275     // The LP64 convention is the V9 convention which is slightly more sane.
  1277     // We return the amount of VMReg stack slots we need to reserve for all
  1278     // the arguments NOT counting out_preserve_stack_slots. Since we always
  1279     // have space for storing at least 6 registers to memory we start with that.
  1280     // See int_stk_helper for a further discussion.
  1281     int max_stack_slots = (frame::varargs_offset * VMRegImpl::slots_per_word) - SharedRuntime::out_preserve_stack_slots();
  1283 #ifdef _LP64
  1284     // V9 convention: All things "as-if" on double-wide stack slots.
  1285     // Hoist any int/ptr/long's in the first 6 to int regs.
  1286     // Hoist any flt/dbl's in the first 16 dbl regs.
  1287     int j = 0;                  // Count of actual args, not HALVES
  1288     for( int i=0; i<total_args_passed; i++, j++ ) {
  1289       switch( sig_bt[i] ) {
  1290       case T_BOOLEAN:
  1291       case T_BYTE:
  1292       case T_CHAR:
  1293       case T_INT:
  1294       case T_SHORT:
  1295         regs[i].set1( int_stk_helper( j ) ); break;
  1296       case T_LONG:
  1297         assert( sig_bt[i+1] == T_VOID, "expecting half" );
  1298       case T_ADDRESS: // raw pointers, like current thread, for VM calls
  1299       case T_ARRAY:
  1300       case T_OBJECT:
  1301         regs[i].set2( int_stk_helper( j ) );
  1302         break;
  1303       case T_FLOAT:
  1304         if ( j < 16 ) {
  1305           // V9ism: floats go in ODD registers
  1306           regs[i].set1(as_FloatRegister(1 + (j<<1))->as_VMReg());
  1307         } else {
  1308           // V9ism: floats go in ODD stack slot
  1309           regs[i].set1(VMRegImpl::stack2reg(1 + (j<<1)));
  1311         break;
  1312       case T_DOUBLE:
  1313         assert( sig_bt[i+1] == T_VOID, "expecting half" );
  1314         if ( j < 16 ) {
  1315           // V9ism: doubles go in EVEN/ODD regs
  1316           regs[i].set2(as_FloatRegister(j<<1)->as_VMReg());
  1317         } else {
  1318           // V9ism: doubles go in EVEN/ODD stack slots
  1319           regs[i].set2(VMRegImpl::stack2reg(j<<1));
  1321         break;
  1322       case T_VOID:  regs[i].set_bad(); j--; break; // Do not count HALVES
  1323       default:
  1324         ShouldNotReachHere();
  1326       if (regs[i].first()->is_stack()) {
  1327         int off =  regs[i].first()->reg2stack();
  1328         if (off > max_stack_slots) max_stack_slots = off;
  1330       if (regs[i].second()->is_stack()) {
  1331         int off =  regs[i].second()->reg2stack();
  1332         if (off > max_stack_slots) max_stack_slots = off;
  1336 #else // _LP64
  1337     // V8 convention: first 6 things in O-regs, rest on stack.
  1338     // Alignment is willy-nilly.
  1339     for( int i=0; i<total_args_passed; i++ ) {
  1340       switch( sig_bt[i] ) {
  1341       case T_ADDRESS: // raw pointers, like current thread, for VM calls
  1342       case T_ARRAY:
  1343       case T_BOOLEAN:
  1344       case T_BYTE:
  1345       case T_CHAR:
  1346       case T_FLOAT:
  1347       case T_INT:
  1348       case T_OBJECT:
  1349       case T_SHORT:
  1350         regs[i].set1( int_stk_helper( i ) );
  1351         break;
  1352       case T_DOUBLE:
  1353       case T_LONG:
  1354         assert( sig_bt[i+1] == T_VOID, "expecting half" );
  1355         regs[i].set_pair( int_stk_helper( i+1 ), int_stk_helper( i ) );
  1356         break;
  1357       case T_VOID: regs[i].set_bad(); break;
  1358       default:
  1359         ShouldNotReachHere();
  1361       if (regs[i].first()->is_stack()) {
  1362         int off =  regs[i].first()->reg2stack();
  1363         if (off > max_stack_slots) max_stack_slots = off;
  1365       if (regs[i].second()->is_stack()) {
  1366         int off =  regs[i].second()->reg2stack();
  1367         if (off > max_stack_slots) max_stack_slots = off;
  1370 #endif // _LP64
  1372   return round_to(max_stack_slots + 1, 2);
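// An example trace of the _LP64 branch above (a sketch, not exhaustive): for
// sig_bt = { T_INT, T_LONG, T_VOID, T_FLOAT } the loop sees j = 0, 1, -, 2 and
// assigns regs = { O0, O1 (double-wide), <bad>, F5 }, the float landing in the
// odd half of double-wide slot 2 because 1 + (2 << 1) == 5.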
  1377 // ---------------------------------------------------------------------------
  1378 void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
  1379   switch (ret_type) {
  1380   case T_FLOAT:
  1381     __ stf(FloatRegisterImpl::S, F0, SP, frame_slots*VMRegImpl::stack_slot_size - 4+STACK_BIAS);
  1382     break;
  1383   case T_DOUBLE:
  1384     __ stf(FloatRegisterImpl::D, F0, SP, frame_slots*VMRegImpl::stack_slot_size - 8+STACK_BIAS);
  1385     break;
  1389 void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
  1390   switch (ret_type) {
  1391   case T_FLOAT:
  1392     __ ldf(FloatRegisterImpl::S, SP, frame_slots*VMRegImpl::stack_slot_size - 4+STACK_BIAS, F0);
  1393     break;
  1394   case T_DOUBLE:
  1395     __ ldf(FloatRegisterImpl::D, SP, frame_slots*VMRegImpl::stack_slot_size - 8+STACK_BIAS, F0);
  1396     break;
   1400 // Check and forward any pending exception.  Thread is stored in
  1401 // L7_thread_cache and possibly NOT in G2_thread.  Since this is a native call, there
  1402 // is no exception handler.  We merely pop this frame off and throw the
  1403 // exception in the caller's frame.
  1404 static void check_forward_pending_exception(MacroAssembler *masm, Register Rex_oop) {
  1405   Label L;
  1406   __ br_null(Rex_oop, false, Assembler::pt, L);
  1407   __ delayed()->mov(L7_thread_cache, G2_thread); // restore in case we have exception
  1408   // Since this is a native call, we *know* the proper exception handler
  1409   // without calling into the VM: it's the empty function.  Just pop this
  1410   // frame and then jump to forward_exception_entry; O7 will contain the
  1411   // native caller's return PC.
  1412  AddressLiteral exception_entry(StubRoutines::forward_exception_entry());
  1413   __ jump_to(exception_entry, G3_scratch);
  1414   __ delayed()->restore();      // Pop this frame off.
  1415   __ bind(L);
  1418 // A simple move of integer like type
  1419 static void simple_move32(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
  1420   if (src.first()->is_stack()) {
  1421     if (dst.first()->is_stack()) {
  1422       // stack to stack
  1423       __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5);
  1424       __ st(L5, SP, reg2offset(dst.first()) + STACK_BIAS);
  1425     } else {
  1426       // stack to reg
  1427       __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
  1429   } else if (dst.first()->is_stack()) {
  1430     // reg to stack
  1431     __ st(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS);
  1432   } else {
  1433     __ mov(src.first()->as_Register(), dst.first()->as_Register());
   1437 // On 64-bit we will store integer-like items to the stack as
   1438 // 64-bit items (sparc abi) even though java would only store
   1439 // 32 bits for a parameter. On 32-bit it will simply be 32 bits.
   1440 // So this routine will do 32->32 on 32-bit and 32->64 on 64-bit.
  1441 static void move32_64(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
  1442   if (src.first()->is_stack()) {
  1443     if (dst.first()->is_stack()) {
  1444       // stack to stack
  1445       __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5);
  1446       __ st_ptr(L5, SP, reg2offset(dst.first()) + STACK_BIAS);
  1447     } else {
  1448       // stack to reg
  1449       __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
  1451   } else if (dst.first()->is_stack()) {
  1452     // reg to stack
  1453     __ st_ptr(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS);
  1454   } else {
  1455     __ mov(src.first()->as_Register(), dst.first()->as_Register());
  1460 // An oop arg. Must pass a handle not the oop itself
  1461 static void object_move(MacroAssembler* masm,
  1462                         OopMap* map,
  1463                         int oop_handle_offset,
  1464                         int framesize_in_slots,
  1465                         VMRegPair src,
  1466                         VMRegPair dst,
  1467                         bool is_receiver,
  1468                         int* receiver_offset) {
  1470   // must pass a handle. First figure out the location we use as a handle
  1472   if (src.first()->is_stack()) {
  1473     // Oop is already on the stack
  1474     Register rHandle = dst.first()->is_stack() ? L5 : dst.first()->as_Register();
  1475     __ add(FP, reg2offset(src.first()) + STACK_BIAS, rHandle);
  1476     __ ld_ptr(rHandle, 0, L4);
  1477 #ifdef _LP64
  1478     __ movr( Assembler::rc_z, L4, G0, rHandle );
  1479 #else
  1480     __ tst( L4 );
  1481     __ movcc( Assembler::zero, false, Assembler::icc, G0, rHandle );
  1482 #endif
  1483     if (dst.first()->is_stack()) {
  1484       __ st_ptr(rHandle, SP, reg2offset(dst.first()) + STACK_BIAS);
  1486     int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
  1487     if (is_receiver) {
  1488       *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size;
  1490     map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots));
  1491   } else {
   1492     // Oop is in an input register; we must flush it to the stack
  1493     const Register rOop = src.first()->as_Register();
  1494     const Register rHandle = L5;
  1495     int oop_slot = rOop->input_number() * VMRegImpl::slots_per_word + oop_handle_offset;
  1496     int offset = oop_slot*VMRegImpl::stack_slot_size;
  1497     Label skip;
  1498     __ st_ptr(rOop, SP, offset + STACK_BIAS);
  1499     if (is_receiver) {
  1500       *receiver_offset = oop_slot * VMRegImpl::stack_slot_size;
  1502     map->set_oop(VMRegImpl::stack2reg(oop_slot));
  1503     __ add(SP, offset + STACK_BIAS, rHandle);
  1504 #ifdef _LP64
  1505     __ movr( Assembler::rc_z, rOop, G0, rHandle );
  1506 #else
  1507     __ tst( rOop );
  1508     __ movcc( Assembler::zero, false, Assembler::icc, G0, rHandle );
  1509 #endif
  1511     if (dst.first()->is_stack()) {
  1512       __ st_ptr(rHandle, SP, reg2offset(dst.first()) + STACK_BIAS);
  1513     } else {
  1514       __ mov(rHandle, dst.first()->as_Register());
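// In both branches above the "handle" handed to native code is the address of
// a stack slot that holds the oop, with one JNI-mandated wrinkle: a NULL oop
// must be passed as a NULL handle, which is what the movr/movcc on the oop
// value accomplishes.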
   1519 // A float arg may have to do a float reg to int reg conversion
  1520 static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
  1521   assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move");
  1523   if (src.first()->is_stack()) {
  1524     if (dst.first()->is_stack()) {
  1525       // stack to stack the easiest of the bunch
  1526       __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5);
  1527       __ st(L5, SP, reg2offset(dst.first()) + STACK_BIAS);
  1528     } else {
  1529       // stack to reg
  1530       if (dst.first()->is_Register()) {
  1531         __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
  1532       } else {
  1533         __ ldf(FloatRegisterImpl::S, FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_FloatRegister());
  1536   } else if (dst.first()->is_stack()) {
  1537     // reg to stack
  1538     if (src.first()->is_Register()) {
  1539       __ st(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS);
  1540     } else {
  1541       __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), SP, reg2offset(dst.first()) + STACK_BIAS);
  1543   } else {
  1544     // reg to reg
  1545     if (src.first()->is_Register()) {
  1546       if (dst.first()->is_Register()) {
  1547         // gpr -> gpr
  1548         __ mov(src.first()->as_Register(), dst.first()->as_Register());
  1549       } else {
  1550         // gpr -> fpr
  1551         __ st(src.first()->as_Register(), FP, -4 + STACK_BIAS);
  1552         __ ldf(FloatRegisterImpl::S, FP, -4 + STACK_BIAS, dst.first()->as_FloatRegister());
  1554     } else if (dst.first()->is_Register()) {
  1555       // fpr -> gpr
  1556       __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), FP, -4 + STACK_BIAS);
  1557       __ ld(FP, -4 + STACK_BIAS, dst.first()->as_Register());
  1558     } else {
  1559       // fpr -> fpr
  1560       // In theory these overlap but the ordering is such that this is likely a nop
  1561       if ( src.first() != dst.first()) {
  1562         __ fmov(FloatRegisterImpl::S, src.first()->as_FloatRegister(), dst.first()->as_FloatRegister());
  1568 static void split_long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
  1569   VMRegPair src_lo(src.first());
  1570   VMRegPair src_hi(src.second());
  1571   VMRegPair dst_lo(dst.first());
  1572   VMRegPair dst_hi(dst.second());
  1573   simple_move32(masm, src_lo, dst_lo);
  1574   simple_move32(masm, src_hi, dst_hi);
  1577 // A long move
  1578 static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
  1580   // Do the simple ones here else do two int moves
  1581   if (src.is_single_phys_reg() ) {
  1582     if (dst.is_single_phys_reg()) {
  1583       __ mov(src.first()->as_Register(), dst.first()->as_Register());
  1584     } else {
  1585       // split src into two separate registers
  1586       // Remember hi means hi address or lsw on sparc
  1587       // Move msw to lsw
  1588       if (dst.second()->is_reg()) {
  1589         // MSW -> MSW
  1590         __ srax(src.first()->as_Register(), 32, dst.first()->as_Register());
  1591         // Now LSW -> LSW
  1592         // this will only move lo -> lo and ignore hi
  1593         VMRegPair split(dst.second());
  1594         simple_move32(masm, src, split);
  1595       } else {
  1596         VMRegPair split(src.first(), L4->as_VMReg());
  1597         // MSW -> MSW (lo ie. first word)
  1598         __ srax(src.first()->as_Register(), 32, L4);
  1599         split_long_move(masm, split, dst);
  1602   } else if (dst.is_single_phys_reg()) {
  1603     if (src.is_adjacent_aligned_on_stack(2)) {
  1604       __ ldx(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
  1605     } else {
  1606       // dst is a single reg.
  1607       // Remember lo is low address not msb for stack slots
  1608       // and lo is the "real" register for registers
  1609       // src is
  1611       VMRegPair split;
  1613       if (src.first()->is_reg()) {
  1614         // src.lo (msw) is a reg, src.hi is stk/reg
  1615         // we will move: src.hi (LSW) -> dst.lo, src.lo (MSW) -> src.lo [the MSW is in the LSW of the reg]
  1616         split.set_pair(dst.first(), src.first());
  1617       } else {
  1618         // msw is stack move to L5
  1619         // lsw is stack move to dst.lo (real reg)
  1620         // we will move: src.hi (LSW) -> dst.lo, src.lo (MSW) -> L5
  1621         split.set_pair(dst.first(), L5->as_VMReg());
  1624       // src.lo -> src.lo/L5, src.hi -> dst.lo (the real reg)
  1625       // msw   -> src.lo/L5,  lsw -> dst.lo
  1626       split_long_move(masm, src, split);
   1628       // dst now has the low order word in the correct position; shift the
   1629       // msw half up and OR it in
  1630       __ sllx(split.first()->as_Register(), 32, L5);
  1632       const Register d = dst.first()->as_Register();
  1633       __ or3(L5, d, d);
  1635   } else {
  1636     // For LP64 we can probably do better.
  1637     split_long_move(masm, src, dst);
  1641 // A double move
  1642 static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
  1644   // The painful thing here is that like long_move a VMRegPair might be
  1645   // 1: a single physical register
  1646   // 2: two physical registers (v8)
  1647   // 3: a physical reg [lo] and a stack slot [hi] (v8)
  1648   // 4: two stack slots
  1650   // Since src is always a java calling convention we know that the src pair
  1651   // is always either all registers or all stack (and aligned?)
  1653   // in a register [lo] and a stack slot [hi]
  1654   if (src.first()->is_stack()) {
  1655     if (dst.first()->is_stack()) {
  1656       // stack to stack the easiest of the bunch
  1657       // ought to be a way to do this where if alignment is ok we use ldd/std when possible
  1658       __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5);
  1659       __ ld(FP, reg2offset(src.second()) + STACK_BIAS, L4);
  1660       __ st(L5, SP, reg2offset(dst.first()) + STACK_BIAS);
  1661       __ st(L4, SP, reg2offset(dst.second()) + STACK_BIAS);
  1662     } else {
  1663       // stack to reg
  1664       if (dst.second()->is_stack()) {
  1665         // stack -> reg, stack -> stack
  1666         __ ld(FP, reg2offset(src.second()) + STACK_BIAS, L4);
  1667         if (dst.first()->is_Register()) {
  1668           __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
  1669         } else {
  1670           __ ldf(FloatRegisterImpl::S, FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_FloatRegister());
  1672         // This was missing. (very rare case)
  1673         __ st(L4, SP, reg2offset(dst.second()) + STACK_BIAS);
  1674       } else {
  1675         // stack -> reg
  1676         // Eventually optimize for alignment QQQ
  1677         if (dst.first()->is_Register()) {
  1678           __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
  1679           __ ld(FP, reg2offset(src.second()) + STACK_BIAS, dst.second()->as_Register());
  1680         } else {
  1681           __ ldf(FloatRegisterImpl::S, FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_FloatRegister());
  1682           __ ldf(FloatRegisterImpl::S, FP, reg2offset(src.second()) + STACK_BIAS, dst.second()->as_FloatRegister());
  1686   } else if (dst.first()->is_stack()) {
  1687     // reg to stack
  1688     if (src.first()->is_Register()) {
  1689       // Eventually optimize for alignment QQQ
  1690       __ st(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS);
  1691       if (src.second()->is_stack()) {
  1692         __ ld(FP, reg2offset(src.second()) + STACK_BIAS, L4);
  1693         __ st(L4, SP, reg2offset(dst.second()) + STACK_BIAS);
  1694       } else {
  1695         __ st(src.second()->as_Register(), SP, reg2offset(dst.second()) + STACK_BIAS);
  1697     } else {
  1698       // fpr to stack
  1699       if (src.second()->is_stack()) {
  1700         ShouldNotReachHere();
  1701       } else {
  1702         // Is the stack aligned?
  1703         if (reg2offset(dst.first()) & 0x7) {
   1704           // No, do as pairs
  1705           __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), SP, reg2offset(dst.first()) + STACK_BIAS);
  1706           __ stf(FloatRegisterImpl::S, src.second()->as_FloatRegister(), SP, reg2offset(dst.second()) + STACK_BIAS);
  1707         } else {
  1708           __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), SP, reg2offset(dst.first()) + STACK_BIAS);
  1712   } else {
  1713     // reg to reg
  1714     if (src.first()->is_Register()) {
  1715       if (dst.first()->is_Register()) {
  1716         // gpr -> gpr
  1717         __ mov(src.first()->as_Register(), dst.first()->as_Register());
  1718         __ mov(src.second()->as_Register(), dst.second()->as_Register());
  1719       } else {
  1720         // gpr -> fpr
  1721         // ought to be able to do a single store
  1722         __ stx(src.first()->as_Register(), FP, -8 + STACK_BIAS);
  1723         __ stx(src.second()->as_Register(), FP, -4 + STACK_BIAS);
  1724         // ought to be able to do a single load
  1725         __ ldf(FloatRegisterImpl::S, FP, -8 + STACK_BIAS, dst.first()->as_FloatRegister());
  1726         __ ldf(FloatRegisterImpl::S, FP, -4 + STACK_BIAS, dst.second()->as_FloatRegister());
  1728     } else if (dst.first()->is_Register()) {
  1729       // fpr -> gpr
  1730       // ought to be able to do a single store
  1731       __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), FP, -8 + STACK_BIAS);
  1732       // ought to be able to do a single load
  1733       // REMEMBER first() is low address not LSB
  1734       __ ld(FP, -8 + STACK_BIAS, dst.first()->as_Register());
  1735       if (dst.second()->is_Register()) {
  1736         __ ld(FP, -4 + STACK_BIAS, dst.second()->as_Register());
  1737       } else {
  1738         __ ld(FP, -4 + STACK_BIAS, L4);
  1739         __ st(L4, SP, reg2offset(dst.second()) + STACK_BIAS);
  1741     } else {
  1742       // fpr -> fpr
  1743       // In theory these overlap but the ordering is such that this is likely a nop
  1744       if ( src.first() != dst.first()) {
  1745         __ fmov(FloatRegisterImpl::D, src.first()->as_FloatRegister(), dst.first()->as_FloatRegister());
  1751 // Creates an inner frame if one hasn't already been created, and
  1752 // saves a copy of the thread in L7_thread_cache
  1753 static void create_inner_frame(MacroAssembler* masm, bool* already_created) {
  1754   if (!*already_created) {
  1755     __ save_frame(0);
  1756     // Save thread in L7 (INNER FRAME); it crosses a bunch of VM calls below
  1757     // Don't use save_thread because it smashes G2 and we merely want to save a
  1758     // copy
  1759     __ mov(G2_thread, L7_thread_cache);
  1760     *already_created = true;
  1764 // ---------------------------------------------------------------------------
  1765 // Generate a native wrapper for a given method.  The method takes arguments
  1766 // in the Java compiled code convention, marshals them to the native
  1767 // convention (handlizes oops, etc), transitions to native, makes the call,
  1768 // returns to java state (possibly blocking), unhandlizes any result and
  1769 // returns.
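// Roughly, the generated wrapper performs, in order: the inline-cache check,
// an optional Object.hashCode fast path, the argument shuffle into the C
// convention, optional locking, the transition to _thread_in_native, the
// native call itself, the safepoint/suspend check on return, optional
// unlocking, result unpacking, and the return to Java.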
  1770 nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
  1771                                                 methodHandle method,
  1772                                                 int total_in_args,
  1773                                                 int comp_args_on_stack, // in VMRegStackSlots
  1774                                                 BasicType *in_sig_bt,
  1775                                                 VMRegPair *in_regs,
  1776                                                 BasicType ret_type) {
   1778   // Native nmethod wrappers never take possession of the oop arguments.
  1779   // So the caller will gc the arguments. The only thing we need an
  1780   // oopMap for is if the call is static
  1781   //
  1782   // An OopMap for lock (and class if static), and one for the VM call itself
  1783   OopMapSet *oop_maps = new OopMapSet();
  1784   intptr_t start = (intptr_t)__ pc();
  1786   // First thing make an ic check to see if we should even be here
  1788     Label L;
  1789     const Register temp_reg = G3_scratch;
  1790     AddressLiteral ic_miss(SharedRuntime::get_ic_miss_stub());
  1791     __ verify_oop(O0);
  1792     __ load_klass(O0, temp_reg);
  1793     __ cmp(temp_reg, G5_inline_cache_reg);
  1794     __ brx(Assembler::equal, true, Assembler::pt, L);
  1795     __ delayed()->nop();
  1797     __ jump_to(ic_miss, temp_reg);
  1798     __ delayed()->nop();
  1799     __ align(CodeEntryAlignment);
  1800     __ bind(L);
  1803   int vep_offset = ((intptr_t)__ pc()) - start;
  1805 #ifdef COMPILER1
  1806   if (InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) {
  1807     // Object.hashCode can pull the hashCode from the header word
  1808     // instead of doing a full VM transition once it's been computed.
  1809     // Since hashCode is usually polymorphic at call sites we can't do
  1810     // this optimization at the call site without a lot of work.
  1811     Label slowCase;
  1812     Register receiver             = O0;
  1813     Register result               = O0;
  1814     Register header               = G3_scratch;
  1815     Register hash                 = G3_scratch; // overwrite header value with hash value
  1816     Register mask                 = G1;         // to get hash field from header
  1818     // Read the header and build a mask to get its hash field.  Give up if the object is not unlocked.
  1819     // We depend on hash_mask being at most 32 bits and avoid the use of
  1820     // hash_mask_in_place because it could be larger than 32 bits in a 64-bit
  1821     // vm: see markOop.hpp.
  1822     __ ld_ptr(receiver, oopDesc::mark_offset_in_bytes(), header);
  1823     __ sethi(markOopDesc::hash_mask, mask);
  1824     __ btst(markOopDesc::unlocked_value, header);
  1825     __ br(Assembler::zero, false, Assembler::pn, slowCase);
  1826     if (UseBiasedLocking) {
  1827       // Check if biased and fall through to runtime if so
  1828       __ delayed()->nop();
  1829       __ btst(markOopDesc::biased_lock_bit_in_place, header);
  1830       __ br(Assembler::notZero, false, Assembler::pn, slowCase);
  1832     __ delayed()->or3(mask, markOopDesc::hash_mask & 0x3ff, mask);
  1834     // Check for a valid (non-zero) hash code and get its value.
  1835 #ifdef _LP64
  1836     __ srlx(header, markOopDesc::hash_shift, hash);
  1837 #else
  1838     __ srl(header, markOopDesc::hash_shift, hash);
  1839 #endif
  1840     __ andcc(hash, mask, hash);
  1841     __ br(Assembler::equal, false, Assembler::pn, slowCase);
  1842     __ delayed()->nop();
  1844     // leaf return.
  1845     __ retl();
  1846     __ delayed()->mov(hash, result);
  1847     __ bind(slowCase);
  1849 #endif // COMPILER1
   1852   // We have received a description of where all the java args are located
  1853   // on entry to the wrapper. We need to convert these args to where
  1854   // the jni function will expect them. To figure out where they go
  1855   // we convert the java signature to a C signature by inserting
  1856   // the hidden arguments as arg[0] and possibly arg[1] (static method)
  1858   int total_c_args = total_in_args + 1;
  1859   if (method->is_static()) {
  1860     total_c_args++;
  1863   BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
  1864   VMRegPair  * out_regs   = NEW_RESOURCE_ARRAY(VMRegPair,   total_c_args);
  1866   int argc = 0;
  1867   out_sig_bt[argc++] = T_ADDRESS;
  1868   if (method->is_static()) {
  1869     out_sig_bt[argc++] = T_OBJECT;
  1872   for (int i = 0; i < total_in_args ; i++ ) {
  1873     out_sig_bt[argc++] = in_sig_bt[i];
  1876   // Now figure out where the args must be stored and how much stack space
   1877   // they require (neglecting out_preserve_stack_slots but including space for
   1878   // storing the 1st six register arguments). It's weird; see int_stk_helper.
  1879   //
  1880   int out_arg_slots;
  1881   out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args);
  1883   // Compute framesize for the wrapper.  We need to handlize all oops in
  1884   // registers. We must create space for them here that is disjoint from
  1885   // the windowed save area because we have no control over when we might
  1886   // flush the window again and overwrite values that gc has since modified.
  1887   // (The live window race)
  1888   //
   1889   // We always just allocate 6 words for storing down these objects. This allows
  1890   // us to simply record the base and use the Ireg number to decide which
  1891   // slot to use. (Note that the reg number is the inbound number not the
  1892   // outbound number).
  1893   // We must shuffle args to match the native convention, and include var-args space.
  1895   // Calculate the total number of stack slots we will need.
  1897   // First count the abi requirement plus all of the outgoing args
  1898   int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
  1900   // Now the space for the inbound oop handle area
  1902   int oop_handle_offset = stack_slots;
  1903   stack_slots += 6*VMRegImpl::slots_per_word;
  1905   // Now any space we need for handlizing a klass if static method
  1907   int oop_temp_slot_offset = 0;
  1908   int klass_slot_offset = 0;
  1909   int klass_offset = -1;
  1910   int lock_slot_offset = 0;
  1911   bool is_static = false;
  1913   if (method->is_static()) {
  1914     klass_slot_offset = stack_slots;
  1915     stack_slots += VMRegImpl::slots_per_word;
  1916     klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size;
  1917     is_static = true;
  1920   // Plus a lock if needed
  1922   if (method->is_synchronized()) {
  1923     lock_slot_offset = stack_slots;
  1924     stack_slots += VMRegImpl::slots_per_word;
  1927   // Now a place to save return value or as a temporary for any gpr -> fpr moves
  1928   stack_slots += 2;
   1930   // OK, the space we have allocated will look like:
  1931   //
  1932   //
  1933   // FP-> |                     |
  1934   //      |---------------------|
  1935   //      | 2 slots for moves   |
  1936   //      |---------------------|
  1937   //      | lock box (if sync)  |
  1938   //      |---------------------| <- lock_slot_offset
  1939   //      | klass (if static)   |
  1940   //      |---------------------| <- klass_slot_offset
  1941   //      | oopHandle area      |
  1942   //      |---------------------| <- oop_handle_offset
  1943   //      | outbound memory     |
  1944   //      | based arguments     |
  1945   //      |                     |
  1946   //      |---------------------|
  1947   //      | vararg area         |
  1948   //      |---------------------|
  1949   //      |                     |
  1950   // SP-> | out_preserved_slots |
  1951   //
  1952   //
   1955   // Now compute the actual number of stack words we need, rounding to make
   1956   // the stack properly aligned.
  1957   stack_slots = round_to(stack_slots, 2 * VMRegImpl::slots_per_word);
  1959   int stack_size = stack_slots * VMRegImpl::stack_slot_size;
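  // To summarize the arithmetic above, stack_slots is:
  //   out_preserve_stack_slots() + out_arg_slots      (ABI area + outgoing args)
  //   + 6 * slots_per_word                            (oop handle area)
  //   + slots_per_word if static                      (handlized klass mirror)
  //   + slots_per_word if synchronized                (lock box)
  //   + 2                                             (return value / move temp)
  // rounded up to a multiple of 2 * slots_per_word for stack alignment.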
  1961   // Generate stack overflow check before creating frame
  1962   __ generate_stack_overflow_check(stack_size);
  1964   // Generate a new frame for the wrapper.
  1965   __ save(SP, -stack_size, SP);
  1967   int frame_complete = ((intptr_t)__ pc()) - start;
  1969   __ verify_thread();
  1972   //
  1973   // We immediately shuffle the arguments so that any vm call we have to
  1974   // make from here on out (sync slow path, jvmti, etc.) we will have
  1975   // captured the oops from our caller and have a valid oopMap for
  1976   // them.
  1978   // -----------------
  1979   // The Grand Shuffle
  1980   //
  1981   // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv*
  1982   // (derived from JavaThread* which is in L7_thread_cache) and, if static,
  1983   // the class mirror instead of a receiver.  This pretty much guarantees that
  1984   // register layout will not match.  We ignore these extra arguments during
  1985   // the shuffle. The shuffle is described by the two calling convention
  1986   // vectors we have in our possession. We simply walk the java vector to
  1987   // get the source locations and the c vector to get the destinations.
  1988   // Because we have a new window and the argument registers are completely
  1989   // disjoint ( I0 -> O1, I1 -> O2, ...) we have nothing to worry about
  1990   // here.
  1992   // This is a trick. We double the stack slots so we can claim
  1993   // the oops in the caller's frame. Since we are sure to have
   1994   // more args than the caller, doubling is enough to make
  1995   // sure we can capture all the incoming oop args from the
  1996   // caller.
  1997   //
  1998   OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
  1999   int c_arg = total_c_args - 1;
  2000   // Record sp-based slot for receiver on stack for non-static methods
  2001   int receiver_offset = -1;
   2003   // We move the arguments backward because a floating point register
   2004   // destination will always be a register with a greater or equal register
   2005   // number, or a stack slot.
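  // A note on the indexing below (a sketch of the common case): java arg i
  // always lands in C arg i + 1 (after the JNIEnv*) for instance methods, or
  // i + 2 for static methods (JNIEnv* plus the class mirror), which is why the
  // loop walks i and c_arg down in lock step from the high end of each vector.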
  2007 #ifdef ASSERT
  2008   bool reg_destroyed[RegisterImpl::number_of_registers];
  2009   bool freg_destroyed[FloatRegisterImpl::number_of_registers];
  2010   for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) {
  2011     reg_destroyed[r] = false;
  2013   for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) {
  2014     freg_destroyed[f] = false;
  2017 #endif /* ASSERT */
  2019   for ( int i = total_in_args - 1; i >= 0 ; i--, c_arg-- ) {
  2021 #ifdef ASSERT
  2022     if (in_regs[i].first()->is_Register()) {
  2023       assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "ack!");
  2024     } else if (in_regs[i].first()->is_FloatRegister()) {
  2025       assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding(FloatRegisterImpl::S)], "ack!");
  2027     if (out_regs[c_arg].first()->is_Register()) {
  2028       reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
  2029     } else if (out_regs[c_arg].first()->is_FloatRegister()) {
  2030       freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding(FloatRegisterImpl::S)] = true;
  2032 #endif /* ASSERT */
  2034     switch (in_sig_bt[i]) {
  2035       case T_ARRAY:
  2036       case T_OBJECT:
  2037         object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
  2038                     ((i == 0) && (!is_static)),
  2039                     &receiver_offset);
  2040         break;
  2041       case T_VOID:
  2042         break;
  2044       case T_FLOAT:
  2045         float_move(masm, in_regs[i], out_regs[c_arg]);
  2046           break;
  2048       case T_DOUBLE:
  2049         assert( i + 1 < total_in_args &&
  2050                 in_sig_bt[i + 1] == T_VOID &&
  2051                 out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
  2052         double_move(masm, in_regs[i], out_regs[c_arg]);
  2053         break;
  2055       case T_LONG :
  2056         long_move(masm, in_regs[i], out_regs[c_arg]);
  2057         break;
  2059       case T_ADDRESS: assert(false, "found T_ADDRESS in java args");
  2061       default:
  2062         move32_64(masm, in_regs[i], out_regs[c_arg]);
  2066   // Pre-load a static method's oop into O1.  Used both by locking code and
  2067   // the normal JNI call code.
  2068   if (method->is_static()) {
  2069     __ set_oop_constant(JNIHandles::make_local(Klass::cast(method->method_holder())->java_mirror()), O1);
  2071     // Now handlize the static class mirror in O1.  It's known not-null.
  2072     __ st_ptr(O1, SP, klass_offset + STACK_BIAS);
  2073     map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
  2074     __ add(SP, klass_offset + STACK_BIAS, O1);
  2078   const Register L6_handle = L6;
  2080   if (method->is_synchronized()) {
  2081     __ mov(O1, L6_handle);
   2084   // We have all of the arguments set up at this point. We MUST NOT touch any Oregs
  2085   // except O6/O7. So if we must call out we must push a new frame. We immediately
  2086   // push a new frame and flush the windows.
  2088 #ifdef _LP64
  2089   intptr_t thepc = (intptr_t) __ pc();
  2091     address here = __ pc();
  2092     // Call the next instruction
  2093     __ call(here + 8, relocInfo::none);
  2094     __ delayed()->nop();
  2096 #else
  2097   intptr_t thepc = __ load_pc_address(O7, 0);
  2098 #endif /* _LP64 */
  2100   // We use the same pc/oopMap repeatedly when we call out
  2101   oop_maps->add_gc_map(thepc - start, map);
  2103   // O7 now has the pc loaded that we will use when we finally call to native.
  2105   // Save thread in L7; it crosses a bunch of VM calls below
  2106   // Don't use save_thread because it smashes G2 and we merely
  2107   // want to save a copy
  2108   __ mov(G2_thread, L7_thread_cache);
   2111   // If we create an inner frame, once is plenty;
  2112   // when we create it we must also save G2_thread
  2113   bool inner_frame_created = false;
  2115   // dtrace method entry support
  2117     SkipIfEqual skip_if(
  2118       masm, G3_scratch, &DTraceMethodProbes, Assembler::zero);
  2119     // create inner frame
  2120     __ save_frame(0);
  2121     __ mov(G2_thread, L7_thread_cache);
  2122     __ set_oop_constant(JNIHandles::make_local(method()), O1);
  2123     __ call_VM_leaf(L7_thread_cache,
  2124          CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry),
  2125          G2_thread, O1);
  2126     __ restore();
  2129   // RedefineClasses() tracing support for obsolete method entry
  2130   if (RC_TRACE_IN_RANGE(0x00001000, 0x00002000)) {
  2131     // create inner frame
  2132     __ save_frame(0);
  2133     __ mov(G2_thread, L7_thread_cache);
  2134     __ set_oop_constant(JNIHandles::make_local(method()), O1);
  2135     __ call_VM_leaf(L7_thread_cache,
  2136          CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry),
  2137          G2_thread, O1);
  2138     __ restore();
   2141   // We are in the jni frame unless inner_frame_created is true, in which case
   2142   // we are one frame deeper (the "inner" frame). If we are in the
   2143   // "inner" frame the args are in the Iregs; if in the jni frame then
   2144   // they are in the Oregs.
  2145   // If we ever need to go to the VM (for locking, jvmti) then
  2146   // we will always be in the "inner" frame.
  2148   // Lock a synchronized method
  2149   int lock_offset = -1;         // Set if locked
  2150   if (method->is_synchronized()) {
  2151     Register Roop = O1;
  2152     const Register L3_box = L3;
  2154     create_inner_frame(masm, &inner_frame_created);
  2156     __ ld_ptr(I1, 0, O1);
  2157     Label done;
  2159     lock_offset = (lock_slot_offset * VMRegImpl::stack_slot_size);
  2160     __ add(FP, lock_offset+STACK_BIAS, L3_box);
  2161 #ifdef ASSERT
  2162     if (UseBiasedLocking) {
  2163       // making the box point to itself will make it clear it went unused
  2164       // but also be obviously invalid
  2165       __ st_ptr(L3_box, L3_box, 0);
  2167 #endif // ASSERT
  2168     //
  2169     // Compiler_lock_object (Roop, Rmark, Rbox, Rscratch) -- kills Rmark, Rbox, Rscratch
  2170     //
  2171     __ compiler_lock_object(Roop, L1,    L3_box, L2);
  2172     __ br(Assembler::equal, false, Assembler::pt, done);
  2173     __ delayed() -> add(FP, lock_offset+STACK_BIAS, L3_box);
  2176     // None of the above fast optimizations worked so we have to get into the
  2177     // slow case of monitor enter.  Inline a special case of call_VM that
  2178     // disallows any pending_exception.
  2179     __ mov(Roop, O0);            // Need oop in O0
  2180     __ mov(L3_box, O1);
  2182     // Record last_Java_sp, in case the VM code releases the JVM lock.
  2184     __ set_last_Java_frame(FP, I7);
  2186     // do the call
  2187     __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), relocInfo::runtime_call_type);
  2188     __ delayed()->mov(L7_thread_cache, O2);
  2190     __ restore_thread(L7_thread_cache); // restore G2_thread
  2191     __ reset_last_Java_frame();
  2193 #ifdef ASSERT
  2194     { Label L;
  2195     __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), O0);
  2196     __ br_null(O0, false, Assembler::pt, L);
  2197     __ delayed()->nop();
  2198     __ stop("no pending exception allowed on exit from IR::monitorenter");
  2199     __ bind(L);
  2201 #endif
  2202     __ bind(done);
  2206   // Finally just about ready to make the JNI call
  2208   __ flush_windows();
  2209   if (inner_frame_created) {
  2210     __ restore();
  2211   } else {
  2212     // Store only what we need from this frame
   2213     // QQQ I think that on non-v9 (like we care) we don't need these saves
   2214     // either, as the flush traps and the current window goes too.
  2215     __ st_ptr(FP, SP, FP->sp_offset_in_saved_window()*wordSize + STACK_BIAS);
  2216     __ st_ptr(I7, SP, I7->sp_offset_in_saved_window()*wordSize + STACK_BIAS);
  2219   // get JNIEnv* which is first argument to native
  2221   __ add(G2_thread, in_bytes(JavaThread::jni_environment_offset()), O0);
  2223   // Use that pc we placed in O7 a while back as the current frame anchor
  2225   __ set_last_Java_frame(SP, O7);
  2227   // Transition from _thread_in_Java to _thread_in_native.
  2228   __ set(_thread_in_native, G3_scratch);
  2229   __ st(G3_scratch, G2_thread, JavaThread::thread_state_offset());
   2231   // We flushed the windows ages ago; now mark them as flushed
  2233   // mark windows as flushed
  2234   __ set(JavaFrameAnchor::flushed, G3_scratch);
  2236   Address flags(G2_thread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::flags_offset());
  2238 #ifdef _LP64
  2239   AddressLiteral dest(method->native_function());
  2240   __ relocate(relocInfo::runtime_call_type);
  2241   __ jumpl_to(dest, O7, O7);
  2242 #else
  2243   __ call(method->native_function(), relocInfo::runtime_call_type);
  2244 #endif
  2245   __ delayed()->st(G3_scratch, flags);
  2247   __ restore_thread(L7_thread_cache); // restore G2_thread
  2249   // Unpack native results.  For int-types, we do any needed sign-extension
  2250   // and move things into I0.  The return value there will survive any VM
  2251   // calls for blocking or unlocking.  An FP or OOP result (handle) is done
  2252   // specially in the slow-path code.
  2253   switch (ret_type) {
  2254   case T_VOID:    break;        // Nothing to do!
  2255   case T_FLOAT:   break;        // Got it where we want it (unless slow-path)
  2256   case T_DOUBLE:  break;        // Got it where we want it (unless slow-path)
   2257   // In a 64-bit build the result is in O0; in a 32-bit build it is in O0, O1
  2258   case T_LONG:
  2259 #ifndef _LP64
  2260                   __ mov(O1, I1);
  2261 #endif
  2262                   // Fall thru
  2263   case T_OBJECT:                // Really a handle
  2264   case T_ARRAY:
  2265   case T_INT:
  2266                   __ mov(O0, I0);
  2267                   break;
  2268   case T_BOOLEAN: __ subcc(G0, O0, G0); __ addc(G0, 0, I0); break; // !0 => true; 0 => false
  2269   case T_BYTE   : __ sll(O0, 24, O0); __ sra(O0, 24, I0);   break;
  2270   case T_CHAR   : __ sll(O0, 16, O0); __ srl(O0, 16, I0);   break; // cannot use and3, 0xFFFF too big as immediate value!
  2271   case T_SHORT  : __ sll(O0, 16, O0); __ sra(O0, 16, I0);   break;
  2272     break;                      // Cannot de-handlize until after reclaiming jvm_lock
  2273   default:
  2274     ShouldNotReachHere();
  2277   // must we block?
  2279   // Block, if necessary, before resuming in _thread_in_Java state.
  2280   // In order for GC to work, don't clear the last_Java_sp until after blocking.
  2281   { Label no_block;
  2282     AddressLiteral sync_state(SafepointSynchronize::address_of_state());
  2284     // Switch thread to "native transition" state before reading the synchronization state.
  2285     // This additional state is necessary because reading and testing the synchronization
  2286     // state is not atomic w.r.t. GC, as this scenario demonstrates:
  2287     //     Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
  2288     //     VM thread changes sync state to synchronizing and suspends threads for GC.
  2289     //     Thread A is resumed to finish this native method, but doesn't block here since it
   2290     //     didn't see any synchronization in progress, and escapes.
  2291     __ set(_thread_in_native_trans, G3_scratch);
  2292     __ st(G3_scratch, G2_thread, JavaThread::thread_state_offset());
  2293     if(os::is_MP()) {
  2294       if (UseMembar) {
  2295         // Force this write out before the read below
  2296         __ membar(Assembler::StoreLoad);
  2297       } else {
  2298         // Write serialization page so VM thread can do a pseudo remote membar.
  2299         // We use the current thread pointer to calculate a thread specific
  2300         // offset to write to within the page. This minimizes bus traffic
  2301         // due to cache line collision.
  2302         __ serialize_memory(G2_thread, G1_scratch, G3_scratch);
  2305     __ load_contents(sync_state, G3_scratch);
  2306     __ cmp(G3_scratch, SafepointSynchronize::_not_synchronized);
  2308     Label L;
  2309     Address suspend_state(G2_thread, JavaThread::suspend_flags_offset());
  2310     __ br(Assembler::notEqual, false, Assembler::pn, L);
  2311     __ delayed()->ld(suspend_state, G3_scratch);
  2312     __ cmp(G3_scratch, 0);
  2313     __ br(Assembler::equal, false, Assembler::pt, no_block);
  2314     __ delayed()->nop();
  2315     __ bind(L);
  2317     // Block.  Save any potential method result value before the operation and
  2318     // use a leaf call to leave the last_Java_frame setup undisturbed. Doing this
   2319     // lets us share the oopMap we used when we went native rather than create
  2320     // a distinct one for this pc
  2321     //
  2322     save_native_result(masm, ret_type, stack_slots);
  2323     __ call_VM_leaf(L7_thread_cache,
  2324                     CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans),
  2325                     G2_thread);
  2327     // Restore any method result value
  2328     restore_native_result(masm, ret_type, stack_slots);
  2329     __ bind(no_block);
  2332   // thread state is thread_in_native_trans. Any safepoint blocking has already
  2333   // happened so we can now change state to _thread_in_Java.
  2336   __ set(_thread_in_Java, G3_scratch);
  2337   __ st(G3_scratch, G2_thread, JavaThread::thread_state_offset());
  2340   Label no_reguard;
  2341   __ ld(G2_thread, JavaThread::stack_guard_state_offset(), G3_scratch);
  2342   __ cmp(G3_scratch, JavaThread::stack_guard_yellow_disabled);
  2343   __ br(Assembler::notEqual, false, Assembler::pt, no_reguard);
  2344   __ delayed()->nop();
  2346     save_native_result(masm, ret_type, stack_slots);
  2347   __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages));
  2348   __ delayed()->nop();
  2350   __ restore_thread(L7_thread_cache); // restore G2_thread
  2351     restore_native_result(masm, ret_type, stack_slots);
  2353   __ bind(no_reguard);
  2355   // Handle possible exception (will unlock if necessary)
  2357   // native result if any is live in freg or I0 (and I1 if long and 32bit vm)
  2359   // Unlock
  2360   if (method->is_synchronized()) {
  2361     Label done;
  2362     Register I2_ex_oop = I2;
  2363     const Register L3_box = L3;
  2364     // Get locked oop from the handle we passed to jni
  2365     __ ld_ptr(L6_handle, 0, L4);
  2366     __ add(SP, lock_offset+STACK_BIAS, L3_box);
  2367     // Must save pending exception around the slow-path VM call.  Since it's a
  2368     // leaf call, the pending exception (if any) can be kept in a register.
  2369     __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), I2_ex_oop);
  2370     // Now unlock
  2371     //                       (Roop, Rmark, Rbox,   Rscratch)
  2372     __ compiler_unlock_object(L4,   L1,    L3_box, L2);
  2373     __ br(Assembler::equal, false, Assembler::pt, done);
  2374     __ delayed()-> add(SP, lock_offset+STACK_BIAS, L3_box);
  2376     // save and restore any potential method result value around the unlocking
  2377     // operation.  Will save in I0 (or stack for FP returns).
  2378     save_native_result(masm, ret_type, stack_slots);
  2380     // Must clear pending-exception before re-entering the VM.  Since this is
  2381     // a leaf call, pending-exception-oop can be safely kept in a register.
  2382     __ st_ptr(G0, G2_thread, in_bytes(Thread::pending_exception_offset()));
   2384     // slow case of monitor exit.  Inline a special case of call_VM that
  2385     // disallows any pending_exception.
  2386     __ mov(L3_box, O1);
  2388     __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), relocInfo::runtime_call_type);
  2389     __ delayed()->mov(L4, O0);              // Need oop in O0
  2391     __ restore_thread(L7_thread_cache); // restore G2_thread
  2393 #ifdef ASSERT
  2394     { Label L;
  2395     __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), O0);
  2396     __ br_null(O0, false, Assembler::pt, L);
  2397     __ delayed()->nop();
  2398     __ stop("no pending exception allowed on exit from IR::monitorexit");
  2399     __ bind(L);
  2401 #endif
  2402     restore_native_result(masm, ret_type, stack_slots);
   2403     // check_forward_pending_exception jumps to forward_exception if any pending
  2404     // exception is set.  The forward_exception routine expects to see the
  2405     // exception in pending_exception and not in a register.  Kind of clumsy,
  2406     // since all folks who branch to forward_exception must have tested
  2407     // pending_exception first and hence have it in a register already.
  2408     __ st_ptr(I2_ex_oop, G2_thread, in_bytes(Thread::pending_exception_offset()));
  2409     __ bind(done);
  2412   // Tell dtrace about this method exit
  2414     SkipIfEqual skip_if(
  2415       masm, G3_scratch, &DTraceMethodProbes, Assembler::zero);
  2416     save_native_result(masm, ret_type, stack_slots);
  2417     __ set_oop_constant(JNIHandles::make_local(method()), O1);
  2418     __ call_VM_leaf(L7_thread_cache,
  2419        CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit),
  2420        G2_thread, O1);
  2421     restore_native_result(masm, ret_type, stack_slots);
  2424   // Clear "last Java frame" SP and PC.
  2425   __ verify_thread(); // G2_thread must be correct
  2426   __ reset_last_Java_frame();
  2428   // Unpack oop result
  2429   if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
  2430       Label L;
  2431       __ addcc(G0, I0, G0);
  2432       __ brx(Assembler::notZero, true, Assembler::pt, L);
  2433       __ delayed()->ld_ptr(I0, 0, I0);
  2434       __ mov(G0, I0);
  2435       __ bind(L);
  2436       __ verify_oop(I0);
  2439   // reset handle block
  2440   __ ld_ptr(G2_thread, in_bytes(JavaThread::active_handles_offset()), L5);
  2441   __ st_ptr(G0, L5, JNIHandleBlock::top_offset_in_bytes());
  2443   __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), G3_scratch);
  2444   check_forward_pending_exception(masm, G3_scratch);
  2447   // Return
  2449 #ifndef _LP64
  2450   if (ret_type == T_LONG) {
  2452     // Must leave proper result in O0,O1 and G1 (c2/tiered only)
  2453     __ sllx(I0, 32, G1);          // Shift bits into high G1
  2454     __ srl (I1, 0, I1);           // Zero extend O1 (harmless?)
  2455     __ or3 (I1, G1, G1);          // OR 64 bits into G1
  2457 #endif
  2459   __ ret();
  2460   __ delayed()->restore();
  2462   __ flush();
  2464   nmethod *nm = nmethod::new_native_nmethod(method,
  2465                                             masm->code(),
  2466                                             vep_offset,
  2467                                             frame_complete,
  2468                                             stack_slots / VMRegImpl::slots_per_word,
  2469                                             (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
  2470                                             in_ByteSize(lock_offset),
  2471                                             oop_maps);
  2472   return nm;
  2476 #ifdef HAVE_DTRACE_H
  2477 // ---------------------------------------------------------------------------
  2478 // Generate a dtrace nmethod for a given signature.  The method takes arguments
  2479 // in the Java compiled code convention, marshals them to the native
  2480 // abi and then leaves nops at the position you would expect to call a native
  2481 // function. When the probe is enabled the nops are replaced with a trap
  2482 // instruction that dtrace inserts and the trace will cause a notification
  2483 // to dtrace.
  2484 //
  2485 // The probes are only able to take primitive types and java/lang/String as
  2486 // arguments.  No other java types are allowed. Strings are converted to utf8
   2487 // strings so that from dtrace's point of view java strings are converted to C
  2488 // strings. There is an arbitrary fixed limit on the total space that a method
  2489 // can use for converting the strings. (256 chars per string in the signature).
   2490 // So any java string larger than this is truncated.
  2492 static int  fp_offset[ConcreteRegisterImpl::number_of_registers] = { 0 };
  2493 static bool offsets_initialized = false;
  2495 static VMRegPair reg64_to_VMRegPair(Register r) {
  2496   VMRegPair ret;
  2497   if (wordSize == 8) {
  2498     ret.set2(r->as_VMReg());
  2499   } else {
  2500     ret.set_pair(r->successor()->as_VMReg(), r->as_VMReg());
  2502   return ret;
  2506 nmethod *SharedRuntime::generate_dtrace_nmethod(
  2507     MacroAssembler *masm, methodHandle method) {
  2510   // generate_dtrace_nmethod is guarded by a mutex so we are sure to
  2511   // be single threaded in this method.
  2512   assert(AdapterHandlerLibrary_lock->owned_by_self(), "must be");
  2514   // Fill in the signature array, for the calling-convention call.
  2515   int total_args_passed = method->size_of_parameters();
  2517   BasicType* in_sig_bt  = NEW_RESOURCE_ARRAY(BasicType, total_args_passed);
  2518   VMRegPair  *in_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed);
   2520   // The signature we are going to use for the trap that dtrace will see:
   2521   // java/lang/String is converted, "this" is dropped, and any other object
   2522   // is converted to NULL.  (A one-slot java/lang/Long object reference
  2523   // is converted to a two-slot long, which is why we double the allocation).
  2524   BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed * 2);
  2525   VMRegPair* out_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed * 2);
  2527   int i=0;
  2528   int total_strings = 0;
  2529   int first_arg_to_pass = 0;
  2530   int total_c_args = 0;
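         // i indexes the Java-side signature (in_sig_bt); total_c_args indexes the
         // C-side one (out_sig_bt).  They differ because the receiver is recorded
         // only on the Java side.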
  2532   // Skip the receiver as dtrace doesn't want to see it
  2533   if( !method->is_static() ) {
  2534     in_sig_bt[i++] = T_OBJECT;
  2535     first_arg_to_pass = 1;
  2538   SignatureStream ss(method->signature());
  2539   for ( ; !ss.at_return_type(); ss.next()) {
  2540     BasicType bt = ss.type();
  2541     in_sig_bt[i++] = bt;  // Collect remaining bits of signature
  2542     out_sig_bt[total_c_args++] = bt;
  2543     if( bt == T_OBJECT) {
  2544       Symbol* s = ss.as_symbol_or_null();
  2545       if (s == vmSymbols::java_lang_String()) {
  2546         total_strings++;
  2547         out_sig_bt[total_c_args-1] = T_ADDRESS;
  2548       } else if (s == vmSymbols::java_lang_Boolean() ||
  2549                  s == vmSymbols::java_lang_Byte()) {
  2550         out_sig_bt[total_c_args-1] = T_BYTE;
  2551       } else if (s == vmSymbols::java_lang_Character() ||
  2552                  s == vmSymbols::java_lang_Short()) {
  2553         out_sig_bt[total_c_args-1] = T_SHORT;
  2554       } else if (s == vmSymbols::java_lang_Integer() ||
  2555                  s == vmSymbols::java_lang_Float()) {
  2556         out_sig_bt[total_c_args-1] = T_INT;
  2557       } else if (s == vmSymbols::java_lang_Long() ||
  2558                  s == vmSymbols::java_lang_Double()) {
  2559         out_sig_bt[total_c_args-1] = T_LONG;
  2560         out_sig_bt[total_c_args++] = T_VOID;
  2562     } else if ( bt == T_LONG || bt == T_DOUBLE ) {
  2563       in_sig_bt[i++] = T_VOID;   // Longs & doubles take 2 Java slots
  2564       // We convert double to long
  2565       out_sig_bt[total_c_args-1] = T_LONG;
  2566       out_sig_bt[total_c_args++] = T_VOID;
  2567     } else if ( bt == T_FLOAT) {
  2568       // We convert float to int
  2569       out_sig_bt[total_c_args-1] = T_INT;
  2573   assert(i==total_args_passed, "validly parsed signature");
  2575   // Now get the compiled-Java layout as input arguments
  2576   int comp_args_on_stack;
  2577   comp_args_on_stack = SharedRuntime::java_calling_convention(
  2578       in_sig_bt, in_regs, total_args_passed, false);
  2580   // We have received a description of where all the java args are located
  2581   // on entry to the wrapper. We need to convert these args to where
  2582   // a native (non-jni) function would expect them. To figure out
  2583   // where they go we convert the java signature to a C signature and remove
  2584   // T_VOID for any long/double we might have received.
  2587   // Now figure out where the args must be stored and how much stack space
  2588   // they require (neglecting out_preserve_stack_slots but including space for
  2589   // storing the 1st six register arguments). It's weird; see int_stk_helper.
  2590   //
  2591   int out_arg_slots;
  2592   out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args);
  2594   // Calculate the total number of stack slots we will need.
  2596   // First count the abi requirement plus all of the outgoing args
  2597   int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
  2599   // Plus a temp for possible conversion of float/double/long register args
  2601   int conversion_temp = stack_slots;
  2602   stack_slots += 2;
  2605   // Now space for the string(s) we must convert
  2607   int string_locs = stack_slots;
  2608   stack_slots += total_strings *
  2609                    (max_dtrace_string_size / VMRegImpl::stack_slot_size);
  2611   // OK, the space we have allocated will look like:
  2612   //
  2613   //
  2614   // FP-> |                     |
  2615   //      |---------------------|
  2616   //      | string[n]           |
  2617   //      |---------------------| <- string_locs[n]
  2618   //      | string[n-1]         |
  2619   //      |---------------------| <- string_locs[n-1]
  2620   //      | ...                 |
  2621   //      | ...                 |
  2622   //      |---------------------| <- string_locs[1]
  2623   //      | string[0]           |
  2624   //      |---------------------| <- string_locs[0]
  2625   //      | temp                |
  2626   //      |---------------------| <- conversion_temp
  2627   //      | outbound memory     |
  2628   //      | based arguments     |
  2629   //      |                     |
  2630   //      |---------------------|
  2631   //      |                     |
  2632   // SP-> | out_preserved_slots |
  2633   //
  2634   //
  2636   // Now compute the actual number of stack words we need, rounding to keep
  2637   // the stack properly aligned.
  2638   stack_slots = round_to(stack_slots, 4 * VMRegImpl::slots_per_word);
  2640   int stack_size = stack_slots * VMRegImpl::stack_slot_size;
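         // Remember where code generation starts; vep_offset, frame_complete and
         // patch_offset below are all measured relative to this point.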
  2642   intptr_t start = (intptr_t)__ pc();
  2644   // First thing make an ic check to see if we should even be here
  2647     Label L;
  2648     const Register temp_reg = G3_scratch;
  2649     AddressLiteral ic_miss(SharedRuntime::get_ic_miss_stub());
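           // Load the receiver's klass and compare it with the expected klass in
           // G5_inline_cache_reg; on a mismatch, jump to the ic-miss stub.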
  2650     __ verify_oop(O0);
  2651     __ ld_ptr(O0, oopDesc::klass_offset_in_bytes(), temp_reg);
  2652     __ cmp(temp_reg, G5_inline_cache_reg);
  2653     __ brx(Assembler::equal, true, Assembler::pt, L);
  2654     __ delayed()->nop();
  2656     __ jump_to(ic_miss, temp_reg);
  2657     __ delayed()->nop();
  2658     __ align(CodeEntryAlignment);
  2659     __ bind(L);
  2662   int vep_offset = ((intptr_t)__ pc()) - start;
  2665   // The instruction at the verified entry point must be 5 bytes or longer
  2666   // because it can be patched on the fly by make_non_entrant. The stack bang
  2667   // instruction fits that requirement.
  2669   // Generate stack overflow check before creating frame
  2670   __ generate_stack_overflow_check(stack_size);
  2672   assert(((intptr_t)__ pc() - start - vep_offset) >= 5,
  2673          "valid size for make_non_entrant");
  2675   // Generate a new frame for the wrapper.
  2676   __ save(SP, -stack_size, SP);
  2678   // Frame is now completed as far as size and linkage.
  2680   int frame_complete = ((intptr_t)__ pc()) - start;
  2682 #ifdef ASSERT
  2683   bool reg_destroyed[RegisterImpl::number_of_registers];
  2684   bool freg_destroyed[FloatRegisterImpl::number_of_registers];
  2685   for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) {
  2686     reg_destroyed[r] = false;
  2688   for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) {
  2689     freg_destroyed[f] = false;
  2692 #endif /* ASSERT */
  2694   VMRegPair zero;
  2695   const Register g0 = G0; // without this we get a compiler warning (why??)
  2696   zero.set2(g0->as_VMReg());
  2698   int c_arg, j_arg;
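         // conversion_off lazily materializes (in L6) a stack offset that does not
         // fit in a simm13 immediate, so it only has to be set up once.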
  2700   Register conversion_off = noreg;
  2702   for (j_arg = first_arg_to_pass, c_arg = 0 ;
  2703        j_arg < total_args_passed ; j_arg++, c_arg++ ) {
  2705     VMRegPair src = in_regs[j_arg];
  2706     VMRegPair dst = out_regs[c_arg];
  2708 #ifdef ASSERT
  2709     if (src.first()->is_Register()) {
  2710       assert(!reg_destroyed[src.first()->as_Register()->encoding()], "ack!");
  2711     } else if (src.first()->is_FloatRegister()) {
  2712       assert(!freg_destroyed[src.first()->as_FloatRegister()->encoding(
  2713                                                FloatRegisterImpl::S)], "ack!");
  2715     if (dst.first()->is_Register()) {
  2716       reg_destroyed[dst.first()->as_Register()->encoding()] = true;
  2717     } else if (dst.first()->is_FloatRegister()) {
  2718       freg_destroyed[dst.first()->as_FloatRegister()->encoding(
  2719                                                  FloatRegisterImpl::S)] = true;
  2721 #endif /* ASSERT */
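           // Marshal this Java argument into its C (probe) location: unbox wrapper
           // objects, null out other oops, and widen/narrow primitives as needed.
           // Strings (T_ADDRESS) are only positioned here; they are converted below.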
  2723     switch (in_sig_bt[j_arg]) {
  2724       case T_ARRAY:
  2725       case T_OBJECT:
  2727           if (out_sig_bt[c_arg] == T_BYTE  || out_sig_bt[c_arg] == T_SHORT ||
  2728               out_sig_bt[c_arg] == T_INT || out_sig_bt[c_arg] == T_LONG) {
  2729             // need to unbox a one-slot value
  2730             Register in_reg = L0;
  2731             Register tmp = L2;
  2732             if ( src.first()->is_reg() ) {
  2733               in_reg = src.first()->as_Register();
  2734             } else {
  2735               assert(Assembler::is_simm13(reg2offset(src.first()) + STACK_BIAS),
  2736                      "must be");
  2737               __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, in_reg);
  2739             // If the final destination is an acceptable register
  2740             if ( dst.first()->is_reg() ) {
  2741               if ( dst.is_single_phys_reg() || out_sig_bt[c_arg] != T_LONG ) {
  2742                 tmp = dst.first()->as_Register();
  2746             Label skipUnbox;
  2747             if ( wordSize == 4 && out_sig_bt[c_arg] == T_LONG ) {
  2748               __ mov(G0, tmp->successor());
  2750             __ br_null(in_reg, true, Assembler::pn, skipUnbox);
  2751             __ delayed()->mov(G0, tmp);
  2753             BasicType bt = out_sig_bt[c_arg];
  2754             int box_offset = java_lang_boxing_object::value_offset_in_bytes(bt);
  2755             switch (bt) {
  2756                 case T_BYTE:
  2757                   __ ldub(in_reg, box_offset, tmp); break;
  2758                 case T_SHORT:
  2759                   __ lduh(in_reg, box_offset, tmp); break;
  2760                 case T_INT:
  2761                   __ ld(in_reg, box_offset, tmp); break;
  2762                 case T_LONG:
  2763                   __ ld_long(in_reg, box_offset, tmp); break;
  2764                 default: ShouldNotReachHere();
  2767             __ bind(skipUnbox);
  2768             // If tmp wasn't final destination copy to final destination
  2769             if (tmp == L2) {
  2770               VMRegPair tmp_as_VM = reg64_to_VMRegPair(L2);
  2771               if (out_sig_bt[c_arg] == T_LONG) {
  2772                 long_move(masm, tmp_as_VM, dst);
  2773               } else {
  2774                 move32_64(masm, tmp_as_VM, out_regs[c_arg]);
  2777             if (out_sig_bt[c_arg] == T_LONG) {
  2778               assert(out_sig_bt[c_arg+1] == T_VOID, "must be");
  2779               ++c_arg; // move over the T_VOID to keep the loop indices in sync
  2781           } else if (out_sig_bt[c_arg] == T_ADDRESS) {
  2782             Register s =
  2783                 src.first()->is_reg() ? src.first()->as_Register() : L2;
  2784             Register d =
  2785                 dst.first()->is_reg() ? dst.first()->as_Register() : L2;
  2787             // We store the oop now so that the conversion pass can reach it
  2788             // while in the inner frame. This will be the only store if
  2789             // the oop is NULL.
  2790             if (s != L2) {
  2791               // src is register
  2792               if (d != L2) {
  2793                 // dst is register
  2794                 __ mov(s, d);
  2795               } else {
  2796                 assert(Assembler::is_simm13(reg2offset(dst.first()) +
  2797                           STACK_BIAS), "must be");
  2798                 __ st_ptr(s, SP, reg2offset(dst.first()) + STACK_BIAS);
  2800             } else {
  2801                 // src not a register
  2802                 assert(Assembler::is_simm13(reg2offset(src.first()) +
  2803                            STACK_BIAS), "must be");
  2804                 __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, d);
  2805                 if (d == L2) {
  2806                   assert(Assembler::is_simm13(reg2offset(dst.first()) +
  2807                              STACK_BIAS), "must be");
  2808                   __ st_ptr(d, SP, reg2offset(dst.first()) + STACK_BIAS);
  2811           } else if (out_sig_bt[c_arg] != T_VOID) {
  2812             // Convert the arg to NULL
  2813             if (dst.first()->is_reg()) {
  2814               __ mov(G0, dst.first()->as_Register());
  2815             } else {
  2816               assert(Assembler::is_simm13(reg2offset(dst.first()) +
  2817                          STACK_BIAS), "must be");
  2818               __ st_ptr(G0, SP, reg2offset(dst.first()) + STACK_BIAS);
  2822         break;
  2823       case T_VOID:
  2824         break;
  2826       case T_FLOAT:
  2827         if (src.first()->is_stack()) {
  2828           // Stack to stack/reg is simple
  2829           move32_64(masm, src, dst);
  2830         } else {
  2831           if (dst.first()->is_reg()) {
  2832             // freg -> reg
  2833             int off =
  2834               STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size;
  2835             Register d = dst.first()->as_Register();
  2836             if (Assembler::is_simm13(off)) {
  2837               __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
  2838                      SP, off);
  2839               __ ld(SP, off, d);
  2840             } else {
  2841               if (conversion_off == noreg) {
  2842                 __ set(off, L6);
  2843                 conversion_off = L6;
  2845               __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
  2846                      SP, conversion_off);
  2847               __ ld(SP, conversion_off , d);
  2849           } else {
  2850             // freg -> mem
  2851             int off = STACK_BIAS + reg2offset(dst.first());
  2852             if (Assembler::is_simm13(off)) {
  2853               __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
  2854                      SP, off);
  2855             } else {
  2856               if (conversion_off == noreg) {
  2857                 __ set(off, L6);
  2858                 conversion_off = L6;
  2860               __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
  2861                      SP, conversion_off);
  2865         break;
  2867       case T_DOUBLE:
  2868         assert( j_arg + 1 < total_args_passed &&
  2869                 in_sig_bt[j_arg + 1] == T_VOID &&
  2870                 out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
  2871         if (src.first()->is_stack()) {
  2872           // Stack to stack/reg is simple
  2873           long_move(masm, src, dst);
  2874         } else {
  2875           Register d = dst.first()->is_reg() ? dst.first()->as_Register() : L2;
  2877           // Destination could be an odd reg on 32bit in which case
  2878           // we can't load directly into the destination.
  2880           if (!d->is_even() && wordSize == 4) {
  2881             d = L2;
  2883           int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size;
  2884           if (Assembler::is_simm13(off)) {
  2885             __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(),
  2886                    SP, off);
  2887             __ ld_long(SP, off, d);
  2888           } else {
  2889             if (conversion_off == noreg) {
  2890               __ set(off, L6);
  2891               conversion_off = L6;
  2893             __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(),
  2894                    SP, conversion_off);
  2895             __ ld_long(SP, conversion_off, d);
  2897           if (d == L2) {
  2898             long_move(masm, reg64_to_VMRegPair(L2), dst);
  2901         break;
  2903       case T_LONG :
  2904         // 32bit can't do a split move of something like g1 -> O0, O1
  2905         // so use a memory temp
  2906         if (src.is_single_phys_reg() && wordSize == 4) {
  2907           Register tmp = L2;
  2908           if (dst.first()->is_reg() &&
  2909               (wordSize == 8 || dst.first()->as_Register()->is_even())) {
  2910             tmp = dst.first()->as_Register();
  2913           int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size;
  2914           if (Assembler::is_simm13(off)) {
  2915             __ stx(src.first()->as_Register(), SP, off);
  2916             __ ld_long(SP, off, tmp);
  2917           } else {
  2918             if (conversion_off == noreg) {
  2919               __ set(off, L6);
  2920               conversion_off = L6;
  2922             __ stx(src.first()->as_Register(), SP, conversion_off);
  2923             __ ld_long(SP, conversion_off, tmp);
  2926           if (tmp == L2) {
  2927             long_move(masm, reg64_to_VMRegPair(L2), dst);
  2929         } else {
  2930           long_move(masm, src, dst);
  2932         break;
  2934       case T_ADDRESS: assert(false, "found T_ADDRESS in java args");
  2936       default:
  2937         move32_64(masm, src, dst);
  2942   // If we have any strings we must store any register-based arg to the stack.
  2943   // This includes any still-live floating point registers too.
  2945   if (total_strings > 0 ) {
  2947     // protect all the arg registers
  2948     __ save_frame(0);
  2949     __ mov(G2_thread, L7_thread_cache);
  2950     const Register L2_string_off = L2;
  2952     // Get first string offset
  2953     __ set(string_locs * VMRegImpl::stack_slot_size, L2_string_off);
  2955     for (c_arg = 0 ; c_arg < total_c_args ; c_arg++ ) {
  2956       if (out_sig_bt[c_arg] == T_ADDRESS) {
  2958         VMRegPair dst = out_regs[c_arg];
  2959         const Register d = dst.first()->is_reg() ?
  2960             dst.first()->as_Register()->after_save() : noreg;
  2962         // It's a string oop and it was already copied to the out arg
  2963         // position
  2964         if (d != noreg) {
  2965           __ mov(d, O0);
  2966         } else {
  2967           assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS),
  2968                  "must be");
  2969           __ ld_ptr(FP,  reg2offset(dst.first()) + STACK_BIAS, O0);
  2971         Label skip;
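                 // Skip the conversion for a NULL string; otherwise call
                 // SharedRuntime::get_utf to copy it as utf8 into the reserved
                 // stack buffer and rewrite the argument to point at that buffer.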
  2973         __ br_null(O0, false, Assembler::pn, skip);
  2974         __ delayed()->add(FP, L2_string_off, O1);
  2976         if (d != noreg) {
  2977           __ mov(O1, d);
  2978         } else {
  2979           assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS),
  2980                  "must be");
  2981           __ st_ptr(O1, FP,  reg2offset(dst.first()) + STACK_BIAS);
  2984         __ call(CAST_FROM_FN_PTR(address, SharedRuntime::get_utf),
  2985                 relocInfo::runtime_call_type);
  2986         __ delayed()->add(L2_string_off, max_dtrace_string_size, L2_string_off);
  2988         __ bind(skip);
  2993     __ mov(L7_thread_cache, G2_thread);
  2994     __ restore();
  2999   // OK, now we are done. We need to place the nop that dtrace wants in order to
  3000   // patch in the trap
  3002   int patch_offset = ((intptr_t)__ pc()) - start;
  3004   __ nop();
  3007   // Return
  3009   __ ret();
  3010   __ delayed()->restore();
  3012   __ flush();
  3014   nmethod *nm = nmethod::new_dtrace_nmethod(
  3015       method, masm->code(), vep_offset, patch_offset, frame_complete,
  3016       stack_slots / VMRegImpl::slots_per_word);
  3017   return nm;
  3021 #endif // HAVE_DTRACE_H
  3023 // this function returns the adjust size (in number of words) to a c2i adapter
  3024 // activation for use during deoptimization
  3025 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
  3026   assert(callee_locals >= callee_parameters,
  3027           "test and remove; got more parms than locals");
  3028   if (callee_locals < callee_parameters)
  3029     return 0;                   // No adjustment for negative locals
  3030   int diff = (callee_locals - callee_parameters) * Interpreter::stackElementWords;
  3031   return round_to(diff, WordsPerLong);
  3034 // "Top of Stack" slots that may be unused by the calling convention but must
  3035 // otherwise be preserved.
  3036 // On Intel these are not necessary and the value can be zero.
  3037 // On Sparc this describes the words reserved for storing a register window
  3038 // when an interrupt occurs.
  3039 uint SharedRuntime::out_preserve_stack_slots() {
  3040   return frame::register_save_words * VMRegImpl::slots_per_word;
  3043 static void gen_new_frame(MacroAssembler* masm, bool deopt) {
  3044 //
  3045 // Common out the new frame generation for deopt and uncommon trap
  3046 //
  3047   Register        G3pcs              = G3_scratch; // Array of new pcs (input)
  3048   Register        Oreturn0           = O0;
  3049   Register        Oreturn1           = O1;
  3050   Register        O2UnrollBlock      = O2;
  3051   Register        O3array            = O3;         // Array of frame sizes (input)
  3052   Register        O4array_size       = O4;         // number of frames (input)
  3053   Register        O7frame_size       = O7;         // size of current frame (loaded below)
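         // Load this frame's size from the size array, negate it so that save()
         // allocates the frame, then pick up the frame's new pc from the pc array.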
  3055   __ ld_ptr(O3array, 0, O7frame_size);
  3056   __ sub(G0, O7frame_size, O7frame_size);
  3057   __ save(SP, O7frame_size, SP);
  3058   __ ld_ptr(G3pcs, 0, I7);                      // load frame's new pc
  3060   #ifdef ASSERT
  3061   // make sure that the frames are aligned properly
  3062 #ifndef _LP64
  3063   __ btst(wordSize*2-1, SP);
  3064   __ breakpoint_trap(Assembler::notZero);
  3065 #endif
  3066   #endif
  3068   // Deopt needs to pass some extra live values from frame to frame
  3070   if (deopt) {
  3071     __ mov(Oreturn0->after_save(), Oreturn0);
  3072     __ mov(Oreturn1->after_save(), Oreturn1);
  3075   __ mov(O4array_size->after_save(), O4array_size);
  3076   __ sub(O4array_size, 1, O4array_size);
  3077   __ mov(O3array->after_save(), O3array);
  3078   __ mov(O2UnrollBlock->after_save(), O2UnrollBlock);
  3079   __ add(G3pcs, wordSize, G3pcs);               // point to next pc value
  3081   #ifdef ASSERT
  3082   // trash registers to show a clear pattern in backtraces
  3083   __ set(0xDEAD0000, I0);
  3084   __ add(I0,  2, I1);
  3085   __ add(I0,  4, I2);
  3086   __ add(I0,  6, I3);
  3087   __ add(I0,  8, I4);
  3088   // Don't touch I5; it could hold a valuable savedSP
  3089   __ set(0xDEADBEEF, L0);
  3090   __ mov(L0, L1);
  3091   __ mov(L0, L2);
  3092   __ mov(L0, L3);
  3093   __ mov(L0, L4);
  3094   __ mov(L0, L5);
  3096   // trash the return value as there is nothing to return yet
  3097   __ set(0xDEAD0001, O7);
  3098   #endif
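         // Publish this frame's SP as the savedSP (O5/I5) seen by the frame pushed next.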
  3100   __ mov(SP, O5_savedSP);
  3104 static void make_new_frames(MacroAssembler* masm, bool deopt) {
  3105   //
  3106   // loop through the UnrollBlock info and create new frames
  3107   //
  3108   Register        G3pcs              = G3_scratch;
  3109   Register        Oreturn0           = O0;
  3110   Register        Oreturn1           = O1;
  3111   Register        O2UnrollBlock      = O2;
  3112   Register        O3array            = O3;
  3113   Register        O4array_size       = O4;
  3114   Label           loop;
  3116   // Before we make new frames, check to see if stack is available.
  3117   // Do this after the caller's return address is on top of the stack
  3118   if (UseStackBanging) {
  3119     // Get total frame size for interpreted frames
  3120     __ ld(O2UnrollBlock, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes(), O4);
  3121     __ bang_stack_size(O4, O3, G3_scratch);
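         // Fetch the frame count, the array of new pcs, and the array of frame
         // sizes out of the UnrollBlock.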
  3124   __ ld(O2UnrollBlock, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes(), O4array_size);
  3125   __ ld_ptr(O2UnrollBlock, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes(), G3pcs);
  3126   __ ld_ptr(O2UnrollBlock, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes(), O3array);
  3128   // Adjust old interpreter frame to make space for new frame's extra java locals
  3129   //
  3130   // We capture the original sp for the transition frame only because it is needed in
  3131   // order to properly calculate interpreter_sp_adjustment. Even though in real life
  3132   // every interpreter frame captures a savedSP it is only needed at the transition
  3133   // (fortunately). If we had to have it correct everywhere then we would need to
  3134   // be told the sp_adjustment for each frame we create. If the frame size array
  3135   // were to have twice the frame count entries then we could have pairs [sp_adjustment, frame_size]
  3136   // for each frame we create and keep up the illusion everywhere.
  3137   //
  3139   __ ld(O2UnrollBlock, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes(), O7);
  3140   __ mov(SP, O5_savedSP);       // remember initial sender's original sp before adjustment
  3141   __ sub(SP, O7, SP);
  3143 #ifdef ASSERT
  3144   // make sure that there is at least one entry in the array
  3145   __ tst(O4array_size);
  3146   __ breakpoint_trap(Assembler::zero);
  3147 #endif
  3149   // Now push the new interpreter frames
  3150   __ bind(loop);
  3152   // allocate a new frame, filling the registers
  3154   gen_new_frame(masm, deopt);        // allocate an interpreter frame
  3156   __ tst(O4array_size);
  3157   __ br(Assembler::notZero, false, Assembler::pn, loop);
  3158   __ delayed()->add(O3array, wordSize, O3array);
  3159   __ ld_ptr(G3pcs, 0, O7);                      // load final frame new pc
  3163 //------------------------------generate_deopt_blob----------------------------
  3164 // Ought to generate an ideal graph & compile, but here's some SPARC ASM
  3165 // instead.
  3166 void SharedRuntime::generate_deopt_blob() {
  3167   // allocate space for the code
  3168   ResourceMark rm;
  3169   // setup code generation tools
  3170   int pad = VerifyThread ? 512 : 0; // Extra slop space for more verify code
  3171 #ifdef _LP64
  3172   CodeBuffer buffer("deopt_blob", 2100+pad, 512);
  3173 #else
  3174   // Measured 8/7/03 at 1212 in 32bit debug build (no VerifyThread)
  3175   // Measured 8/7/03 at 1396 in 32bit debug build (VerifyThread)
  3176   CodeBuffer buffer("deopt_blob", 1600+pad, 512);
  3177 #endif /* _LP64 */
  3178   MacroAssembler* masm               = new MacroAssembler(&buffer);
  3179   FloatRegister   Freturn0           = F0;
  3180   Register        Greturn1           = G1;
  3181   Register        Oreturn0           = O0;
  3182   Register        Oreturn1           = O1;
  3183   Register        O2UnrollBlock      = O2;
  3184   Register        L0deopt_mode       = L0;
  3185   Register        G4deopt_mode       = G4_scratch;
  3186   int             frame_size_words;
  3187   Address         saved_Freturn0_addr(FP, -sizeof(double) + STACK_BIAS);
  3188 #if !defined(_LP64) && defined(COMPILER2)
  3189   Address         saved_Greturn1_addr(FP, -sizeof(double) -sizeof(jlong) + STACK_BIAS);
  3190 #endif
  3191   Label           cont;
  3193   OopMapSet *oop_maps = new OopMapSet();
  3195   //
  3196   // This is the entry point for code which is returning to a de-optimized
  3197   // frame.
  3198   // The steps taken by this frame are as follows:
  3199   //   - push a dummy "register_save" and save the return values (O0, O1, F0/F1, G1)
  3200   //     and all potentially live registers (at a pollpoint many registers can be live).
  3201   //
  3202   //   - call the C routine: Deoptimization::fetch_unroll_info (this function
  3203   //     returns information about the number and size of interpreter frames
  3204   //     which are equivalent to the frame which is being deoptimized)
  3205   //   - deallocate the unpack frame, restoring only result values. Other
  3206   //     volatile registers will now be captured in the vframeArray as needed.
  3207   //   - deallocate the deoptimization frame
  3208   //   - in a loop using the information returned in the previous step
  3209   //     push new interpreter frames (take care to propagate the return
  3210   //     values through each new frame pushed)
  3211   //   - create a dummy "unpack_frame" and save the return values (O0, O1, F0)
  3212   //   - call the C routine: Deoptimization::unpack_frames (this function
  3213   //     lays out values on the interpreter frame which was just created)
  3214   //   - deallocate the dummy unpack_frame
  3215   //   - ensure that all the return values are correctly set and then do
  3216   //     a return to the interpreter entry point
  3217   //
  3218   // Refer to the following methods for more information:
  3219   //   - Deoptimization::fetch_unroll_info
  3220   //   - Deoptimization::unpack_frames
  3222   OopMap* map = NULL;
  3224   int start = __ offset();
  3226   // restore G2, the trampoline destroyed it
  3227   __ get_thread();
  3229   // On entry we have been called by the deoptimized nmethod with a call that
  3230   // replaced the original call (or safepoint polling location) so the deoptimizing
  3231   // pc is now in O7. Return values are still in the expected places
  3233   map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words);
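         // Ordinary deoptimization entry: record Unpack_deopt and join the common
         // path at cont.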
  3234   __ ba(false, cont);
  3235   __ delayed()->mov(Deoptimization::Unpack_deopt, L0deopt_mode);
  3237   int exception_offset = __ offset() - start;
  3239   // restore G2, the trampoline destroyed it
  3240   __ get_thread();
  3242   // On entry we have been jumped to by the exception handler (or exception_blob
  3243   // for server).  O0 contains the exception oop and O7 contains the original
  3244   // exception pc.  So if we push a frame here it will look to the
  3245   // stack walking code (fetch_unroll_info) just like a normal call so
  3246   // state will be extracted normally.
  3248   // save exception oop in JavaThread and fall through into the
  3249   // exception_in_tls case since they are handled in the same way except
  3250   // for where the pending exception is kept.
  3251   __ st_ptr(Oexception, G2_thread, JavaThread::exception_oop_offset());
  3253   //
  3254   // Vanilla deoptimization with an exception pending in exception_oop
  3255   //
  3256   int exception_in_tls_offset = __ offset() - start;
  3258   // No need to update oop_map  as each call to save_live_registers will produce identical oopmap
  3259   (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_words);
  3261   // Restore G2_thread
  3262   __ get_thread();
  3264 #ifdef ASSERT
  3266     // verify that there is really an exception oop in exception_oop
  3267     Label has_exception;
  3268     __ ld_ptr(G2_thread, JavaThread::exception_oop_offset(), Oexception);
  3269     __ br_notnull(Oexception, false, Assembler::pt, has_exception);
  3270     __ delayed()-> nop();
  3271     __ stop("no exception in thread");
  3272     __ bind(has_exception);
  3274     // verify that there is no pending exception
  3275     Label no_pending_exception;
  3276     Address exception_addr(G2_thread, Thread::pending_exception_offset());
  3277     __ ld_ptr(exception_addr, Oexception);
  3278     __ br_null(Oexception, false, Assembler::pt, no_pending_exception);
  3279     __ delayed()->nop();
  3280     __ stop("must not have pending exception here");
  3281     __ bind(no_pending_exception);
  3283 #endif
  3285   __ ba(false, cont);
  3286   __ delayed()->mov(Deoptimization::Unpack_exception, L0deopt_mode);
  3288   //
  3289   // Reexecute entry, similar to c2 uncommon trap
  3290   //
  3291   int reexecute_offset = __ offset() - start;
  3293   // No need to update oop_map  as each call to save_live_registers will produce identical oopmap
  3294   (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_words);
  3296   __ mov(Deoptimization::Unpack_reexecute, L0deopt_mode);
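         // All entry points meet here with the unpack mode in L0deopt_mode and the
         // live registers saved.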
  3298   __ bind(cont);
  3300   __ set_last_Java_frame(SP, noreg);
  3302   // do the call by hand so we can get the oopmap
  3304   __ mov(G2_thread, L7_thread_cache);
  3305   __ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), relocInfo::runtime_call_type);
  3306   __ delayed()->mov(G2_thread, O0);
  3308   // Set an oopmap for the call site; this describes all our saved volatile registers
  3310   oop_maps->add_gc_map( __ offset()-start, map);
  3312   __ mov(L7_thread_cache, G2_thread);
  3314   __ reset_last_Java_frame();
  3316   // NOTE: we know that only O0/O1 will be reloaded by restore_result_registers
  3317   // so this move will survive
  3319   __ mov(L0deopt_mode, G4deopt_mode);
  3321   __ mov(O0, O2UnrollBlock->after_save());
  3323   RegisterSaver::restore_result_registers(masm);
  3325   Label noException;
  3326   __ cmp(G4deopt_mode, Deoptimization::Unpack_exception);   // Was exception pending?
  3327   __ br(Assembler::notEqual, false, Assembler::pt, noException);
  3328   __ delayed()->nop();
  3330   // Move the pending exception from exception_oop to Oexception so
  3331   // the pending exception will be picked up by the interpreter.
  3332   __ ld_ptr(G2_thread, in_bytes(JavaThread::exception_oop_offset()), Oexception);
  3333   __ st_ptr(G0, G2_thread, in_bytes(JavaThread::exception_oop_offset()));
  3334   __ bind(noException);
  3336   // deallocate the deoptimization frame taking care to preserve the return values
  3337   __ mov(Oreturn0,     Oreturn0->after_save());
  3338   __ mov(Oreturn1,     Oreturn1->after_save());
  3339   __ mov(O2UnrollBlock, O2UnrollBlock->after_save());
  3340   __ restore();
  3342   // Allocate new interpreter frame(s) and possible c2i adapter frame
  3344   make_new_frames(masm, true);
  3346   // push a dummy "unpack_frame" taking care of float return values and
  3347   // call Deoptimization::unpack_frames to have the unpacker layout
  3348   // information in the interpreter frames just created and then return
  3349   // to the interpreter entry point
  3350   __ save(SP, -frame_size_words*wordSize, SP);
  3351   __ stf(FloatRegisterImpl::D, Freturn0, saved_Freturn0_addr);
  3352 #if !defined(_LP64)
  3353 #if defined(COMPILER2)
  3354   // 32-bit 1-register longs return longs in G1
  3355   __ stx(Greturn1, saved_Greturn1_addr);
  3356 #endif
  3357   __ set_last_Java_frame(SP, noreg);
  3358   __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), G2_thread, G4deopt_mode);
  3359 #else
  3360   // LP64 uses g4 in set_last_Java_frame
  3361   __ mov(G4deopt_mode, O1);
  3362   __ set_last_Java_frame(SP, G0);
  3363   __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), G2_thread, O1);
  3364 #endif
  3365   __ reset_last_Java_frame();
  3366   __ ldf(FloatRegisterImpl::D, saved_Freturn0_addr, Freturn0);
  3368 #if !defined(_LP64) && defined(COMPILER2)
  3369   // In 32 bit, C2 returns longs in G1 so restore the saved G1 into
  3370   // I0/I1 if the return value is long.
  3371   Label not_long;
  3372   __ cmp(O0,T_LONG);
  3373   __ br(Assembler::notEqual, false, Assembler::pt, not_long);
  3374   __ delayed()->nop();
  3375   __ ldd(saved_Greturn1_addr,I0);
  3376   __ bind(not_long);
  3377 #endif
  3378   __ ret();
  3379   __ delayed()->restore();
  3381   masm->flush();
  3382   _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_words);
  3383   _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
  3386 #ifdef COMPILER2
  3388 //------------------------------generate_uncommon_trap_blob--------------------
  3389 // Ought to generate an ideal graph & compile, but here's some SPARC ASM
  3390 // instead.
  3391 void SharedRuntime::generate_uncommon_trap_blob() {
  3392   // allocate space for the code
  3393   ResourceMark rm;
  3394   // setup code generation tools
  3395   int pad = VerifyThread ? 512 : 0;
  3396 #ifdef _LP64
  3397   CodeBuffer buffer("uncommon_trap_blob", 2700+pad, 512);
  3398 #else
  3399   // Measured 8/7/03 at 660 in 32bit debug build (no VerifyThread)
  3400   // Measured 8/7/03 at 1028 in 32bit debug build (VerifyThread)
  3401   CodeBuffer buffer("uncommon_trap_blob", 2000+pad, 512);
  3402 #endif
  3403   MacroAssembler* masm               = new MacroAssembler(&buffer);
  3404   Register        O2UnrollBlock      = O2;
  3405   Register        O2klass_index      = O2;
  3407   //
  3408   // This is the entry point for all traps the compiler takes when it thinks
  3409   // it cannot handle further execution of compiled code. The frame is
  3410   // deoptimized in these cases and converted into interpreter frames for
  3411   // execution.
  3412   // The steps taken by this frame are as follows:
  3413   //   - push a fake "unpack_frame"
  3414   //   - call the C routine Deoptimization::uncommon_trap (this function
  3415   //     packs the current compiled frame into vframe arrays and returns
  3416   //     information about the number and size of interpreter frames which
  3417   //     are equivalent to the frame which is being deoptimized)
  3418   //   - deallocate the "unpack_frame"
  3419   //   - deallocate the deoptimization frame
  3420   //   - in a loop using the information returned in the previous step
  3421   //     push interpreter frames;
  3422   //   - create a dummy "unpack_frame"
  3423   //   - call the C routine: Deoptimization::unpack_frames (this function
  3424   //     lays out values on the interpreter frame which was just created)
  3425   //   - deallocate the dummy unpack_frame
  3426   //   - return to the interpreter entry point
  3427   //
  3428   //  Refer to the following methods for more information:
  3429   //   - Deoptimization::uncommon_trap
  3430   //   - Deoptimization::unpack_frame
  3432   // the unloaded class index is in O0 (first parameter to this blob)
  3434   // push a dummy "unpack_frame"
  3435   // and call Deoptimization::uncommon_trap to pack the compiled frame into
  3436   // vframe array and return the UnrollBlock information
  3437   __ save_frame(0);
  3438   __ set_last_Java_frame(SP, noreg);
  3439   __ mov(I0, O2klass_index);
  3440   __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap), G2_thread, O2klass_index);
  3441   __ reset_last_Java_frame();
  3442   __ mov(O0, O2UnrollBlock->after_save());
  3443   __ restore();
  3445   // deallocate the deoptimized frame taking care to preserve the return values
  3446   __ mov(O2UnrollBlock, O2UnrollBlock->after_save());
  3447   __ restore();
  3449   // Allocate new interpreter frame(s) and possible c2i adapter frame
  3451   make_new_frames(masm, false);
  3453   // push a dummy "unpack_frame" taking care of float return values and
  3454   // call Deoptimization::unpack_frames to have the unpacker layout
  3455   // information in the interpreter frames just created and then return
  3456   // to the interpreter entry point
  3457   __ save_frame(0);
  3458   __ set_last_Java_frame(SP, noreg);
  3459   __ mov(Deoptimization::Unpack_uncommon_trap, O3); // indicate it is the uncommon trap case
  3460   __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), G2_thread, O3);
  3461   __ reset_last_Java_frame();
  3462   __ ret();
  3463   __ delayed()->restore();
  3465   masm->flush();
  3466   _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, NULL, __ total_frame_size_in_bytes(0)/wordSize);
  3469 #endif // COMPILER2
  3471 //------------------------------generate_handler_blob-------------------
  3472 //
  3473 // Generate a special Compile2Runtime blob that saves all registers, and sets
  3474 // up an OopMap.
  3475 //
  3476 // This blob is jumped to (via a breakpoint and the signal handler) from a
  3477 // safepoint in compiled code.  On entry to this blob, O7 contains the
  3478 // address in the original nmethod at which we should resume normal execution.
  3479 // Thus, this blob looks like a subroutine which must preserve lots of
  3480 // registers and return normally.  Note that O7 is never register-allocated,
  3481 // so it is guaranteed to be free here.
  3482 //
  3484 // The hardest part of what this blob must do is to save the 64-bit %o
  3485 // registers in the 32-bit build.  A simple 'save' turns the %o's into %i's and
  3486 // an interrupt will chop off their heads.  Making space in the caller's frame
  3487 // first will let us save the 64-bit %o's before save'ing, but we cannot hand
  3488 // the adjusted FP off to the GC stack-crawler: this will modify the caller's
  3489 // SP and mess up HIS OopMaps.  So we first adjust the caller's SP, then save
  3490 // the 64-bit %o's, then do a save, then fixup the caller's SP (our FP).
  3491 // Tricky, tricky, tricky...
  3493 static SafepointBlob* generate_handler_blob(address call_ptr, bool cause_return) {
  3494   assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");
  3496   // allocate space for the code
  3497   ResourceMark rm;
  3498   // setup code generation tools
  3499   // Measured 8/7/03 at 896 in 32bit debug build (no VerifyThread)
  3500   // Measured 8/7/03 at 1080 in 32bit debug build (VerifyThread)
  3501   // even larger with TraceJumps
  3502   int pad = TraceJumps ? 512 : 0;
  3503   CodeBuffer buffer("handler_blob", 1600 + pad, 512);
  3504   MacroAssembler* masm                = new MacroAssembler(&buffer);
  3505   int             frame_size_words;
  3506   OopMapSet *oop_maps = new OopMapSet();
  3507   OopMap* map = NULL;
  3509   int start = __ offset();
  3511   // If this causes a return before the processing, then do a "restore"
  3512   if (cause_return) {
  3513     __ restore();
  3514   } else {
  3515     // Make it look like we were called via the poll
  3516     // so that frame constructor always sees a valid return address
  3517     __ ld_ptr(G2_thread, in_bytes(JavaThread::saved_exception_pc_offset()), O7);
  3518     __ sub(O7, frame::pc_return_offset, O7);
  3521   map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words);
  3523   // setup last_Java_sp (blows G4)
  3524   __ set_last_Java_frame(SP, noreg);
  3526   // call into the runtime to handle the safepoint polling page exception
  3527   // Do not use call_VM_leaf, because we need to make a GC map at this call site.
  3528   __ mov(G2_thread, O0);
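         // G2_thread is preserved in L7 across the C call and restored below with
         // restore_thread.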
  3529   __ save_thread(L7_thread_cache);
  3530   __ call(call_ptr);
  3531   __ delayed()->nop();
  3533   // Set an oopmap for the call site.
  3534   // We need this not only for callee-saved registers, but also for volatile
  3535   // registers that the compiler might be keeping live across a safepoint.
  3537   oop_maps->add_gc_map( __ offset() - start, map);
  3539   __ restore_thread(L7_thread_cache);
  3540   // clear last_Java_sp
  3541   __ reset_last_Java_frame();
  3543   // Check for exceptions
  3544   Label pending;
  3546   __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), O1);
  3547   __ tst(O1);
  3548   __ brx(Assembler::notEqual, true, Assembler::pn, pending);
  3549   __ delayed()->nop();
  3551   RegisterSaver::restore_live_registers(masm);
  3553   // We are back to the original state on entry and ready to go.
  3555   __ retl();
  3556   __ delayed()->nop();
  3558   // Pending exception after the safepoint
  3560   __ bind(pending);
  3562   RegisterSaver::restore_live_registers(masm);
  3564   // We are back to the original state on entry.
  3566   // Tail-call forward_exception_entry, with the issuing PC in O7,
  3567   // so it looks like the original nmethod called forward_exception_entry.
  3568   __ set((intptr_t)StubRoutines::forward_exception_entry(), O0);
  3569   __ JMP(O0, 0);
  3570   __ delayed()->nop();
  3572   // -------------
  3573   // make sure all code is generated
  3574   masm->flush();
  3576   // return exception blob
  3577   return SafepointBlob::create(&buffer, oop_maps, frame_size_words);
  3580 //
  3581 // generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss)
  3582 //
  3583 // Generate a stub that calls into vm to find out the proper destination
  3584 // of a java call. All the argument registers are live at this point
  3585 // but since this is generic code we don't know what they are and the caller
  3586 // must do any gc of the args.
  3587 //
  3588 static RuntimeStub* generate_resolve_blob(address destination, const char* name) {
  3589   assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");
  3591   // allocate space for the code
  3592   ResourceMark rm;
  3593   // setup code generation tools
  3594   // Measured 8/7/03 at 896 in 32bit debug build (no VerifyThread)
  3595   // Measured 8/7/03 at 1080 in 32bit debug build (VerifyThread)
  3596   // even larger with TraceJumps
  3597   int pad = TraceJumps ? 512 : 0;
  3598   CodeBuffer buffer(name, 1600 + pad, 512);
  3599   MacroAssembler* masm                = new MacroAssembler(&buffer);
  3600   int             frame_size_words;
  3601   OopMapSet *oop_maps = new OopMapSet();
  3602   OopMap* map = NULL;
  3604   int start = __ offset();
  3606   map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words);
  3608   int frame_complete = __ offset();
  3610   // setup last_Java_sp (blows G4)
  3611   __ set_last_Java_frame(SP, noreg);
  3613   // call into the runtime to find the proper destination of the call
  3614   // Do not use call_VM_leaf, because we need to make a GC map at this call site.
  3615   __ mov(G2_thread, O0);
  3616   __ save_thread(L7_thread_cache);
  3617   __ call(destination, relocInfo::runtime_call_type);
  3618   __ delayed()->nop();
  3620   // O0 contains the address we are going to jump to assuming no exception got installed
  3622   // Set an oopmap for the call site.
  3623   // We need this not only for callee-saved registers, but also for volatile
  3624   // registers that the compiler might be keeping live across a safepoint.
  3626   oop_maps->add_gc_map( __ offset() - start, map);
  3628   __ restore_thread(L7_thread_cache);
  3629   // clear last_Java_sp
  3630   __ reset_last_Java_frame();
  3632   // Check for exceptions
  3633   Label pending;
  3635   __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), O1);
  3636   __ tst(O1);
  3637   __ brx(Assembler::notEqual, true, Assembler::pn, pending);
  3638   __ delayed()->nop();
  3640   // get the returned methodOop
  3642   __ get_vm_result(G5_method);
  3643   __ stx(G5_method, SP, RegisterSaver::G5_offset()+STACK_BIAS);
  3645   // O0 is where we want to jump, overwrite G3 which is saved and scratch
  3647   __ stx(O0, SP, RegisterSaver::G3_offset()+STACK_BIAS);
  3649   RegisterSaver::restore_live_registers(masm);
  3651   // We are back to the original state on entry and ready to go.
  3653   __ JMP(G3, 0);
  3654   __ delayed()->nop();
  3656   // Pending exception after the safepoint
  3658   __ bind(pending);
  3660   RegisterSaver::restore_live_registers(masm);
  3662   // We are back to the original state on entry.
  3664   // Tail-call forward_exception_entry, with the issuing PC in O7,
  3665   // so it looks like the original nmethod called forward_exception_entry.
  3666   __ set((intptr_t)StubRoutines::forward_exception_entry(), O0);
  3667   __ JMP(O0, 0);
  3668   __ delayed()->nop();
  3670   // -------------
  3671   // make sure all code is generated
  3672   masm->flush();
  3674   // return the  blob
  3675   // frame_size_words or bytes??
  3676   return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true);
  3679 void SharedRuntime::generate_stubs() {
  3681   _wrong_method_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::handle_wrong_method),
  3682                                              "wrong_method_stub");
  3684   _ic_miss_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::handle_wrong_method_ic_miss),
  3685                                         "ic_miss_stub");
  3687   _resolve_opt_virtual_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::resolve_opt_virtual_call_C),
  3688                                         "resolve_opt_virtual_call");
  3690   _resolve_virtual_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::resolve_virtual_call_C),
  3691                                         "resolve_virtual_call");
  3693   _resolve_static_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::resolve_static_call_C),
  3694                                         "resolve_static_call");
  3696   _polling_page_safepoint_handler_blob =
  3697     generate_handler_blob(CAST_FROM_FN_PTR(address,
  3698                    SafepointSynchronize::handle_polling_page_exception), false);
  3700   _polling_page_return_handler_blob =
  3701     generate_handler_blob(CAST_FROM_FN_PTR(address,
  3702                    SafepointSynchronize::handle_polling_page_exception), true);
  3704   generate_deopt_blob();
  3706 #ifdef COMPILER2
  3707   generate_uncommon_trap_blob();
  3708 #endif // COMPILER2
