src/cpu/sparc/vm/sharedRuntime_sparc.cpp

author:      duke
date:        Sat, 01 Dec 2007 00:00:00 +0000
changeset:   435 a61af66fc99e
child:       548 ba764ed4b6f2
permissions: -rw-r--r--

Initial load
     1 /*
     2  * Copyright 2003-2007 Sun Microsystems, Inc.  All Rights Reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.
     8  *
     9  * This code is distributed in the hope that it will be useful, but WITHOUT
    10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    12  * version 2 for more details (a copy is included in the LICENSE file that
    13  * accompanied this code).
    14  *
    15  * You should have received a copy of the GNU General Public License version
    16  * 2 along with this work; if not, write to the Free Software Foundation,
    17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    18  *
    19  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
    20  * CA 95054 USA or visit www.sun.com if you need additional information or
    21  * have any questions.
    22  *
    23  */
    25 #include "incls/_precompiled.incl"
    26 #include "incls/_sharedRuntime_sparc.cpp.incl"
    28 #define __ masm->
    30 #ifdef COMPILER2
    31 UncommonTrapBlob*   SharedRuntime::_uncommon_trap_blob;
    32 #endif // COMPILER2
    34 DeoptimizationBlob* SharedRuntime::_deopt_blob;
    35 SafepointBlob*      SharedRuntime::_polling_page_safepoint_handler_blob;
    36 SafepointBlob*      SharedRuntime::_polling_page_return_handler_blob;
    37 RuntimeStub*        SharedRuntime::_wrong_method_blob;
    38 RuntimeStub*        SharedRuntime::_ic_miss_blob;
    39 RuntimeStub*        SharedRuntime::_resolve_opt_virtual_call_blob;
    40 RuntimeStub*        SharedRuntime::_resolve_virtual_call_blob;
    41 RuntimeStub*        SharedRuntime::_resolve_static_call_blob;
    43 class RegisterSaver {
    45   // Used for saving volatile registers. This is Gregs, Fregs, I/L/O.
    46   // The Oregs are problematic. In the 32bit build the compiler can
    47   // have O registers live with 64 bit quantities. A window save will
    48   // cut the heads off of the registers. We have to do a very extensive
    49   // stack dance to save and restore these properly.
     51   // Note that the Oregs problem only exists if we block at either a polling
     52   // page exception or a compiled code safepoint that was not originally a call,
     53   // or if we deoptimize following one of these kinds of safepoints.
    55   // Lots of registers to save.  For all builds, a window save will preserve
    56   // the %i and %l registers.  For the 32-bit longs-in-two entries and 64-bit
    57   // builds a window-save will preserve the %o registers.  In the LION build
    58   // we need to save the 64-bit %o registers which requires we save them
    59   // before the window-save (as then they become %i registers and get their
    60   // heads chopped off on interrupt).  We have to save some %g registers here
    61   // as well.
    62   enum {
    63     // This frame's save area.  Includes extra space for the native call:
    64     // vararg's layout space and the like.  Briefly holds the caller's
    65     // register save area.
    66     call_args_area = frame::register_save_words_sp_offset +
    67                      frame::memory_parameter_word_sp_offset*wordSize,
    68     // Make sure save locations are always 8 byte aligned.
    69     // can't use round_to because it doesn't produce compile time constant
    70     start_of_extra_save_area = ((call_args_area + 7) & ~7),
    71     g1_offset = start_of_extra_save_area, // g-regs needing saving
    72     g3_offset = g1_offset+8,
    73     g4_offset = g3_offset+8,
    74     g5_offset = g4_offset+8,
    75     o0_offset = g5_offset+8,
    76     o1_offset = o0_offset+8,
    77     o2_offset = o1_offset+8,
    78     o3_offset = o2_offset+8,
    79     o4_offset = o3_offset+8,
    80     o5_offset = o4_offset+8,
    81     start_of_flags_save_area = o5_offset+8,
    82     ccr_offset = start_of_flags_save_area,
    83     fsr_offset = ccr_offset + 8,
    84     d00_offset = fsr_offset+8,  // Start of float save area
    85     register_save_size = d00_offset+8*32
    86   };
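          // Worked example of the offset arithmetic above (illustrative numbers only,
          // since call_args_area depends on the frame constants): if call_args_area
          // were 92, start_of_extra_save_area = (92 + 7) & ~7 = 96, so g1_offset = 96,
          // g3_offset = 104, g4_offset = 112, g5_offset = 120, o0_offset = 128, ...,
          // o5_offset = 168, ccr_offset = 176, fsr_offset = 184, d00_offset = 192,
          // and register_save_size = 192 + 8*32 = 448 bytes.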
    89   public:
    91   static int Oexception_offset() { return o0_offset; };
    92   static int G3_offset() { return g3_offset; };
    93   static int G5_offset() { return g5_offset; };
    94   static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words);
    95   static void restore_live_registers(MacroAssembler* masm);
     97   // During deoptimization only the result registers need to be restored;
     98   // all the other values have already been extracted.
   100   static void restore_result_registers(MacroAssembler* masm);
   101 };
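        // A typical usage pattern for this class (a sketch only; the actual call
        // sites are in the blob generators later in this file):
        //
        //   int frame_size_words;
        //   OopMap* map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words);
        //   ... call into the runtime ...
        //   RegisterSaver::restore_live_registers(masm);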
   103 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) {
   104   // Record volatile registers as callee-save values in an OopMap so their save locations will be
   105   // propagated to the caller frame's RegisterMap during StackFrameStream construction (needed for
   106   // deoptimization; see compiledVFrame::create_stack_value).  The caller's I, L and O registers
   107   // are saved in register windows - I's and L's in the caller's frame and O's in the stub frame
   108   // (as the stub's I's) when the runtime routine called by the stub creates its frame.
   109   int i;
    110   // Always make the frame size 16 byte aligned.
   111   int frame_size = round_to(additional_frame_words + register_save_size, 16);
   112   // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words
   113   int frame_size_in_slots = frame_size / sizeof(jint);
   114   // CodeBlob frame size is in words.
   115   *total_frame_words = frame_size / wordSize;
   116   // OopMap* map = new OopMap(*total_frame_words, 0);
   117   OopMap* map = new OopMap(frame_size_in_slots, 0);
   119 #if !defined(_LP64)
   121   // Save 64-bit O registers; they will get their heads chopped off on a 'save'.
   122   __ stx(O0, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8);
   123   __ stx(O1, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8);
   124   __ stx(O2, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+2*8);
   125   __ stx(O3, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+3*8);
   126   __ stx(O4, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+4*8);
   127   __ stx(O5, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+5*8);
   128 #endif /* _LP64 */
   130   __ save(SP, -frame_size, SP);
   132 #ifndef _LP64
   133   // Reload the 64 bit Oregs. Although they are now Iregs we load them
   134   // to Oregs here to avoid interrupts cutting off their heads
   136   __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8, O0);
   137   __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8, O1);
   138   __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+2*8, O2);
   139   __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+3*8, O3);
   140   __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+4*8, O4);
   141   __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+5*8, O5);
   143   __ stx(O0, SP, o0_offset+STACK_BIAS);
   144   map->set_callee_saved(VMRegImpl::stack2reg((o0_offset + 4)>>2), O0->as_VMReg());
   146   __ stx(O1, SP, o1_offset+STACK_BIAS);
   148   map->set_callee_saved(VMRegImpl::stack2reg((o1_offset + 4)>>2), O1->as_VMReg());
   150   __ stx(O2, SP, o2_offset+STACK_BIAS);
   151   map->set_callee_saved(VMRegImpl::stack2reg((o2_offset + 4)>>2), O2->as_VMReg());
   153   __ stx(O3, SP, o3_offset+STACK_BIAS);
   154   map->set_callee_saved(VMRegImpl::stack2reg((o3_offset + 4)>>2), O3->as_VMReg());
   156   __ stx(O4, SP, o4_offset+STACK_BIAS);
   157   map->set_callee_saved(VMRegImpl::stack2reg((o4_offset + 4)>>2), O4->as_VMReg());
   159   __ stx(O5, SP, o5_offset+STACK_BIAS);
   160   map->set_callee_saved(VMRegImpl::stack2reg((o5_offset + 4)>>2), O5->as_VMReg());
   161 #endif /* _LP64 */
   163   // Save the G's
   164   __ stx(G1, SP, g1_offset+STACK_BIAS);
   165   map->set_callee_saved(VMRegImpl::stack2reg((g1_offset + 4)>>2), G1->as_VMReg());
   167   __ stx(G3, SP, g3_offset+STACK_BIAS);
   168   map->set_callee_saved(VMRegImpl::stack2reg((g3_offset + 4)>>2), G3->as_VMReg());
   170   __ stx(G4, SP, g4_offset+STACK_BIAS);
   171   map->set_callee_saved(VMRegImpl::stack2reg((g4_offset + 4)>>2), G4->as_VMReg());
   173   __ stx(G5, SP, g5_offset+STACK_BIAS);
   174   map->set_callee_saved(VMRegImpl::stack2reg((g5_offset + 4)>>2), G5->as_VMReg());
   176   // This is really a waste but we'll keep things as they were for now
   177   if (true) {
   178 #ifndef _LP64
   179     map->set_callee_saved(VMRegImpl::stack2reg((o0_offset)>>2), O0->as_VMReg()->next());
   180     map->set_callee_saved(VMRegImpl::stack2reg((o1_offset)>>2), O1->as_VMReg()->next());
   181     map->set_callee_saved(VMRegImpl::stack2reg((o2_offset)>>2), O2->as_VMReg()->next());
   182     map->set_callee_saved(VMRegImpl::stack2reg((o3_offset)>>2), O3->as_VMReg()->next());
   183     map->set_callee_saved(VMRegImpl::stack2reg((o4_offset)>>2), O4->as_VMReg()->next());
   184     map->set_callee_saved(VMRegImpl::stack2reg((o5_offset)>>2), O5->as_VMReg()->next());
   185 #endif /* _LP64 */
   186     map->set_callee_saved(VMRegImpl::stack2reg((g1_offset)>>2), G1->as_VMReg()->next());
   187     map->set_callee_saved(VMRegImpl::stack2reg((g3_offset)>>2), G3->as_VMReg()->next());
   188     map->set_callee_saved(VMRegImpl::stack2reg((g4_offset)>>2), G4->as_VMReg()->next());
   189     map->set_callee_saved(VMRegImpl::stack2reg((g5_offset)>>2), G5->as_VMReg()->next());
   190   }
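          // Note on the slot arithmetic above (descriptive only): the *_offset values
          // are byte offsets from SP (plus STACK_BIAS), while stack2reg() takes 4-byte
          // VMReg slot indices, hence the >>2.  The "+ 4" selects the second 32-bit
          // word of each 8-byte save slot; the entries added under if (true) record
          // the other half of each register via ->next().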
   193   // Save the flags
   194   __ rdccr( G5 );
   195   __ stx(G5, SP, ccr_offset+STACK_BIAS);
   196   __ stxfsr(SP, fsr_offset+STACK_BIAS);
   198   // Save all the FP registers
   199   int offset = d00_offset;
   200   for( int i=0; i<64; i+=2 ) {
   201     FloatRegister f = as_FloatRegister(i);
   202     __ stf(FloatRegisterImpl::D,  f, SP, offset+STACK_BIAS);
   203     map->set_callee_saved(VMRegImpl::stack2reg(offset>>2), f->as_VMReg());
   204     if (true) {
   205       map->set_callee_saved(VMRegImpl::stack2reg((offset + sizeof(float))>>2), f->as_VMReg()->next());
   206     }
   207     offset += sizeof(double);
   208   }
   210   // And we're done.
   212   return map;
   213 }
   216 // Pop the current frame and restore all the registers that we
   217 // saved.
   218 void RegisterSaver::restore_live_registers(MacroAssembler* masm) {
   220   // Restore all the FP registers
   221   for( int i=0; i<64; i+=2 ) {
   222     __ ldf(FloatRegisterImpl::D, SP, d00_offset+i*sizeof(float)+STACK_BIAS, as_FloatRegister(i));
   223   }
   225   __ ldx(SP, ccr_offset+STACK_BIAS, G1);
   226   __ wrccr (G1) ;
   228   // Restore the G's
   229   // Note that G2 (AKA GThread) must be saved and restored separately.
   230   // TODO-FIXME: save and restore some of the other ASRs, viz., %asi and %gsr.
   232   __ ldx(SP, g1_offset+STACK_BIAS, G1);
   233   __ ldx(SP, g3_offset+STACK_BIAS, G3);
   234   __ ldx(SP, g4_offset+STACK_BIAS, G4);
   235   __ ldx(SP, g5_offset+STACK_BIAS, G5);
   238 #if !defined(_LP64)
   239   // Restore the 64-bit O's.
   240   __ ldx(SP, o0_offset+STACK_BIAS, O0);
   241   __ ldx(SP, o1_offset+STACK_BIAS, O1);
   242   __ ldx(SP, o2_offset+STACK_BIAS, O2);
   243   __ ldx(SP, o3_offset+STACK_BIAS, O3);
   244   __ ldx(SP, o4_offset+STACK_BIAS, O4);
   245   __ ldx(SP, o5_offset+STACK_BIAS, O5);
   247   // And temporarily place them in TLS
   249   __ stx(O0, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8);
   250   __ stx(O1, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8);
   251   __ stx(O2, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+2*8);
   252   __ stx(O3, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+3*8);
   253   __ stx(O4, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+4*8);
   254   __ stx(O5, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+5*8);
   255 #endif /* _LP64 */
   257   // Restore flags
   259   __ ldxfsr(SP, fsr_offset+STACK_BIAS);
   261   __ restore();
   263 #if !defined(_LP64)
    264   // Now reload the 64-bit Oregs after we've restored the window.
   265   __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8, O0);
   266   __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8, O1);
   267   __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+2*8, O2);
   268   __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+3*8, O3);
   269   __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+4*8, O4);
   270   __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+5*8, O5);
   271 #endif /* _LP64 */
   273 }
   275 // Pop the current frame and restore the registers that might be holding
   276 // a result.
   277 void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
   279 #if !defined(_LP64)
   280   // 32bit build returns longs in G1
   281   __ ldx(SP, g1_offset+STACK_BIAS, G1);
   283   // Retrieve the 64-bit O's.
   284   __ ldx(SP, o0_offset+STACK_BIAS, O0);
   285   __ ldx(SP, o1_offset+STACK_BIAS, O1);
   286   // and save to TLS
   287   __ stx(O0, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8);
   288   __ stx(O1, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8);
   289 #endif /* _LP64 */
   291   __ ldf(FloatRegisterImpl::D, SP, d00_offset+STACK_BIAS, as_FloatRegister(0));
   293   __ restore();
   295 #if !defined(_LP64)
    296   // Now reload the 64-bit Oregs after we've restored the window.
   297   __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8, O0);
   298   __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8, O1);
   299 #endif /* _LP64 */
   301 }
   303 // The java_calling_convention describes stack locations as ideal slots on
   304 // a frame with no abi restrictions. Since we must observe abi restrictions
   305 // (like the placement of the register window) the slots must be biased by
   306 // the following value.
   307 static int reg2offset(VMReg r) {
   308   return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
   309 }
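        // For example (illustrative; the real value of out_preserve_stack_slots() is
        // platform-defined): if out_preserve_stack_slots() returned 16 and
        // r->reg2stack() were 2, reg2offset(r) would be (2 + 16) * 4 = 72 bytes.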
   311 // ---------------------------------------------------------------------------
   312 // Read the array of BasicTypes from a signature, and compute where the
   313 // arguments should go.  Values in the VMRegPair regs array refer to 4-byte (VMRegImpl::stack_slot_size)
   314 // quantities.  Values less than VMRegImpl::stack0 are registers, those above
   315 // refer to 4-byte stack slots.  All stack slots are based off of the window
   316 // top.  VMRegImpl::stack0 refers to the first slot past the 16-word window,
    317 // and VMRegImpl::stack0+1 refers to the memory word 4 bytes higher.  Register
   318 // values 0-63 (up to RegisterImpl::number_of_registers) are the 64-bit
   319 // integer registers.  Values 64-95 are the (32-bit only) float registers.
   320 // Each 32-bit quantity is given its own number, so the integer registers
   321 // (in either 32- or 64-bit builds) use 2 numbers.  For example, there is
   322 // an O0-low and an O0-high.  Essentially, all int register numbers are doubled.
   324 // Register results are passed in O0-O5, for outgoing call arguments.  To
   325 // convert to incoming arguments, convert all O's to I's.  The regs array
   326 // refer to the low and hi 32-bit words of 64-bit registers or stack slots.
   327 // If the regs[].second() field is set to VMRegImpl::Bad(), it means it's unused (a
   328 // 32-bit value was passed).  If both are VMRegImpl::Bad(), it means no value was
   329 // passed (used as a placeholder for the other half of longs and doubles in
   330 // the 64-bit build).  regs[].second() is either VMRegImpl::Bad() or regs[].second() is
   331 // regs[].first()+1 (regs[].first() may be misaligned in the C calling convention).
   332 // Sparc never passes a value in regs[].second() but not regs[].first() (regs[].first()
   333 // == VMRegImpl::Bad() && regs[].second() != VMRegImpl::Bad()) nor unrelated values in the
   334 // same VMRegPair.
   336 // Note: the INPUTS in sig_bt are in units of Java argument words, which are
   337 // either 32-bit or 64-bit depending on the build.  The OUTPUTS are in 32-bit
   338 // units regardless of build.
   341 // ---------------------------------------------------------------------------
   342 // The compiled Java calling convention.  The Java convention always passes
   343 // 64-bit values in adjacent aligned locations (either registers or stack),
   344 // floats in float registers and doubles in aligned float pairs.  Values are
   345 // packed in the registers.  There is no backing varargs store for values in
   346 // registers.  In the 32-bit build, longs are passed in G1 and G4 (cannot be
   347 // passed in I's, because longs in I's get their heads chopped off at
   348 // interrupt).
   349 int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
   350                                            VMRegPair *regs,
   351                                            int total_args_passed,
   352                                            int is_outgoing) {
   353   assert(F31->as_VMReg()->is_reg(), "overlapping stack/register numbers");
   355   // Convention is to pack the first 6 int/oop args into the first 6 registers
   356   // (I0-I5), extras spill to the stack.  Then pack the first 8 float args
   357   // into F0-F7, extras spill to the stack.  Then pad all register sets to
   358   // align.  Then put longs and doubles into the same registers as they fit,
   359   // else spill to the stack.
   360   const int int_reg_max = SPARC_ARGS_IN_REGS_NUM;
   361   const int flt_reg_max = 8;
   362   //
   363   // Where 32-bit 1-reg longs start being passed
   364   // In tiered we must pass on stack because c1 can't use a "pair" in a single reg.
   365   // So make it look like we've filled all the G regs that c2 wants to use.
   366   Register g_reg = TieredCompilation ? noreg : G1;
   368   // Count int/oop and float args.  See how many stack slots we'll need and
   369   // where the longs & doubles will go.
   370   int int_reg_cnt   = 0;
   371   int flt_reg_cnt   = 0;
   372   // int stk_reg_pairs = frame::register_save_words*(wordSize>>2);
   373   // int stk_reg_pairs = SharedRuntime::out_preserve_stack_slots();
   374   int stk_reg_pairs = 0;
   375   for (int i = 0; i < total_args_passed; i++) {
   376     switch (sig_bt[i]) {
   377     case T_LONG:                // LP64, longs compete with int args
   378       assert(sig_bt[i+1] == T_VOID, "");
   379 #ifdef _LP64
   380       if (int_reg_cnt < int_reg_max) int_reg_cnt++;
   381 #endif
   382       break;
   383     case T_OBJECT:
   384     case T_ARRAY:
   385     case T_ADDRESS: // Used, e.g., in slow-path locking for the lock's stack address
   386       if (int_reg_cnt < int_reg_max) int_reg_cnt++;
   387 #ifndef _LP64
   388       else                            stk_reg_pairs++;
   389 #endif
   390       break;
   391     case T_INT:
   392     case T_SHORT:
   393     case T_CHAR:
   394     case T_BYTE:
   395     case T_BOOLEAN:
   396       if (int_reg_cnt < int_reg_max) int_reg_cnt++;
   397       else                            stk_reg_pairs++;
   398       break;
   399     case T_FLOAT:
   400       if (flt_reg_cnt < flt_reg_max) flt_reg_cnt++;
   401       else                            stk_reg_pairs++;
   402       break;
   403     case T_DOUBLE:
   404       assert(sig_bt[i+1] == T_VOID, "");
   405       break;
   406     case T_VOID:
   407       break;
   408     default:
   409       ShouldNotReachHere();
   410     }
   411   }
   413   // This is where the longs/doubles start on the stack.
   414   stk_reg_pairs = (stk_reg_pairs+1) & ~1; // Round
   416   int int_reg_pairs = (int_reg_cnt+1) & ~1; // 32-bit 2-reg longs only
   417   int flt_reg_pairs = (flt_reg_cnt+1) & ~1;
   419   // int stk_reg = frame::register_save_words*(wordSize>>2);
   420   // int stk_reg = SharedRuntime::out_preserve_stack_slots();
   421   int stk_reg = 0;
   422   int int_reg = 0;
   423   int flt_reg = 0;
   425   // Now do the signature layout
   426   for (int i = 0; i < total_args_passed; i++) {
   427     switch (sig_bt[i]) {
   428     case T_INT:
   429     case T_SHORT:
   430     case T_CHAR:
   431     case T_BYTE:
   432     case T_BOOLEAN:
   433 #ifndef _LP64
   434     case T_OBJECT:
   435     case T_ARRAY:
   436     case T_ADDRESS: // Used, e.g., in slow-path locking for the lock's stack address
   437 #endif // _LP64
   438       if (int_reg < int_reg_max) {
   439         Register r = is_outgoing ? as_oRegister(int_reg++) : as_iRegister(int_reg++);
   440         regs[i].set1(r->as_VMReg());
   441       } else {
   442         regs[i].set1(VMRegImpl::stack2reg(stk_reg++));
   443       }
   444       break;
   446 #ifdef _LP64
   447     case T_OBJECT:
   448     case T_ARRAY:
   449     case T_ADDRESS: // Used, e.g., in slow-path locking for the lock's stack address
   450       if (int_reg < int_reg_max) {
   451         Register r = is_outgoing ? as_oRegister(int_reg++) : as_iRegister(int_reg++);
   452         regs[i].set2(r->as_VMReg());
   453       } else {
   454         regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs));
   455         stk_reg_pairs += 2;
   456       }
   457       break;
   458 #endif // _LP64
   460     case T_LONG:
   461       assert(sig_bt[i+1] == T_VOID, "expecting VOID in other half");
   462 #ifdef COMPILER2
   463 #ifdef _LP64
   464         // Can't be tiered (yet)
   465         if (int_reg < int_reg_max) {
   466           Register r = is_outgoing ? as_oRegister(int_reg++) : as_iRegister(int_reg++);
   467           regs[i].set2(r->as_VMReg());
   468         } else {
   469           regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs));
   470           stk_reg_pairs += 2;
   471         }
   472 #else
   473         // For 32-bit build, can't pass longs in O-regs because they become
   474         // I-regs and get trashed.  Use G-regs instead.  G1 and G4 are almost
   475         // spare and available.  This convention isn't used by the Sparc ABI or
   476         // anywhere else. If we're tiered then we don't use G-regs because c1
   477         // can't deal with them as a "pair".
   478         // G0: zero
   479         // G1: 1st Long arg
   480         // G2: global allocated to TLS
   481         // G3: used in inline cache check
   482         // G4: 2nd Long arg
   483         // G5: used in inline cache check
   484         // G6: used by OS
   485         // G7: used by OS
   487         if (g_reg == G1) {
   488           regs[i].set2(G1->as_VMReg()); // This long arg in G1
   489           g_reg = G4;                  // Where the next arg goes
   490         } else if (g_reg == G4) {
   491           regs[i].set2(G4->as_VMReg()); // The 2nd long arg in G4
   492           g_reg = noreg;               // No more longs in registers
   493         } else {
   494           regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs));
   495           stk_reg_pairs += 2;
   496         }
   497 #endif // _LP64
   498 #else // COMPILER2
   499         if (int_reg_pairs + 1 < int_reg_max) {
   500           if (is_outgoing) {
   501             regs[i].set_pair(as_oRegister(int_reg_pairs + 1)->as_VMReg(), as_oRegister(int_reg_pairs)->as_VMReg());
   502           } else {
   503             regs[i].set_pair(as_iRegister(int_reg_pairs + 1)->as_VMReg(), as_iRegister(int_reg_pairs)->as_VMReg());
   504           }
   505           int_reg_pairs += 2;
   506         } else {
   507           regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs));
   508           stk_reg_pairs += 2;
   509         }
   510 #endif // COMPILER2
   511       break;
   513     case T_FLOAT:
   514       if (flt_reg < flt_reg_max) regs[i].set1(as_FloatRegister(flt_reg++)->as_VMReg());
   515       else                       regs[i].set1(    VMRegImpl::stack2reg(stk_reg++));
   516       break;
   517     case T_DOUBLE:
   518       assert(sig_bt[i+1] == T_VOID, "expecting half");
   519       if (flt_reg_pairs + 1 < flt_reg_max) {
   520         regs[i].set2(as_FloatRegister(flt_reg_pairs)->as_VMReg());
   521         flt_reg_pairs += 2;
   522       } else {
   523         regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs));
   524         stk_reg_pairs += 2;
   525       }
   526       break;
   527     case T_VOID: regs[i].set_bad();  break; // Halves of longs & doubles
   528     default:
   529       ShouldNotReachHere();
   530     }
   531   }
    533   // return the amount of stack space these arguments will need.
   534   return stk_reg_pairs;
   536 }
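        // Worked example (illustrative, 64-bit COMPILER2 build, is_outgoing == true):
        // for sig_bt = { T_OBJECT, T_INT, T_LONG, T_VOID, T_DOUBLE, T_VOID } the loop
        // above assigns the object to O0 (set2), the int to O1 (set1), the long to O2
        // (set2) and the double to the aligned pair starting at F0 (set2);
        // stk_reg_pairs stays 0, so no outgoing stack space is needed.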
   538 // Helper class mostly to avoid passing masm everywhere, and handle store
   539 // displacement overflow logic for LP64
   540 class AdapterGenerator {
   541   MacroAssembler *masm;
   542 #ifdef _LP64
   543   Register Rdisp;
   544   void set_Rdisp(Register r)  { Rdisp = r; }
   545 #endif // _LP64
   547   void patch_callers_callsite();
   548   void tag_c2i_arg(frame::Tag t, Register base, int st_off, Register scratch);
   550   // base+st_off points to top of argument
   551   int arg_offset(const int st_off) { return st_off + Interpreter::value_offset_in_bytes(); }
   552   int next_arg_offset(const int st_off) {
   553     return st_off - Interpreter::stackElementSize() + Interpreter::value_offset_in_bytes();
   554   }
   556 #ifdef _LP64
   557   // On _LP64 argument slot values are loaded first into a register
   558   // because they might not fit into displacement.
   559   Register arg_slot(const int st_off);
   560   Register next_arg_slot(const int st_off);
   561 #else
   562   int arg_slot(const int st_off)      { return arg_offset(st_off); }
   563   int next_arg_slot(const int st_off) { return next_arg_offset(st_off); }
   564 #endif // _LP64
   566   // Stores long into offset pointed to by base
   567   void store_c2i_long(Register r, Register base,
   568                       const int st_off, bool is_stack);
   569   void store_c2i_object(Register r, Register base,
   570                         const int st_off);
   571   void store_c2i_int(Register r, Register base,
   572                      const int st_off);
   573   void store_c2i_double(VMReg r_2,
   574                         VMReg r_1, Register base, const int st_off);
   575   void store_c2i_float(FloatRegister f, Register base,
   576                        const int st_off);
   578  public:
   579   void gen_c2i_adapter(int total_args_passed,
   580                               // VMReg max_arg,
   581                               int comp_args_on_stack, // VMRegStackSlots
   582                               const BasicType *sig_bt,
   583                               const VMRegPair *regs,
   584                               Label& skip_fixup);
   585   void gen_i2c_adapter(int total_args_passed,
   586                               // VMReg max_arg,
   587                               int comp_args_on_stack, // VMRegStackSlots
   588                               const BasicType *sig_bt,
   589                               const VMRegPair *regs);
   591   AdapterGenerator(MacroAssembler *_masm) : masm(_masm) {}
   592 };
    595 // Patch the caller's callsite with the entry to compiled code if it exists.
   596 void AdapterGenerator::patch_callers_callsite() {
   597   Label L;
   598   __ ld_ptr(G5_method, in_bytes(methodOopDesc::code_offset()), G3_scratch);
   599   __ br_null(G3_scratch, false, __ pt, L);
   600   // Schedule the branch target address early.
   601   __ delayed()->ld_ptr(G5_method, in_bytes(methodOopDesc::interpreter_entry_offset()), G3_scratch);
   602   // Call into the VM to patch the caller, then jump to compiled callee
   603   __ save_frame(4);     // Args in compiled layout; do not blow them
    605   // Must save all the live Gregs; the list is:
   606   // G1: 1st Long arg (32bit build)
   607   // G2: global allocated to TLS
   608   // G3: used in inline cache check (scratch)
   609   // G4: 2nd Long arg (32bit build);
   610   // G5: used in inline cache check (methodOop)
   612   // The longs must go to the stack by hand since in the 32 bit build they can be trashed by window ops.
   614 #ifdef _LP64
   615   // mov(s,d)
   616   __ mov(G1, L1);
   617   __ mov(G4, L4);
   618   __ mov(G5_method, L5);
   619   __ mov(G5_method, O0);         // VM needs target method
   620   __ mov(I7, O1);                // VM needs caller's callsite
   621   // Must be a leaf call...
   622   // can be very far once the blob has been relocated
   623   Address dest(O7, CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite));
   624   __ relocate(relocInfo::runtime_call_type);
   625   __ jumpl_to(dest, O7);
   626   __ delayed()->mov(G2_thread, L7_thread_cache);
   627   __ mov(L7_thread_cache, G2_thread);
   628   __ mov(L1, G1);
   629   __ mov(L4, G4);
   630   __ mov(L5, G5_method);
   631 #else
   632   __ stx(G1, FP, -8 + STACK_BIAS);
   633   __ stx(G4, FP, -16 + STACK_BIAS);
   634   __ mov(G5_method, L5);
   635   __ mov(G5_method, O0);         // VM needs target method
   636   __ mov(I7, O1);                // VM needs caller's callsite
   637   // Must be a leaf call...
   638   __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), relocInfo::runtime_call_type);
   639   __ delayed()->mov(G2_thread, L7_thread_cache);
   640   __ mov(L7_thread_cache, G2_thread);
   641   __ ldx(FP, -8 + STACK_BIAS, G1);
   642   __ ldx(FP, -16 + STACK_BIAS, G4);
   643   __ mov(L5, G5_method);
   644   __ ld_ptr(G5_method, in_bytes(methodOopDesc::interpreter_entry_offset()), G3_scratch);
   645 #endif /* _LP64 */
   647   __ restore();      // Restore args
   648   __ bind(L);
   649 }
   651 void AdapterGenerator::tag_c2i_arg(frame::Tag t, Register base, int st_off,
   652                  Register scratch) {
   653   if (TaggedStackInterpreter) {
   654     int tag_off = st_off + Interpreter::tag_offset_in_bytes();
   655 #ifdef _LP64
   656     Register tag_slot = Rdisp;
   657     __ set(tag_off, tag_slot);
   658 #else
   659     int tag_slot = tag_off;
   660 #endif // _LP64
   661     // have to store zero because local slots can be reused (rats!)
   662     if (t == frame::TagValue) {
   663       __ st_ptr(G0, base, tag_slot);
   664     } else if (t == frame::TagCategory2) {
   665       __ st_ptr(G0, base, tag_slot);
   666       int next_tag_off  = st_off - Interpreter::stackElementSize() +
   667                                    Interpreter::tag_offset_in_bytes();
   668 #ifdef _LP64
   669       __ set(next_tag_off, tag_slot);
   670 #else
   671       tag_slot = next_tag_off;
   672 #endif // _LP64
   673       __ st_ptr(G0, base, tag_slot);
   674     } else {
   675       __ mov(t, scratch);
   676       __ st_ptr(scratch, base, tag_slot);
   677     }
   678   }
   679 }
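        // Illustrative effect of tag_c2i_arg when TaggedStackInterpreter is on: an int
        // argument gets a zero written into its tag slot, a long or double zeroes the
        // tag slots of both of its stack elements, and an object argument gets
        // frame::TagReference stored through the scratch register.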
   681 #ifdef _LP64
   682 Register AdapterGenerator::arg_slot(const int st_off) {
   683   __ set( arg_offset(st_off), Rdisp);
   684   return Rdisp;
   685 }
   687 Register AdapterGenerator::next_arg_slot(const int st_off){
   688   __ set( next_arg_offset(st_off), Rdisp);
   689   return Rdisp;
   690 }
   691 #endif // _LP64
   693 // Stores long into offset pointed to by base
   694 void AdapterGenerator::store_c2i_long(Register r, Register base,
   695                                       const int st_off, bool is_stack) {
   696 #ifdef COMPILER2
   697 #ifdef _LP64
   698   // In V9, longs are given 2 64-bit slots in the interpreter, but the
   699   // data is passed in only 1 slot.
   700   __ stx(r, base, next_arg_slot(st_off));
   701 #else
   702   // Misaligned store of 64-bit data
   703   __ stw(r, base, arg_slot(st_off));    // lo bits
   704   __ srlx(r, 32, r);
   705   __ stw(r, base, next_arg_slot(st_off));  // hi bits
   706 #endif // _LP64
   707 #else
   708   if (is_stack) {
   709     // Misaligned store of 64-bit data
   710     __ stw(r, base, arg_slot(st_off));    // lo bits
   711     __ srlx(r, 32, r);
   712     __ stw(r, base, next_arg_slot(st_off));  // hi bits
   713   } else {
   714     __ stw(r->successor(), base, arg_slot(st_off)     ); // lo bits
   715     __ stw(r             , base, next_arg_slot(st_off)); // hi bits
   716   }
   717 #endif // COMPILER2
   718   tag_c2i_arg(frame::TagCategory2, base, st_off, r);
   719 }
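        // Example of the misaligned 64-bit split above (illustrative value): storing
        // 0x1122334455667788 writes the low word 0x55667788 to arg_slot(st_off) and,
        // after the srlx by 32, the high word 0x11223344 to next_arg_slot(st_off).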
   721 void AdapterGenerator::store_c2i_object(Register r, Register base,
   722                       const int st_off) {
   723   __ st_ptr (r, base, arg_slot(st_off));
   724   tag_c2i_arg(frame::TagReference, base, st_off, r);
   725 }
   727 void AdapterGenerator::store_c2i_int(Register r, Register base,
   728                    const int st_off) {
   729   __ st (r, base, arg_slot(st_off));
   730   tag_c2i_arg(frame::TagValue, base, st_off, r);
   731 }
   733 // Stores into offset pointed to by base
   734 void AdapterGenerator::store_c2i_double(VMReg r_2,
   735                       VMReg r_1, Register base, const int st_off) {
   736 #ifdef _LP64
   737   // In V9, doubles are given 2 64-bit slots in the interpreter, but the
   738   // data is passed in only 1 slot.
   739   __ stf(FloatRegisterImpl::D, r_1->as_FloatRegister(), base, next_arg_slot(st_off));
   740 #else
   741   // Need to marshal 64-bit value from misaligned Lesp loads
   742   __ stf(FloatRegisterImpl::S, r_1->as_FloatRegister(), base, next_arg_slot(st_off));
   743   __ stf(FloatRegisterImpl::S, r_2->as_FloatRegister(), base, arg_slot(st_off) );
   744 #endif
   745   tag_c2i_arg(frame::TagCategory2, base, st_off, G1_scratch);
   746 }
   748 void AdapterGenerator::store_c2i_float(FloatRegister f, Register base,
   749                                        const int st_off) {
   750   __ stf(FloatRegisterImpl::S, f, base, arg_slot(st_off));
   751   tag_c2i_arg(frame::TagValue, base, st_off, G1_scratch);
   752 }
   754 void AdapterGenerator::gen_c2i_adapter(
   755                             int total_args_passed,
   756                             // VMReg max_arg,
   757                             int comp_args_on_stack, // VMRegStackSlots
   758                             const BasicType *sig_bt,
   759                             const VMRegPair *regs,
   760                             Label& skip_fixup) {
   762   // Before we get into the guts of the C2I adapter, see if we should be here
   763   // at all.  We've come from compiled code and are attempting to jump to the
   764   // interpreter, which means the caller made a static call to get here
   765   // (vcalls always get a compiled target if there is one).  Check for a
   766   // compiled target.  If there is one, we need to patch the caller's call.
   767   // However we will run interpreted if we come thru here. The next pass
   768   // thru the call site will run compiled. If we ran compiled here then
    769   // we can (theoretically) do endless i2c->c2i->i2c transitions during
   770   // deopt/uncommon trap cycles. If we always go interpreted here then
   771   // we can have at most one and don't need to play any tricks to keep
   772   // from endlessly growing the stack.
   773   //
   774   // Actually if we detected that we had an i2c->c2i transition here we
   775   // ought to be able to reset the world back to the state of the interpreted
   776   // call and not bother building another interpreter arg area. We don't
   777   // do that at this point.
   779   patch_callers_callsite();
   781   __ bind(skip_fixup);
   783   // Since all args are passed on the stack, total_args_passed*wordSize is the
   784   // space we need.  Add in varargs area needed by the interpreter. Round up
   785   // to stack alignment.
   786   const int arg_size = total_args_passed * Interpreter::stackElementSize();
   787   const int varargs_area =
   788                  (frame::varargs_offset - frame::register_save_words)*wordSize;
   789   const int extraspace = round_to(arg_size + varargs_area, 2*wordSize);
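          // Sizing example (illustrative numbers only): with arg_size = 24 and
          // varargs_area = 36, round_to(60, 2*wordSize) on a 32-bit build
          // (wordSize == 4) gives extraspace = 64.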
   791   int bias = STACK_BIAS;
   792   const int interp_arg_offset = frame::varargs_offset*wordSize +
   793                         (total_args_passed-1)*Interpreter::stackElementSize();
   795   Register base = SP;
   797 #ifdef _LP64
    798   // In the 64-bit build, because of wider slots and STACK_BIAS we can run
   799   // out of bits in the displacement to do loads and stores.  Use g3 as
   800   // temporary displacement.
   801   if (! __ is_simm13(extraspace)) {
   802     __ set(extraspace, G3_scratch);
   803     __ sub(SP, G3_scratch, SP);
   804   } else {
   805     __ sub(SP, extraspace, SP);
   806   }
   807   set_Rdisp(G3_scratch);
   808 #else
   809   __ sub(SP, extraspace, SP);
   810 #endif // _LP64
    812   // First write G1 (if used) to wherever it must go
   813   for (int i=0; i<total_args_passed; i++) {
   814     const int st_off = interp_arg_offset - (i*Interpreter::stackElementSize()) + bias;
   815     VMReg r_1 = regs[i].first();
   816     VMReg r_2 = regs[i].second();
   817     if (r_1 == G1_scratch->as_VMReg()) {
   818       if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ARRAY) {
   819         store_c2i_object(G1_scratch, base, st_off);
   820       } else if (sig_bt[i] == T_LONG) {
   821         assert(!TieredCompilation, "should not use register args for longs");
   822         store_c2i_long(G1_scratch, base, st_off, false);
   823       } else {
   824         store_c2i_int(G1_scratch, base, st_off);
   825       }
   826     }
   827   }
   829   // Now write the args into the outgoing interpreter space
   830   for (int i=0; i<total_args_passed; i++) {
   831     const int st_off = interp_arg_offset - (i*Interpreter::stackElementSize()) + bias;
   832     VMReg r_1 = regs[i].first();
   833     VMReg r_2 = regs[i].second();
   834     if (!r_1->is_valid()) {
   835       assert(!r_2->is_valid(), "");
   836       continue;
   837     }
   838     // Skip G1 if found as we did it first in order to free it up
   839     if (r_1 == G1_scratch->as_VMReg()) {
   840       continue;
   841     }
   842 #ifdef ASSERT
   843     bool G1_forced = false;
   844 #endif // ASSERT
   845     if (r_1->is_stack()) {        // Pretend stack targets are loaded into G1
   846 #ifdef _LP64
   847       Register ld_off = Rdisp;
   848       __ set(reg2offset(r_1) + extraspace + bias, ld_off);
   849 #else
   850       int ld_off = reg2offset(r_1) + extraspace + bias;
   851 #ifdef ASSERT
   852       G1_forced = true;
   853 #endif // ASSERT
   854 #endif // _LP64
   855       r_1 = G1_scratch->as_VMReg();// as part of the load/store shuffle
   856       if (!r_2->is_valid()) __ ld (base, ld_off, G1_scratch);
   857       else                  __ ldx(base, ld_off, G1_scratch);
   858     }
   860     if (r_1->is_Register()) {
   861       Register r = r_1->as_Register()->after_restore();
   862       if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ARRAY) {
   863         store_c2i_object(r, base, st_off);
   864       } else if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
   865         if (TieredCompilation) {
   866           assert(G1_forced || sig_bt[i] != T_LONG, "should not use register args for longs");
   867         }
   868         store_c2i_long(r, base, st_off, r_2->is_stack());
   869       } else {
   870         store_c2i_int(r, base, st_off);
   871       }
   872     } else {
   873       assert(r_1->is_FloatRegister(), "");
   874       if (sig_bt[i] == T_FLOAT) {
   875         store_c2i_float(r_1->as_FloatRegister(), base, st_off);
   876       } else {
   877         assert(sig_bt[i] == T_DOUBLE, "wrong type");
   878         store_c2i_double(r_2, r_1, base, st_off);
   879       }
   880     }
   881   }
   883 #ifdef _LP64
   884   // Need to reload G3_scratch, used for temporary displacements.
   885   __ ld_ptr(G5_method, in_bytes(methodOopDesc::interpreter_entry_offset()), G3_scratch);
   887   // Pass O5_savedSP as an argument to the interpreter.
   888   // The interpreter will restore SP to this value before returning.
   889   __ set(extraspace, G1);
   890   __ add(SP, G1, O5_savedSP);
   891 #else
   892   // Pass O5_savedSP as an argument to the interpreter.
   893   // The interpreter will restore SP to this value before returning.
   894   __ add(SP, extraspace, O5_savedSP);
   895 #endif // _LP64
   897   __ mov((frame::varargs_offset)*wordSize -
   898          1*Interpreter::stackElementSize()+bias+BytesPerWord, G1);
   899   // Jump to the interpreter just as if interpreter was doing it.
   900   __ jmpl(G3_scratch, 0, G0);
   901   // Setup Lesp for the call.  Cannot actually set Lesp as the current Lesp
   902   // (really L0) is in use by the compiled frame as a generic temp.  However,
   903   // the interpreter does not know where its args are without some kind of
   904   // arg pointer being passed in.  Pass it in Gargs.
   905   __ delayed()->add(SP, G1, Gargs);
   906 }
   908 void AdapterGenerator::gen_i2c_adapter(
   909                             int total_args_passed,
   910                             // VMReg max_arg,
   911                             int comp_args_on_stack, // VMRegStackSlots
   912                             const BasicType *sig_bt,
   913                             const VMRegPair *regs) {
   915   // Generate an I2C adapter: adjust the I-frame to make space for the C-frame
   916   // layout.  Lesp was saved by the calling I-frame and will be restored on
   917   // return.  Meanwhile, outgoing arg space is all owned by the callee
   918   // C-frame, so we can mangle it at will.  After adjusting the frame size,
   919   // hoist register arguments and repack other args according to the compiled
   920   // code convention.  Finally, end in a jump to the compiled code.  The entry
   921   // point address is the start of the buffer.
   923   // We will only enter here from an interpreted frame and never from after
   924   // passing thru a c2i. Azul allowed this but we do not. If we lose the
   925   // race and use a c2i we will remain interpreted for the race loser(s).
   926   // This removes all sorts of headaches on the x86 side and also eliminates
   927   // the possibility of having c2i -> i2c -> c2i -> ... endless transitions.
   929   // As you can see from the list of inputs & outputs there are not a lot
   930   // of temp registers to work with: mostly G1, G3 & G4.
   932   // Inputs:
   933   // G2_thread      - TLS
   934   // G5_method      - Method oop
   935   // O0             - Flag telling us to restore SP from O5
   936   // O4_args        - Pointer to interpreter's args
   937   // O5             - Caller's saved SP, to be restored if needed
   938   // O6             - Current SP!
   939   // O7             - Valid return address
   940   // L0-L7, I0-I7    - Caller's temps (no frame pushed yet)
   942   // Outputs:
   943   // G2_thread      - TLS
   944   // G1, G4         - Outgoing long args in 32-bit build
   945   // O0-O5          - Outgoing args in compiled layout
   946   // O6             - Adjusted or restored SP
   947   // O7             - Valid return address
   948   // L0-L7, I0-I7    - Caller's temps (no frame pushed yet)
   949   // F0-F7          - more outgoing args
   952   // O4 is about to get loaded up with compiled callee's args
   953   __ sub(Gargs, BytesPerWord, Gargs);
   955 #ifdef ASSERT
   956   {
   957     // on entry OsavedSP and SP should be equal
   958     Label ok;
   959     __ cmp(O5_savedSP, SP);
   960     __ br(Assembler::equal, false, Assembler::pt, ok);
   961     __ delayed()->nop();
   962     __ stop("I5_savedSP not set");
   963     __ should_not_reach_here();
   964     __ bind(ok);
   965   }
   966 #endif
   968   // ON ENTRY TO THE CODE WE ARE MAKING, WE HAVE AN INTERPRETED FRAME
   969   // WITH O7 HOLDING A VALID RETURN PC
   970   //
   971   // |              |
   972   // :  java stack  :
   973   // |              |
   974   // +--------------+ <--- start of outgoing args
   975   // |   receiver   |   |
   976   // : rest of args :   |---size is java-arg-words
   977   // |              |   |
   978   // +--------------+ <--- O4_args (misaligned) and Lesp if prior is not C2I
   979   // |              |   |
   980   // :    unused    :   |---Space for max Java stack, plus stack alignment
   981   // |              |   |
   982   // +--------------+ <--- SP + 16*wordsize
   983   // |              |
   984   // :    window    :
   985   // |              |
   986   // +--------------+ <--- SP
   988   // WE REPACK THE STACK.  We use the common calling convention layout as
   989   // discovered by calling SharedRuntime::calling_convention.  We assume it
   990   // causes an arbitrary shuffle of memory, which may require some register
   991   // temps to do the shuffle.  We hope for (and optimize for) the case where
   992   // temps are not needed.  We may have to resize the stack slightly, in case
   993   // we need alignment padding (32-bit interpreter can pass longs & doubles
   994   // misaligned, but the compilers expect them aligned).
   995   //
   996   // |              |
   997   // :  java stack  :
   998   // |              |
   999   // +--------------+ <--- start of outgoing args
  1000   // |  pad, align  |   |
  1001   // +--------------+   |
  1002   // | ints, floats |   |---Outgoing stack args, packed low.
  1003   // +--------------+   |   First few args in registers.
  1004   // :   doubles    :   |
  1005   // |   longs      |   |
  1006   // +--------------+ <--- SP' + 16*wordsize
  1007   // |              |
  1008   // :    window    :
  1009   // |              |
  1010   // +--------------+ <--- SP'
  1012   // ON EXIT FROM THE CODE WE ARE MAKING, WE STILL HAVE AN INTERPRETED FRAME
  1013   // WITH O7 HOLDING A VALID RETURN PC - ITS JUST THAT THE ARGS ARE NOW SETUP
  1014   // FOR COMPILED CODE AND THE FRAME SLIGHTLY GROWN.
  1016   // Cut-out for having no stack args.  Since up to 6 args are passed
  1017   // in registers, we will commonly have no stack args.
  1018   if (comp_args_on_stack > 0) {
  1020     // Convert VMReg stack slots to words.
  1021     int comp_words_on_stack = round_to(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
   1022     // Round up to minimum stack alignment, in wordSize units
  1023     comp_words_on_stack = round_to(comp_words_on_stack, 2);
  1024     // Now compute the distance from Lesp to SP.  This calculation does not
  1025     // include the space for total_args_passed because Lesp has not yet popped
  1026     // the arguments.
   1027     __ sub(SP, (comp_words_on_stack)*wordSize, SP);
   1028   }
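          // Sizing example for the block above (illustrative, 64-bit build): with
          // comp_args_on_stack == 5, the 5*4 = 20 bytes round up to 24, i.e. 3 words,
          // which round_to(3, 2) bumps to 4 words, so SP is dropped by 32 bytes.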
  1030   // Will jump to the compiled code just as if compiled code was doing it.
  1031   // Pre-load the register-jump target early, to schedule it better.
  1032   __ ld_ptr(G5_method, in_bytes(methodOopDesc::from_compiled_offset()), G3);
  1034   // Now generate the shuffle code.  Pick up all register args and move the
  1035   // rest through G1_scratch.
  1036   for (int i=0; i<total_args_passed; i++) {
  1037     if (sig_bt[i] == T_VOID) {
  1038       // Longs and doubles are passed in native word order, but misaligned
  1039       // in the 32-bit build.
  1040       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
   1041       continue;
   1042     }
  1044     // Pick up 0, 1 or 2 words from Lesp+offset.  Assume mis-aligned in the
  1045     // 32-bit build and aligned in the 64-bit build.  Look for the obvious
  1046     // ldx/lddf optimizations.
  1048     // Load in argument order going down.
  1049     const int ld_off = (total_args_passed-i)*Interpreter::stackElementSize();
  1050 #ifdef _LP64
  1051     set_Rdisp(G1_scratch);
  1052 #endif // _LP64
  1054     VMReg r_1 = regs[i].first();
  1055     VMReg r_2 = regs[i].second();
  1056     if (!r_1->is_valid()) {
  1057       assert(!r_2->is_valid(), "");
   1058       continue;
   1059     }
  1060     if (r_1->is_stack()) {        // Pretend stack targets are loaded into F8/F9
  1061       r_1 = F8->as_VMReg();        // as part of the load/store shuffle
   1062       if (r_2->is_valid()) r_2 = r_1->next();
   1063     }
  1064     if (r_1->is_Register()) {  // Register argument
  1065       Register r = r_1->as_Register()->after_restore();
  1066       if (!r_2->is_valid()) {
  1067         __ ld(Gargs, arg_slot(ld_off), r);
  1068       } else {
  1069 #ifdef _LP64
  1070         // In V9, longs are given 2 64-bit slots in the interpreter, but the
  1071         // data is passed in only 1 slot.
  1072         Register slot = (sig_bt[i]==T_LONG) ?
  1073               next_arg_slot(ld_off) : arg_slot(ld_off);
  1074         __ ldx(Gargs, slot, r);
  1075 #else
  1076         // Need to load a 64-bit value into G1/G4, but G1/G4 is being used in the
  1077         // stack shuffle.  Load the first 2 longs into G1/G4 later.
   1078 #endif
   1079       }
  1080     } else {
  1081       assert(r_1->is_FloatRegister(), "");
  1082       if (!r_2->is_valid()) {
  1083         __ ldf(FloatRegisterImpl::S, Gargs, arg_slot(ld_off), r_1->as_FloatRegister());
  1084       } else {
  1085 #ifdef _LP64
  1086         // In V9, doubles are given 2 64-bit slots in the interpreter, but the
  1087         // data is passed in only 1 slot.  This code also handles longs that
  1088         // are passed on the stack, but need a stack-to-stack move through a
  1089         // spare float register.
  1090         Register slot = (sig_bt[i]==T_LONG || sig_bt[i] == T_DOUBLE) ?
  1091               next_arg_slot(ld_off) : arg_slot(ld_off);
  1092         __ ldf(FloatRegisterImpl::D, Gargs, slot, r_1->as_FloatRegister());
  1093 #else
  1094         // Need to marshal 64-bit value from misaligned Lesp loads
  1095         __ ldf(FloatRegisterImpl::S, Gargs, next_arg_slot(ld_off), r_1->as_FloatRegister());
  1096         __ ldf(FloatRegisterImpl::S, Gargs, arg_slot(ld_off), r_2->as_FloatRegister());
   1097 #endif
   1098       }
   1099     }
  1100     // Was the argument really intended to be on the stack, but was loaded
  1101     // into F8/F9?
  1102     if (regs[i].first()->is_stack()) {
  1103       assert(r_1->as_FloatRegister() == F8, "fix this code");
  1104       // Convert stack slot to an SP offset
  1105       int st_off = reg2offset(regs[i].first()) + STACK_BIAS;
  1106       // Store down the shuffled stack word.  Target address _is_ aligned.
  1107       if (!r_2->is_valid()) __ stf(FloatRegisterImpl::S, r_1->as_FloatRegister(), SP, st_off);
   1108       else                  __ stf(FloatRegisterImpl::D, r_1->as_FloatRegister(), SP, st_off);
   1109     }
   1110   }
  1111   bool made_space = false;
  1112 #ifndef _LP64
  1113   // May need to pick up a few long args in G1/G4
  1114   bool g4_crushed = false;
  1115   bool g3_crushed = false;
  1116   for (int i=0; i<total_args_passed; i++) {
  1117     if (regs[i].first()->is_Register() && regs[i].second()->is_valid()) {
  1118       // Load in argument order going down
  1119       int ld_off = (total_args_passed-i)*Interpreter::stackElementSize();
  1120       // Need to marshal 64-bit value from misaligned Lesp loads
  1121       Register r = regs[i].first()->as_Register()->after_restore();
  1122       if (r == G1 || r == G4) {
  1123         assert(!g4_crushed, "ordering problem");
  1124         if (r == G4){
  1125           g4_crushed = true;
  1126           __ lduw(Gargs, arg_slot(ld_off)     , G3_scratch); // Load lo bits
  1127           __ ld  (Gargs, next_arg_slot(ld_off), r);          // Load hi bits
  1128         } else {
  1129           // better schedule this way
  1130           __ ld  (Gargs, next_arg_slot(ld_off), r);          // Load hi bits
   1131           __ lduw(Gargs, arg_slot(ld_off)     , G3_scratch); // Load lo bits
   1132         }
  1133         g3_crushed = true;
  1134         __ sllx(r, 32, r);
  1135         __ or3(G3_scratch, r, r);
  1136       } else {
  1137         assert(r->is_out(), "longs passed in two O registers");
  1138         __ ld  (Gargs, arg_slot(ld_off)     , r->successor()); // Load lo bits
   1139         __ ld  (Gargs, next_arg_slot(ld_off), r);              // Load hi bits
   1140       }
   1141     }
   1142   }
  1143 #endif
  1145   // Jump to the compiled code just as if compiled code was doing it.
  1146   //
  1147 #ifndef _LP64
  1148     if (g3_crushed) {
  1149       // Rats load was wasted, at least it is in cache...
   1150       __ ld_ptr(G5_method, in_bytes(methodOopDesc::from_compiled_offset()), G3);
   1151     }
  1152 #endif /* _LP64 */
  1154     // 6243940 We might end up in handle_wrong_method if
  1155     // the callee is deoptimized as we race thru here. If that
  1156     // happens we don't want to take a safepoint because the
  1157     // caller frame will look interpreted and arguments are now
  1158     // "compiled" so it is much better to make this transition
  1159     // invisible to the stack walking code. Unfortunately if
  1160     // we try and find the callee by normal means a safepoint
  1161     // is possible. So we stash the desired callee in the thread
   1162     // and the VM will find it there should this case occur.
  1163     Address callee_target_addr(G2_thread, 0, in_bytes(JavaThread::callee_target_offset()));
  1164     __ st_ptr(G5_method, callee_target_addr);
  1166     if (StressNonEntrant) {
  1167       // Open a big window for deopt failure
  1168       __ save_frame(0);
  1169       __ mov(G0, L0);
  1170       Label loop;
  1171       __ bind(loop);
  1172       __ sub(L0, 1, L0);
  1173       __ br_null(L0, false, Assembler::pt, loop);
  1174       __ delayed()->nop();
   1176       __ restore();
   1177     }
  1180     __ jmpl(G3, 0, G0);
   1181     __ delayed()->nop();
   1182 }
  1184 // ---------------------------------------------------------------
  1185 AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
  1186                                                             int total_args_passed,
  1187                                                             // VMReg max_arg,
  1188                                                             int comp_args_on_stack, // VMRegStackSlots
  1189                                                             const BasicType *sig_bt,
  1190                                                             const VMRegPair *regs) {
  1191   address i2c_entry = __ pc();
  1193   AdapterGenerator agen(masm);
  1195   agen.gen_i2c_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs);
  1198   // -------------------------------------------------------------------------
  1199   // Generate a C2I adapter.  On entry we know G5 holds the methodOop.  The
  1200   // args start out packed in the compiled layout.  They need to be unpacked
  1201   // into the interpreter layout.  This will almost always require some stack
  1202   // space.  We grow the current (compiled) stack, then repack the args.  We
  1203   // finally end in a jump to the generic interpreter entry point.  On exit
  1204   // from the interpreter, the interpreter will restore our SP (lest the
   1205   // compiled code, which relies solely on SP and not FP, get sick).
  1207   address c2i_unverified_entry = __ pc();
  1208   Label skip_fixup;
  1210 #if !defined(_LP64) && defined(COMPILER2)
  1211     Register R_temp   = L0;   // another scratch register
  1212 #else
  1213     Register R_temp   = G1;   // another scratch register
  1214 #endif
  1216     Address ic_miss(G3_scratch, SharedRuntime::get_ic_miss_stub());
  1218     __ verify_oop(O0);
  1219     __ verify_oop(G5_method);
  1220     __ ld_ptr(O0, oopDesc::klass_offset_in_bytes(), G3_scratch);
  1221     __ verify_oop(G3_scratch);
  1223 #if !defined(_LP64) && defined(COMPILER2)
  1224     __ save(SP, -frame::register_save_words*wordSize, SP);
  1225     __ ld_ptr(G5_method, compiledICHolderOopDesc::holder_klass_offset(), R_temp);
  1226     __ verify_oop(R_temp);
  1227     __ cmp(G3_scratch, R_temp);
  1228     __ restore();
  1229 #else
  1230     __ ld_ptr(G5_method, compiledICHolderOopDesc::holder_klass_offset(), R_temp);
  1231     __ verify_oop(R_temp);
  1232     __ cmp(G3_scratch, R_temp);
  1233 #endif
  1235     Label ok, ok2;
  1236     __ brx(Assembler::equal, false, Assembler::pt, ok);
  1237     __ delayed()->ld_ptr(G5_method, compiledICHolderOopDesc::holder_method_offset(), G5_method);
  1238     __ jump_to(ic_miss);
  1239     __ delayed()->nop();
  1241     __ bind(ok);
  1242     // Method might have been compiled since the call site was patched to
  1243     // interpreted; if that is the case, treat it as a miss so we can get
  1244     // the call site corrected.
  1245     __ ld_ptr(G5_method, in_bytes(methodOopDesc::code_offset()), G3_scratch);
  1246     __ bind(ok2);
  1247     __ br_null(G3_scratch, false, __ pt, skip_fixup);
  1248     __ delayed()->ld_ptr(G5_method, in_bytes(methodOopDesc::interpreter_entry_offset()), G3_scratch);
  1249     __ jump_to(ic_miss);
  1250     __ delayed()->nop();
  1254   address c2i_entry = __ pc();
  1256   agen.gen_c2i_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
  1258   __ flush();
  1259   return new AdapterHandlerEntry(i2c_entry, c2i_entry, c2i_unverified_entry);
  1263 // Helper function for native calling conventions
  1264 static VMReg int_stk_helper( int i ) {
  1265   // Bias any stack based VMReg we get by ignoring the window area
  1266   // but not the register parameter save area.
  1267   //
  1268   // This is strange for the following reasons. We'd normally expect
  1269   // the calling convention to return a VMReg for a stack slot
  1270   // completely ignoring any abi reserved area. C2 thinks of that
  1271   // abi area as only out_preserve_stack_slots. This does not include
  1272   // the area allocated by the C abi to store down integer arguments
  1273   // because the java calling convention does not use it. So
  1274   // since c2 assumes that there are only out_preserve_stack_slots
  1275   // to bias the optoregs (which impacts VMRegs), when actually referencing any stack
  1276   // location the C calling convention must add in this bias amount
  1277   // to make up for the fact that the out_preserve_stack_slots is
  1278   // insufficient for C calls. What a mess. I sure hope those 6
  1279   // stack words were worth it on every java call!
  1281   // Another way of cleaning this up would be for out_preserve_stack_slots
  1282   // to take a parameter to say whether it was C or java calling conventions.
  1283   // Then things might look a little better (but not much).
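         // Illustrative example (assuming SPARC_ARGS_IN_REGS_NUM is 6): i == 2 simply
         // yields %o2, while i == 7 gives mem_parm_offset == 1 and therefore
         // stack2reg((1 + frame::memory_parameter_word_sp_offset) * VMRegImpl::slots_per_word
         //           - SharedRuntime::out_preserve_stack_slots()),
         // a biased slot index that only becomes the real SP-relative location once
         // out_preserve_stack_slots is added back in by the code that uses it.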
  1285   int mem_parm_offset = i - SPARC_ARGS_IN_REGS_NUM;
  1286   if( mem_parm_offset < 0 ) {
  1287     return as_oRegister(i)->as_VMReg();
  1288   } else {
  1289     int actual_offset = (mem_parm_offset + frame::memory_parameter_word_sp_offset) * VMRegImpl::slots_per_word;
  1290     // Now return a biased offset that will be correct when out_preserve_slots is added back in
  1291     return VMRegImpl::stack2reg(actual_offset - SharedRuntime::out_preserve_stack_slots());
  1296 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
  1297                                          VMRegPair *regs,
  1298                                          int total_args_passed) {
  1300     // Return the number of VMReg stack_slots needed for the args.
  1301     // This value does not include an abi space (like register window
  1302     // save area).
  1304     // The native convention is V8 if !LP64
  1305     // The LP64 convention is the V9 convention which is slightly more sane.
  1307     // We return the amount of VMReg stack slots we need to reserve for all
  1308     // the arguments NOT counting out_preserve_stack_slots. Since we always
  1309     // have space for storing at least 6 registers to memory we start with that.
  1310     // See int_stk_helper for a further discussion.
  1311     int max_stack_slots = (frame::varargs_offset * VMRegImpl::slots_per_word) - SharedRuntime::out_preserve_stack_slots();
  1313 #ifdef _LP64
  1314     // V9 convention: All things "as-if" on double-wide stack slots.
  1315     // Hoist any int/ptr/long's in the first 6 to int regs.
  1316     // Hoist any flt/dbl's in the first 16 dbl regs.
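           // For example (per the code below), if the third argument (j == 2) is a
           // float it lands in %f5 (1 + 2*2); if it is a double it takes the %f4/%f5 pair.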
  1317     int j = 0;                  // Count of actual args, not HALVES
  1318     for( int i=0; i<total_args_passed; i++, j++ ) {
  1319       switch( sig_bt[i] ) {
  1320       case T_BOOLEAN:
  1321       case T_BYTE:
  1322       case T_CHAR:
  1323       case T_INT:
  1324       case T_SHORT:
  1325         regs[i].set1( int_stk_helper( j ) ); break;
  1326       case T_LONG:
  1327         assert( sig_bt[i+1] == T_VOID, "expecting half" );
  1328       case T_ADDRESS: // raw pointers, like current thread, for VM calls
  1329       case T_ARRAY:
  1330       case T_OBJECT:
  1331         regs[i].set2( int_stk_helper( j ) );
  1332         break;
  1333       case T_FLOAT:
  1334         if ( j < 16 ) {
  1335           // V9ism: floats go in ODD registers
  1336           regs[i].set1(as_FloatRegister(1 + (j<<1))->as_VMReg());
  1337         } else {
  1338           // V9ism: floats go in ODD stack slot
  1339           regs[i].set1(VMRegImpl::stack2reg(1 + (j<<1)));
  1341         break;
  1342       case T_DOUBLE:
  1343         assert( sig_bt[i+1] == T_VOID, "expecting half" );
  1344         if ( j < 16 ) {
  1345           // V9ism: doubles go in EVEN/ODD regs
  1346           regs[i].set2(as_FloatRegister(j<<1)->as_VMReg());
  1347         } else {
  1348           // V9ism: doubles go in EVEN/ODD stack slots
  1349           regs[i].set2(VMRegImpl::stack2reg(j<<1));
  1351         break;
  1352       case T_VOID:  regs[i].set_bad(); j--; break; // Do not count HALVES
  1353       default:
  1354         ShouldNotReachHere();
  1356       if (regs[i].first()->is_stack()) {
  1357         int off =  regs[i].first()->reg2stack();
  1358         if (off > max_stack_slots) max_stack_slots = off;
  1360       if (regs[i].second()->is_stack()) {
  1361         int off =  regs[i].second()->reg2stack();
  1362         if (off > max_stack_slots) max_stack_slots = off;
  1366 #else // _LP64
  1367     // V8 convention: first 6 things in O-regs, rest on stack.
  1368     // Alignment is willy-nilly.
  1369     for( int i=0; i<total_args_passed; i++ ) {
  1370       switch( sig_bt[i] ) {
  1371       case T_ADDRESS: // raw pointers, like current thread, for VM calls
  1372       case T_ARRAY:
  1373       case T_BOOLEAN:
  1374       case T_BYTE:
  1375       case T_CHAR:
  1376       case T_FLOAT:
  1377       case T_INT:
  1378       case T_OBJECT:
  1379       case T_SHORT:
  1380         regs[i].set1( int_stk_helper( i ) );
  1381         break;
  1382       case T_DOUBLE:
  1383       case T_LONG:
  1384         assert( sig_bt[i+1] == T_VOID, "expecting half" );
  1385         regs[i].set_pair( int_stk_helper( i+1 ), int_stk_helper( i ) );
  1386         break;
  1387       case T_VOID: regs[i].set_bad(); break;
  1388       default:
  1389         ShouldNotReachHere();
  1391       if (regs[i].first()->is_stack()) {
  1392         int off =  regs[i].first()->reg2stack();
  1393         if (off > max_stack_slots) max_stack_slots = off;
  1395       if (regs[i].second()->is_stack()) {
  1396         int off =  regs[i].second()->reg2stack();
  1397         if (off > max_stack_slots) max_stack_slots = off;
  1400 #endif // _LP64
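         // Convert the highest slot index used into a slot count (+1) and round to an
         // even number of slots so the C out-arg area stays doubleword aligned.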
  1402   return round_to(max_stack_slots + 1, 2);
  1407 // ---------------------------------------------------------------------------
  1408 void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
  1409   switch (ret_type) {
  1410   case T_FLOAT:
  1411     __ stf(FloatRegisterImpl::S, F0, SP, frame_slots*VMRegImpl::stack_slot_size - 4+STACK_BIAS);
  1412     break;
  1413   case T_DOUBLE:
  1414     __ stf(FloatRegisterImpl::D, F0, SP, frame_slots*VMRegImpl::stack_slot_size - 8+STACK_BIAS);
  1415     break;
  1419 void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
  1420   switch (ret_type) {
  1421   case T_FLOAT:
  1422     __ ldf(FloatRegisterImpl::S, SP, frame_slots*VMRegImpl::stack_slot_size - 4+STACK_BIAS, F0);
  1423     break;
  1424   case T_DOUBLE:
  1425     __ ldf(FloatRegisterImpl::D, SP, frame_slots*VMRegImpl::stack_slot_size - 8+STACK_BIAS, F0);
  1426     break;
  1430 // Check and forward any pending exception.  Thread is stored in
  1431 // L7_thread_cache and possibly NOT in G2_thread.  Since this is a native call, there
  1432 // is no exception handler.  We merely pop this frame off and throw the
  1433 // exception in the caller's frame.
  1434 static void check_forward_pending_exception(MacroAssembler *masm, Register Rex_oop) {
  1435   Label L;
  1436   __ br_null(Rex_oop, false, Assembler::pt, L);
  1437   __ delayed()->mov(L7_thread_cache, G2_thread); // restore in case we have exception
  1438   // Since this is a native call, we *know* the proper exception handler
  1439   // without calling into the VM: it's the empty function.  Just pop this
  1440   // frame and then jump to forward_exception_entry; O7 will contain the
  1441   // native caller's return PC.
  1442   Address exception_entry(G3_scratch, StubRoutines::forward_exception_entry());
  1443   __ jump_to(exception_entry);
  1444   __ delayed()->restore();      // Pop this frame off.
  1445   __ bind(L);
  1448 // A simple move of integer like type
  1449 static void simple_move32(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
  1450   if (src.first()->is_stack()) {
  1451     if (dst.first()->is_stack()) {
  1452       // stack to stack
  1453       __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5);
  1454       __ st(L5, SP, reg2offset(dst.first()) + STACK_BIAS);
  1455     } else {
  1456       // stack to reg
  1457       __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
  1459   } else if (dst.first()->is_stack()) {
  1460     // reg to stack
  1461     __ st(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS);
  1462   } else {
  1463     __ mov(src.first()->as_Register(), dst.first()->as_Register());
  1467 // On 64 bit we will store integer like items to the stack as
  1468 // 64 bit items (sparc abi) even though java would only store
  1469 // 32 bits for a parameter. On 32 bit it will simply be 32 bits.
  1470 // So this routine will do 32->32 on 32 bit and 32->64 on 64 bit.
  1471 static void move32_64(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
  1472   if (src.first()->is_stack()) {
  1473     if (dst.first()->is_stack()) {
  1474       // stack to stack
  1475       __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5);
  1476       __ st_ptr(L5, SP, reg2offset(dst.first()) + STACK_BIAS);
  1477     } else {
  1478       // stack to reg
  1479       __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
  1481   } else if (dst.first()->is_stack()) {
  1482     // reg to stack
  1483     __ st_ptr(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS);
  1484   } else {
  1485     __ mov(src.first()->as_Register(), dst.first()->as_Register());
  1490 // An oop arg. Must pass a handle not the oop itself
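       // (The "handle" is just the address of a stack slot holding the oop; when the oop
       //  itself is NULL the conditional moves below substitute G0 so the native code sees
       //  a NULL jobject rather than a pointer to a NULL slot.)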
  1491 static void object_move(MacroAssembler* masm,
  1492                         OopMap* map,
  1493                         int oop_handle_offset,
  1494                         int framesize_in_slots,
  1495                         VMRegPair src,
  1496                         VMRegPair dst,
  1497                         bool is_receiver,
  1498                         int* receiver_offset) {
  1500   // must pass a handle. First figure out the location we use as a handle
  1502   if (src.first()->is_stack()) {
  1503     // Oop is already on the stack
  1504     Register rHandle = dst.first()->is_stack() ? L5 : dst.first()->as_Register();
  1505     __ add(FP, reg2offset(src.first()) + STACK_BIAS, rHandle);
  1506     __ ld_ptr(rHandle, 0, L4);
  1507 #ifdef _LP64
  1508     __ movr( Assembler::rc_z, L4, G0, rHandle );
  1509 #else
  1510     __ tst( L4 );
  1511     __ movcc( Assembler::zero, false, Assembler::icc, G0, rHandle );
  1512 #endif
  1513     if (dst.first()->is_stack()) {
  1514       __ st_ptr(rHandle, SP, reg2offset(dst.first()) + STACK_BIAS);
  1516     int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
  1517     if (is_receiver) {
  1518       *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size;
  1520     map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots));
  1521   } else {
  1522     // Oop is in an input register; we must flush it to the stack
  1523     const Register rOop = src.first()->as_Register();
  1524     const Register rHandle = L5;
  1525     int oop_slot = rOop->input_number() * VMRegImpl::slots_per_word + oop_handle_offset;
  1526     int offset = oop_slot*VMRegImpl::stack_slot_size;
  1527     Label skip;
  1528     __ st_ptr(rOop, SP, offset + STACK_BIAS);
  1529     if (is_receiver) {
  1530       *receiver_offset = oop_slot * VMRegImpl::stack_slot_size;
  1532     map->set_oop(VMRegImpl::stack2reg(oop_slot));
  1533     __ add(SP, offset + STACK_BIAS, rHandle);
  1534 #ifdef _LP64
  1535     __ movr( Assembler::rc_z, rOop, G0, rHandle );
  1536 #else
  1537     __ tst( rOop );
  1538     __ movcc( Assembler::zero, false, Assembler::icc, G0, rHandle );
  1539 #endif
  1541     if (dst.first()->is_stack()) {
  1542       __ st_ptr(rHandle, SP, reg2offset(dst.first()) + STACK_BIAS);
  1543     } else {
  1544       __ mov(rHandle, dst.first()->as_Register());
  1549 // A float arg may have to do float reg to int reg conversion
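       // (SPARC has no direct gpr<->fpr move, so the mixed cases below bounce the value
       //  through a scratch word at FP-4.)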
  1550 static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
  1551   assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move");
  1553   if (src.first()->is_stack()) {
  1554     if (dst.first()->is_stack()) {
  1555       // stack to stack the easiest of the bunch
  1556       __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5);
  1557       __ st(L5, SP, reg2offset(dst.first()) + STACK_BIAS);
  1558     } else {
  1559       // stack to reg
  1560       if (dst.first()->is_Register()) {
  1561         __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
  1562       } else {
  1563         __ ldf(FloatRegisterImpl::S, FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_FloatRegister());
  1566   } else if (dst.first()->is_stack()) {
  1567     // reg to stack
  1568     if (src.first()->is_Register()) {
  1569       __ st(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS);
  1570     } else {
  1571       __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), SP, reg2offset(dst.first()) + STACK_BIAS);
  1573   } else {
  1574     // reg to reg
  1575     if (src.first()->is_Register()) {
  1576       if (dst.first()->is_Register()) {
  1577         // gpr -> gpr
  1578         __ mov(src.first()->as_Register(), dst.first()->as_Register());
  1579       } else {
  1580         // gpr -> fpr
  1581         __ st(src.first()->as_Register(), FP, -4 + STACK_BIAS);
  1582         __ ldf(FloatRegisterImpl::S, FP, -4 + STACK_BIAS, dst.first()->as_FloatRegister());
  1584     } else if (dst.first()->is_Register()) {
  1585       // fpr -> gpr
  1586       __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), FP, -4 + STACK_BIAS);
  1587       __ ld(FP, -4 + STACK_BIAS, dst.first()->as_Register());
  1588     } else {
  1589       // fpr -> fpr
  1590       // In theory these overlap but the ordering is such that this is likely a nop
  1591       if ( src.first() != dst.first()) {
  1592         __ fmov(FloatRegisterImpl::S, src.first()->as_FloatRegister(), dst.first()->as_FloatRegister());
  1598 static void split_long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
  1599   VMRegPair src_lo(src.first());
  1600   VMRegPair src_hi(src.second());
  1601   VMRegPair dst_lo(dst.first());
  1602   VMRegPair dst_hi(dst.second());
  1603   simple_move32(masm, src_lo, dst_lo);
  1604   simple_move32(masm, src_hi, dst_hi);
  1607 // A long move
  1608 static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
  1610   // Do the simple ones here else do two int moves
  1611   if (src.is_single_phys_reg() ) {
  1612     if (dst.is_single_phys_reg()) {
  1613       __ mov(src.first()->as_Register(), dst.first()->as_Register());
  1614     } else {
  1615       // split src into two separate registers
  1616       // Remember hi means hi address or lsw on sparc
  1617       // Move msw to lsw
  1618       if (dst.second()->is_reg()) {
  1619         // MSW -> MSW
  1620         __ srax(src.first()->as_Register(), 32, dst.first()->as_Register());
  1621         // Now LSW -> LSW
  1622         // this will only move lo -> lo and ignore hi
  1623         VMRegPair split(dst.second());
  1624         simple_move32(masm, src, split);
  1625       } else {
  1626         VMRegPair split(src.first(), L4->as_VMReg());
  1627         // MSW -> MSW (lo ie. first word)
  1628         __ srax(src.first()->as_Register(), 32, L4);
  1629         split_long_move(masm, split, dst);
  1632   } else if (dst.is_single_phys_reg()) {
  1633     if (src.is_adjacent_aligned_on_stack(2)) {
  1634       __ ldd(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
  1635     } else {
  1636       // dst is a single reg.
  1637       // Remember lo is low address not msb for stack slots
  1638       // and lo is the "real" register for registers
  1639       // src is
  1641       VMRegPair split;
  1643       if (src.first()->is_reg()) {
  1644         // src.lo (msw) is a reg, src.hi is stk/reg
  1645         // we will move: src.hi (LSW) -> dst.lo, src.lo (MSW) -> src.lo [the MSW is in the LSW of the reg]
  1646         split.set_pair(dst.first(), src.first());
  1647       } else {
  1648         // msw is stack move to L5
  1649         // lsw is stack move to dst.lo (real reg)
  1650         // we will move: src.hi (LSW) -> dst.lo, src.lo (MSW) -> L5
  1651         split.set_pair(dst.first(), L5->as_VMReg());
  1654       // src.lo -> src.lo/L5, src.hi -> dst.lo (the real reg)
  1655       // msw   -> src.lo/L5,  lsw -> dst.lo
  1656       split_long_move(masm, src, split);
  1658       // So dst now has the low order correct; position the
  1659       // msw half
  1660       __ sllx(split.first()->as_Register(), 32, L5);
  1662       const Register d = dst.first()->as_Register();
  1663       __ or3(L5, d, d);
  1665   } else {
  1666     // For LP64 we can probably do better.
  1667     split_long_move(masm, src, dst);
  1671 // A double move
  1672 static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
  1674   // The painful thing here is that like long_move a VMRegPair might be
  1675   // 1: a single physical register
  1676   // 2: two physical registers (v8)
  1677   // 3: a physical reg [lo] and a stack slot [hi] (v8)
  1678   // 4: two stack slots
  1680   // Since src is always a java calling convention we know that the src pair
  1681   // is always either all registers or all stack (and aligned?)
  1683   // in a register [lo] and a stack slot [hi]
  1684   if (src.first()->is_stack()) {
  1685     if (dst.first()->is_stack()) {
  1686       // stack to stack the easiest of the bunch
  1687       // ought to be a way to do this where if alignment is ok we use ldd/std when possible
  1688       __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5);
  1689       __ ld(FP, reg2offset(src.second()) + STACK_BIAS, L4);
  1690       __ st(L5, SP, reg2offset(dst.first()) + STACK_BIAS);
  1691       __ st(L4, SP, reg2offset(dst.second()) + STACK_BIAS);
  1692     } else {
  1693       // stack to reg
  1694       if (dst.second()->is_stack()) {
  1695         // stack -> reg, stack -> stack
  1696         __ ld(FP, reg2offset(src.second()) + STACK_BIAS, L4);
  1697         if (dst.first()->is_Register()) {
  1698           __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
  1699         } else {
  1700           __ ldf(FloatRegisterImpl::S, FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_FloatRegister());
  1702         // This was missing. (very rare case)
  1703         __ st(L4, SP, reg2offset(dst.second()) + STACK_BIAS);
  1704       } else {
  1705         // stack -> reg
  1706         // Eventually optimize for alignment QQQ
  1707         if (dst.first()->is_Register()) {
  1708           __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
  1709           __ ld(FP, reg2offset(src.second()) + STACK_BIAS, dst.second()->as_Register());
  1710         } else {
  1711           __ ldf(FloatRegisterImpl::S, FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_FloatRegister());
  1712           __ ldf(FloatRegisterImpl::S, FP, reg2offset(src.second()) + STACK_BIAS, dst.second()->as_FloatRegister());
  1716   } else if (dst.first()->is_stack()) {
  1717     // reg to stack
  1718     if (src.first()->is_Register()) {
  1719       // Eventually optimize for alignment QQQ
  1720       __ st(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS);
  1721       if (src.second()->is_stack()) {
  1722         __ ld(FP, reg2offset(src.second()) + STACK_BIAS, L4);
  1723         __ st(L4, SP, reg2offset(dst.second()) + STACK_BIAS);
  1724       } else {
  1725         __ st(src.second()->as_Register(), SP, reg2offset(dst.second()) + STACK_BIAS);
  1727     } else {
  1728       // fpr to stack
  1729       if (src.second()->is_stack()) {
  1730         ShouldNotReachHere();
  1731       } else {
  1732         // Is the stack aligned?
  1733         if (reg2offset(dst.first()) & 0x7) {
  1734           // No; do as pairs
  1735           __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), SP, reg2offset(dst.first()) + STACK_BIAS);
  1736           __ stf(FloatRegisterImpl::S, src.second()->as_FloatRegister(), SP, reg2offset(dst.second()) + STACK_BIAS);
  1737         } else {
  1738           __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), SP, reg2offset(dst.first()) + STACK_BIAS);
  1742   } else {
  1743     // reg to reg
  1744     if (src.first()->is_Register()) {
  1745       if (dst.first()->is_Register()) {
  1746         // gpr -> gpr
  1747         __ mov(src.first()->as_Register(), dst.first()->as_Register());
  1748         __ mov(src.second()->as_Register(), dst.second()->as_Register());
  1749       } else {
  1750         // gpr -> fpr
  1751         // ought to be able to do a single store
  1752         __ stx(src.first()->as_Register(), FP, -8 + STACK_BIAS);
  1753         __ stx(src.second()->as_Register(), FP, -4 + STACK_BIAS);
  1754         // ought to be able to do a single load
  1755         __ ldf(FloatRegisterImpl::S, FP, -8 + STACK_BIAS, dst.first()->as_FloatRegister());
  1756         __ ldf(FloatRegisterImpl::S, FP, -4 + STACK_BIAS, dst.second()->as_FloatRegister());
  1758     } else if (dst.first()->is_Register()) {
  1759       // fpr -> gpr
  1760       // ought to be able to do a single store
  1761       __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), FP, -8 + STACK_BIAS);
  1762       // ought to be able to do a single load
  1763       // REMEMBER first() is low address not LSB
  1764       __ ld(FP, -8 + STACK_BIAS, dst.first()->as_Register());
  1765       if (dst.second()->is_Register()) {
  1766         __ ld(FP, -4 + STACK_BIAS, dst.second()->as_Register());
  1767       } else {
  1768         __ ld(FP, -4 + STACK_BIAS, L4);
  1769         __ st(L4, SP, reg2offset(dst.second()) + STACK_BIAS);
  1771     } else {
  1772       // fpr -> fpr
  1773       // In theory these overlap but the ordering is such that this is likely a nop
  1774       if ( src.first() != dst.first()) {
  1775         __ fmov(FloatRegisterImpl::D, src.first()->as_FloatRegister(), dst.first()->as_FloatRegister());
  1781 // Creates an inner frame if one hasn't already been created, and
  1782 // saves a copy of the thread in L7_thread_cache
  1783 static void create_inner_frame(MacroAssembler* masm, bool* already_created) {
  1784   if (!*already_created) {
  1785     __ save_frame(0);
  1786     // Save thread in L7 (INNER FRAME); it crosses a bunch of VM calls below
  1787     // Don't use save_thread because it smashes G2 and we merely want to save a
  1788     // copy
  1789     __ mov(G2_thread, L7_thread_cache);
  1790     *already_created = true;
  1794 // ---------------------------------------------------------------------------
  1795 // Generate a native wrapper for a given method.  The method takes arguments
  1796 // in the Java compiled code convention, marshals them to the native
  1797 // convention (handlizes oops, etc), transitions to native, makes the call,
  1798 // returns to java state (possibly blocking), unhandlizes any result and
  1799 // returns.
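       // Roughly, the wrapper generated below: performs the inline cache check, (under
       // COMPILER1) tries an Object.hashCode fast path, shuffles the Java args into the
       // C convention and handlizes oops, optionally locks, transitions to
       // _thread_in_native, calls the native entry, checks for a safepoint on return,
       // optionally unlocks, forwards any pending exception and returns.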
  1800 nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
  1801                                                 methodHandle method,
  1802                                                 int total_in_args,
  1803                                                 int comp_args_on_stack, // in VMRegStackSlots
  1804                                                 BasicType *in_sig_bt,
  1805                                                 VMRegPair *in_regs,
  1806                                                 BasicType ret_type) {
  1809   // Native nmethod wrappers never take possession of the oop arguments.
  1810   // So the caller will gc the arguments. The only thing we need an
  1811   // oopMap for is if the call is static
  1812   //
  1813   // An OopMap for lock (and class if static), and one for the VM call itself
  1814   OopMapSet *oop_maps = new OopMapSet();
  1815   intptr_t start = (intptr_t)__ pc();
  1817   // First thing make an ic check to see if we should even be here
  1819     Label L;
  1820     const Register temp_reg = G3_scratch;
  1821     Address ic_miss(temp_reg, SharedRuntime::get_ic_miss_stub());
  1822     __ verify_oop(O0);
  1823     __ ld_ptr(O0, oopDesc::klass_offset_in_bytes(), temp_reg);
  1824     __ cmp(temp_reg, G5_inline_cache_reg);
  1825     __ brx(Assembler::equal, true, Assembler::pt, L);
  1826     __ delayed()->nop();
  1828     __ jump_to(ic_miss, 0);
  1829     __ delayed()->nop();
  1830     __ align(CodeEntryAlignment);
  1831     __ bind(L);
  1834   int vep_offset = ((intptr_t)__ pc()) - start;
  1836 #ifdef COMPILER1
  1837   if (InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) {
  1838     // Object.hashCode can pull the hashCode from the header word
  1839     // instead of doing a full VM transition once it's been computed.
  1840     // Since hashCode is usually polymorphic at call sites we can't do
  1841     // this optimization at the call site without a lot of work.
  1842     Label slowCase;
  1843     Register receiver             = O0;
  1844     Register result               = O0;
  1845     Register header               = G3_scratch;
  1846     Register hash                 = G3_scratch; // overwrite header value with hash value
  1847     Register mask                 = G1;         // to get hash field from header
  1849     // Read the header and build a mask to get its hash field.  Give up if the object is not unlocked.
  1850     // We depend on hash_mask being at most 32 bits and avoid the use of
  1851     // hash_mask_in_place because it could be larger than 32 bits in a 64-bit
  1852     // vm: see markOop.hpp.
  1853     __ ld_ptr(receiver, oopDesc::mark_offset_in_bytes(), header);
  1854     __ sethi(markOopDesc::hash_mask, mask);
  1855     __ btst(markOopDesc::unlocked_value, header);
  1856     __ br(Assembler::zero, false, Assembler::pn, slowCase);
  1857     if (UseBiasedLocking) {
  1858       // Check if biased and fall through to runtime if so
  1859       __ delayed()->nop();
  1860       __ btst(markOopDesc::biased_lock_bit_in_place, header);
  1861       __ br(Assembler::notZero, false, Assembler::pn, slowCase);
  1863     __ delayed()->or3(mask, markOopDesc::hash_mask & 0x3ff, mask);
  1865     // Check for a valid (non-zero) hash code and get its value.
  1866 #ifdef _LP64
  1867     __ srlx(header, markOopDesc::hash_shift, hash);
  1868 #else
  1869     __ srl(header, markOopDesc::hash_shift, hash);
  1870 #endif
  1871     __ andcc(hash, mask, hash);
  1872     __ br(Assembler::equal, false, Assembler::pn, slowCase);
  1873     __ delayed()->nop();
  1875     // leaf return.
  1876     __ retl();
  1877     __ delayed()->mov(hash, result);
  1878     __ bind(slowCase);
  1880 #endif // COMPILER1
  1883   // We have received a description of where all the java args are located
  1884   // on entry to the wrapper. We need to convert these args to where
  1885   // the jni function will expect them. To figure out where they go
  1886   // we convert the java signature to a C signature by inserting
  1887   // the hidden arguments as arg[0] and possibly arg[1] (static method)
  1889   int total_c_args = total_in_args + 1;
  1890   if (method->is_static()) {
  1891     total_c_args++;
  1894   BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
  1895   VMRegPair  * out_regs   = NEW_RESOURCE_ARRAY(VMRegPair,   total_c_args);
  1897   int argc = 0;
  1898   out_sig_bt[argc++] = T_ADDRESS;
  1899   if (method->is_static()) {
  1900     out_sig_bt[argc++] = T_OBJECT;
  1903   for (int i = 0; i < total_in_args ; i++ ) {
  1904     out_sig_bt[argc++] = in_sig_bt[i];
  1907   // Now figure out where the args must be stored and how much stack space
  1908   // they require (neglecting out_preserve_stack_slots but including space for
  1909   // storing the 1st six register arguments). It's weird; see int_stk_helper.
  1910   //
  1911   int out_arg_slots;
  1912   out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args);
  1914   // Compute framesize for the wrapper.  We need to handlize all oops in
  1915   // registers. We must create space for them here that is disjoint from
  1916   // the windowed save area because we have no control over when we might
  1917   // flush the window again and overwrite values that gc has since modified.
  1918   // (The live window race)
  1919   //
  1920   // We always just allocate 6 words for storing down these objects. This allows
  1921   // us to simply record the base and use the Ireg number to decide which
  1922   // slot to use. (Note that the reg number is the inbound number, not the
  1923   // outbound number).
  1924   // We must shuffle args to match the native convention, and include var-args space.
  1926   // Calculate the total number of stack slots we will need.
  1928   // First count the abi requirement plus all of the outgoing args
  1929   int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
  1931   // Now the space for the inbound oop handle area
  1933   int oop_handle_offset = stack_slots;
  1934   stack_slots += 6*VMRegImpl::slots_per_word;
  1936   // Now any space we need for handlizing a klass if static method
  1938   int oop_temp_slot_offset = 0;
  1939   int klass_slot_offset = 0;
  1940   int klass_offset = -1;
  1941   int lock_slot_offset = 0;
  1942   bool is_static = false;
  1944   if (method->is_static()) {
  1945     klass_slot_offset = stack_slots;
  1946     stack_slots += VMRegImpl::slots_per_word;
  1947     klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size;
  1948     is_static = true;
  1951   // Plus a lock if needed
  1953   if (method->is_synchronized()) {
  1954     lock_slot_offset = stack_slots;
  1955     stack_slots += VMRegImpl::slots_per_word;
  1958   // Now a place to save return value or as a temporary for any gpr -> fpr moves
  1959   stack_slots += 2;
  1961   // Ok The space we have allocated will look like:
  1962   //
  1963   //
  1964   // FP-> |                     |
  1965   //      |---------------------|
  1966   //      | 2 slots for moves   |
  1967   //      |---------------------|
  1968   //      | lock box (if sync)  |
  1969   //      |---------------------| <- lock_slot_offset
  1970   //      | klass (if static)   |
  1971   //      |---------------------| <- klass_slot_offset
  1972   //      | oopHandle area      |
  1973   //      |---------------------| <- oop_handle_offset
  1974   //      | outbound memory     |
  1975   //      | based arguments     |
  1976   //      |                     |
  1977   //      |---------------------|
  1978   //      | vararg area         |
  1979   //      |---------------------|
  1980   //      |                     |
  1981   // SP-> | out_preserved_slots |
  1982   //
  1983   //
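         // In other words: out_preserve_stack_slots() + out_arg_slots + six oop-handle
         // words + one word for the klass (static only) + one word for the lock
         // (synchronized only) + 2 slots for moves, rounded below to a 2-word boundary.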
  1986   // Now compute actual number of stack words we need rounding to make
  1987   // stack properly aligned.
  1988   stack_slots = round_to(stack_slots, 2 * VMRegImpl::slots_per_word);
  1990   int stack_size = stack_slots * VMRegImpl::stack_slot_size;
  1992   // Generate stack overflow check before creating frame
  1993   __ generate_stack_overflow_check(stack_size);
  1995   // Generate a new frame for the wrapper.
  1996   __ save(SP, -stack_size, SP);
  1998   int frame_complete = ((intptr_t)__ pc()) - start;
  2000   __ verify_thread();
  2003   //
  2004   // We immediately shuffle the arguments so that any vm call we have to
  2005   // make from here on out (sync slow path, jvmti, etc.) we will have
  2006   // captured the oops from our caller and have a valid oopMap for
  2007   // them.
  2009   // -----------------
  2010   // The Grand Shuffle
  2011   //
  2012   // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv*
  2013   // (derived from JavaThread* which is in L7_thread_cache) and, if static,
  2014   // the class mirror instead of a receiver.  This pretty much guarantees that
  2015   // register layout will not match.  We ignore these extra arguments during
  2016   // the shuffle. The shuffle is described by the two calling convention
  2017   // vectors we have in our possession. We simply walk the java vector to
  2018   // get the source locations and the c vector to get the destinations.
  2019   // Because we have a new window and the argument registers are completely
  2020   // disjoint ( I0 -> O1, I1 -> O2, ...) we have nothing to worry about
  2021   // here.
  2023   // This is a trick. We double the stack slots so we can claim
  2024   // the oops in the caller's frame. Since we are sure to have
  2025   // more args than the caller, doubling is enough to make
  2026   // sure we can capture all the incoming oop args from the
  2027   // caller.
  2028   //
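         // (The doubled map also lets object_move record oops that live in the caller's
         //  frame: such a slot is expressed as offset_in_older_frame + framesize_in_slots,
         //  which would not fit in a map sized for this frame alone.)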
  2029   OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
  2030   int c_arg = total_c_args - 1;
  2031   // Record sp-based slot for receiver on stack for non-static methods
  2032   int receiver_offset = -1;
  2034   // We move the arguments backward because the floating point registers
  2035   // destination will always be to a register with a greater or equal register
  2036   // number or the stack.
  2038 #ifdef ASSERT
  2039   bool reg_destroyed[RegisterImpl::number_of_registers];
  2040   bool freg_destroyed[FloatRegisterImpl::number_of_registers];
  2041   for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) {
  2042     reg_destroyed[r] = false;
  2044   for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) {
  2045     freg_destroyed[f] = false;
  2048 #endif /* ASSERT */
  2050   for ( int i = total_in_args - 1; i >= 0 ; i--, c_arg-- ) {
  2052 #ifdef ASSERT
  2053     if (in_regs[i].first()->is_Register()) {
  2054       assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "ack!");
  2055     } else if (in_regs[i].first()->is_FloatRegister()) {
  2056       assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding(FloatRegisterImpl::S)], "ack!");
  2058     if (out_regs[c_arg].first()->is_Register()) {
  2059       reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
  2060     } else if (out_regs[c_arg].first()->is_FloatRegister()) {
  2061       freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding(FloatRegisterImpl::S)] = true;
  2063 #endif /* ASSERT */
  2065     switch (in_sig_bt[i]) {
  2066       case T_ARRAY:
  2067       case T_OBJECT:
  2068         object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
  2069                     ((i == 0) && (!is_static)),
  2070                     &receiver_offset);
  2071         break;
  2072       case T_VOID:
  2073         break;
  2075       case T_FLOAT:
  2076         float_move(masm, in_regs[i], out_regs[c_arg]);
  2077           break;
  2079       case T_DOUBLE:
  2080         assert( i + 1 < total_in_args &&
  2081                 in_sig_bt[i + 1] == T_VOID &&
  2082                 out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
  2083         double_move(masm, in_regs[i], out_regs[c_arg]);
  2084         break;
  2086       case T_LONG :
  2087         long_move(masm, in_regs[i], out_regs[c_arg]);
  2088         break;
  2090       case T_ADDRESS: assert(false, "found T_ADDRESS in java args");
  2092       default:
  2093         move32_64(masm, in_regs[i], out_regs[c_arg]);
  2097   // Pre-load a static method's oop into O1.  Used both by locking code and
  2098   // the normal JNI call code.
  2099   if (method->is_static()) {
  2100     __ set_oop_constant(JNIHandles::make_local(Klass::cast(method->method_holder())->java_mirror()), O1);
  2102     // Now handlize the static class mirror in O1.  It's known not-null.
  2103     __ st_ptr(O1, SP, klass_offset + STACK_BIAS);
  2104     map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
  2105     __ add(SP, klass_offset + STACK_BIAS, O1);
  2109   const Register L6_handle = L6;
  2111   if (method->is_synchronized()) {
  2112     __ mov(O1, L6_handle);
  2115   // We have all of the arguments setup at this point. We MUST NOT touch any Oregs
  2116   // except O6/O7. So if we must call out we must push a new frame. We immediately
  2117   // push a new frame and flush the windows.
  2119 #ifdef _LP64
  2120   intptr_t thepc = (intptr_t) __ pc();
  2122     address here = __ pc();
  2123     // Call the next instruction
  2124     __ call(here + 8, relocInfo::none);
  2125     __ delayed()->nop();
  2127 #else
  2128   intptr_t thepc = __ load_pc_address(O7, 0);
  2129 #endif /* _LP64 */
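         // (On LP64 the "call" to here + 8 is only a trick to capture the current pc in O7;
         //  execution falls straight through to the next instruction.)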
  2131   // We use the same pc/oopMap repeatedly when we call out
  2132   oop_maps->add_gc_map(thepc - start, map);
  2134   // O7 now has the pc loaded that we will use when we finally call to native.
  2136   // Save thread in L7; it crosses a bunch of VM calls below
  2137   // Don't use save_thread because it smashes G2 and we merely
  2138   // want to save a copy
  2139   __ mov(G2_thread, L7_thread_cache);
  2142   // If we create an inner frame, once is plenty
  2143   // when we create it we must also save G2_thread
  2144   bool inner_frame_created = false;
  2146   // dtrace method entry support
  2148     SkipIfEqual skip_if(
  2149       masm, G3_scratch, &DTraceMethodProbes, Assembler::zero);
  2150     // create inner frame
  2151     __ save_frame(0);
  2152     __ mov(G2_thread, L7_thread_cache);
  2153     __ set_oop_constant(JNIHandles::make_local(method()), O1);
  2154     __ call_VM_leaf(L7_thread_cache,
  2155          CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry),
  2156          G2_thread, O1);
  2157     __ restore();
  2160   // We are in the jni frame unless inner_frame_created is true, in which case
  2161   // we are one frame deeper (the "inner" frame). If we are in the
  2162   // "inner" frame the args are in the Iregs; if in the jni frame then
  2163   // they are in the Oregs.
  2164   // If we ever need to go to the VM (for locking, jvmti) then
  2165   // we will always be in the "inner" frame.
  2167   // Lock a synchronized method
  2168   int lock_offset = -1;         // Set if locked
  2169   if (method->is_synchronized()) {
  2170     Register Roop = O1;
  2171     const Register L3_box = L3;
  2173     create_inner_frame(masm, &inner_frame_created);
  2175     __ ld_ptr(I1, 0, O1);
  2176     Label done;
  2178     lock_offset = (lock_slot_offset * VMRegImpl::stack_slot_size);
  2179     __ add(FP, lock_offset+STACK_BIAS, L3_box);
  2180 #ifdef ASSERT
  2181     if (UseBiasedLocking) {
  2182       // making the box point to itself will make it clear it went unused
  2183       // but also be obviously invalid
  2184       __ st_ptr(L3_box, L3_box, 0);
  2186 #endif // ASSERT
  2187     //
  2188     // Compiler_lock_object (Roop, Rmark, Rbox, Rscratch) -- kills Rmark, Rbox, Rscratch
  2189     //
  2190     __ compiler_lock_object(Roop, L1,    L3_box, L2);
  2191     __ br(Assembler::equal, false, Assembler::pt, done);
  2192     __ delayed() -> add(FP, lock_offset+STACK_BIAS, L3_box);
  2195     // None of the above fast optimizations worked so we have to get into the
  2196     // slow case of monitor enter.  Inline a special case of call_VM that
  2197     // disallows any pending_exception.
  2198     __ mov(Roop, O0);            // Need oop in O0
  2199     __ mov(L3_box, O1);
  2201     // Record last_Java_sp, in case the VM code releases the JVM lock.
  2203     __ set_last_Java_frame(FP, I7);
  2205     // do the call
  2206     __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), relocInfo::runtime_call_type);
  2207     __ delayed()->mov(L7_thread_cache, O2);
  2209     __ restore_thread(L7_thread_cache); // restore G2_thread
  2210     __ reset_last_Java_frame();
  2212 #ifdef ASSERT
  2213     { Label L;
  2214     __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), O0);
  2215     __ br_null(O0, false, Assembler::pt, L);
  2216     __ delayed()->nop();
  2217     __ stop("no pending exception allowed on exit from IR::monitorenter");
  2218     __ bind(L);
  2220 #endif
  2221     __ bind(done);
  2225   // Finally just about ready to make the JNI call
  2227   __ flush_windows();
  2228   if (inner_frame_created) {
  2229     __ restore();
  2230   } else {
  2231     // Store only what we need from this frame
  2232     // QQQ I think that non-v9 (like we care) we don't need these saves
  2233     // either as the flush traps and the current window goes too.
  2234     __ st_ptr(FP, SP, FP->sp_offset_in_saved_window()*wordSize + STACK_BIAS);
  2235     __ st_ptr(I7, SP, I7->sp_offset_in_saved_window()*wordSize + STACK_BIAS);
  2238   // get JNIEnv* which is first argument to native
  2240   __ add(G2_thread, in_bytes(JavaThread::jni_environment_offset()), O0);
  2242   // Use that pc we placed in O7 a while back as the current frame anchor
  2244   __ set_last_Java_frame(SP, O7);
  2246   // Transition from _thread_in_Java to _thread_in_native.
  2247   __ set(_thread_in_native, G3_scratch);
  2248   __ st(G3_scratch, G2_thread, in_bytes(JavaThread::thread_state_offset()));
  2250   // We flushed the windows ages ago; now mark them as flushed
  2252   // mark windows as flushed
  2253   __ set(JavaFrameAnchor::flushed, G3_scratch);
  2255   Address flags(G2_thread,
  2256                 0,
  2257                 in_bytes(JavaThread::frame_anchor_offset()) + in_bytes(JavaFrameAnchor::flags_offset()));
  2259 #ifdef _LP64
  2260   Address dest(O7, method->native_function());
  2261   __ relocate(relocInfo::runtime_call_type);
  2262   __ jumpl_to(dest, O7);
  2263 #else
  2264   __ call(method->native_function(), relocInfo::runtime_call_type);
  2265 #endif
  2266   __ delayed()->st(G3_scratch, flags);
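         // (The store of JavaFrameAnchor::flushed sits in the delay slot of the call/jump
         //  above, so the anchor is marked flushed exactly as control transfers to the
         //  native function.)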
  2268   __ restore_thread(L7_thread_cache); // restore G2_thread
  2270   // Unpack native results.  For int-types, we do any needed sign-extension
  2271   // and move things into I0.  The return value there will survive any VM
  2272   // calls for blocking or unlocking.  An FP or OOP result (handle) is done
  2273   // specially in the slow-path code.
  2274   switch (ret_type) {
  2275   case T_VOID:    break;        // Nothing to do!
  2276   case T_FLOAT:   break;        // Got it where we want it (unless slow-path)
  2277   case T_DOUBLE:  break;        // Got it where we want it (unless slow-path)
  2278   // In the 64 bit build the result is in O0; in the 32 bit build it is in O0, O1
  2279   case T_LONG:
  2280 #ifndef _LP64
  2281                   __ mov(O1, I1);
  2282 #endif
  2283                   // Fall thru
  2284   case T_OBJECT:                // Really a handle
  2285   case T_ARRAY:
  2286   case T_INT:
  2287                   __ mov(O0, I0);
  2288                   break;
  2289   case T_BOOLEAN: __ subcc(G0, O0, G0); __ addc(G0, 0, I0); break; // !0 => true; 0 => false
  2290   case T_BYTE   : __ sll(O0, 24, O0); __ sra(O0, 24, I0);   break;
  2291   case T_CHAR   : __ sll(O0, 16, O0); __ srl(O0, 16, I0);   break; // cannot use and3, 0xFFFF too big as immediate value!
  2292   case T_SHORT  : __ sll(O0, 16, O0); __ sra(O0, 16, I0);   break;
  2293     break;                      // Cannot de-handlize until after reclaiming jvm_lock
  2294   default:
  2295     ShouldNotReachHere();
  2298   // must we block?
  2300   // Block, if necessary, before resuming in _thread_in_Java state.
  2301   // In order for GC to work, don't clear the last_Java_sp until after blocking.
  2302   { Label no_block;
  2303     Address sync_state(G3_scratch, SafepointSynchronize::address_of_state());
  2305     // Switch thread to "native transition" state before reading the synchronization state.
  2306     // This additional state is necessary because reading and testing the synchronization
  2307     // state is not atomic w.r.t. GC, as this scenario demonstrates:
  2308     //     Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
  2309     //     VM thread changes sync state to synchronizing and suspends threads for GC.
  2310     //     Thread A is resumed to finish this native method, but doesn't block here since it
  2311     //     didn't see any synchronization in progress, and escapes.
  2312     __ set(_thread_in_native_trans, G3_scratch);
  2313     __ st(G3_scratch, G2_thread, in_bytes(JavaThread::thread_state_offset()));
  2314     if(os::is_MP()) {
  2315       if (UseMembar) {
  2316         // Force this write out before the read below
  2317         __ membar(Assembler::StoreLoad);
  2318       } else {
  2319         // Write serialization page so VM thread can do a pseudo remote membar.
  2320         // We use the current thread pointer to calculate a thread specific
  2321         // offset to write to within the page. This minimizes bus traffic
  2322         // due to cache line collision.
  2323         __ serialize_memory(G2_thread, G1_scratch, G3_scratch);
  2326     __ load_contents(sync_state, G3_scratch);
  2327     __ cmp(G3_scratch, SafepointSynchronize::_not_synchronized);
  2329     Label L;
  2330     Address suspend_state(G2_thread, 0, in_bytes(JavaThread::suspend_flags_offset()));
  2331     __ br(Assembler::notEqual, false, Assembler::pn, L);
  2332     __ delayed()->
  2333       ld(suspend_state, G3_scratch);
  2334     __ cmp(G3_scratch, 0);
  2335     __ br(Assembler::equal, false, Assembler::pt, no_block);
  2336     __ delayed()->nop();
  2337     __ bind(L);
  2339     // Block.  Save any potential method result value before the operation and
  2340     // use a leaf call to leave the last_Java_frame setup undisturbed. Doing this
  2341     // lets us share the oopMap we used when we went native rather than create
  2342     // a distinct one for this pc
  2343     //
  2344     save_native_result(masm, ret_type, stack_slots);
  2345     __ call_VM_leaf(L7_thread_cache,
  2346                     CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans),
  2347                     G2_thread);
  2349     // Restore any method result value
  2350     restore_native_result(masm, ret_type, stack_slots);
  2351     __ bind(no_block);
  2354   // thread state is thread_in_native_trans. Any safepoint blocking has already
  2355   // happened so we can now change state to _thread_in_Java.
  2358   __ set(_thread_in_Java, G3_scratch);
  2359   __ st(G3_scratch, G2_thread, in_bytes(JavaThread::thread_state_offset()));
  2362   Label no_reguard;
  2363   __ ld(G2_thread, in_bytes(JavaThread::stack_guard_state_offset()), G3_scratch);
  2364   __ cmp(G3_scratch, JavaThread::stack_guard_yellow_disabled);
  2365   __ br(Assembler::notEqual, false, Assembler::pt, no_reguard);
  2366   __ delayed()->nop();
  2368     save_native_result(masm, ret_type, stack_slots);
  2369   __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages));
  2370   __ delayed()->nop();
  2372   __ restore_thread(L7_thread_cache); // restore G2_thread
  2373     restore_native_result(masm, ret_type, stack_slots);
  2375   __ bind(no_reguard);
  2377   // Handle possible exception (will unlock if necessary)
  2379   // native result if any is live in freg or I0 (and I1 if long and 32bit vm)
  2381   // Unlock
  2382   if (method->is_synchronized()) {
  2383     Label done;
  2384     Register I2_ex_oop = I2;
  2385     const Register L3_box = L3;
  2386     // Get locked oop from the handle we passed to jni
  2387     __ ld_ptr(L6_handle, 0, L4);
  2388     __ add(SP, lock_offset+STACK_BIAS, L3_box);
  2389     // Must save pending exception around the slow-path VM call.  Since it's a
  2390     // leaf call, the pending exception (if any) can be kept in a register.
  2391     __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), I2_ex_oop);
  2392     // Now unlock
  2393     //                       (Roop, Rmark, Rbox,   Rscratch)
  2394     __ compiler_unlock_object(L4,   L1,    L3_box, L2);
  2395     __ br(Assembler::equal, false, Assembler::pt, done);
  2396     __ delayed()-> add(SP, lock_offset+STACK_BIAS, L3_box);
  2398     // save and restore any potential method result value around the unlocking
  2399     // operation.  Will save in I0 (or stack for FP returns).
  2400     save_native_result(masm, ret_type, stack_slots);
  2402     // Must clear pending-exception before re-entering the VM.  Since this is
  2403     // a leaf call, pending-exception-oop can be safely kept in a register.
  2404     __ st_ptr(G0, G2_thread, in_bytes(Thread::pending_exception_offset()));
  2406     // slow case of monitor exit.  Inline a special case of call_VM that
  2407     // disallows any pending_exception.
  2408     __ mov(L3_box, O1);
  2410     __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), relocInfo::runtime_call_type);
  2411     __ delayed()->mov(L4, O0);              // Need oop in O0
  2413     __ restore_thread(L7_thread_cache); // restore G2_thread
  2415 #ifdef ASSERT
  2416     { Label L;
  2417     __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), O0);
  2418     __ br_null(O0, false, Assembler::pt, L);
  2419     __ delayed()->nop();
  2420     __ stop("no pending exception allowed on exit from IR::monitorexit");
  2421     __ bind(L);
  2423 #endif
  2424     restore_native_result(masm, ret_type, stack_slots);
  2425     // check_forward_pending_exception jumps to forward_exception if any pending
  2426     // exception is set.  The forward_exception routine expects to see the
  2427     // exception in pending_exception and not in a register.  Kind of clumsy,
  2428     // since all folks who branch to forward_exception must have tested
  2429     // pending_exception first and hence have it in a register already.
  2430     __ st_ptr(I2_ex_oop, G2_thread, in_bytes(Thread::pending_exception_offset()));
  2431     __ bind(done);
  2434   // Tell dtrace about this method exit
  2436     SkipIfEqual skip_if(
  2437       masm, G3_scratch, &DTraceMethodProbes, Assembler::zero);
  2438     save_native_result(masm, ret_type, stack_slots);
  2439     __ set_oop_constant(JNIHandles::make_local(method()), O1);
  2440     __ call_VM_leaf(L7_thread_cache,
  2441        CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit),
  2442        G2_thread, O1);
  2443     restore_native_result(masm, ret_type, stack_slots);
  2446   // Clear "last Java frame" SP and PC.
  2447   __ verify_thread(); // G2_thread must be correct
  2448   __ reset_last_Java_frame();
  2450   // Unpack oop result
  2451   if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
  2452       Label L;
  2453       __ addcc(G0, I0, G0);
  2454       __ brx(Assembler::notZero, true, Assembler::pt, L);
  2455       __ delayed()->ld_ptr(I0, 0, I0);
  2456       __ mov(G0, I0);
  2457       __ bind(L);
  2458       __ verify_oop(I0);
  2461   // reset handle block
  2462   __ ld_ptr(G2_thread, in_bytes(JavaThread::active_handles_offset()), L5);
  2463   __ st_ptr(G0, L5, JNIHandleBlock::top_offset_in_bytes());
  2465   __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), G3_scratch);
  2466   check_forward_pending_exception(masm, G3_scratch);
  2469   // Return
  2471 #ifndef _LP64
  2472   if (ret_type == T_LONG) {
  2474     // Must leave proper result in O0,O1 and G1 (c2/tiered only)
  2475     __ sllx(I0, 32, G1);          // Shift bits into high G1
  2476     __ srl (I1, 0, I1);           // Zero extend O1 (harmless?)
  2477     __ or3 (I1, G1, G1);          // OR 64 bits into G1
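           // (Net effect: G1 = (I0 << 32) | zero-extended I1, i.e. the 64-bit long value
           //  reassembled in G1, while O0/O1 still carry the two 32-bit halves after the
           //  restore below.)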
  2479 #endif
  2481   __ ret();
  2482   __ delayed()->restore();
  2484   __ flush();
  2486   nmethod *nm = nmethod::new_native_nmethod(method,
  2487                                             masm->code(),
  2488                                             vep_offset,
  2489                                             frame_complete,
  2490                                             stack_slots / VMRegImpl::slots_per_word,
  2491                                             (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
  2492                                             in_ByteSize(lock_offset),
  2493                                             oop_maps);
  2494   return nm;
  2498 // this function returns the size adjustment (in number of words) to a c2i adapter
  2499 // activation for use during deoptimization
  2500 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
  2501   assert(callee_locals >= callee_parameters,
  2502           "test and remove; got more parms than locals");
  2503   if (callee_locals < callee_parameters)
  2504     return 0;                   // No adjustment for negative locals
  2505   int diff = (callee_locals - callee_parameters) * Interpreter::stackElementWords();
  2506   return round_to(diff, WordsPerLong);
  2509 // "Top of Stack" slots that may be unused by the calling convention but must
  2510 // otherwise be preserved.
  2511 // On Intel these are not necessary and the value can be zero.
  2512 // On Sparc this describes the words reserved for storing a register window
  2513 // when an interrupt occurs.
  2514 uint SharedRuntime::out_preserve_stack_slots() {
  2515   return frame::register_save_words * VMRegImpl::slots_per_word;
  2518 static void gen_new_frame(MacroAssembler* masm, bool deopt) {
  2519 //
  2520 // Common out the new frame generation for deopt and uncommon trap
  2521 //
  2522   Register        G3pcs              = G3_scratch; // Array of new pcs (input)
  2523   Register        Oreturn0           = O0;
  2524   Register        Oreturn1           = O1;
  2525   Register        O2UnrollBlock      = O2;
  2526   Register        O3array            = O3;         // Array of frame sizes (input)
  2527   Register        O4array_size       = O4;         // number of frames (input)
  2528   Register        O7frame_size       = O7;         // individual frame size (loaded below)
  2530   __ ld_ptr(O3array, 0, O7frame_size);
  2531   __ sub(G0, O7frame_size, O7frame_size);
  2532   __ save(SP, O7frame_size, SP);
  2533   __ ld_ptr(G3pcs, 0, I7);                      // load frame's new pc
  2535   #ifdef ASSERT
  2536   // make sure that the frames are aligned properly
  2537 #ifndef _LP64
  2538   __ btst(wordSize*2-1, SP);
  2539   __ breakpoint_trap(Assembler::notZero);
  2540 #endif
  2541   #endif
  2543   // Deopt needs to pass some extra live values from frame to frame
  2545   if (deopt) {
  2546     __ mov(Oreturn0->after_save(), Oreturn0);
  2547     __ mov(Oreturn1->after_save(), Oreturn1);
  2550   __ mov(O4array_size->after_save(), O4array_size);
  2551   __ sub(O4array_size, 1, O4array_size);
  2552   __ mov(O3array->after_save(), O3array);
  2553   __ mov(O2UnrollBlock->after_save(), O2UnrollBlock);
  2554   __ add(G3pcs, wordSize, G3pcs);               // point to next pc value
  2556   #ifdef ASSERT
  2557   // trash registers to show a clear pattern in backtraces
  2558   __ set(0xDEAD0000, I0);
  2559   __ add(I0,  2, I1);
  2560   __ add(I0,  4, I2);
  2561   __ add(I0,  6, I3);
  2562   __ add(I0,  8, I4);
  2563   // Don't touch I5; it could hold the valuable savedSP
  2564   __ set(0xDEADBEEF, L0);
  2565   __ mov(L0, L1);
  2566   __ mov(L0, L2);
  2567   __ mov(L0, L3);
  2568   __ mov(L0, L4);
  2569   __ mov(L0, L5);
  2571   // trash the return value as there is nothing to return yet
  2572   __ set(0xDEAD0001, O7);
  2573   #endif
  2575   __ mov(SP, O5_savedSP);
       }
  2579 static void make_new_frames(MacroAssembler* masm, bool deopt) {
  2580   //
  2581   // loop through the UnrollBlock info and create new frames
  2582   //
  2583   Register        G3pcs              = G3_scratch;
  2584   Register        Oreturn0           = O0;
  2585   Register        Oreturn1           = O1;
  2586   Register        O2UnrollBlock      = O2;
  2587   Register        O3array            = O3;
  2588   Register        O4array_size       = O4;
  2589   Label           loop;
  2591   // Before we make new frames, check to see if stack is available.
  2592   // Do this after the caller's return address is on top of stack
  2593   if (UseStackBanging) {
  2594     // Get total frame size for interpreted frames
  2595     __ ld(Address(O2UnrollBlock, 0,
  2596          Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()), O4);
  2597     __ bang_stack_size(O4, O3, G3_scratch);
         }
  2600   __ ld(Address(O2UnrollBlock, 0, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()), O4array_size);
  2601   __ ld_ptr(Address(O2UnrollBlock, 0, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()), G3pcs);
  2603   __ ld_ptr(Address(O2UnrollBlock, 0, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()), O3array);
  2605   // Adjust old interpreter frame to make space for new frame's extra java locals
  2606   //
  2607   // We capture the original sp for the transition frame only because it is needed in
  2608   // order to properly calculate interpreter_sp_adjustment. Even though in real life
  2609   // every interpreter frame captures a savedSP, it is only needed at the transition
  2610   // (fortunately). If we had to have it correct everywhere then we would need to
  2611   // be told the sp_adjustment for each frame we create. If the frame size array
  2612   // were to have twice the frame count entries then we could have pairs [sp_adjustment, frame_size]
  2613   // for each frame we create and keep up the illusion everywhere.
  2614   //
  2616   __ ld(Address(O2UnrollBlock, 0, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()), O7);
  2617   __ mov(SP, O5_savedSP);       // remember initial sender's original sp before adjustment
  2618   __ sub(SP, O7, SP);
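         // caller_adjustment is in bytes; lowering SP here grows the caller's frame so
         // that the callee's extra interpreter locals (see the comment above) will fit.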
  2620 #ifdef ASSERT
  2621   // make sure that there is at least one entry in the array
  2622   __ tst(O4array_size);
  2623   __ breakpoint_trap(Assembler::zero);
  2624 #endif
  2626   // Now push the new interpreter frames
  2627   __ bind(loop);
  2629   // allocate a new frame, filling the registers
  2631   gen_new_frame(masm, deopt);        // allocate an interpreter frame
  2633   __ tst(O4array_size);
  2634   __ br(Assembler::notZero, false, Assembler::pn, loop);
  2635   __ delayed()->add(O3array, wordSize, O3array);
  2636   __ ld_ptr(G3pcs, 0, O7);                      // load the final frame's new pc
       }
  2640 //------------------------------generate_deopt_blob----------------------------
  2641 // Ought to generate an ideal graph & compile, but here's some SPARC ASM
  2642 // instead.
  2643 void SharedRuntime::generate_deopt_blob() {
  2644   // allocate space for the code
  2645   ResourceMark rm;
  2646   // setup code generation tools
  2647   int pad = VerifyThread ? 512 : 0;// Extra slop space for more verify code
  2648 #ifdef _LP64
  2649   CodeBuffer buffer("deopt_blob", 2100+pad, 512);
  2650 #else
  2651   // Measured 8/7/03 at 1212 in 32bit debug build (no VerifyThread)
  2652   // Measured 8/7/03 at 1396 in 32bit debug build (VerifyThread)
  2653   CodeBuffer buffer("deopt_blob", 1600+pad, 512);
  2654 #endif /* _LP64 */
  2655   MacroAssembler* masm               = new MacroAssembler(&buffer);
  2656   FloatRegister   Freturn0           = F0;
  2657   Register        Greturn1           = G1;
  2658   Register        Oreturn0           = O0;
  2659   Register        Oreturn1           = O1;
  2660   Register        O2UnrollBlock      = O2;
  2661   Register        O3tmp              = O3;
  2662   Register        I5exception_tmp    = I5;
  2663   Register        G4exception_tmp    = G4_scratch;
  2664   int             frame_size_words;
  2665   Address         saved_Freturn0_addr(FP, 0, -sizeof(double) + STACK_BIAS);
  2666 #if !defined(_LP64) && defined(COMPILER2)
  2667   Address         saved_Greturn1_addr(FP, 0, -sizeof(double) -sizeof(jlong) + STACK_BIAS);
  2668 #endif
  2669   Label           cont;
  2671   OopMapSet *oop_maps = new OopMapSet();
  2673   //
  2674   // This is the entry point for code which is returning to a de-optimized
  2675   // frame.
  2676   // The steps taken by this frame are as follows:
  2677   //   - push a dummy "register_save" and save the return values (O0, O1, F0/F1, G1)
  2678   //     and all potentially live registers (at a pollpoint many registers can be live).
  2679   //
  2680   //   - call the C routine: Deoptimization::fetch_unroll_info (this function
  2681   //     returns information about the number and size of interpreter frames
  2682   //     which are equivalent to the frame which is being deoptimized)
  2683   //   - deallocate the unpack frame, restoring only result values. Other
  2684   //     volatile registers will now be captured in the vframeArray as needed.
  2685   //   - deallocate the deoptimization frame
  2686   //   - in a loop using the information returned in the previous step
  2687   //     push new interpreter frames (take care to propagate the return
  2688   //     values through each new frame pushed)
  2689   //   - create a dummy "unpack_frame" and save the return values (O0, O1, F0)
  2690   //   - call the C routine: Deoptimization::unpack_frames (this function
  2691   //     lays out values on the interpreter frame which was just created)
  2692   //   - deallocate the dummy unpack_frame
  2693   //   - ensure that all the return values are correctly set and then do
  2694   //     a return to the interpreter entry point
  2695   //
  2696   // Refer to the following methods for more information:
  2697   //   - Deoptimization::fetch_unroll_info
  2698   //   - Deoptimization::unpack_frames
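         //
         // In outline (as can be read off the code below), there are three entry
         // points which all converge at 'cont' with an unpack mode in I5exception_tmp:
         //   deopt entry       -> Unpack_deopt
         //   exception entries -> Unpack_exception
         //   reexecute entry   -> Unpack_reexecute
         // That mode is later passed on to Deoptimization::unpack_frames.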
  2700   OopMap* map = NULL;
  2702   int start = __ offset();
  2704   // restore G2, the trampoline destroyed it
  2705   __ get_thread();
  2707   // On entry we have been called by the deoptimized nmethod with a call that
  2708   // replaced the original call (or safepoint polling location) so the deoptimizing
  2709   // pc is now in O7. Return values are still in the expected places
  2711   map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words);
  2712   __ ba(false, cont);
  2713   __ delayed()->mov(Deoptimization::Unpack_deopt, I5exception_tmp);
  2715   int exception_offset = __ offset() - start;
  2717   // restore G2, the trampoline destroyed it
  2718   __ get_thread();
  2720   // On entry we have been jumped to by the exception handler (or exception_blob
  2721   // for server).  O0 contains the exception oop and O7 contains the original
  2722   // exception pc.  So if we push a frame here it will look to the
  2723   // stack walking code (fetch_unroll_info) just like a normal call so
  2724   // state will be extracted normally.
  2726   // save exception oop in JavaThread and fall through into the
  2727   // exception_in_tls case since they are handled in the same way except
  2728   // for where the pending exception is kept.
  2729   __ st_ptr(Oexception, G2_thread, in_bytes(JavaThread::exception_oop_offset()));
  2731   //
  2732   // Vanilla deoptimization with an exception pending in exception_oop
  2733   //
  2734   int exception_in_tls_offset = __ offset() - start;
  2736   // No need to update oop_map  as each call to save_live_registers will produce identical oopmap
  2737   (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_words);
  2739   // Restore G2_thread
  2740   __ get_thread();
  2742 #ifdef ASSERT
         {
  2744     // verify that there is really an exception oop in exception_oop
  2745     Label has_exception;
  2746     __ ld_ptr(G2_thread, in_bytes(JavaThread::exception_oop_offset()), Oexception);
  2747     __ br_notnull(Oexception, false, Assembler::pt, has_exception);
  2748     __ delayed()-> nop();
  2749     __ stop("no exception in thread");
  2750     __ bind(has_exception);
  2752     // verify that there is no pending exception
  2753     Label no_pending_exception;
  2754     Address exception_addr(G2_thread, 0, in_bytes(Thread::pending_exception_offset()));
  2755     __ ld_ptr(exception_addr, Oexception);
  2756     __ br_null(Oexception, false, Assembler::pt, no_pending_exception);
  2757     __ delayed()->nop();
  2758     __ stop("must not have pending exception here");
  2759     __ bind(no_pending_exception);
         }
  2761 #endif
  2763   __ ba(false, cont);
  2764   __ delayed()->mov(Deoptimization::Unpack_exception, I5exception_tmp);
  2766   //
  2767   // Reexecute entry, similar to c2 uncommon trap
  2768   //
  2769   int reexecute_offset = __ offset() - start;
  2771   // No need to update oop_map  as each call to save_live_registers will produce identical oopmap
  2772   (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_words);
  2774   __ mov(Deoptimization::Unpack_reexecute, I5exception_tmp);
  2776   __ bind(cont);
  2778   __ set_last_Java_frame(SP, noreg);
  2780   // do the call by hand so we can get the oopmap
  2782   __ mov(G2_thread, L7_thread_cache);
  2783   __ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), relocInfo::runtime_call_type);
  2784   __ delayed()->mov(G2_thread, O0);
  2786   // Set an oopmap for the call site; this describes all our saved volatile registers
  2788   oop_maps->add_gc_map( __ offset()-start, map);
  2790   __ mov(L7_thread_cache, G2_thread);
  2792   __ reset_last_Java_frame();
  2794   // NOTE: we know that only O0/O1 will be reloaded by restore_result_registers
  2795   // so this move will survive
  2797   __ mov(I5exception_tmp, G4exception_tmp);
  2799   __ mov(O0, O2UnrollBlock->after_save());
  2801   RegisterSaver::restore_result_registers(masm);
  2803   Label noException;
  2804   __ cmp(G4exception_tmp, Deoptimization::Unpack_exception);   // Was exception pending?
  2805   __ br(Assembler::notEqual, false, Assembler::pt, noException);
  2806   __ delayed()->nop();
  2808   // Move the pending exception from exception_oop to Oexception so
  2809   // the pending exception will be picked up by the interpreter.
  2810   __ ld_ptr(G2_thread, in_bytes(JavaThread::exception_oop_offset()), Oexception);
  2811   __ st_ptr(G0, G2_thread, in_bytes(JavaThread::exception_oop_offset()));
  2812   __ bind(noException);
  2814   // deallocate the deoptimization frame taking care to preserve the return values
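         // (Oreturn0->after_save() etc. name the %i registers this window shares with
         // the caller's %o registers, so the values survive the 'restore' below.)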
  2815   __ mov(Oreturn0,     Oreturn0->after_save());
  2816   __ mov(Oreturn1,     Oreturn1->after_save());
  2817   __ mov(O2UnrollBlock, O2UnrollBlock->after_save());
  2818   __ restore();
  2820   // Allocate new interpreter frame(s) and possible c2i adapter frame
  2822   make_new_frames(masm, true);
  2824   // push a dummy "unpack_frame" taking care of float return values and
  2825   // call Deoptimization::unpack_frames to have the unpacker layout
  2826   // information in the interpreter frames just created and then return
  2827   // to the interpreter entry point
  2828   __ save(SP, -frame_size_words*wordSize, SP);
  2829   __ stf(FloatRegisterImpl::D, Freturn0, saved_Freturn0_addr);
  2830 #if !defined(_LP64)
  2831 #if defined(COMPILER2)
  2832   if (!TieredCompilation) {
  2833     // In the 32-bit build, C2 returns longs in the single register G1
  2834     __ stx(Greturn1, saved_Greturn1_addr);
         }
  2836 #endif
  2837   __ set_last_Java_frame(SP, noreg);
  2838   __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), G2_thread, G4exception_tmp);
  2839 #else
  2840   // LP64 uses g4 in set_last_Java_frame
  2841   __ mov(G4exception_tmp, O1);
  2842   __ set_last_Java_frame(SP, G0);
  2843   __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), G2_thread, O1);
  2844 #endif
  2845   __ reset_last_Java_frame();
  2846   __ ldf(FloatRegisterImpl::D, saved_Freturn0_addr, Freturn0);
  2848   // In tiered we never use C2 to compile methods returning longs so
  2849   // the result is where we expect it already.
  2851 #if !defined(_LP64) && defined(COMPILER2)
  2852   // In 32 bit, C2 returns longs in G1 so restore the saved G1 into
  2853   // I0/I1 if the return value is long.  In the tiered world there is
  2854   // a mismatch between how C1-compiled and C2-compiled code return longs, and so
  2855   // currently compilation of methods which return longs is disabled
  2856   // for C2, and so is this code.  Eventually C1 and C2 will do the
  2857   // same thing for longs in the tiered world.
  2858   if (!TieredCompilation) {
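           // (unpack_frames returns the result BasicType, which the calling convention
           //  leaves in O0 -- hence the comparison against T_LONG below.)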
  2859     Label not_long;
  2860     __ cmp(O0,T_LONG);
  2861     __ br(Assembler::notEqual, false, Assembler::pt, not_long);
  2862     __ delayed()->nop();
  2863     __ ldd(saved_Greturn1_addr,I0);
  2864     __ bind(not_long);
         }
  2866 #endif
  2867   __ ret();
  2868   __ delayed()->restore();
  2870   masm->flush();
  2871   _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_words);
  2872   _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
       }
  2875 #ifdef COMPILER2
  2877 //------------------------------generate_uncommon_trap_blob--------------------
  2878 // Ought to generate an ideal graph & compile, but here's some SPARC ASM
  2879 // instead.
  2880 void SharedRuntime::generate_uncommon_trap_blob() {
  2881   // allocate space for the code
  2882   ResourceMark rm;
  2883   // setup code generation tools
  2884   int pad = VerifyThread ? 512 : 0;
  2885 #ifdef _LP64
  2886   CodeBuffer buffer("uncommon_trap_blob", 2700+pad, 512);
  2887 #else
  2888   // Measured 8/7/03 at 660 in 32bit debug build (no VerifyThread)
  2889   // Measured 8/7/03 at 1028 in 32bit debug build (VerifyThread)
  2890   CodeBuffer buffer("uncommon_trap_blob", 2000+pad, 512);
  2891 #endif
  2892   MacroAssembler* masm               = new MacroAssembler(&buffer);
  2893   Register        O2UnrollBlock      = O2;
  2894   Register        O3tmp              = O3;
  2895   Register        O2klass_index      = O2;
  2897   //
  2898   // This is the entry point for all traps the compiler takes when it thinks
  2899   // it cannot handle further execution of the compiled code. The frame is
  2900   // deoptimized in these cases and converted into interpreter frames for
  2901   // execution
  2902   // The steps taken by this frame are as follows:
  2903   //   - push a fake "unpack_frame"
  2904   //   - call the C routine Deoptimization::uncommon_trap (this function
  2905   //     packs the current compiled frame into vframe arrays and returns
  2906   //     information about the number and size of interpreter frames which
  2907   //     are equivalent to the frame which is being deoptimized)
  2908   //   - deallocate the "unpack_frame"
  2909   //   - deallocate the deoptimization frame
  2910   //   - in a loop using the information returned in the previous step
  2911   //     push interpreter frames;
  2912   //   - create a dummy "unpack_frame"
  2913   //   - call the C routine: Deoptimization::unpack_frames (this function
  2914   //     lays out values on the interpreter frame which was just created)
  2915   //   - deallocate the dummy unpack_frame
  2916   //   - return to the interpreter entry point
  2917   //
  2918   //  Refer to the following methods for more information:
  2919   //   - Deoptimization::uncommon_trap
  2920   //   - Deoptimization::unpack_frame
  2922   // the unloaded class index is in O0 (first parameter to this blob)
  2924   // push a dummy "unpack_frame"
  2925   // and call Deoptimization::uncommon_trap to pack the compiled frame into
  2926   // vframe array and return the UnrollBlock information
  2927   __ save_frame(0);
  2928   __ set_last_Java_frame(SP, noreg);
  2929   __ mov(I0, O2klass_index);
  2930   __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap), G2_thread, O2klass_index);
  2931   __ reset_last_Java_frame();
  2932   __ mov(O0, O2UnrollBlock->after_save());
  2933   __ restore();
  2935   // deallocate the deoptimized frame taking care to preserve the return values
  2936   __ mov(O2UnrollBlock, O2UnrollBlock->after_save());
  2937   __ restore();
  2939   // Allocate new interpreter frame(s) and possible c2i adapter frame
  2941   make_new_frames(masm, false);
  2943   // push a dummy "unpack_frame" taking care of float return values and
  2944   // call Deoptimization::unpack_frames to have the unpacker layout
  2945   // information in the interpreter frames just created and then return
  2946   // to the interpreter entry point
  2947   __ save_frame(0);
  2948   __ set_last_Java_frame(SP, noreg);
  2949   __ mov(Deoptimization::Unpack_uncommon_trap, O3); // indicate it is the uncommon trap case
  2950   __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), G2_thread, O3);
  2951   __ reset_last_Java_frame();
  2952   __ ret();
  2953   __ delayed()->restore();
  2955   masm->flush();
  2956   _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, NULL, __ total_frame_size_in_bytes(0)/wordSize);
       }
  2959 #endif // COMPILER2
  2961 //------------------------------generate_handler_blob-------------------
  2962 //
  2963 // Generate a special Compile2Runtime blob that saves all registers, and sets
  2964 // up an OopMap.
  2965 //
  2966 // This blob is jumped to (via a breakpoint and the signal handler) from a
  2967 // safepoint in compiled code.  On entry to this blob, O7 contains the
  2968 // address in the original nmethod at which we should resume normal execution.
  2969 // Thus, this blob looks like a subroutine which must preserve lots of
  2970 // registers and return normally.  Note that O7 is never register-allocated,
  2971 // so it is guaranteed to be free here.
  2972 //
  2974 // The hardest part of what this blob must do is to save the 64-bit %o
  2975 // registers in the 32-bit build.  A simple 'save' turns the %o's to %i's and
  2976 // an interrupt will chop off their heads.  Making space in the caller's frame
  2977 // first will let us save the 64-bit %o's before save'ing, but we cannot hand
  2978 // the adjusted FP off to the GC stack-crawler: this will modify the caller's
  2979 // SP and mess up HIS OopMaps.  So we first adjust the caller's SP, then save
  2980 // the 64-bit %o's, then do a save, then fixup the caller's SP (our FP).
  2981 // Tricky, tricky, tricky...
  2983 static SafepointBlob* generate_handler_blob(address call_ptr, bool cause_return) {
  2984   assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");
  2986   // allocate space for the code
  2987   ResourceMark rm;
  2988   // setup code generation tools
  2989   // Measured 8/7/03 at 896 in 32bit debug build (no VerifyThread)
  2990   // Measured 8/7/03 at 1080 in 32bit debug build (VerifyThread)
  2991   // even larger with TraceJumps
  2992   int pad = TraceJumps ? 512 : 0;
  2993   CodeBuffer buffer("handler_blob", 1600 + pad, 512);
  2994   MacroAssembler* masm                = new MacroAssembler(&buffer);
  2995   int             frame_size_words;
  2996   OopMapSet *oop_maps = new OopMapSet();
  2997   OopMap* map = NULL;
  2999   int start = __ offset();
  3001   // If this causes a return before the processing, then do a "restore"
  3002   if (cause_return) {
  3003     __ restore();
  3004   } else {
  3005     // Make it look like we were called via the poll
  3006     // so that the frame constructor always sees a valid return address
  3007     __ ld_ptr(G2_thread, in_bytes(JavaThread::saved_exception_pc_offset()), O7);
  3008     __ sub(O7, frame::pc_return_offset, O7);
         }
  3011   map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words);
  3013   // setup last_Java_sp (blows G4)
  3014   __ set_last_Java_frame(SP, noreg);
  3016   // call into the runtime to handle illegal instructions exception
  3017   // Do not use call_VM_leaf, because we need to make a GC map at this call site.
  3018   __ mov(G2_thread, O0);
  3019   __ save_thread(L7_thread_cache);
  3020   __ call(call_ptr);
  3021   __ delayed()->nop();
  3023   // Set an oopmap for the call site.
  3024   // We need this not only for callee-saved registers, but also for volatile
  3025   // registers that the compiler might be keeping live across a safepoint.
  3027   oop_maps->add_gc_map( __ offset() - start, map);
  3029   __ restore_thread(L7_thread_cache);
  3030   // clear last_Java_sp
  3031   __ reset_last_Java_frame();
  3033   // Check for exceptions
  3034   Label pending;
  3036   __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), O1);
  3037   __ tst(O1);
  3038   __ brx(Assembler::notEqual, true, Assembler::pn, pending);
  3039   __ delayed()->nop();
  3041   RegisterSaver::restore_live_registers(masm);
  3043   // We are back to the original state on entry and ready to go.
  3045   __ retl();
  3046   __ delayed()->nop();
  3048   // Pending exception after the safepoint
  3050   __ bind(pending);
  3052   RegisterSaver::restore_live_registers(masm);
  3054   // We are back to the original state on entry.
  3056   // Tail-call forward_exception_entry, with the issuing PC in O7,
  3057   // so it looks like the original nmethod called forward_exception_entry.
  3058   __ set((intptr_t)StubRoutines::forward_exception_entry(), O0);
  3059   __ JMP(O0, 0);
  3060   __ delayed()->nop();
  3062   // -------------
  3063   // make sure all code is generated
  3064   masm->flush();
  3066   // return exception blob
  3067   return SafepointBlob::create(&buffer, oop_maps, frame_size_words);
       }
  3070 //
  3071 // generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss)
  3072 //
  3073 // Generate a stub that calls into vm to find out the proper destination
  3074 // Generate a stub that calls into the VM to find out the proper destination
  3075 // of a Java call. All the argument registers are live at this point
  3076 // but since this is generic code we don't know what they are, and the caller
  3077 // must do any GC of the args.
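       //
       // The same stub shape is instantiated five times in generate_stubs() below:
       // wrong_method, ic_miss, and the three resolve_*_call entry points.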
  3078 static RuntimeStub* generate_resolve_blob(address destination, const char* name) {
  3079   assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");
  3081   // allocate space for the code
  3082   ResourceMark rm;
  3083   // setup code generation tools
  3084   // Measured 8/7/03 at 896 in 32bit debug build (no VerifyThread)
  3085   // Measured 8/7/03 at 1080 in 32bit debug build (VerifyThread)
  3086   // even larger with TraceJumps
  3087   int pad = TraceJumps ? 512 : 0;
  3088   CodeBuffer buffer(name, 1600 + pad, 512);
  3089   MacroAssembler* masm                = new MacroAssembler(&buffer);
  3090   int             frame_size_words;
  3091   OopMapSet *oop_maps = new OopMapSet();
  3092   OopMap* map = NULL;
  3094   int start = __ offset();
  3096   map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words);
  3098   int frame_complete = __ offset();
  3100   // setup last_Java_sp (blows G4)
  3101   __ set_last_Java_frame(SP, noreg);
  3103   // call into the runtime to find the proper destination of the call
  3104   // Do not use call_VM_leaf, because we need to make a GC map at this call site.
  3105   __ mov(G2_thread, O0);
  3106   __ save_thread(L7_thread_cache);
  3107   __ call(destination, relocInfo::runtime_call_type);
  3108   __ delayed()->nop();
  3110   // O0 contains the address we are going to jump to assuming no exception got installed
  3112   // Set an oopmap for the call site.
  3113   // We need this not only for callee-saved registers, but also for volatile
  3114   // registers that the compiler might be keeping live across a safepoint.
  3116   oop_maps->add_gc_map( __ offset() - start, map);
  3118   __ restore_thread(L7_thread_cache);
  3119   // clear last_Java_sp
  3120   __ reset_last_Java_frame();
  3122   // Check for exceptions
  3123   Label pending;
  3125   __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), O1);
  3126   __ tst(O1);
  3127   __ brx(Assembler::notEqual, true, Assembler::pn, pending);
  3128   __ delayed()->nop();
  3130   // get the returned methodOop
  3132   __ get_vm_result(G5_method);
  3133   __ stx(G5_method, SP, RegisterSaver::G5_offset()+STACK_BIAS);
  3135   // O0 is where we want to jump, overwrite G3 which is saved and scratch
  3137   __ stx(O0, SP, RegisterSaver::G3_offset()+STACK_BIAS);
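         // Because the destination (O0) and the methodOop (G5) were stored into the
         // register save area above, restore_live_registers() below reloads them into
         // G3 and G5 while bringing every other register back untouched.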
  3139   RegisterSaver::restore_live_registers(masm);
  3141   // We are back to the original state on entry and ready to go.
  3143   __ JMP(G3, 0);
  3144   __ delayed()->nop();
  3146   // Pending exception after the safepoint
  3148   __ bind(pending);
  3150   RegisterSaver::restore_live_registers(masm);
  3152   // We are back to the original state on entry.
  3154   // Tail-call forward_exception_entry, with the issuing PC in O7,
  3155   // so it looks like the original nmethod called forward_exception_entry.
  3156   __ set((intptr_t)StubRoutines::forward_exception_entry(), O0);
  3157   __ JMP(O0, 0);
  3158   __ delayed()->nop();
  3160   // -------------
  3161   // make sure all code is generated
  3162   masm->flush();
  3164   // return the blob
  3165   // frame_size_words or bytes??
  3166   return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true);
       }
  3169 void SharedRuntime::generate_stubs() {
  3171   _wrong_method_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::handle_wrong_method),
  3172                                              "wrong_method_stub");
  3174   _ic_miss_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::handle_wrong_method_ic_miss),
  3175                                         "ic_miss_stub");
  3177   _resolve_opt_virtual_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::resolve_opt_virtual_call_C),
  3178                                         "resolve_opt_virtual_call");
  3180   _resolve_virtual_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::resolve_virtual_call_C),
  3181                                         "resolve_virtual_call");
  3183   _resolve_static_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::resolve_static_call_C),
  3184                                         "resolve_static_call");
  3186   _polling_page_safepoint_handler_blob =
  3187     generate_handler_blob(CAST_FROM_FN_PTR(address,
  3188                    SafepointSynchronize::handle_polling_page_exception), false);
  3190   _polling_page_return_handler_blob =
  3191     generate_handler_blob(CAST_FROM_FN_PTR(address,
  3192                    SafepointSynchronize::handle_polling_page_exception), true);
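         // The two safepoint blobs above differ only in cause_return: the return-handler
         // variant pops the compiled frame with a 'restore' first, while the loop-poll
         // variant reconstructs the return address from saved_exception_pc (see
         // generate_handler_blob above).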
  3194   generate_deopt_blob();
  3196 #ifdef COMPILER2
  3197   generate_uncommon_trap_blob();
  3198 #endif // COMPILER2
       }
