src/cpu/sparc/vm/sharedRuntime_sparc.cpp

author:       johnc
date:         Thu, 07 Apr 2011 09:53:20 -0700
changeset:    2781:e1162778c1c8
parent:       2497:3582bf76420e
child:        2687:3d58a4983660
permissions:  -rw-r--r--

7009266: G1: assert(obj->is_oop_or_null(true )) failed: Error
Summary: A referent object that is only weakly reachable at the start of concurrent marking, but is re-attached to the strongly reachable object graph during marking, may not be marked as live. This can cause the reference object to be processed prematurely and leave dangling pointers to the referent object. Implement a read barrier for the java.lang.ref.Reference::referent field by intrinsifying the Reference.get() method, and by intercepting accesses through JNI, reflection, and Unsafe, so that whenever a non-null referent object is read it is also logged in an SATB buffer.
Reviewed-by: kvn, iveresov, never, tonyp, dholmes
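
The essence of the change, for readers of the SPARC code below, is a G1 pre-barrier applied to loads of the referent field. The following is only a conceptual sketch of that idea, not code from this changeset; helper names such as load_referent_field, marking_is_active and satb_enqueue are placeholders:

    // Conceptual sketch of the SATB read barrier described in the summary.
    // All helper names here are placeholders, not actual HotSpot entry points.
    oop Reference_get(oop reference) {
      oop referent = load_referent_field(reference);   // ordinary field load
      if (referent != NULL && marking_is_active()) {
        // Log the referent so concurrent marking treats it as live even if it
        // was only weakly reachable when marking started.
        satb_enqueue(referent);
      }
      return referent;
    }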

     1 /*
     2  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.
     8  *
     9  * This code is distributed in the hope that it will be useful, but WITHOUT
    10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    12  * version 2 for more details (a copy is included in the LICENSE file that
    13  * accompanied this code).
    14  *
    15  * You should have received a copy of the GNU General Public License version
    16  * 2 along with this work; if not, write to the Free Software Foundation,
    17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    18  *
    19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    20  * or visit www.oracle.com if you need additional information or have any
    21  * questions.
    22  *
    23  */
    25 #include "precompiled.hpp"
    26 #include "asm/assembler.hpp"
    27 #include "assembler_sparc.inline.hpp"
    28 #include "code/debugInfoRec.hpp"
    29 #include "code/icBuffer.hpp"
    30 #include "code/vtableStubs.hpp"
    31 #include "interpreter/interpreter.hpp"
    32 #include "oops/compiledICHolderOop.hpp"
    33 #include "prims/jvmtiRedefineClassesTrace.hpp"
    34 #include "runtime/sharedRuntime.hpp"
    35 #include "runtime/vframeArray.hpp"
    36 #include "vmreg_sparc.inline.hpp"
    37 #ifdef COMPILER1
    38 #include "c1/c1_Runtime1.hpp"
    39 #endif
    40 #ifdef COMPILER2
    41 #include "opto/runtime.hpp"
    42 #endif
    43 #ifdef SHARK
    44 #include "compiler/compileBroker.hpp"
    45 #include "shark/sharkCompiler.hpp"
    46 #endif
    48 #define __ masm->
    50 #ifdef COMPILER2
    51 UncommonTrapBlob*   SharedRuntime::_uncommon_trap_blob;
    52 #endif // COMPILER2
    54 DeoptimizationBlob* SharedRuntime::_deopt_blob;
    55 SafepointBlob*      SharedRuntime::_polling_page_safepoint_handler_blob;
    56 SafepointBlob*      SharedRuntime::_polling_page_return_handler_blob;
    57 RuntimeStub*        SharedRuntime::_wrong_method_blob;
    58 RuntimeStub*        SharedRuntime::_ic_miss_blob;
    59 RuntimeStub*        SharedRuntime::_resolve_opt_virtual_call_blob;
    60 RuntimeStub*        SharedRuntime::_resolve_virtual_call_blob;
    61 RuntimeStub*        SharedRuntime::_resolve_static_call_blob;
    63 class RegisterSaver {
    65   // Used for saving volatile registers: the G regs, F regs, and I/L/O regs.
    66   // The Oregs are problematic. In the 32bit build the compiler can
    67   // have O registers live with 64 bit quantities. A window save will
    68   // cut the heads off of the registers. We have to do a very extensive
    69   // stack dance to save and restore these properly.
    71   // Note that the Oregs problem only exists if we block at either a polling
    72   // page exception or at a compiled code safepoint that was not originally a call,
    73   // or if we deoptimize following one of these kinds of safepoints.
    75   // Lots of registers to save.  For all builds, a window save will preserve
    76   // the %i and %l registers.  For the 32-bit longs-in-two entries and 64-bit
    77   // builds a window-save will preserve the %o registers.  In the LION build
    78   // we need to save the 64-bit %o registers which requires we save them
    79   // before the window-save (as then they become %i registers and get their
    80   // heads chopped off on interrupt).  We have to save some %g registers here
    81   // as well.
    82   enum {
    83     // This frame's save area.  Includes extra space for the native call:
    84     // vararg's layout space and the like.  Briefly holds the caller's
    85     // register save area.
    86     call_args_area = frame::register_save_words_sp_offset +
    87                      frame::memory_parameter_word_sp_offset*wordSize,
    88     // Make sure save locations are always 8-byte aligned.
    89     // We can't use round_to because it doesn't produce a compile-time constant.
    90     start_of_extra_save_area = ((call_args_area + 7) & ~7),
    91     g1_offset = start_of_extra_save_area, // g-regs needing saving
    92     g3_offset = g1_offset+8,
    93     g4_offset = g3_offset+8,
    94     g5_offset = g4_offset+8,
    95     o0_offset = g5_offset+8,
    96     o1_offset = o0_offset+8,
    97     o2_offset = o1_offset+8,
    98     o3_offset = o2_offset+8,
    99     o4_offset = o3_offset+8,
   100     o5_offset = o4_offset+8,
   101     start_of_flags_save_area = o5_offset+8,
   102     ccr_offset = start_of_flags_save_area,
   103     fsr_offset = ccr_offset + 8,
   104     d00_offset = fsr_offset+8,  // Start of float save area
   105     register_save_size = d00_offset+8*32
   106   };
   109   public:
   111   static int Oexception_offset() { return o0_offset; };
   112   static int G3_offset() { return g3_offset; };
   113   static int G5_offset() { return g5_offset; };
   114   static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words);
   115   static void restore_live_registers(MacroAssembler* masm);
   117   // During deoptimization only the result registers need to be restored;
   118   // all the other values have already been extracted.
   120   static void restore_result_registers(MacroAssembler* masm);
   121 };
   123 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) {
   124   // Record volatile registers as callee-save values in an OopMap so their save locations will be
   125   // propagated to the caller frame's RegisterMap during StackFrameStream construction (needed for
   126   // deoptimization; see compiledVFrame::create_stack_value).  The caller's I, L and O registers
   127   // are saved in register windows - I's and L's in the caller's frame and O's in the stub frame
   128   // (as the stub's I's) when the runtime routine called by the stub creates its frame.
   129   int i;
   130   // Always make the frame size 16 byte aligned.
   131   int frame_size = round_to(additional_frame_words + register_save_size, 16);
   132   // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words
   133   int frame_size_in_slots = frame_size / sizeof(jint);
   134   // CodeBlob frame size is in words.
   135   *total_frame_words = frame_size / wordSize;
   136   // OopMap* map = new OopMap(*total_frame_words, 0);
   137   OopMap* map = new OopMap(frame_size_in_slots, 0);
   139 #if !defined(_LP64)
   141   // Save 64-bit O registers; they will get their heads chopped off on a 'save'.
   142   __ stx(O0, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8);
   143   __ stx(O1, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8);
   144   __ stx(O2, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+2*8);
   145   __ stx(O3, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+3*8);
   146   __ stx(O4, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+4*8);
   147   __ stx(O5, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+5*8);
   148 #endif /* _LP64 */
   150   __ save(SP, -frame_size, SP);
   152 #ifndef _LP64
   153   // Reload the 64 bit Oregs. Although they are now Iregs we load them
   154   // to Oregs here to avoid interrupts cutting off their heads
   156   __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8, O0);
   157   __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8, O1);
   158   __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+2*8, O2);
   159   __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+3*8, O3);
   160   __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+4*8, O4);
   161   __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+5*8, O5);
   163   __ stx(O0, SP, o0_offset+STACK_BIAS);
   164   map->set_callee_saved(VMRegImpl::stack2reg((o0_offset + 4)>>2), O0->as_VMReg());
   166   __ stx(O1, SP, o1_offset+STACK_BIAS);
   168   map->set_callee_saved(VMRegImpl::stack2reg((o1_offset + 4)>>2), O1->as_VMReg());
   170   __ stx(O2, SP, o2_offset+STACK_BIAS);
   171   map->set_callee_saved(VMRegImpl::stack2reg((o2_offset + 4)>>2), O2->as_VMReg());
   173   __ stx(O3, SP, o3_offset+STACK_BIAS);
   174   map->set_callee_saved(VMRegImpl::stack2reg((o3_offset + 4)>>2), O3->as_VMReg());
   176   __ stx(O4, SP, o4_offset+STACK_BIAS);
   177   map->set_callee_saved(VMRegImpl::stack2reg((o4_offset + 4)>>2), O4->as_VMReg());
   179   __ stx(O5, SP, o5_offset+STACK_BIAS);
   180   map->set_callee_saved(VMRegImpl::stack2reg((o5_offset + 4)>>2), O5->as_VMReg());
   181 #endif /* _LP64 */
   184 #ifdef _LP64
   185   int debug_offset = 0;
   186 #else
   187   int debug_offset = 4;
   188 #endif
   189   // Save the G's
   190   __ stx(G1, SP, g1_offset+STACK_BIAS);
   191   map->set_callee_saved(VMRegImpl::stack2reg((g1_offset + debug_offset)>>2), G1->as_VMReg());
   193   __ stx(G3, SP, g3_offset+STACK_BIAS);
   194   map->set_callee_saved(VMRegImpl::stack2reg((g3_offset + debug_offset)>>2), G3->as_VMReg());
   196   __ stx(G4, SP, g4_offset+STACK_BIAS);
   197   map->set_callee_saved(VMRegImpl::stack2reg((g4_offset + debug_offset)>>2), G4->as_VMReg());
   199   __ stx(G5, SP, g5_offset+STACK_BIAS);
   200   map->set_callee_saved(VMRegImpl::stack2reg((g5_offset + debug_offset)>>2), G5->as_VMReg());
   202   // This is really a waste but we'll keep things as they were for now
   203   if (true) {
   204 #ifndef _LP64
   205     map->set_callee_saved(VMRegImpl::stack2reg((o0_offset)>>2), O0->as_VMReg()->next());
   206     map->set_callee_saved(VMRegImpl::stack2reg((o1_offset)>>2), O1->as_VMReg()->next());
   207     map->set_callee_saved(VMRegImpl::stack2reg((o2_offset)>>2), O2->as_VMReg()->next());
   208     map->set_callee_saved(VMRegImpl::stack2reg((o3_offset)>>2), O3->as_VMReg()->next());
   209     map->set_callee_saved(VMRegImpl::stack2reg((o4_offset)>>2), O4->as_VMReg()->next());
   210     map->set_callee_saved(VMRegImpl::stack2reg((o5_offset)>>2), O5->as_VMReg()->next());
   211     map->set_callee_saved(VMRegImpl::stack2reg((g1_offset)>>2), G1->as_VMReg()->next());
   212     map->set_callee_saved(VMRegImpl::stack2reg((g3_offset)>>2), G3->as_VMReg()->next());
   213     map->set_callee_saved(VMRegImpl::stack2reg((g4_offset)>>2), G4->as_VMReg()->next());
   214     map->set_callee_saved(VMRegImpl::stack2reg((g5_offset)>>2), G5->as_VMReg()->next());
   215 #endif /* _LP64 */
   216   }
   219   // Save the flags
   220   __ rdccr( G5 );
   221   __ stx(G5, SP, ccr_offset+STACK_BIAS);
   222   __ stxfsr(SP, fsr_offset+STACK_BIAS);
   224   // Save all the FP registers: 32 doubles (32 floats correspond to the 2 halves of the first 16 doubles)
   225   int offset = d00_offset;
   226   for( int i=0; i<FloatRegisterImpl::number_of_registers; i+=2 ) {
   227     FloatRegister f = as_FloatRegister(i);
   228     __ stf(FloatRegisterImpl::D,  f, SP, offset+STACK_BIAS);
   229     // Record as callee saved both halves of double registers (2 float registers).
   230     map->set_callee_saved(VMRegImpl::stack2reg(offset>>2), f->as_VMReg());
   231     map->set_callee_saved(VMRegImpl::stack2reg((offset + sizeof(float))>>2), f->as_VMReg()->next());
   232     offset += sizeof(double);
   233   }
   235   // And we're done.
   237   return map;
   238 }
   241 // Pop the current frame and restore all the registers that we
   242 // saved.
   243 void RegisterSaver::restore_live_registers(MacroAssembler* masm) {
   245   // Restore all the FP registers
   246   for( int i=0; i<FloatRegisterImpl::number_of_registers; i+=2 ) {
   247     __ ldf(FloatRegisterImpl::D, SP, d00_offset+i*sizeof(float)+STACK_BIAS, as_FloatRegister(i));
   248   }
   250   __ ldx(SP, ccr_offset+STACK_BIAS, G1);
   251   __ wrccr (G1) ;
   253   // Restore the G's
   254   // Note that G2 (AKA GThread) must be saved and restored separately.
   255   // TODO-FIXME: save and restore some of the other ASRs, viz., %asi and %gsr.
   257   __ ldx(SP, g1_offset+STACK_BIAS, G1);
   258   __ ldx(SP, g3_offset+STACK_BIAS, G3);
   259   __ ldx(SP, g4_offset+STACK_BIAS, G4);
   260   __ ldx(SP, g5_offset+STACK_BIAS, G5);
   263 #if !defined(_LP64)
   264   // Restore the 64-bit O's.
   265   __ ldx(SP, o0_offset+STACK_BIAS, O0);
   266   __ ldx(SP, o1_offset+STACK_BIAS, O1);
   267   __ ldx(SP, o2_offset+STACK_BIAS, O2);
   268   __ ldx(SP, o3_offset+STACK_BIAS, O3);
   269   __ ldx(SP, o4_offset+STACK_BIAS, O4);
   270   __ ldx(SP, o5_offset+STACK_BIAS, O5);
   272   // And temporarily place them in TLS
   274   __ stx(O0, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8);
   275   __ stx(O1, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8);
   276   __ stx(O2, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+2*8);
   277   __ stx(O3, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+3*8);
   278   __ stx(O4, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+4*8);
   279   __ stx(O5, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+5*8);
   280 #endif /* _LP64 */
   282   // Restore flags
   284   __ ldxfsr(SP, fsr_offset+STACK_BIAS);
   286   __ restore();
   288 #if !defined(_LP64)
   289   // Now reload the 64-bit Oregs after we've restored the window.
   290   __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8, O0);
   291   __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8, O1);
   292   __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+2*8, O2);
   293   __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+3*8, O3);
   294   __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+4*8, O4);
   295   __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+5*8, O5);
   296 #endif /* _LP64 */
   298 }
   300 // Pop the current frame and restore the registers that might be holding
   301 // a result.
   302 void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
   304 #if !defined(_LP64)
   305   // 32bit build returns longs in G1
   306   __ ldx(SP, g1_offset+STACK_BIAS, G1);
   308   // Retrieve the 64-bit O's.
   309   __ ldx(SP, o0_offset+STACK_BIAS, O0);
   310   __ ldx(SP, o1_offset+STACK_BIAS, O1);
   311   // and save to TLS
   312   __ stx(O0, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8);
   313   __ stx(O1, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8);
   314 #endif /* _LP64 */
   316   __ ldf(FloatRegisterImpl::D, SP, d00_offset+STACK_BIAS, as_FloatRegister(0));
   318   __ restore();
   320 #if !defined(_LP64)
   321   // Now reload the 64-bit Oregs after we've restored the window.
   322   __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8, O0);
   323   __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8, O1);
   324 #endif /* _LP64 */
   326 }
   328 // The java_calling_convention describes stack locations as ideal slots on
   329 // a frame with no abi restrictions. Since we must observe abi restrictions
   330 // (like the placement of the register window) the slots must be biased by
   331 // the following value.
   332 static int reg2offset(VMReg r) {
   333   return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
   334 }
   336 // ---------------------------------------------------------------------------
   337 // Read the array of BasicTypes from a signature, and compute where the
   338 // arguments should go.  Values in the VMRegPair regs array refer to 4-byte (VMRegImpl::stack_slot_size)
   339 // quantities.  Values less than VMRegImpl::stack0 are registers, those above
   340 // refer to 4-byte stack slots.  All stack slots are based off of the window
   341 // top.  VMRegImpl::stack0 refers to the first slot past the 16-word window,
   342 // and VMRegImpl::stack0+1 refers to the memory word 4 bytes higher.  Register
   343 // values 0-63 (up to RegisterImpl::number_of_registers) are the 64-bit
   344 // integer registers.  Values 64-95 are the (32-bit only) float registers.
   345 // Each 32-bit quantity is given its own number, so the integer registers
   346 // (in either 32- or 64-bit builds) use 2 numbers.  For example, there is
   347 // an O0-low and an O0-high.  Essentially, all int register numbers are doubled.
   349 // Register results are passed in O0-O5, for outgoing call arguments.  To
   350 // convert to incoming arguments, convert all O's to I's.  The regs array
   351 // refers to the low and high 32-bit words of 64-bit registers or stack slots.
   352 // If the regs[].second() field is set to VMRegImpl::Bad(), it means it's unused (a
   353 // 32-bit value was passed).  If both are VMRegImpl::Bad(), it means no value was
   354 // passed (used as a placeholder for the other half of longs and doubles in
   355 // the 64-bit build).  regs[].second() is either VMRegImpl::Bad() or regs[].second() is
   356 // regs[].first()+1 (regs[].first() may be misaligned in the C calling convention).
   357 // Sparc never passes a value in regs[].second() but not regs[].first() (regs[].first()
   358 // == VMRegImpl::Bad() && regs[].second() != VMRegImpl::Bad()) nor unrelated values in the
   359 // same VMRegPair.
   361 // Note: the INPUTS in sig_bt are in units of Java argument words, which are
   362 // either 32-bit or 64-bit depending on the build.  The OUTPUTS are in 32-bit
   363 // units regardless of build.
   366 // ---------------------------------------------------------------------------
   367 // The compiled Java calling convention.  The Java convention always passes
   368 // 64-bit values in adjacent aligned locations (either registers or stack),
   369 // floats in float registers and doubles in aligned float pairs.  Values are
   370 // packed in the registers.  There is no backing varargs store for values in
   371 // registers.  In the 32-bit build, longs are passed in G1 and G4 (cannot be
   372 // passed in I's, because longs in I's get their heads chopped off at
   373 // interrupt).
   374 int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
   375                                            VMRegPair *regs,
   376                                            int total_args_passed,
   377                                            int is_outgoing) {
   378   assert(F31->as_VMReg()->is_reg(), "overlapping stack/register numbers");
   380   // Convention is to pack the first 6 int/oop args into the first 6 registers
   381   // (I0-I5), extras spill to the stack.  Then pack the first 8 float args
   382   // into F0-F7, extras spill to the stack.  Then pad all register sets to
   383   // align.  Then put longs and doubles into the same registers as they fit,
   384   // else spill to the stack.
   385   const int int_reg_max = SPARC_ARGS_IN_REGS_NUM;
   386   const int flt_reg_max = 8;
   387   //
   388   // Where 32-bit 1-reg longs start being passed
   389   // In tiered we must pass on stack because c1 can't use a "pair" in a single reg.
   390   // So make it look like we've filled all the G regs that c2 wants to use.
   391   Register g_reg = TieredCompilation ? noreg : G1;
   393   // Count int/oop and float args.  See how many stack slots we'll need and
   394   // where the longs & doubles will go.
   395   int int_reg_cnt   = 0;
   396   int flt_reg_cnt   = 0;
   397   // int stk_reg_pairs = frame::register_save_words*(wordSize>>2);
   398   // int stk_reg_pairs = SharedRuntime::out_preserve_stack_slots();
   399   int stk_reg_pairs = 0;
   400   for (int i = 0; i < total_args_passed; i++) {
   401     switch (sig_bt[i]) {
   402     case T_LONG:                // LP64, longs compete with int args
   403       assert(sig_bt[i+1] == T_VOID, "");
   404 #ifdef _LP64
   405       if (int_reg_cnt < int_reg_max) int_reg_cnt++;
   406 #endif
   407       break;
   408     case T_OBJECT:
   409     case T_ARRAY:
   410     case T_ADDRESS: // Used, e.g., in slow-path locking for the lock's stack address
   411       if (int_reg_cnt < int_reg_max) int_reg_cnt++;
   412 #ifndef _LP64
   413       else                            stk_reg_pairs++;
   414 #endif
   415       break;
   416     case T_INT:
   417     case T_SHORT:
   418     case T_CHAR:
   419     case T_BYTE:
   420     case T_BOOLEAN:
   421       if (int_reg_cnt < int_reg_max) int_reg_cnt++;
   422       else                            stk_reg_pairs++;
   423       break;
   424     case T_FLOAT:
   425       if (flt_reg_cnt < flt_reg_max) flt_reg_cnt++;
   426       else                            stk_reg_pairs++;
   427       break;
   428     case T_DOUBLE:
   429       assert(sig_bt[i+1] == T_VOID, "");
   430       break;
   431     case T_VOID:
   432       break;
   433     default:
   434       ShouldNotReachHere();
   435     }
   436   }
   438   // This is where the longs/doubles start on the stack.
   439   stk_reg_pairs = (stk_reg_pairs+1) & ~1; // Round
   441   int int_reg_pairs = (int_reg_cnt+1) & ~1; // 32-bit 2-reg longs only
   442   int flt_reg_pairs = (flt_reg_cnt+1) & ~1;
   444   // int stk_reg = frame::register_save_words*(wordSize>>2);
   445   // int stk_reg = SharedRuntime::out_preserve_stack_slots();
   446   int stk_reg = 0;
   447   int int_reg = 0;
   448   int flt_reg = 0;
   450   // Now do the signature layout
   451   for (int i = 0; i < total_args_passed; i++) {
   452     switch (sig_bt[i]) {
   453     case T_INT:
   454     case T_SHORT:
   455     case T_CHAR:
   456     case T_BYTE:
   457     case T_BOOLEAN:
   458 #ifndef _LP64
   459     case T_OBJECT:
   460     case T_ARRAY:
   461     case T_ADDRESS: // Used, e.g., in slow-path locking for the lock's stack address
   462 #endif // _LP64
   463       if (int_reg < int_reg_max) {
   464         Register r = is_outgoing ? as_oRegister(int_reg++) : as_iRegister(int_reg++);
   465         regs[i].set1(r->as_VMReg());
   466       } else {
   467         regs[i].set1(VMRegImpl::stack2reg(stk_reg++));
   468       }
   469       break;
   471 #ifdef _LP64
   472     case T_OBJECT:
   473     case T_ARRAY:
   474     case T_ADDRESS: // Used, e.g., in slow-path locking for the lock's stack address
   475       if (int_reg < int_reg_max) {
   476         Register r = is_outgoing ? as_oRegister(int_reg++) : as_iRegister(int_reg++);
   477         regs[i].set2(r->as_VMReg());
   478       } else {
   479         regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs));
   480         stk_reg_pairs += 2;
   481       }
   482       break;
   483 #endif // _LP64
   485     case T_LONG:
   486       assert(sig_bt[i+1] == T_VOID, "expecting VOID in other half");
   487 #ifdef _LP64
   488         if (int_reg < int_reg_max) {
   489           Register r = is_outgoing ? as_oRegister(int_reg++) : as_iRegister(int_reg++);
   490           regs[i].set2(r->as_VMReg());
   491         } else {
   492           regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs));
   493           stk_reg_pairs += 2;
   494         }
   495 #else
   496 #ifdef COMPILER2
   497         // For 32-bit build, can't pass longs in O-regs because they become
   498         // I-regs and get trashed.  Use G-regs instead.  G1 and G4 are almost
   499         // spare and available.  This convention isn't used by the Sparc ABI or
   500         // anywhere else. If we're tiered then we don't use G-regs because c1
   501         // can't deal with them as a "pair". (Tiered makes this code think g's are filled)
   502         // G0: zero
   503         // G1: 1st Long arg
   504         // G2: global allocated to TLS
   505         // G3: used in inline cache check
   506         // G4: 2nd Long arg
   507         // G5: used in inline cache check
   508         // G6: used by OS
   509         // G7: used by OS
   511         if (g_reg == G1) {
   512           regs[i].set2(G1->as_VMReg()); // This long arg in G1
   513           g_reg = G4;                  // Where the next arg goes
   514         } else if (g_reg == G4) {
   515           regs[i].set2(G4->as_VMReg()); // The 2nd long arg in G4
   516           g_reg = noreg;               // No more longs in registers
   517         } else {
   518           regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs));
   519           stk_reg_pairs += 2;
   520         }
   521 #else // COMPILER2
   522         if (int_reg_pairs + 1 < int_reg_max) {
   523           if (is_outgoing) {
   524             regs[i].set_pair(as_oRegister(int_reg_pairs + 1)->as_VMReg(), as_oRegister(int_reg_pairs)->as_VMReg());
   525           } else {
   526             regs[i].set_pair(as_iRegister(int_reg_pairs + 1)->as_VMReg(), as_iRegister(int_reg_pairs)->as_VMReg());
   527           }
   528           int_reg_pairs += 2;
   529         } else {
   530           regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs));
   531           stk_reg_pairs += 2;
   532         }
   533 #endif // COMPILER2
   534 #endif // _LP64
   535       break;
   537     case T_FLOAT:
   538       if (flt_reg < flt_reg_max) regs[i].set1(as_FloatRegister(flt_reg++)->as_VMReg());
   539       else                       regs[i].set1(    VMRegImpl::stack2reg(stk_reg++));
   540       break;
   541     case T_DOUBLE:
   542       assert(sig_bt[i+1] == T_VOID, "expecting half");
   543       if (flt_reg_pairs + 1 < flt_reg_max) {
   544         regs[i].set2(as_FloatRegister(flt_reg_pairs)->as_VMReg());
   545         flt_reg_pairs += 2;
   546       } else {
   547         regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs));
   548         stk_reg_pairs += 2;
   549       }
   550       break;
   551     case T_VOID: regs[i].set_bad();  break; // Halves of longs & doubles
   552     default:
   553       ShouldNotReachHere();
   554     }
   555   }
   557   // Return the amount of stack space these arguments will need.
   558   return stk_reg_pairs;
   560 }
   562 // Helper class mostly to avoid passing masm everywhere, and handle
   563 // store displacement overflow logic.
   564 class AdapterGenerator {
   565   MacroAssembler *masm;
   566   Register Rdisp;
   567   void set_Rdisp(Register r)  { Rdisp = r; }
   569   void patch_callers_callsite();
   571   // base+st_off points to top of argument
   572   int arg_offset(const int st_off) { return st_off; }
   573   int next_arg_offset(const int st_off) {
   574     return st_off - Interpreter::stackElementSize;
   575   }
   577   // Argument slot values may be loaded first into a register because
   578   // they might not fit into displacement.
   579   RegisterOrConstant arg_slot(const int st_off);
   580   RegisterOrConstant next_arg_slot(const int st_off);
   582   // Stores long into offset pointed to by base
   583   void store_c2i_long(Register r, Register base,
   584                       const int st_off, bool is_stack);
   585   void store_c2i_object(Register r, Register base,
   586                         const int st_off);
   587   void store_c2i_int(Register r, Register base,
   588                      const int st_off);
   589   void store_c2i_double(VMReg r_2,
   590                         VMReg r_1, Register base, const int st_off);
   591   void store_c2i_float(FloatRegister f, Register base,
   592                        const int st_off);
   594  public:
   595   void gen_c2i_adapter(int total_args_passed,
   596                               // VMReg max_arg,
   597                               int comp_args_on_stack, // VMRegStackSlots
   598                               const BasicType *sig_bt,
   599                               const VMRegPair *regs,
   600                               Label& skip_fixup);
   601   void gen_i2c_adapter(int total_args_passed,
   602                               // VMReg max_arg,
   603                               int comp_args_on_stack, // VMRegStackSlots
   604                               const BasicType *sig_bt,
   605                               const VMRegPair *regs);
   607   AdapterGenerator(MacroAssembler *_masm) : masm(_masm) {}
   608 };
   611 // Patch the callers callsite with entry to compiled code if it exists.
   612 void AdapterGenerator::patch_callers_callsite() {
   613   Label L;
   614   __ ld_ptr(G5_method, in_bytes(methodOopDesc::code_offset()), G3_scratch);
   615   __ br_null(G3_scratch, false, __ pt, L);
   616   // Schedule the branch target address early.
   617   __ delayed()->ld_ptr(G5_method, in_bytes(methodOopDesc::interpreter_entry_offset()), G3_scratch);
   618   // Call into the VM to patch the caller, then jump to compiled callee
   619   __ save_frame(4);     // Args in compiled layout; do not blow them
   621   // Must save all the live Gregs the list is:
   622   // G1: 1st Long arg (32bit build)
   623   // G2: global allocated to TLS
   624   // G3: used in inline cache check (scratch)
   625   // G4: 2nd Long arg (32bit build);
   626   // G5: used in inline cache check (methodOop)
   628   // The longs must go to the stack by hand since in the 32 bit build they can be trashed by window ops.
   630 #ifdef _LP64
   631   // mov(s,d)
   632   __ mov(G1, L1);
   633   __ mov(G4, L4);
   634   __ mov(G5_method, L5);
   635   __ mov(G5_method, O0);         // VM needs target method
   636   __ mov(I7, O1);                // VM needs caller's callsite
   637   // Must be a leaf call...
   638   // can be very far once the blob has been relocated
   639   AddressLiteral dest(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite));
   640   __ relocate(relocInfo::runtime_call_type);
   641   __ jumpl_to(dest, O7, O7);
   642   __ delayed()->mov(G2_thread, L7_thread_cache);
   643   __ mov(L7_thread_cache, G2_thread);
   644   __ mov(L1, G1);
   645   __ mov(L4, G4);
   646   __ mov(L5, G5_method);
   647 #else
   648   __ stx(G1, FP, -8 + STACK_BIAS);
   649   __ stx(G4, FP, -16 + STACK_BIAS);
   650   __ mov(G5_method, L5);
   651   __ mov(G5_method, O0);         // VM needs target method
   652   __ mov(I7, O1);                // VM needs caller's callsite
   653   // Must be a leaf call...
   654   __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), relocInfo::runtime_call_type);
   655   __ delayed()->mov(G2_thread, L7_thread_cache);
   656   __ mov(L7_thread_cache, G2_thread);
   657   __ ldx(FP, -8 + STACK_BIAS, G1);
   658   __ ldx(FP, -16 + STACK_BIAS, G4);
   659   __ mov(L5, G5_method);
   660   __ ld_ptr(G5_method, in_bytes(methodOopDesc::interpreter_entry_offset()), G3_scratch);
   661 #endif /* _LP64 */
   663   __ restore();      // Restore args
   664   __ bind(L);
   665 }
   668 RegisterOrConstant AdapterGenerator::arg_slot(const int st_off) {
   669   RegisterOrConstant roc(arg_offset(st_off));
   670   return __ ensure_simm13_or_reg(roc, Rdisp);
   671 }
   673 RegisterOrConstant AdapterGenerator::next_arg_slot(const int st_off) {
   674   RegisterOrConstant roc(next_arg_offset(st_off));
   675   return __ ensure_simm13_or_reg(roc, Rdisp);
   676 }
   679 // Stores long into offset pointed to by base
   680 void AdapterGenerator::store_c2i_long(Register r, Register base,
   681                                       const int st_off, bool is_stack) {
   682 #ifdef _LP64
   683   // In V9, longs are given 2 64-bit slots in the interpreter, but the
   684   // data is passed in only 1 slot.
   685   __ stx(r, base, next_arg_slot(st_off));
   686 #else
   687 #ifdef COMPILER2
   688   // Misaligned store of 64-bit data
   689   __ stw(r, base, arg_slot(st_off));    // lo bits
   690   __ srlx(r, 32, r);
   691   __ stw(r, base, next_arg_slot(st_off));  // hi bits
   692 #else
   693   if (is_stack) {
   694     // Misaligned store of 64-bit data
   695     __ stw(r, base, arg_slot(st_off));    // lo bits
   696     __ srlx(r, 32, r);
   697     __ stw(r, base, next_arg_slot(st_off));  // hi bits
   698   } else {
   699     __ stw(r->successor(), base, arg_slot(st_off)     ); // lo bits
   700     __ stw(r             , base, next_arg_slot(st_off)); // hi bits
   701   }
   702 #endif // COMPILER2
   703 #endif // _LP64
   704 }
   706 void AdapterGenerator::store_c2i_object(Register r, Register base,
   707                       const int st_off) {
   708   __ st_ptr (r, base, arg_slot(st_off));
   709 }
   711 void AdapterGenerator::store_c2i_int(Register r, Register base,
   712                    const int st_off) {
   713   __ st (r, base, arg_slot(st_off));
   714 }
   716 // Stores into offset pointed to by base
   717 void AdapterGenerator::store_c2i_double(VMReg r_2,
   718                       VMReg r_1, Register base, const int st_off) {
   719 #ifdef _LP64
   720   // In V9, doubles are given 2 64-bit slots in the interpreter, but the
   721   // data is passed in only 1 slot.
   722   __ stf(FloatRegisterImpl::D, r_1->as_FloatRegister(), base, next_arg_slot(st_off));
   723 #else
   724   // Need to marshal 64-bit value from misaligned Lesp loads
   725   __ stf(FloatRegisterImpl::S, r_1->as_FloatRegister(), base, next_arg_slot(st_off));
   726   __ stf(FloatRegisterImpl::S, r_2->as_FloatRegister(), base, arg_slot(st_off) );
   727 #endif
   728 }
   730 void AdapterGenerator::store_c2i_float(FloatRegister f, Register base,
   731                                        const int st_off) {
   732   __ stf(FloatRegisterImpl::S, f, base, arg_slot(st_off));
   733 }
   735 void AdapterGenerator::gen_c2i_adapter(
   736                             int total_args_passed,
   737                             // VMReg max_arg,
   738                             int comp_args_on_stack, // VMRegStackSlots
   739                             const BasicType *sig_bt,
   740                             const VMRegPair *regs,
   741                             Label& skip_fixup) {
   743   // Before we get into the guts of the C2I adapter, see if we should be here
   744   // at all.  We've come from compiled code and are attempting to jump to the
   745   // interpreter, which means the caller made a static call to get here
   746   // (vcalls always get a compiled target if there is one).  Check for a
   747   // compiled target.  If there is one, we need to patch the caller's call.
   748   // However we will run interpreted if we come thru here. The next pass
   749   // thru the call site will run compiled. If we ran compiled here then
   750   // we can (theoretically) do endless i2c->c2i->i2c transitions during
   751   // deopt/uncommon trap cycles. If we always go interpreted here then
   752   // we can have at most one and don't need to play any tricks to keep
   753   // from endlessly growing the stack.
   754   //
   755   // Actually if we detected that we had an i2c->c2i transition here we
   756   // ought to be able to reset the world back to the state of the interpreted
   757   // call and not bother building another interpreter arg area. We don't
   758   // do that at this point.
   760   patch_callers_callsite();
   762   __ bind(skip_fixup);
   764   // Since all args are passed on the stack, total_args_passed*wordSize is the
   765   // space we need.  Add in varargs area needed by the interpreter. Round up
   766   // to stack alignment.
   767   const int arg_size = total_args_passed * Interpreter::stackElementSize;
   768   const int varargs_area =
   769                  (frame::varargs_offset - frame::register_save_words)*wordSize;
   770   const int extraspace = round_to(arg_size + varargs_area, 2*wordSize);
   772   int bias = STACK_BIAS;
   773   const int interp_arg_offset = frame::varargs_offset*wordSize +
   774                         (total_args_passed-1)*Interpreter::stackElementSize;
   776   Register base = SP;
   778 #ifdef _LP64
   779   // In the 64bit build because of wider slots and STACKBIAS we can run
   780   // out of bits in the displacement to do loads and stores.  Use g3 as
   781   // temporary displacement.
   782   if (! __ is_simm13(extraspace)) {
   783     __ set(extraspace, G3_scratch);
   784     __ sub(SP, G3_scratch, SP);
   785   } else {
   786     __ sub(SP, extraspace, SP);
   787   }
   788   set_Rdisp(G3_scratch);
   789 #else
   790   __ sub(SP, extraspace, SP);
   791 #endif // _LP64
   793   // First write G1 (if used) to wherever it must go
   794   for (int i=0; i<total_args_passed; i++) {
   795     const int st_off = interp_arg_offset - (i*Interpreter::stackElementSize) + bias;
   796     VMReg r_1 = regs[i].first();
   797     VMReg r_2 = regs[i].second();
   798     if (r_1 == G1_scratch->as_VMReg()) {
   799       if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ARRAY) {
   800         store_c2i_object(G1_scratch, base, st_off);
   801       } else if (sig_bt[i] == T_LONG) {
   802         assert(!TieredCompilation, "should not use register args for longs");
   803         store_c2i_long(G1_scratch, base, st_off, false);
   804       } else {
   805         store_c2i_int(G1_scratch, base, st_off);
   806       }
   807     }
   808   }
   810   // Now write the args into the outgoing interpreter space
   811   for (int i=0; i<total_args_passed; i++) {
   812     const int st_off = interp_arg_offset - (i*Interpreter::stackElementSize) + bias;
   813     VMReg r_1 = regs[i].first();
   814     VMReg r_2 = regs[i].second();
   815     if (!r_1->is_valid()) {
   816       assert(!r_2->is_valid(), "");
   817       continue;
   818     }
   819     // Skip G1 if found as we did it first in order to free it up
   820     if (r_1 == G1_scratch->as_VMReg()) {
   821       continue;
   822     }
   823 #ifdef ASSERT
   824     bool G1_forced = false;
   825 #endif // ASSERT
   826     if (r_1->is_stack()) {        // Pretend stack targets are loaded into G1
   827 #ifdef _LP64
   828       Register ld_off = Rdisp;
   829       __ set(reg2offset(r_1) + extraspace + bias, ld_off);
   830 #else
   831       int ld_off = reg2offset(r_1) + extraspace + bias;
   832 #endif // _LP64
   833 #ifdef ASSERT
   834       G1_forced = true;
   835 #endif // ASSERT
   836       r_1 = G1_scratch->as_VMReg();// as part of the load/store shuffle
   837       if (!r_2->is_valid()) __ ld (base, ld_off, G1_scratch);
   838       else                  __ ldx(base, ld_off, G1_scratch);
   839     }
   841     if (r_1->is_Register()) {
   842       Register r = r_1->as_Register()->after_restore();
   843       if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ARRAY) {
   844         store_c2i_object(r, base, st_off);
   845       } else if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
   846 #ifndef _LP64
   847         if (TieredCompilation) {
   848           assert(G1_forced || sig_bt[i] != T_LONG, "should not use register args for longs");
   849         }
   850 #endif // _LP64
   851         store_c2i_long(r, base, st_off, r_2->is_stack());
   852       } else {
   853         store_c2i_int(r, base, st_off);
   854       }
   855     } else {
   856       assert(r_1->is_FloatRegister(), "");
   857       if (sig_bt[i] == T_FLOAT) {
   858         store_c2i_float(r_1->as_FloatRegister(), base, st_off);
   859       } else {
   860         assert(sig_bt[i] == T_DOUBLE, "wrong type");
   861         store_c2i_double(r_2, r_1, base, st_off);
   862       }
   863     }
   864   }
   866 #ifdef _LP64
   867   // Need to reload G3_scratch, used for temporary displacements.
   868   __ ld_ptr(G5_method, in_bytes(methodOopDesc::interpreter_entry_offset()), G3_scratch);
   870   // Pass O5_savedSP as an argument to the interpreter.
   871   // The interpreter will restore SP to this value before returning.
   872   __ set(extraspace, G1);
   873   __ add(SP, G1, O5_savedSP);
   874 #else
   875   // Pass O5_savedSP as an argument to the interpreter.
   876   // The interpreter will restore SP to this value before returning.
   877   __ add(SP, extraspace, O5_savedSP);
   878 #endif // _LP64
   880   __ mov((frame::varargs_offset)*wordSize -
   881          1*Interpreter::stackElementSize+bias+BytesPerWord, G1);
   882   // Jump to the interpreter just as if interpreter was doing it.
   883   __ jmpl(G3_scratch, 0, G0);
   884   // Setup Lesp for the call.  Cannot actually set Lesp as the current Lesp
   885   // (really L0) is in use by the compiled frame as a generic temp.  However,
   886   // the interpreter does not know where its args are without some kind of
   887   // arg pointer being passed in.  Pass it in Gargs.
   888   __ delayed()->add(SP, G1, Gargs);
   889 }
   891 void AdapterGenerator::gen_i2c_adapter(
   892                             int total_args_passed,
   893                             // VMReg max_arg,
   894                             int comp_args_on_stack, // VMRegStackSlots
   895                             const BasicType *sig_bt,
   896                             const VMRegPair *regs) {
   898   // Generate an I2C adapter: adjust the I-frame to make space for the C-frame
   899   // layout.  Lesp was saved by the calling I-frame and will be restored on
   900   // return.  Meanwhile, outgoing arg space is all owned by the callee
   901   // C-frame, so we can mangle it at will.  After adjusting the frame size,
   902   // hoist register arguments and repack other args according to the compiled
   903   // code convention.  Finally, end in a jump to the compiled code.  The entry
   904   // point address is the start of the buffer.
   906   // We will only enter here from an interpreted frame and never from after
   907   // passing thru a c2i. Azul allowed this but we do not. If we lose the
   908   // race and use a c2i we will remain interpreted for the race loser(s).
   909   // This removes all sorts of headaches on the x86 side and also eliminates
   910   // the possibility of having c2i -> i2c -> c2i -> ... endless transitions.
   912   // As you can see from the list of inputs & outputs there are not a lot
   913   // of temp registers to work with: mostly G1, G3 & G4.
   915   // Inputs:
   916   // G2_thread      - TLS
   917   // G5_method      - Method oop
   918   // G4 (Gargs)     - Pointer to interpreter's args
   919   // O0..O4         - free for scratch
   920   // O5_savedSP     - Caller's saved SP, to be restored if needed
   921   // O6             - Current SP!
   922   // O7             - Valid return address
   923   // L0-L7, I0-I7   - Caller's temps (no frame pushed yet)
   925   // Outputs:
   926   // G2_thread      - TLS
   927   // G1, G4         - Outgoing long args in 32-bit build
   928   // O0-O5          - Outgoing args in compiled layout
   929   // O6             - Adjusted or restored SP
   930   // O7             - Valid return address
   931   // L0-L7, I0-I7   - Caller's temps (no frame pushed yet)
   932   // F0-F7          - more outgoing args
   935   // Gargs is the incoming argument base, and also an outgoing argument.
   936   __ sub(Gargs, BytesPerWord, Gargs);
   938   // ON ENTRY TO THE CODE WE ARE MAKING, WE HAVE AN INTERPRETED FRAME
   939   // WITH O7 HOLDING A VALID RETURN PC
   940   //
   941   // |              |
   942   // :  java stack  :
   943   // |              |
   944   // +--------------+ <--- start of outgoing args
   945   // |   receiver   |   |
   946   // : rest of args :   |---size is java-arg-words
   947   // |              |   |
   948   // +--------------+ <--- O4_args (misaligned) and Lesp if prior is not C2I
   949   // |              |   |
   950   // :    unused    :   |---Space for max Java stack, plus stack alignment
   951   // |              |   |
   952   // +--------------+ <--- SP + 16*wordsize
   953   // |              |
   954   // :    window    :
   955   // |              |
   956   // +--------------+ <--- SP
   958   // WE REPACK THE STACK.  We use the common calling convention layout as
   959   // discovered by calling SharedRuntime::calling_convention.  We assume it
   960   // causes an arbitrary shuffle of memory, which may require some register
   961   // temps to do the shuffle.  We hope for (and optimize for) the case where
   962   // temps are not needed.  We may have to resize the stack slightly, in case
   963   // we need alignment padding (32-bit interpreter can pass longs & doubles
   964   // misaligned, but the compilers expect them aligned).
   965   //
   966   // |              |
   967   // :  java stack  :
   968   // |              |
   969   // +--------------+ <--- start of outgoing args
   970   // |  pad, align  |   |
   971   // +--------------+   |
   972   // | ints, floats |   |---Outgoing stack args, packed low.
   973   // +--------------+   |   First few args in registers.
   974   // :   doubles    :   |
   975   // |   longs      |   |
   976   // +--------------+ <--- SP' + 16*wordsize
   977   // |              |
   978   // :    window    :
   979   // |              |
   980   // +--------------+ <--- SP'
   982   // ON EXIT FROM THE CODE WE ARE MAKING, WE STILL HAVE AN INTERPRETED FRAME
   983   // WITH O7 HOLDING A VALID RETURN PC - ITS JUST THAT THE ARGS ARE NOW SETUP
   984   // FOR COMPILED CODE AND THE FRAME SLIGHTLY GROWN.
   986   // Cut-out for having no stack args.  Since up to 6 args are passed
   987   // in registers, we will commonly have no stack args.
   988   if (comp_args_on_stack > 0) {
   990     // Convert VMReg stack slots to words.
   991     int comp_words_on_stack = round_to(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
   992     // Round up to minimum stack alignment, in wordSize
   993     comp_words_on_stack = round_to(comp_words_on_stack, 2);
   994     // Now compute the distance from Lesp to SP.  This calculation does not
   995     // include the space for total_args_passed because Lesp has not yet popped
   996     // the arguments.
   997     __ sub(SP, (comp_words_on_stack)*wordSize, SP);
   998   }
  1000   // Will jump to the compiled code just as if compiled code was doing it.
  1001   // Pre-load the register-jump target early, to schedule it better.
  1002   __ ld_ptr(G5_method, in_bytes(methodOopDesc::from_compiled_offset()), G3);
  1004   // Now generate the shuffle code.  Pick up all register args and move the
  1005   // rest through G1_scratch.
  1006   for (int i=0; i<total_args_passed; i++) {
  1007     if (sig_bt[i] == T_VOID) {
  1008       // Longs and doubles are passed in native word order, but misaligned
  1009       // in the 32-bit build.
  1010       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
  1011       continue;
  1012     }
  1014     // Pick up 0, 1 or 2 words from Lesp+offset.  Assume mis-aligned in the
  1015     // 32-bit build and aligned in the 64-bit build.  Look for the obvious
  1016     // ldx/lddf optimizations.
  1018     // Load in argument order going down.
  1019     const int ld_off = (total_args_passed-i)*Interpreter::stackElementSize;
  1020     set_Rdisp(G1_scratch);
  1022     VMReg r_1 = regs[i].first();
  1023     VMReg r_2 = regs[i].second();
  1024     if (!r_1->is_valid()) {
  1025       assert(!r_2->is_valid(), "");
  1026       continue;
  1027     }
  1028     if (r_1->is_stack()) {        // Pretend stack targets are loaded into F8/F9
  1029       r_1 = F8->as_VMReg();        // as part of the load/store shuffle
  1030       if (r_2->is_valid()) r_2 = r_1->next();
  1031     }
  1032     if (r_1->is_Register()) {  // Register argument
  1033       Register r = r_1->as_Register()->after_restore();
  1034       if (!r_2->is_valid()) {
  1035         __ ld(Gargs, arg_slot(ld_off), r);
  1036       } else {
  1037 #ifdef _LP64
  1038         // In V9, longs are given 2 64-bit slots in the interpreter, but the
  1039         // data is passed in only 1 slot.
  1040         RegisterOrConstant slot = (sig_bt[i] == T_LONG) ?
  1041               next_arg_slot(ld_off) : arg_slot(ld_off);
  1042         __ ldx(Gargs, slot, r);
  1043 #else
  1044         // Need to load a 64-bit value into G1/G4, but G1/G4 is being used in the
  1045         // stack shuffle.  Load the first 2 longs into G1/G4 later.
  1046 #endif
  1047       }
  1048     } else {
  1049       assert(r_1->is_FloatRegister(), "");
  1050       if (!r_2->is_valid()) {
  1051         __ ldf(FloatRegisterImpl::S, Gargs, arg_slot(ld_off), r_1->as_FloatRegister());
  1052       } else {
  1053 #ifdef _LP64
  1054         // In V9, doubles are given 2 64-bit slots in the interpreter, but the
  1055         // data is passed in only 1 slot.  This code also handles longs that
  1056         // are passed on the stack, but need a stack-to-stack move through a
  1057         // spare float register.
  1058         RegisterOrConstant slot = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ?
  1059               next_arg_slot(ld_off) : arg_slot(ld_off);
  1060         __ ldf(FloatRegisterImpl::D, Gargs, slot, r_1->as_FloatRegister());
  1061 #else
  1062         // Need to marshal 64-bit value from misaligned Lesp loads
  1063         __ ldf(FloatRegisterImpl::S, Gargs, next_arg_slot(ld_off), r_1->as_FloatRegister());
  1064         __ ldf(FloatRegisterImpl::S, Gargs, arg_slot(ld_off), r_2->as_FloatRegister());
  1065 #endif
  1066       }
  1067     }
  1068     // Was the argument really intended to be on the stack, but was loaded
  1069     // into F8/F9?
  1070     if (regs[i].first()->is_stack()) {
  1071       assert(r_1->as_FloatRegister() == F8, "fix this code");
  1072       // Convert stack slot to an SP offset
  1073       int st_off = reg2offset(regs[i].first()) + STACK_BIAS;
  1074       // Store down the shuffled stack word.  Target address _is_ aligned.
  1075       RegisterOrConstant slot = __ ensure_simm13_or_reg(st_off, Rdisp);
  1076       if (!r_2->is_valid()) __ stf(FloatRegisterImpl::S, r_1->as_FloatRegister(), SP, slot);
  1077       else                  __ stf(FloatRegisterImpl::D, r_1->as_FloatRegister(), SP, slot);
  1078     }
  1079   }
  1080   bool made_space = false;
  1081 #ifndef _LP64
  1082   // May need to pick up a few long args in G1/G4
  1083   bool g4_crushed = false;
  1084   bool g3_crushed = false;
  1085   for (int i=0; i<total_args_passed; i++) {
  1086     if (regs[i].first()->is_Register() && regs[i].second()->is_valid()) {
  1087       // Load in argument order going down
  1088       int ld_off = (total_args_passed-i)*Interpreter::stackElementSize;
  1089       // Need to marshal 64-bit value from misaligned Lesp loads
  1090       Register r = regs[i].first()->as_Register()->after_restore();
  1091       if (r == G1 || r == G4) {
  1092         assert(!g4_crushed, "ordering problem");
  1093         if (r == G4){
  1094           g4_crushed = true;
  1095           __ lduw(Gargs, arg_slot(ld_off)     , G3_scratch); // Load lo bits
  1096           __ ld  (Gargs, next_arg_slot(ld_off), r);          // Load hi bits
  1097         } else {
  1098           // better schedule this way
  1099           __ ld  (Gargs, next_arg_slot(ld_off), r);          // Load hi bits
  1100           __ lduw(Gargs, arg_slot(ld_off)     , G3_scratch); // Load lo bits
  1101         }
  1102         g3_crushed = true;
  1103         __ sllx(r, 32, r);
  1104         __ or3(G3_scratch, r, r);
  1105       } else {
  1106         assert(r->is_out(), "longs passed in two O registers");
  1107         __ ld  (Gargs, arg_slot(ld_off)     , r->successor()); // Load lo bits
  1108         __ ld  (Gargs, next_arg_slot(ld_off), r);              // Load hi bits
  1109       }
  1110     }
  1111   }
  1112 #endif
  1114   // Jump to the compiled code just as if compiled code was doing it.
  1115   //
  1116 #ifndef _LP64
  1117     if (g3_crushed) {
  1118       // Rats load was wasted, at least it is in cache...
  1119       __ ld_ptr(G5_method, methodOopDesc::from_compiled_offset(), G3);
  1120     }
  1121 #endif /* _LP64 */
  1123     // 6243940 We might end up in handle_wrong_method if
  1124     // the callee is deoptimized as we race thru here. If that
  1125     // happens we don't want to take a safepoint because the
  1126     // caller frame will look interpreted and arguments are now
  1127     // "compiled" so it is much better to make this transition
  1128     // invisible to the stack walking code. Unfortunately if
  1129     // we try and find the callee by normal means a safepoint
  1130     // is possible. So we stash the desired callee in the thread
  1131     // and the vm will find it there should this case occur.
  1132     Address callee_target_addr(G2_thread, JavaThread::callee_target_offset());
  1133     __ st_ptr(G5_method, callee_target_addr);
  1135     if (StressNonEntrant) {
  1136       // Open a big window for deopt failure
  1137       __ save_frame(0);
  1138       __ mov(G0, L0);
  1139       Label loop;
  1140       __ bind(loop);
  1141       __ sub(L0, 1, L0);
  1142       __ br_null(L0, false, Assembler::pt, loop);
  1143       __ delayed()->nop();
  1145       __ restore();
  1146     }
  1149     __ jmpl(G3, 0, G0);
  1150     __ delayed()->nop();
  1151 }
  1153 // ---------------------------------------------------------------
  1154 AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
  1155                                                             int total_args_passed,
  1156                                                             // VMReg max_arg,
  1157                                                             int comp_args_on_stack, // VMRegStackSlots
  1158                                                             const BasicType *sig_bt,
  1159                                                             const VMRegPair *regs,
  1160                                                             AdapterFingerPrint* fingerprint) {
  1161   address i2c_entry = __ pc();
  1163   AdapterGenerator agen(masm);
  1165   agen.gen_i2c_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs);
  1168   // -------------------------------------------------------------------------
  1169   // Generate a C2I adapter.  On entry we know G5 holds the methodOop.  The
  1170   // args start out packed in the compiled layout.  They need to be unpacked
  1171   // into the interpreter layout.  This will almost always require some stack
  1172   // space.  We grow the current (compiled) stack, then repack the args.  We
  1173   // finally end in a jump to the generic interpreter entry point.  On exit
  1174   // from the interpreter, the interpreter will restore our SP (lest the
  1175   // compiled code, which relies solely on SP and not FP, get sick).
  1177   address c2i_unverified_entry = __ pc();
  1178   Label skip_fixup;
  1179   {
  1180 #if !defined(_LP64) && defined(COMPILER2)
  1181     Register R_temp   = L0;   // another scratch register
  1182 #else
  1183     Register R_temp   = G1;   // another scratch register
  1184 #endif
  1186     AddressLiteral ic_miss(SharedRuntime::get_ic_miss_stub());
  1188     __ verify_oop(O0);
  1189     __ verify_oop(G5_method);
  1190     __ load_klass(O0, G3_scratch);
  1191     __ verify_oop(G3_scratch);
  1193 #if !defined(_LP64) && defined(COMPILER2)
  1194     __ save(SP, -frame::register_save_words*wordSize, SP);
  1195     __ ld_ptr(G5_method, compiledICHolderOopDesc::holder_klass_offset(), R_temp);
  1196     __ verify_oop(R_temp);
  1197     __ cmp(G3_scratch, R_temp);
  1198     __ restore();
  1199 #else
  1200     __ ld_ptr(G5_method, compiledICHolderOopDesc::holder_klass_offset(), R_temp);
  1201     __ verify_oop(R_temp);
  1202     __ cmp(G3_scratch, R_temp);
  1203 #endif
  1205     Label ok, ok2;
  1206     __ brx(Assembler::equal, false, Assembler::pt, ok);
  1207     __ delayed()->ld_ptr(G5_method, compiledICHolderOopDesc::holder_method_offset(), G5_method);
  1208     __ jump_to(ic_miss, G3_scratch);
  1209     __ delayed()->nop();
  1211     __ bind(ok);
  1212     // Method might have been compiled since the call site was patched to
   1213     // interpreted; if that is the case, treat it as a miss so we can get
  1214     // the call site corrected.
  1215     __ ld_ptr(G5_method, in_bytes(methodOopDesc::code_offset()), G3_scratch);
  1216     __ bind(ok2);
  1217     __ br_null(G3_scratch, false, __ pt, skip_fixup);
  1218     __ delayed()->ld_ptr(G5_method, in_bytes(methodOopDesc::interpreter_entry_offset()), G3_scratch);
  1219     __ jump_to(ic_miss, G3_scratch);
  1220     __ delayed()->nop();
  1224   address c2i_entry = __ pc();
  1226   agen.gen_c2i_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
  1228   __ flush();
  1229   return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
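// To recap the three entry points handed to new_entry above: i2c_entry is used
// when an interpreted caller invokes this compiled method; c2i_entry is used
// when a compiled caller must drop into the interpreter; c2i_unverified_entry
// is the same as c2i_entry but first performs the inline-cache klass check,
// branching to the ic_miss stub on a mismatch or when the callee has been
// compiled in the meantime so the call site can be corrected.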
  1233 // Helper function for native calling conventions
  1234 static VMReg int_stk_helper( int i ) {
  1235   // Bias any stack based VMReg we get by ignoring the window area
  1236   // but not the register parameter save area.
  1237   //
  1238   // This is strange for the following reasons. We'd normally expect
   1239   // the calling convention to return a VMReg for a stack slot
   1240   // completely ignoring any abi reserved area. C2 thinks of that
   1241   // abi area as only out_preserve_stack_slots. This does not include
   1242   // the area allocated by the C abi to store down integer arguments
   1243   // because the java calling convention does not use it. So,
   1244   // since c2 assumes that there are only out_preserve_stack_slots
   1245   // to bias the optoregs (which impacts VMRegs), when actually referencing
   1246   // any stack location the C calling convention must add in this bias amount
   1247   // to make up for the fact that out_preserve_stack_slots is
   1248   // insufficient for C calls. What a mess. I sure hope those 6
  1249   // stack words were worth it on every java call!
  1251   // Another way of cleaning this up would be for out_preserve_stack_slots
  1252   // to take a parameter to say whether it was C or java calling conventions.
  1253   // Then things might look a little better (but not much).
  1255   int mem_parm_offset = i - SPARC_ARGS_IN_REGS_NUM;
  1256   if( mem_parm_offset < 0 ) {
  1257     return as_oRegister(i)->as_VMReg();
  1258   } else {
  1259     int actual_offset = (mem_parm_offset + frame::memory_parameter_word_sp_offset) * VMRegImpl::slots_per_word;
  1260     // Now return a biased offset that will be correct when out_preserve_slots is added back in
  1261     return VMRegImpl::stack2reg(actual_offset - SharedRuntime::out_preserve_stack_slots());
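// A quick worked example of the helper above (a sketch; it assumes
// SPARC_ARGS_IN_REGS_NUM is 6, i.e. the six outgoing O registers):
//   i = 0..5  ->  O0..O5
//   i = 6     ->  mem_parm_offset == 0, so the slot returned is
//                 frame::memory_parameter_word_sp_offset * VMRegImpl::slots_per_word
//                   - SharedRuntime::out_preserve_stack_slots()
// which becomes the correct SP-relative slot once out_preserve_stack_slots
// is added back in by the frame layout code.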
  1266 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
  1267                                          VMRegPair *regs,
  1268                                          int total_args_passed) {
  1270     // Return the number of VMReg stack_slots needed for the args.
  1271     // This value does not include an abi space (like register window
  1272     // save area).
  1274     // The native convention is V8 if !LP64
  1275     // The LP64 convention is the V9 convention which is slightly more sane.
  1277     // We return the amount of VMReg stack slots we need to reserve for all
  1278     // the arguments NOT counting out_preserve_stack_slots. Since we always
  1279     // have space for storing at least 6 registers to memory we start with that.
  1280     // See int_stk_helper for a further discussion.
  1281     int max_stack_slots = (frame::varargs_offset * VMRegImpl::slots_per_word) - SharedRuntime::out_preserve_stack_slots();
  1283 #ifdef _LP64
  1284     // V9 convention: All things "as-if" on double-wide stack slots.
  1285     // Hoist any int/ptr/long's in the first 6 to int regs.
  1286     // Hoist any flt/dbl's in the first 16 dbl regs.
  1287     int j = 0;                  // Count of actual args, not HALVES
  1288     for( int i=0; i<total_args_passed; i++, j++ ) {
  1289       switch( sig_bt[i] ) {
  1290       case T_BOOLEAN:
  1291       case T_BYTE:
  1292       case T_CHAR:
  1293       case T_INT:
  1294       case T_SHORT:
  1295         regs[i].set1( int_stk_helper( j ) ); break;
  1296       case T_LONG:
  1297         assert( sig_bt[i+1] == T_VOID, "expecting half" );
  1298       case T_ADDRESS: // raw pointers, like current thread, for VM calls
  1299       case T_ARRAY:
  1300       case T_OBJECT:
  1301         regs[i].set2( int_stk_helper( j ) );
  1302         break;
  1303       case T_FLOAT:
  1304         if ( j < 16 ) {
  1305           // V9ism: floats go in ODD registers
  1306           regs[i].set1(as_FloatRegister(1 + (j<<1))->as_VMReg());
  1307         } else {
  1308           // V9ism: floats go in ODD stack slot
  1309           regs[i].set1(VMRegImpl::stack2reg(1 + (j<<1)));
  1311         break;
  1312       case T_DOUBLE:
  1313         assert( sig_bt[i+1] == T_VOID, "expecting half" );
  1314         if ( j < 16 ) {
  1315           // V9ism: doubles go in EVEN/ODD regs
  1316           regs[i].set2(as_FloatRegister(j<<1)->as_VMReg());
  1317         } else {
  1318           // V9ism: doubles go in EVEN/ODD stack slots
  1319           regs[i].set2(VMRegImpl::stack2reg(j<<1));
  1321         break;
  1322       case T_VOID:  regs[i].set_bad(); j--; break; // Do not count HALVES
  1323       default:
  1324         ShouldNotReachHere();
  1326       if (regs[i].first()->is_stack()) {
  1327         int off =  regs[i].first()->reg2stack();
  1328         if (off > max_stack_slots) max_stack_slots = off;
  1330       if (regs[i].second()->is_stack()) {
  1331         int off =  regs[i].second()->reg2stack();
  1332         if (off > max_stack_slots) max_stack_slots = off;
  1336 #else // _LP64
  1337     // V8 convention: first 6 things in O-regs, rest on stack.
  1338     // Alignment is willy-nilly.
  1339     for( int i=0; i<total_args_passed; i++ ) {
  1340       switch( sig_bt[i] ) {
  1341       case T_ADDRESS: // raw pointers, like current thread, for VM calls
  1342       case T_ARRAY:
  1343       case T_BOOLEAN:
  1344       case T_BYTE:
  1345       case T_CHAR:
  1346       case T_FLOAT:
  1347       case T_INT:
  1348       case T_OBJECT:
  1349       case T_SHORT:
  1350         regs[i].set1( int_stk_helper( i ) );
  1351         break;
  1352       case T_DOUBLE:
  1353       case T_LONG:
  1354         assert( sig_bt[i+1] == T_VOID, "expecting half" );
  1355         regs[i].set_pair( int_stk_helper( i+1 ), int_stk_helper( i ) );
  1356         break;
  1357       case T_VOID: regs[i].set_bad(); break;
  1358       default:
  1359         ShouldNotReachHere();
  1361       if (regs[i].first()->is_stack()) {
  1362         int off =  regs[i].first()->reg2stack();
  1363         if (off > max_stack_slots) max_stack_slots = off;
  1365       if (regs[i].second()->is_stack()) {
  1366         int off =  regs[i].second()->reg2stack();
  1367         if (off > max_stack_slots) max_stack_slots = off;
  1370 #endif // _LP64
  1372   return round_to(max_stack_slots + 1, 2);
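// An example trace of the _LP64 branch above (a sketch, not exhaustive): for
// sig_bt = { T_INT, T_LONG, T_VOID, T_FLOAT } the loop sees j = 0, 1, -, 2 and
// assigns regs = { O0, O1 (double-wide), <bad>, F5 }, the float landing in the
// odd half of double-wide slot 2 because 1 + (2 << 1) == 5.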
  1377 // ---------------------------------------------------------------------------
  1378 void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
  1379   switch (ret_type) {
  1380   case T_FLOAT:
  1381     __ stf(FloatRegisterImpl::S, F0, SP, frame_slots*VMRegImpl::stack_slot_size - 4+STACK_BIAS);
  1382     break;
  1383   case T_DOUBLE:
  1384     __ stf(FloatRegisterImpl::D, F0, SP, frame_slots*VMRegImpl::stack_slot_size - 8+STACK_BIAS);
  1385     break;
  1389 void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
  1390   switch (ret_type) {
  1391   case T_FLOAT:
  1392     __ ldf(FloatRegisterImpl::S, SP, frame_slots*VMRegImpl::stack_slot_size - 4+STACK_BIAS, F0);
  1393     break;
  1394   case T_DOUBLE:
  1395     __ ldf(FloatRegisterImpl::D, SP, frame_slots*VMRegImpl::stack_slot_size - 8+STACK_BIAS, F0);
  1396     break;
   1400 // Check and forward any pending exception.  Thread is stored in
  1401 // L7_thread_cache and possibly NOT in G2_thread.  Since this is a native call, there
  1402 // is no exception handler.  We merely pop this frame off and throw the
  1403 // exception in the caller's frame.
  1404 static void check_forward_pending_exception(MacroAssembler *masm, Register Rex_oop) {
  1405   Label L;
  1406   __ br_null(Rex_oop, false, Assembler::pt, L);
  1407   __ delayed()->mov(L7_thread_cache, G2_thread); // restore in case we have exception
  1408   // Since this is a native call, we *know* the proper exception handler
  1409   // without calling into the VM: it's the empty function.  Just pop this
  1410   // frame and then jump to forward_exception_entry; O7 will contain the
  1411   // native caller's return PC.
  1412  AddressLiteral exception_entry(StubRoutines::forward_exception_entry());
  1413   __ jump_to(exception_entry, G3_scratch);
  1414   __ delayed()->restore();      // Pop this frame off.
  1415   __ bind(L);
  1418 // A simple move of integer like type
  1419 static void simple_move32(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
  1420   if (src.first()->is_stack()) {
  1421     if (dst.first()->is_stack()) {
  1422       // stack to stack
  1423       __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5);
  1424       __ st(L5, SP, reg2offset(dst.first()) + STACK_BIAS);
  1425     } else {
  1426       // stack to reg
  1427       __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
  1429   } else if (dst.first()->is_stack()) {
  1430     // reg to stack
  1431     __ st(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS);
  1432   } else {
  1433     __ mov(src.first()->as_Register(), dst.first()->as_Register());
   1437 // On 64-bit we will store integer-like items to the stack as
   1438 // 64-bit items (sparc abi) even though java would only store
   1439 // 32 bits for a parameter. On 32-bit it will simply be 32 bits.
   1440 // So this routine will do 32->32 on 32-bit and 32->64 on 64-bit.
  1441 static void move32_64(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
  1442   if (src.first()->is_stack()) {
  1443     if (dst.first()->is_stack()) {
  1444       // stack to stack
  1445       __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5);
  1446       __ st_ptr(L5, SP, reg2offset(dst.first()) + STACK_BIAS);
  1447     } else {
  1448       // stack to reg
  1449       __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
  1451   } else if (dst.first()->is_stack()) {
  1452     // reg to stack
  1453     __ st_ptr(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS);
  1454   } else {
  1455     __ mov(src.first()->as_Register(), dst.first()->as_Register());
  1460 // An oop arg. Must pass a handle not the oop itself
  1461 static void object_move(MacroAssembler* masm,
  1462                         OopMap* map,
  1463                         int oop_handle_offset,
  1464                         int framesize_in_slots,
  1465                         VMRegPair src,
  1466                         VMRegPair dst,
  1467                         bool is_receiver,
  1468                         int* receiver_offset) {
  1470   // must pass a handle. First figure out the location we use as a handle
  1472   if (src.first()->is_stack()) {
  1473     // Oop is already on the stack
  1474     Register rHandle = dst.first()->is_stack() ? L5 : dst.first()->as_Register();
  1475     __ add(FP, reg2offset(src.first()) + STACK_BIAS, rHandle);
  1476     __ ld_ptr(rHandle, 0, L4);
  1477 #ifdef _LP64
  1478     __ movr( Assembler::rc_z, L4, G0, rHandle );
  1479 #else
  1480     __ tst( L4 );
  1481     __ movcc( Assembler::zero, false, Assembler::icc, G0, rHandle );
  1482 #endif
  1483     if (dst.first()->is_stack()) {
  1484       __ st_ptr(rHandle, SP, reg2offset(dst.first()) + STACK_BIAS);
  1486     int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
  1487     if (is_receiver) {
  1488       *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size;
  1490     map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots));
  1491   } else {
   1492     // Oop is in an input register; we must flush it to the stack
  1493     const Register rOop = src.first()->as_Register();
  1494     const Register rHandle = L5;
  1495     int oop_slot = rOop->input_number() * VMRegImpl::slots_per_word + oop_handle_offset;
  1496     int offset = oop_slot*VMRegImpl::stack_slot_size;
  1497     Label skip;
  1498     __ st_ptr(rOop, SP, offset + STACK_BIAS);
  1499     if (is_receiver) {
  1500       *receiver_offset = oop_slot * VMRegImpl::stack_slot_size;
  1502     map->set_oop(VMRegImpl::stack2reg(oop_slot));
  1503     __ add(SP, offset + STACK_BIAS, rHandle);
  1504 #ifdef _LP64
  1505     __ movr( Assembler::rc_z, rOop, G0, rHandle );
  1506 #else
  1507     __ tst( rOop );
  1508     __ movcc( Assembler::zero, false, Assembler::icc, G0, rHandle );
  1509 #endif
  1511     if (dst.first()->is_stack()) {
  1512       __ st_ptr(rHandle, SP, reg2offset(dst.first()) + STACK_BIAS);
  1513     } else {
  1514       __ mov(rHandle, dst.first()->as_Register());
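// In both branches above the "handle" handed to native code is the address of
// a stack slot that holds the oop, with one JNI-mandated wrinkle: a NULL oop
// must be passed as a NULL handle, which is what the movr/movcc on the oop
// value accomplishes.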
   1519 // A float arg may have to do a float reg to int reg conversion
  1520 static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
  1521   assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move");
  1523   if (src.first()->is_stack()) {
  1524     if (dst.first()->is_stack()) {
  1525       // stack to stack the easiest of the bunch
  1526       __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5);
  1527       __ st(L5, SP, reg2offset(dst.first()) + STACK_BIAS);
  1528     } else {
  1529       // stack to reg
  1530       if (dst.first()->is_Register()) {
  1531         __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
  1532       } else {
  1533         __ ldf(FloatRegisterImpl::S, FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_FloatRegister());
  1536   } else if (dst.first()->is_stack()) {
  1537     // reg to stack
  1538     if (src.first()->is_Register()) {
  1539       __ st(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS);
  1540     } else {
  1541       __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), SP, reg2offset(dst.first()) + STACK_BIAS);
  1543   } else {
  1544     // reg to reg
  1545     if (src.first()->is_Register()) {
  1546       if (dst.first()->is_Register()) {
  1547         // gpr -> gpr
  1548         __ mov(src.first()->as_Register(), dst.first()->as_Register());
  1549       } else {
  1550         // gpr -> fpr
  1551         __ st(src.first()->as_Register(), FP, -4 + STACK_BIAS);
  1552         __ ldf(FloatRegisterImpl::S, FP, -4 + STACK_BIAS, dst.first()->as_FloatRegister());
  1554     } else if (dst.first()->is_Register()) {
  1555       // fpr -> gpr
  1556       __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), FP, -4 + STACK_BIAS);
  1557       __ ld(FP, -4 + STACK_BIAS, dst.first()->as_Register());
  1558     } else {
  1559       // fpr -> fpr
  1560       // In theory these overlap but the ordering is such that this is likely a nop
  1561       if ( src.first() != dst.first()) {
  1562         __ fmov(FloatRegisterImpl::S, src.first()->as_FloatRegister(), dst.first()->as_FloatRegister());
  1568 static void split_long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
  1569   VMRegPair src_lo(src.first());
  1570   VMRegPair src_hi(src.second());
  1571   VMRegPair dst_lo(dst.first());
  1572   VMRegPair dst_hi(dst.second());
  1573   simple_move32(masm, src_lo, dst_lo);
  1574   simple_move32(masm, src_hi, dst_hi);
  1577 // A long move
  1578 static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
  1580   // Do the simple ones here else do two int moves
  1581   if (src.is_single_phys_reg() ) {
  1582     if (dst.is_single_phys_reg()) {
  1583       __ mov(src.first()->as_Register(), dst.first()->as_Register());
  1584     } else {
  1585       // split src into two separate registers
  1586       // Remember hi means hi address or lsw on sparc
  1587       // Move msw to lsw
  1588       if (dst.second()->is_reg()) {
  1589         // MSW -> MSW
  1590         __ srax(src.first()->as_Register(), 32, dst.first()->as_Register());
  1591         // Now LSW -> LSW
  1592         // this will only move lo -> lo and ignore hi
  1593         VMRegPair split(dst.second());
  1594         simple_move32(masm, src, split);
  1595       } else {
  1596         VMRegPair split(src.first(), L4->as_VMReg());
  1597         // MSW -> MSW (lo ie. first word)
  1598         __ srax(src.first()->as_Register(), 32, L4);
  1599         split_long_move(masm, split, dst);
  1602   } else if (dst.is_single_phys_reg()) {
  1603     if (src.is_adjacent_aligned_on_stack(2)) {
  1604       __ ldx(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
  1605     } else {
  1606       // dst is a single reg.
  1607       // Remember lo is low address not msb for stack slots
  1608       // and lo is the "real" register for registers
  1609       // src is
  1611       VMRegPair split;
  1613       if (src.first()->is_reg()) {
  1614         // src.lo (msw) is a reg, src.hi is stk/reg
  1615         // we will move: src.hi (LSW) -> dst.lo, src.lo (MSW) -> src.lo [the MSW is in the LSW of the reg]
  1616         split.set_pair(dst.first(), src.first());
  1617       } else {
  1618         // msw is stack move to L5
  1619         // lsw is stack move to dst.lo (real reg)
  1620         // we will move: src.hi (LSW) -> dst.lo, src.lo (MSW) -> L5
  1621         split.set_pair(dst.first(), L5->as_VMReg());
  1624       // src.lo -> src.lo/L5, src.hi -> dst.lo (the real reg)
  1625       // msw   -> src.lo/L5,  lsw -> dst.lo
  1626       split_long_move(masm, src, split);
   1628       // dst now has the low order word in the correct position; shift the
   1629       // msw half up and OR it in
  1630       __ sllx(split.first()->as_Register(), 32, L5);
  1632       const Register d = dst.first()->as_Register();
  1633       __ or3(L5, d, d);
  1635   } else {
  1636     // For LP64 we can probably do better.
  1637     split_long_move(masm, src, dst);
  1641 // A double move
  1642 static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
  1644   // The painful thing here is that like long_move a VMRegPair might be
  1645   // 1: a single physical register
  1646   // 2: two physical registers (v8)
  1647   // 3: a physical reg [lo] and a stack slot [hi] (v8)
  1648   // 4: two stack slots
  1650   // Since src is always a java calling convention we know that the src pair
  1651   // is always either all registers or all stack (and aligned?)
  1653   // in a register [lo] and a stack slot [hi]
  1654   if (src.first()->is_stack()) {
  1655     if (dst.first()->is_stack()) {
  1656       // stack to stack the easiest of the bunch
  1657       // ought to be a way to do this where if alignment is ok we use ldd/std when possible
  1658       __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5);
  1659       __ ld(FP, reg2offset(src.second()) + STACK_BIAS, L4);
  1660       __ st(L5, SP, reg2offset(dst.first()) + STACK_BIAS);
  1661       __ st(L4, SP, reg2offset(dst.second()) + STACK_BIAS);
  1662     } else {
  1663       // stack to reg
  1664       if (dst.second()->is_stack()) {
  1665         // stack -> reg, stack -> stack
  1666         __ ld(FP, reg2offset(src.second()) + STACK_BIAS, L4);
  1667         if (dst.first()->is_Register()) {
  1668           __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
  1669         } else {
  1670           __ ldf(FloatRegisterImpl::S, FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_FloatRegister());
  1672         // This was missing. (very rare case)
  1673         __ st(L4, SP, reg2offset(dst.second()) + STACK_BIAS);
  1674       } else {
  1675         // stack -> reg
  1676         // Eventually optimize for alignment QQQ
  1677         if (dst.first()->is_Register()) {
  1678           __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
  1679           __ ld(FP, reg2offset(src.second()) + STACK_BIAS, dst.second()->as_Register());
  1680         } else {
  1681           __ ldf(FloatRegisterImpl::S, FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_FloatRegister());
  1682           __ ldf(FloatRegisterImpl::S, FP, reg2offset(src.second()) + STACK_BIAS, dst.second()->as_FloatRegister());
  1686   } else if (dst.first()->is_stack()) {
  1687     // reg to stack
  1688     if (src.first()->is_Register()) {
  1689       // Eventually optimize for alignment QQQ
  1690       __ st(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS);
  1691       if (src.second()->is_stack()) {
  1692         __ ld(FP, reg2offset(src.second()) + STACK_BIAS, L4);
  1693         __ st(L4, SP, reg2offset(dst.second()) + STACK_BIAS);
  1694       } else {
  1695         __ st(src.second()->as_Register(), SP, reg2offset(dst.second()) + STACK_BIAS);
  1697     } else {
  1698       // fpr to stack
  1699       if (src.second()->is_stack()) {
  1700         ShouldNotReachHere();
  1701       } else {
  1702         // Is the stack aligned?
  1703         if (reg2offset(dst.first()) & 0x7) {
   1704           // No, do as pairs
  1705           __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), SP, reg2offset(dst.first()) + STACK_BIAS);
  1706           __ stf(FloatRegisterImpl::S, src.second()->as_FloatRegister(), SP, reg2offset(dst.second()) + STACK_BIAS);
  1707         } else {
  1708           __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), SP, reg2offset(dst.first()) + STACK_BIAS);
  1712   } else {
  1713     // reg to reg
  1714     if (src.first()->is_Register()) {
  1715       if (dst.first()->is_Register()) {
  1716         // gpr -> gpr
  1717         __ mov(src.first()->as_Register(), dst.first()->as_Register());
  1718         __ mov(src.second()->as_Register(), dst.second()->as_Register());
  1719       } else {
  1720         // gpr -> fpr
  1721         // ought to be able to do a single store
  1722         __ stx(src.first()->as_Register(), FP, -8 + STACK_BIAS);
  1723         __ stx(src.second()->as_Register(), FP, -4 + STACK_BIAS);
  1724         // ought to be able to do a single load
  1725         __ ldf(FloatRegisterImpl::S, FP, -8 + STACK_BIAS, dst.first()->as_FloatRegister());
  1726         __ ldf(FloatRegisterImpl::S, FP, -4 + STACK_BIAS, dst.second()->as_FloatRegister());
  1728     } else if (dst.first()->is_Register()) {
  1729       // fpr -> gpr
  1730       // ought to be able to do a single store
  1731       __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), FP, -8 + STACK_BIAS);
  1732       // ought to be able to do a single load
  1733       // REMEMBER first() is low address not LSB
  1734       __ ld(FP, -8 + STACK_BIAS, dst.first()->as_Register());
  1735       if (dst.second()->is_Register()) {
  1736         __ ld(FP, -4 + STACK_BIAS, dst.second()->as_Register());
  1737       } else {
  1738         __ ld(FP, -4 + STACK_BIAS, L4);
  1739         __ st(L4, SP, reg2offset(dst.second()) + STACK_BIAS);
  1741     } else {
  1742       // fpr -> fpr
  1743       // In theory these overlap but the ordering is such that this is likely a nop
  1744       if ( src.first() != dst.first()) {
  1745         __ fmov(FloatRegisterImpl::D, src.first()->as_FloatRegister(), dst.first()->as_FloatRegister());
  1751 // Creates an inner frame if one hasn't already been created, and
  1752 // saves a copy of the thread in L7_thread_cache
  1753 static void create_inner_frame(MacroAssembler* masm, bool* already_created) {
  1754   if (!*already_created) {
  1755     __ save_frame(0);
  1756     // Save thread in L7 (INNER FRAME); it crosses a bunch of VM calls below
  1757     // Don't use save_thread because it smashes G2 and we merely want to save a
  1758     // copy
  1759     __ mov(G2_thread, L7_thread_cache);
  1760     *already_created = true;
  1764 // ---------------------------------------------------------------------------
  1765 // Generate a native wrapper for a given method.  The method takes arguments
  1766 // in the Java compiled code convention, marshals them to the native
  1767 // convention (handlizes oops, etc), transitions to native, makes the call,
  1768 // returns to java state (possibly blocking), unhandlizes any result and
  1769 // returns.
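// Roughly, the generated wrapper performs, in order: the inline-cache check,
// an optional Object.hashCode fast path, the argument shuffle into the C
// convention, optional locking, the transition to _thread_in_native, the
// native call itself, the safepoint/suspend check on return, optional
// unlocking, result unpacking, and the return to Java.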
  1770 nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
  1771                                                 methodHandle method,
  1772                                                 int total_in_args,
  1773                                                 int comp_args_on_stack, // in VMRegStackSlots
  1774                                                 BasicType *in_sig_bt,
  1775                                                 VMRegPair *in_regs,
  1776                                                 BasicType ret_type) {
   1778   // Native nmethod wrappers never take possession of the oop arguments.
  1779   // So the caller will gc the arguments. The only thing we need an
  1780   // oopMap for is if the call is static
  1781   //
  1782   // An OopMap for lock (and class if static), and one for the VM call itself
  1783   OopMapSet *oop_maps = new OopMapSet();
  1784   intptr_t start = (intptr_t)__ pc();
  1786   // First thing make an ic check to see if we should even be here
  1788     Label L;
  1789     const Register temp_reg = G3_scratch;
  1790     AddressLiteral ic_miss(SharedRuntime::get_ic_miss_stub());
  1791     __ verify_oop(O0);
  1792     __ load_klass(O0, temp_reg);
  1793     __ cmp(temp_reg, G5_inline_cache_reg);
  1794     __ brx(Assembler::equal, true, Assembler::pt, L);
  1795     __ delayed()->nop();
  1797     __ jump_to(ic_miss, temp_reg);
  1798     __ delayed()->nop();
  1799     __ align(CodeEntryAlignment);
  1800     __ bind(L);
  1803   int vep_offset = ((intptr_t)__ pc()) - start;
  1805 #ifdef COMPILER1
  1806   if (InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) {
  1807     // Object.hashCode can pull the hashCode from the header word
  1808     // instead of doing a full VM transition once it's been computed.
  1809     // Since hashCode is usually polymorphic at call sites we can't do
  1810     // this optimization at the call site without a lot of work.
  1811     Label slowCase;
  1812     Register receiver             = O0;
  1813     Register result               = O0;
  1814     Register header               = G3_scratch;
  1815     Register hash                 = G3_scratch; // overwrite header value with hash value
  1816     Register mask                 = G1;         // to get hash field from header
  1818     // Read the header and build a mask to get its hash field.  Give up if the object is not unlocked.
  1819     // We depend on hash_mask being at most 32 bits and avoid the use of
  1820     // hash_mask_in_place because it could be larger than 32 bits in a 64-bit
  1821     // vm: see markOop.hpp.
  1822     __ ld_ptr(receiver, oopDesc::mark_offset_in_bytes(), header);
  1823     __ sethi(markOopDesc::hash_mask, mask);
  1824     __ btst(markOopDesc::unlocked_value, header);
  1825     __ br(Assembler::zero, false, Assembler::pn, slowCase);
  1826     if (UseBiasedLocking) {
  1827       // Check if biased and fall through to runtime if so
  1828       __ delayed()->nop();
  1829       __ btst(markOopDesc::biased_lock_bit_in_place, header);
  1830       __ br(Assembler::notZero, false, Assembler::pn, slowCase);
  1832     __ delayed()->or3(mask, markOopDesc::hash_mask & 0x3ff, mask);
  1834     // Check for a valid (non-zero) hash code and get its value.
  1835 #ifdef _LP64
  1836     __ srlx(header, markOopDesc::hash_shift, hash);
  1837 #else
  1838     __ srl(header, markOopDesc::hash_shift, hash);
  1839 #endif
  1840     __ andcc(hash, mask, hash);
  1841     __ br(Assembler::equal, false, Assembler::pn, slowCase);
  1842     __ delayed()->nop();
  1844     // leaf return.
  1845     __ retl();
  1846     __ delayed()->mov(hash, result);
  1847     __ bind(slowCase);
  1849 #endif // COMPILER1
   1852   // We have received a description of where all the java args are located
  1853   // on entry to the wrapper. We need to convert these args to where
  1854   // the jni function will expect them. To figure out where they go
  1855   // we convert the java signature to a C signature by inserting
  1856   // the hidden arguments as arg[0] and possibly arg[1] (static method)
  1858   int total_c_args = total_in_args + 1;
  1859   if (method->is_static()) {
  1860     total_c_args++;
  1863   BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
  1864   VMRegPair  * out_regs   = NEW_RESOURCE_ARRAY(VMRegPair,   total_c_args);
  1866   int argc = 0;
  1867   out_sig_bt[argc++] = T_ADDRESS;
  1868   if (method->is_static()) {
  1869     out_sig_bt[argc++] = T_OBJECT;
  1872   for (int i = 0; i < total_in_args ; i++ ) {
  1873     out_sig_bt[argc++] = in_sig_bt[i];
  1876   // Now figure out where the args must be stored and how much stack space
   1877   // they require (neglecting out_preserve_stack_slots but including space for
   1878   // storing the 1st six register arguments). It's weird; see int_stk_helper.
  1879   //
  1880   int out_arg_slots;
  1881   out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args);
  1883   // Compute framesize for the wrapper.  We need to handlize all oops in
  1884   // registers. We must create space for them here that is disjoint from
  1885   // the windowed save area because we have no control over when we might
  1886   // flush the window again and overwrite values that gc has since modified.
  1887   // (The live window race)
  1888   //
   1889   // We always just allocate 6 words for storing down these objects. This allows
  1890   // us to simply record the base and use the Ireg number to decide which
  1891   // slot to use. (Note that the reg number is the inbound number not the
  1892   // outbound number).
  1893   // We must shuffle args to match the native convention, and include var-args space.
  1895   // Calculate the total number of stack slots we will need.
  1897   // First count the abi requirement plus all of the outgoing args
  1898   int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
  1900   // Now the space for the inbound oop handle area
  1902   int oop_handle_offset = stack_slots;
  1903   stack_slots += 6*VMRegImpl::slots_per_word;
  1905   // Now any space we need for handlizing a klass if static method
  1907   int oop_temp_slot_offset = 0;
  1908   int klass_slot_offset = 0;
  1909   int klass_offset = -1;
  1910   int lock_slot_offset = 0;
  1911   bool is_static = false;
  1913   if (method->is_static()) {
  1914     klass_slot_offset = stack_slots;
  1915     stack_slots += VMRegImpl::slots_per_word;
  1916     klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size;
  1917     is_static = true;
  1920   // Plus a lock if needed
  1922   if (method->is_synchronized()) {
  1923     lock_slot_offset = stack_slots;
  1924     stack_slots += VMRegImpl::slots_per_word;
  1927   // Now a place to save return value or as a temporary for any gpr -> fpr moves
  1928   stack_slots += 2;
   1930   // OK, the space we have allocated will look like:
  1931   //
  1932   //
  1933   // FP-> |                     |
  1934   //      |---------------------|
  1935   //      | 2 slots for moves   |
  1936   //      |---------------------|
  1937   //      | lock box (if sync)  |
  1938   //      |---------------------| <- lock_slot_offset
  1939   //      | klass (if static)   |
  1940   //      |---------------------| <- klass_slot_offset
  1941   //      | oopHandle area      |
  1942   //      |---------------------| <- oop_handle_offset
  1943   //      | outbound memory     |
  1944   //      | based arguments     |
  1945   //      |                     |
  1946   //      |---------------------|
  1947   //      | vararg area         |
  1948   //      |---------------------|
  1949   //      |                     |
  1950   // SP-> | out_preserved_slots |
  1951   //
  1952   //
   1955   // Now compute the actual number of stack words we need, rounding to make
   1956   // the stack properly aligned.
  1957   stack_slots = round_to(stack_slots, 2 * VMRegImpl::slots_per_word);
  1959   int stack_size = stack_slots * VMRegImpl::stack_slot_size;
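  // To summarize the arithmetic above, stack_slots is:
  //   out_preserve_stack_slots() + out_arg_slots      (ABI area + outgoing args)
  //   + 6 * slots_per_word                            (oop handle area)
  //   + slots_per_word if static                      (handlized klass mirror)
  //   + slots_per_word if synchronized                (lock box)
  //   + 2                                             (return value / move temp)
  // rounded up to a multiple of 2 * slots_per_word for stack alignment.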
  1961   // Generate stack overflow check before creating frame
  1962   __ generate_stack_overflow_check(stack_size);
  1964   // Generate a new frame for the wrapper.
  1965   __ save(SP, -stack_size, SP);
  1967   int frame_complete = ((intptr_t)__ pc()) - start;
  1969   __ verify_thread();
  1972   //
  1973   // We immediately shuffle the arguments so that any vm call we have to
  1974   // make from here on out (sync slow path, jvmti, etc.) we will have
  1975   // captured the oops from our caller and have a valid oopMap for
  1976   // them.
  1978   // -----------------
  1979   // The Grand Shuffle
  1980   //
  1981   // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv*
  1982   // (derived from JavaThread* which is in L7_thread_cache) and, if static,
  1983   // the class mirror instead of a receiver.  This pretty much guarantees that
  1984   // register layout will not match.  We ignore these extra arguments during
  1985   // the shuffle. The shuffle is described by the two calling convention
  1986   // vectors we have in our possession. We simply walk the java vector to
  1987   // get the source locations and the c vector to get the destinations.
  1988   // Because we have a new window and the argument registers are completely
  1989   // disjoint ( I0 -> O1, I1 -> O2, ...) we have nothing to worry about
  1990   // here.
  1992   // This is a trick. We double the stack slots so we can claim
  1993   // the oops in the caller's frame. Since we are sure to have
   1994   // more args than the caller, doubling is enough to make
  1995   // sure we can capture all the incoming oop args from the
  1996   // caller.
  1997   //
  1998   OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
  1999   int c_arg = total_c_args - 1;
  2000   // Record sp-based slot for receiver on stack for non-static methods
  2001   int receiver_offset = -1;
   2003   // We move the arguments backward because a floating point register
   2004   // destination will always be a register with a greater or equal register
   2005   // number, or a stack slot.
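  // A note on the indexing below (a sketch of the common case): java arg i
  // always lands in C arg i + 1 (after the JNIEnv*) for instance methods, or
  // i + 2 for static methods (JNIEnv* plus the class mirror), which is why the
  // loop walks i and c_arg down in lock step from the high end of each vector.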
  2007 #ifdef ASSERT
  2008   bool reg_destroyed[RegisterImpl::number_of_registers];
  2009   bool freg_destroyed[FloatRegisterImpl::number_of_registers];
  2010   for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) {
  2011     reg_destroyed[r] = false;
  2013   for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) {
  2014     freg_destroyed[f] = false;
  2017 #endif /* ASSERT */
  2019   for ( int i = total_in_args - 1; i >= 0 ; i--, c_arg-- ) {
  2021 #ifdef ASSERT
  2022     if (in_regs[i].first()->is_Register()) {
  2023       assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "ack!");
  2024     } else if (in_regs[i].first()->is_FloatRegister()) {
  2025       assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding(FloatRegisterImpl::S)], "ack!");
  2027     if (out_regs[c_arg].first()->is_Register()) {
  2028       reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
  2029     } else if (out_regs[c_arg].first()->is_FloatRegister()) {
  2030       freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding(FloatRegisterImpl::S)] = true;
  2032 #endif /* ASSERT */
  2034     switch (in_sig_bt[i]) {
  2035       case T_ARRAY:
  2036       case T_OBJECT:
  2037         object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
  2038                     ((i == 0) && (!is_static)),
  2039                     &receiver_offset);
  2040         break;
  2041       case T_VOID:
  2042         break;
  2044       case T_FLOAT:
  2045         float_move(masm, in_regs[i], out_regs[c_arg]);
  2046           break;
  2048       case T_DOUBLE:
  2049         assert( i + 1 < total_in_args &&
  2050                 in_sig_bt[i + 1] == T_VOID &&
  2051                 out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
  2052         double_move(masm, in_regs[i], out_regs[c_arg]);
  2053         break;
  2055       case T_LONG :
  2056         long_move(masm, in_regs[i], out_regs[c_arg]);
  2057         break;
  2059       case T_ADDRESS: assert(false, "found T_ADDRESS in java args");
  2061       default:
  2062         move32_64(masm, in_regs[i], out_regs[c_arg]);
  2066   // Pre-load a static method's oop into O1.  Used both by locking code and
  2067   // the normal JNI call code.
  2068   if (method->is_static()) {
  2069     __ set_oop_constant(JNIHandles::make_local(Klass::cast(method->method_holder())->java_mirror()), O1);
  2071     // Now handlize the static class mirror in O1.  It's known not-null.
  2072     __ st_ptr(O1, SP, klass_offset + STACK_BIAS);
  2073     map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
  2074     __ add(SP, klass_offset + STACK_BIAS, O1);
  2078   const Register L6_handle = L6;
  2080   if (method->is_synchronized()) {
  2081     __ mov(O1, L6_handle);
   2084   // We have all of the arguments set up at this point. We MUST NOT touch any Oregs
  2085   // except O6/O7. So if we must call out we must push a new frame. We immediately
  2086   // push a new frame and flush the windows.
  2088 #ifdef _LP64
  2089   intptr_t thepc = (intptr_t) __ pc();
  2091     address here = __ pc();
  2092     // Call the next instruction
  2093     __ call(here + 8, relocInfo::none);
  2094     __ delayed()->nop();
  2096 #else
  2097   intptr_t thepc = __ load_pc_address(O7, 0);
  2098 #endif /* _LP64 */
  2100   // We use the same pc/oopMap repeatedly when we call out
  2101   oop_maps->add_gc_map(thepc - start, map);
  2103   // O7 now has the pc loaded that we will use when we finally call to native.
  2105   // Save thread in L7; it crosses a bunch of VM calls below
  2106   // Don't use save_thread because it smashes G2 and we merely
  2107   // want to save a copy
  2108   __ mov(G2_thread, L7_thread_cache);
   2111   // If we create an inner frame, once is plenty;
  2112   // when we create it we must also save G2_thread
  2113   bool inner_frame_created = false;
  2115   // dtrace method entry support
  2117     SkipIfEqual skip_if(
  2118       masm, G3_scratch, &DTraceMethodProbes, Assembler::zero);
  2119     // create inner frame
  2120     __ save_frame(0);
  2121     __ mov(G2_thread, L7_thread_cache);
  2122     __ set_oop_constant(JNIHandles::make_local(method()), O1);
  2123     __ call_VM_leaf(L7_thread_cache,
  2124          CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry),
  2125          G2_thread, O1);
  2126     __ restore();
  2129   // RedefineClasses() tracing support for obsolete method entry
  2130   if (RC_TRACE_IN_RANGE(0x00001000, 0x00002000)) {
  2131     // create inner frame
  2132     __ save_frame(0);
  2133     __ mov(G2_thread, L7_thread_cache);
  2134     __ set_oop_constant(JNIHandles::make_local(method()), O1);
  2135     __ call_VM_leaf(L7_thread_cache,
  2136          CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry),
  2137          G2_thread, O1);
  2138     __ restore();
   2141   // We are in the jni frame unless inner_frame_created is true, in which case
   2142   // we are one frame deeper (the "inner" frame). If we are in the
   2143   // "inner" frame the args are in the Iregs; if in the jni frame then
   2144   // they are in the Oregs.
  2145   // If we ever need to go to the VM (for locking, jvmti) then
  2146   // we will always be in the "inner" frame.
  2148   // Lock a synchronized method
  2149   int lock_offset = -1;         // Set if locked
  2150   if (method->is_synchronized()) {
  2151     Register Roop = O1;
  2152     const Register L3_box = L3;
  2154     create_inner_frame(masm, &inner_frame_created);
  2156     __ ld_ptr(I1, 0, O1);
  2157     Label done;
  2159     lock_offset = (lock_slot_offset * VMRegImpl::stack_slot_size);
  2160     __ add(FP, lock_offset+STACK_BIAS, L3_box);
  2161 #ifdef ASSERT
  2162     if (UseBiasedLocking) {
  2163       // making the box point to itself will make it clear it went unused
  2164       // but also be obviously invalid
  2165       __ st_ptr(L3_box, L3_box, 0);
  2167 #endif // ASSERT
  2168     //
  2169     // Compiler_lock_object (Roop, Rmark, Rbox, Rscratch) -- kills Rmark, Rbox, Rscratch
  2170     //
  2171     __ compiler_lock_object(Roop, L1,    L3_box, L2);
  2172     __ br(Assembler::equal, false, Assembler::pt, done);
  2173     __ delayed() -> add(FP, lock_offset+STACK_BIAS, L3_box);
  2176     // None of the above fast optimizations worked so we have to get into the
  2177     // slow case of monitor enter.  Inline a special case of call_VM that
  2178     // disallows any pending_exception.
  2179     __ mov(Roop, O0);            // Need oop in O0
  2180     __ mov(L3_box, O1);
  2182     // Record last_Java_sp, in case the VM code releases the JVM lock.
  2184     __ set_last_Java_frame(FP, I7);
  2186     // do the call
  2187     __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), relocInfo::runtime_call_type);
  2188     __ delayed()->mov(L7_thread_cache, O2);
  2190     __ restore_thread(L7_thread_cache); // restore G2_thread
  2191     __ reset_last_Java_frame();
  2193 #ifdef ASSERT
  2194     { Label L;
  2195     __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), O0);
  2196     __ br_null(O0, false, Assembler::pt, L);
  2197     __ delayed()->nop();
  2198     __ stop("no pending exception allowed on exit from IR::monitorenter");
  2199     __ bind(L);
  2201 #endif
  2202     __ bind(done);
  2206   // Finally just about ready to make the JNI call
  2208   __ flush_windows();
  2209   if (inner_frame_created) {
  2210     __ restore();
  2211   } else {
  2212     // Store only what we need from this frame
   2213     // QQQ I think that on non-v9 (like we care) we don't need these saves
   2214     // either, as the flush traps and the current window goes too.
  2215     __ st_ptr(FP, SP, FP->sp_offset_in_saved_window()*wordSize + STACK_BIAS);
  2216     __ st_ptr(I7, SP, I7->sp_offset_in_saved_window()*wordSize + STACK_BIAS);
  2219   // get JNIEnv* which is first argument to native
  2221   __ add(G2_thread, in_bytes(JavaThread::jni_environment_offset()), O0);
  2223   // Use that pc we placed in O7 a while back as the current frame anchor
  2225   __ set_last_Java_frame(SP, O7);
  2227   // Transition from _thread_in_Java to _thread_in_native.
  2228   __ set(_thread_in_native, G3_scratch);
  2229   __ st(G3_scratch, G2_thread, JavaThread::thread_state_offset());
   2231   // We flushed the windows ages ago; now mark them as flushed
  2233   // mark windows as flushed
  2234   __ set(JavaFrameAnchor::flushed, G3_scratch);
  2236   Address flags(G2_thread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::flags_offset());
  2238 #ifdef _LP64
  2239   AddressLiteral dest(method->native_function());
  2240   __ relocate(relocInfo::runtime_call_type);
  2241   __ jumpl_to(dest, O7, O7);
  2242 #else
  2243   __ call(method->native_function(), relocInfo::runtime_call_type);
  2244 #endif
  2245   __ delayed()->st(G3_scratch, flags);
  2247   __ restore_thread(L7_thread_cache); // restore G2_thread
  2249   // Unpack native results.  For int-types, we do any needed sign-extension
  2250   // and move things into I0.  The return value there will survive any VM
  2251   // calls for blocking or unlocking.  An FP or OOP result (handle) is done
  2252   // specially in the slow-path code.
  2253   switch (ret_type) {
  2254   case T_VOID:    break;        // Nothing to do!
  2255   case T_FLOAT:   break;        // Got it where we want it (unless slow-path)
  2256   case T_DOUBLE:  break;        // Got it where we want it (unless slow-path)
   2257   // In a 64-bit build the result is in O0; in a 32-bit build it is in O0, O1
  2258   case T_LONG:
  2259 #ifndef _LP64
  2260                   __ mov(O1, I1);
  2261 #endif
  2262                   // Fall thru
  2263   case T_OBJECT:                // Really a handle
  2264   case T_ARRAY:
  2265   case T_INT:
  2266                   __ mov(O0, I0);
  2267                   break;
  2268   case T_BOOLEAN: __ subcc(G0, O0, G0); __ addc(G0, 0, I0); break; // !0 => true; 0 => false
  2269   case T_BYTE   : __ sll(O0, 24, O0); __ sra(O0, 24, I0);   break;
  2270   case T_CHAR   : __ sll(O0, 16, O0); __ srl(O0, 16, I0);   break; // cannot use and3, 0xFFFF too big as immediate value!
  2271   case T_SHORT  : __ sll(O0, 16, O0); __ sra(O0, 16, I0);   break;
  2272     break;                      // Cannot de-handlize until after reclaiming jvm_lock
  2273   default:
  2274     ShouldNotReachHere();
  2277   // must we block?
  2279   // Block, if necessary, before resuming in _thread_in_Java state.
  2280   // In order for GC to work, don't clear the last_Java_sp until after blocking.
  2281   { Label no_block;
  2282     AddressLiteral sync_state(SafepointSynchronize::address_of_state());
  2284     // Switch thread to "native transition" state before reading the synchronization state.
  2285     // This additional state is necessary because reading and testing the synchronization
  2286     // state is not atomic w.r.t. GC, as this scenario demonstrates:
  2287     //     Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
  2288     //     VM thread changes sync state to synchronizing and suspends threads for GC.
  2289     //     Thread A is resumed to finish this native method, but doesn't block here since it
   2290     //     didn't see any synchronization in progress, and escapes.
  2291     __ set(_thread_in_native_trans, G3_scratch);
  2292     __ st(G3_scratch, G2_thread, JavaThread::thread_state_offset());
  2293     if(os::is_MP()) {
  2294       if (UseMembar) {
  2295         // Force this write out before the read below
  2296         __ membar(Assembler::StoreLoad);
  2297       } else {
  2298         // Write serialization page so VM thread can do a pseudo remote membar.
  2299         // We use the current thread pointer to calculate a thread specific
  2300         // offset to write to within the page. This minimizes bus traffic
  2301         // due to cache line collision.
  2302         __ serialize_memory(G2_thread, G1_scratch, G3_scratch);
  2305     __ load_contents(sync_state, G3_scratch);
  2306     __ cmp(G3_scratch, SafepointSynchronize::_not_synchronized);
  2308     Label L;
  2309     Address suspend_state(G2_thread, JavaThread::suspend_flags_offset());
  2310     __ br(Assembler::notEqual, false, Assembler::pn, L);
  2311     __ delayed()->ld(suspend_state, G3_scratch);
  2312     __ cmp(G3_scratch, 0);
  2313     __ br(Assembler::equal, false, Assembler::pt, no_block);
  2314     __ delayed()->nop();
  2315     __ bind(L);
  2317     // Block.  Save any potential method result value before the operation and
  2318     // use a leaf call to leave the last_Java_frame setup undisturbed. Doing this
   2319     // lets us share the oopMap we used when we went native rather than create
  2320     // a distinct one for this pc
  2321     //
  2322     save_native_result(masm, ret_type, stack_slots);
  2323     __ call_VM_leaf(L7_thread_cache,
  2324                     CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans),
  2325                     G2_thread);
  2327     // Restore any method result value
  2328     restore_native_result(masm, ret_type, stack_slots);
  2329     __ bind(no_block);
  2332   // thread state is thread_in_native_trans. Any safepoint blocking has already
  2333   // happened so we can now change state to _thread_in_Java.
  2336   __ set(_thread_in_Java, G3_scratch);
  2337   __ st(G3_scratch, G2_thread, JavaThread::thread_state_offset());
  2340   Label no_reguard;
  2341   __ ld(G2_thread, JavaThread::stack_guard_state_offset(), G3_scratch);
  2342   __ cmp(G3_scratch, JavaThread::stack_guard_yellow_disabled);
  2343   __ br(Assembler::notEqual, false, Assembler::pt, no_reguard);
  2344   __ delayed()->nop();
  2346     save_native_result(masm, ret_type, stack_slots);
  2347   __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages));
  2348   __ delayed()->nop();
  2350   __ restore_thread(L7_thread_cache); // restore G2_thread
  2351     restore_native_result(masm, ret_type, stack_slots);
  2353   __ bind(no_reguard);
  2355   // Handle possible exception (will unlock if necessary)
  2357   // native result if any is live in freg or I0 (and I1 if long and 32bit vm)
  2359   // Unlock
  2360   if (method->is_synchronized()) {
  2361     Label done;
  2362     Register I2_ex_oop = I2;
  2363     const Register L3_box = L3;
  2364     // Get locked oop from the handle we passed to jni
  2365     __ ld_ptr(L6_handle, 0, L4);
  2366     __ add(SP, lock_offset+STACK_BIAS, L3_box);
  2367     // Must save pending exception around the slow-path VM call.  Since it's a
  2368     // leaf call, the pending exception (if any) can be kept in a register.
  2369     __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), I2_ex_oop);
  2370     // Now unlock
  2371     //                       (Roop, Rmark, Rbox,   Rscratch)
  2372     __ compiler_unlock_object(L4,   L1,    L3_box, L2);
  2373     __ br(Assembler::equal, false, Assembler::pt, done);
  2374     __ delayed()-> add(SP, lock_offset+STACK_BIAS, L3_box);
  2376     // save and restore any potential method result value around the unlocking
  2377     // operation.  Will save in I0 (or stack for FP returns).
  2378     save_native_result(masm, ret_type, stack_slots);
  2380     // Must clear pending-exception before re-entering the VM.  Since this is
  2381     // a leaf call, pending-exception-oop can be safely kept in a register.
  2382     __ st_ptr(G0, G2_thread, in_bytes(Thread::pending_exception_offset()));
   2384     // slow case of monitor exit.  Inline a special case of call_VM that
  2385     // disallows any pending_exception.
  2386     __ mov(L3_box, O1);
  2388     __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), relocInfo::runtime_call_type);
  2389     __ delayed()->mov(L4, O0);              // Need oop in O0
  2391     __ restore_thread(L7_thread_cache); // restore G2_thread
  2393 #ifdef ASSERT
  2394     { Label L;
  2395     __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), O0);
  2396     __ br_null(O0, false, Assembler::pt, L);
  2397     __ delayed()->nop();
  2398     __ stop("no pending exception allowed on exit from IR::monitorexit");
  2399     __ bind(L);
  2401 #endif
  2402     restore_native_result(masm, ret_type, stack_slots);
   2403     // check_forward_pending_exception jumps to forward_exception if any pending
  2404     // exception is set.  The forward_exception routine expects to see the
  2405     // exception in pending_exception and not in a register.  Kind of clumsy,
  2406     // since all folks who branch to forward_exception must have tested
  2407     // pending_exception first and hence have it in a register already.
  2408     __ st_ptr(I2_ex_oop, G2_thread, in_bytes(Thread::pending_exception_offset()));
  2409     __ bind(done);
  2412   // Tell dtrace about this method exit
  2414     SkipIfEqual skip_if(
  2415       masm, G3_scratch, &DTraceMethodProbes, Assembler::zero);
  2416     save_native_result(masm, ret_type, stack_slots);
  2417     __ set_oop_constant(JNIHandles::make_local(method()), O1);
  2418     __ call_VM_leaf(L7_thread_cache,
  2419        CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit),
  2420        G2_thread, O1);
  2421     restore_native_result(masm, ret_type, stack_slots);
  2424   // Clear "last Java frame" SP and PC.
  2425   __ verify_thread(); // G2_thread must be correct
  2426   __ reset_last_Java_frame();
  2428   // Unpack oop result
  2429   if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
  2430       Label L;
  2431       __ addcc(G0, I0, G0);
  2432       __ brx(Assembler::notZero, true, Assembler::pt, L);
  2433       __ delayed()->ld_ptr(I0, 0, I0);
  2434       __ mov(G0, I0);
  2435       __ bind(L);
  2436       __ verify_oop(I0);
  2439   // reset handle block
  2440   __ ld_ptr(G2_thread, in_bytes(JavaThread::active_handles_offset()), L5);
  2441   __ st_ptr(G0, L5, JNIHandleBlock::top_offset_in_bytes());
  2443   __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), G3_scratch);
  2444   check_forward_pending_exception(masm, G3_scratch);
  2447   // Return
  2449 #ifndef _LP64
  2450   if (ret_type == T_LONG) {
  2452     // Must leave proper result in O0,O1 and G1 (c2/tiered only)
  2453     __ sllx(I0, 32, G1);          // Shift bits into high G1
  2454     __ srl (I1, 0, I1);           // Zero extend O1 (harmless?)
  2455     __ or3 (I1, G1, G1);          // OR 64 bits into G1
  2457 #endif
  2459   __ ret();
  2460   __ delayed()->restore();
  2462   __ flush();
  2464   nmethod *nm = nmethod::new_native_nmethod(method,
  2465                                             masm->code(),
  2466                                             vep_offset,
  2467                                             frame_complete,
  2468                                             stack_slots / VMRegImpl::slots_per_word,
  2469                                             (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
  2470                                             in_ByteSize(lock_offset),
  2471                                             oop_maps);
  2472   return nm;
  2476 #ifdef HAVE_DTRACE_H
  2477 // ---------------------------------------------------------------------------
  2478 // Generate a dtrace nmethod for a given signature.  The method takes arguments
  2479 // in the Java compiled code convention, marshals them to the native
  2480 // abi and then leaves nops at the position you would expect to call a native
  2481 // function. When the probe is enabled the nops are replaced with a trap
  2482 // instruction that dtrace inserts and the trace will cause a notification
  2483 // to dtrace.
  2484 //
  2485 // The probes are only able to take primitive types and java/lang/String as
  2486 // arguments.  No other java types are allowed. Strings are converted to utf8
   2487 // strings so that from dtrace's point of view java strings are converted to C
  2488 // strings. There is an arbitrary fixed limit on the total space that a method
  2489 // can use for converting the strings. (256 chars per string in the signature).
   2490 // So any java string larger than this is truncated.
  2492 static int  fp_offset[ConcreteRegisterImpl::number_of_registers] = { 0 };
  2493 static bool offsets_initialized = false;
  2495 static VMRegPair reg64_to_VMRegPair(Register r) {
  2496   VMRegPair ret;
  2497   if (wordSize == 8) {
  2498     ret.set2(r->as_VMReg());
  2499   } else {
  2500     ret.set_pair(r->successor()->as_VMReg(), r->as_VMReg());
  2502   return ret;
  2506 nmethod *SharedRuntime::generate_dtrace_nmethod(
  2507     MacroAssembler *masm, methodHandle method) {
  2510   // generate_dtrace_nmethod is guarded by a mutex so we are sure to
  2511   // be single threaded in this method.
  2512   assert(AdapterHandlerLibrary_lock->owned_by_self(), "must be");
  2514   // Fill in the signature array, for the calling-convention call.
  2515   int total_args_passed = method->size_of_parameters();
  2517   BasicType* in_sig_bt  = NEW_RESOURCE_ARRAY(BasicType, total_args_passed);
  2518   VMRegPair  *in_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed);
   2520   // The signature we are going to use for the trap that dtrace will see:
   2521   // java/lang/String is converted, "this" is dropped, and any other object
   2522   // is converted to NULL.  (A one-slot java/lang/Long object reference
  2523   // is converted to a two-slot long, which is why we double the allocation).
  2524   BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed * 2);
  2525   VMRegPair* out_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed * 2);
  2527   int i=0;
  2528   int total_strings = 0;
  2529   int first_arg_to_pass = 0;
  2530   int total_c_args = 0;
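         // i indexes the Java-side signature (in_sig_bt); total_c_args indexes the
         // C-side one (out_sig_bt).  They differ because the receiver is recorded
         // only on the Java side.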
  2532   // Skip the receiver as dtrace doesn't want to see it
  2533   if( !method->is_static() ) {
  2534     in_sig_bt[i++] = T_OBJECT;
  2535     first_arg_to_pass = 1;
  2538   SignatureStream ss(method->signature());
  2539   for ( ; !ss.at_return_type(); ss.next()) {
  2540     BasicType bt = ss.type();
  2541     in_sig_bt[i++] = bt;  // Collect remaining bits of signature
  2542     out_sig_bt[total_c_args++] = bt;
  2543     if( bt == T_OBJECT) {
  2544       Symbol* s = ss.as_symbol_or_null();
  2545       if (s == vmSymbols::java_lang_String()) {
  2546         total_strings++;
  2547         out_sig_bt[total_c_args-1] = T_ADDRESS;
  2548       } else if (s == vmSymbols::java_lang_Boolean() ||
  2549                  s == vmSymbols::java_lang_Byte()) {
  2550         out_sig_bt[total_c_args-1] = T_BYTE;
  2551       } else if (s == vmSymbols::java_lang_Character() ||
  2552                  s == vmSymbols::java_lang_Short()) {
  2553         out_sig_bt[total_c_args-1] = T_SHORT;
  2554       } else if (s == vmSymbols::java_lang_Integer() ||
  2555                  s == vmSymbols::java_lang_Float()) {
  2556         out_sig_bt[total_c_args-1] = T_INT;
  2557       } else if (s == vmSymbols::java_lang_Long() ||
  2558                  s == vmSymbols::java_lang_Double()) {
  2559         out_sig_bt[total_c_args-1] = T_LONG;
  2560         out_sig_bt[total_c_args++] = T_VOID;
  2562     } else if ( bt == T_LONG || bt == T_DOUBLE ) {
  2563       in_sig_bt[i++] = T_VOID;   // Longs & doubles take 2 Java slots
  2564       // We convert double to long
  2565       out_sig_bt[total_c_args-1] = T_LONG;
  2566       out_sig_bt[total_c_args++] = T_VOID;
  2567     } else if ( bt == T_FLOAT) {
  2568       // We convert float to int
  2569       out_sig_bt[total_c_args-1] = T_INT;
  2573   assert(i==total_args_passed, "validly parsed signature");
  2575   // Now get the compiled-Java layout as input arguments
  2576   int comp_args_on_stack;
  2577   comp_args_on_stack = SharedRuntime::java_calling_convention(
  2578       in_sig_bt, in_regs, total_args_passed, false);
  2580   // We have received a description of where all the java args are located
  2581   // on entry to the wrapper. We need to convert these args to where
  2582   // a native (non-jni) function would expect them. To figure out
  2583   // where they go we convert the java signature to a C signature and remove
  2584   // T_VOID for any long/double we might have received.
  2587   // Now figure out where the args must be stored and how much stack space
  2588   // they require (neglecting out_preserve_stack_slots but including space for
  2589   // storing the 1st six register arguments). It's weird; see int_stk_helper.
  2590   //
  2591   int out_arg_slots;
  2592   out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args);
  2594   // Calculate the total number of stack slots we will need.
  2596   // First count the abi requirement plus all of the outgoing args
  2597   int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
  2599   // Plus a temp for possible conversion of float/double/long register args
  2601   int conversion_temp = stack_slots;
  2602   stack_slots += 2;
  2605   // Now space for the string(s) we must convert
  2607   int string_locs = stack_slots;
  2608   stack_slots += total_strings *
  2609                    (max_dtrace_string_size / VMRegImpl::stack_slot_size);
  2611   // OK, the space we have allocated will look like:
  2612   //
  2613   //
  2614   // FP-> |                     |
  2615   //      |---------------------|
  2616   //      | string[n]           |
  2617   //      |---------------------| <- string_locs[n]
  2618   //      | string[n-1]         |
  2619   //      |---------------------| <- string_locs[n-1]
  2620   //      | ...                 |
  2621   //      | ...                 |
  2622   //      |---------------------| <- string_locs[1]
  2623   //      | string[0]           |
  2624   //      |---------------------| <- string_locs[0]
  2625   //      | temp                |
  2626   //      |---------------------| <- conversion_temp
  2627   //      | outbound memory     |
  2628   //      | based arguments     |
  2629   //      |                     |
  2630   //      |---------------------|
  2631   //      |                     |
  2632   // SP-> | out_preserved_slots |
  2633   //
  2634   //
  2636   // Now compute the actual number of stack words we need, rounding to keep
  2637   // the stack properly aligned.
  2638   stack_slots = round_to(stack_slots, 4 * VMRegImpl::slots_per_word);
  2640   int stack_size = stack_slots * VMRegImpl::stack_slot_size;
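         // Remember where code generation starts; vep_offset, frame_complete and
         // patch_offset below are all measured relative to this point.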
  2642   intptr_t start = (intptr_t)__ pc();
  2644   // First thing make an ic check to see if we should even be here
  2647     Label L;
  2648     const Register temp_reg = G3_scratch;
  2649     AddressLiteral ic_miss(SharedRuntime::get_ic_miss_stub());
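           // Load the receiver's klass and compare it with the expected klass in
           // G5_inline_cache_reg; on a mismatch, jump to the ic-miss stub.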
  2650     __ verify_oop(O0);
  2651     __ ld_ptr(O0, oopDesc::klass_offset_in_bytes(), temp_reg);
  2652     __ cmp(temp_reg, G5_inline_cache_reg);
  2653     __ brx(Assembler::equal, true, Assembler::pt, L);
  2654     __ delayed()->nop();
  2656     __ jump_to(ic_miss, temp_reg);
  2657     __ delayed()->nop();
  2658     __ align(CodeEntryAlignment);
  2659     __ bind(L);
  2662   int vep_offset = ((intptr_t)__ pc()) - start;
  2665   // The instruction at the verified entry point must be 5 bytes or longer
  2666   // because it can be patched on the fly by make_non_entrant. The stack bang
  2667   // instruction fits that requirement.
  2669   // Generate stack overflow check before creating frame
  2670   __ generate_stack_overflow_check(stack_size);
  2672   assert(((intptr_t)__ pc() - start - vep_offset) >= 5,
  2673          "valid size for make_non_entrant");
  2675   // Generate a new frame for the wrapper.
  2676   __ save(SP, -stack_size, SP);
  2678   // Frame is now completed as far as size and linkage.
  2680   int frame_complete = ((intptr_t)__ pc()) - start;
  2682 #ifdef ASSERT
  2683   bool reg_destroyed[RegisterImpl::number_of_registers];
  2684   bool freg_destroyed[FloatRegisterImpl::number_of_registers];
  2685   for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) {
  2686     reg_destroyed[r] = false;
  2688   for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) {
  2689     freg_destroyed[f] = false;
  2692 #endif /* ASSERT */
  2694   VMRegPair zero;
  2695   const Register g0 = G0; // without this we get a compiler warning (why??)
  2696   zero.set2(g0->as_VMReg());
  2698   int c_arg, j_arg;
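         // conversion_off lazily materializes (in L6) a stack offset that does not
         // fit in a simm13 immediate, so it only has to be set up once.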
  2700   Register conversion_off = noreg;
  2702   for (j_arg = first_arg_to_pass, c_arg = 0 ;
  2703        j_arg < total_args_passed ; j_arg++, c_arg++ ) {
  2705     VMRegPair src = in_regs[j_arg];
  2706     VMRegPair dst = out_regs[c_arg];
  2708 #ifdef ASSERT
  2709     if (src.first()->is_Register()) {
  2710       assert(!reg_destroyed[src.first()->as_Register()->encoding()], "ack!");
  2711     } else if (src.first()->is_FloatRegister()) {
  2712       assert(!freg_destroyed[src.first()->as_FloatRegister()->encoding(
  2713                                                FloatRegisterImpl::S)], "ack!");
  2715     if (dst.first()->is_Register()) {
  2716       reg_destroyed[dst.first()->as_Register()->encoding()] = true;
  2717     } else if (dst.first()->is_FloatRegister()) {
  2718       freg_destroyed[dst.first()->as_FloatRegister()->encoding(
  2719                                                  FloatRegisterImpl::S)] = true;
  2721 #endif /* ASSERT */
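           // Marshal this Java argument into its C (probe) location: unbox wrapper
           // objects, null out other oops, and widen/narrow primitives as needed.
           // Strings (T_ADDRESS) are only positioned here; they are converted below.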
  2723     switch (in_sig_bt[j_arg]) {
  2724       case T_ARRAY:
  2725       case T_OBJECT:
  2727           if (out_sig_bt[c_arg] == T_BYTE  || out_sig_bt[c_arg] == T_SHORT ||
  2728               out_sig_bt[c_arg] == T_INT || out_sig_bt[c_arg] == T_LONG) {
  2729             // need to unbox a one-slot value
  2730             Register in_reg = L0;
  2731             Register tmp = L2;
  2732             if ( src.first()->is_reg() ) {
  2733               in_reg = src.first()->as_Register();
  2734             } else {
  2735               assert(Assembler::is_simm13(reg2offset(src.first()) + STACK_BIAS),
  2736                      "must be");
  2737               __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, in_reg);
  2739             // If the final destination is an acceptable register
  2740             if ( dst.first()->is_reg() ) {
  2741               if ( dst.is_single_phys_reg() || out_sig_bt[c_arg] != T_LONG ) {
  2742                 tmp = dst.first()->as_Register();
  2746             Label skipUnbox;
  2747             if ( wordSize == 4 && out_sig_bt[c_arg] == T_LONG ) {
  2748               __ mov(G0, tmp->successor());
  2750             __ br_null(in_reg, true, Assembler::pn, skipUnbox);
  2751             __ delayed()->mov(G0, tmp);
  2753             BasicType bt = out_sig_bt[c_arg];
  2754             int box_offset = java_lang_boxing_object::value_offset_in_bytes(bt);
  2755             switch (bt) {
  2756                 case T_BYTE:
  2757                   __ ldub(in_reg, box_offset, tmp); break;
  2758                 case T_SHORT:
  2759                   __ lduh(in_reg, box_offset, tmp); break;
  2760                 case T_INT:
  2761                   __ ld(in_reg, box_offset, tmp); break;
  2762                 case T_LONG:
  2763                   __ ld_long(in_reg, box_offset, tmp); break;
  2764                 default: ShouldNotReachHere();
  2767             __ bind(skipUnbox);
  2768             // If tmp wasn't final destination copy to final destination
  2769             if (tmp == L2) {
  2770               VMRegPair tmp_as_VM = reg64_to_VMRegPair(L2);
  2771               if (out_sig_bt[c_arg] == T_LONG) {
  2772                 long_move(masm, tmp_as_VM, dst);
  2773               } else {
  2774                 move32_64(masm, tmp_as_VM, out_regs[c_arg]);
  2777             if (out_sig_bt[c_arg] == T_LONG) {
  2778               assert(out_sig_bt[c_arg+1] == T_VOID, "must be");
  2779               ++c_arg; // move over the T_VOID to keep the loop indices in sync
  2781           } else if (out_sig_bt[c_arg] == T_ADDRESS) {
  2782             Register s =
  2783                 src.first()->is_reg() ? src.first()->as_Register() : L2;
  2784             Register d =
  2785                 dst.first()->is_reg() ? dst.first()->as_Register() : L2;
  2787             // We store the oop now so that the conversion pass can reach it
  2788             // while in the inner frame. This will be the only store if
  2789             // the oop is NULL.
  2790             if (s != L2) {
  2791               // src is register
  2792               if (d != L2) {
  2793                 // dst is register
  2794                 __ mov(s, d);
  2795               } else {
  2796                 assert(Assembler::is_simm13(reg2offset(dst.first()) +
  2797                           STACK_BIAS), "must be");
  2798                 __ st_ptr(s, SP, reg2offset(dst.first()) + STACK_BIAS);
  2800             } else {
  2801                 // src not a register
  2802                 assert(Assembler::is_simm13(reg2offset(src.first()) +
  2803                            STACK_BIAS), "must be");
  2804                 __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, d);
  2805                 if (d == L2) {
  2806                   assert(Assembler::is_simm13(reg2offset(dst.first()) +
  2807                              STACK_BIAS), "must be");
  2808                   __ st_ptr(d, SP, reg2offset(dst.first()) + STACK_BIAS);
  2811           } else if (out_sig_bt[c_arg] != T_VOID) {
  2812             // Convert the arg to NULL
  2813             if (dst.first()->is_reg()) {
  2814               __ mov(G0, dst.first()->as_Register());
  2815             } else {
  2816               assert(Assembler::is_simm13(reg2offset(dst.first()) +
  2817                          STACK_BIAS), "must be");
  2818               __ st_ptr(G0, SP, reg2offset(dst.first()) + STACK_BIAS);
  2822         break;
  2823       case T_VOID:
  2824         break;
  2826       case T_FLOAT:
  2827         if (src.first()->is_stack()) {
  2828           // Stack to stack/reg is simple
  2829           move32_64(masm, src, dst);
  2830         } else {
  2831           if (dst.first()->is_reg()) {
  2832             // freg -> reg
  2833             int off =
  2834               STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size;
  2835             Register d = dst.first()->as_Register();
  2836             if (Assembler::is_simm13(off)) {
  2837               __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
  2838                      SP, off);
  2839               __ ld(SP, off, d);
  2840             } else {
  2841               if (conversion_off == noreg) {
  2842                 __ set(off, L6);
  2843                 conversion_off = L6;
  2845               __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
  2846                      SP, conversion_off);
  2847               __ ld(SP, conversion_off , d);
  2849           } else {
  2850             // freg -> mem
  2851             int off = STACK_BIAS + reg2offset(dst.first());
  2852             if (Assembler::is_simm13(off)) {
  2853               __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
  2854                      SP, off);
  2855             } else {
  2856               if (conversion_off == noreg) {
  2857                 __ set(off, L6);
  2858                 conversion_off = L6;
  2860               __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
  2861                      SP, conversion_off);
  2865         break;
  2867       case T_DOUBLE:
  2868         assert( j_arg + 1 < total_args_passed &&
  2869                 in_sig_bt[j_arg + 1] == T_VOID &&
  2870                 out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
  2871         if (src.first()->is_stack()) {
  2872           // Stack to stack/reg is simple
  2873           long_move(masm, src, dst);
  2874         } else {
  2875           Register d = dst.first()->is_reg() ? dst.first()->as_Register() : L2;
  2877           // Destination could be an odd reg on 32bit in which case
  2878           // we can't load directly into the destination.
  2880           if (!d->is_even() && wordSize == 4) {
  2881             d = L2;
  2883           int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size;
  2884           if (Assembler::is_simm13(off)) {
  2885             __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(),
  2886                    SP, off);
  2887             __ ld_long(SP, off, d);
  2888           } else {
  2889             if (conversion_off == noreg) {
  2890               __ set(off, L6);
  2891               conversion_off = L6;
  2893             __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(),
  2894                    SP, conversion_off);
  2895             __ ld_long(SP, conversion_off, d);
  2897           if (d == L2) {
  2898             long_move(masm, reg64_to_VMRegPair(L2), dst);
  2901         break;
  2903       case T_LONG :
  2904         // 32bit can't do a split move of something like g1 -> O0, O1
  2905         // so use a memory temp
  2906         if (src.is_single_phys_reg() && wordSize == 4) {
  2907           Register tmp = L2;
  2908           if (dst.first()->is_reg() &&
  2909               (wordSize == 8 || dst.first()->as_Register()->is_even())) {
  2910             tmp = dst.first()->as_Register();
  2913           int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size;
  2914           if (Assembler::is_simm13(off)) {
  2915             __ stx(src.first()->as_Register(), SP, off);
  2916             __ ld_long(SP, off, tmp);
  2917           } else {
  2918             if (conversion_off == noreg) {
  2919               __ set(off, L6);
  2920               conversion_off = L6;
  2922             __ stx(src.first()->as_Register(), SP, conversion_off);
  2923             __ ld_long(SP, conversion_off, tmp);
  2926           if (tmp == L2) {
  2927             long_move(masm, reg64_to_VMRegPair(L2), dst);
  2929         } else {
  2930           long_move(masm, src, dst);
  2932         break;
  2934       case T_ADDRESS: assert(false, "found T_ADDRESS in java args");
  2936       default:
  2937         move32_64(masm, src, dst);
  2942   // If we have any strings we must store any register-based arg to the stack.
  2943   // This includes any still-live floating point registers too.
  2945   if (total_strings > 0 ) {
  2947     // protect all the arg registers
  2948     __ save_frame(0);
  2949     __ mov(G2_thread, L7_thread_cache);
  2950     const Register L2_string_off = L2;
  2952     // Get first string offset
  2953     __ set(string_locs * VMRegImpl::stack_slot_size, L2_string_off);
  2955     for (c_arg = 0 ; c_arg < total_c_args ; c_arg++ ) {
  2956       if (out_sig_bt[c_arg] == T_ADDRESS) {
  2958         VMRegPair dst = out_regs[c_arg];
  2959         const Register d = dst.first()->is_reg() ?
  2960             dst.first()->as_Register()->after_save() : noreg;
  2962         // It's a string oop and it was already copied to the out arg
  2963         // position
  2964         if (d != noreg) {
  2965           __ mov(d, O0);
  2966         } else {
  2967           assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS),
  2968                  "must be");
  2969           __ ld_ptr(FP,  reg2offset(dst.first()) + STACK_BIAS, O0);
  2971         Label skip;
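                 // Skip the conversion for a NULL string; otherwise call
                 // SharedRuntime::get_utf to copy it as utf8 into the reserved
                 // stack buffer and rewrite the argument to point at that buffer.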
  2973         __ br_null(O0, false, Assembler::pn, skip);
  2974         __ delayed()->add(FP, L2_string_off, O1);
  2976         if (d != noreg) {
  2977           __ mov(O1, d);
  2978         } else {
  2979           assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS),
  2980                  "must be");
  2981           __ st_ptr(O1, FP,  reg2offset(dst.first()) + STACK_BIAS);
  2984         __ call(CAST_FROM_FN_PTR(address, SharedRuntime::get_utf),
  2985                 relocInfo::runtime_call_type);
  2986         __ delayed()->add(L2_string_off, max_dtrace_string_size, L2_string_off);
  2988         __ bind(skip);
  2993     __ mov(L7_thread_cache, G2_thread);
  2994     __ restore();
  2999   // OK, now we are done. We need to place the nop that dtrace wants in order to
  3000   // patch in the trap
  3002   int patch_offset = ((intptr_t)__ pc()) - start;
  3004   __ nop();
  3007   // Return
  3009   __ ret();
  3010   __ delayed()->restore();
  3012   __ flush();
  3014   nmethod *nm = nmethod::new_dtrace_nmethod(
  3015       method, masm->code(), vep_offset, patch_offset, frame_complete,
  3016       stack_slots / VMRegImpl::slots_per_word);
  3017   return nm;
  3021 #endif // HAVE_DTRACE_H
  3023 // this function returns the adjust size (in number of words) to a c2i adapter
  3024 // activation for use during deoptimization
  3025 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
  3026   assert(callee_locals >= callee_parameters,
  3027           "test and remove; got more parms than locals");
  3028   if (callee_locals < callee_parameters)
  3029     return 0;                   // No adjustment for negative locals
  3030   int diff = (callee_locals - callee_parameters) * Interpreter::stackElementWords;
  3031   return round_to(diff, WordsPerLong);
  3034 // "Top of Stack" slots that may be unused by the calling convention but must
  3035 // otherwise be preserved.
  3036 // On Intel these are not necessary and the value can be zero.
  3037 // On Sparc this describes the words reserved for storing a register window
  3038 // when an interrupt occurs.
  3039 uint SharedRuntime::out_preserve_stack_slots() {
  3040   return frame::register_save_words * VMRegImpl::slots_per_word;
  3043 static void gen_new_frame(MacroAssembler* masm, bool deopt) {
  3044 //
  3045 // Common out the new frame generation for deopt and uncommon trap
  3046 //
  3047   Register        G3pcs              = G3_scratch; // Array of new pcs (input)
  3048   Register        Oreturn0           = O0;
  3049   Register        Oreturn1           = O1;
  3050   Register        O2UnrollBlock      = O2;
  3051   Register        O3array            = O3;         // Array of frame sizes (input)
  3052   Register        O4array_size       = O4;         // number of frames (input)
  3053   Register        O7frame_size       = O7;         // size of current frame (loaded below)
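         // Load this frame's size from the size array, negate it so that save()
         // allocates the frame, then pick up the frame's new pc from the pc array.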
  3055   __ ld_ptr(O3array, 0, O7frame_size);
  3056   __ sub(G0, O7frame_size, O7frame_size);
  3057   __ save(SP, O7frame_size, SP);
  3058   __ ld_ptr(G3pcs, 0, I7);                      // load frame's new pc
  3060   #ifdef ASSERT
  3061   // make sure that the frames are aligned properly
  3062 #ifndef _LP64
  3063   __ btst(wordSize*2-1, SP);
  3064   __ breakpoint_trap(Assembler::notZero);
  3065 #endif
  3066   #endif
  3068   // Deopt needs to pass some extra live values from frame to frame
  3070   if (deopt) {
  3071     __ mov(Oreturn0->after_save(), Oreturn0);
  3072     __ mov(Oreturn1->after_save(), Oreturn1);
  3075   __ mov(O4array_size->after_save(), O4array_size);
  3076   __ sub(O4array_size, 1, O4array_size);
  3077   __ mov(O3array->after_save(), O3array);
  3078   __ mov(O2UnrollBlock->after_save(), O2UnrollBlock);
  3079   __ add(G3pcs, wordSize, G3pcs);               // point to next pc value
  3081   #ifdef ASSERT
  3082   // trash registers to show a clear pattern in backtraces
  3083   __ set(0xDEAD0000, I0);
  3084   __ add(I0,  2, I1);
  3085   __ add(I0,  4, I2);
  3086   __ add(I0,  6, I3);
  3087   __ add(I0,  8, I4);
  3088   // Don't touch I5; it could hold a valuable savedSP
  3089   __ set(0xDEADBEEF, L0);
  3090   __ mov(L0, L1);
  3091   __ mov(L0, L2);
  3092   __ mov(L0, L3);
  3093   __ mov(L0, L4);
  3094   __ mov(L0, L5);
  3096   // trash the return value as there is nothing to return yet
  3097   __ set(0xDEAD0001, O7);
  3098   #endif
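         // Publish this frame's SP as the savedSP (O5/I5) seen by the frame pushed next.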
  3100   __ mov(SP, O5_savedSP);
  3104 static void make_new_frames(MacroAssembler* masm, bool deopt) {
  3105   //
  3106   // loop through the UnrollBlock info and create new frames
  3107   //
  3108   Register        G3pcs              = G3_scratch;
  3109   Register        Oreturn0           = O0;
  3110   Register        Oreturn1           = O1;
  3111   Register        O2UnrollBlock      = O2;
  3112   Register        O3array            = O3;
  3113   Register        O4array_size       = O4;
  3114   Label           loop;
  3116   // Before we make new frames, check to see if stack is available.
  3117   // Do this after the caller's return address is on top of the stack
  3118   if (UseStackBanging) {
  3119     // Get total frame size for interpreted frames
  3120     __ ld(O2UnrollBlock, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes(), O4);
  3121     __ bang_stack_size(O4, O3, G3_scratch);
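         // Fetch the frame count, the array of new pcs, and the array of frame
         // sizes out of the UnrollBlock.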
  3124   __ ld(O2UnrollBlock, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes(), O4array_size);
  3125   __ ld_ptr(O2UnrollBlock, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes(), G3pcs);
  3126   __ ld_ptr(O2UnrollBlock, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes(), O3array);
  3128   // Adjust old interpreter frame to make space for new frame's extra java locals
  3129   //
  3130   // We capture the original sp for the transition frame only because it is needed in
  3131   // order to properly calculate interpreter_sp_adjustment. Even though in real life
  3132   // every interpreter frame captures a savedSP it is only needed at the transition
  3133   // (fortunately). If we had to have it correct everywhere then we would need to
  3134   // be told the sp_adjustment for each frame we create. If the frame size array
  3135   // were to have twice the frame count entries then we could have pairs [sp_adjustment, frame_size]
  3136   // for each frame we create and keep up the illusion everywhere.
  3137   //
  3139   __ ld(O2UnrollBlock, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes(), O7);
  3140   __ mov(SP, O5_savedSP);       // remember initial sender's original sp before adjustment
  3141   __ sub(SP, O7, SP);
  3143 #ifdef ASSERT
  3144   // make sure that there is at least one entry in the array
  3145   __ tst(O4array_size);
  3146   __ breakpoint_trap(Assembler::zero);
  3147 #endif
  3149   // Now push the new interpreter frames
  3150   __ bind(loop);
  3152   // allocate a new frame, filling the registers
  3154   gen_new_frame(masm, deopt);        // allocate an interpreter frame
  3156   __ tst(O4array_size);
  3157   __ br(Assembler::notZero, false, Assembler::pn, loop);
  3158   __ delayed()->add(O3array, wordSize, O3array);
  3159   __ ld_ptr(G3pcs, 0, O7);                      // load final frame new pc
  3163 //------------------------------generate_deopt_blob----------------------------
  3164 // Ought to generate an ideal graph & compile, but here's some SPARC ASM
  3165 // instead.
  3166 void SharedRuntime::generate_deopt_blob() {
  3167   // allocate space for the code
  3168   ResourceMark rm;
  3169   // setup code generation tools
  3170   int pad = VerifyThread ? 512 : 0; // Extra slop space for more verify code
  3171 #ifdef _LP64
  3172   CodeBuffer buffer("deopt_blob", 2100+pad, 512);
  3173 #else
  3174   // Measured 8/7/03 at 1212 in 32bit debug build (no VerifyThread)
  3175   // Measured 8/7/03 at 1396 in 32bit debug build (VerifyThread)
  3176   CodeBuffer buffer("deopt_blob", 1600+pad, 512);
  3177 #endif /* _LP64 */
  3178   MacroAssembler* masm               = new MacroAssembler(&buffer);
  3179   FloatRegister   Freturn0           = F0;
  3180   Register        Greturn1           = G1;
  3181   Register        Oreturn0           = O0;
  3182   Register        Oreturn1           = O1;
  3183   Register        O2UnrollBlock      = O2;
  3184   Register        L0deopt_mode       = L0;
  3185   Register        G4deopt_mode       = G4_scratch;
  3186   int             frame_size_words;
  3187   Address         saved_Freturn0_addr(FP, -sizeof(double) + STACK_BIAS);
  3188 #if !defined(_LP64) && defined(COMPILER2)
  3189   Address         saved_Greturn1_addr(FP, -sizeof(double) -sizeof(jlong) + STACK_BIAS);
  3190 #endif
  3191   Label           cont;
  3193   OopMapSet *oop_maps = new OopMapSet();
  3195   //
  3196   // This is the entry point for code which is returning to a de-optimized
  3197   // frame.
  3198   // The steps taken by this frame are as follows:
  3199   //   - push a dummy "register_save" and save the return values (O0, O1, F0/F1, G1)
  3200   //     and all potentially live registers (at a pollpoint many registers can be live).
  3201   //
  3202   //   - call the C routine: Deoptimization::fetch_unroll_info (this function
  3203   //     returns information about the number and size of interpreter frames
  3204   //     which are equivalent to the frame which is being deoptimized)
  3205   //   - deallocate the unpack frame, restoring only result values. Other
  3206   //     volatile registers will now be captured in the vframeArray as needed.
  3207   //   - deallocate the deoptimization frame
  3208   //   - in a loop using the information returned in the previous step
  3209   //     push new interpreter frames (take care to propagate the return
  3210   //     values through each new frame pushed)
  3211   //   - create a dummy "unpack_frame" and save the return values (O0, O1, F0)
  3212   //   - call the C routine: Deoptimization::unpack_frames (this function
  3213   //     lays out values on the interpreter frame which was just created)
  3214   //   - deallocate the dummy unpack_frame
  3215   //   - ensure that all the return values are correctly set and then do
  3216   //     a return to the interpreter entry point
  3217   //
  3218   // Refer to the following methods for more information:
  3219   //   - Deoptimization::fetch_unroll_info
  3220   //   - Deoptimization::unpack_frames
  3222   OopMap* map = NULL;
  3224   int start = __ offset();
  3226   // restore G2, the trampoline destroyed it
  3227   __ get_thread();
  3229   // On entry we have been called by the deoptimized nmethod with a call that
  3230   // replaced the original call (or safepoint polling location) so the deoptimizing
  3231   // pc is now in O7. Return values are still in the expected places
  3233   map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words);
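         // Ordinary deoptimization entry: record Unpack_deopt and join the common
         // path at cont.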
  3234   __ ba(false, cont);
  3235   __ delayed()->mov(Deoptimization::Unpack_deopt, L0deopt_mode);
  3237   int exception_offset = __ offset() - start;
  3239   // restore G2, the trampoline destroyed it
  3240   __ get_thread();
  3242   // On entry we have been jumped to by the exception handler (or exception_blob
  3243   // for server).  O0 contains the exception oop and O7 contains the original
  3244   // exception pc.  So if we push a frame here it will look to the
  3245   // stack walking code (fetch_unroll_info) just like a normal call so
  3246   // state will be extracted normally.
  3248   // save exception oop in JavaThread and fall through into the
  3249   // exception_in_tls case since they are handled in the same way except
  3250   // for where the pending exception is kept.
  3251   __ st_ptr(Oexception, G2_thread, JavaThread::exception_oop_offset());
  3253   //
  3254   // Vanilla deoptimization with an exception pending in exception_oop
  3255   //
  3256   int exception_in_tls_offset = __ offset() - start;
  3258   // No need to update oop_map  as each call to save_live_registers will produce identical oopmap
  3259   (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_words);
  3261   // Restore G2_thread
  3262   __ get_thread();
  3264 #ifdef ASSERT
  3266     // verify that there is really an exception oop in exception_oop
  3267     Label has_exception;
  3268     __ ld_ptr(G2_thread, JavaThread::exception_oop_offset(), Oexception);
  3269     __ br_notnull(Oexception, false, Assembler::pt, has_exception);
  3270     __ delayed()-> nop();
  3271     __ stop("no exception in thread");
  3272     __ bind(has_exception);
  3274     // verify that there is no pending exception
  3275     Label no_pending_exception;
  3276     Address exception_addr(G2_thread, Thread::pending_exception_offset());
  3277     __ ld_ptr(exception_addr, Oexception);
  3278     __ br_null(Oexception, false, Assembler::pt, no_pending_exception);
  3279     __ delayed()->nop();
  3280     __ stop("must not have pending exception here");
  3281     __ bind(no_pending_exception);
  3283 #endif
  3285   __ ba(false, cont);
  3286   __ delayed()->mov(Deoptimization::Unpack_exception, L0deopt_mode);
  3288   //
  3289   // Reexecute entry, similar to c2 uncommon trap
  3290   //
  3291   int reexecute_offset = __ offset() - start;
  3293   // No need to update oop_map  as each call to save_live_registers will produce identical oopmap
  3294   (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_words);
  3296   __ mov(Deoptimization::Unpack_reexecute, L0deopt_mode);
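         // All entry points meet here with the unpack mode in L0deopt_mode and the
         // live registers saved.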
  3298   __ bind(cont);
  3300   __ set_last_Java_frame(SP, noreg);
  3302   // do the call by hand so we can get the oopmap
  3304   __ mov(G2_thread, L7_thread_cache);
  3305   __ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), relocInfo::runtime_call_type);
  3306   __ delayed()->mov(G2_thread, O0);
  3308   // Set an oopmap for the call site; this describes all our saved volatile registers
  3310   oop_maps->add_gc_map( __ offset()-start, map);
  3312   __ mov(L7_thread_cache, G2_thread);
  3314   __ reset_last_Java_frame();
  3316   // NOTE: we know that only O0/O1 will be reloaded by restore_result_registers
  3317   // so this move will survive
  3319   __ mov(L0deopt_mode, G4deopt_mode);
  3321   __ mov(O0, O2UnrollBlock->after_save());
  3323   RegisterSaver::restore_result_registers(masm);
  3325   Label noException;
  3326   __ cmp(G4deopt_mode, Deoptimization::Unpack_exception);   // Was exception pending?
  3327   __ br(Assembler::notEqual, false, Assembler::pt, noException);
  3328   __ delayed()->nop();
  3330   // Move the pending exception from exception_oop to Oexception so
  3331   // the pending exception will be picked up by the interpreter.
  3332   __ ld_ptr(G2_thread, in_bytes(JavaThread::exception_oop_offset()), Oexception);
  3333   __ st_ptr(G0, G2_thread, in_bytes(JavaThread::exception_oop_offset()));
  3334   __ bind(noException);
  3336   // deallocate the deoptimization frame taking care to preserve the return values
  3337   __ mov(Oreturn0,     Oreturn0->after_save());
  3338   __ mov(Oreturn1,     Oreturn1->after_save());
  3339   __ mov(O2UnrollBlock, O2UnrollBlock->after_save());
  3340   __ restore();
  3342   // Allocate new interpreter frame(s) and possible c2i adapter frame
  3344   make_new_frames(masm, true);
  3346   // push a dummy "unpack_frame" taking care of float return values and
  3347   // call Deoptimization::unpack_frames to have the unpacker layout
  3348   // information in the interpreter frames just created and then return
  3349   // to the interpreter entry point
  3350   __ save(SP, -frame_size_words*wordSize, SP);
  3351   __ stf(FloatRegisterImpl::D, Freturn0, saved_Freturn0_addr);
  3352 #if !defined(_LP64)
  3353 #if defined(COMPILER2)
  3354   // 32-bit 1-register longs return longs in G1
  3355   __ stx(Greturn1, saved_Greturn1_addr);
  3356 #endif
  3357   __ set_last_Java_frame(SP, noreg);
  3358   __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), G2_thread, G4deopt_mode);
  3359 #else
  3360   // LP64 uses g4 in set_last_Java_frame
  3361   __ mov(G4deopt_mode, O1);
  3362   __ set_last_Java_frame(SP, G0);
  3363   __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), G2_thread, O1);
  3364 #endif
  3365   __ reset_last_Java_frame();
  3366   __ ldf(FloatRegisterImpl::D, saved_Freturn0_addr, Freturn0);
  3368 #if !defined(_LP64) && defined(COMPILER2)
  3369   // In 32 bit, C2 returns longs in G1 so restore the saved G1 into
  3370   // I0/I1 if the return value is long.
  3371   Label not_long;
  3372   __ cmp(O0,T_LONG);
  3373   __ br(Assembler::notEqual, false, Assembler::pt, not_long);
  3374   __ delayed()->nop();
  3375   __ ldd(saved_Greturn1_addr,I0);
  3376   __ bind(not_long);
  3377 #endif
  3378   __ ret();
  3379   __ delayed()->restore();
  3381   masm->flush();
  3382   _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_words);
  3383   _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
  3386 #ifdef COMPILER2
  3388 //------------------------------generate_uncommon_trap_blob--------------------
  3389 // Ought to generate an ideal graph & compile, but here's some SPARC ASM
  3390 // instead.
  3391 void SharedRuntime::generate_uncommon_trap_blob() {
  3392   // allocate space for the code
  3393   ResourceMark rm;
  3394   // setup code generation tools
  3395   int pad = VerifyThread ? 512 : 0;
  3396 #ifdef _LP64
  3397   CodeBuffer buffer("uncommon_trap_blob", 2700+pad, 512);
  3398 #else
  3399   // Measured 8/7/03 at 660 in 32bit debug build (no VerifyThread)
  3400   // Measured 8/7/03 at 1028 in 32bit debug build (VerifyThread)
  3401   CodeBuffer buffer("uncommon_trap_blob", 2000+pad, 512);
  3402 #endif
  3403   MacroAssembler* masm               = new MacroAssembler(&buffer);
  3404   Register        O2UnrollBlock      = O2;
  3405   Register        O2klass_index      = O2;
  3407   //
  3408   // This is the entry point for all traps the compiler takes when it thinks
  3409   // it cannot handle further execution of compiled code. The frame is
  3410   // deoptimized in these cases and converted into interpreter frames for
  3411   // execution.
  3412   // The steps taken by this frame are as follows:
  3413   //   - push a fake "unpack_frame"
  3414   //   - call the C routine Deoptimization::uncommon_trap (this function
  3415   //     packs the current compiled frame into vframe arrays and returns
  3416   //     information about the number and size of interpreter frames which
  3417   //     are equivalent to the frame which is being deoptimized)
  3418   //   - deallocate the "unpack_frame"
  3419   //   - deallocate the deoptimization frame
  3420   //   - in a loop using the information returned in the previous step
  3421   //     push interpreter frames;
  3422   //   - create a dummy "unpack_frame"
  3423   //   - call the C routine: Deoptimization::unpack_frames (this function
  3424   //     lays out values on the interpreter frame which was just created)
  3425   //   - deallocate the dummy unpack_frame
  3426   //   - return to the interpreter entry point
  3427   //
  3428   //  Refer to the following methods for more information:
  3429   //   - Deoptimization::uncommon_trap
  3430   //   - Deoptimization::unpack_frame
  3432   // the unloaded class index is in O0 (first parameter to this blob)
  3434   // push a dummy "unpack_frame"
  3435   // and call Deoptimization::uncommon_trap to pack the compiled frame into
  3436   // vframe array and return the UnrollBlock information
  3437   __ save_frame(0);
  3438   __ set_last_Java_frame(SP, noreg);
  3439   __ mov(I0, O2klass_index);
  3440   __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap), G2_thread, O2klass_index);
  3441   __ reset_last_Java_frame();
  3442   __ mov(O0, O2UnrollBlock->after_save());
  3443   __ restore();
  3445   // deallocate the deoptimized frame taking care to preserve the return values
  3446   __ mov(O2UnrollBlock, O2UnrollBlock->after_save());
  3447   __ restore();
  3449   // Allocate new interpreter frame(s) and possible c2i adapter frame
  3451   make_new_frames(masm, false);
  3453   // push a dummy "unpack_frame" taking care of float return values and
  3454   // call Deoptimization::unpack_frames to have the unpacker layout
  3455   // information in the interpreter frames just created and then return
  3456   // to the interpreter entry point
  3457   __ save_frame(0);
  3458   __ set_last_Java_frame(SP, noreg);
  3459   __ mov(Deoptimization::Unpack_uncommon_trap, O3); // indicate it is the uncommon trap case
  3460   __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), G2_thread, O3);
  3461   __ reset_last_Java_frame();
  3462   __ ret();
  3463   __ delayed()->restore();
  3465   masm->flush();
  3466   _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, NULL, __ total_frame_size_in_bytes(0)/wordSize);
  3469 #endif // COMPILER2
  3471 //------------------------------generate_handler_blob-------------------
  3472 //
  3473 // Generate a special Compile2Runtime blob that saves all registers, and sets
  3474 // up an OopMap.
  3475 //
  3476 // This blob is jumped to (via a breakpoint and the signal handler) from a
  3477 // safepoint in compiled code.  On entry to this blob, O7 contains the
  3478 // address in the original nmethod at which we should resume normal execution.
  3479 // Thus, this blob looks like a subroutine which must preserve lots of
  3480 // registers and return normally.  Note that O7 is never register-allocated,
  3481 // so it is guaranteed to be free here.
  3482 //
  3484 // The hardest part of what this blob must do is to save the 64-bit %o
  3485 // registers in the 32-bit build.  A simple 'save' turns the %o's into %i's and
  3486 // an interrupt will chop off their heads.  Making space in the caller's frame
  3487 // first will let us save the 64-bit %o's before save'ing, but we cannot hand
  3488 // the adjusted FP off to the GC stack-crawler: this will modify the caller's
  3489 // SP and mess up HIS OopMaps.  So we first adjust the caller's SP, then save
  3490 // the 64-bit %o's, then do a save, then fixup the caller's SP (our FP).
  3491 // Tricky, tricky, tricky...
  3493 static SafepointBlob* generate_handler_blob(address call_ptr, bool cause_return) {
  3494   assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");
  3496   // allocate space for the code
  3497   ResourceMark rm;
  3498   // setup code generation tools
  3499   // Measured 8/7/03 at 896 in 32bit debug build (no VerifyThread)
  3500   // Measured 8/7/03 at 1080 in 32bit debug build (VerifyThread)
  3501   // even larger with TraceJumps
  3502   int pad = TraceJumps ? 512 : 0;
  3503   CodeBuffer buffer("handler_blob", 1600 + pad, 512);
  3504   MacroAssembler* masm                = new MacroAssembler(&buffer);
  3505   int             frame_size_words;
  3506   OopMapSet *oop_maps = new OopMapSet();
  3507   OopMap* map = NULL;
  3509   int start = __ offset();
  3511   // If this causes a return before the processing, then do a "restore"
  3512   if (cause_return) {
  3513     __ restore();
  3514   } else {
  3515     // Make it look like we were called via the poll
  3516     // so that frame constructor always sees a valid return address
  3517     __ ld_ptr(G2_thread, in_bytes(JavaThread::saved_exception_pc_offset()), O7);
  3518     __ sub(O7, frame::pc_return_offset, O7);
  3521   map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words);
  3523   // setup last_Java_sp (blows G4)
  3524   __ set_last_Java_frame(SP, noreg);
  3526   // call into the runtime to handle the safepoint polling page exception
  3527   // Do not use call_VM_leaf, because we need to make a GC map at this call site.
  3528   __ mov(G2_thread, O0);
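         // G2_thread is preserved in L7 across the C call and restored below with
         // restore_thread.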
  3529   __ save_thread(L7_thread_cache);
  3530   __ call(call_ptr);
  3531   __ delayed()->nop();
  3533   // Set an oopmap for the call site.
  3534   // We need this not only for callee-saved registers, but also for volatile
  3535   // registers that the compiler might be keeping live across a safepoint.
  3537   oop_maps->add_gc_map( __ offset() - start, map);
  3539   __ restore_thread(L7_thread_cache);
  3540   // clear last_Java_sp
  3541   __ reset_last_Java_frame();
  3543   // Check for exceptions
  3544   Label pending;
  3546   __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), O1);
  3547   __ tst(O1);
  3548   __ brx(Assembler::notEqual, true, Assembler::pn, pending);
  3549   __ delayed()->nop();
  3551   RegisterSaver::restore_live_registers(masm);
  3553   // We are back to the original state on entry and ready to go.
  3555   __ retl();
  3556   __ delayed()->nop();
  3558   // Pending exception after the safepoint
  3560   __ bind(pending);
  3562   RegisterSaver::restore_live_registers(masm);
  3564   // We are back to the original state on entry.
  3566   // Tail-call forward_exception_entry, with the issuing PC in O7,
  3567   // so it looks like the original nmethod called forward_exception_entry.
  3568   __ set((intptr_t)StubRoutines::forward_exception_entry(), O0);
  3569   __ JMP(O0, 0);
  3570   __ delayed()->nop();
  3572   // -------------
  3573   // make sure all code is generated
  3574   masm->flush();
  3576   // return exception blob
  3577   return SafepointBlob::create(&buffer, oop_maps, frame_size_words);
  3580 //
  3581 // generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss)
  3582 //
  3583 // Generate a stub that calls into vm to find out the proper destination
  3584 // of a java call. All the argument registers are live at this point
  3585 // but since this is generic code we don't know what they are and the caller
  3586 // must do any gc of the args.
  3587 //
  3588 static RuntimeStub* generate_resolve_blob(address destination, const char* name) {
  3589   assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");
  3591   // allocate space for the code
  3592   ResourceMark rm;
  3593   // setup code generation tools
  3594   // Measured 8/7/03 at 896 in 32bit debug build (no VerifyThread)
  3595   // Measured 8/7/03 at 1080 in 32bit debug build (VerifyThread)
  3596   // even larger with TraceJumps
  3597   int pad = TraceJumps ? 512 : 0;
  3598   CodeBuffer buffer(name, 1600 + pad, 512);
  3599   MacroAssembler* masm                = new MacroAssembler(&buffer);
  3600   int             frame_size_words;
  3601   OopMapSet *oop_maps = new OopMapSet();
  3602   OopMap* map = NULL;
  3604   int start = __ offset();
  3606   map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words);
  3608   int frame_complete = __ offset();
  3610   // setup last_Java_sp (blows G4)
  3611   __ set_last_Java_frame(SP, noreg);
  3613   // call into the runtime to find the proper destination of the call
  3614   // Do not use call_VM_leaf, because we need to make a GC map at this call site.
  3615   __ mov(G2_thread, O0);
  3616   __ save_thread(L7_thread_cache);
  3617   __ call(destination, relocInfo::runtime_call_type);
  3618   __ delayed()->nop();
  3620   // O0 contains the address we are going to jump to assuming no exception got installed
  3622   // Set an oopmap for the call site.
  3623   // We need this not only for callee-saved registers, but also for volatile
  3624   // registers that the compiler might be keeping live across a safepoint.
  3626   oop_maps->add_gc_map( __ offset() - start, map);
  3628   __ restore_thread(L7_thread_cache);
  3629   // clear last_Java_sp
  3630   __ reset_last_Java_frame();
  3632   // Check for exceptions
  3633   Label pending;
  3635   __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), O1);
  3636   __ tst(O1);
  3637   __ brx(Assembler::notEqual, true, Assembler::pn, pending);
  3638   __ delayed()->nop();
  3640   // get the returned methodOop
  3642   __ get_vm_result(G5_method);
  3643   __ stx(G5_method, SP, RegisterSaver::G5_offset()+STACK_BIAS);
  3645   // O0 is where we want to jump, overwrite G3 which is saved and scratch
  3647   __ stx(O0, SP, RegisterSaver::G3_offset()+STACK_BIAS);
  3649   RegisterSaver::restore_live_registers(masm);
  3651   // We are back to the original state on entry and ready to go.
  3653   __ JMP(G3, 0);
  3654   __ delayed()->nop();
  3656   // Pending exception after the safepoint
  3658   __ bind(pending);
  3660   RegisterSaver::restore_live_registers(masm);
  3662   // We are back to the original state on entry.
  3664   // Tail-call forward_exception_entry, with the issuing PC in O7,
  3665   // so it looks like the original nmethod called forward_exception_entry.
  3666   __ set((intptr_t)StubRoutines::forward_exception_entry(), O0);
  3667   __ JMP(O0, 0);
  3668   __ delayed()->nop();
  3670   // -------------
  3671   // make sure all code is generated
  3672   masm->flush();
  3674   // return the  blob
  3675   // frame_size_words or bytes??
  3676   return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true);
  3679 void SharedRuntime::generate_stubs() {
  3681   _wrong_method_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::handle_wrong_method),
  3682                                              "wrong_method_stub");
  3684   _ic_miss_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::handle_wrong_method_ic_miss),
  3685                                         "ic_miss_stub");
  3687   _resolve_opt_virtual_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::resolve_opt_virtual_call_C),
  3688                                         "resolve_opt_virtual_call");
  3690   _resolve_virtual_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::resolve_virtual_call_C),
  3691                                         "resolve_virtual_call");
  3693   _resolve_static_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::resolve_static_call_C),
  3694                                         "resolve_static_call");
  3696   _polling_page_safepoint_handler_blob =
  3697     generate_handler_blob(CAST_FROM_FN_PTR(address,
  3698                    SafepointSynchronize::handle_polling_page_exception), false);
  3700   _polling_page_return_handler_blob =
  3701     generate_handler_blob(CAST_FROM_FN_PTR(address,
  3702                    SafepointSynchronize::handle_polling_page_exception), true);
  3704   generate_deopt_blob();
  3706 #ifdef COMPILER2
  3707   generate_uncommon_trap_blob();
  3708 #endif // COMPILER2
