src/cpu/mips/vm/sharedRuntime_mips_64.cpp

author:      aoqi
date:        Fri, 29 Apr 2016 00:06:10 +0800
changeset:   1 (2d8a650513c2), child: 5 (7e3d3484b275)
permissions: -rw-r--r--

Added MIPS 64-bit port.

/*
 * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */
#include "precompiled.hpp"
#include "asm/macroAssembler.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "code/debugInfoRec.hpp"
#include "code/icBuffer.hpp"
#include "code/vtableStubs.hpp"
#include "interpreter/interpreter.hpp"
#include "oops/compiledICHolder.hpp"
#include "prims/jvmtiRedefineClassesTrace.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/vframeArray.hpp"
#include "vmreg_mips.inline.hpp"
#ifdef COMPILER1
#include "c1/c1_Runtime1.hpp"
#endif
#ifdef COMPILER2
#include "opto/runtime.hpp"
#endif

#define __ masm->

const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size;
class RegisterSaver {
  enum { FPU_regs_live = 32 };
  // Capture info about frame layout
  enum layout {
#define DEF_LAYOUT_OFFS(regname)  regname ## _off,  regname ## H_off,
    DEF_LAYOUT_OFFS(for_16_bytes_aligned)
    DEF_LAYOUT_OFFS(fpr0)
    DEF_LAYOUT_OFFS(fpr1)
    DEF_LAYOUT_OFFS(fpr2)
    DEF_LAYOUT_OFFS(fpr3)
    DEF_LAYOUT_OFFS(fpr4)
    DEF_LAYOUT_OFFS(fpr5)
    DEF_LAYOUT_OFFS(fpr6)
    DEF_LAYOUT_OFFS(fpr7)
    DEF_LAYOUT_OFFS(fpr8)
    DEF_LAYOUT_OFFS(fpr9)
    DEF_LAYOUT_OFFS(fpr10)
    DEF_LAYOUT_OFFS(fpr11)
    DEF_LAYOUT_OFFS(fpr12)
    DEF_LAYOUT_OFFS(fpr13)
    DEF_LAYOUT_OFFS(fpr14)
    DEF_LAYOUT_OFFS(fpr15)
    DEF_LAYOUT_OFFS(fpr16)
    DEF_LAYOUT_OFFS(fpr17)
    DEF_LAYOUT_OFFS(fpr18)
    DEF_LAYOUT_OFFS(fpr19)
    DEF_LAYOUT_OFFS(fpr20)
    DEF_LAYOUT_OFFS(fpr21)
    DEF_LAYOUT_OFFS(fpr22)
    DEF_LAYOUT_OFFS(fpr23)
    DEF_LAYOUT_OFFS(fpr24)
    DEF_LAYOUT_OFFS(fpr25)
    DEF_LAYOUT_OFFS(fpr26)
    DEF_LAYOUT_OFFS(fpr27)
    DEF_LAYOUT_OFFS(fpr28)
    DEF_LAYOUT_OFFS(fpr29)
    DEF_LAYOUT_OFFS(fpr30)
    DEF_LAYOUT_OFFS(fpr31)

    DEF_LAYOUT_OFFS(v0)
    DEF_LAYOUT_OFFS(v1)
    DEF_LAYOUT_OFFS(a0)
    DEF_LAYOUT_OFFS(a1)
    DEF_LAYOUT_OFFS(a2)
    DEF_LAYOUT_OFFS(a3)
    DEF_LAYOUT_OFFS(a4)
    DEF_LAYOUT_OFFS(a5)
    DEF_LAYOUT_OFFS(a6)
    DEF_LAYOUT_OFFS(a7)
    DEF_LAYOUT_OFFS(t0)
    DEF_LAYOUT_OFFS(t1)
    DEF_LAYOUT_OFFS(t2)
    DEF_LAYOUT_OFFS(t3)
    DEF_LAYOUT_OFFS(s0)
    DEF_LAYOUT_OFFS(s1)
    DEF_LAYOUT_OFFS(s2)
    DEF_LAYOUT_OFFS(s3)
    DEF_LAYOUT_OFFS(s4)
    DEF_LAYOUT_OFFS(s5)
    DEF_LAYOUT_OFFS(s6)
    DEF_LAYOUT_OFFS(s7)
    DEF_LAYOUT_OFFS(t8)
    DEF_LAYOUT_OFFS(t9)

    DEF_LAYOUT_OFFS(gp)
    DEF_LAYOUT_OFFS(fp)
    DEF_LAYOUT_OFFS(return)
    reg_save_size
  };

 public:

  static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors = false);
  static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false);
  // FIXME, I have no idea which register to use
  static int raOffset(void) { return return_off / 2; }
  // Rmethod
  static int methodOffset(void) { return s3_off / 2; }

  static int v0Offset(void) { return v0_off / 2; }
  static int v1Offset(void) { return v1_off / 2; }

  static int fpResultOffset(void) { return fpr0_off / 2; }
  // During deoptimization only the result registers need to be restored;
  // all the other values have already been extracted.
  static void restore_result_registers(MacroAssembler* masm);
};
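// A quick sketch of how the layout enum above is consumed (assuming
// jintSize == 4 and wordSize == 8): each DEF_LAYOUT_OFFS(reg) defines two
// consecutive 4-byte slots, reg_off and regH_off, so one 64-bit register
// spans two slots.  That is why the accessors divide the slot index by 2 to
// get a word index, e.g.:
//
//   int ra_word_index  = RegisterSaver::raOffset();            // return_off / 2
//   int ra_byte_offset = RegisterSaver::raOffset() * wordSize; // within the save area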
OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {

/*
  int frame_words = reg_save_size + additional_frame_words;
  int frame_size_in_bytes =  frame_words * wordSize;
  *total_frame_words = frame_words;
*/
  // Always make the frame size 16-byte aligned
  int frame_size_in_bytes = round_to(additional_frame_words*wordSize +
                                     reg_save_size*BytesPerInt, 16);
  // OopMap frame size is in compiler stack slots (jint's) not bytes or words
  int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
  // The caller will allocate additional_frame_words
  int additional_frame_slots = additional_frame_words*wordSize / BytesPerInt;
  // CodeBlob frame size is in words.
  int frame_size_in_words = frame_size_in_bytes / wordSize;
  *total_frame_words = frame_size_in_words;
  // Save registers, fpu state, and flags.
  // We assume the caller already has the return address slot on the stack.
  // We push ebp twice in this sequence because we want the real ebp to be
  // under the return address like a normal enter, and we want to use pushad.
  // We push by hand instead of using push.
  __ daddiu(SP, SP, -reg_save_size * jintSize);

  __ sdc1(F0, SP, fpr0_off * jintSize);    __ sdc1(F1, SP, fpr1_off * jintSize);
  __ sdc1(F2, SP, fpr2_off * jintSize);    __ sdc1(F3, SP, fpr3_off * jintSize);
  __ sdc1(F4, SP, fpr4_off * jintSize);    __ sdc1(F5, SP, fpr5_off * jintSize);
  __ sdc1(F6, SP, fpr6_off * jintSize);    __ sdc1(F7, SP, fpr7_off * jintSize);
  __ sdc1(F8, SP, fpr8_off * jintSize);    __ sdc1(F9, SP, fpr9_off * jintSize);
  __ sdc1(F10, SP, fpr10_off * jintSize);  __ sdc1(F11, SP, fpr11_off * jintSize);
  __ sdc1(F12, SP, fpr12_off * jintSize);  __ sdc1(F13, SP, fpr13_off * jintSize);
  __ sdc1(F14, SP, fpr14_off * jintSize);  __ sdc1(F15, SP, fpr15_off * jintSize);
  __ sdc1(F16, SP, fpr16_off * jintSize);  __ sdc1(F17, SP, fpr17_off * jintSize);
  __ sdc1(F18, SP, fpr18_off * jintSize);  __ sdc1(F19, SP, fpr19_off * jintSize);
  __ sdc1(F20, SP, fpr20_off * jintSize);  __ sdc1(F21, SP, fpr21_off * jintSize);
  __ sdc1(F22, SP, fpr22_off * jintSize);  __ sdc1(F23, SP, fpr23_off * jintSize);
  __ sdc1(F24, SP, fpr24_off * jintSize);  __ sdc1(F25, SP, fpr25_off * jintSize);
  __ sdc1(F26, SP, fpr26_off * jintSize);  __ sdc1(F27, SP, fpr27_off * jintSize);
  __ sdc1(F28, SP, fpr28_off * jintSize);  __ sdc1(F29, SP, fpr29_off * jintSize);
  __ sdc1(F30, SP, fpr30_off * jintSize);  __ sdc1(F31, SP, fpr31_off * jintSize);

  __ sd(V0, SP, v0_off * jintSize);  __ sd(V1, SP, v1_off * jintSize);
  __ sd(A0, SP, a0_off * jintSize);  __ sd(A1, SP, a1_off * jintSize);
  __ sd(A2, SP, a2_off * jintSize);  __ sd(A3, SP, a3_off * jintSize);
  __ sd(A4, SP, a4_off * jintSize);  __ sd(A5, SP, a5_off * jintSize);
  __ sd(A6, SP, a6_off * jintSize);  __ sd(A7, SP, a7_off * jintSize);
  __ sd(T0, SP, t0_off * jintSize);
  __ sd(T1, SP, t1_off * jintSize);
  __ sd(T2, SP, t2_off * jintSize);
  __ sd(T3, SP, t3_off * jintSize);
  __ sd(S0, SP, s0_off * jintSize);
  __ sd(S1, SP, s1_off * jintSize);
  __ sd(S2, SP, s2_off * jintSize);
  __ sd(S3, SP, s3_off * jintSize);
  __ sd(S4, SP, s4_off * jintSize);
  __ sd(S5, SP, s5_off * jintSize);
  __ sd(S6, SP, s6_off * jintSize);
  __ sd(S7, SP, s7_off * jintSize);

  __ sd(T8, SP, t8_off * jintSize);
  __ sd(T9, SP, t9_off * jintSize);

  __ sd(GP, SP, gp_off * jintSize);
  __ sd(FP, SP, fp_off * jintSize);
  __ sd(RA, SP, return_off * jintSize);
  __ daddi(FP, SP, fp_off * jintSize);
  OopMapSet *oop_maps = new OopMapSet();
  //OopMap* map =  new OopMap( frame_words, 0 );
  OopMap* map = new OopMap(frame_size_in_slots, 0);

//#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_words)
#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_slots)
  map->set_callee_saved(STACK_OFFSET(v0_off), V0->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(v1_off), V1->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(a0_off), A0->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(a1_off), A1->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(a2_off), A2->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(a3_off), A3->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(a4_off), A4->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(a5_off), A5->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(a6_off), A6->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(a7_off), A7->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(t0_off), T0->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(t1_off), T1->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(t2_off), T2->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(t3_off), T3->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(s0_off), S0->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(s1_off), S1->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(s2_off), S2->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(s3_off), S3->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(s4_off), S4->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(s5_off), S5->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(s6_off), S6->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(s7_off), S7->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(t8_off), T8->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(t9_off), T9->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(gp_off), GP->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fp_off), FP->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(return_off), RA->as_VMReg());

  map->set_callee_saved(STACK_OFFSET(fpr0_off), F0->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr1_off), F1->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr2_off), F2->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr3_off), F3->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr4_off), F4->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr5_off), F5->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr6_off), F6->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr7_off), F7->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr8_off), F8->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr9_off), F9->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr10_off), F10->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr11_off), F11->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr12_off), F12->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr13_off), F13->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr14_off), F14->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr15_off), F15->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr16_off), F16->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr17_off), F17->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr18_off), F18->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr19_off), F19->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr20_off), F20->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr21_off), F21->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr22_off), F22->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr23_off), F23->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr24_off), F24->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr25_off), F25->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr26_off), F26->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr27_off), F27->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr28_off), F28->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr29_off), F29->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr30_off), F30->as_VMReg());
  map->set_callee_saved(STACK_OFFSET(fpr31_off), F31->as_VMReg());
/*
  if (true) {
    map->set_callee_saved(STACK_OFFSET(v0H_off), V0->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET(v1H_off), V1->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET(a0H_off), A0->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET(a1H_off), A1->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET(a2H_off), A2->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET(a3H_off), A3->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET(a4H_off), A4->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET(a5H_off), A5->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET(a6H_off), A6->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET(a7H_off), A7->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET(t0H_off), T0->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET(t1H_off), T1->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET(t2H_off), T2->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET(t3H_off), T3->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET(s0H_off), S0->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET(s1H_off), S1->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET(s2H_off), S2->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET(s3H_off), S3->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET(s4H_off), S4->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET(s5H_off), S5->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET(s6H_off), S6->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET(s7H_off), S7->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET(t8H_off), T8->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET(t9H_off), T9->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET(gpH_off), GP->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET(fpH_off), FP->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET(returnH_off), RA->as_VMReg()->next());

    map->set_callee_saved(STACK_OFFSET(fpr0H_off), F0->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET(fpr2H_off), F2->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET(fpr4H_off), F4->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET(fpr6H_off), F6->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET(fpr8H_off), F8->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET(fpr10H_off), F10->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET(fpr12H_off), F12->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET(fpr14H_off), F14->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET(fpr16H_off), F16->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET(fpr18H_off), F18->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET(fpr20H_off), F20->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET(fpr22H_off), F22->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET(fpr24H_off), F24->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET(fpr26H_off), F26->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET(fpr28H_off), F28->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET(fpr30H_off), F30->as_VMReg()->next());
  }
*/
#undef STACK_OFFSET
  return map;
}
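// How save/restore are typically paired in a runtime stub (a sketch under
// the usual HotSpot pattern, not code from this file): the blob saves all
// live registers, calls into the VM, then restores them, with the OopMap
// built above describing the frame while the VM call is in flight.
//
//   int total_frame_words;
//   OopMap* map = RegisterSaver::save_live_registers(masm, 0, &total_frame_words);
//   // ... set up args and call into the VM; GC can walk this frame via 'map' ...
//   RegisterSaver::restore_live_registers(masm);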
// Pop the current frame and restore all the registers that we
// saved.
void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
  __ ldc1(F0, SP, fpr0_off * jintSize);    __ ldc1(F1, SP, fpr1_off * jintSize);
  __ ldc1(F2, SP, fpr2_off * jintSize);    __ ldc1(F3, SP, fpr3_off * jintSize);
  __ ldc1(F4, SP, fpr4_off * jintSize);    __ ldc1(F5, SP, fpr5_off * jintSize);
  __ ldc1(F6, SP, fpr6_off * jintSize);    __ ldc1(F7, SP, fpr7_off * jintSize);
  __ ldc1(F8, SP, fpr8_off * jintSize);    __ ldc1(F9, SP, fpr9_off * jintSize);
  __ ldc1(F10, SP, fpr10_off * jintSize);  __ ldc1(F11, SP, fpr11_off * jintSize);
  __ ldc1(F12, SP, fpr12_off * jintSize);  __ ldc1(F13, SP, fpr13_off * jintSize);
  __ ldc1(F14, SP, fpr14_off * jintSize);  __ ldc1(F15, SP, fpr15_off * jintSize);
  __ ldc1(F16, SP, fpr16_off * jintSize);  __ ldc1(F17, SP, fpr17_off * jintSize);
  __ ldc1(F18, SP, fpr18_off * jintSize);  __ ldc1(F19, SP, fpr19_off * jintSize);
  __ ldc1(F20, SP, fpr20_off * jintSize);  __ ldc1(F21, SP, fpr21_off * jintSize);
  __ ldc1(F22, SP, fpr22_off * jintSize);  __ ldc1(F23, SP, fpr23_off * jintSize);
  __ ldc1(F24, SP, fpr24_off * jintSize);  __ ldc1(F25, SP, fpr25_off * jintSize);
  __ ldc1(F26, SP, fpr26_off * jintSize);  __ ldc1(F27, SP, fpr27_off * jintSize);
  __ ldc1(F28, SP, fpr28_off * jintSize);  __ ldc1(F29, SP, fpr29_off * jintSize);
  __ ldc1(F30, SP, fpr30_off * jintSize);  __ ldc1(F31, SP, fpr31_off * jintSize);

  __ ld(V0, SP, v0_off * jintSize);  __ ld(V1, SP, v1_off * jintSize);
  __ ld(A0, SP, a0_off * jintSize);  __ ld(A1, SP, a1_off * jintSize);
  __ ld(A2, SP, a2_off * jintSize);  __ ld(A3, SP, a3_off * jintSize);
  __ ld(A4, SP, a4_off * jintSize);  __ ld(A5, SP, a5_off * jintSize);
  __ ld(A6, SP, a6_off * jintSize);  __ ld(A7, SP, a7_off * jintSize);
  __ ld(T0, SP, t0_off * jintSize);
  __ ld(T1, SP, t1_off * jintSize);
  __ ld(T2, SP, t2_off * jintSize);
  __ ld(T3, SP, t3_off * jintSize);
  __ ld(S0, SP, s0_off * jintSize);
  __ ld(S1, SP, s1_off * jintSize);
  __ ld(S2, SP, s2_off * jintSize);
  __ ld(S3, SP, s3_off * jintSize);
  __ ld(S4, SP, s4_off * jintSize);
  __ ld(S5, SP, s5_off * jintSize);
  __ ld(S6, SP, s6_off * jintSize);
  __ ld(S7, SP, s7_off * jintSize);

  __ ld(T8, SP, t8_off * jintSize);
  __ ld(T9, SP, t9_off * jintSize);

  __ ld(GP, SP, gp_off * jintSize);
  __ ld(FP, SP, fp_off * jintSize);
  __ ld(RA, SP, return_off * jintSize);

  __ addiu(SP, SP, reg_save_size * jintSize);
}
// Pop the current frame and restore the registers that might be holding
// a result.
// FIXME: what if the result is a float?
void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
  // Just restore result registers. Only used by deoptimization. By
  // now any callee-save register that needs to be restored to a c2
  // caller of the deoptee has been extracted into the vframeArray
  // and will be stuffed into the c2i adapter we create for later
  // restoration so only result registers need to be restored here.
  __ ld(V0, SP, v0_off * jintSize);
  __ ld(V1, SP, v1_off * jintSize);
  __ addiu(SP, SP, return_off * jintSize);
}
// Is vector's size (in bytes) bigger than a size saved by default?
// On x86, 16-byte XMM registers are saved by default by fxsave/fxrstor;
// this port keeps the same 16-byte threshold.
bool SharedRuntime::is_wide_vector(int size) {
  return size > 16;
}
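// For example, is_wide_vector(16) is false (covered by the default save
// area), while a hypothetical 32-byte vector would report true.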
// The java_calling_convention describes stack locations as ideal slots on
// a frame with no abi restrictions. Since we must observe abi restrictions
// (like the placement of the register window) the slots must be biased by
// the following value.
static int reg2offset_in(VMReg r) {
  // Account for the saved fp and return address.
  // This should really be in_preserve_stack_slots.
  return (r->reg2stack() + 2 * VMRegImpl::slots_per_word) * VMRegImpl::stack_slot_size;
}

static int reg2offset_out(VMReg r) {
  return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
}
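// Worked example (assuming VMRegImpl::stack_slot_size == 4 and
// VMRegImpl::slots_per_word == 2): a VMReg at stack slot 3 maps to
//   reg2offset_in:  (3 + 2*2) * 4 = 28 bytes, i.e. biased past the two
//                   preserved words (saved fp and return address), and
//   reg2offset_out: (3 + out_preserve_stack_slots()) * 4 bytes.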
// ---------------------------------------------------------------------------
// Read the array of BasicTypes from a signature, and compute where the
// arguments should go.  Values in the VMRegPair regs array refer to 4-byte
// quantities.  Values less than SharedInfo::stack0 are registers, those above
// refer to 4-byte stack slots.  All stack slots are based off of the stack
// pointer as framesizes are fixed.
// VMRegImpl::stack0 refers to the first slot 0(sp),
// and VMRegImpl::stack0+1 refers to the memory word 4 bytes higher.  Registers
// 0 up to RegisterImpl::number_of_registers are the 32-bit
// integer registers.

// Note: the INPUTS in sig_bt are in units of Java argument words, which are
// either 32-bit or 64-bit depending on the build.  The OUTPUTS are in 32-bit
// units regardless of build. Of course for i486 there is no 64 bit build.

// ---------------------------------------------------------------------------
// The compiled Java calling convention.
// Pass the first oop/int args in registers T0, A0 - A7 and the first
// float/double args in F12 - F19; remaining args go on the stack.  Register
// slots are assigned positionally, so an int arg and a float arg at the same
// argument position consume the same slot index in their register files.
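// For instance, assuming no args have spilled to the stack yet, a Java
// signature (int, float, long) is assigned by the shared positional counter
// in java_calling_convention below as:
//   arg 0 (int)   -> INT_ArgReg[0] = T0
//   arg 1 (float) -> FP_ArgReg[1]  = F13
//   arg 2 (long)  -> INT_ArgReg[2] = A1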
int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
                                           VMRegPair *regs,
                                           int total_args_passed,
                                           int is_outgoing) {
//#define aoqi_test
#ifdef aoqi_test
  tty->print_cr(" SharedRuntime::%s :%d, total_args_passed: %d", __func__, __LINE__, total_args_passed);
#endif

  // Create the mapping between argument positions and registers.
  //static const Register INT_ArgReg[Argument::n_int_register_parameters_j] = {
  static const Register INT_ArgReg[Argument::n_register_parameters + 1] = {
    T0, A0, A1, A2, A3, A4, A5, A6, A7
  };
  //static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters_j] = {
  static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = {
    F12, F13, F14, F15, F16, F17, F18, F19
  };

  uint args = 0;
  uint stk_args = 0; // inc by 2 each time
  for (int i = 0; i < total_args_passed; i++) {
    switch (sig_bt[i]) {
    case T_VOID:
      // halves of T_LONG or T_DOUBLE
      assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
      regs[i].set_bad();
      break;
    case T_BOOLEAN:
    case T_CHAR:
    case T_BYTE:
    case T_SHORT:
    case T_INT:
      if (args < Argument::n_register_parameters) {
        regs[i].set1(INT_ArgReg[args++]->as_VMReg());
      } else {
        regs[i].set1(VMRegImpl::stack2reg(stk_args));
        stk_args += 2;
      }
      break;
    case T_LONG:
      assert(sig_bt[i + 1] == T_VOID, "expecting half");
      // fall through
    case T_OBJECT:
    case T_ARRAY:
    case T_ADDRESS:
      if (args < Argument::n_register_parameters) {
        regs[i].set2(INT_ArgReg[args++]->as_VMReg());
      } else {
        regs[i].set2(VMRegImpl::stack2reg(stk_args));
        stk_args += 2;
      }
      break;
    case T_FLOAT:
      if (args < Argument::n_float_register_parameters) {
        regs[i].set1(FP_ArgReg[args++]->as_VMReg());
      } else {
        regs[i].set1(VMRegImpl::stack2reg(stk_args));
        stk_args += 2;
      }
      break;
    case T_DOUBLE:
      assert(sig_bt[i + 1] == T_VOID, "expecting half");
      if (args < Argument::n_float_register_parameters) {
        regs[i].set2(FP_ArgReg[args++]->as_VMReg());
      } else {
        regs[i].set2(VMRegImpl::stack2reg(stk_args));
        stk_args += 2;
      }
      break;
    default:
      ShouldNotReachHere();
      break;
    }
#ifdef aoqi_test
    tty->print_cr(" SharedRuntime::%s :%d, sig_bt[%d]: %d, reg[%d]:%d|%d, stk_args:%d", __func__, __LINE__, i, sig_bt[i], i, regs[i].first(), regs[i].second(), stk_args);
#endif
  }

  return round_to(stk_args, 2);
/*
  // Starting stack position for args on stack
  uint    stack = 0;

  // Pass first five oop/int args in registers T0, A0 - A3.
  uint reg_arg0 = 9999;
  uint reg_arg1 = 9999;
  uint reg_arg2 = 9999;
  uint reg_arg3 = 9999;
  uint reg_arg4 = 9999;

  // Pass doubles & longs & floats aligned on the stack.  First count stack slots for doubles
  int i;
  for( i = 0; i < total_args_passed; i++) {
    if( sig_bt[i] == T_DOUBLE || sig_bt[i] == T_LONG ) {
      stack += 2;
    }
  }
  int dstack = 0;  // Separate counter for placing doubles
  for( i = 0; i < total_args_passed; i++) {
    // From the type and the argument number (count) compute the location
    switch( sig_bt[i] ) {
    case T_SHORT:
    case T_CHAR:
    case T_BYTE:
    case T_BOOLEAN:
    case T_INT:
    case T_ARRAY:
    case T_OBJECT:
    case T_ADDRESS:
      if( reg_arg0 == 9999 )  {
        reg_arg0 = i;
        regs[i].set1(T0->as_VMReg());
      } else if( reg_arg1 == 9999 ) {
        reg_arg1 = i;
        regs[i].set1(A0->as_VMReg());
      } else if( reg_arg2 == 9999 ) {
        reg_arg2 = i;
        regs[i].set1(A1->as_VMReg());
      } else if( reg_arg3 == 9999 ) {
        reg_arg3 = i;
        regs[i].set1(A2->as_VMReg());
      } else if( reg_arg4 == 9999 ) {
        reg_arg4 = i;
        regs[i].set1(A3->as_VMReg());
      } else {
        regs[i].set1(VMRegImpl::stack2reg(stack++));
      }
      break;
    case T_FLOAT:
      regs[i].set1(VMRegImpl::stack2reg(stack++));
      break;
    case T_LONG:
      assert(sig_bt[i+1] == T_VOID, "missing Half" );
      regs[i].set2(VMRegImpl::stack2reg(dstack));
      dstack += 2;
      break;
    case T_DOUBLE:
      assert(sig_bt[i+1] == T_VOID, "missing Half" );
      regs[i].set2(VMRegImpl::stack2reg(dstack));
      dstack += 2;
      break;
    case T_VOID:
      regs[i].set_bad();
      break;
    default:
      ShouldNotReachHere();
      break;
    }
  }
  // return value can be odd number of VMRegImpl stack slots make multiple of 2
  return round_to(stack, 2);
*/
}
// Helper class mostly to avoid passing masm everywhere, and handle store
// displacement overflow logic for LP64
class AdapterGenerator {
  MacroAssembler *masm;
#ifdef _LP64
  Register Rdisp;
  void set_Rdisp(Register r)  { Rdisp = r; }
#endif // _LP64

  void patch_callers_callsite();
//  void tag_c2i_arg(frame::Tag t, Register base, int st_off, Register scratch);

  // base+st_off points to top of argument
  int arg_offset(const int st_off) { return st_off; }
  int next_arg_offset(const int st_off) {
    return st_off - Interpreter::stackElementSize;
  }

#ifdef _LP64
  // On _LP64 argument slot values are loaded first into a register
  // because they might not fit into displacement.
  Register arg_slot(const int st_off);
  Register next_arg_slot(const int st_off);
#else
  int arg_slot(const int st_off)      { return arg_offset(st_off); }
  int next_arg_slot(const int st_off) { return next_arg_offset(st_off); }
#endif // _LP64

  // Stores long into offset pointed to by base
  void store_c2i_long(Register r, Register base,
                      const int st_off, bool is_stack);
  void store_c2i_object(Register r, Register base,
                        const int st_off);
  void store_c2i_int(Register r, Register base,
                     const int st_off);
  void store_c2i_double(VMReg r_2,
                        VMReg r_1, Register base, const int st_off);
  void store_c2i_float(FloatRegister f, Register base,
                       const int st_off);

 public:
  //void tag_stack(const BasicType sig, int st_off);
  void gen_c2i_adapter(int total_args_passed,
                       // VMReg max_arg,
                       int comp_args_on_stack, // VMRegStackSlots
                       const BasicType *sig_bt,
                       const VMRegPair *regs,
                       Label& skip_fixup);
  void gen_i2c_adapter(int total_args_passed,
                       // VMReg max_arg,
                       int comp_args_on_stack, // VMRegStackSlots
                       const BasicType *sig_bt,
                       const VMRegPair *regs);

  AdapterGenerator(MacroAssembler *_masm) : masm(_masm) {}
};
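// Rough call paths these adapters serve (see generate_i2c2i_adapters below):
//
//   interpreted caller --(i2c_entry)--> compiled callee
//   compiled caller    --(c2i_entry)--> interpreted callee
//
// gen_i2c_adapter repacks interpreter stack args into the compiled calling
// convention; gen_c2i_adapter does the reverse, after possibly patching the
// caller's call site if a compiled target has appeared in the meantime.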
// Patch the callers callsite with entry to compiled code if it exists.
void AdapterGenerator::patch_callers_callsite() {
  Label L;
  //FIXME, what is stored in eax?
  //__ verify_oop(ebx);
  __ verify_oop(Rmethod);
  // __ cmpl(Address(ebx, in_bytes(Method::code_offset())), NULL_WORD);
  __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset()));
  //__ jcc(Assembler::equal, L);
  __ beq(AT, R0, L);
  __ delayed()->nop();
  // Schedule the branch target address early.
  // Call into the VM to patch the caller, then jump to compiled callee
  // eax isn't live so capture return address while we easily can
  //  __ movl(eax, Address(esp, 0));
  //  __ lw(T5, SP, 0);
  __ move(V0, RA);

  __ pushad();
  //jerome_for_debug
  // __ pushad();
  // __ pushfd();
#ifdef COMPILER2
  // C2 may leave the stack dirty if not in SSE2+ mode
  __ empty_FPU_stack();
#endif /* COMPILER2 */

  // VM needs caller's callsite
  //  __ pushl(eax);

  // VM needs target method
  // __ pushl(ebx);
  //  __ push(Rmethod);
  // __ verify_oop(ebx);

  __ move(A0, Rmethod);
  __ move(A1, V0);
  //  __ addi(SP, SP, -8);
  // we should preserve the return address
  __ verify_oop(Rmethod);
  __ move(S0, SP);
  __ move(AT, -(StackAlignmentInBytes));   // align the stack
  __ andr(SP, SP, AT);
  __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite),
          relocInfo::runtime_call_type);
  //__ addl(esp, 2*wordSize);

  __ delayed()->nop();
  //  __ addi(SP, SP, 8);
  //  __ popfd();
  __ move(SP, S0);
  __ popad();
  __ bind(L);
}
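// In outline, the fixup above does (a sketch of the control flow, not
// additional generated code):
//
//   if (Rmethod->code() == NULL) goto L;   // no compiled target yet: nothing to patch
//   save caller-saved state (pushad);
//   SharedRuntime::fixup_callers_callsite(method = A0, return_address = A1);
//   restore state (popad);
//   L:                                     // fall through to the caller's next step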
/*
void AdapterGenerator::tag_c2i_arg(frame::Tag t, Register base, int st_off,
                                   Register scratch) {
  Unimplemented();
}
*/
#ifdef _LP64
Register AdapterGenerator::arg_slot(const int st_off) {
  Unimplemented();
}

Register AdapterGenerator::next_arg_slot(const int st_off) {
  Unimplemented();
}
#endif // _LP64

// Stores long into offset pointed to by base
void AdapterGenerator::store_c2i_long(Register r, Register base,
                                      const int st_off, bool is_stack) {
  Unimplemented();
}

void AdapterGenerator::store_c2i_object(Register r, Register base,
                                        const int st_off) {
  Unimplemented();
}

void AdapterGenerator::store_c2i_int(Register r, Register base,
                                     const int st_off) {
  Unimplemented();
}

// Stores into offset pointed to by base
void AdapterGenerator::store_c2i_double(VMReg r_2,
                                        VMReg r_1, Register base, const int st_off) {
  Unimplemented();
}

void AdapterGenerator::store_c2i_float(FloatRegister f, Register base,
                                       const int st_off) {
  Unimplemented();
}
/*
void AdapterGenerator::tag_stack(const BasicType sig, int st_off) {
  if (TaggedStackInterpreter) {
    int tag_offset = st_off + Interpreter::expr_tag_offset_in_bytes(0);
    if (sig == T_OBJECT || sig == T_ARRAY) {
      // __ movl(Address(esp, tag_offset), frame::TagReference);
      // __ addi(AT, R0, frame::TagReference);
      __ move(AT, frame::TagReference);
      __ sw(AT, SP, tag_offset);
    } else if (sig == T_LONG || sig == T_DOUBLE) {
      int next_tag_offset = st_off + Interpreter::expr_tag_offset_in_bytes(1);
      // __ movl(Address(esp, next_tag_offset), frame::TagValue);
      // __ addi(AT, R0, frame::TagValue);
      __ move(AT, frame::TagValue);
      __ sw(AT, SP, next_tag_offset);
      //__ movl(Address(esp, tag_offset), frame::TagValue);
      // __ addi(AT, R0, frame::TagValue);
      __ move(AT, frame::TagValue);
      __ sw(AT, SP, tag_offset);
    } else {
      // __ movl(Address(esp, tag_offset), frame::TagValue);
      //__ addi(AT, R0, frame::TagValue);
      __ move(AT, frame::TagValue);
      __ sw(AT, SP, tag_offset);
    }
  }
}
*/
void AdapterGenerator::gen_c2i_adapter(
                            int total_args_passed,
                            // VMReg max_arg,
                            int comp_args_on_stack, // VMRegStackSlots
                            const BasicType *sig_bt,
                            const VMRegPair *regs,
                            Label& skip_fixup) {

  // Before we get into the guts of the C2I adapter, see if we should be here
  // at all.  We've come from compiled code and are attempting to jump to the
  // interpreter, which means the caller made a static call to get here
  // (vcalls always get a compiled target if there is one).  Check for a
  // compiled target.  If there is one, we need to patch the caller's call.
  // However we will run interpreted if we come thru here. The next pass
  // thru the call site will run compiled. If we ran compiled here then
  // we can (theoretically) do endless i2c->c2i->i2c transitions during
  // deopt/uncommon trap cycles. If we always go interpreted here then
  // we can have at most one and don't need to play any tricks to keep
  // from endlessly growing the stack.
  //
  // Actually if we detected that we had an i2c->c2i transition here we
  // ought to be able to reset the world back to the state of the interpreted
  // call and not bother building another interpreter arg area. We don't
  // do that at this point.
  patch_callers_callsite();

  __ bind(skip_fixup);

#ifdef COMPILER2
  __ empty_FPU_stack();
#endif /* COMPILER2 */

  // this is for native ?
  // Since all args are passed on the stack,
  // total_args_passed * Interpreter::stackElementSize is the space we need.
  int extraspace = total_args_passed * Interpreter::stackElementSize;

  // stack is aligned, keep it that way
  extraspace = round_to(extraspace, 2*wordSize);

  // Get return address
  // __ popl(eax);
  //__ pop(T4);
  __ move(V0, RA);
  // set senderSP value
  // __ movl(esi, esp);
  // refer to interpreter_mips.cpp: generate_asm_entry
  __ move(Rsender, SP);
  //__ subl(esp, extraspace);
  __ addi(SP, SP, -extraspace);
  // Now write the args into the outgoing interpreter space
  for (int i = 0; i < total_args_passed; i++) {
    if (sig_bt[i] == T_VOID) {
      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE),
             "missing half");
      continue;
    }

    // st_off points to lowest address on stack.
    int st_off = ((total_args_passed - 1) - i) * Interpreter::stackElementSize;
#ifdef aoqi_test
    tty->print_cr(" AdapterGenerator::%s :%d, sig_bt[%d]:%d, total_args_passed:%d, st_off:%d", __func__, __LINE__, i, sig_bt[i], total_args_passed, st_off);
#endif
    // Say 4 args:
    // i   st_off
    // 0   12 T_LONG
    // 1    8 T_VOID
    // 2    4 T_OBJECT
    // 3    0 T_BOOL
    VMReg r_1 = regs[i].first();
    VMReg r_2 = regs[i].second();
    if (!r_1->is_valid()) {
      assert(!r_2->is_valid(), "");
      continue;
    }
    if (r_1->is_stack()) {
      // memory to memory use fpu stack top
      int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
#ifdef aoqi_test
      tty->print_cr(" AdapterGenerator::%s :%d, r_1->is_stack, ld_off:%x", __func__, __LINE__, ld_off);
#endif

      if (!r_2->is_valid()) {
#ifdef aoqi_test
        tty->print_cr(" AdapterGenerator::%s :%d, !r_2->is_valid, ld_off:%x", __func__, __LINE__, ld_off);
#endif
        __ ld_ptr(AT, SP, ld_off);
        __ st_ptr(AT, SP, st_off);
        //tag_stack(sig_bt[i], st_off);
      } else {
#ifdef aoqi_test
        tty->print_cr(" AdapterGenerator::%s :%d, r_2->is_valid, ld_off:%x", __func__, __LINE__, ld_off);
#endif

        // ld_off == LSW, ld_off+VMRegImpl::stack_slot_size == MSW
        // st_off == MSW, st_off-wordSize == LSW

        int next_off = st_off - Interpreter::stackElementSize;
        /*
        __ lw(AT, SP, ld_off);
        __ sw(AT, SP, next_off);
        __ lw(AT, SP, ld_off + wordSize);
        __ sw(AT, SP, st_off);
        */
        __ ld_ptr(AT, SP, ld_off);
        __ st_ptr(AT, SP, st_off);

        /* Ref to is_Register condition */
        if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE)
          __ st_ptr(AT, SP, st_off - 8);
        //tag_stack(sig_bt[i], next_off);
      }
    } else if (r_1->is_Register()) {
      Register r = r_1->as_Register();
      if (!r_2->is_valid()) {
#ifdef aoqi_test
        tty->print_cr(" AdapterGenerator::%s :%d, r_1->is_Register, !r_2->is_valid, st_off: %lx", __func__, __LINE__, st_off);
#endif
        // __ movl(Address(esp, st_off), r);
        __ sd(r, SP, st_off); //aoqi_test FIXME
        //tag_stack(sig_bt[i], st_off);
      } else {
#ifdef aoqi_test
        tty->print_cr(" AdapterGenerator::%s :%d, r_1->is_Register, r_2->is_valid, st_off: %lx", __func__, __LINE__, st_off);
#endif
        //FIXME, mips will not enter here
        // long/double in gpr
        __ sd(r, SP, st_off); //aoqi_test FIXME
/* Jin: In [java/util/zip/ZipFile.java]

    private static native long open(String name, int mode, long lastModified);
    private static native int getTotal(long jzfile);
 *
 * We need to transfer T_LONG parameters from a compiled method to a native method.
 * It's a complex process:
 *
 * Caller -> lir_static_call -> gen_resolve_stub
      -> -- resolve_static_call_C
         `- gen_c2i_adapter()  [*]
             |
             `- AdapterHandlerLibrary::get_create_adapter_index
      -> generate_native_entry
      -> InterpreterRuntime::SignatureHandlerGenerator::pass_long [**]

 * In [**], a T_LONG parameter is stored in the stack as:

   (high)
    |         |
    -----------
    | 8 bytes |
    | (void)  |
    -----------
    | 8 bytes |
    | (long)  |
    -----------
    |         |
   (low)
 *
 * However, the sequence is reversed here:
 *
   (high)
    |         |
    -----------
    | 8 bytes |
    | (long)  |
    -----------
    | 8 bytes |
    | (void)  |
    -----------
    |         |
   (low)
 *
 * So I stored another 8 bytes in the T_VOID slot. It then can be accessed from generate_native_entry().
 */
        if (sig_bt[i] == T_LONG)
          __ sd(r, SP, st_off - 8);
        //  ShouldNotReachHere();
        //  int next_off = st_off - Interpreter::stackElementSize;
        //  __ sw(r_2->as_Register(), SP, st_off);
        //  __ sw(r, SP, next_off);
        //  tag_stack(masm, sig_bt[i], next_off);
      }
    } else if (r_1->is_FloatRegister()) {
      assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register");

      FloatRegister fr = r_1->as_FloatRegister();
      if (sig_bt[i] == T_FLOAT)
        __ swc1(fr, SP, st_off);
      else {
        __ sdc1(fr, SP, st_off);
        __ sdc1(fr, SP, st_off - 8);  /* T_DOUBLE needs two slots */
      }
    }
  }

  // Schedule the branch target address early.
  __ ld_ptr(AT, Rmethod, in_bytes(Method::interpreter_entry_offset()));
  // And repush original return address
  __ move(RA, V0);
  __ jr(AT);
  __ delayed()->nop();
}
void AdapterGenerator::gen_i2c_adapter(
                            int total_args_passed,
                            // VMReg max_arg,
                            int comp_args_on_stack, // VMRegStackSlots
                            const BasicType *sig_bt,
                            const VMRegPair *regs) {

  // Generate an I2C adapter: adjust the I-frame to make space for the C-frame
  // layout.  Lesp was saved by the calling I-frame and will be restored on
  // return.  Meanwhile, outgoing arg space is all owned by the callee
  // C-frame, so we can mangle it at will.  After adjusting the frame size,
  // hoist register arguments and repack other args according to the compiled
  // code convention.  Finally, end in a jump to the compiled code.  The entry
  // point address is the start of the buffer.

  // We will only enter here from an interpreted frame and never from after
  // passing thru a c2i. Azul allowed this but we do not. If we lose the
  // race and use a c2i we will remain interpreted for the race loser(s).
  // This removes all sorts of headaches on the mips side and also eliminates
  // the possibility of having c2i -> i2c -> c2i -> ... endless transitions.
  __ move(T9, SP);

  // Cut-out for having no stack args.  Since up to 2 int/oop args are passed
  // in registers, we will occasionally have no stack args.
  int comp_words_on_stack = 0;
  if (comp_args_on_stack) {
    // Sig words on the stack are greater-than VMRegImpl::stack0.  Those in
    // registers are below.  By subtracting stack0, we either get a negative
    // number (all values in registers) or the maximum stack slot accessed.
    // int comp_args_on_stack = VMRegImpl::reg2stack(max_arg);
    // Convert 4-byte stack slots to words.
    // did mips need round? FIXME  aoqi
    comp_words_on_stack = round_to(comp_args_on_stack*4, wordSize)>>LogBytesPerWord;
    // Round up to minimum stack alignment, in wordSize
    comp_words_on_stack = round_to(comp_words_on_stack, 2);
    __ daddi(SP, SP, -comp_words_on_stack * wordSize);
  }
  // Align the outgoing SP
  __ move(AT, -(StackAlignmentInBytes));
  __ andr(SP, SP, AT);
  // push the return address on the stack (note that pushing, rather
  // than storing it, yields the correct frame alignment for the callee)
  // Put saved SP in another register
  // const Register saved_sp = eax;
  const Register saved_sp = V0;
  __ move(saved_sp, T9);

  // Will jump to the compiled code just as if compiled code was doing it.
  // Pre-load the register-jump target early, to schedule it better.
  __ ld(T9, Rmethod, in_bytes(Method::from_compiled_offset()));
  // Now generate the shuffle code.  Pick up all register args and move the
  // rest through the floating point stack top.
  for (int i = 0; i < total_args_passed; i++) {
    if (sig_bt[i] == T_VOID) {
      // Longs and doubles are passed in native word order, but misaligned
      // in the 32-bit build.
      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
      continue;
    }
    // Pick up 0, 1 or 2 words from SP+offset.

    //FIXME. aoqi. just delete the assert
    //assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "scrambled load targets?");
    // Load in argument order going down.
    int ld_off = (total_args_passed - 1 - i) * Interpreter::stackElementSize;
    // Point to interpreter value (vs. tag)
    int next_off = ld_off - Interpreter::stackElementSize;

    VMReg r_1 = regs[i].first();
    VMReg r_2 = regs[i].second();
    if (!r_1->is_valid()) {
      assert(!r_2->is_valid(), "");
      continue;
    }
#ifdef aoqi_test
    tty->print_cr(" AdapterGenerator::%s :%d, sig_bt[%d]:%d, total_args_passed:%d, ld_off:%d, next_off: %d", __func__, __LINE__, i, sig_bt[i], total_args_passed, ld_off, next_off);
#endif
    if (r_1->is_stack()) {
      // Convert stack slot to an SP offset (+ wordSize to
      // account for return address)
      // NOTICE HERE!!!! I sub a wordSize here
      int st_off = regs[i].first()->reg2stack() * VMRegImpl::stack_slot_size;
      //+ wordSize;

      // We can use esi as a temp here because compiled code doesn't
      // need esi as an input, and if we end up going thru a c2i because
      // of a miss, a reasonable value of esi will be generated.
      if (!r_2->is_valid()) {
#ifdef aoqi_test
        tty->print_cr(" AdapterGenerator::%s :%d, sig_bt[%d]:%d, total_args_passed:%d r_1->is_stack() !r_2->is_valid(), st_off:%d", __func__, __LINE__, i, sig_bt[i], total_args_passed, st_off);
#endif
        __ ld(AT, saved_sp, ld_off);
        __ sd(AT, SP, st_off);
      } else {
#ifdef aoqi_test
        tty->print_cr(" AdapterGenerator::%s :%d, sig_bt[%d]:%d, total_args_passed:%d r_1->is_stack() r_2->is_valid(), st_off:%d", __func__, __LINE__, i, sig_bt[i], total_args_passed, st_off);
#endif
        // Interpreter local[n] == MSW, local[n+1] == LSW however locals
        // are accessed as negative so LSW is at LOW address

        // ld_off is MSW so get LSW
        // st_off is LSW (i.e. reg.first())
        /*
        __ ld(AT, saved_sp, next_off);
        __ sd(AT, SP, st_off);
        __ ld(AT, saved_sp, ld_off);
        __ sd(AT, SP, st_off + wordSize);
        */

        /* 2012/4/9 Jin
         * [./org/eclipse/swt/graphics/GC.java]
         * void drawImageXRender(Image srcImage, int srcX, int srcY, int srcWidth, int srcHeight,
            int destX, int destY, int destWidth, int destHeight,
            boolean simple,
            int imgWidth, int imgHeight,
            long maskPixmap,    <-- Pass T_LONG in stack
            int maskType);
         * Before this modification, Eclipse displays icons with solid black background.
         */
        __ ld(AT, saved_sp, ld_off);
        if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE)
          __ ld(AT, saved_sp, ld_off - 8);
        __ sd(AT, SP, st_off);
        //__ ld(AT, saved_sp, next_off);
        //__ sd(AT, SP, st_off + wordSize);
      }
    } else if (r_1->is_Register()) {  // Register argument
      Register r = r_1->as_Register();
      // assert(r != eax, "must be different");
      if (r_2->is_valid()) {
#ifdef aoqi_test
        tty->print_cr(" AdapterGenerator::%s :%d, sig_bt[%d]:%d, total_args_passed:%d r_1->is_Register() r_2->is_valid()", __func__, __LINE__, i, sig_bt[i], total_args_passed);
#endif
        //  assert(r_2->as_Register() != eax, "need another temporary register");
        // Remember r_1 is low address (and LSB on mips)
        // So r_2 gets loaded from high address regardless of the platform
        //aoqi
        assert(r_2->as_Register() == r_1->as_Register(), "");
        //__ ld(r_2->as_Register(), saved_sp, ld_off);
        //__ ld(r, saved_sp, next_off);
        __ ld(r, saved_sp, ld_off);

/* Jin:
 * For T_LONG type, the real layout is as below:

   (high)
    |         |
    -----------
    | 8 bytes |
    | (void)  |
    -----------
    | 8 bytes |
    | (long)  |
    -----------
    |         |
   (low)
 *
 * We should load the low 8 bytes.
 */
        if (sig_bt[i] == T_LONG)
          __ ld(r, saved_sp, ld_off - 8);
      } else {
#ifdef aoqi_test
        tty->print_cr(" AdapterGenerator::%s :%d, sig_bt[%d]:%d, total_args_passed:%d r_1->is_Register() !r_2->is_valid()", __func__, __LINE__, i, sig_bt[i], total_args_passed);
#endif
        __ lw(r, saved_sp, ld_off);
      }
    } else if (r_1->is_FloatRegister()) { // Float Register
      assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register");

      FloatRegister fr = r_1->as_FloatRegister();
      if (sig_bt[i] == T_FLOAT)
        __ lwc1(fr, saved_sp, ld_off);
      else {
        __ ldc1(fr, saved_sp, ld_off);
        __ ldc1(fr, saved_sp, ld_off - 8);
      }
    }
  }
  // 6243940 We might end up in handle_wrong_method if
  // the callee is deoptimized as we race thru here. If that
  // happens we don't want to take a safepoint because the
  // caller frame will look interpreted and arguments are now
  // "compiled" so it is much better to make this transition
  // invisible to the stack walking code. Unfortunately if
  // we try and find the callee by normal means a safepoint
  // is possible. So we stash the desired callee in the thread
  // and the vm will find it there should this case occur.
  __ get_thread(T8);
  __ sd(Rmethod, T8, in_bytes(JavaThread::callee_target_offset()));

  // move methodOop to eax in case we end up in an c2i adapter.
  // the c2i adapters expect methodOop in eax (c2) because c2's
  // resolve stubs return the result (the method) in eax.
  // I'd love to fix this.
  __ move(V0, Rmethod);
  __ jr(T9);
  __ delayed()->nop();
}
// ---------------------------------------------------------------
AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
                                                            int total_args_passed,
                                                            // VMReg max_arg,
                                                            int comp_args_on_stack, // VMRegStackSlots
                                                            const BasicType *sig_bt,
                                                            const VMRegPair *regs,
                                                            AdapterFingerPrint* fingerprint) {
  address i2c_entry = __ pc();

  AdapterGenerator agen(masm);

  agen.gen_i2c_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs);
  1247   // -------------------------------------------------------------------------
  1248   // Generate a C2I adapter.  On entry we know G5 holds the methodOop.  The
  1249   // args start out packed in the compiled layout.  They need to be unpacked
  1250   // into the interpreter layout.  This will almost always require some stack
  1251   // space.  We grow the current (compiled) stack, then repack the args.  We
  1252   // finally end in a jump to the generic interpreter entry point.  On exit
  1253   // from the interpreter, the interpreter will restore our SP (lest the
  1254   // compiled code, which relys solely on SP and not FP, get sick).
  1256   address c2i_unverified_entry = __ pc();
  1257   Label skip_fixup;
  1259     Register holder = T1;
  1260     Register receiver = T0;
  1261     Register temp = T8;
  1262     address ic_miss = SharedRuntime::get_ic_miss_stub();
  1264     Label missed;
  1266     __ verify_oop(holder);
  1267     // __ movl(temp, Address(receiver, oopDesc::klass_offset_in_bytes()));
  1268     //__ ld_ptr(temp, receiver, oopDesc::klass_offset_in_bytes());
  1269     //add for compressedoops
  1270     __ load_klass(temp, receiver);
  1271     __ verify_oop(temp);
  1273     //  __ cmpl(temp, Address(holder, CompiledICHolder::holder_klass_offset()));
  1274     __ ld_ptr(AT, holder, CompiledICHolder::holder_klass_offset()); 
  1275     //__ movl(ebx, Address(holder, CompiledICHolder::holder_method_offset()));
  1276     __ ld_ptr(Rmethod, holder, CompiledICHolder::holder_method_offset());
  1277     //__ jcc(Assembler::notEqual, missed);
  1278     __ bne(AT, temp, missed); 
  1279     __ delayed()->nop(); 
  1280     // Method might have been compiled since the call site was patched to
  1281     // interpreted; if that is the case, treat it as a miss so we can get
  1282     // the call site corrected.
  1283     //__ cmpl(Address(ebx, in_bytes(Method::code_offset())), NULL_WORD);
  1284     //__ jcc(Assembler::equal, skip_fixup);
  1285     __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset()));
  1286     __ beq(AT, R0, skip_fixup); 
  1287     __ delayed()->nop(); 
  1288     __ bind(missed);
  1289     //   __ move(AT, (int)&jerome7);	
  1290     //	__ sw(RA, AT, 0);	
  1292     __ jmp(ic_miss, relocInfo::runtime_call_type);
  1293     __ delayed()->nop();
  1294   }
  1296   address c2i_entry = __ pc();
  1298   agen.gen_c2i_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
  1300   __ flush();
  1301   return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
  1302 }
  1304 /*
  1305 // Helper function for native calling conventions
  1306 static VMReg int_stk_helper( int i ) {
  1307   // Bias any stack based VMReg we get by ignoring the window area
  1308   // but not the register parameter save area.
  1309   //
  1310   // This is strange for the following reasons. We'd normally expect
  1311   // the calling convention to return an VMReg for a stack slot
  1312   // completely ignoring any abi reserved area. C2 thinks of that
  1313   // abi area as only out_preserve_stack_slots. This does not include
  1314   // the area allocated by the C abi to store down integer arguments
  1315   // because the java calling convention does not use it. So
  1316   // since c2 assumes that there are only out_preserve_stack_slots
  1317   // to bias the optoregs (which impacts VMRegs) when actually referencing any actual stack
  1318   // location the c calling convention must add in this bias amount
  1319   // to make up for the fact that the out_preserve_stack_slots is
  1320   // insufficient for C calls. What a mess. I sure hope those 6
  1321   // stack words were worth it on every java call!
  1323   // Another way of cleaning this up would be for out_preserve_stack_slots
  1324   // to take a parameter to say whether it was C or java calling conventions.
  1325   // Then things might look a little better (but not much).
  1327   int mem_parm_offset = i - SPARC_ARGS_IN_REGS_NUM;
  1328   if( mem_parm_offset < 0 ) {
  1329     return as_oRegister(i)->as_VMReg();
  1330   } else {
  1331     int actual_offset = (mem_parm_offset + frame::memory_parameter_word_sp_offset) * VMRegImpl::slots_per_word;
  1332     // Now return a biased offset that will be correct when out_preserve_slots is added back in
  1333     return VMRegImpl::stack2reg(actual_offset - SharedRuntime::out_preserve_stack_slots());
  1336 */
  1339 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
  1340                                          VMRegPair *regs,
  1341                                          VMRegPair *regs2,
  1342                                          int total_args_passed) {
  1343     assert(regs2 == NULL, "not needed on MIPS");
  1344 #ifdef aoqi_test
  1345 tty->print_cr(" SharedRuntime::%s :%d total_args_passed:%d", __func__, __LINE__, total_args_passed);
  1346 #endif
  1347     // Return the number of VMReg stack_slots needed for the args.
  1348     // This value does not include an abi space (like register window
  1349     // save area).
  1354     // We return the amount of VMRegImpl stack slots we need to reserve
  1355     // for all the arguments, NOT counting out_preserve_stack_slots.
  1360   static const Register INT_ArgReg[Argument::n_register_parameters] = {
  1361     A0, A1, A2, A3, A4, A5, A6, A7
  1362   };
  1363   static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = {
  1364     F12, F13, F14, F15, F16, F17, F18, F19
  1365   };
  1366     uint args = 0;
  1367     uint stk_args = 0; // inc by 2 each time
  1369 /* Example:
  1370 ---   n   java.lang.UNIXProcess::forkAndExec
  1371     private native int forkAndExec(byte[] prog,
  1372                                    byte[] argBlock, int argc,
  1373                                    byte[] envBlock, int envc,
  1374                                    byte[] dir,
  1375                                    boolean redirectErrorStream,
  1376                                    FileDescriptor stdin_fd,
  1377                                    FileDescriptor stdout_fd,
  1378                                    FileDescriptor stderr_fd)
  1379 JNIEXPORT jint JNICALL
  1380 Java_java_lang_UNIXProcess_forkAndExec(JNIEnv *env,
  1381                                        jobject process,
  1382                                        jbyteArray prog,
  1383                                        jbyteArray argBlock, jint argc,
  1384                                        jbyteArray envBlock, jint envc,
  1385                                        jbyteArray dir,
  1386                                        jboolean redirectErrorStream,
  1387                                        jobject stdin_fd,
  1388                                        jobject stdout_fd,
  1389                                        jobject stderr_fd)
  1391 ::c_calling_convention
  1392 0: 		// env		<-- a0
  1393 1: L		// klass/obj	<-- t0 => a1
  1394 2: [		// prog[]	<-- a0 => a2
  1395 3: [		// argBlock[]	<-- a1 => a3
  1396 4: I		// argc
  1397 5: [		// envBlock[]	<-- a3 => a5
  1398 6: I		// envc
  1399 7: [		// dir[]	<-- a5 => a7
  1400 8: Z		// redirectErrorStream	a6 => sp[0]
  1401 9: L		// stdin		a7 => sp[8]
  1402 10: L		// stdout		fp[16] => sp[16]
  1403 11: L		// stderr		fp[24] => sp[24]
  1404 */
  1405     for (int i = 0; i < total_args_passed; i++) {
  1406       switch (sig_bt[i]) {
  1407       case T_VOID: // Halves of longs and doubles
  1408         assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
  1409         regs[i].set_bad();
  1410         break;
  1411       case T_BOOLEAN:
  1412       case T_CHAR:
  1413       case T_BYTE:
  1414       case T_SHORT:
  1415       case T_INT:
  1416         if (args < Argument::n_register_parameters) {
  1417           regs[i].set1(INT_ArgReg[args++]->as_VMReg());
  1418         } else {
  1419           regs[i].set1(VMRegImpl::stack2reg(stk_args));
  1420           stk_args += 2;
  1421         }
  1422         break;
  1423       case T_LONG:
  1424         assert(sig_bt[i + 1] == T_VOID, "expecting half");
  1425         // fall through
  1426       case T_OBJECT:
  1427       case T_ARRAY:
  1428       case T_ADDRESS:
  1429       case T_METADATA:
  1430         if (args < Argument::n_register_parameters) {
  1431           regs[i].set2(INT_ArgReg[args++]->as_VMReg());
  1432         } else {
  1433           regs[i].set2(VMRegImpl::stack2reg(stk_args));
  1434           stk_args += 2;
  1435         }
  1436         break;
  1437       case T_FLOAT:
  1438         if (args < Argument::n_float_register_parameters) {
  1439           regs[i].set1(FP_ArgReg[args++]->as_VMReg());
  1440         } else {
  1441           regs[i].set1(VMRegImpl::stack2reg(stk_args));
  1442           stk_args += 2;
  1443         }
  1444         break;
  1445       case T_DOUBLE:
  1446         assert(sig_bt[i + 1] == T_VOID, "expecting half");
  1447         if (args < Argument::n_float_register_parameters) {
  1448           regs[i].set2(FP_ArgReg[args++]->as_VMReg());
  1449         } else {
  1450           regs[i].set2(VMRegImpl::stack2reg(stk_args));
  1451           stk_args += 2;
  1452         }
  1453         break;
  1454       default:
  1455         ShouldNotReachHere();
  1456         break;
  1457       }
  1458     }
  1460   return round_to(stk_args, 2);
  1461 }
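// Editor's note: the integer and FP register banks above share the single
// `args` counter, as in the MIPS N64 ABI where each argument consumes one
// position in both banks. Illustrative example:
//
//   (int a, float b, long c)
//     a (T_INT)   -> INT_ArgReg[0] = A0
//     b (T_FLOAT) -> FP_ArgReg[1]  = F13   (position 1, not "first float")
//     c (T_LONG)  -> INT_ArgReg[2] = A2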
  1462 /*
  1463 int SharedRuntime::c_calling_convention_jni(const BasicType *sig_bt, 
  1464                                          VMRegPair *regs,
  1465                                          int total_args_passed) {
  1466 // We return the amount of VMRegImpl stack slots we need to reserve for all
  1467 // the arguments NOT counting out_preserve_stack_slots. 
  1468    bool unalign = 0;
  1469   uint    stack = 0;        // All arguments on stack
  1470 #ifdef aoqi_test
  1471 tty->print_cr(" SharedRuntime::%s :%d total_args_passed:%d", __func__, __LINE__, total_args_passed);
  1472 #endif
  1474   for( int i = 0; i < total_args_passed; i++) {
  1475     // From the type and the argument number (count) compute the location
  1476     switch( sig_bt[i] ) {
  1477     case T_BOOLEAN:
  1478     case T_CHAR:
  1479     case T_FLOAT:
  1480     case T_BYTE:
  1481     case T_SHORT:
  1482     case T_INT:
  1483     case T_OBJECT:
  1484     case T_ARRAY:
  1485     case T_ADDRESS:
  1486       regs[i].set1(VMRegImpl::stack2reg(stack++));
  1487       unalign = !unalign;
  1488       break;
  1489     case T_LONG:
  1490     case T_DOUBLE: // The stack numbering is reversed from Java
  1491       // Since C arguments do not get reversed, the ordering for
  1492       // doubles on the stack must be opposite the Java convention
  1493       assert(sig_bt[i+1] == T_VOID, "missing Half" ); 
  1494       if(unalign){
  1495             stack += 1; 
  1496      	    unalign = ! unalign; 
  1498       regs[i].set2(VMRegImpl::stack2reg(stack));
  1499       stack += 2;
  1500       break;
  1501     case T_VOID: regs[i].set_bad(); break;
  1502     default:
  1503       ShouldNotReachHere();
  1504       break;
  1507   return stack;
  1509 */
  1511 // ---------------------------------------------------------------------------
  1512 void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
  1513 	// We always ignore the frame_slots arg and just use the space just below frame pointer
  1514 	// which by this time is free to use
  1515 	switch (ret_type) {
  1516 		case T_FLOAT:
  1517 			__ swc1(FSF, FP, -wordSize); 
  1518 			break;
  1519 		case T_DOUBLE:
  1520 			__ sdc1(FSF, FP, -wordSize ); 
  1521 			break;
  1522 		case T_VOID:  break;
  1523 		case T_LONG:
  1524 			      __ sd(V0, FP, -wordSize);
  1525 			      break;
  1526 		case T_OBJECT:
  1527 		case T_ARRAY:
  1528 			__ sd(V0, FP, -wordSize);
  1529 			break;
  1530 		default: {
  1531 				 __ sw(V0, FP, -wordSize);
  1532 			 }
  1533 	}
  1534 }
  1536 void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
  1537 	// We always ignore the frame_slots arg and just use the space just below frame pointer
  1538 	// which by this time is free to use
  1539 	switch (ret_type) {
  1540 		case T_FLOAT:
  1541 			__ lwc1(FSF, FP, -wordSize); 
  1542 			break;
  1543 		case T_DOUBLE:
  1544 			__ ldc1(FSF, FP, -wordSize ); 
  1545 			break;
  1546 		case T_LONG:
  1547 			__ ld(V0, FP, -wordSize);
  1548 			break;
  1549 		case T_VOID:  break;
  1550 		case T_OBJECT:
  1551 		case T_ARRAY:
  1552 			__ ld(V0, FP, -wordSize);
  1553 			break;
  1554 		default: {
  1555 				 __ lw(V0, FP, -wordSize);
  1556 			 }
  1557 	}
  1558 }
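// Editor's note: these two helpers bracket any runtime call made while a
// native result is still live, e.g. in the native wrapper's safepoint path:
//
//   save_native_result(masm, ret_type, stack_slots);
//   __ call(CAST_FROM_FN_PTR(address,
//           JavaThread::check_special_condition_for_native_trans), ...);
//   restore_native_result(masm, ret_type, stack_slots);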
  1560 static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) {
  1561     for ( int i = first_arg ; i < arg_count ; i++ ) {
  1562       if (args[i].first()->is_Register()) {
  1563         __ push(args[i].first()->as_Register());
  1564       } else if (args[i].first()->is_FloatRegister()) {
  1565         __ push(args[i].first()->as_FloatRegister());
  1566       }
  1567     }
  1568 }
  1570 static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) {
  1571     for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) {
  1572       if (args[i].first()->is_Register()) {
  1573         __ pop(args[i].first()->as_Register());
  1574       } else if (args[i].first()->is_FloatRegister()) {
  1575         __ pop(args[i].first()->as_FloatRegister());
  1576       }
  1577     }
  1578 }
  1580 // A simple move of integer like type
  1581 static void simple_move32(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
  1582   if (src.first()->is_stack()) {
  1583     if (dst.first()->is_stack()) {
  1584       // stack to stack
  1585 		__ lw(AT, FP, reg2offset_in(src.first())); 
  1586 		__ sd(AT,SP, reg2offset_out(dst.first())); 
  1587     } else {
  1588       // stack to reg
  1589       //__ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
  1590 			__ lw(dst.first()->as_Register(), FP, reg2offset_in(src.first()));
  1591     }
  1592   } else if (dst.first()->is_stack()) {
  1593     // reg to stack
  1594 		__ sd(src.first()->as_Register(), SP, reg2offset_out(dst.first()));
  1595   } else {
  1596     //__ mov(src.first()->as_Register(), dst.first()->as_Register());
  1597 	  if (dst.first() != src.first()) {
  1598 		__ move(dst.first()->as_Register(), src.first()->as_Register()); // note: operand order is (dst, src)
  1599 	  }
  1600   }
  1601 }
  1602 /*
  1603 // On 64 bit we will store integer like items to the stack as
  1604 // 64 bits items (sparc abi) even though java would only store
  1605 // 32bits for a parameter. On 32bit it will simply be 32 bits
  1606 // So this routine will do 32->32 on 32bit and 32->64 on 64bit
  1607 static void move32_64(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
  1608   if (src.first()->is_stack()) {
  1609     if (dst.first()->is_stack()) {
  1610       // stack to stack
  1611       __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5);
  1612       __ st_ptr(L5, SP, reg2offset(dst.first()) + STACK_BIAS);
  1613     } else {
  1614       // stack to reg
  1615       __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
  1617   } else if (dst.first()->is_stack()) {
  1618     // reg to stack
  1619     __ st_ptr(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS);
  1620   } else {
  1621     __ mov(src.first()->as_Register(), dst.first()->as_Register());
  1624 */
  1626 // An oop arg. Must pass a handle not the oop itself
  1627 static void object_move(MacroAssembler* masm,
  1628                         OopMap* map,
  1629                         int oop_handle_offset,
  1630                         int framesize_in_slots,
  1631                         VMRegPair src,
  1632                         VMRegPair dst,
  1633                         bool is_receiver,
  1634                         int* receiver_offset) {
  1636   // must pass a handle. First figure out the location we use as a handle
  1638 	//FIXME, for mips, dst can be register
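	// Editor's note (illustrative): the handle we pass is the *address* of a
	// slot holding the oop, or NULL when the oop itself is NULL:
	//
	//   oop == NULL  ->  rHandle == NULL
	//   oop != NULL  ->  rHandle == &slot, *rHandle == oop
	//
	// The two branches below build exactly that for stack-resident and
	// register-resident oops, and record the slot in the OopMap.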
  1639 	if (src.first()->is_stack()) {
  1640 		// Oop is already on the stack as an argument
  1641 		Register rHandle = V0;
  1642 		Label nil;
  1643 		//__ xorl(rHandle, rHandle);
  1644 		__ xorr(rHandle, rHandle, rHandle);
  1645 		//__ cmpl(Address(ebp, reg2offset_in(src.first())), NULL_WORD);
  1646 		__ ld(AT, FP, reg2offset_in(src.first())); 
  1647 		//__ jcc(Assembler::equal, nil);
  1648 		__ beq(AT,R0, nil); 
  1649 		__ delayed()->nop(); 
  1650 		// __ leal(rHandle, Address(ebp, reg2offset_in(src.first())));
  1651 		__ lea(rHandle, Address(FP, reg2offset_in(src.first())));
  1652 		__ bind(nil);
  1653 		//__ movl(Address(esp, reg2offset_out(dst.first())), rHandle);
  1654 		if(dst.first()->is_stack())__ sd( rHandle, SP, reg2offset_out(dst.first()));
  1655 		else                       __ move( (dst.first())->as_Register(),rHandle); 
  1656 		//if dst is register 
  1657 	//FIXME: does MIPS need out_preserve_stack_slots?
  1658 		int offset_in_older_frame = src.first()->reg2stack() 
  1659 			+ SharedRuntime::out_preserve_stack_slots();
  1660 		map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots));
  1661 		if (is_receiver) {
  1662 			*receiver_offset = (offset_in_older_frame 
  1663 					+ framesize_in_slots) * VMRegImpl::stack_slot_size;
  1664 		}
  1665 	} else {
  1666 		// Oop is in a register; we must store it to the space we reserved
  1667 		// on the stack for oop_handles
  1668 		const Register rOop = src.first()->as_Register();
  1669 		assert( (rOop->encoding() >= A0->encoding()) && (rOop->encoding() <= T0->encoding()),"wrong register");
  1670 		//   const Register rHandle = eax;
  1671 		const Register rHandle = V0;
  1672 		//Important: refer to java_calling_convention
  1673 		int oop_slot = (rOop->encoding() - A0->encoding()) * VMRegImpl::slots_per_word + oop_handle_offset;
  1674 		int offset = oop_slot*VMRegImpl::stack_slot_size;
  1675 		Label skip;
  1676 		// __ movl(Address(esp, offset), rOop);
  1677 		__ sd( rOop , SP, offset );
  1678 		map->set_oop(VMRegImpl::stack2reg(oop_slot));
  1679 		//    __ xorl(rHandle, rHandle);
  1680 		__ xorr( rHandle, rHandle, rHandle);
  1681 		//__ cmpl(rOop, NULL_WORD);
  1682 		// __ jcc(Assembler::equal, skip);
  1683 		__ beq(rOop, R0, skip); 
  1684 		__ delayed()->nop(); 
  1685 		//  __ leal(rHandle, Address(esp, offset));
  1686 		__ lea(rHandle, Address(SP, offset));
  1687 		__ bind(skip);
  1688 		// Store the handle parameter
  1689 		//__ movl(Address(esp, reg2offset_out(dst.first())), rHandle);
  1690 		if(dst.first()->is_stack())__ sd( rHandle, SP, reg2offset_out(dst.first()));
  1691 		else                       __ move((dst.first())->as_Register(), rHandle); 
  1692 		//if dst is register 
  1694 		if (is_receiver) {
  1695 			*receiver_offset = offset;
  1696 		}
  1697 	}
  1698 }
  1700 // A float arg may have to do float reg int reg conversion
  1701 static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
  1702   assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move");
  1704 	if (src.first()->is_stack()) {
  1705 		if(dst.first()->is_stack()){
  1706 			//  __ movl(eax, Address(ebp, reg2offset_in(src.first())));
  1707 			__ lwc1(F12 , FP, reg2offset_in(src.first()));
  1708 			// __ movl(Address(esp, reg2offset_out(dst.first())), eax);
  1709 			__ swc1(F12 ,SP, reg2offset_out(dst.first()));
  1710 		}
  1711 		else
  1712 			__ lwc1( dst.first()->as_FloatRegister(), FP, reg2offset_in(src.first())); 
  1713 	} else {
  1714 		// reg to stack
  1715 		// __ movss(Address(esp, reg2offset_out(dst.first())), 
  1716 		// src.first()->as_XMMRegister());
  1717 		// __ movl(Address(esp, reg2offset_out(dst.first())), eax);
  1718 		if(dst.first()->is_stack())
  1719 			__ swc1( src.first()->as_FloatRegister(),SP, reg2offset_out(dst.first()));
  1720 		else
  1721 			__ mov_s( dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
  1722 	}
  1723 }
  1724 /*
  1725 static void split_long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
  1726   VMRegPair src_lo(src.first());
  1727   VMRegPair src_hi(src.second());
  1728   VMRegPair dst_lo(dst.first());
  1729   VMRegPair dst_hi(dst.second());
  1730   simple_move32(masm, src_lo, dst_lo);
  1731   simple_move32(masm, src_hi, dst_hi);
  1733 */
  1734 // A long move
  1735 static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
  1737 	// On MIPS64 a long occupies a single 64-bit stack slot or register, so
  1738 	// src and dst may each be either of the two; the commented-out
  1739 	// second-half moves below are leftovers from the 32-bit scheme where a
  1740 	// long was split across two locations.
  1742 	if (src.first()->is_stack()) {
  1743 		assert(src.second()->is_stack() && dst.second()->is_stack(), "must be all stack");
  1744 		//  __ movl(eax, Address(ebp, reg2offset_in(src.first())));
  1745 		if( dst.first()->is_stack()){ 
  1746 			__ ld(AT, FP, reg2offset_in(src.first()));
  1747 			//  __ movl(ebx, address(ebp, reg2offset_in(src.second())));
  1748 			//__ lw(V0, FP, reg2offset_in(src.second())); 
  1749 			// __ movl(address(esp, reg2offset_out(dst.first())), eax);
  1750 			__ sd(AT, SP, reg2offset_out(dst.first()));
  1751 			// __ movl(address(esp, reg2offset_out(dst.second())), ebx);
  1752 			//__ sw(V0, SP,  reg2offset_out(dst.second())); 
  1753 		} else{
  1754 			__ ld( (dst.first())->as_Register() , FP, reg2offset_in(src.first()));
  1755 			//__ lw( (dst.second())->as_Register(), FP, reg2offset_in(src.second()));
  1756 		}
  1757 	} else {
  1758 		if( dst.first()->is_stack()){ 
  1759 			__ sd( (src.first())->as_Register(), SP, reg2offset_out(dst.first()));
  1760 			//__ sw( (src.second())->as_Register(), SP,  reg2offset_out(dst.second())); 
  1761 		} else{
  1762 			__ move( (dst.first())->as_Register() , (src.first())->as_Register());
  1763 			//__ move( (dst.second())->as_Register(), (src.second())->as_Register());
  1764 		}
  1765 	}
  1766 }
  1768 // A double move
  1769 static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
  1771 	// The painful thing here is that, like long_move, a VMRegPair might be
  1772 	// half stack and half register.
  1774 	// Because of the calling convention we know that src is either
  1775 	//   1: a single FPU register
  1776 	//   2: two stack slots (possibly unaligned)
  1777 	// and dst may be a register or a pair of stack slots.
  1779 	// assert(dst.first()->is_stack() && (src.first()->is_XMMRegister() || 
  1780 	// src.first()->is_stack()), "bad args");
  1781 	//  assert(dst.first()->is_stack() || src.first()->is_stack()), "bad args");
  1783 	if (src.first()->is_stack()) {
  1784 		// source is all stack
  1785 		// __ movl(eax, Address(ebp, reg2offset_in(src.first())));
  1786 		if( dst.first()->is_stack()){ 
  1787 			__ ldc1(F12, FP, reg2offset_in(src.first()));
  1788 			//__ movl(ebx, Address(ebp, reg2offset_in(src.second())));
  1789 			//__ lwc1(F14, FP, reg2offset_in(src.second()));
  1791 			//   __ movl(Address(esp, reg2offset_out(dst.first())), eax);
  1792 			__ sdc1(F12, SP, reg2offset_out(dst.first())); 
  1793 			//  __ movl(Address(esp, reg2offset_out(dst.second())), ebx);
  1794 			//__ swc1(F14, SP, reg2offset_out(dst.second()));
  1795 		} else{
  1796 			__ ldc1( (dst.first())->as_FloatRegister(), FP, reg2offset_in(src.first()));
  1797 			//__ lwc1( (dst.second())->as_FloatRegister(), FP, reg2offset_in(src.second()));
  1798 		}
  1800 	} else {
  1801 		// reg to stack
  1802 		// No worries about stack alignment
  1803 		// __ movsd(Address(esp, reg2offset_out(dst.first())), src.first()->as_XMMRegister());
  1804 		if( dst.first()->is_stack()){ 
  1805 			__ sdc1( src.first()->as_FloatRegister(),SP, reg2offset_out(dst.first()));
  1806 			//__ swc1( src.second()->as_FloatRegister(),SP, reg2offset_out(dst.second()));
  1807 		}
  1808 		else
  1809 			__ mov_d( dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
  1810 			//__ mov_s( dst.second()->as_FloatRegister(), src.second()->as_FloatRegister());
  1811 	}
  1812 }
  1815 static void verify_oop_args(MacroAssembler* masm,
  1816                             methodHandle method,
  1817                             const BasicType* sig_bt,
  1818                             const VMRegPair* regs) {
  1819   Register temp_reg = T9;  // not part of any compiled calling seq
  1820   if (VerifyOops) {
  1821     for (int i = 0; i < method->size_of_parameters(); i++) {
  1822       if (sig_bt[i] == T_OBJECT ||
  1823           sig_bt[i] == T_ARRAY) {
  1824         VMReg r = regs[i].first();
  1825         assert(r->is_valid(), "bad oop arg");
  1826         if (r->is_stack()) {
  1827 //          __ movptr(temp_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
  1828           __ ld(temp_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
  1829           __ verify_oop(temp_reg);
  1830         } else {
  1831           __ verify_oop(r->as_Register());
  1832         }
  1833       }
  1834     }
  1835   }
  1836 }
  1838 static void gen_special_dispatch(MacroAssembler* masm,
  1839                                  methodHandle method,
  1840                                  const BasicType* sig_bt,
  1841                                  const VMRegPair* regs) {
  1842   verify_oop_args(masm, method, sig_bt, regs);
  1843   vmIntrinsics::ID iid = method->intrinsic_id();
  1845   // Now write the args into the outgoing interpreter space
  1846   bool     has_receiver   = false;
  1847   Register receiver_reg   = noreg;
  1848   int      member_arg_pos = -1;
  1849   Register member_reg     = noreg;
  1850   int      ref_kind       = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid);
  1851   if (ref_kind != 0) {
  1852     member_arg_pos = method->size_of_parameters() - 1;  // trailing MemberName argument
  1853 //    member_reg = rbx;  // known to be free at this point
  1854     member_reg = S3;  // known to be free at this point
  1855     has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
  1856   } else if (iid == vmIntrinsics::_invokeBasic) {
  1857     has_receiver = true;
  1858   } else {
  1859     fatal(err_msg_res("unexpected intrinsic id %d", iid));
  1860   }
  1862   if (member_reg != noreg) {
  1863     // Load the member_arg into register, if necessary.
  1864     SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs);
  1865     VMReg r = regs[member_arg_pos].first();
  1866     if (r->is_stack()) {
  1867 //      __ movptr(member_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
  1868       __ ld(member_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
  1869     } else {
  1870       // no data motion is needed
  1871       member_reg = r->as_Register();
  1872     }
  1873   }
  1875   if (has_receiver) {
  1876     // Make sure the receiver is loaded into a register.
  1877     assert(method->size_of_parameters() > 0, "oob");
  1878     assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
  1879     VMReg r = regs[0].first();
  1880     assert(r->is_valid(), "bad receiver arg");
  1881     if (r->is_stack()) {
  1882       // Porting note:  This assumes that compiled calling conventions always
  1883       // pass the receiver oop in a register.  If this is not true on some
  1884       // platform, pick a temp and load the receiver from stack.
  1885       fatal("receiver always in a register");
  1886 //      receiver_reg = j_rarg0;  // known to be free at this point
  1887       receiver_reg = SSR;  // known to be free at this point
  1888 //      __ movptr(receiver_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
  1889       __ ld(receiver_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
  1890     } else {
  1891       // no data motion is needed
  1892       receiver_reg = r->as_Register();
  1893     }
  1894   }
  1896   // Figure out which address we are really jumping to:
  1897   MethodHandles::generate_method_handle_dispatch(masm, iid,
  1898                                                  receiver_reg, member_reg, /*for_compiler_entry:*/ true);
  1899 }
  1901 // ---------------------------------------------------------------------------
  1902 // Generate a native wrapper for a given method.  The method takes arguments
  1903 // in the Java compiled code convention, marshals them to the native
  1904 // convention (handlizes oops, etc), transitions to native, makes the call,
  1905 // returns to java state (possibly blocking), unhandlizes any result and
  1906 // returns.
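// Editor's outline of the code below: inline-cache check; stack bang; build
// the frame; the "Grand Shuffle" of Java args into C positions (handlizing
// oops); optional locking; transition to _thread_in_native; the native call;
// transition to _thread_in_native_trans with a safepoint check; then unlock,
// unhandlize the result and return.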
  1907 nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
  1908                                                 methodHandle method,
  1909                                                 int compile_id,
  1910                                                 BasicType *in_sig_bt,
  1911                                                 VMRegPair *in_regs,
  1912                                                 BasicType ret_type) {
  1914   if (method->is_method_handle_intrinsic()) {
  1915     vmIntrinsics::ID iid = method->intrinsic_id();
  1916     intptr_t start = (intptr_t)__ pc();
  1917     int vep_offset = ((intptr_t)__ pc()) - start;
  1919     gen_special_dispatch(masm,
  1920                          method,
  1921                          in_sig_bt,
  1922                          in_regs);
  1924     int frame_complete = ((intptr_t)__ pc()) - start;  // not complete, period
  1925     __ flush();
  1926     int stack_slots = SharedRuntime::out_preserve_stack_slots();  // no out slots at all, actually
  1927     return nmethod::new_native_nmethod(method,
  1928                                        compile_id,
  1929                                        masm->code(),
  1930                                        vep_offset,
  1931                                        frame_complete,
  1932                                        stack_slots / VMRegImpl::slots_per_word,
  1933                                        in_ByteSize(-1),
  1934                                        in_ByteSize(-1),
  1935                                        (OopMapSet*)NULL);
  1936   }
  1937   bool is_critical_native = true;
  1938   address native_func = method->critical_native_function();
  1939   if (native_func == NULL) {
  1940     native_func = method->native_function();
  1941     is_critical_native = false;
  1942   }
  1943   assert(native_func != NULL, "must have function");
  1945   // Native nmethod wrappers never take possession of the oop arguments.
  1946   // So the caller will gc the arguments. The only thing we need an
  1947   // oopMap for is if the call is static
  1948   //
  1949   // An OopMap for lock (and class if static), and one for the VM call itself
  1950   OopMapSet *oop_maps = new OopMapSet();
  1952   // We have received a description of where all the Java args are located
  1953   // on entry to the wrapper. We need to convert these args to where
  1954   // the jni function will expect them. To figure out where they go
  1955   // we convert the java signature to a C signature by inserting
  1956   // the hidden arguments as arg[0] and possibly arg[1] (static method)
  1958   const int total_in_args = method->size_of_parameters();
  1959   int total_c_args = total_in_args;
  1960   if (!is_critical_native) {
  1961     total_c_args += 1;
  1962     if (method->is_static()) {
  1963       total_c_args++;
  1964     }
  1965   } else {
  1966     for (int i = 0; i < total_in_args; i++) {
  1967       if (in_sig_bt[i] == T_ARRAY) {
  1968         total_c_args++;
  1969       }
  1970     }
  1971   }
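  // Worked example (editor's sketch) for `static native int m(byte[] a)`:
  //   regular JNI:      JNIEnv* + class mirror + a  -> total_c_args = 3
  //   critical native:  (int length, byte* elems)   -> total_c_args = 2
  // a critical native drops env/mirror but expands each array argument into
  // an (int, elem*) pair.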
  1973 	BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
  1974 	VMRegPair* out_regs   = NEW_RESOURCE_ARRAY(VMRegPair,   total_c_args);
  1975   BasicType* in_elem_bt = NULL;
  1977   int argc = 0;
  1978   if (!is_critical_native) {
  1979     out_sig_bt[argc++] = T_ADDRESS;
  1980     if (method->is_static()) {
  1981       out_sig_bt[argc++] = T_OBJECT;
  1982     }
  1984     for (int i = 0; i < total_in_args ; i++ ) {
  1985       out_sig_bt[argc++] = in_sig_bt[i];
  1986     }
  1987   } else {
  1988     Thread* THREAD = Thread::current();
  1989     in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args);
  1990     SignatureStream ss(method->signature());
  1991     for (int i = 0; i < total_in_args ; i++ ) {
  1992       if (in_sig_bt[i] == T_ARRAY) {
  1993         // Arrays are passed as int, elem* pair
  1994         out_sig_bt[argc++] = T_INT;
  1995         out_sig_bt[argc++] = T_ADDRESS;
  1996         Symbol* atype = ss.as_symbol(CHECK_NULL);
  1997         const char* at = atype->as_C_string();
  1998         if (strlen(at) == 2) {
  1999           assert(at[0] == '[', "must be");
  2000           switch (at[1]) {
  2001             case 'B': in_elem_bt[i]  = T_BYTE; break;
  2002             case 'C': in_elem_bt[i]  = T_CHAR; break;
  2003             case 'D': in_elem_bt[i]  = T_DOUBLE; break;
  2004             case 'F': in_elem_bt[i]  = T_FLOAT; break;
  2005             case 'I': in_elem_bt[i]  = T_INT; break;
  2006             case 'J': in_elem_bt[i]  = T_LONG; break;
  2007             case 'S': in_elem_bt[i]  = T_SHORT; break;
  2008             case 'Z': in_elem_bt[i]  = T_BOOLEAN; break;
  2009             default: ShouldNotReachHere();
  2010           }
  2011         }
  2012       } else {
  2013         out_sig_bt[argc++] = in_sig_bt[i];
  2014         in_elem_bt[i] = T_VOID;
  2015       }
  2016       if (in_sig_bt[i] != T_VOID) {
  2017         assert(in_sig_bt[i] == ss.type(), "must match");
  2018         ss.next();
  2019       }
  2020     }
  2021   }
  2023   // Now figure out where the args must be stored and how much stack space
  2024   // they require (neglecting out_preserve_stack_slots but space for storing
  2025   // the 1st six register arguments). It's weird; see int_stk_helper above.
  2026   //
  2027   int out_arg_slots;
  2028   //out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args);
  2029 	out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args);
  2031   // Compute framesize for the wrapper.  We need to handlize all oops in
  2032   // registers. We must create space for them here that is disjoint from
  2033   // the windowed save area because we have no control over when we might
  2034   // flush the window again and overwrite values that gc has since modified.
  2035   // (The live window race)
  2036   //
  2037   // We always just allocate 6 words for storing down these objects. This allows
  2038   // us to simply record the base and use the Ireg number to decide which
  2039   // slot to use. (Note that the reg number is the inbound number, not the
  2040   // outbound number.)
  2041   // We must shuffle args to match the native convention, and include var-args space.
  2043   // Calculate the total number of stack slots we will need.
  2045   // First count the abi requirement plus all of the outgoing args
  2046   int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
  2048   // Now the space for the inbound oop handle area
  2049   int total_save_slots = 9 * VMRegImpl::slots_per_word;  // 9 arguments passed in registers
  2050   if (is_critical_native) {
  2051     // Critical natives may have to call out so they need a save area
  2052     // for register arguments.
  2053     int double_slots = 0;
  2054     int single_slots = 0;
  2055     for ( int i = 0; i < total_in_args; i++) {
  2056       if (in_regs[i].first()->is_Register()) {
  2057         const Register reg = in_regs[i].first()->as_Register();
  2058         switch (in_sig_bt[i]) {
  2059           case T_BOOLEAN:
  2060           case T_BYTE:
  2061           case T_SHORT:
  2062           case T_CHAR:
  2063           case T_INT:  single_slots++; break;
  2064           case T_ARRAY:  // specific to LP64 (7145024)
  2065           case T_LONG: double_slots++; break;
  2066           default:  ShouldNotReachHere();
  2067         }
  2068       } else if (in_regs[i].first()->is_FloatRegister()) {
  2069         switch (in_sig_bt[i]) {
  2070           case T_FLOAT:  single_slots++; break;
  2071           case T_DOUBLE: double_slots++; break;
  2072           default:  ShouldNotReachHere();
  2073         }
  2074       }
  2075     }
  2076     total_save_slots = double_slots * 2 + single_slots;
  2077     // align the save area
  2078     if (double_slots != 0) {
  2079       stack_slots = round_to(stack_slots, 2);
  2080     }
  2081   }
  2083   int oop_handle_offset = stack_slots;
  2084 //  stack_slots += 9*VMRegImpl::slots_per_word;	// T0, A0 ~ A7
  2085   stack_slots += total_save_slots;
  2087   // Now any space we need for handlizing a klass if static method
  2089 	int klass_slot_offset = 0;
  2090 	int klass_offset = -1;
  2091 	int lock_slot_offset = 0;
  2092 	bool is_static = false;
  2093 	//int oop_temp_slot_offset = 0;
  2095   if (method->is_static()) {
  2096     klass_slot_offset = stack_slots;
  2097     stack_slots += VMRegImpl::slots_per_word;
  2098     klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size;
  2099     is_static = true;
  2100   }
  2102   // Plus a lock if needed
  2104   if (method->is_synchronized()) {
  2105     lock_slot_offset = stack_slots;
  2106     stack_slots += VMRegImpl::slots_per_word;
  2107   }
  2109   // Now a place to save return value or as a temporary for any gpr -> fpr moves
  2110 	// + 2 for return address (which we own) and saved FP
  2111   //stack_slots += 2;
  2112   stack_slots += 2 + 9 * VMRegImpl::slots_per_word;	// (T0, A0, A1, A2, A3, A4, A5, A6, A7)
  2114   // Ok The space we have allocated will look like:
  2115   //
  2116   //
  2117   // FP-> |                     |
  2118   //      |---------------------|
  2119   //      | 2 slots for moves   |
  2120   //      |---------------------|
  2121   //      | lock box (if sync)  |
  2122   //      |---------------------| <- lock_slot_offset
  2123   //      | klass (if static)   |
  2124   //      |---------------------| <- klass_slot_offset
  2125   //      | oopHandle area      |
  2126   //      |---------------------| <- oop_handle_offset
  2127   //      | outbound memory     |
  2128   //      | based arguments     |
  2129   //      |                     |
  2130   //      |---------------------|
  2131   //      | vararg area         |
  2132   //      |---------------------|
  2133   //      |                     |
  2134   // SP-> | out_preserved_slots |
  2135   //
  2136   //
  2139   // Now compute actual number of stack words we need rounding to make
  2140   // stack properly aligned.
  2141   stack_slots = round_to(stack_slots, StackAlignmentInSlots);
  2143   int stack_size = stack_slots * VMRegImpl::stack_slot_size;
  2145 	intptr_t start = (intptr_t)__ pc();
  2149 	// First thing make an ic check to see if we should even be here
  2150 	address ic_miss = SharedRuntime::get_ic_miss_stub();
  2152 	// We are free to use all registers as temps without saving them and
  2153 	// restoring them except FP. FP is the only callee save register
  2154 	// as far as the interpreter and the compiler(s) are concerned.
  2156   //refer to register_mips.hpp:IC_Klass
  2157 	const Register ic_reg = T1;
  2158 	const Register receiver = T0;
  2159 	Label hit;
  2160 	Label exception_pending;
  2162 	__ verify_oop(receiver);
  2163 	//__ lw(AT, receiver, oopDesc::klass_offset_in_bytes()); 
  2164 	//add for compressedoops
  2165 	__ load_klass(AT, receiver);
  2166 	__ beq(AT, ic_reg, hit); 
  2167 	__ delayed()->nop(); 
  2168 	__ jmp(ic_miss, relocInfo::runtime_call_type);
  2169 	__ delayed()->nop();
  2170 	// verified entry must be aligned for code patching.
  2171 	// and the first 5 bytes must be in the same cache line
  2172 	// if we align at 8 then we will be sure 5 bytes are in the same line
  2173 	__ align(8);
  2175 	__ bind(hit);
  2178 	int vep_offset = ((intptr_t)__ pc()) - start;
  2179 #ifdef COMPILER1
  2180 	if (InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) {
  2181 		// Object.hashCode can pull the hashCode from the header word
  2182 		// instead of doing a full VM transition once it's been computed.
  2183 		// Since hashCode is usually polymorphic at call sites we can't do
  2184 		// this optimization at the call site without a lot of work.
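		// Editor's note (illustrative mark word view): the fast path below only
		// wins when the mark word is unlocked, unbiased and already carries a
		// hash, roughly [ hash | age | biased_lock | lock ]; the hash is then
		// just mark >> markOopDesc::hash_shift. Every other state falls through
		// to slowCase and the normal VM transition.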
  2185 		Label slowCase;
  2186 		Register receiver = T0;
  2187 		Register result = V0;
  2188 		__ ld ( result, receiver, oopDesc::mark_offset_in_bytes()); 
  2189 		// check if locked
  2190 		__ andi(AT, result, markOopDesc::unlocked_value); 
  2191 		__ beq(AT, R0, slowCase); 
  2192 		__ delayed()->nop(); 
  2193 		if (UseBiasedLocking) {
  2194 			// Check if biased and fall through to runtime if so
  2195 			__ andi (AT, result, markOopDesc::biased_lock_bit_in_place);	  
  2196 			__ bne(AT,R0, slowCase); 
  2197 			__ delayed()->nop();
  2198 		}
  2199 		// get hash
  2200 		__ li(AT, markOopDesc::hash_mask_in_place);
  2201 		__ andr (AT, result, AT);
  2202 		// test if hashCode exists
  2203 		__ beq (AT, R0, slowCase); 
  2204 		__ delayed()->nop(); 
  2205 		__ shr(result, markOopDesc::hash_shift);
  2206 		__ jr(RA); 
  2207 		__ delayed()->nop(); 
  2208 		__ bind (slowCase);
  2209 	}
  2210 #endif // COMPILER1
  2212 	// The instruction at the verified entry point must be 5 bytes or longer
  2213 	// because it can be patched on the fly by make_non_entrant. The stack bang
  2214 	// instruction fits that requirement. 
  2216 	// Generate stack overflow check
  2218 	if (UseStackBanging) {
  2219 	//this function will modify the value in A0	
  2220 		__ push(A0);
  2221 		__ bang_stack_with_offset(StackShadowPages*os::vm_page_size());
  2222 		__ pop(A0);
  2223 	} else {
  2224 		// need a 5 byte instruction to allow MT safe patching to non-entrant
  2225 		__ nop(); 
  2226 		__ nop(); 
  2227 		__ nop(); 
  2228 		__ nop(); 
  2229 		__ nop();
  2230 	}
  2231 	// Generate a new frame for the wrapper.
  2232 	// does MIPS need this?
  2233 #ifndef OPT_THREAD
  2234 	__ get_thread(TREG);
  2235 #endif
  2236 //FIXME here
  2237 	__ st_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset()));
  2238 	// -2 because return address is already present and so is saved ebp
  2239 	__ move(AT, -(StackAlignmentInBytes));
  2240 	__ andr(SP, SP, AT);
  2242 	__ enter();
  2243 	__ addiu(SP, SP, -1 * (stack_size - 2*wordSize));
  2245 	// Frame is now completed as far a size and linkage.
  2247 	int frame_complete = ((intptr_t)__ pc()) - start;
  2249 	// Calculate the difference between esp and ebp. We need to know it
  2250 	// after the native call because on windows Java Natives will pop
  2251 	// the arguments and it is painful to do esp relative addressing
  2252 	// in a platform independent way. So after the call we switch to
  2253 	// ebp relative addressing.
  2254 //FIXME actually, the fp_adjustment may not be right, because andr(sp,sp,at) may
  2255 //change the SP
  2256 	int fp_adjustment = stack_size - 2*wordSize;
  2258 #ifdef COMPILER2
  2259 	// C2 may leave the stack dirty if not in SSE2+ mode
  2260 	// if (UseSSE >= 2) {
  2261 	//  __ verify_FPU(0, "c2i transition should have clean FPU stack");
  2262 	//} else {
  2263 	__ empty_FPU_stack();
  2264 	//}
  2265 #endif /* COMPILER2 */
  2267 	// Compute the ebp offset for any slots used after the jni call
  2269 	int lock_slot_ebp_offset = (lock_slot_offset*VMRegImpl::stack_slot_size) - fp_adjustment;
  2270 	// We use edi as a thread pointer because it is callee save and
  2271 	// if we load it once it is usable thru the entire wrapper
  2272 	// const Register thread = edi;
  2273 	const Register thread = TREG;
  2275 	// We use esi as the oop handle for the receiver/klass
  2276 	// It is callee save so it survives the call to native
  2278 	// const Register oop_handle_reg = esi;
  2279 	const Register oop_handle_reg = S4;
  2280   if (is_critical_native) {
  2281      __ stop("generate_native_wrapper in sharedRuntime <2>");
  2282 //TODO:Fu
  2283 /*
  2284     check_needs_gc_for_critical_native(masm, stack_slots, total_c_args, total_in_args,
  2285                                        oop_handle_offset, oop_maps, in_regs, in_sig_bt);
  2286 */
  2287   }
  2289 #ifndef OPT_THREAD
  2290 	__ get_thread(thread);
  2291 #endif
  2293   //
  2294   // We immediately shuffle the arguments so that any vm call we have to
  2295   // make from here on out (sync slow path, jvmpi, etc.) we will have
  2296   // captured the oops from our caller and have a valid oopMap for
  2297   // them.
  2299   // -----------------
  2300   // The Grand Shuffle 
  2301   //
  2302   // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv*
  2303   // and, if static, the class mirror instead of a receiver.  This pretty much
  2304   // guarantees that register layout will not match (and mips doesn't use reg
  2305   // parms though amd does).  Since the native abi doesn't use register args
  2306   // and the java convention does, we don't have to worry about collisions.
  2307   // All of our moves are reg->stack or stack->stack.
  2308   // We ignore the extra arguments during the shuffle and handle them at the
  2309   // last moment. The shuffle is described by the two calling convention
  2310   // vectors we have in our possession. We simply walk the java vector to
  2311   // get the source locations and the c vector to get the destinations.
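  // Editor's note: for regular JNI natives the (i, c_arg) pairs below are
  // visited from the last argument down; since each outgoing location is
  // offset upwards from its incoming one (see the comment further down), a
  // high-to-low copy never clobbers a source that has not moved yet. Critical
  // natives would need ComputeMoveOrder to break cycles (stubbed out here).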
  2313 	int c_arg = method->is_static() ? 2 : 1 ;
  2315 	// Record esp-based slot for receiver on stack for non-static methods
  2316 	int receiver_offset = -1;
  2318 	// This is a trick. We double the stack slots so we can claim
  2319 	// the oops in the caller's frame. Since we are sure to have
  2320 	// more args than the caller doubling is enough to make
  2321 	// sure we can capture all the incoming oop args from the
  2322 	// caller. 
  2323 	//
  2324 	OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
  2326   // Mark location of rbp (someday)
  2327   // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(rbp));
  2329   // Use eax, ebx as temporaries during any memory-memory moves we have to do
  2330   // All inbound args are referenced based on rbp and all outbound args via rsp.
  2334 #ifdef ASSERT
  2335   bool reg_destroyed[RegisterImpl::number_of_registers];
  2336   bool freg_destroyed[FloatRegisterImpl::number_of_registers];
  2337   for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) {
  2338     reg_destroyed[r] = false;
  2340   for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) {
  2341     freg_destroyed[f] = false;
  2344 #endif /* ASSERT */
  2346 	// We only have args in at most two integer registers; the rest are free as
  2347 	// temporaries if we have to do stack to stack moves. All inbound args are
  2348 	// referenced based on FP and all outbound args via SP.
  2350   // This may iterate in two different directions depending on the
  2351   // kind of native it is.  The reason is that for regular JNI natives
  2352   // the incoming and outgoing registers are offset upwards and for
  2353   // critical natives they are offset down.
  2354   GrowableArray<int> arg_order(2 * total_in_args);
  2355   VMRegPair tmp_vmreg;
  2356 //  tmp_vmreg.set1(rbx->as_VMReg());
  2357   tmp_vmreg.set1(T8->as_VMReg());
  2359   if (!is_critical_native) {
  2360     for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) {
  2361       arg_order.push(i);
  2362       arg_order.push(c_arg);
  2363     }
  2364   } else {
  2365     // Compute a valid move order, using tmp_vmreg to break any cycles
  2366      __ stop("generate_native_wrapper in sharedRuntime <2>");
  2367 //TODO:Fu
  2368 //    ComputeMoveOrder cmo(total_in_args, in_regs, total_c_args, out_regs, in_sig_bt, arg_order, tmp_vmreg);
  2369   }
  2371   int temploc = -1;
  2372   for (int ai = 0; ai < arg_order.length(); ai += 2) {
  2373     int i = arg_order.at(ai);
  2374     int c_arg = arg_order.at(ai + 1);
  2375     __ block_comment(err_msg("move %d -> %d", i, c_arg));
  2376     if (c_arg == -1) {
  2377       assert(is_critical_native, "should only be required for critical natives");
  2378       // This arg needs to be moved to a temporary
  2379       __ move(tmp_vmreg.first()->as_Register(), in_regs[i].first()->as_Register());
  2380       in_regs[i] = tmp_vmreg;
  2381       temploc = i;
  2382       continue;
  2383     } else if (i == -1) {
  2384       assert(is_critical_native, "should only be required for critical natives");
  2385       // Read from the temporary location
  2386       assert(temploc != -1, "must be valid");
  2387       i = temploc;
  2388       temploc = -1;
  2389     }
  2390 #ifdef ASSERT
  2391     if (in_regs[i].first()->is_Register()) {
  2392       assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!");
  2393     } else if (in_regs[i].first()->is_FloatRegister()) {
  2394       assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!");
  2395     }
  2396     if (out_regs[c_arg].first()->is_Register()) {
  2397       reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
  2398     } else if (out_regs[c_arg].first()->is_FloatRegister()) {
  2399       freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true;
  2400     }
  2401 #endif /* ASSERT */
  2402     switch (in_sig_bt[i]) {
  2403       case T_ARRAY:
  2404         if (is_critical_native) {
  2405 	  __ stop("generate_native_wrapper in sharedRuntime <2>");
  2406          //TODO:Fu
  2407          // unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]);
  2408           c_arg++;
  2409 #ifdef ASSERT
  2410           if (out_regs[c_arg].first()->is_Register()) {
  2411             reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
  2412           } else if (out_regs[c_arg].first()->is_FloatRegister()) {
  2413             freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true;
  2414           }
  2415 #endif
  2416           break;
  2418       case T_OBJECT:
  2419         assert(!is_critical_native, "no oop arguments");
  2420         object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
  2421                     ((i == 0) && (!is_static)),
  2422                     &receiver_offset);
  2423         break;
  2424       case T_VOID:
  2425         break;
  2427       case T_FLOAT:
  2428         float_move(masm, in_regs[i], out_regs[c_arg]);
  2429           break;
  2431       case T_DOUBLE:
  2432         assert( i + 1 < total_in_args &&
  2433                 in_sig_bt[i + 1] == T_VOID &&
  2434                 out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
  2435         double_move(masm, in_regs[i], out_regs[c_arg]);
  2436         break;
  2438       case T_LONG :
  2439         long_move(masm, in_regs[i], out_regs[c_arg]);
  2440         break;
  2442       case T_ADDRESS: assert(false, "found T_ADDRESS in java args");
  2444       default:
  2445 //        move32_64(masm, in_regs[i], out_regs[c_arg]);
  2446         simple_move32(masm, in_regs[i], out_regs[c_arg]);
  2447     }
  2448   }
  2450   // point c_arg at the first arg that is already loaded in case we
  2451   // need to spill before we call out
  2452    c_arg = total_c_args - total_in_args;
  2453 	// Pre-load a static method's oop into oop_handle_reg (S4).  Used both by
  2454 	// locking code and the normal JNI call code.
  2456 	__ move(oop_handle_reg, A1);
  2458 	if (method->is_static() && !is_critical_native) {
  2460 		// load oop into a register
  2461 		int oop_index = __ oop_recorder()->find_index(JNIHandles::make_local(
  2462 					(method->method_holder())->java_mirror()));
  2465 		RelocationHolder rspec = oop_Relocation::spec(oop_index);
  2466 		__ relocate(rspec);
  2467 		//__ lui(oop_handle_reg, Assembler::split_high((int)JNIHandles::make_local(
  2468 		//	Klass::cast(method->method_holder())->java_mirror())));
  2469 		//__ addiu(oop_handle_reg, oop_handle_reg, Assembler::split_low((int)
  2470 		//    JNIHandles::make_local(Klass::cast(method->method_holder())->java_mirror())));
  2471 		__ li48(oop_handle_reg, (long)JNIHandles::make_local((method->method_holder())->java_mirror()));
  2472 	//	__ verify_oop(oop_handle_reg);
  2473 		// Now handlize the static class mirror it's known not-null.
  2474 		__ sd( oop_handle_reg, SP, klass_offset); 
  2475 		map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
  2477 		// Now get the handle
  2478 		__ lea(oop_handle_reg, Address(SP, klass_offset));
  2479 		// store the klass handle as second argument
  2480 		__ move(A1, oop_handle_reg);
  2481                 // and protect the arg if we must spill
  2482                 c_arg--;
  2483 	}
  2484   // Change state to native (we save the return address in the thread, since it might not
  2485   // be pushed on the stack when we do a stack traversal). It is enough that the pc()
  2486   // points into the right code segment. It does not have to be the correct return pc.
  2487   // We use the same pc/oopMap repeatedly when we call out.
  2489 	intptr_t the_pc = (intptr_t) __ pc();
  2491 	oop_maps->add_gc_map(the_pc - start, map);
  2493 	//__ set_last_Java_frame(thread, esp, noreg, (address)the_pc);
  2494 	__ set_last_Java_frame(SP, noreg, NULL);
  2495 	__ relocate(relocInfo::internal_pc_type);
  2496 	{
  2497 		intptr_t save_pc = (intptr_t)the_pc;
  2498 		__ li48(AT, save_pc);
  2499 	}
  2500 	__ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));
  2503 	// We have all of the arguments set up at this point. We must not touch any
  2504 	// register argument registers from here on (what if we save/restore them and there are no oops?).
  2505 	{
  2506 		SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0);
  2507 		int metadata_index = __ oop_recorder()->find_index(method());
  2508 		RelocationHolder rspec = metadata_Relocation::spec(metadata_index);
  2509 		__ relocate(rspec);
  2510 		//__ lui(T6, Assembler::split_high((int)JNIHandles::make_local(method())));
  2511 		//__ addiu(T6, T6, Assembler::split_low((int)JNIHandles::make_local(method())));
  2512 		__ li48(AT, (long)(method()));
  2514 		__ call_VM_leaf(
  2515 				CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), 
  2516 		   thread, AT);
  2517 	}
  2520   // These are register definitions we need for locking/unlocking 
  2521 //  const Register swap_reg = eax;  // Must use eax for cmpxchg instruction
  2522 //  const Register obj_reg  = ecx;  // Will contain the oop
  2523  // const Register lock_reg = edx;  // Address of compiler lock object (BasicLock)
  2524 //FIXME, I have no idea which register to use
  2525 	const Register swap_reg = T8;  // Must use eax for cmpxchg instruction
  2526 	const Register obj_reg  = T9;  // Will contain the oop
  2527 	//const Register lock_reg = T6;  // Address of compiler lock object (BasicLock)
  2528 	const Register lock_reg = c_rarg0;  // Address of compiler lock object (BasicLock)
  2532 	Label slow_path_lock;
  2533 	Label lock_done;
  2535 	// Lock a synchronized method
  2536 	if (method->is_synchronized()) {
  2537                 assert(!is_critical_native, "unhandled");
  2539 		const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes();
  2541 		// Get the handle (the 2nd argument)
  2542 		__ move(oop_handle_reg, A1);
  2544 		// Get address of the box
  2545 		__ lea(lock_reg, Address(FP, lock_slot_ebp_offset));
  2547 		// Load the oop from the handle 
  2548 		__ ld(obj_reg, oop_handle_reg, 0);
  2550 		if (UseBiasedLocking) {
  2551 			// Note that oop_handle_reg is trashed during this call
  2552 		__ biased_locking_enter(lock_reg, obj_reg, swap_reg, A1, 
  2553 				false, lock_done, &slow_path_lock);
  2554 		}
  2556 		// Load immediate 1 into swap_reg
  2557 		__ move(swap_reg, 1);
  2559 		__ ld(AT, obj_reg, 0);   
  2560 		__ orr(swap_reg, swap_reg, AT); 
  2562 		__ sd( swap_reg, lock_reg, mark_word_offset);
  2563 		__ cmpxchg(lock_reg, Address(obj_reg, 0), swap_reg);
  2564 		__ bne(AT, R0, lock_done);
  2565 		__ delayed()->nop(); 
  2566 		// Test if the oopMark is an obvious stack pointer, i.e.,
  2567 		//  1) (mark & 3) == 0, and
  2568 		//  2) esp <= mark < esp + os::pagesize()
  2569 		// These 3 tests can be done by evaluating the following
  2570 		// expression: ((mark - esp) & (3 - os::vm_page_size())),
  2571 		// assuming both stack pointer and pagesize have their
  2572 		// least significant 2 bits clear.
  2573 		// NOTE: the oopMark is in swap_reg as the result of cmpxchg
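		// Editor's worked example (assuming a 4K page): 3 - 4096 = ...0xF003, so
		// the AND keeps bits 0-1 and bits 12 and up of (mark - sp). A zero result
		// therefore means the low two bits are clear and 0 <= mark - sp < 4096,
		// i.e. a recursive lock of our own stack page.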
  2575 		__ dsub(swap_reg, swap_reg,SP);
  2576  		__ move(AT, 3 - os::vm_page_size());
  2577 		__ andr(swap_reg , swap_reg, AT);
  2578 		// Save the test result, for recursive case, the result is zero
  2579 		__ sd(swap_reg, lock_reg, mark_word_offset); 
  2580 	//FIXME here, Why notEqual? 	
  2581 		__ bne(swap_reg,R0, slow_path_lock);
  2582 		__ delayed()->nop();  
  2583 		// Slow path will re-enter here
  2584 		__ bind(lock_done);
  2586 		if (UseBiasedLocking) {
  2587 			// Re-fetch oop_handle_reg as we trashed it above
  2588 			__ move(A1, oop_handle_reg);
  2589 		}
  2590 	}
  2593 	// Finally just about ready to make the JNI call
  2596 	// get JNIEnv* which is first argument to native
  2597   if (!is_critical_native) {
  2598 	__ addi(A0, thread, in_bytes(JavaThread::jni_environment_offset()));
  2599   }
  2601 	// Example: Java_java_lang_ref_Finalizer_invokeFinalizeMethod(JNIEnv *env, jclass clazz, jobject ob)
  2602 	/* Load the second arguments into A1 */
  2603 	//__ ld(A1, SP , wordSize ); 	// klass
  2605 	// Now set thread in native
  2606 	__ addi(AT, R0, _thread_in_native); 
  2607 	__ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); 
  2608 	/* Jin: do the call */
  2609 	__ call(method->native_function(), relocInfo::runtime_call_type);
  2610 	__ delayed()->nop();
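	// Note: MIPS branches and calls have a one-instruction delay slot; the
	// delayed()->nop() idiom used throughout fills it whenever no useful
	// instruction can be hoisted into it.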
  2611 	// WARNING - on Windows Java Natives use pascal calling convention and pop the
  2612 	// arguments off of the stack. We could just re-adjust the stack pointer here
  2613 	// and continue to do SP relative addressing but we instead switch to FP
  2614 	// relative addressing.
  2616 	// Unpack native results.  
  2617 	switch (ret_type) {
  2618 	case T_BOOLEAN: __ c2bool(V0);            break;
  2619 	case T_CHAR   : __ andi(V0,V0, 0xFFFF);      break;
  2620 	case T_BYTE   : __ sign_extend_byte (V0); break;
  2621 	case T_SHORT  : __ sign_extend_short(V0); break;
  2622 	case T_INT    : break;                    // nothing to do
  2623 	case T_DOUBLE :
  2624 	case T_FLOAT  :
  2625 	// Result is in F0 (x86's st0); we'll save it as needed
  2626 	break;
  2627 	case T_ARRAY:                 // Really a handle
  2628 	case T_OBJECT:                // Really a handle
  2629 	break; // can't de-handlize until after safepoint check
  2630 	case T_VOID: break;
  2631 	case T_LONG: break;
  2632 	default       : ShouldNotReachHere();
	}
  2634 	// Switch thread to "native transition" state before reading the synchronization state.
  2635 	// This additional state is necessary because reading and testing the synchronization
  2636 	// state is not atomic w.r.t. GC, as this scenario demonstrates:
  2637 	//     Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
  2638 	//     VM thread changes sync state to synchronizing and suspends threads for GC.
  2639 	//     Thread A is resumed to finish this native method, but doesn't block here since it
  2640 	//     didn't see any synchronization in progress, and escapes.
  2641 	// __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_native_trans);    
  2642 	//__ sw(_thread_in_native_trans, thread, JavaThread::thread_state_offset());    
  2643 	//   __ move(AT, (int)_thread_in_native_trans);
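	// Storing _thread_in_native_trans below closes that window: the VM thread
	// will not treat the safepoint as reached until this thread either blocks
	// in the transition check further down or returns to _thread_in_Java.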
  2644 	__ addi(AT, R0, _thread_in_native_trans); 
  2645 	__ sw(AT, thread, in_bytes(JavaThread::thread_state_offset()));    
  2647   Label after_transition;
  2649 	// check for safepoint operation in progress and/or pending suspend requests
  2650 	{ Label Continue;
  2651 //FIXME here: which register should we use?
  2652 		//        SafepointSynchronize::_not_synchronized);
  2653 		__ li(AT, SafepointSynchronize::address_of_state());
  2654 		__ lw(A0, AT, 0);	
  2655 		__ addi(AT, A0, -SafepointSynchronize::_not_synchronized); 
  2656 		Label L;
  2657 		__ bne(AT,R0, L); 
  2658 		__ delayed()->nop();	
  2659 		__ lw(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); 
  2660 		__ beq(AT, R0, Continue); 
  2661 		__ delayed()->nop(); 
  2662 		__ bind(L);
  2664 		// Don't use call_VM as it will see a possible pending exception and forward it
  2665 		// and never return here preventing us from clearing _last_native_pc down below.
  2666 		// Also can't use call_VM_leaf either as it will check to see if esi & edi are
  2667 		// preserved and correspond to the bcp/locals pointers. So we do a runtime call
  2668 		// by hand.
  2669 		//
  2670 		save_native_result(masm, ret_type, stack_slots);
  2671 		__ move (A0, thread); 
  2672 		__ addi(SP,SP, -wordSize); 
  2673     if (!is_critical_native) {
  2674       __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), relocInfo::runtime_call_type);
  2675       __ delayed()->nop(); 
  2676     } else {
  2677       __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition), relocInfo::runtime_call_type);
  2678       __ delayed()->nop(); 
    }
  2680 //		__ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), relocInfo::runtime_call_type);
  2681 //		__ delayed()->nop(); 
  2682 		__ addi(SP,SP, wordSize); 
  2683 		// added for compressed oops
  2684 		__ reinit_heapbase();
  2685 		// Restore any method result value
  2686 		restore_native_result(masm, ret_type, stack_slots);
  2688     if (is_critical_native) {
  2689       // The call above performed the transition to thread_in_Java so
  2690       // skip the transition logic below.
  2691       __ beq(R0, R0, after_transition);
  2692       __ delayed()->nop(); 
    }
  2695 		__ bind(Continue);
	}
  2698 	// change thread state
  2699 	__ addi(AT, R0, _thread_in_Java); 
  2700 	__ sw(AT,  thread, in_bytes(JavaThread::thread_state_offset())); 
  2701   __ bind(after_transition);
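	// If a stack overflow disabled the yellow guard pages while we were in
	// native code, they must be re-armed before returning to Java; the
	// out-of-line 'reguard' path below calls SharedRuntime::reguard_yellow_pages.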
  2702 	Label reguard;
  2703 	Label reguard_done;
  2704 	__ ld(AT, thread, in_bytes(JavaThread::stack_guard_state_offset())); 
  2705 	__ addi(AT, AT, -JavaThread::stack_guard_yellow_disabled); 
  2706 	__ beq(AT, R0, reguard);
  2707 	__ delayed()->nop();  
  2708 	// slow path reguard  re-enters here
  2709 	__ bind(reguard_done);
  2711 	// Handle possible exception (will unlock if necessary)
  2713 	// native result if any is live 
  2715 	// Unlock
  2716 	Label slow_path_unlock;
  2717 	Label unlock_done;
  2718 	if (method->is_synchronized()) {
  2720 		Label done;
  2722 		// Get locked oop from the handle we passed to jni
  2723 		__ ld( obj_reg, oop_handle_reg, 0);
  2724 		//FIXME 
  2725 		if (UseBiasedLocking) {
  2726 			__ biased_locking_exit(obj_reg, T8, done);
		}
  2730 		// Simple recursive lock?
  2732 		__ ld(AT, FP, lock_slot_ebp_offset); 
  2733 		__ beq(AT, R0, done);
  2734 		__ delayed()->nop();	
  2735 		// Must save the native result if it is live now because cmpxchg will clobber it
  2736 		if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) {
  2737 			save_native_result(masm, ret_type, stack_slots);
		}
  2740 		//  get old displaced header
  2741 		__ ld (T8, FP, lock_slot_ebp_offset);
  2742 		// get address of the stack lock
  2743 		//FIXME aoqi
  2744 		//__ addi (T6, FP, lock_slot_ebp_offset);
  2745 		__ addi (c_rarg0, FP, lock_slot_ebp_offset);
  2746 		// Atomic swap old header if oop still contains the stack lock
  2747 		//FIXME aoqi
  2748 		//__ cmpxchg(T8, Address(obj_reg, 0),T6 );
  2749 		__ cmpxchg(T8, Address(obj_reg, 0), c_rarg0);
  2751 		__ beq(AT, R0, slow_path_unlock);
  2752 		__ delayed()->nop(); 
  2753 		// slow path re-enters here
  2754 		__ bind(unlock_done);
  2755 		if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) {
  2756 			restore_native_result(masm, ret_type, stack_slots);
		}
  2759 		__ bind(done);
	}

	{
  2763 		SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0);
  2764 		// Tell dtrace about this method exit
  2765 		save_native_result(masm, ret_type, stack_slots);
  2766 		int metadata_index = __ oop_recorder()->find_index( (method()));
  2767 		RelocationHolder rspec = metadata_Relocation::spec(metadata_index);
  2768 		__ relocate(rspec);
  2769 		//__ lui(T6, Assembler::split_high((int)JNIHandles::make_local(method())));
  2770 		//__ addiu(T6, T6, Assembler::split_low((int)JNIHandles::make_local(method())));
  2771 		__ li48(AT, (long)(method()));
  2773 		__ call_VM_leaf(
  2774 				CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), 
  2775 				thread, AT);
  2776 		restore_native_result(masm, ret_type, stack_slots);
	}
  2779 	// We can finally stop using that last_Java_frame we setup ages ago
  2781 	__ reset_last_Java_frame(false, true);
  2783 	// Unpack oop result
  2784 	if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
  2785 		Label L;
  2786 		//  __ cmpl(eax, NULL_WORD);
  2787 		//  __ jcc(Assembler::equal, L);
  2788 		__ beq(V0, R0,L ); 
  2789 		__ delayed()->nop(); 
  2790 		//  __ movl(eax, Address(eax));
  2791 		__ ld(V0, V0, 0);	
  2792 		__ bind(L);
  2793 		// __ verify_oop(eax);
  2794 		__ verify_oop(V0);
	}
  2797   if (!is_critical_native) {
  2798 	// reset handle block
  2799 	__ ld(AT, thread, in_bytes(JavaThread::active_handles_offset()));
  2800 	__ sw(R0, AT, JNIHandleBlock::top_offset_in_bytes()); 
  }
  2803   if (!is_critical_native) {
  2804 	// Any exception pending?
  2805 	__ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); 
  2807 	__ bne(AT, R0, exception_pending);
  2808 	__ delayed()->nop();
  }
  2810 	// no exception, we're almost done
  2812 	// check that only result value is on FPU stack
  2813 	__ verify_FPU(ret_type == T_FLOAT || ret_type == T_DOUBLE ? 1 : 0, "native_wrapper normal exit");
  2815   // Fixup floating point results so that the result looks like a return from a compiled method
  2816 /*  if (ret_type == T_FLOAT) {
  2817     if (UseSSE >= 1) {
  2818       // Pop st0 and store as float and reload into xmm register
  2819       __ fstp_s(Address(ebp, -4));
  2820       __ movss(xmm0, Address(ebp, -4));
  2822   } else if (ret_type == T_DOUBLE) {
  2823     if (UseSSE >= 2) {
  2824       // Pop st0 and store as double and reload into xmm register
  2825       __ fstp_d(Address(ebp, -8));
  2826       __ movsd(xmm0, Address(ebp, -8));
  2829 */
  2830   // Return
  2831 #ifndef OPT_THREAD
  2832        __ get_thread(TREG);
  2833 #endif
  2834 	__ ld_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset()));
  2835 	__ leave();
  2837 	__ jr(RA);
  2838 	__ delayed()->nop(); 
  2839 	// Unexpected paths are out of line and go here
  2840 /*
  2841   if (!is_critical_native) {
  2842     // forward the exception
  2843     __ bind(exception_pending);
  2845     // and forward the exception
  2846     __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
  2848 */
  2849 	// Slow path locking & unlocking
  2850 	if (method->is_synchronized()) {
  2852 		// BEGIN Slow path lock
  2854 		__ bind(slow_path_lock);
  2856                 // protect the args we've loaded
  2857                 save_args(masm, total_c_args, c_arg, out_regs);
  2859 		// last_Java_frame is already set up; no exceptions, so do a vanilla call, not call_VM
  2860 		// args are (oop obj, BasicLock* lock, JavaThread* thread)
  2862 		__ move(A0, obj_reg); 
  2863 		__ move(A1, lock_reg); 
  2864 		__ move(A2, thread); 
  2865 		__ addi(SP, SP, - 3*wordSize); 
  2867                 __ move(AT, -(StackAlignmentInBytes));
  2868                 __ move(S2, SP);     // use S2 as a sender SP holder
  2869                 __ andr(SP, SP, AT); // align stack as required by ABI
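                // Illustrative: for a power-of-two StackAlignmentInBytes,
                // -(StackAlignmentInBytes) equals ~(StackAlignmentInBytes - 1);
                // with 16-byte alignment AT becomes 0x...fff0, so the andr above
                // rounds SP down to an aligned value while S2 keeps the original
                // SP for restoration after the call.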
  2871 		__ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), relocInfo::runtime_call_type);
  2872 		__ delayed()->nop();
  2873                 __ move(SP, S2);
  2874 		__ addi(SP, SP, 3*wordSize); 
  2876                 restore_args(masm, total_c_args, c_arg, out_regs);
  2878 #ifdef ASSERT
  2879 		{ Label L;
  2880 			// __ cmpl(Address(thread, in_bytes(Thread::pending_exception_offset())), (int)NULL_WORD);
  2881 			__ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); 
  2882 			//__ jcc(Assembler::equal, L);
  2883 			__ beq(AT, R0, L); 
  2884 			__ delayed()->nop(); 
  2885 			__ stop("no pending exception allowed on exit from monitorenter");
  2886 			__ bind(L);
		}
  2888 #endif
  2889 		__ b(lock_done);
  2890 		__ delayed()->nop();
  2891 		// END Slow path lock
  2893 		// BEGIN Slow path unlock
  2894 		__ bind(slow_path_unlock);
  2896 		// Slow path unlock
  2898 		if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) {
  2899 			save_native_result(masm, ret_type, stack_slots);
		}
  2901 		// Save pending exception around call to VM (which contains an EXCEPTION_MARK)
  2903 		__ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); 
  2904 		__ push(AT); 
  2905 		__ sd(R0, thread, in_bytes(Thread::pending_exception_offset()));
  2907                 __ move(AT, -(StackAlignmentInBytes));
  2908                 __ move(S2, SP);     // use S2 as a sender SP holder
  2909                 __ andr(SP, SP, AT); // align stack as required by ABI
  2911 		// should be a peel
  2912 		// +wordSize because of the push above
  2913 		__ addi(A1, FP, lock_slot_ebp_offset);
  2915 		__ move(A0, obj_reg); 
  2916 		__ addi(SP,SP, -2*wordSize);
  2917 		__ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C),
  2918 				relocInfo::runtime_call_type);
  2919 		__ delayed()->nop(); 
  2920 		__ addi(SP,SP, 2*wordSize);
  2921                 __ move(SP, S2);
  2922 		// added for compressed oops
  2923 		__ reinit_heapbase();
  2924 #ifdef ASSERT
		{
  2926 			Label L;
  2927 			//    __ cmpl(Address(thread, in_bytes(Thread::pending_exception_offset())), NULL_WORD);
  2928 			__ lw( AT, thread, in_bytes(Thread::pending_exception_offset())); 
  2929 			//__ jcc(Assembler::equal, L);
  2930 			__ beq(AT, R0, L); 
  2931 			__ delayed()->nop(); 
  2932 			__ stop("no pending exception allowed on exit complete_monitor_unlocking_C");
  2933 			__ bind(L);
		}
  2935 #endif /* ASSERT */
  2937 		__ pop(AT); 
  2938 		__ sd(AT, thread, in_bytes(Thread::pending_exception_offset()));
  2939 		if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) {
  2940 			restore_native_result(masm, ret_type, stack_slots);
		}
  2942 		__ b(unlock_done);
  2943 		__ delayed()->nop(); 
  2944 		// END Slow path unlock
	}
  2948 	// SLOW PATH Reguard the stack if needed
  2950 	__ bind(reguard);
  2951 	save_native_result(masm, ret_type, stack_slots);
  2952 	__ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), 
  2953 			relocInfo::runtime_call_type);
  2954 	__ delayed()->nop();	
  2955 	// added for compressed oops
  2956 	__ reinit_heapbase();
  2957 	restore_native_result(masm, ret_type, stack_slots);
  2958 	__ b(reguard_done);
  2959 	__ delayed()->nop();
  2961 	// BEGIN EXCEPTION PROCESSING
  2962     if (!is_critical_native) {
  2963 	// Forward  the exception
  2964 	__ bind(exception_pending);
  2966 	// remove possible return value from FPU register stack
  2967 	__ empty_FPU_stack();
  2969 	// pop our frame
  2970   // forward_exception_entry needs the return address on the stack
  2971         __ addiu(SP, FP, wordSize);
  2972 	__ ld(FP, SP, (-1) * wordSize);
  2974 	// and forward the exception
  2975 	__ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
  2976 	__ delayed()->nop();
    }
  2978 	__ flush();
  2980 	nmethod *nm = nmethod::new_native_nmethod(method,
  2981                         compile_id,
  2982 			masm->code(),
  2983 			vep_offset,
  2984 			frame_complete,
  2985 			stack_slots / VMRegImpl::slots_per_word,
  2986 			(is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
  2987 			in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size),
  2988 			oop_maps);
  2990   if (is_critical_native) {
  2991     nm->set_lazy_critical_native(true);
  }
  2993 	return nm;
}
  2998 #ifdef HAVE_DTRACE_H
  2999 // ---------------------------------------------------------------------------
  3000 // Generate a dtrace nmethod for a given signature.  The method takes arguments
  3001 // in the Java compiled code convention, marshals them to the native
  3002 // abi and then leaves nops at the position you would expect to call a native
  3003 // function. When the probe is enabled the nops are replaced with a trap
  3004 // instruction that dtrace inserts and the trace will cause a notification
  3005 // to dtrace.
  3006 //
  3007 // The probes are only able to take primitive types and java/lang/String as
  3008 // arguments.  No other java types are allowed. Strings are converted to utf8
  3009 // strings so that from dtrace point of view java strings are converted to C
  3010 // strings. There is an arbitrary fixed limit on the total space that a method
  3011 // can use for converting the strings. (256 chars per string in the signature).
  3012 // So any java string larger than this is truncated.
  3014 static int  fp_offset[ConcreteRegisterImpl::number_of_registers] = { 0 };
  3015 static bool offsets_initialized = false;
  3017 static VMRegPair reg64_to_VMRegPair(Register r) {
  3018   VMRegPair ret;
  3019   if (wordSize == 8) {
  3020     ret.set2(r->as_VMReg());
  3021   } else {
  3022     ret.set_pair(r->successor()->as_VMReg(), r->as_VMReg());
  3024   return ret;
}
  3028 nmethod *SharedRuntime::generate_dtrace_nmethod(
  3029     MacroAssembler *masm, methodHandle method) {
  3032   // generate_dtrace_nmethod is guarded by a mutex so we are sure to
  3033   // be single threaded in this method.
  3034   assert(AdapterHandlerLibrary_lock->owned_by_self(), "must be");
  3036   // Fill in the signature array, for the calling-convention call.
  3037   int total_args_passed = method->size_of_parameters();
  3039   BasicType* in_sig_bt  = NEW_RESOURCE_ARRAY(BasicType, total_args_passed);
  3040   VMRegPair  *in_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed);
  3042   // The signature we are going to use for the trap that dtrace will see
  3043   // java/lang/String is converted. We drop "this" and any other object
  3044   // is converted to NULL.  (A one-slot java/lang/Long object reference
  3045   // is converted to a two-slot long, which is why we double the allocation).
  3046   BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed * 2);
  3047   VMRegPair* out_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed * 2);
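  // Illustrative example (hypothetical signature): for (Ljava/lang/Long;I)V
  // the loop below yields out_sig_bt = { T_LONG, T_VOID, T_INT }, which is why
  // the allocations above are doubled to leave room for the extra T_VOID slots.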
  3049   int i=0;
  3050   int total_strings = 0;
  3051   int first_arg_to_pass = 0;
  3052   int total_c_args = 0;
  3054   // Skip the receiver as dtrace doesn't want to see it
  3055   if( !method->is_static() ) {
  3056     in_sig_bt[i++] = T_OBJECT;
  3057     first_arg_to_pass = 1;
  }
  3060   SignatureStream ss(method->signature());
  3061   for ( ; !ss.at_return_type(); ss.next()) {
  3062     BasicType bt = ss.type();
  3063     in_sig_bt[i++] = bt;  // Collect remaining bits of signature
  3064     out_sig_bt[total_c_args++] = bt;
  3065     if( bt == T_OBJECT) {
  3066       symbolOop s = ss.as_symbol_or_null();
  3067       if (s == vmSymbols::java_lang_String()) {
  3068         total_strings++;
  3069         out_sig_bt[total_c_args-1] = T_ADDRESS;
  3070       } else if (s == vmSymbols::java_lang_Boolean() ||
  3071                  s == vmSymbols::java_lang_Byte()) {
  3072         out_sig_bt[total_c_args-1] = T_BYTE;
  3073       } else if (s == vmSymbols::java_lang_Character() ||
  3074                  s == vmSymbols::java_lang_Short()) {
  3075         out_sig_bt[total_c_args-1] = T_SHORT;
  3076       } else if (s == vmSymbols::java_lang_Integer() ||
  3077                  s == vmSymbols::java_lang_Float()) {
  3078         out_sig_bt[total_c_args-1] = T_INT;
  3079       } else if (s == vmSymbols::java_lang_Long() ||
  3080                  s == vmSymbols::java_lang_Double()) {
  3081         out_sig_bt[total_c_args-1] = T_LONG;
  3082         out_sig_bt[total_c_args++] = T_VOID;
      }
  3084     } else if ( bt == T_LONG || bt == T_DOUBLE ) {
  3085       in_sig_bt[i++] = T_VOID;   // Longs & doubles take 2 Java slots
  3086       // We convert double to long
  3087       out_sig_bt[total_c_args-1] = T_LONG;
  3088       out_sig_bt[total_c_args++] = T_VOID;
  3089     } else if ( bt == T_FLOAT) {
  3090       // We convert float to int
  3091       out_sig_bt[total_c_args-1] = T_INT;
    }
  }
  3095   assert(i==total_args_passed, "validly parsed signature");
  3097   // Now get the compiled-Java layout as input arguments
  3098   int comp_args_on_stack;
  3099   comp_args_on_stack = SharedRuntime::java_calling_convention(
  3100       in_sig_bt, in_regs, total_args_passed, false);
  3102   // We have received a description of where all the java args are located
  3103   // on entry to the wrapper. We need to convert these args to where
  3104   // a native (non-JNI) function would expect them. To figure out
  3105   // where they go we convert the java signature to a C signature and remove
  3106   // T_VOID for any long/double we might have received.
  3109   // Now figure out where the args must be stored and how much stack space
  3110   // they require (neglecting out_preserve_stack_slots but including space
  3111   // for storing the first six register arguments). It's weird; see int_stk_helper.
  3112   //
  3113   int out_arg_slots;
  3114   out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args);
  3116   // Calculate the total number of stack slots we will need.
  3118   // First count the abi requirement plus all of the outgoing args
  3119   int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
  3121   // Plus a temp for possible conversion of float/double/long register args
  3123   int conversion_temp = stack_slots;
  3124   stack_slots += 2;
  3127   // Now space for the string(s) we must convert
  3129   int string_locs = stack_slots;
  3130   stack_slots += total_strings *
  3131                    (max_dtrace_string_size / VMRegImpl::stack_slot_size);
  3133   // Ok The space we have allocated will look like:
  3134   //
  3135   //
  3136   // FP-> |                     |
  3137   //      |---------------------|
  3138   //      | string[n]           |
  3139   //      |---------------------| <- string_locs[n]
  3140   //      | string[n-1]         |
  3141   //      |---------------------| <- string_locs[n-1]
  3142   //      | ...                 |
  3143   //      | ...                 |
  3144   //      |---------------------| <- string_locs[1]
  3145   //      | string[0]           |
  3146   //      |---------------------| <- string_locs[0]
  3147   //      | temp                |
  3148   //      |---------------------| <- conversion_temp
  3149   //      | outbound memory     |
  3150   //      | based arguments     |
  3151   //      |                     |
  3152   //      |---------------------|
  3153   //      |                     |
  3154   // SP-> | out_preserved_slots |
  3155   //
  3156   //
  3158   // Now compute the actual number of stack words we need, rounding to keep
  3159   // the stack properly aligned.
  3160   stack_slots = round_to(stack_slots, 4 * VMRegImpl::slots_per_word);
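  // Illustrative: assuming VMRegImpl::slots_per_word == 2, this rounds up to a
  // multiple of 8 slots (32 bytes), e.g. 45 slots become 48 (192 bytes).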
  3162   int stack_size = stack_slots * VMRegImpl::stack_slot_size;
  3164   intptr_t start = (intptr_t)__ pc();
  3166   // First thing make an ic check to see if we should even be here
  3169     Label L;
  3170     const Register temp_reg = G3_scratch;
  3171     Address ic_miss(temp_reg, SharedRuntime::get_ic_miss_stub());
  3172     __ verify_oop(O0);
  3173     __ ld_ptr(O0, oopDesc::klass_offset_in_bytes(), temp_reg);
  3174     __ cmp(temp_reg, G5_inline_cache_reg);
  3175     __ brx(Assembler::equal, true, Assembler::pt, L);
  3176     __ delayed()->nop();
  3178     __ jump_to(ic_miss, 0);
  3179     __ delayed()->nop();
  3180     __ align(CodeEntryAlignment);
  3181     __ bind(L);
  3184   int vep_offset = ((intptr_t)__ pc()) - start;
  3187   // The instruction at the verified entry point must be 5 bytes or longer
  3188   // because it can be patched on the fly by make_non_entrant. The stack bang
  3189   // instruction fits that requirement.
  3191   // Generate stack overflow check before creating frame
  3192   __ generate_stack_overflow_check(stack_size);
  3194   assert(((intptr_t)__ pc() - start - vep_offset) >= 5,
  3195          "valid size for make_non_entrant");
  3197   // Generate a new frame for the wrapper.
  3198   __ save(SP, -stack_size, SP);
  3200   // The frame is now complete as far as size and linkage go.
  3202   int frame_complete = ((intptr_t)__ pc()) - start;
  3204 #ifdef ASSERT
  3205   bool reg_destroyed[RegisterImpl::number_of_registers];
  3206   bool freg_destroyed[FloatRegisterImpl::number_of_registers];
  3207   for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) {
  3208     reg_destroyed[r] = false;
  3210   for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) {
  3211     freg_destroyed[f] = false;
  3214 #endif /* ASSERT */
  3216   VMRegPair zero;
  3217   const Register g0 = G0; // without this we get a compiler warning (why??)
  3218   zero.set2(g0->as_VMReg());
  3220   int c_arg, j_arg;
  3222   Register conversion_off = noreg;
  3224   for (j_arg = first_arg_to_pass, c_arg = 0 ;
  3225        j_arg < total_args_passed ; j_arg++, c_arg++ ) {
  3227     VMRegPair src = in_regs[j_arg];
  3228     VMRegPair dst = out_regs[c_arg];
  3230 #ifdef ASSERT
  3231     if (src.first()->is_Register()) {
  3232       assert(!reg_destroyed[src.first()->as_Register()->encoding()], "ack!");
  3233     } else if (src.first()->is_FloatRegister()) {
  3234       assert(!freg_destroyed[src.first()->as_FloatRegister()->encoding(
  3235                                                FloatRegisterImpl::S)], "ack!");
  3237     if (dst.first()->is_Register()) {
  3238       reg_destroyed[dst.first()->as_Register()->encoding()] = true;
  3239     } else if (dst.first()->is_FloatRegister()) {
  3240       freg_destroyed[dst.first()->as_FloatRegister()->encoding(
  3241                                                  FloatRegisterImpl::S)] = true;
  3243 #endif /* ASSERT */
  3245     switch (in_sig_bt[j_arg]) {
  3246       case T_ARRAY:
  3247       case T_OBJECT:
  3249           if (out_sig_bt[c_arg] == T_BYTE  || out_sig_bt[c_arg] == T_SHORT ||
  3250               out_sig_bt[c_arg] == T_INT || out_sig_bt[c_arg] == T_LONG) {
  3251             // need to unbox a one-slot value
  3252             Register in_reg = L0;
  3253             Register tmp = L2;
  3254             if ( src.first()->is_reg() ) {
  3255               in_reg = src.first()->as_Register();
  3256             } else {
  3257               assert(Assembler::is_simm13(reg2offset(src.first()) + STACK_BIAS),
  3258                      "must be");
  3259               __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, in_reg);
  3261             // If the final destination is an acceptable register
  3262             if ( dst.first()->is_reg() ) {
  3263               if ( dst.is_single_phys_reg() || out_sig_bt[c_arg] != T_LONG ) {
  3264                 tmp = dst.first()->as_Register();
  3268             Label skipUnbox;
  3269             if ( wordSize == 4 && out_sig_bt[c_arg] == T_LONG ) {
  3270               __ mov(G0, tmp->successor());
  3272             __ br_null(in_reg, true, Assembler::pn, skipUnbox);
  3273             __ delayed()->mov(G0, tmp);
  3275             BasicType bt = out_sig_bt[c_arg];
  3276             int box_offset = java_lang_boxing_object::value_offset_in_bytes(bt);
  3277             switch (bt) {
  3278                 case T_BYTE:
  3279                   __ ldub(in_reg, box_offset, tmp); break;
  3280                 case T_SHORT:
  3281                   __ lduh(in_reg, box_offset, tmp); break;
  3282                 case T_INT:
  3283                   __ ld(in_reg, box_offset, tmp); break;
  3284                 case T_LONG:
  3285                   __ ld_long(in_reg, box_offset, tmp); break;
  3286                 default: ShouldNotReachHere();
  3289             __ bind(skipUnbox);
  3290             // If tmp wasn't final destination copy to final destination
  3291             if (tmp == L2) {
  3292               VMRegPair tmp_as_VM = reg64_to_VMRegPair(L2);
  3293               if (out_sig_bt[c_arg] == T_LONG) {
  3294                 long_move(masm, tmp_as_VM, dst);
  3295               } else {
  3296                 move32_64(masm, tmp_as_VM, out_regs[c_arg]);
  3299             if (out_sig_bt[c_arg] == T_LONG) {
  3300               assert(out_sig_bt[c_arg+1] == T_VOID, "must be");
  3301               ++c_arg; // move over the T_VOID to keep the loop indices in sync
  3303           } else if (out_sig_bt[c_arg] == T_ADDRESS) {
  3304             Register s =
  3305                 src.first()->is_reg() ? src.first()->as_Register() : L2;
  3306             Register d =
  3307                 dst.first()->is_reg() ? dst.first()->as_Register() : L2;
  3309             // We store the oop now so that the conversion pass can reach
  3310             // it while in the inner frame. This will be the only store if
  3311             // the oop is NULL.
  3312             if (s != L2) {
  3313               // src is register
  3314               if (d != L2) {
  3315                 // dst is register
  3316                 __ mov(s, d);
  3317               } else {
  3318                 assert(Assembler::is_simm13(reg2offset(dst.first()) +
  3319                           STACK_BIAS), "must be");
  3320                 __ st_ptr(s, SP, reg2offset(dst.first()) + STACK_BIAS);
  3322             } else {
  3323                 // src not a register
  3324                 assert(Assembler::is_simm13(reg2offset(src.first()) +
  3325                            STACK_BIAS), "must be");
  3326                 __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, d);
  3327                 if (d == L2) {
  3328                   assert(Assembler::is_simm13(reg2offset(dst.first()) +
  3329                              STACK_BIAS), "must be");
  3330                   __ st_ptr(d, SP, reg2offset(dst.first()) + STACK_BIAS);
  3333           } else if (out_sig_bt[c_arg] != T_VOID) {
  3334             // Convert the arg to NULL
  3335             if (dst.first()->is_reg()) {
  3336               __ mov(G0, dst.first()->as_Register());
  3337             } else {
  3338               assert(Assembler::is_simm13(reg2offset(dst.first()) +
  3339                          STACK_BIAS), "must be");
  3340               __ st_ptr(G0, SP, reg2offset(dst.first()) + STACK_BIAS);
  3344         break;
  3345       case T_VOID:
  3346         break;
  3348       case T_FLOAT:
  3349         if (src.first()->is_stack()) {
  3350           // Stack to stack/reg is simple
  3351           move32_64(masm, src, dst);
  3352         } else {
  3353           if (dst.first()->is_reg()) {
  3354             // freg -> reg
  3355             int off =
  3356               STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size;
  3357             Register d = dst.first()->as_Register();
  3358             if (Assembler::is_simm13(off)) {
  3359               __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
  3360                      SP, off);
  3361               __ ld(SP, off, d);
  3362             } else {
  3363               if (conversion_off == noreg) {
  3364                 __ set(off, L6);
  3365                 conversion_off = L6;
  3367               __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
  3368                      SP, conversion_off);
  3369               __ ld(SP, conversion_off , d);
  3371           } else {
  3372             // freg -> mem
  3373             int off = STACK_BIAS + reg2offset(dst.first());
  3374             if (Assembler::is_simm13(off)) {
  3375               __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
  3376                      SP, off);
  3377             } else {
  3378               if (conversion_off == noreg) {
  3379                 __ set(off, L6);
  3380                 conversion_off = L6;
  3382               __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
  3383                      SP, conversion_off);
  3387         break;
  3389       case T_DOUBLE:
  3390         assert( j_arg + 1 < total_args_passed &&
  3391                 in_sig_bt[j_arg + 1] == T_VOID &&
  3392                 out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
  3393         if (src.first()->is_stack()) {
  3394           // Stack to stack/reg is simple
  3395           long_move(masm, src, dst);
  3396         } else {
  3397           Register d = dst.first()->is_reg() ? dst.first()->as_Register() : L2;
  3399           // Destination could be an odd reg on 32bit in which case
  3400           // we can't load direct to the destination.
  3402           if (!d->is_even() && wordSize == 4) {
  3403             d = L2;
  3405           int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size;
  3406           if (Assembler::is_simm13(off)) {
  3407             __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(),
  3408                    SP, off);
  3409             __ ld_long(SP, off, d);
  3410           } else {
  3411             if (conversion_off == noreg) {
  3412               __ set(off, L6);
  3413               conversion_off = L6;
  3415             __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(),
  3416                    SP, conversion_off);
  3417             __ ld_long(SP, conversion_off, d);
  3419           if (d == L2) {
  3420             long_move(masm, reg64_to_VMRegPair(L2), dst);
  3423         break;
  3425       case T_LONG :
  3426         // 32bit can't do a split move of something like g1 -> O0, O1
  3427         // so use a memory temp
  3428         if (src.is_single_phys_reg() && wordSize == 4) {
  3429           Register tmp = L2;
  3430           if (dst.first()->is_reg() &&
  3431               (wordSize == 8 || dst.first()->as_Register()->is_even())) {
  3432             tmp = dst.first()->as_Register();
  3435           int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size;
  3436           if (Assembler::is_simm13(off)) {
  3437             __ stx(src.first()->as_Register(), SP, off);
  3438             __ ld_long(SP, off, tmp);
  3439           } else {
  3440             if (conversion_off == noreg) {
  3441               __ set(off, L6);
  3442               conversion_off = L6;
  3444             __ stx(src.first()->as_Register(), SP, conversion_off);
  3445             __ ld_long(SP, conversion_off, tmp);
  3448           if (tmp == L2) {
  3449             long_move(masm, reg64_to_VMRegPair(L2), dst);
  3451         } else {
  3452           long_move(masm, src, dst);
  3454         break;
  3456       case T_ADDRESS: assert(false, "found T_ADDRESS in java args");
  3458       default:
  3459         move32_64(masm, src, dst);
  3464   // If we have any strings we must store any register based arg to the stack
  3465   // This includes any still live xmm registers too.
  3467   if (total_strings > 0 ) {
  3469     // protect all the arg registers
  3470     __ save_frame(0);
  3471     __ mov(G2_thread, L7_thread_cache);
  3472     const Register L2_string_off = L2;
  3474     // Get first string offset
  3475     __ set(string_locs * VMRegImpl::stack_slot_size, L2_string_off);
  3477     for (c_arg = 0 ; c_arg < total_c_args ; c_arg++ ) {
  3478       if (out_sig_bt[c_arg] == T_ADDRESS) {
  3480         VMRegPair dst = out_regs[c_arg];
  3481         const Register d = dst.first()->is_reg() ?
  3482             dst.first()->as_Register()->after_save() : noreg;
  3484         // It's a string; the oop was already copied to the out arg
  3485         // position
  3486         if (d != noreg) {
  3487           __ mov(d, O0);
  3488         } else {
  3489           assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS),
  3490                  "must be");
  3491           __ ld_ptr(FP,  reg2offset(dst.first()) + STACK_BIAS, O0);
  3493         Label skip;
  3495         __ br_null(O0, false, Assembler::pn, skip);
  3496         __ delayed()->add(FP, L2_string_off, O1);
  3498         if (d != noreg) {
  3499           __ mov(O1, d);
  3500         } else {
  3501           assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS),
  3502                  "must be");
  3503           __ st_ptr(O1, FP,  reg2offset(dst.first()) + STACK_BIAS);
  3506         __ call(CAST_FROM_FN_PTR(address, SharedRuntime::get_utf),
  3507                 relocInfo::runtime_call_type);
  3508         __ delayed()->add(L2_string_off, max_dtrace_string_size, L2_string_off);
  3510         __ bind(skip);
      }
    }
  3515     __ mov(L7_thread_cache, G2_thread);
  3516     __ restore();
  }
  3521   // Ok now we are done. Need to place the nop that dtrace wants in order to
  3522   // patch in the trap
  3524   int patch_offset = ((intptr_t)__ pc()) - start;
  3526   __ nop();
  3529   // Return
  3531   __ ret();
  3532   __ delayed()->restore();
  3534   __ flush();
  3536   nmethod *nm = nmethod::new_dtrace_nmethod(
  3537       method, masm->code(), vep_offset, patch_offset, frame_complete,
  3538       stack_slots / VMRegImpl::slots_per_word);
  3539   return nm;
}
  3543 #endif // HAVE_DTRACE_H
  3545 // This function returns the size adjustment (in words) of a c2i adapter
  3546 // activation, for use during deoptimization
  3547 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
  3548 	return (callee_locals - callee_parameters) * Interpreter::stackElementWords;
}
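// Illustrative example: a callee with 2 parameters and 5 locals needs
// (5 - 2) * Interpreter::stackElementWords extra words in the adjusted
// frame to hold its non-parameter locals.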
  3551 // "Top of Stack" slots that may be unused by the calling convention but must
  3552 // otherwise be preserved.
  3553 // On Intel these are not necessary and the value can be zero.
  3554 // On Sparc this describes the words reserved for storing a register window
  3555 // when an interrupt occurs.
  3556 uint SharedRuntime::out_preserve_stack_slots() {
  3557   //return frame::register_save_words * VMRegImpl::slots_per_word;
  3558 	 return 0;
}
  3560 /*
  3561 static void gen_new_frame(MacroAssembler* masm, bool deopt) {
  3562 //
  3563 // Common out the new frame generation for deopt and uncommon trap
  3564 //
  3565   Register        G3pcs              = G3_scratch; // Array of new pcs (input)
  3566   Register        Oreturn0           = O0;
  3567   Register        Oreturn1           = O1;
  3568   Register        O2UnrollBlock      = O2;
  3569   Register        O3array            = O3;         // Array of frame sizes (input)
  3570   Register        O4array_size       = O4;         // number of frames (input)
  3571   Register        O7frame_size       = O7;         // number of frames (input)
  3573   __ ld_ptr(O3array, 0, O7frame_size);
  3574   __ sub(G0, O7frame_size, O7frame_size);
  3575   __ save(SP, O7frame_size, SP);
  3576   __ ld_ptr(G3pcs, 0, I7);                      // load frame's new pc
  3578   #ifdef ASSERT
  3579   // make sure that the frames are aligned properly
  3580 #ifndef _LP64
  3581   __ btst(wordSize*2-1, SP);
  3582   __ breakpoint_trap(Assembler::notZero);
  3583 #endif
  3584   #endif
  3586   // Deopt needs to pass some extra live values from frame to frame
  3588   if (deopt) {
  3589     __ mov(Oreturn0->after_save(), Oreturn0);
  3590     __ mov(Oreturn1->after_save(), Oreturn1);
  3593   __ mov(O4array_size->after_save(), O4array_size);
  3594   __ sub(O4array_size, 1, O4array_size);
  3595   __ mov(O3array->after_save(), O3array);
  3596   __ mov(O2UnrollBlock->after_save(), O2UnrollBlock);
  3597   __ add(G3pcs, wordSize, G3pcs);               // point to next pc value
  3599   #ifdef ASSERT
  3600   // trash registers to show a clear pattern in backtraces
  3601   __ set(0xDEAD0000, I0);
  3602   __ add(I0,  2, I1);
  3603   __ add(I0,  4, I2);
  3604   __ add(I0,  6, I3);
  3605   __ add(I0,  8, I4);
  3606   // Don't touch I5 could have valuable savedSP
  3607   __ set(0xDEADBEEF, L0);
  3608   __ mov(L0, L1);
  3609   __ mov(L0, L2);
  3610   __ mov(L0, L3);
  3611   __ mov(L0, L4);
  3612   __ mov(L0, L5);
  3614   // trash the return value as there is nothing to return yet
  3615   __ set(0xDEAD0001, O7);
  3616   #endif
  3618   __ mov(SP, O5_savedSP);
  3622 static void make_new_frames(MacroAssembler* masm, bool deopt) {
  3623   //
  3624   // loop through the UnrollBlock info and create new frames
  3625   //
  3626   Register        G3pcs              = G3_scratch;
  3627   Register        Oreturn0           = O0;
  3628   Register        Oreturn1           = O1;
  3629   Register        O2UnrollBlock      = O2;
  3630   Register        O3array            = O3;
  3631   Register        O4array_size       = O4;
  3632   Label           loop;
  3634   // Before we make new frames, check to see if stack is available.
  3635   // Do this after the caller's return address is on top of stack
  3636   if (UseStackBanging) {
  3637     // Get total frame size for interpreted frames
  3638     __ ld(Address(O2UnrollBlock, 0,
  3639          Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()), O4);
  3640     __ bang_stack_size(O4, O3, G3_scratch);
  3643   __ ld(Address(O2UnrollBlock, 0, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()), O4array_size);
  3644   __ ld_ptr(Address(O2UnrollBlock, 0, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()), G3pcs);
  3646   __ ld_ptr(Address(O2UnrollBlock, 0, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()), O3array);
  3648   // Adjust old interpreter frame to make space for new frame's extra java locals
  3649   //
  3650   // We capture the original sp for the transition frame only because it is needed in
  3651   // order to properly calculate interpreter_sp_adjustment. Even though in real life
  3652   // every interpreter frame captures a savedSP it is only needed at the transition
  3653   // (fortunately). If we had to have it correct everywhere then we would need to
  3654   // be told the sp_adjustment for each frame we create. If the frame size array
  3655   // were to have twice the frame count entries then we could have pairs [sp_adjustment, frame_size]
  3656   // for each frame we create and keep up the illusion every where.
  3657   //
  3659   __ ld(Address(O2UnrollBlock, 0, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()), O7);
  3660   __ mov(SP, O5_savedSP);       // remember initial sender's original sp before adjustment
  3661   __ sub(SP, O7, SP);
  3663 #ifdef ASSERT
  3664   // make sure that there is at least one entry in the array
  3665   __ tst(O4array_size);
  3666   __ breakpoint_trap(Assembler::zero);
  3667 #endif
  3669   // Now push the new interpreter frames
  3670   __ bind(loop);
  3672   // allocate a new frame, filling the registers
  3674   gen_new_frame(masm, deopt);        // allocate an interpreter frame
  3676   __ tst(O4array_size);
  3677   __ br(Assembler::notZero, false, Assembler::pn, loop);
  3678   __ delayed()->add(O3array, wordSize, O3array);
  3679   __ ld_ptr(G3pcs, 0, O7);                      // load final frame new pc
  3682 */
  3684 //------------------------------generate_deopt_blob----------------------------
  3685 // Ought to generate an ideal graph & compile, but here's some MIPS assembly
  3686 // instead.
  3687 void SharedRuntime::generate_deopt_blob() {
  3688   // allocate space for the code
  3689   ResourceMark rm;
  3690   // setup code generation tools
  3691   //CodeBuffer     buffer ("deopt_blob", 4000, 2048);
  3692   CodeBuffer     buffer ("deopt_blob", 8000, 2048);//aoqi FIXME for debug
  3693   MacroAssembler* masm  = new MacroAssembler( & buffer);
  3694   int frame_size_in_words;
  3695   OopMap* map = NULL;
  3696   // Account for the extra args we place on the stack
  3697   // by the time we call fetch_unroll_info
  3698   const int additional_words = 2; // deopt kind, thread
  3700   OopMapSet *oop_maps = new OopMapSet();
  3702   address start = __ pc();
  3703   Label cont;
  3704   // we use S3 for DeOpt reason register
  3705   Register reason = S3;
  3706   // use S6 for thread register
  3707   Register thread = TREG;
  3708   // use S7 for fetch_unroll_info returned UnrollBlock
  3709   Register unroll = S7;
  3710   // Prolog for non exception case!
  3711   // Correct the return address we were given.
  3712   //FIXME: is the return address on the TOS or in RA?
  3713   __ addi(RA, RA, - (NativeCall::return_address_offset));
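  // RA points just past the call that brought us here; backing it up by
  // return_address_offset (presumably the size of the call sequence) makes it
  // the pc of the call site, which the deopt bookkeeping below expects.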
  3714   // Save everything in sight.
  3715   map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words);
  3716   // Normal deoptimization
  3717   __ move(reason, Deoptimization::Unpack_deopt);
  3718   __ b(cont);
  3719   __ delayed()->nop();
  3721   int reexecute_offset = __ pc() - start;
  3723    // Reexecute case
  3724    // the return address is the pc that describes which bci to re-execute at
  3726    // No need to update map as each call to save_live_registers will produce identical oopmap
  3727   //__ addi(RA, RA, - (NativeCall::return_address_offset));
  3728   (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words);
  3729   __ move(reason, Deoptimization::Unpack_reexecute); 
  3730   __ b(cont);
  3731   __ delayed()->nop();
  3733   int   exception_offset = __ pc() - start;
  3734   // Prolog for exception case
  3736   // all registers are dead at this entry point, except for V0 and
  3737   // V1 which contain the exception oop and exception pc
  3738   // respectively.  Set them in TLS and fall thru to the
  3739   // unpack_with_exception_in_tls entry point.
  3741   __ get_thread(thread);
  3742   __ st_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); 
  3743   __ st_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset()));
  3744   int exception_in_tls_offset = __ pc() - start;
  3745   // new implementation because exception oop is now passed in JavaThread
  3747   // Prolog for exception case
  3748   // All registers must be preserved because they might be used by LinearScan
  3749   // Exception oop and throwing PC are passed in JavaThread
  3750   // tos: stack at point of call to method that threw the exception (i.e. only
  3751   // args are on the stack, no return address)
  3753   // Return address will be patched later with the throwing pc. The correct value is not 
  3754   // available now because loading it from memory would destroy registers.
  3755    // Save everything in sight.
  3756   // No need to update map as each call to save_live_registers will produce identical oopmap
  3757   __ addi(RA, RA, - (NativeCall::return_address_offset));
  3758   (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words);
  3760   // Now it is safe to overwrite any register
  3761   // store the correct deoptimization type
  3762   __ move(reason, Deoptimization::Unpack_exception);
  3763   // load throwing pc from JavaThread and patch it as the return address 
  3764   // of the current frame. Then clear the field in JavaThread
  3765   __ get_thread(thread);
  3766   __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset()));
  3767   __ st_ptr(V1, SP, RegisterSaver::raOffset() * wordSize); //save ra
  3768   __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset()));
  3771 #ifdef ASSERT
  3772   // verify that there is really an exception oop in JavaThread
  3773   __ ld_ptr(AT, thread, in_bytes(JavaThread::exception_oop_offset()));
  3774   __ verify_oop(AT);
  3775   // verify that there is no pending exception
  3776   Label no_pending_exception;
  3777   __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset()));
  3778   __ beq(AT, R0, no_pending_exception); 
  3779   __ delayed()->nop(); 
  3780   __ stop("must not have pending exception here");
  3781   __ bind(no_pending_exception);
  3782 #endif
  3783   __ bind(cont);
  3784   // Compiled code leaves the floating point stack dirty, empty it.
  3785   __ empty_FPU_stack();
  3788   // Call C code.  Need thread and this frame, but NOT official VM entry
  3789   // crud.  We cannot block on this call, no GC can happen.  
  3790 #ifndef OPT_THREAD
  3791   __ get_thread(thread);
  3792 #endif
  3794 /*
  3796    0x000000555bd82aec: dadd a0, s6, zero                ; __ move(A0, thread);
  3797    0x000000555bd82af0: daddi sp, sp, 0xfffffff0         ; __ addi(SP, SP, -additional_words  * wordSize);
  3798    0x000000555bd82af4: sd sp, 0x1c8(s6)                 ; __ set_last_Java_frame(thread, NOREG, NOREG, NULL);
  3799    0x000000555bd82af8: lui at, 0x0                      ; __ li64(AT, save_pc);
  3800    0x000000555bd82afc: ori at, at, 0x55
  3801    0x000000555bd82b00: dsll at, at, 16
  3802    0x000000555bd82b04: ori at, at, 0x5bd8
  3803    0x000000555bd82b08: dsll at, at, 16
  3804    0x000000555bd82b0c: ori at, at, 0x2b34       ; save_pc = pc() +  NativeMovConstReg::instruction_size + NativeCall::return_address_offset + 4
  3805    0x000000555bd82b10: sd at, 0x1d0(s6)
  3806    0x000000555bd82b14: lui t9, 0x0
  3807    0x000000555bd82b18: ori t9, t9, 0x55
  3808    0x000000555bd82b1c: dsll t9, t9, 16
  3809    0x000000555bd82b20: ori t9, t9, 0x5aa6
  3810    0x000000555bd82b24: dsll t9, t9, 16
  3811    0x000000555bd82b28: ori t9, t9, 0x4074
  3812    0x000000555bd82b2c: jalr t9
  3813    0x000000555bd82b30: sll zero, zero, 0
  3815    0x000000555bd82b34: daddiu sp, sp, 0x10	; save_pc
  3816  */
  3817   __ move(A0, thread);
  3818   __ addi(SP, SP, -additional_words  * wordSize);
  3820   __ set_last_Java_frame(NOREG, NOREG, NULL);
  3822   // Call fetch_unroll_info().  Need thread and this frame, but NOT official VM entry - cannot block on
  3823   // this call, no GC can happen.  Call should capture return values.
  3825   __ relocate(relocInfo::internal_pc_type); 
  {
  3827     intptr_t save_pc = (intptr_t)__ pc() +  NativeMovConstReg::instruction_size + NativeCall::return_address_offset + 4;
  3828     __ li48(AT, save_pc);
  }
  3830   __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));
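  // The value stored above becomes last_Java_pc. Per the disassembly comment,
  // save_pc points at the instruction following the jalr's delay slot, which
  // accounts for the extra +4 in its computation.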
  3832   __ call((address)Deoptimization::fetch_unroll_info);
  3833   //__ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), relocInfo::runtime_call_type);
  3834   __ delayed()->nop();
  3835   oop_maps->add_gc_map(__ pc() - start, map);
  3836   __ addiu(SP, SP, additional_words * wordSize);
  3837   __ get_thread(thread);
  3838   __ reset_last_Java_frame(false, true);
  3840   // Load UnrollBlock into S7
  3841   __ move(unroll, V0);
  3844   // Move the unpack kind to a safe place in the UnrollBlock because
  3845   // we are very short of registers
  3847   Address unpack_kind(unroll, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes());
  3848   //__ pop(reason);	
  3849   __ sw(reason, unpack_kind);
  3850   // save the unpack_kind value
  3851   // Retrieve the possible live values (return values)
  3852   // All callee save registers representing jvm state
  3853   // are now in the vframeArray.
  3855   Label noException;
  3856   __ move(AT, Deoptimization::Unpack_exception);
  3857   __ bne(AT, reason, noException);// Was exception pending?
  3858   __ delayed()->nop();
  3859   __ ld_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset()));
  3860   __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset()));
  3861   __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset()));
  3862   __ st_ptr(R0, thread, in_bytes(JavaThread::exception_oop_offset()));
  3864   __ verify_oop(V0);
  3866   // Overwrite the result registers with the exception results.
  3867   __ st_ptr(V0, SP, RegisterSaver::v0Offset()*wordSize); 
  3868   __ st_ptr(V1, SP, RegisterSaver::v1Offset()*wordSize);
  3870   __ bind(noException);
  3873   // Stack is back to only having register save data on the stack.
  3874   // Now restore the result registers. Everything else is either dead or captured
  3875   // in the vframeArray.
  3877   RegisterSaver::restore_result_registers(masm);
  3878   // All of the register save area has been popped off the stack. Only the
  3879   // return address remains.
  3880   // Pop all the frames we must move/replace. 
  3881   // Frame picture (youngest to oldest)
  3882   // 1: self-frame (no frame link)
  3883   // 2: deopting frame  (no frame link)
  3884   // 3: caller of deopting frame (could be compiled/interpreted). 
  3885   //
  3886   // Note: by leaving the return address of self-frame on the stack
  3887   // and using the size of frame 2 to adjust the stack
  3888   // when we are done the return to frame 3 will still be on the stack.
  3890   // register for the sender's sp
  3891   Register sender_sp = Rsender;
  3892   // register for frame pcs
  3893   Register pcs = T0;
  3894   // register for frame sizes
  3895   Register sizes = T1;
  3896   // register for frame count
  3897   Register count = T3;
  3899   // Pop deoptimized frame
  3900   __ lw(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes());
  3901   __ add(SP, SP, AT);
  3902   // sp should be pointing at the return address to the caller (3)
  3904   // Load array of frame pcs into pcs
  3905   __ ld_ptr(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes());
  3906   __ addi(SP, SP, wordSize);  // trash the old pc
  3907   // Load array of frame sizes into sizes (T1)
  3908   __ ld_ptr(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes());
  3912   // Load count of frames into count (T3)
  3913   __ lw(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes());
  3914   // Pick up the initial fp we should save
  3915   __ ld(FP, unroll,  Deoptimization::UnrollBlock::initial_info_offset_in_bytes());
  3916    // Now adjust the caller's stack to make up for the extra locals
  3917   // but record the original sp so that we can save it in the skeletal interpreter
  3918   // frame and the stack walking of interpreter_sender will get the unextended sp
  3919   // value and not the "real" sp value.
  3920   __ move(sender_sp, SP);
  3921   __ lw(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes());
  3922   __ sub(SP, SP, AT);
  3924   // Push interpreter frames in a loop
  3925 /*
  3927 Loop:
  3928    0x000000555bd82d18: lw t2, 0x0(t1)           ; lw sizes[i]	<--- error lw->ld
  3929    0x000000555bd82d1c: ld at, 0x0(t0)           ; ld pcs[i]
  3930    0x000000555bd82d20: daddi t2, t2, 0xfffffff0 ; t2 -= 16 
  3931    0x000000555bd82d24: daddi sp, sp, 0xfffffff0
  3932    0x000000555bd82d28: sd fp, 0x0(sp)           ; push fp
  3933    0x000000555bd82d2c: sd at, 0x8(sp)           ; push at
  3934    0x000000555bd82d30: dadd fp, sp, zero        ; fp <- sp 
  3935    0x000000555bd82d34: dsub sp, sp, t2          ; sp -= t2 
  3936    0x000000555bd82d38: sd zero, 0xfffffff0(fp)  ; __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize);
  3937    0x000000555bd82d3c: sd s4, 0xfffffff8(fp)    ; __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);
  3938    0x000000555bd82d40: dadd s4, sp, zero        ; move(sender_sp, SP);
  3939    0x000000555bd82d44: daddi t3, t3, 0xffffffff ; count --
  3940    0x000000555bd82d48: daddi t1, t1, 0x4        ; sizes += 4
  3941    0x000000555bd82d4c: bne t3, zero, 0x000000555bd82d18
  3942    0x000000555bd82d50: daddi t0, t0, 0x4        ; <--- error    t0 += 8
  3943  */
  3945 // pcs[0] = frame_pcs[0] = deopt_sender.raw_pc();
  3946   Label loop;
  3947   __ bind(loop);
  3948   __ ld(T2, sizes, 0);		// Load frame size
  3949   __ ld_ptr(AT, pcs, 0);  	       // save return address
  3950   __ addi(T2, T2, -2*wordSize);           // we'll push pc and fp by hand
  3951   __ push2(AT, FP);			
  3952   __ move(FP, SP);
  3953   __ sub(SP, SP, T2); 			// Prolog!
  3954   // This value is corrected by layout_activation_impl
  3955   __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); 
  3956   __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable
  3957   __ move(sender_sp, SP);	// pass to next frame
  3958   __ addi(count, count, -1); 	// decrement counter
  3959   __ addi(sizes, sizes, wordSize); 	// Bump array pointer (sizes)
  3960   __ bne(count, R0, loop);
  3961   __ delayed()->addi(pcs, pcs, wordSize); 	// Bump array pointer (pcs)
  3962   __ ld(AT, pcs, 0);			// frame_pcs[number_of_frames] = Interpreter::deopt_entry(vtos, 0);
  3963   // Re-push self-frame
  3964   __ push2(AT, FP);			
  3965   __ move(FP, SP);
  3966   __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); 
  3967   __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize); 
  3968   __ addi(SP, SP, -(frame_size_in_words - 2 - additional_words) * wordSize);
  3970   // Restore frame locals after moving the frame
  3971   __ sd(V0, SP, RegisterSaver::v0Offset() * wordSize);
  3972   __ sd(V1, SP, RegisterSaver::v1Offset() * wordSize);
  3973   __ sdc1(F0, SP, RegisterSaver::fpResultOffset()* wordSize);   // save the float result into the frame
  3974   __ sdc1(F1, SP, (RegisterSaver::fpResultOffset() + 1) * wordSize);
  3977   // Call unpack_frames().  Need thread and this frame, but NOT official VM entry - cannot block on
  3978   // this call, no GC can happen.
  3979   __ move(A1, reason);	// exec_mode
  3980   __ get_thread(thread);
  3981   __ move(A0, thread);	// thread
  3982   __ addi(SP, SP, (-additional_words) *wordSize);
  3984   // set last_Java_sp, last_Java_fp
  3985   __ set_last_Java_frame(NOREG, FP, NULL);
  3987   __ move(AT, -(StackAlignmentInBytes));
  3988   __ andr(SP, SP, AT);   // Fix stack alignment as required by ABI
  3990   __ relocate(relocInfo::internal_pc_type); 
  {
  3992     intptr_t save_pc = (intptr_t)__ pc() +  NativeMovConstReg::instruction_size + NativeCall::return_address_offset + 4;
  3993     __ li48(AT, save_pc);
  }
  3995   __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));
  3997   //__ call(Deoptimization::unpack_frames);
  3998   __ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), relocInfo::runtime_call_type);
  3999   __ delayed()->nop();
  4000   // Revert SP alignment after call since we're going to do some SP relative addressing below
  4001   __ ld(SP, thread, in_bytes(JavaThread::last_Java_sp_offset()));
  4002   // Set an oopmap for the call site
  4003   oop_maps->add_gc_map(__ offset(), new OopMap( frame_size_in_words , 0));
  4005   __ push(V0);
  4007   __ get_thread(thread);
  4008   __ reset_last_Java_frame(false, false);
  4010   // Collect return values
  4011   __ ld(V0, SP, (RegisterSaver::v0Offset() + additional_words +1) * wordSize);
  4012   __ ld(V1, SP, (RegisterSaver::v1Offset() + additional_words +1) * wordSize);
  4013   __ ldc1(F0, SP, RegisterSaver::fpResultOffset() * wordSize);   // restore float/double result
  4014   __ ldc1(F1, SP, (RegisterSaver::fpResultOffset() + 1) * wordSize);
  4016   // Clear floating point stack before returning to interpreter
  4017   __ empty_FPU_stack();
  4018   //FIXME: we should handle float and double results properly here
  4019   // Push a float or double return value if necessary.
  4020   __ leave();
  4022   // Jump to interpreter
  4023   __ jr(RA);
  4024   __ delayed()->nop();
  4026   masm->flush();
  4027   _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words);
  4028   _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
  4029 }
  4031 #ifdef COMPILER2
  4033 //------------------------------generate_uncommon_trap_blob--------------------
  4034 // Ought to generate an ideal graph & compile, but here's some hand-written
  4035 // MIPS assembly instead.
  4036 void SharedRuntime::generate_uncommon_trap_blob() {
  4037   // allocate space for the code
  4038   ResourceMark rm;
  4039   // setup code generation tools
  4040   CodeBuffer  buffer ("uncommon_trap_blob", 512*80 , 512*40 ); 
  4041   MacroAssembler* masm = new MacroAssembler(&buffer);   
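  // Self-frame layout: each *_off/*_off2 pair names two adjacent 32-bit
  // slots holding one 64-bit register, so framesize is counted in
  // BytesPerInt units (the assert below checks 16-byte alignment).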
  4043   enum frame_layout {
  4044 	s0_off, s0_off2,
  4045 	s1_off, s1_off2,
  4046 	s2_off, s2_off2,
  4047 	s3_off, s3_off2,
  4048 	s4_off, s4_off2,
  4049 	s5_off, s5_off2,
  4050 	s6_off, s6_off2,
  4051 	s7_off, s7_off2,
  4052 	fp_off, fp_off2,
  4053 	return_off, return_off2,    // slot for return address    sp + 9
  4054     framesize
  4055   };
  4056   assert(framesize % 4 == 0, "sp not 16-byte aligned");
  4058   address start = __ pc();
  4060   // Push self-frame.
  4061   __ daddiu(SP, SP, -framesize * BytesPerInt);
  4063   __ sd(RA, SP, return_off * BytesPerInt);
  4064   __ sd(FP, SP, fp_off * BytesPerInt);
  4066   // Save callee-saved registers (S0..S7); RA and FP were saved above.
  4068   __ sd(S0, SP, s0_off * BytesPerInt);
  4069   __ sd(S1, SP, s1_off * BytesPerInt);
  4070   __ sd(S2, SP, s2_off * BytesPerInt);
  4071   __ sd(S3, SP, s3_off * BytesPerInt);
  4072   __ sd(S4, SP, s4_off * BytesPerInt);
  4073   __ sd(S5, SP, s5_off * BytesPerInt);
  4074   __ sd(S6, SP, s6_off * BytesPerInt);
  4075   __ sd(S7, SP, s7_off * BytesPerInt);
  4077   __ daddi(FP, SP, fp_off * BytesPerInt);
  4079   // Clear the floating point exception stack
  4080   __ empty_FPU_stack();
  4082   Register thread = TREG;
  4084 #ifndef OPT_THREAD
  4085   __ get_thread(thread);
  4086 #endif
  4087   // set last_Java_sp
  4088   __ set_last_Java_frame(NOREG, FP, NULL);
  4089   __ relocate(relocInfo::internal_pc_type); 
  4090   assert(NativeCall::return_address_offset == 24, "in sharedRuntime return_address_offset");
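  // Store the pc the call below returns to as last_Java_pc; the 28-byte
  // offset appears to cover the instructions emitted between here and the
  // call, and the assert above pins return_address_offset to keep the
  // arithmetic in sync.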
  4091   {
  4092     long save_pc = (long)__ pc() +  28 + NativeCall::return_address_offset;
  4093     __ li48(AT, (long)save_pc);
  4094     __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));
  4095   }
  4096   // Call C code.  Need thread but NOT official VM entry
  4097   // crud.  We cannot block on this call, no GC can happen.  Call should
  4098   // capture callee-saved registers as well as return values.
  4099   __ move(A0, thread);
  4100   // argument already in T0
  4101   __ move(A1, T0);
  4102   __ li48(T9, (long)Deoptimization::uncommon_trap);
  4103   __ jalr(T9);
  4104   __ delayed()->nop();
  4106   // Set an oopmap for the call site
  4107   OopMapSet *oop_maps = new OopMapSet();
  4108   OopMap* map =  new OopMap( framesize, 0 );
  4110   map->set_callee_saved( VMRegImpl::stack2reg(s0_off    ),  S0->as_VMReg() ); 
  4111   map->set_callee_saved( VMRegImpl::stack2reg(s1_off    ),  S1->as_VMReg() );
  4112   map->set_callee_saved( VMRegImpl::stack2reg(s2_off    ),  S2->as_VMReg() );
  4113   map->set_callee_saved( VMRegImpl::stack2reg(s3_off    ),  S3->as_VMReg() );
  4114   map->set_callee_saved( VMRegImpl::stack2reg(s4_off    ),  S4->as_VMReg() );
  4115   map->set_callee_saved( VMRegImpl::stack2reg(s5_off    ),  S5->as_VMReg() );
  4116   map->set_callee_saved( VMRegImpl::stack2reg(s6_off    ),  S6->as_VMReg() );
  4117   map->set_callee_saved( VMRegImpl::stack2reg(s7_off    ),  S7->as_VMReg() );
  4119   //oop_maps->add_gc_map( __ offset(), true, map);
  4120   oop_maps->add_gc_map( __ offset(),  map); 
  4122 #ifndef OPT_THREAD
  4123   __ get_thread(thread);
  4124 #endif
  4125   __ reset_last_Java_frame(false,false);
  4127   // Load UnrollBlock into S7
  4128   Register unroll = S7;
  4129   __ move(unroll, V0);
  4131   // Pop all the frames we must move/replace. 
  4132   // 
  4133   // Frame picture (youngest to oldest)
  4134   // 1: self-frame (no frame link)
  4135   // 2: deopting frame  (no frame link)
  4136   // 3: possible-i2c-adapter-frame 
  4137   // 4: caller of deopting frame (could be compiled/interpreted; if interpreted
  4138   //    we will create an interpreter frame and a c2i adapter here)
  4140   // Pop self-frame.  We have no frame, and must rely only on registers and SP.
  4141   __ daddiu(SP, SP, framesize * BytesPerInt);
  4143   // Pop deoptimized frame
  4144   __ lw(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes());
  4145   __ dadd(SP, SP, AT);
  4147   // register for frame pcs
  4148   Register pcs = T8;
  4149   // register for frame sizes
  4150   Register sizes = T9;
  4151   // register for frame count
  4152   Register count = T3;
  4153   // register for the sender's sp
  4154   Register sender_sp = T1;
  4156   // sp should be pointing at the return address to the caller (4)
  4157   // Load array of frame pcs into pcs (T8)
  4158   __ ld(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes());
  4160 /* 2012/9/7 Not needed in MIPS
  4161   __ addiu(SP, SP, wordSize);
  4162 */
  4164   // Load array of frame sizes into sizes (T9)
  4165   __ ld(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes());
  4166   __ lwu(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes());
  4168   // Pick up the initial fp we should save
  4169   __ ld(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes());
  4170   // Now adjust the caller's stack to make up for the extra locals
  4171   // but record the original sp so that we can save it in the skeletal interpreter
  4172   // frame and the stack walking of interpreter_sender will get the unextended sp
  4173   // value and not the "real" sp value.
  4175   __ move(sender_sp, SP);
  4176   __ lw(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes());
  4177   __ dsub(SP, SP, AT);
  4178   // Push interpreter frames in a loop
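  // Each iteration builds one skeletal interpreter frame; the values stored
  // into it here are placeholders that layout_activation_impl corrects when
  // the interpreter frames are filled in.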
  4179   Label loop;
  4180   __ bind(loop);
  4181   __ ld(T2, sizes, 0);          // Load frame size
  4182   __ ld(AT, pcs, 0);           // save return address
  4183   __ daddi(T2, T2, -2*wordSize);           // we'll push pc and fp by hand
  4184   __ push2(AT, FP);
  4185   __ move(FP, SP);
  4186   __ dsub(SP, SP, T2);                   // Prolog!
  4187   // This value is corrected by layout_activation_impl
  4188   __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize);
  4189   __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable
  4190   __ move(sender_sp, SP);       // pass to next frame
  4191   __ daddi(count, count, -1);    // decrement counter
  4192   __ daddi(sizes, sizes, wordSize);     // Bump array pointer (sizes)
  4193   __ addi(pcs, pcs, wordSize);      // Bump array pointer (pcs)
  4194   __ bne(count, R0, loop);
  4195   __ delayed()->nop();      // branch delay slot
  4197   __ ld(RA, pcs, 0);
  4199   // Re-push self-frame
  4200   __ daddi(SP, SP, -2 * wordSize);      // make room for FP and the return address
  4201   __ sd(FP, SP, 0 * wordSize);          // save old FP
  4202   __ sd(RA, SP, 1 * wordSize);          // save return address
  4203   __ move(FP, SP); 
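  // framesize counts 32-bit slots, so framesize / 2 is the frame size in
  // words; the two words already pushed (FP and RA) are subtracted out.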
  4204   __ daddi(SP, SP, -(framesize / 2 - 2) * wordSize);
  4206   // set last_Java_sp, last_Java_fp
  4207   __ set_last_Java_frame(NOREG, FP, NULL);
  4209   __ move(AT, -(StackAlignmentInBytes));
  4210   __ andr(SP, SP, AT);   // Fix stack alignment as required by ABI
  4212   __ relocate(relocInfo::internal_pc_type);
  4213   {
  4214     long save_pc = (long)__ pc() +  28 + NativeCall::return_address_offset;
  4215     __ li48(AT, (long)save_pc);
  4216   }
  4217   __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));
  4219   // Call C code.  Need thread but NOT official VM entry
  4220   // crud.  We cannot block on this call, no GC can happen.  Call should
  4221   // restore return values to their stack-slots with the new SP.
  4222   __ move(A0, thread);
  4223   __ move(A1, Deoptimization::Unpack_uncommon_trap);
  4224   __ li48(T9, (long)Deoptimization::unpack_frames);
  4225   __ jalr(T9);
  4226   __ delayed()->nop();
  4227   // Set an oopmap for the call site
  4228   //oop_maps->add_gc_map( __ offset(), true, new OopMap( framesize, 0 ) ); 
  4229   oop_maps->add_gc_map( __ offset(),  new OopMap( framesize, 0 ) );//Fu
  4231   __ reset_last_Java_frame(true,true);
  4233   // Pop self-frame.
  4234   __ leave();     // Epilog!
  4236   // Jump to interpreter
  4237   __ jr(RA);
  4238   __ delayed()->nop();
  4239   // -------------
  4240   // make sure all code is generated
  4241   masm->flush();
  4243   _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, framesize / 2);
  4244 }
  4246 #endif // COMPILER2
  4248 //------------------------------generate_handler_blob-------------------
  4249 //
  4250 // Generate a special Compile2Runtime blob that saves all registers, and sets
  4251 // up an OopMap and calls safepoint code to stop the compiled code for
  4252 // a safepoint.
  4253 //
  4254 // This blob is jumped to (via a breakpoint and the signal handler) from a
  4255 // safepoint in compiled code. 
  4257 SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int pool_type) {
  4259   // Account for thread arg in our frame
  4260   const int additional_words = 0; 
  4261   int frame_size_in_words;
  4263   assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");  
  4265   ResourceMark rm;
  4266   OopMapSet *oop_maps = new OopMapSet();
  4267   OopMap* map;
  4269   // allocate space for the code
  4270   // setup code generation tools  
  4271   CodeBuffer  buffer ("handler_blob", 2048, 512);
  4272   MacroAssembler* masm = new MacroAssembler( &buffer);
  4274   const Register thread = TREG; 
  4275   address start   = __ pc();  
  4276   address call_pc = NULL;  
  4277   bool cause_return = (pool_type == POLL_AT_RETURN);
  4278   bool save_vectors = (pool_type == POLL_AT_VECTOR_LOOP);
  4280   // If cause_return is true we are at a poll_return and there is
  4281   // the return address in RA to the caller on the nmethod
  4282   // that is safepoint. We can leave this return in RA and
  4283   // effectively complete the return and safepoint in the caller.
  4284   // Otherwise we load exception pc to RA.
  4285   __ push(thread);
  4286 #ifndef OPT_THREAD
  4287   __ get_thread(thread);
  4288 #endif
  4290   if (!cause_return) {
  4291     __ ld_ptr(RA, Address(thread, JavaThread::saved_exception_pc_offset()));
  4292   }
  4294   __ pop(thread);
  4295   map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words, save_vectors);
  4297 #ifndef OPT_THREAD
  4298   __ get_thread(thread);
  4299 #endif
  4300   // The following is basically a call_VM. However, we need the precise
  4301   // address of the call in order to generate an oopmap. Hence, we do all the
  4302   // work ourselves.
  4304   __ move(A0, thread);
  4305   __ set_last_Java_frame(NOREG, NOREG, NULL);
  4307   //__ relocate(relocInfo::internal_pc_type); 
  4308   if (!cause_return)
  4309   {
  4310 /*
  4311     intptr_t save_pc = (intptr_t)__ pc() +  NativeMovConstReg::instruction_size + NativeCall::return_address_offset + 4;
  4312     __ li48(AT, save_pc);
  4313     __ sd(AT, thread, in_bytes(JavaThread::last_Java_pc_offset()));
  4314 */
  4315   }
  4318   // do the call
  4319   //__ lui(T9, Assembler::split_high((int)call_ptr));
  4320   //__ addiu(T9, T9, Assembler::split_low((int)call_ptr));
  4321   __ call(call_ptr);
  4322   __ delayed()->nop();
  4324   // Set an oopmap for the call site.  This oopmap will map all
  4325   // oop-registers and debug-info registers as callee-saved.  This
  4326   // will allow deoptimization at this safepoint to find all possible
  4327   // debug-info recordings, as well as let GC find all oops.
  4328   oop_maps->add_gc_map(__ offset(),  map);
  4330   Label noException;
  4332   // Clear last_Java_sp again
  4333   __ reset_last_Java_frame(false, false);
  4335   __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset()));
  4336   __ beq(AT, R0, noException);
  4337   __ delayed()->nop();
  4339   // Exception pending
  4341   RegisterSaver::restore_live_registers(masm, save_vectors);
  4342   //forward_exception_entry need return address on the stack
  4343   __ push(RA);
  4344   //__ lui(T9, Assembler::split_high((int)StubRoutines::forward_exception_entry()));
  4345   //__ addiu(T9, T9, Assembler::split_low((int)StubRoutines::forward_exception_entry()));
  4346   __ li(T9, StubRoutines::forward_exception_entry());
  4347   __ jr(T9);
  4348   __ delayed()->nop();
  4350   // No exception case
  4351   __ bind(noException);
  4352   // Normal exit, register restoring and exit  
  4353   RegisterSaver::restore_live_registers(masm, save_vectors);
  4354   __ jr(RA);
  4355   __ delayed()->nop();
  4357   masm->flush();  
  4359   // Fill-out other meta info
  4360   return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words);
  4361 }
  4363 //
  4364 // generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss)
  4365 //
  4366 // Generate a stub that calls into vm to find out the proper destination
  4367 // of a java call. All the argument registers are live at this point
  4368 // but since this is generic code we don't know what they are and the caller
  4369 // must do any gc of the args.
  4370 //
  4371 RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) {
  4372   assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");
  4374   // allocate space for the code
  4375   ResourceMark rm;
  4377   //CodeBuffer buffer(name, 1000, 512);
  4378   //FIXME. aoqi. code_size
  4379   CodeBuffer buffer(name, 20000, 2048);
  4380   MacroAssembler* masm  = new MacroAssembler(&buffer);
  4382   int frame_size_words;
  4383   //we put the thread in A0 
  4385   OopMapSet *oop_maps = new OopMapSet();
  4386   OopMap* map = NULL;
  4388   int start = __ offset();
  4389   map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words);
  4392   int frame_complete = __ offset();
  4394   const Register thread = T8;
  4395   __ get_thread(thread);
  4397   __ move(A0, thread); 
  4398   __ set_last_Java_frame(noreg, FP, NULL);
  4399   //__ addi(SP, SP, -wordSize);
  4400   //align the stack before invoke native 
  4401   __ move(AT, -(StackAlignmentInBytes));
  4402   __ andr(SP, SP, AT); 
  4403   __ relocate(relocInfo::internal_pc_type);
  4404   {
  4405     intptr_t save_pc = (intptr_t)__ pc() +  NativeMovConstReg::instruction_size + NativeCall::return_address_offset + 1 * BytesPerInstWord;
  4406 //tty->print_cr(" %s :%d, name:%s, pc: %lx, save_pc: %lx, frame_size_words: %lx", __func__, __LINE__, name, __ pc(), save_pc, frame_size_words); //aoqi_test
  4407     __ li48(AT, save_pc);
  4408   }
  4409   __ sd(AT, thread, in_bytes(JavaThread::last_Java_pc_offset()));
  4411   __ call(destination);
  4412   __ delayed()->nop();
  4414   // Set an oopmap for the call site.
  4415   // We need this not only for callee-saved registers, but also for volatile
  4416   // registers that the compiler might be keeping live across a safepoint.
  4417   oop_maps->add_gc_map( __ offset() - start, map);
  4418   // V0 contains the address we are going to jump to assuming no exception got installed
  4419   __ get_thread(thread);
  4420   __ ld_ptr(SP, thread, in_bytes(JavaThread::last_Java_sp_offset()));
  4421   // clear last_Java_sp
  4422   __ reset_last_Java_frame(true, true);
  4423   // check for pending exceptions
  4424   Label pending;
  4425   __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset()));
  4426   __ bne(AT, R0, pending);
  4427   __ delayed()->nop(); 
  4428   // get the returned Method* 
  4429   //FIXME: does MIPS need this?
  4430   __ get_vm_result_2(Rmethod, thread);  // Refer to OpenJDK8
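  // Write the resolved Method* and the destination address back into the
  // register-save area so restore_live_registers() reloads them below.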
  4431   __ st_ptr(Rmethod, SP, RegisterSaver::methodOffset() * wordSize);
  4432   __ st_ptr(V0, SP, RegisterSaver::v0Offset() * wordSize);
  4433   RegisterSaver::restore_live_registers(masm);
  4435   // We are back to the original state on entry and ready to go to the callee method.
  4436   __ jr(V0);
  4437   __ delayed()->nop();
  4438   // Pending exception after the safepoint
  4440   __ bind(pending);
  4442   RegisterSaver::restore_live_registers(masm);
  4444   // exception pending => remove activation and forward to exception handler
  4445   //forward_exception_entry need return address on the stack 
  4446   __ push(RA);
  4447   __ get_thread(thread);
  4448   __ st_ptr(R0, thread, in_bytes(JavaThread::vm_result_offset())); 
  4449   __ ld_ptr(V0, thread, in_bytes(Thread::pending_exception_offset()));
  4450   __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
  4451   __ delayed()->nop();
  4452   // -------------
  4453   // make sure all code is generated
  4454   masm->flush();  
  4456   RuntimeStub* tmp = RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true);
  4457   return tmp;
  4458 }
  4460 /*void SharedRuntime::generate_stubs() {
  4461 	_wrong_method_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, 
  4462 				SharedRuntime::handle_wrong_method),"wrong_method_stub");
  4463 	_ic_miss_blob      = generate_resolve_blob(CAST_FROM_FN_PTR(address, 
  4464 				SharedRuntime::handle_wrong_method_ic_miss),"ic_miss_stub");
  4465 	_resolve_opt_virtual_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, 
  4466 				SharedRuntime::resolve_opt_virtual_call_C),"resolve_opt_virtual_call");
  4467 	_resolve_virtual_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, 
  4468 				SharedRuntime::resolve_virtual_call_C),"resolve_virtual_call");
  4469 	_resolve_static_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, 
  4470 				SharedRuntime::resolve_static_call_C),"resolve_static_call");
  4471 	_polling_page_safepoint_handler_blob =generate_handler_blob(CAST_FROM_FN_PTR(address, 
  4472 				SafepointSynchronize::handle_polling_page_exception), false);
  4473 	_polling_page_return_handler_blob =generate_handler_blob(CAST_FROM_FN_PTR(address,
  4474 				SafepointSynchronize::handle_polling_page_exception), true);
  4475 	generate_deopt_blob();
  4476 #ifdef COMPILER2
  4477 	generate_uncommon_trap_blob();
  4478 #endif // COMPILER2
  4479 }*/
  4481 extern "C" int SpinPause() {return 0;}
  4482 // extern "C" int SafeFetch32 (int * adr, int errValue) {return 0;} ;
  4483 // extern "C" intptr_t SafeFetchN (intptr_t * adr, intptr_t errValue) {return *adr; } ;