src/cpu/mips/vm/sharedRuntime_mips_64.cpp

changeset 41:d885f8d65c58 (parent 21:afa196fe1bf6, child 361:63fe053f1032)
author: fujie, Tue, 26 Jul 2016 17:06:17 +0800
"Add multiply word to GPR instruction (mul) in MIPS assembler."

/*
 * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "code/debugInfoRec.hpp"
#include "code/icBuffer.hpp"
#include "code/vtableStubs.hpp"
#include "interpreter/interpreter.hpp"
#include "oops/compiledICHolder.hpp"
#include "prims/jvmtiRedefineClassesTrace.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/vframeArray.hpp"
#include "vmreg_mips.inline.hpp"
#ifdef COMPILER1
#include "c1/c1_Runtime1.hpp"
#endif
#ifdef COMPILER2
#include "opto/runtime.hpp"
#endif

#define __ masm->

const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size;
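
// Added note (an assumption, not in the original source): HotSpot's
// VMRegImpl::stack_slot_size is 4 bytes, so with a 16-byte
// StackAlignmentInBytes this constant evaluates to 16 / 4 = 4 slots.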
class RegisterSaver {
  enum { FPU_regs_live = 32 };
  // Capture info about frame layout
  enum layout {
#define DEF_LAYOUT_OFFS(regname)  regname ## _off,  regname ## H_off,
    DEF_LAYOUT_OFFS(for_16_bytes_aligned)
    DEF_LAYOUT_OFFS(fpr0)
    DEF_LAYOUT_OFFS(fpr1)
    DEF_LAYOUT_OFFS(fpr2)
    DEF_LAYOUT_OFFS(fpr3)
    DEF_LAYOUT_OFFS(fpr4)
    DEF_LAYOUT_OFFS(fpr5)
    DEF_LAYOUT_OFFS(fpr6)
    DEF_LAYOUT_OFFS(fpr7)
    DEF_LAYOUT_OFFS(fpr8)
    DEF_LAYOUT_OFFS(fpr9)
    DEF_LAYOUT_OFFS(fpr10)
    DEF_LAYOUT_OFFS(fpr11)
    DEF_LAYOUT_OFFS(fpr12)
    DEF_LAYOUT_OFFS(fpr13)
    DEF_LAYOUT_OFFS(fpr14)
    DEF_LAYOUT_OFFS(fpr15)
    DEF_LAYOUT_OFFS(fpr16)
    DEF_LAYOUT_OFFS(fpr17)
    DEF_LAYOUT_OFFS(fpr18)
    DEF_LAYOUT_OFFS(fpr19)
    DEF_LAYOUT_OFFS(fpr20)
    DEF_LAYOUT_OFFS(fpr21)
    DEF_LAYOUT_OFFS(fpr22)
    DEF_LAYOUT_OFFS(fpr23)
    DEF_LAYOUT_OFFS(fpr24)
    DEF_LAYOUT_OFFS(fpr25)
    DEF_LAYOUT_OFFS(fpr26)
    DEF_LAYOUT_OFFS(fpr27)
    DEF_LAYOUT_OFFS(fpr28)
    DEF_LAYOUT_OFFS(fpr29)
    DEF_LAYOUT_OFFS(fpr30)
    DEF_LAYOUT_OFFS(fpr31)

    DEF_LAYOUT_OFFS(v0)
    DEF_LAYOUT_OFFS(v1)
    DEF_LAYOUT_OFFS(a0)
    DEF_LAYOUT_OFFS(a1)
    DEF_LAYOUT_OFFS(a2)
    DEF_LAYOUT_OFFS(a3)
    DEF_LAYOUT_OFFS(a4)
    DEF_LAYOUT_OFFS(a5)
    DEF_LAYOUT_OFFS(a6)
    DEF_LAYOUT_OFFS(a7)
    DEF_LAYOUT_OFFS(t0)
    DEF_LAYOUT_OFFS(t1)
    DEF_LAYOUT_OFFS(t2)
    DEF_LAYOUT_OFFS(t3)
    DEF_LAYOUT_OFFS(s0)
    DEF_LAYOUT_OFFS(s1)
    DEF_LAYOUT_OFFS(s2)
    DEF_LAYOUT_OFFS(s3)
    DEF_LAYOUT_OFFS(s4)
    DEF_LAYOUT_OFFS(s5)
    DEF_LAYOUT_OFFS(s6)
    DEF_LAYOUT_OFFS(s7)
    DEF_LAYOUT_OFFS(t8)
    DEF_LAYOUT_OFFS(t9)

    DEF_LAYOUT_OFFS(gp)
    DEF_LAYOUT_OFFS(fp)
    DEF_LAYOUT_OFFS(return)
/*
    fpr0_off, fpr1_off,
    fpr2_off, fpr3_off,
    fpr4_off, fpr5_off,
    fpr6_off, fpr7_off,
    fpr8_off, fpr9_off,
    fpr10_off, fpr11_off,
    fpr12_off, fpr13_off,
    fpr14_off, fpr15_off,
    fpr16_off, fpr17_off,
    fpr18_off, fpr19_off,
    fpr20_off, fpr21_off,
    fpr22_off, fpr23_off,
    fpr24_off, fpr25_off,
    fpr26_off, fpr27_off,
    fpr28_off, fpr29_off,
    fpr30_off, fpr31_off,

    v0_off, v1_off,
    a0_off, a1_off,
    a2_off, a3_off,
    a4_off, a5_off,
    a6_off, a7_off,
    t0_off, t1_off, t2_off, t3_off,
    s0_off, s1_off, s2_off, s3_off, s4_off, s5_off, s6_off, s7_off,
    t8_off, t9_off,

    gp_off, fp_off,
    return_off,
*/
    reg_save_size
  };
  public:

  static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors = false);
  static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false);
  //FIXME, I have no idea which register to use
  static int raOffset(void) { return return_off / 2; }
  //Rmethod
  static int methodOffset(void) { return s3_off / 2; }

  static int v0Offset(void) { return v0_off / 2; }
  static int v1Offset(void) { return v1_off / 2; }

  static int fpResultOffset(void) { return fpr0_off / 2; }

  // During deoptimization only the result registers need to be restored;
  // all the other values have already been extracted.

  static void restore_result_registers(MacroAssembler* masm);
};
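
// Added illustration (not in the original source): each DEF_LAYOUT_OFFS(regname)
// expands to two consecutive 32-bit slot indices, regname_off and regnameH_off,
// so every 64-bit register owns a pair of jint slots. With the 60 entries above
// (1 alignment pad + 32 FPRs + 27 GPRs including RA), reg_save_size is 120
// slots, and a register's byte offset from SP once the frame is pushed is
// regname_off * jintSize; e.g. fpr0_off == 2, so F0 is saved at SP + 8.
// The xxOffset() accessors divide by 2 to convert slot pairs into word indices.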
OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {

/*
  int frame_words = reg_save_size + additional_frame_words;
  int frame_size_in_bytes =  frame_words * wordSize;
  *total_frame_words = frame_words;
*/
  // Always make the frame size 16-byte aligned
  int frame_size_in_bytes = round_to(additional_frame_words*wordSize +
                                     reg_save_size*BytesPerInt, 16);
  // OopMap frame size is in compiler stack slots (jint's) not bytes or words
  int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
  // The caller will allocate additional_frame_words
  int additional_frame_slots = additional_frame_words*wordSize / BytesPerInt;
  // CodeBlob frame size is in words.
  int frame_size_in_words = frame_size_in_bytes / wordSize;
  *total_frame_words = frame_size_in_words;
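  // Worked example (added; assumes reg_save_size == 120 as derived from the
  // layout enum): with additional_frame_words == 0,
  //   frame_size_in_bytes = round_to(120 * 4, 16) = 480
  //   frame_size_in_slots = 480 / 4 = 120
  //   frame_size_in_words = 480 / 8 = 60   (64-bit wordSize == 8)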
  // save registers, fpu state, and flags
  // We assume the caller already has the return address slot on the stack
  // We push ebp twice in this sequence because we want the real ebp
  // to be under the return like a normal enter and we want to use pushad
  // We push by hand instead of using pushad

  __ daddiu(SP, SP, -reg_save_size * jintSize);

  __ sdc1(F0, SP, fpr0_off * jintSize);    __ sdc1(F1, SP, fpr1_off * jintSize);
  __ sdc1(F2, SP, fpr2_off * jintSize);    __ sdc1(F3, SP, fpr3_off * jintSize);
  __ sdc1(F4, SP, fpr4_off * jintSize);    __ sdc1(F5, SP, fpr5_off * jintSize);
  __ sdc1(F6, SP, fpr6_off * jintSize);    __ sdc1(F7, SP, fpr7_off * jintSize);
  __ sdc1(F8, SP, fpr8_off * jintSize);    __ sdc1(F9, SP, fpr9_off * jintSize);
  __ sdc1(F10, SP, fpr10_off * jintSize);  __ sdc1(F11, SP, fpr11_off * jintSize);
  __ sdc1(F12, SP, fpr12_off * jintSize);  __ sdc1(F13, SP, fpr13_off * jintSize);
  __ sdc1(F14, SP, fpr14_off * jintSize);  __ sdc1(F15, SP, fpr15_off * jintSize);
  __ sdc1(F16, SP, fpr16_off * jintSize);  __ sdc1(F17, SP, fpr17_off * jintSize);
  __ sdc1(F18, SP, fpr18_off * jintSize);  __ sdc1(F19, SP, fpr19_off * jintSize);
  __ sdc1(F20, SP, fpr20_off * jintSize);  __ sdc1(F21, SP, fpr21_off * jintSize);
  __ sdc1(F22, SP, fpr22_off * jintSize);  __ sdc1(F23, SP, fpr23_off * jintSize);
  __ sdc1(F24, SP, fpr24_off * jintSize);  __ sdc1(F25, SP, fpr25_off * jintSize);
  __ sdc1(F26, SP, fpr26_off * jintSize);  __ sdc1(F27, SP, fpr27_off * jintSize);
  __ sdc1(F28, SP, fpr28_off * jintSize);  __ sdc1(F29, SP, fpr29_off * jintSize);
  __ sdc1(F30, SP, fpr30_off * jintSize);  __ sdc1(F31, SP, fpr31_off * jintSize);

  __ sd(V0, SP, v0_off * jintSize);  __ sd(V1, SP, v1_off * jintSize);
  __ sd(A0, SP, a0_off * jintSize);  __ sd(A1, SP, a1_off * jintSize);
  __ sd(A2, SP, a2_off * jintSize);  __ sd(A3, SP, a3_off * jintSize);
  __ sd(A4, SP, a4_off * jintSize);  __ sd(A5, SP, a5_off * jintSize);
  __ sd(A6, SP, a6_off * jintSize);  __ sd(A7, SP, a7_off * jintSize);
  __ sd(T0, SP, t0_off * jintSize);
  __ sd(T1, SP, t1_off * jintSize);
  __ sd(T2, SP, t2_off * jintSize);
  __ sd(T3, SP, t3_off * jintSize);
  __ sd(S0, SP, s0_off * jintSize);
  __ sd(S1, SP, s1_off * jintSize);
  __ sd(S2, SP, s2_off * jintSize);
  __ sd(S3, SP, s3_off * jintSize);
  __ sd(S4, SP, s4_off * jintSize);
  __ sd(S5, SP, s5_off * jintSize);
  __ sd(S6, SP, s6_off * jintSize);
  __ sd(S7, SP, s7_off * jintSize);

  __ sd(T8, SP, t8_off * jintSize);
  __ sd(T9, SP, t9_off * jintSize);

  __ sd(GP, SP, gp_off * jintSize);
  __ sd(FP, SP, fp_off * jintSize);
  __ sd(RA, SP, return_off * jintSize);
  __ daddi(FP, SP, fp_off * jintSize);
  OopMapSet *oop_maps = new OopMapSet();
  //OopMap* map =  new OopMap( frame_words, 0 );
  OopMap* map =  new OopMap( frame_size_in_slots, 0 );

//#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_words)
#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_slots)
  map->set_callee_saved(STACK_OFFSET( v0_off), V0->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( v1_off), V1->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( a0_off), A0->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( a1_off), A1->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( a2_off), A2->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( a3_off), A3->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( a4_off), A4->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( a5_off), A5->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( a6_off), A6->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( a7_off), A7->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( t0_off), T0->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( t1_off), T1->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( t2_off), T2->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( t3_off), T3->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( s0_off), S0->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( s1_off), S1->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( s2_off), S2->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( s3_off), S3->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( s4_off), S4->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( s5_off), S5->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( s6_off), S6->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( s7_off), S7->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( t8_off), T8->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( t9_off), T9->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( gp_off), GP->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fp_off), FP->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( return_off), RA->as_VMReg());

  map->set_callee_saved(STACK_OFFSET( fpr0_off), F0->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr1_off), F1->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr2_off), F2->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr3_off), F3->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr4_off), F4->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr5_off), F5->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr6_off), F6->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr7_off), F7->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr8_off), F8->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr9_off), F9->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr10_off), F10->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr11_off), F11->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr12_off), F12->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr13_off), F13->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr14_off), F14->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr15_off), F15->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr16_off), F16->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr17_off), F17->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr18_off), F18->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr19_off), F19->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr20_off), F20->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr21_off), F21->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr22_off), F22->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr23_off), F23->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr24_off), F24->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr25_off), F25->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr26_off), F26->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr27_off), F27->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr28_off), F28->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr29_off), F29->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr30_off), F30->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr31_off), F31->as_VMReg());

/*
  if (true) {
    map->set_callee_saved(STACK_OFFSET( v0H_off), V0->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( v1H_off), V1->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( a0H_off), A0->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( a1H_off), A1->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( a2H_off), A2->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( a3H_off), A3->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( a4H_off), A4->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( a5H_off), A5->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( a6H_off), A6->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( a7H_off), A7->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( t0H_off), T0->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( t1H_off), T1->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( t2H_off), T2->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( t3H_off), T3->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( s0H_off), S0->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( s1H_off), S1->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( s2H_off), S2->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( s3H_off), S3->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( s4H_off), S4->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( s5H_off), S5->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( s6H_off), S6->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( s7H_off), S7->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( t8H_off), T8->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( t9H_off), T9->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( gpH_off), GP->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( fpH_off), FP->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( returnH_off), RA->as_VMReg()->next());

    map->set_callee_saved(STACK_OFFSET( fpr0H_off), F0->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( fpr2H_off), F2->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( fpr4H_off), F4->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( fpr6H_off), F6->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( fpr8H_off), F8->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( fpr10H_off), F10->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( fpr12H_off), F12->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( fpr14H_off), F14->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( fpr16H_off), F16->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( fpr18H_off), F18->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( fpr20H_off), F20->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( fpr22H_off), F22->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( fpr24H_off), F24->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( fpr26H_off), F26->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( fpr28H_off), F28->as_VMReg()->next());
    map->set_callee_saved(STACK_OFFSET( fpr30H_off), F30->as_VMReg()->next());
  }
*/
#undef STACK_OFFSET
  return map;
}
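
// Usage sketch (added for illustration; not part of this file): a typical
// stub generator pairs the save/restore calls around a runtime call that may
// stop for GC, e.g.
//
//   int frame_words;
//   OopMap* map = RegisterSaver::save_live_registers(masm, 0, &frame_words);
//   ... emit the runtime call, record its pc offset ...
//   oop_maps->add_gc_map(pc_offset, map);
//   RegisterSaver::restore_live_registers(masm);
//
// so the stack walker can locate every saved register slot via the OopMap.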
// Pop the current frame and restore all the registers that we
// saved.
void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
  __ ldc1(F0, SP, fpr0_off * jintSize);    __ ldc1(F1, SP, fpr1_off * jintSize);
  __ ldc1(F2, SP, fpr2_off * jintSize);    __ ldc1(F3, SP, fpr3_off * jintSize);
  __ ldc1(F4, SP, fpr4_off * jintSize);    __ ldc1(F5, SP, fpr5_off * jintSize);
  __ ldc1(F6, SP, fpr6_off * jintSize);    __ ldc1(F7, SP, fpr7_off * jintSize);
  __ ldc1(F8, SP, fpr8_off * jintSize);    __ ldc1(F9, SP, fpr9_off * jintSize);
  __ ldc1(F10, SP, fpr10_off * jintSize);  __ ldc1(F11, SP, fpr11_off * jintSize);
  __ ldc1(F12, SP, fpr12_off * jintSize);  __ ldc1(F13, SP, fpr13_off * jintSize);
  __ ldc1(F14, SP, fpr14_off * jintSize);  __ ldc1(F15, SP, fpr15_off * jintSize);
  __ ldc1(F16, SP, fpr16_off * jintSize);  __ ldc1(F17, SP, fpr17_off * jintSize);
  __ ldc1(F18, SP, fpr18_off * jintSize);  __ ldc1(F19, SP, fpr19_off * jintSize);
  __ ldc1(F20, SP, fpr20_off * jintSize);  __ ldc1(F21, SP, fpr21_off * jintSize);
  __ ldc1(F22, SP, fpr22_off * jintSize);  __ ldc1(F23, SP, fpr23_off * jintSize);
  __ ldc1(F24, SP, fpr24_off * jintSize);  __ ldc1(F25, SP, fpr25_off * jintSize);
  __ ldc1(F26, SP, fpr26_off * jintSize);  __ ldc1(F27, SP, fpr27_off * jintSize);
  __ ldc1(F28, SP, fpr28_off * jintSize);  __ ldc1(F29, SP, fpr29_off * jintSize);
  __ ldc1(F30, SP, fpr30_off * jintSize);  __ ldc1(F31, SP, fpr31_off * jintSize);

  __ ld(V0, SP, v0_off * jintSize);  __ ld(V1, SP, v1_off * jintSize);
  __ ld(A0, SP, a0_off * jintSize);  __ ld(A1, SP, a1_off * jintSize);
  __ ld(A2, SP, a2_off * jintSize);  __ ld(A3, SP, a3_off * jintSize);
  __ ld(A4, SP, a4_off * jintSize);  __ ld(A5, SP, a5_off * jintSize);
  __ ld(A6, SP, a6_off * jintSize);  __ ld(A7, SP, a7_off * jintSize);
  __ ld(T0, SP, t0_off * jintSize);
  __ ld(T1, SP, t1_off * jintSize);
  __ ld(T2, SP, t2_off * jintSize);
  __ ld(T3, SP, t3_off * jintSize);
  __ ld(S0, SP, s0_off * jintSize);
  __ ld(S1, SP, s1_off * jintSize);
  __ ld(S2, SP, s2_off * jintSize);
  __ ld(S3, SP, s3_off * jintSize);
  __ ld(S4, SP, s4_off * jintSize);
  __ ld(S5, SP, s5_off * jintSize);
  __ ld(S6, SP, s6_off * jintSize);
  __ ld(S7, SP, s7_off * jintSize);

  __ ld(T8, SP, t8_off * jintSize);
  __ ld(T9, SP, t9_off * jintSize);

  __ ld(GP, SP, gp_off * jintSize);
  __ ld(FP, SP, fp_off * jintSize);
  __ ld(RA, SP, return_off * jintSize);

  __ addiu(SP, SP, reg_save_size * jintSize);
}

// Pop the current frame and restore the registers that might be holding
// a result.
// FIXME: what if the result is a float?
void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
  // Just restore the result registers. Only used by deoptimization. By
  // now any callee-saved register that needs to be restored to a c2
  // caller of the deoptee has been extracted into the vframeArray
  // and will be stuffed into the c2i adapter we create for later
  // restoration so only result registers need to be restored here.
  //
  __ ld(V0, SP, v0_off * jintSize);
  __ ld(V1, SP, v1_off * jintSize);
  __ addiu(SP, SP, return_off * jintSize);
}
// Is vector's size (in bytes) bigger than a size saved by default?
// 16 bytes XMM registers are saved by default using fxsave/fxrstor instructions.
bool SharedRuntime::is_wide_vector(int size) {
  return size > 16;
}

// The java_calling_convention describes stack locations as ideal slots on
// a frame with no abi restrictions. Since we must observe abi restrictions
// (like the placement of the register window) the slots must be biased by
// the following value.

static int reg2offset_in(VMReg r) {
  // Account for saved ebp and return address
  // This should really be in_preserve_stack_slots
  return (r->reg2stack() + 2 * VMRegImpl::slots_per_word) * VMRegImpl::stack_slot_size;  // + 2 * VMRegImpl::stack_slot_size);
}

static int reg2offset_out(VMReg r) {
  return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
}
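
// Worked example (added; slot numbers assumed for illustration): with 4-byte
// stack slots and slots_per_word == 2 on a 64-bit build, a VMReg at stack
// slot 3 yields reg2offset_in = (3 + 4) * 4 = 28 bytes, the extra 2 words
// skipping the saved frame pointer and return address; reg2offset_out adds
// only out_preserve_stack_slots().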
// ---------------------------------------------------------------------------
// Read the array of BasicTypes from a signature, and compute where the
// arguments should go.  Values in the VMRegPair regs array refer to 4-byte
// quantities.  Values less than SharedInfo::stack0 are registers, those above
// refer to 4-byte stack slots.  All stack slots are based off of the stack pointer
// as framesizes are fixed.
// VMRegImpl::stack0 refers to the first slot 0(sp),
// and VMRegImpl::stack0+1 refers to the memory word 4 bytes higher.  Registers
// up to RegisterImpl::number_of_registers are the 32-bit
// integer registers.

// Pass first five oop/int args in registers T0, A0 - A3.
// Pass float/double/long args in stack.
// Doubles have precedence, so if you pass a mix of floats and doubles
// the doubles will grab the registers before the floats will.

// Note: the INPUTS in sig_bt are in units of Java argument words, which are
// either 32-bit or 64-bit depending on the build.  The OUTPUTS are in 32-bit
// units regardless of build. Of course for i486 there is no 64 bit build.

// ---------------------------------------------------------------------------
// The compiled Java calling convention.
// Pass first five oop/int args in registers T0, A0 - A3.
// Pass float/double/long args in stack.
// Doubles have precedence, so if you pass a mix of floats and doubles
// the doubles will grab the registers before the floats will.
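//
// Added walk-through (not in the original source): for a Java signature
// (int, long, double, float), sig_bt is
// [T_INT, T_LONG, T_VOID, T_DOUBLE, T_VOID, T_FLOAT] and the loop below
// assigns T_INT -> T0, T_LONG -> A0 (a set2 pair), T_DOUBLE -> F14,
// T_FLOAT -> F15. Note that the single 'args' counter indexes both register
// tables, so an argument's position, not the count of its own kind, selects
// its register.
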
int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
                                           VMRegPair *regs,
                                           int total_args_passed,
                                           int is_outgoing) {
//#define aoqi_test
#ifdef aoqi_test
  tty->print_cr(" SharedRuntime::%s :%d, total_args_passed: %d", __func__, __LINE__, total_args_passed);
#endif

  // Create the mapping between argument positions and
  // registers.
  //static const Register INT_ArgReg[Argument::n_int_register_parameters_j] = {
  static const Register INT_ArgReg[Argument::n_register_parameters + 1] = {
    T0, A0, A1, A2, A3, A4, A5, A6, A7
  };
  //static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters_j] = {
  static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = {
    F12, F13, F14, F15, F16, F17, F18, F19
  };

  uint args = 0;
  uint stk_args = 0; // inc by 2 each time

  for (int i = 0; i < total_args_passed; i++) {
    switch (sig_bt[i]) {
    case T_VOID:
      // halves of T_LONG or T_DOUBLE
      assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
      regs[i].set_bad();
      break;
    case T_BOOLEAN:
    case T_CHAR:
    case T_BYTE:
    case T_SHORT:
    case T_INT:
      if (args < Argument::n_register_parameters) {
        regs[i].set1(INT_ArgReg[args++]->as_VMReg());
      } else {
        regs[i].set1(VMRegImpl::stack2reg(stk_args));
        stk_args += 2;
      }
      break;
    case T_LONG:
      assert(sig_bt[i + 1] == T_VOID, "expecting half");
      // fall through
    case T_OBJECT:
    case T_ARRAY:
    case T_ADDRESS:
      if (args < Argument::n_register_parameters) {
        regs[i].set2(INT_ArgReg[args++]->as_VMReg());
      } else {
        regs[i].set2(VMRegImpl::stack2reg(stk_args));
        stk_args += 2;
      }
      break;
    case T_FLOAT:
      if (args < Argument::n_float_register_parameters) {
        regs[i].set1(FP_ArgReg[args++]->as_VMReg());
      } else {
        regs[i].set1(VMRegImpl::stack2reg(stk_args));
        stk_args += 2;
      }
      break;
    case T_DOUBLE:
      assert(sig_bt[i + 1] == T_VOID, "expecting half");
      if (args < Argument::n_float_register_parameters) {
        regs[i].set2(FP_ArgReg[args++]->as_VMReg());
      } else {
        regs[i].set2(VMRegImpl::stack2reg(stk_args));
        stk_args += 2;
      }
      break;
    default:
      ShouldNotReachHere();
      break;
    }
#ifdef aoqi_test
    tty->print_cr(" SharedRuntime::%s :%d, sig_bt[%d]: %d, reg[%d]:%d|%d, stk_args:%d", __func__, __LINE__, i, sig_bt[i], i, regs[i].first(), regs[i].second(), stk_args);
#endif
  }

  return round_to(stk_args, 2);
/*
  // Starting stack position for args on stack
  uint    stack = 0;

  // Pass first five oop/int args in registers T0, A0 - A3.
  uint reg_arg0 = 9999;
  uint reg_arg1 = 9999;
  uint reg_arg2 = 9999;
  uint reg_arg3 = 9999;
  uint reg_arg4 = 9999;

  // Pass doubles & longs & floats aligned on the stack.  First count stack slots for doubles
  int i;
  for( i = 0; i < total_args_passed; i++) {
    if( sig_bt[i] == T_DOUBLE || sig_bt[i] == T_LONG ) {
      stack += 2;
    }
  }
  int dstack = 0;  // Separate counter for placing doubles
  for( i = 0; i < total_args_passed; i++) {
    // From the type and the argument number (count) compute the location
    switch( sig_bt[i] ) {
    case T_SHORT:
    case T_CHAR:
    case T_BYTE:
    case T_BOOLEAN:
    case T_INT:
    case T_ARRAY:
    case T_OBJECT:
    case T_ADDRESS:
      if( reg_arg0 == 9999 )  {
        reg_arg0 = i;
        regs[i].set1(T0->as_VMReg());
      } else if( reg_arg1 == 9999 ) {
        reg_arg1 = i;
        regs[i].set1(A0->as_VMReg());
      } else if( reg_arg2 == 9999 ) {
        reg_arg2 = i;
        regs[i].set1(A1->as_VMReg());
      } else if( reg_arg3 == 9999 ) {
        reg_arg3 = i;
        regs[i].set1(A2->as_VMReg());
      } else if( reg_arg4 == 9999 ) {
        reg_arg4 = i;
        regs[i].set1(A3->as_VMReg());
      } else {
        regs[i].set1(VMRegImpl::stack2reg(stack++));
      }
      break;
    case T_FLOAT:
      regs[i].set1(VMRegImpl::stack2reg(stack++));
      break;
    case T_LONG:
      assert(sig_bt[i+1] == T_VOID, "missing Half" );
      regs[i].set2(VMRegImpl::stack2reg(dstack));
      dstack += 2;
      break;
    case T_DOUBLE:
      assert(sig_bt[i+1] == T_VOID, "missing Half" );
      regs[i].set2(VMRegImpl::stack2reg(dstack));
      dstack += 2;
      break;
    case T_VOID:
      regs[i].set_bad();
      break;
    default:
      ShouldNotReachHere();
      break;
    }
  }
  // return value can be odd number of VMRegImpl stack slots make multiple of 2
  return round_to(stack, 2);
*/
}
// Helper class mostly to avoid passing masm everywhere, and handle store
// displacement overflow logic for LP64
class AdapterGenerator {
  MacroAssembler *masm;
#ifdef _LP64
  Register Rdisp;
  void set_Rdisp(Register r)  { Rdisp = r; }
#endif // _LP64

  void patch_callers_callsite();
//  void tag_c2i_arg(frame::Tag t, Register base, int st_off, Register scratch);

  // base+st_off points to top of argument
  int arg_offset(const int st_off) { return st_off; }
  int next_arg_offset(const int st_off) {
    return st_off - Interpreter::stackElementSize;
  }
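
  // Added note (assumption, not in the original): Interpreter::stackElementSize
  // is one machine word (8 bytes on MIPS64), so next_arg_offset() returns the
  // interpreter stack slot immediately below st_off.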
#ifdef _LP64
  // On _LP64 argument slot values are loaded first into a register
  // because they might not fit into displacement.
  Register arg_slot(const int st_off);
  Register next_arg_slot(const int st_off);
#else
  int arg_slot(const int st_off)      { return arg_offset(st_off); }
  int next_arg_slot(const int st_off) { return next_arg_offset(st_off); }
#endif // _LP64

  // Stores long into offset pointed to by base
  void store_c2i_long(Register r, Register base,
                      const int st_off, bool is_stack);
  void store_c2i_object(Register r, Register base,
                        const int st_off);
  void store_c2i_int(Register r, Register base,
                     const int st_off);
  void store_c2i_double(VMReg r_2,
                        VMReg r_1, Register base, const int st_off);
  void store_c2i_float(FloatRegister f, Register base,
                       const int st_off);

 public:
  //void tag_stack(const BasicType sig, int st_off);
  void gen_c2i_adapter(int total_args_passed,
                       // VMReg max_arg,
                       int comp_args_on_stack, // VMRegStackSlots
                       const BasicType *sig_bt,
                       const VMRegPair *regs,
                       Label& skip_fixup);
  void gen_i2c_adapter(int total_args_passed,
                       // VMReg max_arg,
                       int comp_args_on_stack, // VMRegStackSlots
                       const BasicType *sig_bt,
                       const VMRegPair *regs);

  AdapterGenerator(MacroAssembler *_masm) : masm(_masm) {}
};
// Patch the caller's callsite with entry to compiled code if it exists.
void AdapterGenerator::patch_callers_callsite() {
  Label L;
  //FIXME: what is stored in eax?
  //__ verify_oop(ebx);
  __ verify_oop(Rmethod);
  // __ cmpl(Address(ebx, in_bytes(Method::code_offset())), NULL_WORD);
  __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset()));
  //__ jcc(Assembler::equal, L);
  __ beq(AT, R0, L);
  __ delayed()->nop();
  // Schedule the branch target address early.
  // Call into the VM to patch the caller, then jump to compiled callee
  // eax isn't live so capture return address while we easily can
  //  __ movl(eax, Address(esp, 0));
  //  __ lw(T5,SP,0);
  __ move(V0, RA);

  __ pushad();
  //jerome_for_debug
  // __ pushad();
  // __ pushfd();
#ifdef COMPILER2
  // C2 may leave the stack dirty if not in SSE2+ mode
  __ empty_FPU_stack();
#endif /* COMPILER2 */

  // VM needs caller's callsite
  //  __ pushl(eax);

  // VM needs target method
  // __ pushl(ebx);
  //  __ push(Rmethod);
  // __ verify_oop(ebx);

  __ move(A0, Rmethod);
  __ move(A1, V0);
  //  __ addi(SP, SP, -8);
  // we should preserve the return address
  __ verify_oop(Rmethod);
  __ move(S0, SP);
  __ move(AT, -(StackAlignmentInBytes));   // align the stack
  __ andr(SP, SP, AT);
  __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite),
          relocInfo::runtime_call_type);
  //__ addl(esp, 2*wordSize);

  __ delayed()->nop();
  //  __ addi(SP, SP, 8);
  //  __ popfd();
  __ move(SP, S0);
  __ popad();
  __ bind(L);
}
/*
void AdapterGenerator::tag_c2i_arg(frame::Tag t, Register base, int st_off,
                 Register scratch) {
  Unimplemented();
}*/

#ifdef _LP64
Register AdapterGenerator::arg_slot(const int st_off) {
  Unimplemented();
}

Register AdapterGenerator::next_arg_slot(const int st_off){
  Unimplemented();
}
#endif // _LP64

// Stores long into offset pointed to by base
void AdapterGenerator::store_c2i_long(Register r, Register base,
                                      const int st_off, bool is_stack) {
  Unimplemented();
}

void AdapterGenerator::store_c2i_object(Register r, Register base,
                                        const int st_off) {
  Unimplemented();
}

void AdapterGenerator::store_c2i_int(Register r, Register base,
                                     const int st_off) {
  Unimplemented();
}

// Stores into offset pointed to by base
void AdapterGenerator::store_c2i_double(VMReg r_2,
                                        VMReg r_1, Register base, const int st_off) {
  Unimplemented();
}

void AdapterGenerator::store_c2i_float(FloatRegister f, Register base,
                                       const int st_off) {
  Unimplemented();
}

/*
void  AdapterGenerator::tag_stack(const BasicType sig, int st_off) {
  if (TaggedStackInterpreter) {
    int tag_offset = st_off + Interpreter::expr_tag_offset_in_bytes(0);
    if (sig == T_OBJECT || sig == T_ARRAY) {
      //   __ movl(Address(esp, tag_offset), frame::TagReference);
      //  __ addi(AT,R0, frame::TagReference);

      __ move(AT, frame::TagReference);
      __ sw (AT, SP, tag_offset);
    } else if (sig == T_LONG || sig == T_DOUBLE) {
      int next_tag_offset = st_off + Interpreter::expr_tag_offset_in_bytes(1);
      // __ movl(Address(esp, next_tag_offset), frame::TagValue);
      // __ addi(AT,R0, frame::TagValue);
      __ move(AT, frame::TagValue);
      __ sw (AT, SP, next_tag_offset);
      //__ movl(Address(esp, tag_offset), frame::TagValue);
      //   __ addi(AT,R0, frame::TagValue);
      __ move(AT, frame::TagValue);
      __ sw (AT, SP, tag_offset);

    } else {
      //  __ movl(Address(esp, tag_offset), frame::TagValue);
      //__ addi(AT,R0, frame::TagValue);
      __ move(AT, frame::TagValue);
      __ sw (AT, SP, tag_offset);

    }
  }
}*/
void AdapterGenerator::gen_c2i_adapter(
                            int total_args_passed,
                            // VMReg max_arg,
                            int comp_args_on_stack, // VMRegStackSlots
                            const BasicType *sig_bt,
                            const VMRegPair *regs,
                            Label& skip_fixup) {

  // Before we get into the guts of the C2I adapter, see if we should be here
  // at all.  We've come from compiled code and are attempting to jump to the
  // interpreter, which means the caller made a static call to get here
  // (vcalls always get a compiled target if there is one).  Check for a
  // compiled target.  If there is one, we need to patch the caller's call.
  // However we will run interpreted if we come thru here. The next pass
  // thru the call site will run compiled. If we ran compiled here then
  // we can (theoretically) do endless i2c->c2i->i2c transitions during
  // deopt/uncommon trap cycles. If we always go interpreted here then
  // we can have at most one and don't need to play any tricks to keep
  // from endlessly growing the stack.
  //
  // Actually if we detected that we had an i2c->c2i transition here we
  // ought to be able to reset the world back to the state of the interpreted
  // call and not bother building another interpreter arg area. We don't
  // do that at this point.

  patch_callers_callsite();

  __ bind(skip_fixup);

#ifdef COMPILER2
  __ empty_FPU_stack();
#endif /* COMPILER2 */
  // this is for native ?
  // Since all args are passed on the stack, total_args_passed * interpreter_
  // stack_element_size is the space we need.
  int extraspace = total_args_passed * Interpreter::stackElementSize;

  // stack is aligned, keep it that way
  extraspace = round_to(extraspace, 2*wordSize);

  // Get return address
  // __ popl(eax);
  //__ pop(T4);
  __ move(V0, RA);
  // set senderSP value
  // __ movl(esi, esp);
  // refer to interpreter_mips.cpp:generate_asm_entry
  __ move(Rsender, SP);
  //__ subl(esp, extraspace);
  __ addi(SP, SP, -extraspace);

  // Now write the args into the outgoing interpreter space
  for (int i = 0; i < total_args_passed; i++) {
    if (sig_bt[i] == T_VOID) {
      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE),
             "missing half");
      continue;
    }

    // st_off points to lowest address on stack.
    int st_off = ((total_args_passed - 1) - i) * Interpreter::stackElementSize;
#ifdef aoqi_test
    tty->print_cr(" AdapterGenerator::%s :%d, sig_bt[%d]:%d, total_args_passed:%d, st_off:%d", __func__, __LINE__, i, sig_bt[i], total_args_passed, st_off);
#endif
    // Say 4 args:
    // i   st_off
    // 0   12 T_LONG
    // 1    8 T_VOID
    // 2    4 T_OBJECT
    // 3    0 T_BOOL
    VMReg r_1 = regs[i].first();
    VMReg r_2 = regs[i].second();
    if (!r_1->is_valid()) {
      assert(!r_2->is_valid(), "");
      continue;
    }

    if (r_1->is_stack()) {
      // memory to memory use fpu stack top
      int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
#ifdef aoqi_test
      tty->print_cr(" AdapterGenerator::%s :%d, r_1->is_stack, ld_off:%x", __func__, __LINE__, ld_off);
#endif

      if (!r_2->is_valid()) {
#ifdef aoqi_test
        tty->print_cr(" AdapterGenerator::%s :%d, !r_2->is_valid, ld_off:%x", __func__, __LINE__, ld_off);
#endif
        __ ld_ptr(AT, SP, ld_off);
        __ st_ptr(AT, SP, st_off);
        //tag_stack(sig_bt[i], st_off);
      } else {
#ifdef aoqi_test
        tty->print_cr(" AdapterGenerator::%s :%d, r_2->is_valid, ld_off:%x", __func__, __LINE__, ld_off);
#endif

        // ld_off == LSW, ld_off+VMRegImpl::stack_slot_size == MSW
        // st_off == MSW, st_off-wordSize == LSW

        int next_off = st_off - Interpreter::stackElementSize;
        /*
        __ lw(AT, SP, ld_off);
        __ sw(AT, SP, next_off);
        __ lw(AT, SP, ld_off + wordSize);
        __ sw(AT, SP, st_off);
        */
        __ ld_ptr(AT, SP, ld_off);
        __ st_ptr(AT, SP, st_off);

        /* Ref to is_Register condition */
        if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE)
          __ st_ptr(AT, SP, st_off - 8);
        //tag_stack(sig_bt[i], next_off);
      }
    } else if (r_1->is_Register()) {
      Register r = r_1->as_Register();
      if (!r_2->is_valid()) {
#ifdef aoqi_test
        tty->print_cr(" AdapterGenerator::%s :%d, r_1->is_Register, !r_2->is_valid, st_off: %lx", __func__, __LINE__, st_off);
#endif
        // __ movl(Address(esp, st_off), r);
        __ sd(r, SP, st_off); //aoqi_test FIXME
        //tag_stack(sig_bt[i], st_off);
      } else {
#ifdef aoqi_test
        tty->print_cr(" AdapterGenerator::%s :%d, r_1->is_Register, r_2->is_valid, st_off: %lx", __func__, __LINE__, st_off);
#endif
        //FIXME, mips will not enter here
        // long/double in gpr
        __ sd(r, SP, st_off); //aoqi_test FIXME
/* Jin: In [java/util/zip/ZipFile.java]

    private static native long open(String name, int mode, long lastModified);
    private static native int getTotal(long jzfile);
 *
 * We need to transfer T_LONG parameters from a compiled method to a native method.
 * It's a complex process:
 *
 * Caller -> lir_static_call -> gen_resolve_stub
      -> -- resolve_static_call_C
         `- gen_c2i_adapter()  [*]
             |
             `- AdapterHandlerLibrary::get_create_adapter_index
      -> generate_native_entry
      -> InterpreterRuntime::SignatureHandlerGenerator::pass_long [**]

 * In [**], the T_LONG parameter is stored in the stack as:

   (high)
    |         |
    -----------
    | 8 bytes |
    | (void)  |
    -----------
    | 8 bytes |
    | (long)  |
    -----------
    |         |
   (low)
 *
 * However, the sequence is reversed here:

   (high)
    |         |
    -----------
    | 8 bytes |
    | (long)  |
    -----------
    | 8 bytes |
    | (void)  |
    -----------
    |         |
   (low)
 *
 * So I stored another 8 bytes in the T_VOID slot. It then can be accessed from generate_native_entry().
 */
        if (sig_bt[i] == T_LONG)
          __ sd(r, SP, st_off - 8);
        //  ShouldNotReachHere();
        //  int next_off = st_off - Interpreter::stackElementSize;
        //  __ sw(r_2->as_Register(),SP, st_off);
        //  __ sw(r,SP, next_off);
        //  tag_stack(masm, sig_bt[i], next_off);
      }
    } else if (r_1->is_FloatRegister()) {
      assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register");

      FloatRegister fr = r_1->as_FloatRegister();
      if (sig_bt[i] == T_FLOAT)
        __ swc1(fr, SP, st_off);
      else {
        __ sdc1(fr, SP, st_off);
        __ sdc1(fr, SP, st_off - 8);   /* T_DOUBLE needs two slots */
      }
    }
  }

  // Schedule the branch target address early.
  __ ld_ptr(AT, Rmethod, in_bytes(Method::interpreter_entry_offset()));
  // And repush original return address
  __ move(RA, V0);
  __ jr(AT);
  __ delayed()->nop();
}
void AdapterGenerator::gen_i2c_adapter(
                            int total_args_passed,
                            // VMReg max_arg,
                            int comp_args_on_stack, // VMRegStackSlots
                            const BasicType *sig_bt,
                            const VMRegPair *regs) {

  // Generate an I2C adapter: adjust the I-frame to make space for the C-frame
  // layout.  Lesp was saved by the calling I-frame and will be restored on
  // return.  Meanwhile, outgoing arg space is all owned by the callee
  // C-frame, so we can mangle it at will.  After adjusting the frame size,
  // hoist register arguments and repack other args according to the compiled
  // code convention.  Finally, end in a jump to the compiled code.  The entry
  // point address is the start of the buffer.

  // We will only enter here from an interpreted frame and never from after
  // passing thru a c2i. Azul allowed this but we do not. If we lose the
  // race and use a c2i we will remain interpreted for the race loser(s).
  // This removes all sorts of headaches on the mips side and also eliminates
  // the possibility of having c2i -> i2c -> c2i -> ... endless transitions.

  __ move(T9, SP);

  // Cut-out for having no stack args.  Since up to 2 int/oop args are passed
  // in registers, we will occasionally have no stack args.
  int comp_words_on_stack = 0;
  if (comp_args_on_stack) {
    // Sig words on the stack are greater-than VMRegImpl::stack0.  Those in
    // registers are below.  By subtracting stack0, we either get a negative
    // number (all values in registers) or the maximum stack slot accessed.
    // int comp_args_on_stack = VMRegImpl::reg2stack(max_arg);
    // Convert 4-byte stack slots to words.
    // did mips need round? FIXME  aoqi
    comp_words_on_stack = round_to(comp_args_on_stack*4, wordSize)>>LogBytesPerWord;
    // Round up to minimum stack alignment, in wordSize
    comp_words_on_stack = round_to(comp_words_on_stack, 2);
    __ daddi(SP, SP, -comp_words_on_stack * wordSize);
  }

  // Align the outgoing SP
  __ move(AT, -(StackAlignmentInBytes));
  __ andr(SP, SP, AT);
  // push the return address on the stack (note that pushing, rather
  // than storing it, yields the correct frame alignment for the callee)
  // Put saved SP in another register
  // const Register saved_sp = eax;
  const Register saved_sp = V0;
  __ move(saved_sp, T9);

  // Will jump to the compiled code just as if compiled code was doing it.
  // Pre-load the register-jump target early, to schedule it better.
  __ ld(T9, Rmethod, in_bytes(Method::from_compiled_offset()));

  // Now generate the shuffle code.  Pick up all register args and move the
  // rest through the floating point stack top.
  for (int i = 0; i < total_args_passed; i++) {
    if (sig_bt[i] == T_VOID) {
      // Longs and doubles are passed in native word order, but misaligned
      // in the 32-bit build.
      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
      continue;
    }

    // Pick up 0, 1 or 2 words from SP+offset.

    //FIXME. aoqi. just delete the assert
    //assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "scrambled load targets?");
    // Load in argument order going down.
    int ld_off = (total_args_passed - 1 - i)*Interpreter::stackElementSize;
    // Point to interpreter value (vs. tag)
    int next_off = ld_off - Interpreter::stackElementSize;

    VMReg r_1 = regs[i].first();
    VMReg r_2 = regs[i].second();
    if (!r_1->is_valid()) {
      assert(!r_2->is_valid(), "");
      continue;
    }
#ifdef aoqi_test
    tty->print_cr(" AdapterGenerator::%s :%d, sig_bt[%d]:%d, total_args_passed:%d, ld_off:%d, next_off: %d", __func__, __LINE__, i, sig_bt[i], total_args_passed, ld_off, next_off);
#endif
    if (r_1->is_stack()) {
      // Convert stack slot to an SP offset (+ wordSize to
      // account for return address )
      //NOTICE HERE!!!! I sub a wordSize here
      int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size;
      //+ wordSize;

      // We can use esi as a temp here because compiled code doesn't
      // need esi as an input, and if we end up going thru a c2i
      // because of a miss a reasonable value of esi
      // will be generated.
      if (!r_2->is_valid()) {
#ifdef aoqi_test
        tty->print_cr(" AdapterGenerator::%s :%d, sig_bt[%d]:%d, total_args_passed:%d r_1->is_stack() !r_2->is_valid(), st_off:%d", __func__, __LINE__, i, sig_bt[i], total_args_passed, st_off);
#endif
        __ ld(AT, saved_sp, ld_off);
        __ sd(AT, SP, st_off);
      } else {
#ifdef aoqi_test
        tty->print_cr(" AdapterGenerator::%s :%d, sig_bt[%d]:%d, total_args_passed:%d r_1->is_stack() r_2->is_valid(), st_off:%d", __func__, __LINE__, i, sig_bt[i], total_args_passed, st_off);
#endif
        // Interpreter local[n] == MSW, local[n+1] == LSW however locals
        // are accessed as negative so LSW is at LOW address

        // ld_off is MSW so get LSW
        // st_off is LSW (i.e. reg.first())
        /*
        __ ld(AT, saved_sp, next_off);
        __ sd(AT, SP, st_off);
        __ ld(AT, saved_sp, ld_off);
        __ sd(AT, SP, st_off + wordSize);
        */

        /* 2012/4/9 Jin
         * [./org/eclipse/swt/graphics/GC.java]
         * void drawImageXRender(Image srcImage, int srcX, int srcY, int srcWidth, int srcHeight,
            int destX, int destY, int destWidth, int destHeight,
            boolean simple,
            int imgWidth, int imgHeight,
            long maskPixmap,  <-- Pass T_LONG in stack
            int maskType);
         * Before this modification, Eclipse displays icons with solid black background.
         */
        __ ld(AT, saved_sp, ld_off);
        if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE)
          __ ld(AT, saved_sp, ld_off - 8);
        __ sd(AT, SP, st_off);
        //__ ld(AT, saved_sp, next_off);
        //__ sd(AT, SP, st_off + wordSize);
      }
    } else if (r_1->is_Register()) {  // Register argument
      Register r = r_1->as_Register();
      // assert(r != eax, "must be different");
      if (r_2->is_valid()) {
#ifdef aoqi_test
        tty->print_cr(" AdapterGenerator::%s :%d, sig_bt[%d]:%d, total_args_passed:%d r_1->is_Register() r_2->is_valid()", __func__, __LINE__, i, sig_bt[i], total_args_passed);
#endif
        //  assert(r_2->as_Register() != eax, "need another temporary register");
        // Remember r_1 is low address (and LSB on mips)
        // So r_2 gets loaded from high address regardless of the platform
        //aoqi
        assert(r_2->as_Register() == r_1->as_Register(), "");
        //__ ld(r_2->as_Register(), saved_sp, ld_off);
        //__ ld(r, saved_sp, next_off);
        __ ld(r, saved_sp, ld_off);

/* Jin:
 * For T_LONG type, the real layout is as below:

   (high)
    |         |
    -----------
    | 8 bytes |
    | (void)  |
    -----------
    | 8 bytes |
    | (long)  |
    -----------
    |         |
   (low)

 * We should load the low 8 bytes.
 */
        if (sig_bt[i] == T_LONG)
          __ ld(r, saved_sp, ld_off - 8);
      } else {
#ifdef aoqi_test
        tty->print_cr(" AdapterGenerator::%s :%d, sig_bt[%d]:%d, total_args_passed:%d r_1->is_Register() !r_2->is_valid()", __func__, __LINE__, i, sig_bt[i], total_args_passed);
#endif
        __ lw(r, saved_sp, ld_off);
      }
    } else if (r_1->is_FloatRegister()) { // Float Register
      assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register");

      FloatRegister fr = r_1->as_FloatRegister();
      if (sig_bt[i] == T_FLOAT)
        __ lwc1(fr, saved_sp, ld_off);
      else {
        __ ldc1(fr, saved_sp, ld_off);
        __ ldc1(fr, saved_sp, ld_off - 8);
      }
    }
  }

  // 6243940 We might end up in handle_wrong_method if
  // the callee is deoptimized as we race thru here. If that
  // happens we don't want to take a safepoint because the
  // caller frame will look interpreted and arguments are now
  // "compiled" so it is much better to make this transition
  // invisible to the stack walking code. Unfortunately if
  // we try and find the callee by normal means a safepoint
  // is possible. So we stash the desired callee in the thread
  // and the vm will find it there should this case occur.
  __ get_thread(T8);
  __ sd(Rmethod, T8, in_bytes(JavaThread::callee_target_offset()));

  // move methodOop to eax in case we end up in a c2i adapter.
  // the c2i adapters expect methodOop in eax (c2) because c2's
  // resolve stubs return the result (the method) in eax.
  // I'd love to fix this.
  __ move(V0, Rmethod);
  __ jr(T9);
  __ delayed()->nop();
}
  1232 // ---------------------------------------------------------------
  1233 AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
  1234                                                             int total_args_passed,
  1235                                                             // VMReg max_arg,
  1236                                                             int comp_args_on_stack, // VMRegStackSlots
  1237                                                             const BasicType *sig_bt,
  1238                                                             const VMRegPair *regs,
  1239                                                             AdapterFingerPrint* fingerprint) {
  1240   address i2c_entry = __ pc();
  1242   AdapterGenerator agen(masm);
  1244   agen.gen_i2c_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs);
  1247   // -------------------------------------------------------------------------
  1248   // Generate a C2I adapter.  On entry we know G5 holds the methodOop.  The
  1249   // args start out packed in the compiled layout.  They need to be unpacked
  1250   // into the interpreter layout.  This will almost always require some stack
  1251   // space.  We grow the current (compiled) stack, then repack the args.  We
  1252   // finally end in a jump to the generic interpreter entry point.  On exit
  1253   // from the interpreter, the interpreter will restore our SP (lest the
  1254   // compiled code, which relys solely on SP and not FP, get sick).
  1256   address c2i_unverified_entry = __ pc();
  1257   Label skip_fixup;
  1259     Register holder = T1;
  1260     Register receiver = T0;
  1261     Register temp = T8;
  1262     address ic_miss = SharedRuntime::get_ic_miss_stub();
  1264     Label missed;
  1266     __ verify_oop(holder);
  1267     // __ movl(temp, Address(receiver, oopDesc::klass_offset_in_bytes()));
  1268     //__ ld_ptr(temp, receiver, oopDesc::klass_offset_in_bytes());
  1269     //add for compressedoops
  1270     __ load_klass(temp, receiver);
  1271     __ verify_oop(temp);
  1273     //  __ cmpl(temp, Address(holder, CompiledICHolder::holder_klass_offset()));
  1274     __ ld_ptr(AT, holder, CompiledICHolder::holder_klass_offset()); 
  1275     //__ movl(ebx, Address(holder, CompiledICHolder::holder_method_offset()));
  1276     __ ld_ptr(Rmethod, holder, CompiledICHolder::holder_method_offset());
  1277     //__ jcc(Assembler::notEqual, missed);
  1278     __ bne(AT, temp, missed); 
  1279     __ delayed()->nop(); 
    // Method might have been compiled since the call site was patched to
    // interpreted; if that is the case treat it as a miss so we can get
    // the call site corrected.
  1283     //__ cmpl(Address(ebx, in_bytes(Method::code_offset())), NULL_WORD);
  1284     //__ jcc(Assembler::equal, skip_fixup);
  1285     __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset()));
  1286     __ beq(AT, R0, skip_fixup); 
  1287     __ delayed()->nop(); 
  1288     __ bind(missed);
  1292     __ jmp(ic_miss, relocInfo::runtime_call_type);
    __ delayed()->nop(); 
  }
  1296   address c2i_entry = __ pc();
  1298   agen.gen_c2i_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
  1300   __ flush();
  return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
}
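// Roughly, the blob built above has three entry points:
//   i2c_entry:            interpreter -> compiled; repacks interpreter stack
//                         args into the compiled convention and jumps via T9.
//   c2i_unverified_entry: inline-cache check against the CompiledICHolder,
//                         falling into c2i_entry on a hit.
//   c2i_entry:            compiled -> interpreter; unpacks args back into the
//                         interpreter layout before entering the method.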
  1304 /*
  1305 // Helper function for native calling conventions
  1306 static VMReg int_stk_helper( int i ) {
  1307   // Bias any stack based VMReg we get by ignoring the window area
  1308   // but not the register parameter save area.
  1309   //
  1310   // This is strange for the following reasons. We'd normally expect
  1311   // the calling convention to return an VMReg for a stack slot
  1312   // completely ignoring any abi reserved area. C2 thinks of that
  1313   // abi area as only out_preserve_stack_slots. This does not include
  1314   // the area allocated by the C abi to store down integer arguments
  1315   // because the java calling convention does not use it. So
  1316   // since c2 assumes that there are only out_preserve_stack_slots
  1317   // to bias the optoregs (which impacts VMRegs) when actually referencing any actual stack
  1318   // location the c calling convention must add in this bias amount
  1319   // to make up for the fact that the out_preserve_stack_slots is
  1320   // insufficient for C calls. What a mess. I sure hope those 6
  1321   // stack words were worth it on every java call!
  1323   // Another way of cleaning this up would be for out_preserve_stack_slots
  1324   // to take a parameter to say whether it was C or java calling conventions.
  1325   // Then things might look a little better (but not much).
  1327   int mem_parm_offset = i - SPARC_ARGS_IN_REGS_NUM;
  1328   if( mem_parm_offset < 0 ) {
  1329     return as_oRegister(i)->as_VMReg();
  1330   } else {
  1331     int actual_offset = (mem_parm_offset + frame::memory_parameter_word_sp_offset) * VMRegImpl::slots_per_word;
  1332     // Now return a biased offset that will be correct when out_preserve_slots is added back in
  1333     return VMRegImpl::stack2reg(actual_offset - SharedRuntime::out_preserve_stack_slots());
  1336 */
  1339 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
  1340                                          VMRegPair *regs,
  1341                                          VMRegPair *regs2,
  1342                                          int total_args_passed) {
  1343     assert(regs2 == NULL, "not needed on MIPS");
  1344 #ifdef aoqi_test
  1345 tty->print_cr(" SharedRuntime::%s :%d total_args_passed:%d", __func__, __LINE__, total_args_passed);
  1346 #endif
    // Return the number of VMReg stack_slots needed for the args.
    // This value does not include an abi space (like register window
    // save area).

    // The native convention used here is the MIPS N64 one: the first
    // eight arguments go in A0..A7 / F12..F19, the rest on the stack.

    // We return the amount of VMRegImpl stack slots we need to reserve for
    // all the arguments NOT counting out_preserve_stack_slots.
  1360   static const Register INT_ArgReg[Argument::n_register_parameters] = {
  1361     A0, A1, A2, A3, A4, A5, A6, A7
  1362   };
  1363   static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = {
  1364     F12, F13, F14, F15, F16, F17, F18, F19
  1365   };
  1366     uint args = 0;
  1367     uint stk_args = 0; // inc by 2 each time
  1369 /* Example:
  1370 ---   n   java.lang.UNIXProcess::forkAndExec
  1371     private native int forkAndExec(byte[] prog,
  1372                                    byte[] argBlock, int argc,
  1373                                    byte[] envBlock, int envc,
  1374                                    byte[] dir,
  1375                                    boolean redirectErrorStream,
  1376                                    FileDescriptor stdin_fd,
  1377                                    FileDescriptor stdout_fd,
  1378                                    FileDescriptor stderr_fd)
  1379 JNIEXPORT jint JNICALL
  1380 Java_java_lang_UNIXProcess_forkAndExec(JNIEnv *env,
  1381                                        jobject process,
  1382                                        jbyteArray prog,
  1383                                        jbyteArray argBlock, jint argc,
  1384                                        jbyteArray envBlock, jint envc,
  1385                                        jbyteArray dir,
  1386                                        jboolean redirectErrorStream,
  1387                                        jobject stdin_fd,
  1388                                        jobject stdout_fd,
  1389                                        jobject stderr_fd)
  1391 ::c_calling_convention
  1392 0: 		// env		<-- a0
  1393 1: L		// klass/obj	<-- t0 => a1
  1394 2: [		// prog[]	<-- a0 => a2
  1395 3: [		// argBlock[]	<-- a1 => a3
  1396 4: I		// argc
  1397 5: [		// envBlock[]	<-- a3 => a5
  1398 6: I		// envc
  1399 7: [		// dir[]	<-- a5 => a7
  1400 8: Z		// redirectErrorStream	a6 => sp[0]
  1401 9: L		// stdin		a7 => sp[8]
  1402 10: L		// stdout		fp[16] => sp[16]
  1403 11: L		// stderr		fp[24] => sp[24]
  1404 */
    for (int i = 0; i < total_args_passed; i++) {
      switch (sig_bt[i]) {
      case T_VOID: // Halves of longs and doubles
        assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
        regs[i].set_bad();
        break;
      case T_BOOLEAN:
      case T_CHAR:
      case T_BYTE:
      case T_SHORT:
      case T_INT:
        if (args < Argument::n_register_parameters) {
          regs[i].set1(INT_ArgReg[args++]->as_VMReg());
        } else {
          regs[i].set1(VMRegImpl::stack2reg(stk_args));
          stk_args += 2;
        }
        break;
      case T_LONG:
        assert(sig_bt[i + 1] == T_VOID, "expecting half");
        // fall through
      case T_OBJECT:
      case T_ARRAY:
      case T_ADDRESS:
      case T_METADATA:
        if (args < Argument::n_register_parameters) {
          regs[i].set2(INT_ArgReg[args++]->as_VMReg());
        } else {
          regs[i].set2(VMRegImpl::stack2reg(stk_args));
          stk_args += 2;
        }
        break;
      case T_FLOAT:
        if (args < Argument::n_float_register_parameters) {
          regs[i].set1(FP_ArgReg[args++]->as_VMReg());
        } else {
          regs[i].set1(VMRegImpl::stack2reg(stk_args));
          stk_args += 2;
        }
        break;
      case T_DOUBLE:
        assert(sig_bt[i + 1] == T_VOID, "expecting half");
        if (args < Argument::n_float_register_parameters) {
          regs[i].set2(FP_ArgReg[args++]->as_VMReg());
        } else {
          regs[i].set2(VMRegImpl::stack2reg(stk_args));
          stk_args += 2;
        }
        break;
      default:
        ShouldNotReachHere();
        break;
      }
    }

  return round_to(stk_args, 2);
}
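// For example, a signature (int, long, float) arrives as sig_bt =
// { T_INT, T_LONG, T_VOID, T_FLOAT } and the loop above assigns
//   i=0 T_INT   -> INT_ArgReg[0] = A0
//   i=1 T_LONG  -> INT_ArgReg[1] = A1  (the T_VOID half at i=2 is set_bad)
//   i=3 T_FLOAT -> FP_ArgReg[2]  = F14 (one shared arg counter, N64 style)
// leaving stk_args == 0, so no stack slots are reserved.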
  1462 /*
  1463 int SharedRuntime::c_calling_convention_jni(const BasicType *sig_bt, 
  1464                                          VMRegPair *regs,
  1465                                          int total_args_passed) {
  1466 // We return the amount of VMRegImpl stack slots we need to reserve for all
  1467 // the arguments NOT counting out_preserve_stack_slots. 
  1468    bool unalign = 0;
  1469   uint    stack = 0;        // All arguments on stack
  1470 #ifdef aoqi_test
  1471 tty->print_cr(" SharedRuntime::%s :%d total_args_passed:%d", __func__, __LINE__, total_args_passed);
  1472 #endif
  1474   for( int i = 0; i < total_args_passed; i++) {
  1475     // From the type and the argument number (count) compute the location
  1476     switch( sig_bt[i] ) {
  1477     case T_BOOLEAN:
  1478     case T_CHAR:
  1479     case T_FLOAT:
  1480     case T_BYTE:
  1481     case T_SHORT:
  1482     case T_INT:
  1483     case T_OBJECT:
  1484     case T_ARRAY:
  1485     case T_ADDRESS:
  1486       regs[i].set1(VMRegImpl::stack2reg(stack++));
  1487       unalign = !unalign;
  1488       break;
  1489     case T_LONG:
  1490     case T_DOUBLE: // The stack numbering is reversed from Java
  1491       // Since C arguments do not get reversed, the ordering for
  1492       // doubles on the stack must be opposite the Java convention
  1493       assert(sig_bt[i+1] == T_VOID, "missing Half" ); 
      if (unalign) {
            stack += 1;
            unalign = !unalign;
      }
  1498       regs[i].set2(VMRegImpl::stack2reg(stack));
  1499       stack += 2;
  1500       break;
  1501     case T_VOID: regs[i].set_bad(); break;
  1502     default:
  1503       ShouldNotReachHere();
  1504       break;
    }
  }
  return stack;
}
  1509 */
  1511 // ---------------------------------------------------------------------------
  1512 void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
  1513 	// We always ignore the frame_slots arg and just use the space just below frame pointer
  1514 	// which by this time is free to use
  1515 	switch (ret_type) {
  1516 		case T_FLOAT:
  1517 			__ swc1(FSF, FP, -wordSize); 
  1518 			break;
  1519 		case T_DOUBLE:
  1520 			__ sdc1(FSF, FP, -wordSize ); 
  1521 			break;
  1522 		case T_VOID:  break;
  1523 		case T_LONG:
  1524 			      __ sd(V0, FP, -wordSize);
  1525 			      break;
  1526 		case T_OBJECT:
  1527 		case T_ARRAY:
  1528 			__ sd(V0, FP, -wordSize);
  1529 			break;
		default: {
				 __ sw(V0, FP, -wordSize);
			 }
	}
}
  1536 void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
  1537 	// We always ignore the frame_slots arg and just use the space just below frame pointer
  1538 	// which by this time is free to use
  1539 	switch (ret_type) {
  1540 		case T_FLOAT:
  1541 			__ lwc1(FSF, FP, -wordSize); 
  1542 			break;
  1543 		case T_DOUBLE:
  1544 			__ ldc1(FSF, FP, -wordSize ); 
  1545 			break;
  1546 		case T_LONG:
  1547 			__ ld(V0, FP, -wordSize);
  1548 			break;
  1549 		case T_VOID:  break;
  1550 		case T_OBJECT:
  1551 		case T_ARRAY:
  1552 			__ ld(V0, FP, -wordSize);
  1553 			break;
		default: {
				 __ lw(V0, FP, -wordSize);
			 }
	}
}
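// save_native_result / restore_native_result are used in pairs around the
// VM calls made after the native call returns (the safepoint poll and unlock
// paths below), so the single scratch word at FP - wordSize is enough.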
  1560 static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) {
  1561     for ( int i = first_arg ; i < arg_count ; i++ ) {
  1562       if (args[i].first()->is_Register()) {
  1563         __ push(args[i].first()->as_Register());
  1564       } else if (args[i].first()->is_FloatRegister()) {
        __ push(args[i].first()->as_FloatRegister());
      }
    }
}
  1570 static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) {
  1571     for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) {
  1572       if (args[i].first()->is_Register()) {
  1573         __ pop(args[i].first()->as_Register());
  1574       } else if (args[i].first()->is_FloatRegister()) {
        __ pop(args[i].first()->as_FloatRegister());
      }
    }
}
// A simple move of integer like type
static void simple_move32(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
  if (src.first()->is_stack()) {
    if (dst.first()->is_stack()) {
      // stack to stack
      __ lw(AT, FP, reg2offset_in(src.first()));
      __ sd(AT, SP, reg2offset_out(dst.first()));
    } else {
      // stack to reg
      __ lw(dst.first()->as_Register(), FP, reg2offset_in(src.first()));
    }
  } else if (dst.first()->is_stack()) {
    // reg to stack
    __ sd(src.first()->as_Register(), SP, reg2offset_out(dst.first()));
  } else {
    // reg to reg
    if (dst.first() != src.first()) {
      __ move(dst.first()->as_Register(), src.first()->as_Register());
    }
  }
}
  1602 /*
  1603 // On 64 bit we will store integer like items to the stack as
  1604 // 64 bits items (sparc abi) even though java would only store
  1605 // 32bits for a parameter. On 32bit it will simply be 32 bits
  1606 // So this routine will do 32->32 on 32bit and 32->64 on 64bit
  1607 static void move32_64(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
  1608   if (src.first()->is_stack()) {
  1609     if (dst.first()->is_stack()) {
  1610       // stack to stack
  1611       __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5);
  1612       __ st_ptr(L5, SP, reg2offset(dst.first()) + STACK_BIAS);
  1613     } else {
  1614       // stack to reg
      __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
    }
  } else if (dst.first()->is_stack()) {
  1618     // reg to stack
  1619     __ st_ptr(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS);
  1620   } else {
    __ mov(src.first()->as_Register(), dst.first()->as_Register());
  }
}
*/
  1626 // An oop arg. Must pass a handle not the oop itself
  1627 static void object_move(MacroAssembler* masm,
  1628                         OopMap* map,
  1629                         int oop_handle_offset,
  1630                         int framesize_in_slots,
  1631                         VMRegPair src,
  1632                         VMRegPair dst,
  1633                         bool is_receiver,
  1634                         int* receiver_offset) {
  1636   // must pass a handle. First figure out the location we use as a handle
  1638 	//FIXME, for mips, dst can be register
  1639 	if (src.first()->is_stack()) {
  1640 		// Oop is already on the stack as an argument
  1641 		Register rHandle = V0;
  1642 		Label nil;
  1643 		//__ xorl(rHandle, rHandle);
  1644 		__ xorr(rHandle, rHandle, rHandle);
  1645 		//__ cmpl(Address(ebp, reg2offset_in(src.first())), NULL_WORD);
  1646 		__ ld(AT, FP, reg2offset_in(src.first())); 
  1647 		//__ jcc(Assembler::equal, nil);
  1648 		__ beq(AT,R0, nil); 
  1649 		__ delayed()->nop(); 
  1650 		// __ leal(rHandle, Address(ebp, reg2offset_in(src.first())));
  1651 		__ lea(rHandle, Address(FP, reg2offset_in(src.first())));
  1652 		__ bind(nil);
  1653 		//__ movl(Address(esp, reg2offset_out(dst.first())), rHandle);
		if (dst.first()->is_stack())
			__ sd(rHandle, SP, reg2offset_out(dst.first()));
		else
			__ move(dst.first()->as_Register(), rHandle);
  1657 	//FIXME, do mips need out preserve stack slots?	
  1658 		int offset_in_older_frame = src.first()->reg2stack() 
  1659 			+ SharedRuntime::out_preserve_stack_slots();
  1660 		map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots));
  1661 		if (is_receiver) {
			*receiver_offset = (offset_in_older_frame
					+ framesize_in_slots) * VMRegImpl::stack_slot_size;
		}
  1665 	} else {
		// Oop is in a register; we must store it to the space we reserve
  1667 		// on the stack for oop_handles
  1668 		const Register rOop = src.first()->as_Register();
  1669 		assert( (rOop->encoding() >= A0->encoding()) && (rOop->encoding() <= T0->encoding()),"wrong register");
  1670 		//   const Register rHandle = eax;
  1671 		const Register rHandle = V0;
		// Important: refer to java_calling_convention
  1673 		int oop_slot = (rOop->encoding() - A0->encoding()) * VMRegImpl::slots_per_word + oop_handle_offset;
  1674 		int offset = oop_slot*VMRegImpl::stack_slot_size;
  1675 		Label skip;
  1676 		// __ movl(Address(esp, offset), rOop);
  1677 		__ sd( rOop , SP, offset );
  1678 		map->set_oop(VMRegImpl::stack2reg(oop_slot));
  1679 		//    __ xorl(rHandle, rHandle);
  1680 		__ xorr( rHandle, rHandle, rHandle);
  1681 		//__ cmpl(rOop, NULL_WORD);
  1682 		// __ jcc(Assembler::equal, skip);
  1683 		__ beq(rOop, R0, skip); 
  1684 		__ delayed()->nop(); 
  1685 		//  __ leal(rHandle, Address(esp, offset));
  1686 		__ lea(rHandle, Address(SP, offset));
  1687 		__ bind(skip);
  1688 		// Store the handle parameter
  1689 		//__ movl(Address(esp, reg2offset_out(dst.first())), rHandle);
		if (dst.first()->is_stack())
			__ sd(rHandle, SP, reg2offset_out(dst.first()));
		else
			__ move(dst.first()->as_Register(), rHandle);
  1694 		if (is_receiver) {
			*receiver_offset = offset;
		}
	}
}
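// In effect (sketch): the oop is spilled to a known, oopmap-tracked stack
// slot, and what the native code receives as the jobject is the address of
// that slot, or NULL when the oop itself is NULL:
//   slot = oop;  handle = (oop == NULL) ? NULL : &slot;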
  1700 // A float arg may have to do float reg int reg conversion
  1701 static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
  1702   assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move");
  1704 	if (src.first()->is_stack()) {
  1705 		if(dst.first()->is_stack()){
  1706 			//  __ movl(eax, Address(ebp, reg2offset_in(src.first())));
  1707 			__ lwc1(F12 , FP, reg2offset_in(src.first()));
  1708 			// __ movl(Address(esp, reg2offset_out(dst.first())), eax);
			__ swc1(F12, SP, reg2offset_out(dst.first()));
		}
		else
  1712 			__ lwc1( dst.first()->as_FloatRegister(), FP, reg2offset_in(src.first())); 
  1713 	} else {
  1714 		// reg to stack
  1715 		// __ movss(Address(esp, reg2offset_out(dst.first())), 
  1716 		// src.first()->as_XMMRegister());
  1717 		// __ movl(Address(esp, reg2offset_out(dst.first())), eax);
  1718 		if(dst.first()->is_stack())
  1719 			__ swc1( src.first()->as_FloatRegister(),SP, reg2offset_out(dst.first()));
  1720 		else
			__ mov_s( dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
	}
}
  1724 /*
  1725 static void split_long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
  1726   VMRegPair src_lo(src.first());
  1727   VMRegPair src_hi(src.second());
  1728   VMRegPair dst_lo(dst.first());
  1729   VMRegPair dst_hi(dst.second());
  1730   simple_move32(masm, src_lo, dst_lo);
  simple_move32(masm, src_hi, dst_hi);
}
*/
  1734 // A long move
  1735 static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
	// On MIPS64 a long lives in one 64-bit register or one 64-bit stack
	// slot, so a long_move is a single 64-bit move; the commented-out
	// "second half" moves below are leftovers from the 32-bit port.
  1742 	if (src.first()->is_stack()) {
  1743 		assert(src.second()->is_stack() && dst.second()->is_stack(), "must be all stack");
  1744 		//  __ movl(eax, Address(ebp, reg2offset_in(src.first())));
  1745 		if( dst.first()->is_stack()){ 
  1746 			__ ld(AT, FP, reg2offset_in(src.first()));
  1747 			//  __ movl(ebx, address(ebp, reg2offset_in(src.second())));
  1748 			//__ lw(V0, FP, reg2offset_in(src.second())); 
  1749 			// __ movl(address(esp, reg2offset_out(dst.first())), eax);
  1750 			__ sd(AT, SP, reg2offset_out(dst.first()));
  1751 			// __ movl(address(esp, reg2offset_out(dst.second())), ebx);
  1752 			//__ sw(V0, SP,  reg2offset_out(dst.second())); 
  1753 		} else{
			__ ld( (dst.first())->as_Register() , FP, reg2offset_in(src.first()));
			//__ lw( (dst.second())->as_Register(), FP, reg2offset_in(src.second()));
		}
	} else {
  1758 		if( dst.first()->is_stack()){ 
  1759 			__ sd( (src.first())->as_Register(), SP, reg2offset_out(dst.first()));
  1760 			//__ sw( (src.second())->as_Register(), SP,  reg2offset_out(dst.second())); 
  1761 		} else{
  1762 			__ move( (dst.first())->as_Register() , (src.first())->as_Register());
			//__ move( (dst.second())->as_Register(), (src.second())->as_Register());
		}
	}
}
  1768 // A double move
  1769 static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
	// The only legal possibilities for a double_move VMRegPair are:
	//   1: a single physical (float) register
	//   2: two stack slots (possibly unaligned)
	// Because of the calling convention we know that src is either a float
	// register or a pair of stack slots, and on MIPS64 the same holds for dst.
  1779 	// assert(dst.first()->is_stack() && (src.first()->is_XMMRegister() || 
  1780 	// src.first()->is_stack()), "bad args");
  1781 	//  assert(dst.first()->is_stack() || src.first()->is_stack()), "bad args");
  1783 	if (src.first()->is_stack()) {
  1784 		// source is all stack
  1785 		// __ movl(eax, Address(ebp, reg2offset_in(src.first())));
  1786 		if( dst.first()->is_stack()){ 
  1787 			__ ldc1(F12, FP, reg2offset_in(src.first()));
  1788 			//__ movl(ebx, Address(ebp, reg2offset_in(src.second())));
  1789 			//__ lwc1(F14, FP, reg2offset_in(src.second()));
  1791 			//   __ movl(Address(esp, reg2offset_out(dst.first())), eax);
  1792 			__ sdc1(F12, SP, reg2offset_out(dst.first())); 
  1793 			//  __ movl(Address(esp, reg2offset_out(dst.second())), ebx);
  1794 			//__ swc1(F14, SP, reg2offset_out(dst.second()));
  1795 		} else{
  1796 			__ ldc1( (dst.first())->as_FloatRegister(), FP, reg2offset_in(src.first()));
			//__ lwc1( (dst.second())->as_FloatRegister(), FP, reg2offset_in(src.second()));
		}
	} else {
  1801 		// reg to stack
  1802 		// No worries about stack alignment
  1803 		// __ movsd(Address(esp, reg2offset_out(dst.first())), src.first()->as_XMMRegister());
  1804 		if( dst.first()->is_stack()){ 
  1805 			__ sdc1( src.first()->as_FloatRegister(),SP, reg2offset_out(dst.first()));
			//__ swc1( src.second()->as_FloatRegister(),SP, reg2offset_out(dst.second()));
		}
		else
			__ mov_d( dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
			//__ mov_s( dst.second()->as_FloatRegister(), src.second()->as_FloatRegister());
	}
}
  1815 static void verify_oop_args(MacroAssembler* masm,
  1816                             methodHandle method,
  1817                             const BasicType* sig_bt,
  1818                             const VMRegPair* regs) {
  1819   Register temp_reg = T9;  // not part of any compiled calling seq
  1820   if (VerifyOops) {
  1821     for (int i = 0; i < method->size_of_parameters(); i++) {
  1822       if (sig_bt[i] == T_OBJECT ||
  1823           sig_bt[i] == T_ARRAY) {
  1824         VMReg r = regs[i].first();
  1825         assert(r->is_valid(), "bad oop arg");
  1826         if (r->is_stack()) {
  1827 //          __ movptr(temp_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
  1828           __ ld(temp_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
  1829           __ verify_oop(temp_reg);
  1830         } else {
          __ verify_oop(r->as_Register());
        }
      }
    }
  }
}
  1838 static void gen_special_dispatch(MacroAssembler* masm,
  1839                                  methodHandle method,
  1840                                  const BasicType* sig_bt,
  1841                                  const VMRegPair* regs) {
  1842   verify_oop_args(masm, method, sig_bt, regs);
  1843   vmIntrinsics::ID iid = method->intrinsic_id();
  1845   // Now write the args into the outgoing interpreter space
  1846   bool     has_receiver   = false;
  1847   Register receiver_reg   = noreg;
  1848   int      member_arg_pos = -1;
  1849   Register member_reg     = noreg;
  1850   int      ref_kind       = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid);
  1851   if (ref_kind != 0) {
  1852     member_arg_pos = method->size_of_parameters() - 1;  // trailing MemberName argument
  1853 //    member_reg = rbx;  // known to be free at this point
  1854     member_reg = S3;  // known to be free at this point
  1855     has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
  1856   } else if (iid == vmIntrinsics::_invokeBasic) {
  1857     has_receiver = true;
  1858   } else {
    fatal(err_msg_res("unexpected intrinsic id %d", iid));
  }
  1862   if (member_reg != noreg) {
  1863     // Load the member_arg into register, if necessary.
  1864     SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs);
  1865     VMReg r = regs[member_arg_pos].first();
  1866     if (r->is_stack()) {
  1867 //      __ movptr(member_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
  1868       __ ld(member_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
  1869     } else {
  1870       // no data motion is needed
      member_reg = r->as_Register();
    }
  }
  1875   if (has_receiver) {
  1876     // Make sure the receiver is loaded into a register.
  1877     assert(method->size_of_parameters() > 0, "oob");
  1878     assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
  1879     VMReg r = regs[0].first();
  1880     assert(r->is_valid(), "bad receiver arg");
  1881     if (r->is_stack()) {
  1882       // Porting note:  This assumes that compiled calling conventions always
  1883       // pass the receiver oop in a register.  If this is not true on some
  1884       // platform, pick a temp and load the receiver from stack.
  1885       fatal("receiver always in a register");
  1886 //      receiver_reg = j_rarg0;  // known to be free at this point
  1887       receiver_reg = SSR;  // known to be free at this point
  1888 //      __ movptr(receiver_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
  1889       __ ld(receiver_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
  1890     } else {
  1891       // no data motion is needed
      receiver_reg = r->as_Register();
    }
  }
  1896   // Figure out which address we are really jumping to:
  1897   MethodHandles::generate_method_handle_dispatch(masm, iid,
                                                 receiver_reg, member_reg, /*for_compiler_entry:*/ true);
}
  1901 // ---------------------------------------------------------------------------
  1902 // Generate a native wrapper for a given method.  The method takes arguments
  1903 // in the Java compiled code convention, marshals them to the native
  1904 // convention (handlizes oops, etc), transitions to native, makes the call,
  1905 // returns to java state (possibly blocking), unhandlizes any result and
  1906 // returns.
  1907 nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
  1908                                                 methodHandle method,
  1909                                                 int compile_id,
  1910                                                 BasicType *in_sig_bt,
  1911                                                 VMRegPair *in_regs,
  1912                                                 BasicType ret_type) {
  1914   if (method->is_method_handle_intrinsic()) {
  1915     vmIntrinsics::ID iid = method->intrinsic_id();
  1916     intptr_t start = (intptr_t)__ pc();
  1917     int vep_offset = ((intptr_t)__ pc()) - start;
  1919     gen_special_dispatch(masm,
  1920                          method,
  1921                          in_sig_bt,
  1922                          in_regs);
  1924     int frame_complete = ((intptr_t)__ pc()) - start;  // not complete, period
  1925     __ flush();
  1926     int stack_slots = SharedRuntime::out_preserve_stack_slots();  // no out slots at all, actually
  1927     return nmethod::new_native_nmethod(method,
  1928                                        compile_id,
  1929                                        masm->code(),
  1930                                        vep_offset,
  1931                                        frame_complete,
  1932                                        stack_slots / VMRegImpl::slots_per_word,
  1933                                        in_ByteSize(-1),
  1934                                        in_ByteSize(-1),
                                       (OopMapSet*)NULL);
  }
  1937   bool is_critical_native = true;
  1938   address native_func = method->critical_native_function();
  1939   if (native_func == NULL) {
  1940     native_func = method->native_function();
    is_critical_native = false;
  }
  1943   assert(native_func != NULL, "must have function");
  // Native nmethod wrappers never take possession of the oop arguments.
  1946   // So the caller will gc the arguments. The only thing we need an
  1947   // oopMap for is if the call is static
  1948   //
  1949   // An OopMap for lock (and class if static), and one for the VM call itself
  1950   OopMapSet *oop_maps = new OopMapSet();
  // We have received a description of where all the java args are located
  // on entry to the wrapper. We need to convert these args to where
  // the jni function will expect them. To figure out where they go
  // we convert the java signature to a C signature by inserting
  // the hidden arguments as arg[0] and possibly arg[1] (static method)
  1958   const int total_in_args = method->size_of_parameters();
  1959   int total_c_args = total_in_args;
  1960   if (!is_critical_native) {
  1961     total_c_args += 1;
  1962     if (method->is_static()) {
      total_c_args++;
    }
  1965   } else {
  1966     for (int i = 0; i < total_in_args; i++) {
  1967       if (in_sig_bt[i] == T_ARRAY) {
        total_c_args++;
      }
    }
  }

  BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
  VMRegPair* out_regs   = NEW_RESOURCE_ARRAY(VMRegPair,   total_c_args);
  1975   BasicType* in_elem_bt = NULL;
  1977   int argc = 0;
  1978   if (!is_critical_native) {
  1979     out_sig_bt[argc++] = T_ADDRESS;
  1980     if (method->is_static()) {
      out_sig_bt[argc++] = T_OBJECT;
    }
  1984     for (int i = 0; i < total_in_args ; i++ ) {
      out_sig_bt[argc++] = in_sig_bt[i];
    }
  1987   } else {
  1988     Thread* THREAD = Thread::current();
  1989     in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args);
  1990     SignatureStream ss(method->signature());
  1991     for (int i = 0; i < total_in_args ; i++ ) {
  1992       if (in_sig_bt[i] == T_ARRAY) {
  1993         // Arrays are passed as int, elem* pair
  1994         out_sig_bt[argc++] = T_INT;
  1995         out_sig_bt[argc++] = T_ADDRESS;
  1996         Symbol* atype = ss.as_symbol(CHECK_NULL);
  1997         const char* at = atype->as_C_string();
  1998         if (strlen(at) == 2) {
  1999           assert(at[0] == '[', "must be");
  2000           switch (at[1]) {
  2001             case 'B': in_elem_bt[i]  = T_BYTE; break;
  2002             case 'C': in_elem_bt[i]  = T_CHAR; break;
  2003             case 'D': in_elem_bt[i]  = T_DOUBLE; break;
  2004             case 'F': in_elem_bt[i]  = T_FLOAT; break;
  2005             case 'I': in_elem_bt[i]  = T_INT; break;
  2006             case 'J': in_elem_bt[i]  = T_LONG; break;
  2007             case 'S': in_elem_bt[i]  = T_SHORT; break;
  2008             case 'Z': in_elem_bt[i]  = T_BOOLEAN; break;
            default: ShouldNotReachHere();
          }
        }
  2012       } else {
  2013         out_sig_bt[argc++] = in_sig_bt[i];
        in_elem_bt[i] = T_VOID;
      }
  2016       if (in_sig_bt[i] != T_VOID) {
  2017         assert(in_sig_bt[i] == ss.type(), "must match");
        ss.next();
      }
    }
  }
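  // e.g. for a critical native taking (byte[] a, int n): in_sig_bt
  // { T_ARRAY, T_INT } becomes out_sig_bt { T_INT, T_ADDRESS, T_INT },
  // the array being passed as a (length, element pointer) pair, and
  // in_elem_bt[0] records T_BYTE for the unpacking code.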
  2023   // Now figure out where the args must be stored and how much stack space
  2024   // they require (neglecting out_preserve_stack_slots but space for storing
  2025   // the 1st six register arguments). It's weird see int_stk_helper.
  2026   //
  2027   int out_arg_slots;
  2028   //out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args);
  2029 	out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args);
  2031   // Compute framesize for the wrapper.  We need to handlize all oops in
  2032   // registers. We must create space for them here that is disjoint from
  2033   // the windowed save area because we have no control over when we might
  2034   // flush the window again and overwrite values that gc has since modified.
  2035   // (The live window race)
  2036   //
  // We always just allocate 6 words for storing down these objects. This allows
  // us to simply record the base and use the Ireg number to decide which
  // slot to use. (Note that the reg number is the inbound number, not the
  // outbound number).
  2041   // We must shuffle args to match the native convention, and include var-args space.
  2043   // Calculate the total number of stack slots we will need.
  2045   // First count the abi requirement plus all of the outgoing args
  2046   int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
  2048   // Now the space for the inbound oop handle area
  2049   int total_save_slots = 9 * VMRegImpl::slots_per_word;  // 9 arguments passed in registers
  2050   if (is_critical_native) {
  2051     // Critical natives may have to call out so they need a save area
  2052     // for register arguments.
  2053     int double_slots = 0;
  2054     int single_slots = 0;
  2055     for ( int i = 0; i < total_in_args; i++) {
  2056       if (in_regs[i].first()->is_Register()) {
  2057         const Register reg = in_regs[i].first()->as_Register();
  2058         switch (in_sig_bt[i]) {
  2059           case T_BOOLEAN:
  2060           case T_BYTE:
  2061           case T_SHORT:
  2062           case T_CHAR:
  2063           case T_INT:  single_slots++; break;
  2064           case T_ARRAY:  // specific to LP64 (7145024)
  2065           case T_LONG: double_slots++; break;
          default:  ShouldNotReachHere();
        }
  2068       } else if (in_regs[i].first()->is_FloatRegister()) {
  2069         switch (in_sig_bt[i]) {
  2070           case T_FLOAT:  single_slots++; break;
  2071           case T_DOUBLE: double_slots++; break;
          default:  ShouldNotReachHere();
        }
      }
    }
  2076     total_save_slots = double_slots * 2 + single_slots;
  2077     // align the save area
  2078     if (double_slots != 0) {
      stack_slots = round_to(stack_slots, 2);
    }
  }
  2083   int oop_handle_offset = stack_slots;
  2084 //  stack_slots += 9*VMRegImpl::slots_per_word;	// T0, A0 ~ A7
  2085   stack_slots += total_save_slots;
  2087   // Now any space we need for handlizing a klass if static method
  2089 	int klass_slot_offset = 0;
  2090 	int klass_offset = -1;
  2091 	int lock_slot_offset = 0;
  2092 	bool is_static = false;
  2093 	//int oop_temp_slot_offset = 0;
  2095   if (method->is_static()) {
  2096     klass_slot_offset = stack_slots;
  2097     stack_slots += VMRegImpl::slots_per_word;
  2098     klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size;
    is_static = true;
  }
  2102   // Plus a lock if needed
  2104   if (method->is_synchronized()) {
  2105     lock_slot_offset = stack_slots;
    stack_slots += VMRegImpl::slots_per_word;
  }
  // Now a place to save return value or as a temporary for any gpr -> fpr moves
  // + 2 for return address (which we own) and saved FP
  2111   //stack_slots += 2;
  2112   stack_slots += 2 + 9 * VMRegImpl::slots_per_word;	// (T0, A0, A1, A2, A3, A4, A5, A6, A7)
  2114   // Ok The space we have allocated will look like:
  2115   //
  2116   //
  2117   // FP-> |                     |
  2118   //      |---------------------|
  2119   //      | 2 slots for moves   |
  2120   //      |---------------------|
  2121   //      | lock box (if sync)  |
  2122   //      |---------------------| <- lock_slot_offset
  2123   //      | klass (if static)   |
  2124   //      |---------------------| <- klass_slot_offset
  2125   //      | oopHandle area      |
  2126   //      |---------------------| <- oop_handle_offset
  2127   //      | outbound memory     |
  2128   //      | based arguments     |
  2129   //      |                     |
  2130   //      |---------------------|
  2131   //      | vararg area         |
  2132   //      |---------------------|
  2133   //      |                     |
  2134   // SP-> | out_preserved_slots |
  2135   //
  2136   //
  2139   // Now compute actual number of stack words we need rounding to make
  2140   // stack properly aligned.
  2141   stack_slots = round_to(stack_slots, StackAlignmentInSlots);
  2143   int stack_size = stack_slots * VMRegImpl::stack_slot_size;
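  // e.g. with StackAlignmentInBytes == 16, StackAlignmentInSlots is 4, so a
  // raw count of 45 slots rounds up to 48 slots == 192 bytes of frame.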
  2145 	intptr_t start = (intptr_t)__ pc();
  2149 	// First thing make an ic check to see if we should even be here
  2150 	address ic_miss = SharedRuntime::get_ic_miss_stub();
  2152 	// We are free to use all registers as temps without saving them and
  2153 	// restoring them except ebp. ebp is the only callee save register
  2154 	// as far as the interpreter and the compiler(s) are concerned.
  2156   //refer to register_mips.hpp:IC_Klass
  2157 	const Register ic_reg = T1;
  2158 	const Register receiver = T0;
  2159 	Label hit;
  2160 	Label exception_pending;
  2162 	__ verify_oop(receiver);
  2163 	//__ lw(AT, receiver, oopDesc::klass_offset_in_bytes()); 
  2164 	//add for compressedoops
  2165 	__ load_klass(AT, receiver);
  2166 	__ beq(AT, ic_reg, hit); 
  2167 	__ delayed()->nop(); 
  2168 	__ jmp(ic_miss, relocInfo::runtime_call_type);
  2169 	__ delayed()->nop();
	// The verified entry must be aligned for code patching, and the first
	// 5 bytes must be in the same cache line; if we align at 8 we can be
	// sure the 5 bytes are in the same line.
  2173 	__ align(8);
  2175 	__ bind(hit);
  2178 	int vep_offset = ((intptr_t)__ pc()) - start;
  2179 #ifdef COMPILER1
  2180 	if (InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) {
  2181 		// Object.hashCode can pull the hashCode from the header word
  2182 		// instead of doing a full VM transition once it's been computed.
  2183 		// Since hashCode is usually polymorphic at call sites we can't do
  2184 		// this optimization at the call site without a lot of work.
  2185 		Label slowCase;
  2186 		Register receiver = T0;
  2187 		Register result = V0;
  2188 		__ ld ( result, receiver, oopDesc::mark_offset_in_bytes()); 
  2189 		// check if locked
  2190 		__ andi(AT, result, markOopDesc::unlocked_value); 
  2191 		__ beq(AT, R0, slowCase); 
  2192 		__ delayed()->nop(); 
  2193 		if (UseBiasedLocking) {
  2194 			// Check if biased and fall through to runtime if so
  2195 			__ andi (AT, result, markOopDesc::biased_lock_bit_in_place);	  
  2196 			__ bne(AT,R0, slowCase); 
  2197 			__ delayed()->nop(); 
  2199 		// get hash
  2200 		__ li(AT, markOopDesc::hash_mask_in_place);
  2201 		__ andr (AT, result, AT);
  2202 		// test if hashCode exists
  2203 		__ beq (AT, R0, slowCase); 
  2204 		__ delayed()->nop(); 
  2205 		__ shr(result, markOopDesc::hash_shift);
  2206 		__ jr(RA); 
  2207 		__ delayed()->nop(); 
		__ bind (slowCase);
	}
  2210 #endif // COMPILER1
  2212 	// The instruction at the verified entry point must be 5 bytes or longer
  2213 	// because it can be patched on the fly by make_non_entrant. The stack bang
  2214 	// instruction fits that requirement. 
  2216 	// Generate stack overflow check
  2218 	if (UseStackBanging) {
		// bang_stack_with_offset will modify the value in A0, so save it
  2220 		__ push(A0);
  2221 		__ bang_stack_with_offset(StackShadowPages*os::vm_page_size());
  2222 		__ pop(A0);
  2223 	} else {
  2224 		// need a 5 byte instruction to allow MT safe patching to non-entrant
  2225 		__ nop(); 
  2226 		__ nop(); 
  2227 		__ nop(); 
  2228 		__ nop(); 
		__ nop(); 
	}
  2231 	// Generate a new frame for the wrapper.
	// does MIPS need this?
  2233 #ifndef OPT_THREAD
  2234 	__ get_thread(TREG);
  2235 #endif
  2236 //FIXME here
  2237 	__ st_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset()));
	// -2 because the return address is already present, and so is the saved FP
  2239 	__ move(AT, -(StackAlignmentInBytes));
  2240 	__ andr(SP, SP, AT);
  2242 	__ enter();
  2243 	__ addiu(SP, SP, -1 * (stack_size - 2*wordSize));
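	// (Assuming enter() saved RA and the caller's FP, those are the two
	//  words already on the stack, so SP now sits a full stack_size below
	//  the frame top: e.g. stack_size == 192 claims 192 - 2*8 = 176 bytes here.)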
  2245 	// Frame is now completed as far a size and linkage.
  2247 	int frame_complete = ((intptr_t)__ pc()) - start;
	// Calculate the difference between SP and FP. We need to know it
	// after the native call because on windows Java Natives will pop
	// the arguments and it is painful to do SP relative addressing
	// in a platform independent way. So after the call we switch to
	// FP relative addressing.
	// FIXME: fp_adjustment may actually not be right, because the
	// andr(SP, SP, AT) above may have changed SP.
	int fp_adjustment = stack_size - 2*wordSize;
  2258 #ifdef COMPILER2
  2259 	// C2 may leave the stack dirty if not in SSE2+ mode
  2260 	// if (UseSSE >= 2) {
  2261 	//  __ verify_FPU(0, "c2i transition should have clean FPU stack");
  2262 	//} else {
  2263 	__ empty_FPU_stack();
  2264 	//}
  2265 #endif /* COMPILER2 */
  2267 	// Compute the ebp offset for any slots used after the jni call
  2269 	int lock_slot_ebp_offset = (lock_slot_offset*VMRegImpl::stack_slot_size) - fp_adjustment;
	// We use TREG as a thread pointer because it is callee save and
	// if we load it once it is usable through the entire wrapper
	// const Register thread = edi;
	const Register thread = TREG;

	// We use S4 as the oop handle for the receiver/klass
	// It is callee save so it survives the call to native

	// const Register oop_handle_reg = esi;
	const Register oop_handle_reg = S4;
  2280   if (is_critical_native) {
  2281      __ stop("generate_native_wrapper in sharedRuntime <2>");
  2282 //TODO:Fu
  2283 /*
  2284     check_needs_gc_for_critical_native(masm, stack_slots, total_c_args, total_in_args,
  2285                                        oop_handle_offset, oop_maps, in_regs, in_sig_bt);
*/
  }
  2289 #ifndef OPT_THREAD
  2290 	__ get_thread(thread);
  2291 #endif
  2293   //
  2294   // We immediately shuffle the arguments so that any vm call we have to
  2295   // make from here on out (sync slow path, jvmpi, etc.) we will have
  2296   // captured the oops from our caller and have a valid oopMap for
  2297   // them.
  2299   // -----------------
  2300   // The Grand Shuffle 
  2301   //
  2302   // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv*
  2303   // and, if static, the class mirror instead of a receiver.  This pretty much
  2304   // guarantees that register layout will not match (and mips doesn't use reg
  2305   // parms though amd does).  Since the native abi doesn't use register args
  // and the java convention does, we don't have to worry about collisions.
  // All of our moves are reg->stack or stack->stack.
  2308   // We ignore the extra arguments during the shuffle and handle them at the
  2309   // last moment. The shuffle is described by the two calling convention
  2310   // vectors we have in our possession. We simply walk the java vector to
  2311   // get the source locations and the c vector to get the destinations.
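  // Concretely, in the forkAndExec example above: java arg 0 (the receiver)
  // arrives in T0 and moves to C arg 1 (A1), java arg 1 arrives in A0 and
  // moves to C arg 2 (A2), and so on, with the overflow re-based from
  // FP-relative (inbound) to SP-relative (outbound) slots.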
  2313 	int c_arg = method->is_static() ? 2 : 1 ;
  2315 	// Record esp-based slot for receiver on stack for non-static methods
  2316 	int receiver_offset = -1;
  2318 	// This is a trick. We double the stack slots so we can claim
  2319 	// the oops in the caller's frame. Since we are sure to have
	// more args than the caller, doubling is enough to make
  2321 	// sure we can capture all the incoming oop args from the
  2322 	// caller. 
  2323 	//
  2324 	OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
  2326   // Mark location of rbp (someday)
  2327   // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(rbp));
  2329   // Use eax, ebx as temporaries during any memory-memory moves we have to do
  2330   // All inbound args are referenced based on rbp and all outbound args via rsp.
  2334 #ifdef ASSERT
  2335   bool reg_destroyed[RegisterImpl::number_of_registers];
  2336   bool freg_destroyed[FloatRegisterImpl::number_of_registers];
  2337   for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) {
    reg_destroyed[r] = false;
  }
  2340   for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) {
    freg_destroyed[f] = false;
  }
  2344 #endif /* ASSERT */
	// We know that we only have args in at most two integer registers (ecx,
	// edx on x86), so two more registers are free as temporaries if we have
	// to do stack to stack moves.
	// All inbound args are referenced based on FP and all outbound args via SP.
  2350   // This may iterate in two different directions depending on the
  2351   // kind of native it is.  The reason is that for regular JNI natives
  2352   // the incoming and outgoing registers are offset upwards and for
  2353   // critical natives they are offset down.
  2354   GrowableArray<int> arg_order(2 * total_in_args);
  2355   VMRegPair tmp_vmreg;
  2356 //  tmp_vmreg.set1(rbx->as_VMReg());
  2357   tmp_vmreg.set1(T8->as_VMReg());
  2359   if (!is_critical_native) {
  2360     for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) {
  2361       arg_order.push(i);
      arg_order.push(c_arg);
    }
  2364   } else {
  2365     // Compute a valid move order, using tmp_vmreg to break any cycles
  2366      __ stop("generate_native_wrapper in sharedRuntime <2>");
  2367 //TODO:Fu
//    ComputeMoveOrder cmo(total_in_args, in_regs, total_c_args, out_regs, in_sig_bt, arg_order, tmp_vmreg);
  }
  2371   int temploc = -1;
  2372   for (int ai = 0; ai < arg_order.length(); ai += 2) {
  2373     int i = arg_order.at(ai);
  2374     int c_arg = arg_order.at(ai + 1);
  2375     __ block_comment(err_msg("move %d -> %d", i, c_arg));
  2376     if (c_arg == -1) {
  2377       assert(is_critical_native, "should only be required for critical natives");
  2378       // This arg needs to be moved to a temporary
  2379       __ move(tmp_vmreg.first()->as_Register(), in_regs[i].first()->as_Register());
  2380       in_regs[i] = tmp_vmreg;
  2381       temploc = i;
  2382       continue;
  2383     } else if (i == -1) {
  2384       assert(is_critical_native, "should only be required for critical natives");
  2385       // Read from the temporary location
  2386       assert(temploc != -1, "must be valid");
  2387       i = temploc;
      temploc = -1;
    }
  2390 #ifdef ASSERT
  2391     if (in_regs[i].first()->is_Register()) {
  2392       assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!");
  2393     } else if (in_regs[i].first()->is_FloatRegister()) {
      assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!");
    }
  2396     if (out_regs[c_arg].first()->is_Register()) {
  2397       reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
  2398     } else if (out_regs[c_arg].first()->is_FloatRegister()) {
      freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true;
    }
  2401 #endif /* ASSERT */
  2402     switch (in_sig_bt[i]) {
  2403       case T_ARRAY:
  2404         if (is_critical_native) {
  2405 	  __ stop("generate_native_wrapper in sharedRuntime <2>");
  2406          //TODO:Fu
  2407          // unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]);
  2408           c_arg++;
  2409 #ifdef ASSERT
  2410           if (out_regs[c_arg].first()->is_Register()) {
  2411             reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
  2412           } else if (out_regs[c_arg].first()->is_FloatRegister()) {
            freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true;
          }
  2415 #endif
  2416           break;
  2418       case T_OBJECT:
  2419         assert(!is_critical_native, "no oop arguments");
  2420         object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
  2421                     ((i == 0) && (!is_static)),
  2422                     &receiver_offset);
  2423         break;
  2424       case T_VOID:
  2425         break;
  2427       case T_FLOAT:
  2428         float_move(masm, in_regs[i], out_regs[c_arg]);
  2429           break;
  2431       case T_DOUBLE:
  2432         assert( i + 1 < total_in_args &&
  2433                 in_sig_bt[i + 1] == T_VOID &&
  2434                 out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
  2435         double_move(masm, in_regs[i], out_regs[c_arg]);
  2436         break;
  2438       case T_LONG :
  2439         long_move(masm, in_regs[i], out_regs[c_arg]);
  2440         break;
  2442       case T_ADDRESS: assert(false, "found T_ADDRESS in java args");
  2444       default:
  2445 //        move32_64(masm, in_regs[i], out_regs[c_arg]);
        simple_move32(masm, in_regs[i], out_regs[c_arg]);
    }
  }
  2450   // point c_arg at the first arg that is already loaded in case we
  2451   // need to spill before we call out
  2452    c_arg = total_c_args - total_in_args;
	// Pre-load a static method's oop into oop_handle_reg (S4).  Used both by
	// locking code and the normal JNI call code.
  2456 	__ move(oop_handle_reg, A1);
  2458 	if (method->is_static() && !is_critical_native) {
		//  load oop into a register
  2461 		int oop_index = __ oop_recorder()->find_index(JNIHandles::make_local(
  2462 					(method->method_holder())->java_mirror()));
  2465 		RelocationHolder rspec = oop_Relocation::spec(oop_index);
  2466 		__ relocate(rspec);
  2467 		//__ lui(oop_handle_reg, Assembler::split_high((int)JNIHandles::make_local(
  2468 		//	Klass::cast(method->method_holder())->java_mirror())));
  2469 		//__ addiu(oop_handle_reg, oop_handle_reg, Assembler::split_low((int)
  2470 		//    JNIHandles::make_local(Klass::cast(method->method_holder())->java_mirror())));
  2471 		__ li48(oop_handle_reg, (long)JNIHandles::make_local((method->method_holder())->java_mirror()));
  2472 	//	__ verify_oop(oop_handle_reg);
		// Now handlize the static class mirror; it's known not-null.
  2474 		__ sd( oop_handle_reg, SP, klass_offset); 
  2475 		map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
  2477 		// Now get the handle
  2478 		__ lea(oop_handle_reg, Address(SP, klass_offset));
  2479 		// store the klass handle as second argument
  2480 		__ move(A1, oop_handle_reg);
		// and protect the arg if we must spill
		c_arg--;
	}
  2484   // Change state to native (we save the return address in the thread, since it might not
  // be pushed on the stack when we do a stack traversal). It is enough that the pc()
  2486   // points into the right code segment. It does not have to be the correct return pc.
  2487   // We use the same pc/oopMap repeatedly when we call out
  2489 	intptr_t the_pc = (intptr_t) __ pc();
  2491 	oop_maps->add_gc_map(the_pc - start, map);
  2493 	//__ set_last_Java_frame(thread, esp, noreg, (address)the_pc);
  2494 	__ set_last_Java_frame(SP, noreg, NULL);
  2495 	__ relocate(relocInfo::internal_pc_type); 
	{
		intptr_t save_pc = (intptr_t)the_pc;
		__ li48(AT, save_pc);
	}
  2500 	__ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));
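	// The frame anchor now holds last_Java_sp plus a last_Java_pc equal to
	// the_pc, i.e. exactly the pc we registered the oopMap for above, so a
	// stack walk started while we are in native sees a consistent top frame.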
	// We have all of the arguments set up at this point. We must not touch
	// any argument registers from here on (if we save/restore them, there is
	// no oop map covering them).
	{
		SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0);
  2507 		int metadata_index = __ oop_recorder()->find_index(method());
  2508 		RelocationHolder rspec = metadata_Relocation::spec(metadata_index);
  2509 		__ relocate(rspec);
  2510 		//__ lui(T6, Assembler::split_high((int)JNIHandles::make_local(method())));
  2511 		//__ addiu(T6, T6, Assembler::split_low((int)JNIHandles::make_local(method())));
  2512 		__ li48(AT, (long)(method()));
		__ call_VM_leaf(
				CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry),
				thread, AT);
	}
	// These are register definitions we need for locking/unlocking
	// (x86 used eax for cmpxchg, ecx for the oop and edx for the BasicLock)
	//FIXME: I have no idea which register to use
	const Register swap_reg = T8;       // Value for cmpxchg
	const Register obj_reg  = T9;       // Will contain the oop
	//const Register lock_reg = T6;     // Address of compiler lock object (BasicLock)
	const Register lock_reg = c_rarg0;  // Address of compiler lock object (BasicLock)
  2532 	Label slow_path_lock;
  2533 	Label lock_done;
  2535 	// Lock a synchronized method
  2536 	if (method->is_synchronized()) {
  2537                 assert(!is_critical_native, "unhandled");
  2539 		const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes();
  2541 		// Get the handle (the 2nd argument)
  2542 		__ move(oop_handle_reg, A1);
  2544 		// Get address of the box
  2545 		__ lea(lock_reg, Address(FP, lock_slot_ebp_offset));
  2547 		// Load the oop from the handle 
  2548 		__ ld(obj_reg, oop_handle_reg, 0);
  2550 		if (UseBiasedLocking) {
  2551 			// Note that oop_handle_reg is trashed during this call
  2552 		__ biased_locking_enter(lock_reg, obj_reg, swap_reg, A1, 
				false, lock_done, &slow_path_lock);
		}
		// Load immediate 1 into swap_reg
  2557 		__ move(swap_reg, 1);
  2559 		__ ld(AT, obj_reg, 0);   
  2560 		__ orr(swap_reg, swap_reg, AT); 
  2562 		__ sd( swap_reg, lock_reg, mark_word_offset);
  2563 		__ cmpxchg(lock_reg, Address(obj_reg, 0), swap_reg);
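		// cmpxchg is an LL/SC loop underneath and leaves its success flag
		// in AT, so AT != 0 below means the CAS installed our displaced
		// header and the lock is ours.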
  2564 		__ bne(AT, R0, lock_done);
  2565 		__ delayed()->nop(); 
  2566 		// Test if the oopMark is an obvious stack pointer, i.e.,
  2567 		//  1) (mark & 3) == 0, and
  2568 		//  2) esp <= mark < mark + os::pagesize()
  2569 		// These 3 tests can be done by evaluating the following
  2570 		// expression: ((mark - esp) & (3 - os::vm_page_size())),
  2571 		// assuming both stack pointer and pagesize have their
  2572 		// least significant 2 bits clear.
  2573 		// NOTE: the oopMark is in swap_reg %eax as the result of cmpxchg
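		// Worked through (assuming a 4K page): 3 - os::vm_page_size() is
		// ~0xFFC, i.e. ...1111_0000_0000_0011 binary, so the and-result is
		// zero exactly when (mark - SP) is 4-byte aligned and smaller than
		// 4096: the displaced mark is a stack address in our own frame,
		// which makes this a recursive enter.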
  2575 		__ dsub(swap_reg, swap_reg,SP);
  2576  		__ move(AT, 3 - os::vm_page_size());
  2577 		__ andr(swap_reg , swap_reg, AT);
  2578 		// Save the test result, for recursive case, the result is zero
  2579 		__ sd(swap_reg, lock_reg, mark_word_offset); 
		// FIXME: why notEqual?
  2581 		__ bne(swap_reg,R0, slow_path_lock);
  2582 		__ delayed()->nop();  
  2583 		// Slow path will re-enter here
  2584 		__ bind(lock_done);
  2586 		if (UseBiasedLocking) {
  2587 			// Re-fetch oop_handle_reg as we trashed it above
			__ move(A1, oop_handle_reg);
		}
	}
  2593 	// Finally just about ready to make the JNI call
  2596 	// get JNIEnv* which is first argument to native
  2597   if (!is_critical_native) {
	__ addi(A0, thread, in_bytes(JavaThread::jni_environment_offset()));
  }
  2601 	// Example: Java_java_lang_ref_Finalizer_invokeFinalizeMethod(JNIEnv *env, jclass clazz, jobject ob)
	/* Load the second argument into A1 */
  2603 	//__ ld(A1, SP , wordSize ); 	// klass
  2605 	// Now set thread in native
  2606 	__ addi(AT, R0, _thread_in_native); 
  2607 	__ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); 
  2608 	// do the call
  2609 	__ call(method->native_function(), relocInfo::runtime_call_type);
  2610 	__ delayed()->nop();
  2611 	// WARNING - on Windows Java Natives use pascal calling convention and pop the
  2612 	// arguments off of the stack. We could just re-adjust the stack pointer here
  2613 	// and continue to do SP relative addressing but we instead switch to FP
  2614 	// relative addressing.
  2616 	// Unpack native results.  
  2617 	switch (ret_type) {
  2618 	case T_BOOLEAN: __ c2bool(V0);            break;
  2619 	case T_CHAR   : __ andi(V0,V0, 0xFFFF);      break;
  2620 	case T_BYTE   : __ sign_extend_byte (V0); break;
  2621 	case T_SHORT  : __ sign_extend_short(V0); break;
  2622 	case T_INT    : break;                    // nothing to do
  2623 	case T_DOUBLE :
  2624 	case T_FLOAT  :
  2625 	// Result is already in F0; we'll save it as needed
  2626 	break;
  2627 	case T_ARRAY:                 // Really a handle
  2628 	case T_OBJECT:                // Really a handle
  2629 	break; // can't de-handlize until after safepoint check
  2630 	case T_VOID: break;
  2631 	case T_LONG: break;
  2632 	default       : ShouldNotReachHere();
  2634 	// Switch thread to "native transition" state before reading the synchronization state.
  2635 	// This additional state is necessary because reading and testing the synchronization
  2636 	// state is not atomic w.r.t. GC, as this scenario demonstrates:
  2637 	//     Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
  2638 	//     VM thread changes sync state to synchronizing and suspends threads for GC.
  2639 	//     Thread A is resumed to finish this native method, but doesn't block here since it
  2640 	//     didn't see any synchronization in progress, and escapes.
  2644 	__ addi(AT, R0, _thread_in_native_trans); 
  2645 	__ sw(AT, thread, in_bytes(JavaThread::thread_state_offset()));    
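	// In outline, this transition plus the poll below behaves like the
	// following hedged pseudo-code (using only the calls emitted in this wrapper):
	//
	//   thread->set_thread_state(_thread_in_native_trans);   // the sw above
	//   if (SafepointSynchronize::_state != _not_synchronized ||
	//       thread->suspend_flags != 0) {
	//     JavaThread::check_special_condition_for_native_trans(thread);
	//   }
	//   thread->set_thread_state(_thread_in_Java);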
  2647   Label after_transition;
  2649 	// check for safepoint operation in progress and/or pending suspend requests
  2650 	{ Label Continue;
  2651 		// poll SafepointSynchronize::_state and compare with _not_synchronized
  2653 		__ li(AT, SafepointSynchronize::address_of_state());
  2654 		__ lw(A0, AT, 0);	
  2655 		__ addi(AT, A0, -SafepointSynchronize::_not_synchronized); 
  2656 		Label L;
  2657 		__ bne(AT,R0, L); 
  2658 		__ delayed()->nop();	
  2659 		__ lw(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); 
  2660 		__ beq(AT, R0, Continue); 
  2661 		__ delayed()->nop(); 
  2662 		__ bind(L);
  2664 		// Don't use call_VM as it will see a possible pending exception and forward it
  2665 		// and never return here preventing us from clearing _last_native_pc down below.
  2666 		// Also can't use call_VM_leaf either, as it will check that the registers holding
  2667 		// the bcp/locals pointers are preserved. So we do a runtime call
  2668 		// by hand.
  2669 		//
  2670 		save_native_result(masm, ret_type, stack_slots);
  2671 		__ move (A0, thread); 
  2672 		__ addi(SP,SP, -wordSize); 
  2673     __ push(S2);
  2674     __ move(AT, -(StackAlignmentInBytes));
  2675     __ move(S2, SP);     // use S2 as a sender SP holder
  2676     __ andr(SP, SP, AT); // align stack as required by ABI
  2677     if (!is_critical_native) {
  2678       __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), relocInfo::runtime_call_type);
  2679       __ delayed()->nop(); 
  2680     } else {
  2681       __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition), relocInfo::runtime_call_type);
  2682       __ delayed()->nop(); 
  2684     __ move(SP, S2);     // restore SP saved in S2
  2685     __ pop(S2);
  2686 		__ addi(SP,SP, wordSize); 
  2687 		// reinit heapbase for compressed oops
  2688 		__ reinit_heapbase();
  2689 		// Restore any method result value
  2690 		restore_native_result(masm, ret_type, stack_slots);
  2692     if (is_critical_native) {
  2693       // The call above performed the transition to thread_in_Java so
  2694       // skip the transition logic below.
  2695       __ beq(R0, R0, after_transition);
  2696       __ delayed()->nop(); 
  2699 		__ bind(Continue);
  2702 	// change thread state
  2703 	__ addi(AT, R0, _thread_in_Java); 
  2704 	__ sw(AT,  thread, in_bytes(JavaThread::thread_state_offset())); 
  2705   __ bind(after_transition);
  2706 	Label reguard;
  2707 	Label reguard_done;
  2708 	__ lw(AT, thread, in_bytes(JavaThread::stack_guard_state_offset())); 
  2709 	__ addi(AT, AT, -JavaThread::stack_guard_yellow_disabled); 
  2710 	__ beq(AT, R0, reguard);
  2711 	__ delayed()->nop();  
  2712 	// slow path reguard  re-enters here
  2713 	__ bind(reguard_done);
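	// Hedged sketch of the guard check above:
	//   if (thread->stack_guard_state() == JavaThread::stack_guard_yellow_disabled)
	//     goto reguard;  // re-enable the yellow zone via reguard_yellow_pages (slow path below)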
  2715 	// Handle possible exception (will unlock if necessary)
  2717 	// native result if any is live 
  2719 	// Unlock
  2720 	Label slow_path_unlock;
  2721 	Label unlock_done;
  2722 	if (method->is_synchronized()) {
  2724 		Label done;
  2726 		// Get locked oop from the handle we passed to jni
  2727 		__ ld( obj_reg, oop_handle_reg, 0);
  2728 		//FIXME 
  2729 		if (UseBiasedLocking) {
  2730 			__ biased_locking_exit(obj_reg, T8, done);
  2734 		// Simple recursive lock?
  2736 		__ ld(AT, FP, lock_slot_ebp_offset); 
  2737 		__ beq(AT, R0, done);
  2738 		__ delayed()->nop();	
  2739 		// Must save the result register if it is live now, because the unlock code clobbers it
  2740 		if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) {
  2741 			save_native_result(masm, ret_type, stack_slots);
  2744 		// get old displaced header
  2745 		__ ld (T8, FP, lock_slot_ebp_offset);
  2746 		// get address of the stack lock
  2749 		__ addi (c_rarg0, FP, lock_slot_ebp_offset);
  2750 		// Atomic swap old header if oop still contains the stack lock
  2753 		__ cmpxchg(T8, Address(obj_reg, 0), c_rarg0);
  2755 		__ beq(AT, R0, slow_path_unlock);
  2756 		__ delayed()->nop(); 
  2757 		// slow path re-enters here
  2758 		__ bind(unlock_done);
  2759 		if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) {
  2760 			restore_native_result(masm, ret_type, stack_slots);
  2763 		__ bind(done);
  2767 		SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0);
  2768 		// Tell dtrace about this method exit
  2769 		save_native_result(masm, ret_type, stack_slots);
  2770 		int metadata_index = __ oop_recorder()->find_index( (method()));
  2771 		RelocationHolder rspec = metadata_Relocation::spec(metadata_index);
  2772 		__ relocate(rspec);
  2775 		__ li48(AT, (long)(method()));
  2777 		__ call_VM_leaf(
  2778 				CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), 
  2779 				thread, AT);
  2780 		restore_native_result(masm, ret_type, stack_slots);
  2783 	// We can finally stop using that last_Java_frame we setup ages ago
  2785 	__ reset_last_Java_frame(false, true);
  2787 	// Unpack oop result
  2788 	if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
  2789 		Label L;
  2792 		__ beq(V0, R0, L);   // NULL handle => NULL oop
  2793 		__ delayed()->nop(); 
  2795 		__ ld(V0, V0, 0);    // dereference the handle
  2796 		__ bind(L);
  2798 		__ verify_oop(V0);
  2801   if (!is_critical_native) {
  2802 	// reset handle block
  2803 	__ ld(AT, thread, in_bytes(JavaThread::active_handles_offset()));
  2804 	__ sw(R0, AT, JNIHandleBlock::top_offset_in_bytes()); 
  2807   if (!is_critical_native) {
  2808 	// Any exception pending?
  2809 	__ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); 
  2811 	__ bne(AT, R0, exception_pending);
  2812 	__ delayed()->nop();
  2814 	// no exception, we're almost done
  2816 	// check that only result value is on FPU stack
  2817 	__ verify_FPU(ret_type == T_FLOAT || ret_type == T_DOUBLE ? 1 : 0, "native_wrapper normal exit");
  2819   // Floating point results need no fixup here: they are already in F0,
  2820   // where a return from a compiled method expects them. (The x86 port had
  2821   // to move them from the x87 stack into xmm registers at this point.)
  2834   // Return
  2835 #ifndef OPT_THREAD
  2836        __ get_thread(TREG);
  2837 #endif
  2838 	__ ld_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset()));
  2839 	__ leave();
  2841 	__ jr(RA);
  2842 	__ delayed()->nop(); 
  2843 	// Unexpected paths are out of line and go here
  2853 	// Slow path locking & unlocking
  2854 	if (method->is_synchronized()) {
  2856 		// BEGIN Slow path lock
  2858 		__ bind(slow_path_lock);
  2860                 // protect the args we've loaded
  2861                 save_args(masm, total_c_args, c_arg, out_regs);
  2863 		// has last_Java_frame setup. No exceptions so do vanilla call not call_VM
  2864 		// args are (oop obj, BasicLock* lock, JavaThread* thread)
  2866 		__ move(A0, obj_reg); 
  2867 		__ move(A1, lock_reg); 
  2868 		__ move(A2, thread); 
  2869 		__ addi(SP, SP, - 3*wordSize); 
  2871                 __ move(AT, -(StackAlignmentInBytes));
  2872                 __ move(S2, SP);     // use S2 as a sender SP holder
  2873                 __ andr(SP, SP, AT); // align stack as required by ABI
  2875 		__ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), relocInfo::runtime_call_type);
  2876 		__ delayed()->nop();
  2877                 __ move(SP, S2);
  2878 		__ addi(SP, SP, 3*wordSize); 
  2880                 restore_args(masm, total_c_args, c_arg, out_regs);
  2882 #ifdef ASSERT
  2883 		{ Label L;
  2885 			__ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); 
  2887 			__ beq(AT, R0, L); 
  2888 			__ delayed()->nop(); 
  2889 			__ stop("no pending exception allowed on exit from monitorenter");
  2890 			__ bind(L);
  2892 #endif
  2893 		__ b(lock_done);
  2894 		__ delayed()->nop();
  2895 		// END Slow path lock
  2897 		// BEGIN Slow path unlock
  2898 		__ bind(slow_path_unlock);
  2900 		// Slow path unlock
  2902 		if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) {
  2903 			save_native_result(masm, ret_type, stack_slots);
  2905 		// Save pending exception around call to VM (which contains an EXCEPTION_MARK)
  2907 		__ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); 
  2908 		__ push(AT); 
  2909 		__ sd(R0, thread, in_bytes(Thread::pending_exception_offset()));
  2911                 __ move(AT, -(StackAlignmentInBytes));
  2912                 __ move(S2, SP);     // use S2 as a sender SP holder
  2913                 __ andr(SP, SP, AT); // align stack as required by ABI
  2916 		// A1 = address of the BasicLock; FP-relative, so the push above does not affect it
  2917 		__ addi(A1, FP, lock_slot_ebp_offset);
  2919 		__ move(A0, obj_reg); 
  2920 		__ addi(SP,SP, -2*wordSize);
  2921 		__ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C),
  2922 				relocInfo::runtime_call_type);
  2923 		__ delayed()->nop(); 
  2924 		__ addi(SP,SP, 2*wordSize);
  2925                 __ move(SP, S2);
  2926 		// reinit heapbase for compressed oops
  2927 		__ reinit_heapbase();
  2928 #ifdef ASSERT
  2930 			Label L;
  2932 			__ ld(AT, thread, in_bytes(Thread::pending_exception_offset()));  // pointer-sized load
  2934 			__ beq(AT, R0, L); 
  2935 			__ delayed()->nop(); 
  2936 			__ stop("no pending exception allowed on exit complete_monitor_unlocking_C");
  2937 			__ bind(L);
  2939 #endif /* ASSERT */
  2941 		__ pop(AT); 
  2942 		__ sd(AT, thread, in_bytes(Thread::pending_exception_offset()));
  2943 		if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) {
  2944 			restore_native_result(masm, ret_type, stack_slots);
  2946 		__ b(unlock_done);
  2947 		__ delayed()->nop(); 
  2948 		// END Slow path unlock
  2952 	// SLOW PATH Reguard the stack if needed
  2954 	__ bind(reguard);
  2955 	save_native_result(masm, ret_type, stack_slots);
  2956 	__ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), 
  2957 			relocInfo::runtime_call_type);
  2958 	__ delayed()->nop();	
  2959 	// reinit heapbase for compressed oops
  2960 	__ reinit_heapbase();
  2961 	restore_native_result(masm, ret_type, stack_slots);
  2962 	__ b(reguard_done);
  2963 	__ delayed()->nop();
  2965 	// BEGIN EXCEPTION PROCESSING
  2966     if (!is_critical_native) {
  2967 	// Forward  the exception
  2968 	__ bind(exception_pending);
  2970 	// remove possible return value from FPU register stack
  2971 	__ empty_FPU_stack();
  2973 	// pop our frame
  2974 	// forward_exception_entry needs the return address on the stack
  2975         __ addiu(SP, FP, wordSize);
  2976 	__ ld(FP, SP, (-1) * wordSize);
  2978 	// and forward the exception
  2979 	__ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
  2980 	__ delayed()->nop();
  2982 	__ flush();
  2984 	nmethod *nm = nmethod::new_native_nmethod(method,
  2985                         compile_id,
  2986 			masm->code(),
  2987 			vep_offset,
  2988 			frame_complete,
  2989 			stack_slots / VMRegImpl::slots_per_word,
  2990 			(is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
  2991 			in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size),
  2992 			oop_maps);
  2994   if (is_critical_native) {
  2995     nm->set_lazy_critical_native(true);
  2997 	return nm;
  3002 #ifdef HAVE_DTRACE_H
  3003 // ---------------------------------------------------------------------------
  3004 // Generate a dtrace nmethod for a given signature.  The method takes arguments
  3005 // in the Java compiled code convention, marshals them to the native
  3006 // abi and then leaves nops at the position you would expect to call a native
  3007 // function. When the probe is enabled the nops are replaced with a trap
  3008 // instruction that dtrace inserts and the trace will cause a notification
  3009 // to dtrace.
  3010 //
  3011 // The probes are only able to take primitive types and java/lang/String as
  3012 // arguments.  No other java types are allowed. Strings are converted to utf8
  3013 // strings so that from dtrace's point of view java strings are converted to C
  3014 // strings. There is an arbitrary fixed limit on the total space that a method
  3015 // can use for converting the strings (256 chars per string in the signature),
  3016 // so any java string larger than this is truncated.
  3018 static int  fp_offset[ConcreteRegisterImpl::number_of_registers] = { 0 };
  3019 static bool offsets_initialized = false;
  3021 static VMRegPair reg64_to_VMRegPair(Register r) {
  3022   VMRegPair ret;
  3023   if (wordSize == 8) {
  3024     ret.set2(r->as_VMReg());
  3025   } else {
  3026     ret.set_pair(r->successor()->as_VMReg(), r->as_VMReg());
  3028   return ret;
  3032 nmethod *SharedRuntime::generate_dtrace_nmethod(
  3033     MacroAssembler *masm, methodHandle method) {
  3036   // generate_dtrace_nmethod is guarded by a mutex so we are sure to
  3037   // be single threaded in this method.
  3038   assert(AdapterHandlerLibrary_lock->owned_by_self(), "must be");
  3040   // Fill in the signature array, for the calling-convention call.
  3041   int total_args_passed = method->size_of_parameters();
  3043   BasicType* in_sig_bt  = NEW_RESOURCE_ARRAY(BasicType, total_args_passed);
  3044   VMRegPair  *in_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed);
  3046   // The signature we are going to use for the trap that dtrace will see
  3047   // java/lang/String is converted. We drop "this" and any other object
  3048   // is converted to NULL.  (A one-slot java/lang/Long object reference
  3049   // is converted to a two-slot long, which is why we double the allocation).
  3050   BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed * 2);
  3051   VMRegPair* out_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed * 2);
  3053   int i=0;
  3054   int total_strings = 0;
  3055   int first_arg_to_pass = 0;
  3056   int total_c_args = 0;
  3058   // Skip the receiver as dtrace doesn't want to see it
  3059   if( !method->is_static() ) {
  3060     in_sig_bt[i++] = T_OBJECT;
  3061     first_arg_to_pass = 1;
  3064   SignatureStream ss(method->signature());
  3065   for ( ; !ss.at_return_type(); ss.next()) {
  3066     BasicType bt = ss.type();
  3067     in_sig_bt[i++] = bt;  // Collect remaining bits of signature
  3068     out_sig_bt[total_c_args++] = bt;
  3069     if( bt == T_OBJECT) {
  3070       symbolOop s = ss.as_symbol_or_null();
  3071       if (s == vmSymbols::java_lang_String()) {
  3072         total_strings++;
  3073         out_sig_bt[total_c_args-1] = T_ADDRESS;
  3074       } else if (s == vmSymbols::java_lang_Boolean() ||
  3075                  s == vmSymbols::java_lang_Byte()) {
  3076         out_sig_bt[total_c_args-1] = T_BYTE;
  3077       } else if (s == vmSymbols::java_lang_Character() ||
  3078                  s == vmSymbols::java_lang_Short()) {
  3079         out_sig_bt[total_c_args-1] = T_SHORT;
  3080       } else if (s == vmSymbols::java_lang_Integer() ||
  3081                  s == vmSymbols::java_lang_Float()) {
  3082         out_sig_bt[total_c_args-1] = T_INT;
  3083       } else if (s == vmSymbols::java_lang_Long() ||
  3084                  s == vmSymbols::java_lang_Double()) {
  3085         out_sig_bt[total_c_args-1] = T_LONG;
  3086         out_sig_bt[total_c_args++] = T_VOID;
  3088     } else if ( bt == T_LONG || bt == T_DOUBLE ) {
  3089       in_sig_bt[i++] = T_VOID;   // Longs & doubles take 2 Java slots
  3090       // We convert double to long
  3091       out_sig_bt[total_c_args-1] = T_LONG;
  3092       out_sig_bt[total_c_args++] = T_VOID;
  3093     } else if ( bt == T_FLOAT) {
  3094       // We convert float to int
  3095       out_sig_bt[total_c_args-1] = T_INT;
  3099   assert(i==total_args_passed, "validly parsed signature");
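  // A worked example (hedged, for illustration only): for a virtual method
  //   void m(String s, long j, float f)
  // the loop above produces
  //   in_sig_bt  = { T_OBJECT /*this*/, T_OBJECT, T_LONG, T_VOID, T_FLOAT }
  //   out_sig_bt = { T_ADDRESS /*utf8*/, T_LONG, T_VOID, T_INT }
  // with total_strings == 1 and first_arg_to_pass == 1.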
  3101   // Now get the compiled-Java layout as input arguments
  3102   int comp_args_on_stack;
  3103   comp_args_on_stack = SharedRuntime::java_calling_convention(
  3104       in_sig_bt, in_regs, total_args_passed, false);
  3106   // We have received a description of where all the java args are located
  3107   // on entry to the wrapper. We need to convert these args to where
  3108   // a native (non-JNI) function would expect them. To figure out
  3109   // where they go we convert the java signature to a C signature and remove
  3110   // T_VOID for any long/double we might have received.
  3113   // Now figure out where the args must be stored and how much stack space
  3114   // they require (neglecting out_preserve_stack_slots but space for storing
  3115   // the 1st six register arguments). It's weird; see int_stk_helper.
  3116   //
  3117   int out_arg_slots;
  3118   out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args);
  3120   // Calculate the total number of stack slots we will need.
  3122   // First count the abi requirement plus all of the outgoing args
  3123   int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
  3125   // Plus a temp for possible conversion of float/double/long register args
  3127   int conversion_temp = stack_slots;
  3128   stack_slots += 2;
  3131   // Now space for the string(s) we must convert
  3133   int string_locs = stack_slots;
  3134   stack_slots += total_strings *
  3135                    (max_dtrace_string_size / VMRegImpl::stack_slot_size);
  3137   // OK, the space we have allocated will look like:
  3138   //
  3139   //
  3140   // FP-> |                     |
  3141   //      |---------------------|
  3142   //      | string[n]           |
  3143   //      |---------------------| <- string_locs[n]
  3144   //      | string[n-1]         |
  3145   //      |---------------------| <- string_locs[n-1]
  3146   //      | ...                 |
  3147   //      | ...                 |
  3148   //      |---------------------| <- string_locs[1]
  3149   //      | string[0]           |
  3150   //      |---------------------| <- string_locs[0]
  3151   //      | temp                |
  3152   //      |---------------------| <- conversion_temp
  3153   //      | outbound memory     |
  3154   //      | based arguments     |
  3155   //      |                     |
  3156   //      |---------------------|
  3157   //      |                     |
  3158   // SP-> | out_preserved_slots |
  3159   //
  3160   //
  3162   // Now compute the actual number of stack words we need, rounding to keep
  3163   // the stack properly aligned.
  3164   stack_slots = round_to(stack_slots, 4 * VMRegImpl::slots_per_word);
  3166   int stack_size = stack_slots * VMRegImpl::stack_slot_size;
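  // Hedged arithmetic note: with VMRegImpl::slots_per_word == 2 and a 4-byte
  // stack_slot_size, the round_to above rounds stack_slots up to a multiple
  // of 8 slots, i.e. the frame size becomes a multiple of 32 bytes.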
  3168   intptr_t start = (intptr_t)__ pc();
  3170   // First thing make an ic check to see if we should even be here
  3173     Label L;
  3174     const Register temp_reg = G3_scratch;
  3175     Address ic_miss(temp_reg, SharedRuntime::get_ic_miss_stub());
  3176     __ verify_oop(O0);
  3177     __ ld_ptr(O0, oopDesc::klass_offset_in_bytes(), temp_reg);
  3178     __ cmp(temp_reg, G5_inline_cache_reg);
  3179     __ brx(Assembler::equal, true, Assembler::pt, L);
  3180     __ delayed()->nop();
  3182     __ jump_to(ic_miss, 0);
  3183     __ delayed()->nop();
  3184     __ align(CodeEntryAlignment);
  3185     __ bind(L);
  3188   int vep_offset = ((intptr_t)__ pc()) - start;
  3191   // The instruction at the verified entry point must be 5 bytes or longer
  3192   // because it can be patched on the fly by make_non_entrant. The stack bang
  3193   // instruction fits that requirement.
  3195   // Generate stack overflow check before creating frame
  3196   __ generate_stack_overflow_check(stack_size);
  3198   assert(((intptr_t)__ pc() - start - vep_offset) >= 5,
  3199          "valid size for make_non_entrant");
  3201   // Generate a new frame for the wrapper.
  3202   __ save(SP, -stack_size, SP);
  3204   // Frame is now completed as far as size and linkage.
  3206   int frame_complete = ((intptr_t)__ pc()) - start;
  3208 #ifdef ASSERT
  3209   bool reg_destroyed[RegisterImpl::number_of_registers];
  3210   bool freg_destroyed[FloatRegisterImpl::number_of_registers];
  3211   for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) {
  3212     reg_destroyed[r] = false;
  3214   for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) {
  3215     freg_destroyed[f] = false;
  3218 #endif /* ASSERT */
  3220   VMRegPair zero;
  3221   const Register g0 = G0; // without this we get a compiler warning (why??)
  3222   zero.set2(g0->as_VMReg());
  3224   int c_arg, j_arg;
  3226   Register conversion_off = noreg;
  3228   for (j_arg = first_arg_to_pass, c_arg = 0 ;
  3229        j_arg < total_args_passed ; j_arg++, c_arg++ ) {
  3231     VMRegPair src = in_regs[j_arg];
  3232     VMRegPair dst = out_regs[c_arg];
  3234 #ifdef ASSERT
  3235     if (src.first()->is_Register()) {
  3236       assert(!reg_destroyed[src.first()->as_Register()->encoding()], "ack!");
  3237     } else if (src.first()->is_FloatRegister()) {
  3238       assert(!freg_destroyed[src.first()->as_FloatRegister()->encoding(
  3239                                                FloatRegisterImpl::S)], "ack!");
  3241     if (dst.first()->is_Register()) {
  3242       reg_destroyed[dst.first()->as_Register()->encoding()] = true;
  3243     } else if (dst.first()->is_FloatRegister()) {
  3244       freg_destroyed[dst.first()->as_FloatRegister()->encoding(
  3245                                                  FloatRegisterImpl::S)] = true;
  3247 #endif /* ASSERT */
  3249     switch (in_sig_bt[j_arg]) {
  3250       case T_ARRAY:
  3251       case T_OBJECT:
  3253           if (out_sig_bt[c_arg] == T_BYTE  || out_sig_bt[c_arg] == T_SHORT ||
  3254               out_sig_bt[c_arg] == T_INT || out_sig_bt[c_arg] == T_LONG) {
  3255             // need to unbox a one-slot value
  3256             Register in_reg = L0;
  3257             Register tmp = L2;
  3258             if ( src.first()->is_reg() ) {
  3259               in_reg = src.first()->as_Register();
  3260             } else {
  3261               assert(Assembler::is_simm13(reg2offset(src.first()) + STACK_BIAS),
  3262                      "must be");
  3263               __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, in_reg);
  3265             // If the final destination is an acceptable register
  3266             if ( dst.first()->is_reg() ) {
  3267               if ( dst.is_single_phys_reg() || out_sig_bt[c_arg] != T_LONG ) {
  3268                 tmp = dst.first()->as_Register();
  3272             Label skipUnbox;
  3273             if ( wordSize == 4 && out_sig_bt[c_arg] == T_LONG ) {
  3274               __ mov(G0, tmp->successor());
  3276             __ br_null(in_reg, true, Assembler::pn, skipUnbox);
  3277             __ delayed()->mov(G0, tmp);
  3279             BasicType bt = out_sig_bt[c_arg];
  3280             int box_offset = java_lang_boxing_object::value_offset_in_bytes(bt);
  3281             switch (bt) {
  3282                 case T_BYTE:
  3283                   __ ldub(in_reg, box_offset, tmp); break;
  3284                 case T_SHORT:
  3285                   __ lduh(in_reg, box_offset, tmp); break;
  3286                 case T_INT:
  3287                   __ ld(in_reg, box_offset, tmp); break;
  3288                 case T_LONG:
  3289                   __ ld_long(in_reg, box_offset, tmp); break;
  3290                 default: ShouldNotReachHere();
  3293             __ bind(skipUnbox);
  3294             // If tmp wasn't final destination copy to final destination
  3295             if (tmp == L2) {
  3296               VMRegPair tmp_as_VM = reg64_to_VMRegPair(L2);
  3297               if (out_sig_bt[c_arg] == T_LONG) {
  3298                 long_move(masm, tmp_as_VM, dst);
  3299               } else {
  3300                 move32_64(masm, tmp_as_VM, out_regs[c_arg]);
  3303             if (out_sig_bt[c_arg] == T_LONG) {
  3304               assert(out_sig_bt[c_arg+1] == T_VOID, "must be");
  3305               ++c_arg; // move over the T_VOID to keep the loop indices in sync
  3307           } else if (out_sig_bt[c_arg] == T_ADDRESS) {
  3308             Register s =
  3309                 src.first()->is_reg() ? src.first()->as_Register() : L2;
  3310             Register d =
  3311                 dst.first()->is_reg() ? dst.first()->as_Register() : L2;
  3313             // We store the oop now so that the conversion pass can reach
  3314             // it while in the inner frame. This will be the only store if
  3315             // the oop is NULL.
  3316             if (s != L2) {
  3317               // src is register
  3318               if (d != L2) {
  3319                 // dst is register
  3320                 __ mov(s, d);
  3321               } else {
  3322                 assert(Assembler::is_simm13(reg2offset(dst.first()) +
  3323                           STACK_BIAS), "must be");
  3324                 __ st_ptr(s, SP, reg2offset(dst.first()) + STACK_BIAS);
  3326             } else {
  3327                 // src not a register
  3328                 assert(Assembler::is_simm13(reg2offset(src.first()) +
  3329                            STACK_BIAS), "must be");
  3330                 __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, d);
  3331                 if (d == L2) {
  3332                   assert(Assembler::is_simm13(reg2offset(dst.first()) +
  3333                              STACK_BIAS), "must be");
  3334                   __ st_ptr(d, SP, reg2offset(dst.first()) + STACK_BIAS);
  3337           } else if (out_sig_bt[c_arg] != T_VOID) {
  3338             // Convert the arg to NULL
  3339             if (dst.first()->is_reg()) {
  3340               __ mov(G0, dst.first()->as_Register());
  3341             } else {
  3342               assert(Assembler::is_simm13(reg2offset(dst.first()) +
  3343                          STACK_BIAS), "must be");
  3344               __ st_ptr(G0, SP, reg2offset(dst.first()) + STACK_BIAS);
  3348         break;
  3349       case T_VOID:
  3350         break;
  3352       case T_FLOAT:
  3353         if (src.first()->is_stack()) {
  3354           // Stack to stack/reg is simple
  3355           move32_64(masm, src, dst);
  3356         } else {
  3357           if (dst.first()->is_reg()) {
  3358             // freg -> reg
  3359             int off =
  3360               STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size;
  3361             Register d = dst.first()->as_Register();
  3362             if (Assembler::is_simm13(off)) {
  3363               __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
  3364                      SP, off);
  3365               __ ld(SP, off, d);
  3366             } else {
  3367               if (conversion_off == noreg) {
  3368                 __ set(off, L6);
  3369                 conversion_off = L6;
  3371               __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
  3372                      SP, conversion_off);
  3373               __ ld(SP, conversion_off , d);
  3375           } else {
  3376             // freg -> mem
  3377             int off = STACK_BIAS + reg2offset(dst.first());
  3378             if (Assembler::is_simm13(off)) {
  3379               __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
  3380                      SP, off);
  3381             } else {
  3382               if (conversion_off == noreg) {
  3383                 __ set(off, L6);
  3384                 conversion_off = L6;
  3386               __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
  3387                      SP, conversion_off);
  3391         break;
  3393       case T_DOUBLE:
  3394         assert( j_arg + 1 < total_args_passed &&
  3395                 in_sig_bt[j_arg + 1] == T_VOID &&
  3396                 out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
  3397         if (src.first()->is_stack()) {
  3398           // Stack to stack/reg is simple
  3399           long_move(masm, src, dst);
  3400         } else {
  3401           Register d = dst.first()->is_reg() ? dst.first()->as_Register() : L2;
  3403           // Destination could be an odd reg on 32bit in which case
  3404           // we can't load direct to the destination.
  3406           if (!d->is_even() && wordSize == 4) {
  3407             d = L2;
  3409           int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size;
  3410           if (Assembler::is_simm13(off)) {
  3411             __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(),
  3412                    SP, off);
  3413             __ ld_long(SP, off, d);
  3414           } else {
  3415             if (conversion_off == noreg) {
  3416               __ set(off, L6);
  3417               conversion_off = L6;
  3419             __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(),
  3420                    SP, conversion_off);
  3421             __ ld_long(SP, conversion_off, d);
  3423           if (d == L2) {
  3424             long_move(masm, reg64_to_VMRegPair(L2), dst);
  3427         break;
  3429       case T_LONG :
  3430         // 32bit can't do a split move of something like g1 -> O0, O1
  3431         // so use a memory temp
  3432         if (src.is_single_phys_reg() && wordSize == 4) {
  3433           Register tmp = L2;
  3434           if (dst.first()->is_reg() &&
  3435               (wordSize == 8 || dst.first()->as_Register()->is_even())) {
  3436             tmp = dst.first()->as_Register();
  3439           int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size;
  3440           if (Assembler::is_simm13(off)) {
  3441             __ stx(src.first()->as_Register(), SP, off);
  3442             __ ld_long(SP, off, tmp);
  3443           } else {
  3444             if (conversion_off == noreg) {
  3445               __ set(off, L6);
  3446               conversion_off = L6;
  3448             __ stx(src.first()->as_Register(), SP, conversion_off);
  3449             __ ld_long(SP, conversion_off, tmp);
  3452           if (tmp == L2) {
  3453             long_move(masm, reg64_to_VMRegPair(L2), dst);
  3455         } else {
  3456           long_move(masm, src, dst);
  3458         break;
  3460       case T_ADDRESS: assert(false, "found T_ADDRESS in java args");
  3462       default:
  3463         move32_64(masm, src, dst);
  3468   // If we have any strings we must store any register based arg to the stack
  3469   // This includes any still live xmm registers too.
  3471   if (total_strings > 0 ) {
  3473     // protect all the arg registers
  3474     __ save_frame(0);
  3475     __ mov(G2_thread, L7_thread_cache);
  3476     const Register L2_string_off = L2;
  3478     // Get first string offset
  3479     __ set(string_locs * VMRegImpl::stack_slot_size, L2_string_off);
  3481     for (c_arg = 0 ; c_arg < total_c_args ; c_arg++ ) {
  3482       if (out_sig_bt[c_arg] == T_ADDRESS) {
  3484         VMRegPair dst = out_regs[c_arg];
  3485         const Register d = dst.first()->is_reg() ?
  3486             dst.first()->as_Register()->after_save() : noreg;
  3488         // It's a string oop and it was already copied to the out arg
  3489         // position
  3490         if (d != noreg) {
  3491           __ mov(d, O0);
  3492         } else {
  3493           assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS),
  3494                  "must be");
  3495           __ ld_ptr(FP,  reg2offset(dst.first()) + STACK_BIAS, O0);
  3497         Label skip;
  3499         __ br_null(O0, false, Assembler::pn, skip);
  3500         __ delayed()->add(FP, L2_string_off, O1);
  3502         if (d != noreg) {
  3503           __ mov(O1, d);
  3504         } else {
  3505           assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS),
  3506                  "must be");
  3507           __ st_ptr(O1, FP,  reg2offset(dst.first()) + STACK_BIAS);
  3510         __ call(CAST_FROM_FN_PTR(address, SharedRuntime::get_utf),
  3511                 relocInfo::runtime_call_type);
  3512         __ delayed()->add(L2_string_off, max_dtrace_string_size, L2_string_off);
  3514         __ bind(skip);
  3519     __ mov(L7_thread_cache, G2_thread);
  3520     __ restore();
  3525   // Ok now we are done. Need to place the nop that dtrace wants in order to
  3526   // patch in the trap
  3528   int patch_offset = ((intptr_t)__ pc()) - start;
  3530   __ nop();
  3533   // Return
  3535   __ ret();
  3536   __ delayed()->restore();
  3538   __ flush();
  3540   nmethod *nm = nmethod::new_dtrace_nmethod(
  3541       method, masm->code(), vep_offset, patch_offset, frame_complete,
  3542       stack_slots / VMRegImpl::slots_per_word);
  3543   return nm;
  3547 #endif // HAVE_DTRACE_H
  3549 // this function returns the adjustment (in number of words) to a c2i adapter
  3550 // activation for use during deoptimization
  3551 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
  3552 	return (callee_locals - callee_parameters) * Interpreter::stackElementWords;
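// Hedged example: a callee with 3 parameters and 7 locals yields
// (7 - 3) * Interpreter::stackElementWords extra words, i.e. the locals the
// interpreter frame needs beyond what the c2i caller already pushed.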
  3555 // "Top of Stack" slots that may be unused by the calling convention but must
  3556 // otherwise be preserved.
  3557 // On Intel these are not necessary and the value can be zero.
  3558 // On Sparc this describes the words reserved for storing a register window
  3559 // when an interrupt occurs.
  3560 uint SharedRuntime::out_preserve_stack_slots() {
  3562   return 0;
  3564 /*
  3565 static void gen_new_frame(MacroAssembler* masm, bool deopt) {
  3566 //
  3567 // Common out the new frame generation for deopt and uncommon trap
  3568 //
  3569   Register        G3pcs              = G3_scratch; // Array of new pcs (input)
  3570   Register        Oreturn0           = O0;
  3571   Register        Oreturn1           = O1;
  3572   Register        O2UnrollBlock      = O2;
  3573   Register        O3array            = O3;         // Array of frame sizes (input)
  3574   Register        O4array_size       = O4;         // number of frames (input)
  3575   Register        O7frame_size       = O7;         // number of frames (input)
  3577   __ ld_ptr(O3array, 0, O7frame_size);
  3578   __ sub(G0, O7frame_size, O7frame_size);
  3579   __ save(SP, O7frame_size, SP);
  3580   __ ld_ptr(G3pcs, 0, I7);                      // load frame's new pc
  3582   #ifdef ASSERT
  3583   // make sure that the frames are aligned properly
  3584 #ifndef _LP64
  3585   __ btst(wordSize*2-1, SP);
  3586   __ breakpoint_trap(Assembler::notZero);
  3587 #endif
  3588   #endif
  3590   // Deopt needs to pass some extra live values from frame to frame
  3592   if (deopt) {
  3593     __ mov(Oreturn0->after_save(), Oreturn0);
  3594     __ mov(Oreturn1->after_save(), Oreturn1);
  3597   __ mov(O4array_size->after_save(), O4array_size);
  3598   __ sub(O4array_size, 1, O4array_size);
  3599   __ mov(O3array->after_save(), O3array);
  3600   __ mov(O2UnrollBlock->after_save(), O2UnrollBlock);
  3601   __ add(G3pcs, wordSize, G3pcs);               // point to next pc value
  3603   #ifdef ASSERT
  3604   // trash registers to show a clear pattern in backtraces
  3605   __ set(0xDEAD0000, I0);
  3606   __ add(I0,  2, I1);
  3607   __ add(I0,  4, I2);
  3608   __ add(I0,  6, I3);
  3609   __ add(I0,  8, I4);
  3610   // Don't touch I5 could have valuable savedSP
  3611   __ set(0xDEADBEEF, L0);
  3612   __ mov(L0, L1);
  3613   __ mov(L0, L2);
  3614   __ mov(L0, L3);
  3615   __ mov(L0, L4);
  3616   __ mov(L0, L5);
  3618   // trash the return value as there is nothing to return yet
  3619   __ set(0xDEAD0001, O7);
  3620   #endif
  3622   __ mov(SP, O5_savedSP);
  3626 static void make_new_frames(MacroAssembler* masm, bool deopt) {
  3627   //
  3628   // loop through the UnrollBlock info and create new frames
  3629   //
  3630   Register        G3pcs              = G3_scratch;
  3631   Register        Oreturn0           = O0;
  3632   Register        Oreturn1           = O1;
  3633   Register        O2UnrollBlock      = O2;
  3634   Register        O3array            = O3;
  3635   Register        O4array_size       = O4;
  3636   Label           loop;
  3638   // Before we make new frames, check to see if stack is available.
  3639   // Do this after the caller's return address is on top of stack
  3640   if (UseStackBanging) {
  3641     // Get total frame size for interpreted frames
  3642     __ ld(Address(O2UnrollBlock, 0,
  3643          Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()), O4);
  3644     __ bang_stack_size(O4, O3, G3_scratch);
  3647   __ ld(Address(O2UnrollBlock, 0, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()), O4array_size);
  3648   __ ld_ptr(Address(O2UnrollBlock, 0, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()), G3pcs);
  3650   __ ld_ptr(Address(O2UnrollBlock, 0, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()), O3array);
  3652   // Adjust old interpreter frame to make space for new frame's extra java locals
  3653   //
  3654   // We capture the original sp for the transition frame only because it is needed in
  3655   // order to properly calculate interpreter_sp_adjustment. Even though in real life
  3656   // every interpreter frame captures a savedSP it is only needed at the transition
  3657   // (fortunately). If we had to have it correct everywhere then we would need to
  3658   // be told the sp_adjustment for each frame we create. If the frame size array
  3659   // were to have twice the frame count entries then we could have pairs [sp_adjustment, frame_size]
  3660   // for each frame we create and keep up the illusion every where.
  3661   //
  3663   __ ld(Address(O2UnrollBlock, 0, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()), O7);
  3664   __ mov(SP, O5_savedSP);       // remember initial sender's original sp before adjustment
  3665   __ sub(SP, O7, SP);
  3667 #ifdef ASSERT
  3668   // make sure that there is at least one entry in the array
  3669   __ tst(O4array_size);
  3670   __ breakpoint_trap(Assembler::zero);
  3671 #endif
  3673   // Now push the new interpreter frames
  3674   __ bind(loop);
  3676   // allocate a new frame, filling the registers
  3678   gen_new_frame(masm, deopt);        // allocate an interpreter frame
  3680   __ tst(O4array_size);
  3681   __ br(Assembler::notZero, false, Assembler::pn, loop);
  3682   __ delayed()->add(O3array, wordSize, O3array);
  3683   __ ld_ptr(G3pcs, 0, O7);                      // load final frame new pc
  3686 */
  3688 //------------------------------generate_deopt_blob----------------------------
  3689 // Ought to generate an ideal graph & compile, but here's some hand-written
  3690 // assembly instead.
  3691 void SharedRuntime::generate_deopt_blob() {
  3692   // allocate space for the code
  3693   ResourceMark rm;
  3694   // setup code generation tools
  3696   CodeBuffer     buffer ("deopt_blob", 8000, 2048); // enlarged from 4000 while debugging (aoqi FIXME)
  3697   MacroAssembler* masm  = new MacroAssembler( & buffer);
  3698   int frame_size_in_words;
  3699   OopMap* map = NULL;
  3700   // Account for the extra args we place on the stack
  3701   // by the time we call fetch_unroll_info
  3702   const int additional_words = 2; // deopt kind, thread
  3704   OopMapSet *oop_maps = new OopMapSet();
  3706   address start = __ pc();
  3707   Label cont;
  3708   // we use S3 for DeOpt reason register
  3709   Register reason = S3;
  3710   // use S6 for thread register
  3711   Register thread = TREG;
  3712   // use S7 for fetch_unroll_info returned UnrollBlock
  3713   Register unroll = S7;
  3714   // Prolog for non exception case!
  3715   // Correct the return address we were given.
  3716   // FIXME: is the return address on the top of stack or in RA?
  3717   __ addi(RA, RA, - (NativeCall::return_address_offset));
  3718   // Save everything in sight.
  3719   map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words);
  3720   // Normal deoptimization
  3721   __ move(reason, Deoptimization::Unpack_deopt);
  3722   __ b(cont);
  3723   __ delayed()->nop();
  3725   int reexecute_offset = __ pc() - start;
  3727    // Reexecute case
  3728    // the return address is the pc that describes which bci to re-execute at
  3730    // No need to update map as each call to save_live_registers will produce identical oopmap
  3731   //__ addi(RA, RA, - (NativeCall::return_address_offset));
  3732   (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words);
  3733   __ move(reason, Deoptimization::Unpack_reexecute); 
  3734   __ b(cont);
  3735   __ delayed()->nop();
  3737   int   exception_offset = __ pc() - start;
  3738   // Prolog for exception case
  3740   // all registers are dead at this entry point, except for V0 and
  3741   // V1 which contain the exception oop and exception pc
  3742   // respectively.  Set them in TLS and fall thru to the
  3743   // unpack_with_exception_in_tls entry point.
  3745   __ get_thread(thread);
  3746   __ st_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); 
  3747   __ st_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset()));
  3748   int exception_in_tls_offset = __ pc() - start;
  3749   // new implementation because exception oop is now passed in JavaThread
  3751   // Prolog for exception case
  3752   // All registers must be preserved because they might be used by LinearScan
  3753   // Exception oop and throwing PC are passed in JavaThread
  3754   // tos: stack at point of call to method that threw the exception (i.e. only
  3755   // args are on the stack, no return address)
  3757   // Return address will be patched later with the throwing pc. The correct value is not 
  3758   // available now because loading it from memory would destroy registers.
  3759    // Save everything in sight.
  3760   // No need to update map as each call to save_live_registers will produce identical oopmap
  3761   __ addi(RA, RA, - (NativeCall::return_address_offset));
  3762   (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words);
  3764   // Now it is safe to overwrite any register
  3765   // store the correct deoptimization type
  3766   __ move(reason, Deoptimization::Unpack_exception);
  3767   // load throwing pc from JavaThread and patch it as the return address 
  3768   // of the current frame. Then clear the field in JavaThread
  3769   __ get_thread(thread);
  3770   __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset()));
  3771   __ st_ptr(V1, SP, RegisterSaver::raOffset() * wordSize); //save ra
  3772   __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset()));
  3775 #ifdef ASSERT
  3776   // verify that there is really an exception oop in JavaThread
  3777   __ ld_ptr(AT, thread, in_bytes(JavaThread::exception_oop_offset()));
  3778   __ verify_oop(AT);
  3779   // verify that there is no pending exception
  3780   Label no_pending_exception;
  3781   __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset()));
  3782   __ beq(AT, R0, no_pending_exception); 
  3783   __ delayed()->nop(); 
  3784   __ stop("must not have pending exception here");
  3785   __ bind(no_pending_exception);
  3786 #endif
  3787   __ bind(cont);
  3788   // Compiled code leaves the floating point stack dirty, empty it.
  3789   __ empty_FPU_stack();
  3792   // Call C code.  Need thread and this frame, but NOT official VM entry
  3793   // crud.  We cannot block on this call, no GC can happen.  
  3794 #ifndef OPT_THREAD
  3795   __ get_thread(thread);
  3796 #endif
  3798 /*
  3800    0x000000555bd82aec: dadd a0, s6, zero                ; __ move(A0, thread);
  3801    0x000000555bd82af0: daddi sp, sp, 0xfffffff0         ; __ addi(SP, SP, -additional_words  * wordSize);
  3802    0x000000555bd82af4: sd sp, 0x1c8(s6)                 ; __ set_last_Java_frame(thread, NOREG, NOREG, NULL);
  3803    0x000000555bd82af8: lui at, 0x0                      ; __ li64(AT, save_pc);
  3804    0x000000555bd82afc: ori at, at, 0x55
  3805    0x000000555bd82b00: dsll at, at, 16
  3806    0x000000555bd82b04: ori at, at, 0x5bd8
  3807    0x000000555bd82b08: dsll at, at, 16
  3808    0x000000555bd82b0c: ori at, at, 0x2b34       ; save_pc = pc() +  NativeMovConstReg::instruction_size + NativeCall::return_address_offset + 4
  3809    0x000000555bd82b10: sd at, 0x1d0(s6)
  3810    0x000000555bd82b14: lui t9, 0x0
  3811    0x000000555bd82b18: ori t9, t9, 0x55
  3812    0x000000555bd82b1c: dsll t9, t9, 16
  3813    0x000000555bd82b20: ori t9, t9, 0x5aa6
  3814    0x000000555bd82b24: dsll t9, t9, 16
  3815    0x000000555bd82b28: ori t9, t9, 0x4074
  3816    0x000000555bd82b2c: jalr t9
  3817    0x000000555bd82b30: sll zero, zero, 0
  3819    0x000000555bd82b34: daddiu sp, sp, 0x10	; save_pc
  3820  */
  3821   __ move(A0, thread);
  3822   __ addi(SP, SP, -additional_words  * wordSize);
  3824   __ set_last_Java_frame(NOREG, NOREG, NULL);
  3826   // Call fetch_unroll_info().  Need thread and this frame, but NOT official VM entry - cannot block on
  3827   // this call, no GC can happen.  Call should capture return values.
  3829   __ relocate(relocInfo::internal_pc_type); 
  3831     intptr_t save_pc = (intptr_t)__ pc() +  NativeMovConstReg::instruction_size + NativeCall::return_address_offset + 4;
  3832     __ li48(AT, save_pc);
  3834   __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));
  3836   __ call((address)Deoptimization::fetch_unroll_info);
  3837   //__ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), relocInfo::runtime_call_type);
  3838   __ delayed()->nop();
  3839   oop_maps->add_gc_map(__ pc() - start, map);
  3840   __ addiu(SP, SP, additional_words * wordSize);
  3841   __ get_thread(thread);
  3842   __ reset_last_Java_frame(false, true);
  3844   // Load UnrollBlock into S7
  3845   __ move(unroll, V0);
  3848   // Move the unpack kind to a safe place in the UnrollBlock because
  3849   // we are very short of registers
  3851   Address unpack_kind(unroll, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes());
  3853   __ sw(reason, unpack_kind);   // save the unpack_kind value
  3855   // Retrieve the possible live values (return values)
  3856   // All callee save registers representing jvm state
  3857   // are now in the vframeArray.
  3859   Label noException;
  3860   __ move(AT, Deoptimization::Unpack_exception);
  3861   __ bne(AT, reason, noException);// Was exception pending?
  3862   __ delayed()->nop();
  3863   __ ld_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset()));
  3864   __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset()));
  3865   __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset()));
  3866   __ st_ptr(R0, thread, in_bytes(JavaThread::exception_oop_offset()));
  3868   __ verify_oop(V0);
  3870   // Overwrite the result registers with the exception results.
  3871   __ st_ptr(V0, SP, RegisterSaver::v0Offset()*wordSize); 
  3872   __ st_ptr(V1, SP, RegisterSaver::v1Offset()*wordSize);
  3874   __ bind(noException);
  3877   // Stack is back to only having register save data on the stack.
  3878   // Now restore the result registers. Everything else is either dead or captured
  3879   // in the vframeArray.
  3881   RegisterSaver::restore_result_registers(masm);
  3882   // All of the register save area has been popped off the stack. Only the
  3883   // return address remains.
  3884   // Pop all the frames we must move/replace. 
  3885   // Frame picture (youngest to oldest)
  3886   // 1: self-frame (no frame link)
  3887   // 2: deopting frame  (no frame link)
  3888   // 3: caller of deopting frame (could be compiled/interpreted). 
  3889   //
  3890   // Note: by leaving the return address of self-frame on the stack
  3891   // and using the size of frame 2 to adjust the stack
  3892   // when we are done the return to frame 3 will still be on the stack.
  3894   // register for the sender's sp
  3895   Register sender_sp = Rsender;
  3896   // register for frame pcs
  3897   Register pcs = T0;
  3898   // register for frame sizes
  3899   Register sizes = T1;
  3900   // register for frame count
  3901   Register count = T3;
  3903   // Pop deoptimized frame
  3904   __ lw(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes());
  3905   __ add(SP, SP, AT);
  3906   // sp should be pointing at the return address to the caller (3)
  3908   // Load array of frame pcs into pcs
  3909   __ ld_ptr(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes());
  3910   __ addi(SP, SP, wordSize);  // trash the old pc
  3911 	// Load array of frame sizes into sizes (T1)
  3912   __ ld_ptr(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes());
  3916 	// Load count of frames into count (T3)
  3917   __ lw(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes());
  3918   // Pick up the initial fp we should save
  3919   __ ld(FP, unroll,  Deoptimization::UnrollBlock::initial_info_offset_in_bytes());
  3920    // Now adjust the caller's stack to make up for the extra locals
  3921   // but record the original sp so that we can save it in the skeletal interpreter
  3922   // frame and the stack walking of interpreter_sender will get the unextended sp
  3923   // value and not the "real" sp value.
  3924   __ move(sender_sp, SP);
  3925   __ lw(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes());
  3926   __ sub(SP, SP, AT);
  3928   // Push interpreter frames in a loop
  3929 /*
  3931 Loop:
  3932    0x000000555bd82d18: lw t2, 0x0(t1)           ; lw sizes[i]	<--- error lw->ld
  3933    0x000000555bd82d1c: ld at, 0x0(t0)           ; ld pcs[i]
  3934    0x000000555bd82d20: daddi t2, t2, 0xfffffff0 ; t2 -= 16 
  3935    0x000000555bd82d24: daddi sp, sp, 0xfffffff0
  3936    0x000000555bd82d28: sd fp, 0x0(sp)           ; push fp
  3937    0x000000555bd82d2c: sd at, 0x8(sp)           ; push at
  3938    0x000000555bd82d30: dadd fp, sp, zero        ; fp <- sp 
  3939    0x000000555bd82d34: dsub sp, sp, t2          ; sp -= t2 
  3940    0x000000555bd82d38: sd zero, 0xfffffff0(fp)  ; __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize);
  3941    0x000000555bd82d3c: sd s4, 0xfffffff8(fp)    ; __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);
  3942    0x000000555bd82d40: dadd s4, sp, zero        ; move(sender_sp, SP);
  3943    0x000000555bd82d44: daddi t3, t3, 0xffffffff ; count --
  3944    0x000000555bd82d48: daddi t1, t1, 0x4        ; sizes += 4
  3945    0x000000555bd82d4c: bne t3, zero, 0x000000555bd82d18
  3946    0x000000555bd82d50: daddi t0, t0, 0x4        ; <--- error    t0 += 8
  3947  */
  3949 // pcs[0] = frame_pcs[0] = deopt_sender.raw_pc()
  3950   Label loop;
  3951   __ bind(loop);
  3952   __ ld(T2, sizes, 0);		// Load frame size
  3953   __ ld_ptr(AT, pcs, 0);  	       // save return address
  3954 	__ addi(T2, T2, -2*wordSize);           // we'll push pc and FP by hand
  3955   __ push2(AT, FP);			
  3956   __ move(FP, SP);
  3957   __ sub(SP, SP, T2); 			// Prolog!
  3958   // This value is corrected by layout_activation_impl
  3959   __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); 
  3960   __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable
  3961   __ move(sender_sp, SP);	// pass to next frame
  3962   __ addi(count, count, -1); 	// decrement counter
  3963   __ addi(sizes, sizes, wordSize); 	// Bump array pointer (sizes)
  3964   __ bne(count, R0, loop);
  3965   __ delayed()->addi(pcs, pcs, wordSize); 	// Bump array pointer (pcs)
  3966   __ ld(AT, pcs, 0);			// frame_pcs[number_of_frames] = Interpreter::deopt_entry(vtos, 0);
  3967   // Re-push self-frame
  3968   __ push2(AT, FP);			
  3969   __ move(FP, SP);
  3970   __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); 
  3971   __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize); 
  3972   __ addi(SP, SP, -(frame_size_in_words - 2 - additional_words) * wordSize);
  3974   // Restore frame locals after moving the frame
  3975   __ sd(V0, SP, RegisterSaver::v0Offset() * wordSize);
  3976   __ sd(V1, SP, RegisterSaver::v1Offset() * wordSize);
  3977 	__ sdc1(F0, SP, RegisterSaver::fpResultOffset()* wordSize); // save float result
  3978   __ sdc1(F1, SP, (RegisterSaver::fpResultOffset() + 1) * wordSize);
  // Call unpack_frames().  Need thread and this frame, but NOT official VM entry:
  // we cannot block on this call, no GC can happen.
  __ move(A1, reason);          // exec_mode
  __ get_thread(thread);
  __ move(A0, thread);          // thread
  __ addi(SP, SP, (-additional_words) * wordSize);

  // set last_Java_sp, last_Java_fp
  __ set_last_Java_frame(NOREG, FP, NULL);

  __ move(AT, -(StackAlignmentInBytes));
  __ andr(SP, SP, AT);          // Fix stack alignment as required by ABI

  __ relocate(relocInfo::internal_pc_type);
  {
    intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + NativeCall::return_address_offset + 4;
    __ li48(AT, save_pc);
  }
  __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));
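  // save_pc is the value the stack walker will see as this frame's last Java
  // pc: the li48 sequence that loads it (NativeMovConstReg::instruction_size),
  // plus one instruction (+4) for the sd above that stores it into the frame
  // anchor, plus the call sequence itself (NativeCall::return_address_offset).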
  //__ call(Deoptimization::unpack_frames);
  __ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), relocInfo::runtime_call_type);
  __ delayed()->nop();
  // Revert SP alignment after call since we're going to do some SP relative addressing below
  __ ld(SP, thread, in_bytes(JavaThread::last_Java_sp_offset()));
  // Set an oopmap for the call site
  oop_maps->add_gc_map(__ offset(), new OopMap(frame_size_in_words, 0));

  __ push(V0);

  __ get_thread(thread);
  __ reset_last_Java_frame(false, false);

  // Collect return values
  __ ld(V0, SP, (RegisterSaver::v0Offset() + additional_words + 1) * wordSize);
  __ ld(V1, SP, (RegisterSaver::v1Offset() + additional_words + 1) * wordSize);
  __ ldc1(F0, SP, RegisterSaver::fpResultOffset() * wordSize);        // Reload the float return value
  __ ldc1(F1, SP, (RegisterSaver::fpResultOffset() + 1) * wordSize);
  // FIXME
  // Clear floating point stack before returning to interpreter
  __ empty_FPU_stack();
  // FIXME: we should consider float and double
  // Push a float or double return value if necessary.
  __ leave();

  // Jump to interpreter
  __ jr(RA);
  __ delayed()->nop();

  masm->flush();
  _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words);
  _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
}
#ifdef COMPILER2

//------------------------------generate_uncommon_trap_blob--------------------
// Ought to generate an ideal graph & compile, but here's some MIPS assembly
// instead.
void SharedRuntime::generate_uncommon_trap_blob() {
  // Allocate space for the code
  ResourceMark rm;
  // Setup code generation tools
  CodeBuffer buffer("uncommon_trap_blob", 512 * 80, 512 * 40);
  MacroAssembler* masm = new MacroAssembler(&buffer);
  enum frame_layout {
    s0_off, s0_off2,
    s1_off, s1_off2,
    s2_off, s2_off2,
    s3_off, s3_off2,
    s4_off, s4_off2,
    s5_off, s5_off2,
    s6_off, s6_off2,
    s7_off, s7_off2,
    fp_off, fp_off2,
    return_off, return_off2,    // slot for return address    sp + 9
    framesize
  };
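  // Each 64-bit register occupies two adjacent 4-byte stack slots (hence the
  // paired *_off/*_off2 entries), so framesize counts 32-bit slots and the
  // offsets below are scaled by BytesPerInt rather than wordSize.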
  assert(framesize % 4 == 0, "sp not 16-byte aligned");

  address start = __ pc();

  // Push self-frame.
  __ daddiu(SP, SP, -framesize * BytesPerInt);

  __ sd(RA, SP, return_off * BytesPerInt);
  __ sd(FP, SP, fp_off * BytesPerInt);

  // Save callee-saved registers S0-S7 so the deoptimization code can find
  // their values in this frame.
  __ sd(S0, SP, s0_off * BytesPerInt);
  __ sd(S1, SP, s1_off * BytesPerInt);
  __ sd(S2, SP, s2_off * BytesPerInt);
  __ sd(S3, SP, s3_off * BytesPerInt);
  __ sd(S4, SP, s4_off * BytesPerInt);
  __ sd(S5, SP, s5_off * BytesPerInt);
  __ sd(S6, SP, s6_off * BytesPerInt);
  __ sd(S7, SP, s7_off * BytesPerInt);

  __ daddi(FP, SP, fp_off * BytesPerInt);

  // Clear the floating point exception state
  __ empty_FPU_stack();

  Register thread = TREG;

#ifndef OPT_THREAD
  __ get_thread(thread);
#endif
  // set last_Java_sp
  __ set_last_Java_frame(NOREG, FP, NULL);
  __ relocate(relocInfo::internal_pc_type);
  assert(NativeCall::return_address_offset == 24, "in sharedRuntime return_address_offset");
  {
    long save_pc = (long)__ pc() + 28 + NativeCall::return_address_offset;
    __ li48(AT, save_pc);
    __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));
  }
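  // The magic 28 above is presumably the byte size of the seven instructions
  // between the relocation and the start of the call sequence (the
  // 4-instruction li48, the sd, and the two moves below), so that save_pc
  // lands on the instruction after the call's delay slot.  If the code in
  // between changes, the constant must change with it.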
  // Call C code.  Need thread but NOT official VM entry
  // crud.  We cannot block on this call, no GC can happen.  Call should
  // capture callee-saved registers as well as return values.
  __ move(A0, thread);
  // argument already in T0
  __ move(A1, T0);
  __ li48(T9, (long)Deoptimization::uncommon_trap);
  __ jalr(T9);
  __ delayed()->nop();

  // Set an oopmap for the call site
  OopMapSet* oop_maps = new OopMapSet();
  OopMap* map = new OopMap(framesize, 0);

  map->set_callee_saved(VMRegImpl::stack2reg(s0_off), S0->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg(s1_off), S1->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg(s2_off), S2->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg(s3_off), S3->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg(s4_off), S4->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg(s5_off), S5->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg(s6_off), S6->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg(s7_off), S7->as_VMReg());

  //oop_maps->add_gc_map( __ offset(), true, map);
  oop_maps->add_gc_map(__ offset(), map);

#ifndef OPT_THREAD
  __ get_thread(thread);
#endif
  __ reset_last_Java_frame(false, false);

  // Load UnrollBlock into S7
  Register unroll = S7;
  __ move(unroll, V0);

  // Pop all the frames we must move/replace.
  //
  // Frame picture (youngest to oldest)
  // 1: self-frame (no frame link)
  // 2: deopting frame (no frame link)
  // 3: possible-i2c-adapter-frame
  // 4: caller of deopting frame (could be compiled/interpreted; if interpreted
  //    we will create a c2i adapter here)
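  // (An i2c adapter shuffles arguments from the interpreter's layout into
  // compiled-code registers; a c2i adapter does the reverse, which is why a
  // deopt may find either kind of adapter frame next to the deopting frame.)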
  // Pop self-frame.  We have no frame, and must rely only on the unroll
  // block (in S7) and SP.
  __ daddiu(SP, SP, framesize * BytesPerInt);

  // Pop deoptimized frame
  __ lw(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes());
  __ dadd(SP, SP, AT);
  // register for frame pcs
  Register pcs = T8;
  // register for frame sizes
  Register sizes = T9;
  // register for frame count
  Register count = T3;
  // register for the sender's sp
  Register sender_sp = T1;

  // sp should be pointing at the return address to the caller (4)
  // Load array of frame pcs
  __ ld(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes());

  /* 2012/9/7 Not needed in MIPS:
  __ addiu(SP, SP, wordSize);
  */

  // Load array of frame sizes and the number of frames
  __ ld(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes());
  __ lwu(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes());

  // Pick up the initial fp we should save
  __ ld(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes());

  // Now adjust the caller's stack to make up for the extra locals
  // but record the original sp so that we can save it in the skeletal interpreter
  // frame and the stack walking of interpreter_sender will get the unextended sp
  // value and not the "real" sp value.
  __ move(sender_sp, SP);
  __ lw(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes());
  __ dsub(SP, SP, AT);
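  // caller_adjustment extends the caller's stack so the interpreted callee's
  // locals (which overlap the caller's outgoing arguments) fit; sender_sp
  // keeps the unextended value for the stack walker, as described above.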
  // Push interpreter frames in a loop
  Label loop;
  __ bind(loop);
  __ ld(T2, sizes, 0);                  // Load frame size
  __ ld(AT, pcs, 0);                    // Load return address
  __ daddi(T2, T2, -2 * wordSize);      // We'll push pc and fp by hand
  __ push2(AT, FP);
  __ move(FP, SP);
  __ dsub(SP, SP, T2);                  // Prolog!
  // This value is corrected by layout_activation_impl
  __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize);
  __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize); // Make it walkable
  __ move(sender_sp, SP);               // Pass to next frame
  __ daddi(count, count, -1);           // Decrement counter
  __ daddi(sizes, sizes, wordSize);     // Bump array pointer (sizes)
  __ addi(pcs, pcs, wordSize);          // Bump array pointer (pcs)
  __ bne(count, R0, loop);
  __ delayed()->nop();

  __ ld(RA, pcs, 0);

  // Re-push self-frame
  __ daddi(SP, SP, -2 * wordSize);      // make room for old fp and return address
  __ sd(FP, SP, 0 * wordSize);          // save old fp
  __ sd(RA, SP, 1 * wordSize);          // save final return address
  __ move(FP, SP);                      // set new fp
  __ daddi(SP, SP, -(framesize / 2 - 2) * wordSize);
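  // framesize counts 4-byte slots while wordSize is 8 bytes, so framesize / 2
  // converts slots to words; the "- 2" accounts for the fp/return-address
  // pair pushed just above.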
  // set last_Java_sp, last_Java_fp
  __ set_last_Java_frame(NOREG, FP, NULL);

  __ move(AT, -(StackAlignmentInBytes));
  __ andr(SP, SP, AT);   // Fix stack alignment as required by ABI

  __ relocate(relocInfo::internal_pc_type);
  {
    long save_pc = (long)__ pc() + 28 + NativeCall::return_address_offset;
    __ li48(AT, save_pc);
  }
  __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));

  // Call C code.  Need thread but NOT official VM entry
  // crud.  We cannot block on this call, no GC can happen.  Call should
  // restore return values to their stack-slots with the new SP.
  __ move(A0, thread);
  __ move(A1, Deoptimization::Unpack_uncommon_trap);
  __ li48(T9, (long)Deoptimization::unpack_frames);
  __ jalr(T9);
  __ delayed()->nop();
  // Set an oopmap for the call site
  //oop_maps->add_gc_map( __ offset(), true, new OopMap( framesize, 0 ) );
  oop_maps->add_gc_map(__ offset(), new OopMap(framesize, 0));

  __ reset_last_Java_frame(true, true);

  // Pop self-frame.
  __ leave();     // Epilog!

  // Jump to interpreter
  __ jr(RA);
  __ delayed()->nop();

  // -------------
  // make sure all code is generated
  masm->flush();

  _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, framesize / 2);
}

#endif // COMPILER2
//------------------------------generate_handler_blob-------------------
//
// Generate a special Compile2Runtime blob that saves all registers, and sets
// up an OopMap and calls safepoint code to stop the compiled code for
// a safepoint.
//
// This blob is jumped to (via a breakpoint and the signal handler) from a
// safepoint in compiled code.

SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int pool_type) {

  // Account for thread arg in our frame
  const int additional_words = 0;
  int frame_size_in_words;

  assert(StubRoutines::forward_exception_entry() != NULL, "must be generated before");

  ResourceMark rm;
  OopMapSet* oop_maps = new OopMapSet();
  OopMap* map;

  // Allocate space for the code and setup code generation tools
  CodeBuffer buffer("handler_blob", 2048, 512);
  MacroAssembler* masm = new MacroAssembler(&buffer);

  const Register thread = TREG;
  address start   = __ pc();
  address call_pc = NULL;
  bool cause_return = (pool_type == POLL_AT_RETURN);
  bool save_vectors = (pool_type == POLL_AT_VECTOR_LOOP);
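  // pool_type tells us where the poll was taken: POLL_AT_RETURN means we got
  // here from a poll at a return site, with the caller's return address still
  // live in RA; POLL_AT_VECTOR_LOOP marks a poll inside a vectorized loop,
  // where the vector registers must be preserved as well (save_vectors).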
  // If cause_return is true we are at a poll_return: the return address to
  // the caller of the nmethod being safepointed is already in RA, so we can
  // leave it there and effectively complete the return and safepoint in the
  // caller.  Otherwise we load the saved exception pc into RA.
  __ push(thread);
#ifndef OPT_THREAD
  __ get_thread(thread);
#endif

  if (!cause_return) {
    __ ld_ptr(RA, Address(thread, JavaThread::saved_exception_pc_offset()));
  }

  __ pop(thread);
  map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words, save_vectors);

#ifndef OPT_THREAD
  __ get_thread(thread);
#endif
  // The following is basically a call_VM.  However, we need the precise
  // address of the call in order to generate an oopmap.  Hence, we do all
  // the work ourselves.
  __ move(A0, thread);
  __ set_last_Java_frame(NOREG, NOREG, NULL);

  //__ relocate(relocInfo::internal_pc_type);
  if (!cause_return) {
/*
    intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + NativeCall::return_address_offset + 4;
    __ li48(AT, save_pc);
    __ sd(AT, thread, in_bytes(JavaThread::last_Java_pc_offset()));
*/
  }

  // do the call
  //__ lui(T9, Assembler::split_high((int)call_ptr));
  //__ addiu(T9, T9, Assembler::split_low((int)call_ptr));
  __ call(call_ptr);
  __ delayed()->nop();
  // Set an oopmap for the call site.  This oopmap will map all
  // oop-registers and debug-info registers as callee-saved.  This
  // will allow deoptimization at this safepoint to find all possible
  // debug-info recordings, as well as let GC find all oops.
  oop_maps->add_gc_map(__ offset(), map);

  Label noException;

  // Clear last_Java_sp again
  __ reset_last_Java_frame(false, false);

  __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset()));
  __ beq(AT, R0, noException);
  __ delayed()->nop();

  // Exception pending
  RegisterSaver::restore_live_registers(masm, save_vectors);
  // forward_exception_entry needs the return address on the stack
  __ push(RA);
  //__ lui(T9, Assembler::split_high((int)StubRoutines::forward_exception_entry()));
  //__ addiu(T9, T9, Assembler::split_low((int)StubRoutines::forward_exception_entry()));
  __ li(T9, StubRoutines::forward_exception_entry());
  __ jr(T9);
  __ delayed()->nop();

  // No exception case
  __ bind(noException);
  // Normal exit, restore registers and return.
  RegisterSaver::restore_live_registers(masm, save_vectors);
  __ jr(RA);
  __ delayed()->nop();

  masm->flush();

  // Fill-out other meta info
  return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words);
}
//
// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss)
//
// Generate a stub that calls into the vm to find out the proper destination
// of a java call.  All the argument registers are live at this point
// but since this is generic code we don't know what they are and the caller
// must do any gc of the args.
//
RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) {
  assert(StubRoutines::forward_exception_entry() != NULL, "must be generated before");

  // allocate space for the code
  ResourceMark rm;

  //CodeBuffer buffer(name, 1000, 512);
  //FIXME. aoqi. code_size
  CodeBuffer buffer(name, 20000, 2048);
  MacroAssembler* masm = new MacroAssembler(&buffer);

  int frame_size_words;
  // we put the thread in A0

  OopMapSet* oop_maps = new OopMapSet();
  OopMap* map = NULL;

  int start = __ offset();
  map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words);

  int frame_complete = __ offset();

  const Register thread = T8;
  __ get_thread(thread);

  __ move(A0, thread);
  __ set_last_Java_frame(noreg, FP, NULL);
  //__ addi(SP, SP, -wordSize);
  // align the stack before invoking native code
  __ move(AT, -(StackAlignmentInBytes));
  __ andr(SP, SP, AT);
  __ relocate(relocInfo::internal_pc_type);
  {
    intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + NativeCall::return_address_offset + 1 * BytesPerInstWord;
    //tty->print_cr(" %s :%d, name:%s, pc: %lx, save_pc: %lx, frame_size_words: %lx", __func__, __LINE__, name, __ pc(), save_pc, frame_size_words); //aoqi_test
    __ li48(AT, save_pc);
  }
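  // As in the deopt blob, save_pc is the return address of the call below:
  // the li48 that loads it (NativeMovConstReg::instruction_size), plus one
  // instruction (BytesPerInstWord) for the sd that stores it into
  // last_Java_pc, plus the call sequence (NativeCall::return_address_offset).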
  __ sd(AT, thread, in_bytes(JavaThread::last_Java_pc_offset()));

  __ call(destination);
  __ delayed()->nop();

  // Set an oopmap for the call site.
  // We need this not only for callee-saved registers, but also for volatile
  // registers that the compiler might be keeping live across a safepoint.
  oop_maps->add_gc_map(__ offset() - start, map);
  // V0 contains the address we are going to jump to assuming no exception got installed
  __ get_thread(thread);
  __ ld_ptr(SP, thread, in_bytes(JavaThread::last_Java_sp_offset()));
  // clear last_Java_sp
  __ reset_last_Java_frame(true, true);
  // check for pending exceptions
  Label pending;
  __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset()));
  __ bne(AT, R0, pending);
  __ delayed()->nop();

  // get the returned Method*
  // FIXME: does MIPS need this?
  __ get_vm_result_2(Rmethod, thread);  // Refer to OpenJDK8
  __ st_ptr(Rmethod, SP, RegisterSaver::methodOffset() * wordSize);
  __ st_ptr(V0, SP, RegisterSaver::v0Offset() * wordSize);
  RegisterSaver::restore_live_registers(masm);

  // We are back to the original state on entry and ready to go to the callee method.
  __ jr(V0);
  __ delayed()->nop();

  // Pending exception after the safepoint
  __ bind(pending);

  RegisterSaver::restore_live_registers(masm);

  // exception pending => remove activation and forward to exception handler
  // forward_exception_entry needs the return address on the stack
  __ push(RA);
  __ get_thread(thread);
  __ st_ptr(R0, thread, in_bytes(JavaThread::vm_result_offset()));
  __ ld_ptr(V0, thread, in_bytes(Thread::pending_exception_offset()));
  __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
  __ delayed()->nop();

  // -------------
  // make sure all code is generated
  masm->flush();

  RuntimeStub* tmp = RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true);
  return tmp;
}
/*
void SharedRuntime::generate_stubs() {
  _wrong_method_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address,
        SharedRuntime::handle_wrong_method), "wrong_method_stub");
  _ic_miss_blob      = generate_resolve_blob(CAST_FROM_FN_PTR(address,
        SharedRuntime::handle_wrong_method_ic_miss), "ic_miss_stub");
  _resolve_opt_virtual_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address,
        SharedRuntime::resolve_opt_virtual_call_C), "resolve_opt_virtual_call");
  _resolve_virtual_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address,
        SharedRuntime::resolve_virtual_call_C), "resolve_virtual_call");
  _resolve_static_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address,
        SharedRuntime::resolve_static_call_C), "resolve_static_call");
  _polling_page_safepoint_handler_blob = generate_handler_blob(CAST_FROM_FN_PTR(address,
        SafepointSynchronize::handle_polling_page_exception), false);
  _polling_page_return_handler_blob = generate_handler_blob(CAST_FROM_FN_PTR(address,
        SafepointSynchronize::handle_polling_page_exception), true);
  generate_deopt_blob();
#ifdef COMPILER2
  generate_uncommon_trap_blob();
#endif // COMPILER2
}
*/
extern "C" int SpinPause() { return 0; }
//extern "C" int SafeFetch32(int* adr, int errValue) { return 0; }
//extern "C" intptr_t SafeFetchN(intptr_t* adr, intptr_t errValue) { return *adr; }
